Example #1
0
def load_local_view(train_ratio=0.9):
    """
    Load the local-view flux files and split them into train/test sets.

    NOTE: it is a generator, NOT a normal function, and it does not shuffle
    by itself. Shuffle indices are received through ``send()`` after the
    first yield, so the caller can reuse the same indices for another view.

    yield twice
        1. total number of samples (PC rows followed by non-PC rows)
        2. (train_x, train_y), (test_x, test_y) of the local view

    Labels are 1 for rows read from ``local_all_pc_flux_filename`` and 0 for
    rows read from ``local_all_non_pc_flux_filename``.

    :param train_ratio: fraction of the samples placed in the training split;
        the remainder becomes the test split.
    """
    with load_ctx(local_all_pc_flux_filename):
        local_all_pcs = np.loadtxt(local_all_pc_flux_filename)
    with load_ctx(local_all_non_pc_flux_filename):
        local_all_non_pcs = np.loadtxt(local_all_non_pc_flux_filename)

    all_x = np.concatenate([local_all_pcs, local_all_non_pcs])
    # append a trailing axis of length 1 to the stacked data
    all_x = all_x.reshape(*all_x.shape, 1)
    # builtin int instead of the ``np.int`` alias, which NumPy >= 1.24 removed
    all_y = np.concatenate([
        np.ones(len(local_all_pcs), dtype=int),
        np.zeros(len(local_all_non_pcs), dtype=int)
    ])
    len_all_x, len_all_y = len(all_x), len(all_y)
    assert len_all_x == len_all_y, f"data and label size different ({len_all_x} != {len_all_y})"

    # hand the sample count to the caller and wait for (optional) indices
    shuffle_idx = yield len_all_x
    # x and y must be permuted with the same indices to stay aligned
    if shuffle_idx is not None:
        all_x = all_x[shuffle_idx]
        all_y = all_y[shuffle_idx]

    num_train = int(len_all_x * train_ratio)

    train_x, test_x = all_x[:num_train], all_x[num_train:]
    train_y, test_y = all_y[:num_train], all_y[num_train:]

    yield (train_x, train_y), (test_x, test_y)
Example #2
0
def load_global_view(train_ratio=0.9, shuffle_idx=None):
    """
    Load the global-view flux files and split them into train/test sets.

    NOTE: it is a generator, NOT a normal function.

    The reason for this is that "load_global_view" and "load_local_view"
    have to share shuffle indices. The indices are produced by the caller,
    but the caller does not know how many indices are needed until this
    generator yields the number of samples.

    yield twice
        1. total number of samples (PC rows followed by non-PC rows)
        2. (train_x, train_y), (test_x, test_y) of the global view

    Labels are 1 for rows read from ``all_pc_flux_filename`` and 0 for rows
    read from ``all_non_pc_flux_filename``.

    :param train_ratio: fraction of the samples placed in the training split;
        the remainder becomes the test split.
    :param shuffle_idx: optional indices used when nothing (or ``None``) is
        sent into the generator after the first yield. Indices passed via
        ``send()`` take precedence.
    """
    with load_ctx(all_pc_flux_filename):
        all_pcs = np.loadtxt(all_pc_flux_filename)
    with load_ctx(all_non_pc_flux_filename):
        all_non_pcs = np.loadtxt(all_non_pc_flux_filename)
    all_x = np.concatenate([all_pcs, all_non_pcs])

    # append a trailing axis of length 1 to the stacked data
    all_x = all_x.reshape(*all_x.shape, 1)
    # builtin int instead of the ``np.int`` alias, which NumPy >= 1.24 removed
    all_y = np.concatenate([
        np.ones(len(all_pcs), dtype=int),
        np.zeros(len(all_non_pcs), dtype=int)
    ])
    len_all_x, len_all_y = len(all_x), len(all_y)

    assert len_all_x == len_all_y, f"data and label size different ({len_all_x} != {len_all_y})"

    # hand the sample count to the caller and wait for (optional) indices;
    # keep the keyword argument as a fallback instead of discarding it
    sent_idx = yield len_all_x
    if sent_idx is not None:
        shuffle_idx = sent_idx

    # x and y must be permuted with the same indices to stay aligned
    if shuffle_idx is not None:
        all_x = all_x[shuffle_idx]
        all_y = all_y[shuffle_idx]

    num_train = int(len_all_x * train_ratio)
    train_x, test_x = all_x[:num_train], all_x[num_train:]
    train_y, test_y = all_y[:num_train], all_y[num_train:]

    yield (train_x, train_y), (test_x, test_y)
Example #3
0
def get_binned_normalized_PC_flux(num=1,
                                  merge=True,
                                  shuffle=False,
                                  num_bins=num_bins,
                                  overwrite=False,
                                  bin_width=None,
                                  return_kepids=False,
                                  scramble_id=None,
                                  nth=1):
    """
    Return the flux produced by ``get_binned_normalized_flux_by_IDs`` for
    the IDs selected by ``get_PC_IDs``.

    When ``num == np.inf`` the result is cached in ``all_pc_flux_filename``:
    an existing file is loaded and returned directly unless ``overwrite`` is
    set (in that case ``return_kepids`` is ignored and only the array is
    returned); otherwise the freshly computed flux is written to that file.

    ``shuffle`` is only honoured when ``nth == 1``; for any other ``nth`` it
    is switched off before the IDs are fetched.

    Returns the flux, or ``(flux, ids)`` when ``return_kepids`` is true and
    the cache was not used.
    """
    want_all = num == np.inf

    # fast path: reuse the cached file
    if want_all and os.path.exists(all_pc_flux_filename) and not overwrite:
        print('argument "return_kepids" is ignored')
        with load_ctx(all_pc_flux_filename):
            cached = np.loadtxt(all_pc_flux_filename)
        return cached

    if shuffle:
        honoured = nth == 1
        print("shuffle used" if honoured
              else "shuffle ignored because nth != 1")
        if not honoured:
            shuffle = False  # ignore shuffle

    kepids = get_PC_IDs(num=num, shuffle=shuffle, nth=nth)
    # the second element of the pair is not used by this function
    pc_flux, _unused = get_binned_normalized_flux_by_IDs(
        kepids,
        merge=merge,
        overwrite=overwrite,
        num_bins=num_bins,
        bin_width=bin_width,
        scramble_id=scramble_id)

    # write the cache when everything was requested and either the file is
    # missing or the caller asked to overwrite it
    cache_is_fresh = os.path.exists(all_pc_flux_filename) and not overwrite \
        if want_all else True
    if want_all and not cache_is_fresh:
        # filename is imported from config.py
        with save_ctx(all_pc_flux_filename):
            np.savetxt(all_pc_flux_filename, pc_flux, fmt=_float_fmt)

    if return_kepids:
        return pc_flux, kepids
    return pc_flux
Example #4
0
def get_local_binned_normalized_Non_PC_flux(num=1,
                                            merge=True,
                                            shuffle=False,
                                            overwrite=False,
                                            scramble_id=None,
                                            return_kepids=False,
                                            nth=1):
    """
    Return the local-view flux produced by ``get_binned_local_view_by_IDs``
    for the IDs selected by ``get_NonPC_IDs``.

    If ``num == np.inf`` all flux is requested (``num`` defaults to 1) and
    the result is cached as text in ``local_all_non_pc_flux_filename``: an
    existing file is loaded and returned directly unless ``overwrite`` is
    set (``return_kepids`` is ignored on that path); otherwise the freshly
    computed flux is written to that file.

    ``shuffle`` is only honoured when ``nth == 1``; for any other ``nth`` it
    is switched off before the IDs are fetched.

    Returns the flux, or ``(flux, ids)`` when ``return_kepids`` is true and
    the cache was not used.
    """
    if num == np.inf and os.path.exists(local_all_non_pc_flux_filename) \
            and not overwrite:
        with load_ctx(local_all_non_pc_flux_filename):
            res = np.loadtxt(local_all_non_pc_flux_filename)
        return res

    if shuffle:
        if nth == 1:
            print("shuffle used")
        else:
            print("shuffle ignored")
            shuffle = False
    all_non_pcs = get_NonPC_IDs(num=num, shuffle=shuffle, nth=nth)
    # only the second element of the returned pair is used here
    _pcs, others = get_binned_local_view_by_IDs(all_non_pcs,
                                                merge=merge,
                                                overwrite=overwrite,
                                                scramble_id=scramble_id)

    if num == np.inf and (not os.path.exists(local_all_non_pc_flux_filename)
                          or overwrite):
        # write all data to a file
        # filename is import from config.py
        with save_ctx(local_all_non_pc_flux_filename):
            # must be savetxt: np.save has no ``fmt`` argument and writes a
            # binary .npy file, which the np.loadtxt call above cannot read
            np.savetxt(local_all_non_pc_flux_filename, others, fmt=_float_fmt)
    if return_kepids:
        return others, all_non_pcs
    return others
Example #5
0
def get_binned_normalized_Non_PC_flux(num=1,
                                      merge=True,
                                      shuffle=False,
                                      overwrite=False,
                                      num_bins=num_bins,
                                      bin_width=None,
                                      return_kepids=False,
                                      scramble_id=None,
                                      nth=1):
    """
    Return the flux produced by ``get_binned_normalized_flux_by_IDs`` for
    the IDs selected by ``get_NonPC_IDs``.

    When ``num == np.inf`` the result is cached in
    ``all_non_pc_flux_filename``: an existing file is loaded and returned
    directly unless ``overwrite`` is set (``return_kepids`` is ignored on
    that path); otherwise the freshly computed flux is written to that file.

    ``shuffle`` is only honoured when ``nth == 1``; for any other ``nth`` it
    is switched off before the IDs are fetched.

    Returns the flux, or ``(flux, ids)`` when ``return_kepids`` is true and
    the cache was not used.
    """
    want_all = num == np.inf

    # fast path: reuse the cached file
    if want_all and os.path.exists(all_non_pc_flux_filename) \
            and not overwrite:
        with load_ctx(all_non_pc_flux_filename):
            cached = np.loadtxt(all_non_pc_flux_filename)
        return cached

    if shuffle:
        honoured = nth == 1
        if not honoured:
            shuffle = False  # ignore shuffle
        print("shuffle used" if honoured
              else "shuffle ignored because nth != 1")

    kepids = get_NonPC_IDs(num=num, shuffle=shuffle, nth=nth)
    # only the second element of the returned pair is used here
    _unused, non_pc_flux = get_binned_normalized_flux_by_IDs(
        kepids,
        merge=merge,
        overwrite=overwrite,
        num_bins=num_bins,
        bin_width=bin_width,
        scramble_id=scramble_id)

    # write the cache when everything was requested and either the file is
    # missing or the caller asked to overwrite it
    if want_all:
        if not (os.path.exists(all_non_pc_flux_filename) and not overwrite):
            # filename is imported from config.py
            with save_ctx(all_non_pc_flux_filename):
                np.savetxt(all_non_pc_flux_filename, non_pc_flux,
                           fmt=_float_fmt)

    if return_kepids:
        return non_pc_flux, kepids
    return non_pc_flux