Example #1
def main():
    args = parse_arguments()
    # checking if the arguments are valid
    assert is_ndarray_folder(args.path), "Invalid path."
    # factorizing the data
    dims = [1] + list(range(5, args.max_dim, 5)) + [args.max_dim]
    table = []
    if args.matrix:
        ndarray = sparse.load_npz(join(args.path, 'matrix.npz')).todense()
    else:
        ndarray = sparse.load_npz(join(args.path, 'tensor.npz')).todense()
    for d in tqdm(dims):
        err_sum, max_err_sum, mean_abs_err_sum, mean_sq_err_sum = 0, 0, 0, 0
        args.dimensions = d
        for _ in range(args.iterations):
            err, max_err, mean_abs_err, mean_sq_err = \
                get_factorization_error(ndarray, args)
            err_sum += err
            max_err_sum += max_err
            mean_abs_err_sum += mean_abs_err
            mean_sq_err_sum += mean_sq_err
        table.append([
            d, err_sum / args.iterations, max_err_sum / args.iterations,
            mean_abs_err_sum / args.iterations,
            mean_sq_err_sum / args.iterations
        ])
    print(
        tabulate(table,
                 headers=[
                     'Dim', 'Err.', 'Max Err.', 'Mean Abs. Err.',
                     'Mean Sq. Err.'
                 ],
                 tablefmt='orgtbl'))
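For context, here is a minimal sketch of the argument parser main() appears to assume; the option names are inferred from the attributes used above (args.path, args.max_dim, args.matrix, args.iterations) and the defaults are illustrative, not confirmed by the source:

import argparse

def parse_arguments():
    # hypothetical parser matching the attributes main() reads
    parser = argparse.ArgumentParser(
        description='Tabulate factorization error over a range of dimensions.')
    parser.add_argument('path', help='folder containing matrix.npz / tensor.npz')
    parser.add_argument('--max_dim', type=int, default=50,
                        help='largest factorization dimension to test')
    parser.add_argument('--matrix', action='store_true',
                        help='factorize the matrix instead of the tensor')
    parser.add_argument('--iterations', type=int, default=5,
                        help='runs to average per dimension')
    return parser.parse_args()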
Example #2
def run_cmtf(path,
             dimensions=20,
             is_matrix=True,
             name='current',
             noise='orig',
             iterations=1,
             fixed='False'):
    # checking if the arguments are valid
    assert is_ndarray_folder(path), "Not a valid path."
    assert '_' not in name, "Name cannot contain the '_' symbol."
    # creating some useful paths to store factorization results
    mat_path = join(
        path, 'mat_' + str(dimensions) + '_' + str(iterations) + '_' + name)
    ten_path = join(
        path, 'ten_' + str(dimensions) + '_' + str(iterations) + '_' + name)
    # loading the meta data
    with open(join(path, 'meta_data.json'), 'r') as json_file:
        meta_data = json.load(json_file)
    # removing old factorization with same name (if exists)
    delete_factorization_by_name(name, path)
    # factorizing the data
    start = time.time()
    if is_matrix:
        matrix = sparse.load_npz(join(path, 'matrix.npz')).todense()
        matrices = prepare_ndarrays(matrix, iterations, fixed, noise)
        create_folder_if_absent(mat_path)
        factorize_matrices(matrices, iterations, dimensions, mat_path, path)
    else:
        tensor = sparse.load_npz(join(path, 'tensor.npz')).todense()
        tensors = prepare_ndarrays(tensor, iterations, fixed, noise)
        create_folder_if_absent(ten_path)
        factorize_tensors(tensors, iterations, dimensions, ten_path, meta_data,
                          path)
    end = time.time()
    print('Factorization completed in %d seconds' % (end - start))
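A usage sketch (the path and name are illustrative, not from the source):

run_cmtf('/data/my_ndarrays', dimensions=20, is_matrix=True,
         name='baseline', noise='orig', iterations=3)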
Example #3
def test_load_wrong_format_exception(tmp_path):
    x = np.array([1, 2, 3])

    filename = tmp_path / "mat.npz"

    np.savez(filename, x)
    with pytest.raises(RuntimeError):
        load_npz(filename)
Example #4
def test_load_wrong_format_exception():
    with tempfile.TemporaryDirectory() as tmp_path_str:
        tmp_path = pathlib.Path(tmp_path_str)
        x = np.array([1, 2, 3])

        filename = tmp_path / "mat.npz"

        np.savez(filename, x)
        with pytest.raises(RuntimeError):
            load_npz(filename)
Example #5
def test_load_wrong_format_exception():
    x = np.array([1, 2, 3])

    dir_name = tempfile.mkdtemp()
    filename = os.path.join(dir_name, 'mat.npz')

    np.savez(filename, x)
    with pytest.raises(RuntimeError):
        load_npz(filename)

    shutil.rmtree(dir_name)
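The three tests above differ only in how they obtain a temporary directory (pytest's tmp_path fixture, tempfile.TemporaryDirectory, tempfile.mkdtemp); all rely on the same behavior: load_npz raises RuntimeError when the .npz archive lacks the keys that save_npz writes. A minimal sketch of that contract, assuming the PyData sparse library:

import numpy as np
import sparse
from sparse import save_npz, load_npz

x = sparse.COO(np.eye(3))
save_npz('ok.npz', x)            # writes the coords/data/shape keys load_npz expects
z = load_npz('ok.npz')           # round-trips fine
np.savez('bad.npz', np.eye(3))   # a generic archive without those keys
# load_npz('bad.npz')            # raises RuntimeError: not a valid sparse file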
Example #6
def get_benchmark_test_theta(fuse=False, theta=None, batch_size=64):
    task = 'mortality'
    duration = 48
    timestep = 1.0
    df_label_all = pd.read_csv(
        data_path + 'population/{}_{}h.csv'.format(task, duration)).rename(
            columns={'{}_LABEL'.format(task): 'LABEL'})
    df_label = pd.read_csv(data_path +
                           'population/pop.mortality_benchmark.csv').rename(
                               columns={'{}_LABEL'.format(task): 'LABEL'})

    X = sparse.load_npz(
        data_path +
        'features,comparison/theta={},outcome={},T={},dt={}/X.npz'.format(
            theta, task, duration, timestep)).todense()
    s = sparse.load_npz(
        data_path +
        'features,comparison/theta={},outcome={},T={},dt={}/s.npz'.format(
            theta, task, duration, timestep)).todense()

    te_idx = [
        df_label_all[df_label_all['ICUSTAY_ID'] == ID].index.values[0]
        for ID in df_label[df_label['partition'] == 'test']['ID']
    ]

    def _select_examples(rows):
        return (
            X[rows],
            s[rows],
            df_label_all.iloc[rows][['LABEL']].values,
        )

    Xy_te = _select_examples(te_idx)
    print('ICU stay splits:', len(te_idx))

    te = EHRDataset(*Xy_te, fuse=fuse)

    num_workers = 1
    te_loader = DataLoader(te,
                           batch_size=batch_size,
                           shuffle=False,
                           num_workers=num_workers,
                           pin_memory=True)

    print(te_loader.dataset.y.sum())
    print('')
    print('Time series shape, Static shape, Label shape, Class balance:')
    print('\t', 'te', te_loader.dataset.X.shape, te_loader.dataset.s.shape,
          te_loader.dataset.y.shape, te_loader.dataset.y.mean())

    if fuse:
        print('Fused dimensions:', te_loader.dataset[0][0].shape)

    return te_loader
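A usage sketch (the theta value is illustrative; it selects which feature folder is read):

te_loader = get_benchmark_test_theta(fuse=True, theta=0.001, batch_size=64)
for batch in te_loader:
    ...  # batch layout is whatever EHRDataset yields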
Example #7
File: qm.py Project: sametz/nmrsim
def _so_sparse(nspins):
    """
    Either load a presaved set of spin operators as numpy arrays, or
    calculate them and save them if a presaved set wasn't found.

    Parameters
    ----------
    nspins : int
        the number of spins in the spin system

    Returns
    -------
    (Lz, Lproduct) : a tuple of:
        Lz : 3d sparse.COO array of shape (n, 2^n, 2^n) representing
            [Lz1, Lz2, ...Lzn]
        Lproduct : 4d sparse.COO array of shape (n, n, 2^n, 2^n), representing
            an n x n array (cartesian product) for all combinations of
            Lxa*Lxb + Lya*Lyb + Lza*Lzb, where 1 <= a, b <= n.

    Side Effect
    -----------
    Saves the results as .npz files to the bin directory if they were not
    found there.
    """
    # TODO: once nmrsim demonstrates installing via the PyPI *test* server,
    # need to determine how the saved solutions will be handled. For example,
    # part of the final build may be generating these files then testing.
    # Also, need to consider different users with different system capabilities
    # (e.g. at extreme, Raspberry Pi). Some way to let user select, or select
    # for user?
    filename_Lz = f'Lz{nspins}.npz'
    filename_Lproduct = f'Lproduct{nspins}.npz'
    bin_path = _bin_path()
    path_Lz = bin_path.joinpath(filename_Lz)
    path_Lproduct = bin_path.joinpath(filename_Lproduct)
    # with path_context_Lz as p:
    #     path_Lz = p
    # with path_context_Lproduct as p:
    #     path_Lproduct = p
    try:
        Lz = sparse.load_npz(path_Lz)
        Lproduct = sparse.load_npz(path_Lproduct)
        return Lz, Lproduct
    except FileNotFoundError:
        print('no SO file ', path_Lz, ' found.')
        print(f'creating {filename_Lz} and {filename_Lproduct}')
    Lz, Lproduct = _so_dense(nspins)
    Lz_sparse = sparse.COO(Lz)
    Lproduct_sparse = sparse.COO(Lproduct)
    sparse.save_npz(path_Lz, Lz_sparse)
    sparse.save_npz(path_Lproduct, Lproduct_sparse)

    return Lz_sparse, Lproduct_sparse
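A sketch of how the returned operators are typically consumed when building the spin Hamiltonian, patterned on nmrsim's sparse Hamiltonian construction (details may differ from the installed version):

def hamiltonian_sparse(v, J):
    """Sketch: build the spin Hamiltonian from the cached operators.

    v: 1-D array of n frequencies; J: (n, n) coupling constants.
    """
    nspins = len(v)
    Lz, Lproduct = _so_sparse(nspins)
    H = sparse.tensordot(v, Lz, axes=1)               # Zeeman terms: sum_i v_i * Lz_i
    scalars = 0.5 * sparse.COO(J)
    H += sparse.tensordot(scalars, Lproduct, axes=2)  # scalar (J) coupling terms
    return H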
Example #8
def load_one(path=PATH + "/" + SAVE_FILE_NAME):
    """
    Load the matrix from a single file.
    """
    data = sparse.load_npz(path).astype(np.int16).todense()
    print("Finished loading " + path)
    return data
Example #9
    def __init__(self, base_path, dataset_name, from_day_incl, dense=True):
        if not isinstance(base_path, pathlib.Path):
            base_path = pathlib.Path(base_path)

        self.from_day_incl = from_day_incl
        self.dataset_name = dataset_name

        self.imls_log_daily = sparse.load_npz(base_path / f"interactions_{dataset_name}_sparse.npz")

        self.dense = dense
        if self.dense:
            self.imls_log_daily = self.imls_log_daily.todense()

        self.num_days = self.imls_log_daily.shape[0]
        self.num_entities = self.imls_log_daily.shape[1]
        self.num_matrices = self.imls_log_daily.shape[-1]

        self.alive_df = pd.read_csv(
            base_path / f"alive_{dataset_name}.csv", parse_dates=["date_emerged"]
        )
        self.num_classes = len(self.alive_df.date_emerged.unique())

        indices_df = pd.read_csv(base_path / f"indices_{dataset_name}.csv")
        self.id_to_idx = dict(indices_df.values)
        self.idx_to_id = dict(indices_df.values[:, ::-1])

        self.bee_ages, self.valid_ages = self.parse_bee_ages(
            self.alive_df, from_day_incl, self.num_days, self.num_entities
        )

        self.labels = self.parse_labels(self.alive_df)
Example #10
File: qm.py Project: sametz/panel-test
def cache_tm(nspins):
    """

    Parameters
    ----------
    nspins

    Returns
    -------

    """
    """spin11 test indicates this leads to faster overall simsignals().

    11 spin x 6: 29.6 vs. 35.1 s
    8 spin x 60: 2.2 vs 3.0 s"""
    filename = f'T{nspins}.npz'
    bin_dir = os.path.join(os.path.dirname(__file__), 'bin')
    path = os.path.join(bin_dir, filename)
    try:
        T = sparse.load_npz(path)
        return T
    except FileNotFoundError:
        print(f'creating {filename}')
        T = transition_matrix_dense(nspins)
        T_sparse = sparse.COO(T)
        sparse.save_npz(path, T_sparse)
        return T_sparse
Example #11
def load_each():
    """
    Load each file that has the name format containing the timestamps.
    """
    files = os.listdir(PATH)
    files.sort()  # so records are loaded in timestamp order
    timestamps = []
    prog = re.compile(r"20\d{6}\.\d{6}\.npz$")
    for file in files:
        if prog.match(file):
            timestamps.append(file)

    data = None
    for ts_idx in range(len(timestamps)):
        if (ts_idx + 1) % 10 == 0:
            print(str(ts_idx + 1) + "/" + str(len(timestamps)) + " " + timestamps[ts_idx])
        # prints progress
        try:
            record = sparse.load_npz(PATH + "/" + timestamps[ts_idx])
            record = record.todense().astype(np.int16)
            #record = record.astype(np.int16)
            # if FIELDS != ALL_FIELDS:
            #     record = record[:, :, FIELDS]
            if ts_idx == 0:
                data = np.zeros((len(timestamps), *record.shape), dtype=np.int16)
            data[ts_idx] += record
        except BadZipFile as e:
            print(e, timestamps[ts_idx])
    print("Loaded each")
    return data
Example #12
def extract_relation_ind(rel_file, rel_index_file):
    """
	Save index array for nonzero entries of the 
	sparse relation tensor at rel_file.
	"""
    relations = sparse.load_npz(rel_file)
    relation_ind = sparse.argwhere(relations > 0)
    np.save(rel_index_file, relation_ind)
Example #13
def test_save_load_npz_file(tmp_path, compression, format):
    x = sparse.random((2, 3, 4, 5), density=0.25, format=format)
    y = x.todense()

    filename = tmp_path / "mat.npz"
    save_npz(filename, x, compressed=compression)
    z = load_npz(filename)
    assert_eq(x, z)
    assert_eq(y, z.todense())
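The compression and format arguments imply pytest parametrization; a plausible sketch of the decorators this test runs under (the parameter values are assumed, not confirmed here):

import pytest

@pytest.mark.parametrize("compression", [True, False])
@pytest.mark.parametrize("format", ["coo", "gcxs"])
def test_save_load_npz_file(tmp_path, compression, format):
    ...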
Example #14
def so_sparse(nspins):
    filename_Lz = f'sparse_Lz{nspins}.npz'
    filename_Lproduct = f'sparse_Lproduct{nspins}.npz'
    try:
        Lz = sparse.load_npz(filename_Lz)
        Lproduct = sparse.load_npz(filename_Lproduct)
        return Lz, Lproduct
    except FileNotFoundError:
        print(f'creating {filename_Lz} and {filename_Lproduct}')
    sigma_x = np.array([[0, 1 / 2], [1 / 2, 0]])
    sigma_y = np.array([[0, -1j / 2], [1j / 2, 0]])
    sigma_z = np.array([[1 / 2, 0], [0, -1 / 2]])
    unit = np.array([[1, 0], [0, 1]])

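    # Build Lx, Ly, Lz for every spin via Kronecker products: the 2x2 spin
    # matrix sits at position n and 2x2 identities fill the other slots.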
    L = np.empty((3, nspins, 2 ** nspins, 2 ** nspins), dtype=np.complex128)  # consider other dtype?
    for n in range(nspins):
        Lx_current = 1
        Ly_current = 1
        Lz_current = 1

        for k in range(nspins):
            if k == n:
                Lx_current = np.kron(Lx_current, sigma_x)
                Ly_current = np.kron(Ly_current, sigma_y)
                Lz_current = np.kron(Lz_current, sigma_z)
            else:
                Lx_current = np.kron(Lx_current, unit)
                Ly_current = np.kron(Ly_current, unit)
                Lz_current = np.kron(Lz_current, unit)

        L[0][n] = Lx_current
        L[1][n] = Ly_current
        L[2][n] = Lz_current
    L_T = L.transpose(1, 0, 2, 3)
    Lproduct = np.tensordot(L_T, L, axes=((1, 3), (0, 2))).swapaxes(1, 2)
    Lz_sparse = sparse.COO(L[2])
    # for i in range(nspins):
    #     for j in range(nspins):
    #         Lproduct[i, j] = csr_matrix(Lproduct[i, j])
    Lproduct_sparse = sparse.COO(Lproduct)
    sparse.save_npz(filename_Lz, Lz_sparse)
    sparse.save_npz(filename_Lproduct, Lproduct_sparse)

    return Lz_sparse, Lproduct_sparse
Example #15
def get_benchmark_splits_ordinal(fuse=False, batch_size=64):
    task = 'mortality'
    duration = 48.0
    timestep = 1.0
    df_label = pd.read_csv('/data4/tangsp/FIDDLE/mimic3_experiments/data/processed/population/pop.mortality_benchmark.csv').rename(columns={'{}_LABEL'.format(task): 'LABEL'})
    X = sparse.load_npz('../data/features,ablations/ordinal,benchmark,outcome={},T={},dt={}/X.npz'.format(task, duration, timestep)).todense()
    s = sparse.load_npz('../data/features,ablations/ordinal,benchmark,outcome={},T={},dt={}/s.npz'.format(task, duration, timestep)).todense()
    
    tr_idx = df_label[df_label['partition'] == 'train'].index.values
    va_idx = df_label[df_label['partition'] == 'val'  ].index.values
    te_idx = df_label[df_label['partition'] == 'test' ].index.values
    
    def _select_examples(rows):
        return (
            X[rows], 
            s[rows], 
            df_label.iloc[rows][['LABEL']].values,
        )
    
    Xy_tr = _select_examples(tr_idx)
    Xy_va = _select_examples(va_idx)
    Xy_te = _select_examples(te_idx)
    print('ICU stay splits:', len(tr_idx), len(va_idx), len(te_idx))
    
    te = EHRDataset(*Xy_te, fuse=fuse)
    va = EHRDataset(*Xy_va, fuse=fuse)
    tr = EHRDataset(*Xy_tr, fuse=fuse)
    
    num_workers = 1
    tr_loader = DataLoader(tr, batch_size=batch_size, shuffle=True , num_workers=num_workers, pin_memory=True)
    va_loader = DataLoader(va, batch_size=batch_size, shuffle=False, num_workers=num_workers, pin_memory=True)
    te_loader = DataLoader(te, batch_size=batch_size, shuffle=False, num_workers=num_workers, pin_memory=True)
    
    print(tr_loader.dataset.y.sum() + va_loader.dataset.y.sum() + te_loader.dataset.y.sum(), '/', X.shape[0])
    print('')
    print('Time series shape, Static shape, Label shape, Class balance:')
    print('\t', 'tr', tr_loader.dataset.X.shape, tr_loader.dataset.s.shape, tr_loader.dataset.y.shape, tr_loader.dataset.y.mean())
    print('\t', 'va', va_loader.dataset.X.shape, va_loader.dataset.s.shape, va_loader.dataset.y.shape, va_loader.dataset.y.mean())
    print('\t', 'te', te_loader.dataset.X.shape, te_loader.dataset.s.shape, te_loader.dataset.y.shape, te_loader.dataset.y.mean())
    
    if fuse:
        print('Fused dimensions:', tr_loader.dataset[0][0].shape)
    
    return tr_loader, va_loader, te_loader
Example #16
File: qm.py Project: sametz/panel-test
def so_sparse(nspins):
    """Either load a presaved set of spin operators as numpy arrays, or
    calculate them and save them if a presaved set wasn't found.

    Parameters
    ----------
    nspins: int
        the number of spins in the spin system

    Returns
    -------
    (Lz, Lproduct): a tuple of:
        Lz: 3d sparse.COO array of shape (n, 2^n, 2^n) representing
        [Lz1, Lz2, ...Lzn]
        Lproduct: 4d sparse.COO array of shape (n, n, 2^n, 2^n), representing
        an n x n array (cartesian product) for all combinations of
        Lxa*Lxb + Lya*Lyb + Lza*Lzb, where 1 <= a, b <= n.

    Side Effect
    -----------
    Saves the results as .npz files to the bin directory if they were not
    found there.
    """
    filename_Lz = f'Lz{nspins}.npz'
    filename_Lproduct = f'Lproduct{nspins}.npz'
    bin_dir = os.path.join(os.path.dirname(__file__), 'bin')
    path_Lz = os.path.join(bin_dir, filename_Lz)
    path_Lproduct = os.path.join(bin_dir, filename_Lproduct)

    try:
        Lz = sparse.load_npz(path_Lz)
        Lproduct = sparse.load_npz(path_Lproduct)
        return Lz, Lproduct
    except FileNotFoundError:
        print('no SO file ', filename_Lz, ' found in: ', bin_dir)
        print(f'creating {filename_Lz} and {filename_Lproduct}')
    Lz, Lproduct = so_dense(nspins)
    Lz_sparse = sparse.COO(Lz)
    Lproduct_sparse = sparse.COO(Lproduct)
    sparse.save_npz(path_Lz, Lz_sparse)
    sparse.save_npz(path_Lproduct, Lproduct_sparse)

    return Lz_sparse, Lproduct_sparse
Example #17
def test_save_load_npz_file(compression, format):
    with tempfile.TemporaryDirectory() as tmp_path_str:
        tmp_path = pathlib.Path(tmp_path_str)
        x = sparse.random((2, 3, 4, 5), density=0.25, format=format)
        y = x.todense()

        filename = tmp_path / "mat.npz"
        save_npz(filename, x, compressed=compression)
        z = load_npz(filename)
        assert_eq(x, z)
        assert_eq(y, z.todense())
Example #18
def readMatrix(filename):
    try:
        return sparse.load_npz(filename.replace(".txt", ".npz"))
    except Exception:
        # fall back to the plain-text format if the .npz is missing or
        # unreadable; the first line stores the shape as
        # "# Array shape: (d1, d2, ...)"
        with open(filename, 'r') as infile:
            dims = infile.readline().replace("# Array shape: (", "").replace(
                ")", "").replace("\n", "").split(", ")
            dims = [int(d) for d in dims]

        new_data = np.loadtxt(filename).reshape(dims)
        return new_data
Example #19
def test_save_load_npz_file(compression):
    x = sparse.random((2, 3, 4, 5), density=.25)
    y = x.todense()

    dir_name = tempfile.mkdtemp()
    filename = os.path.join(dir_name, 'mat.npz')

    save_npz(filename, x, compressed=compression)
    z = load_npz(filename)
    assert_eq(x, z)
    assert_eq(y, z.todense())

    shutil.rmtree(dir_name)
Example #20
    def matrix_load(self, path):
        ''' Loads a previously saved matrix from a .npz file (containing the matrix) and a .nfo file (containing the matrix tags). '''

        # load matrix
        matrix = sparse.load_npz(os.path.splitext(path)[0] + '.npz')
        matrix = sparse.DOK(
            matrix)  # convert to dict-of-keys for faster indexing

        # load matrix tags
        with open(os.path.splitext(path)[0] + '.nfo', 'rb') as f:
            tags = pickle.load(f)

        return matrix, tags
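A sketch of the matching save step this loader implies; matrix_save is a hypothetical name, and the tag format is assumed from the load path:

    def matrix_save(self, path, matrix, tags):
        ''' Hypothetical counterpart: writes the matrix to .npz and the tags to .nfo. '''
        base = os.path.splitext(path)[0]
        sparse.save_npz(base + '.npz', sparse.COO(matrix))  # DOK cannot be saved directly
        with open(base + '.nfo', 'wb') as f:
            pickle.dump(tags, f)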
Example #21
def load_expectation(expectation_file_name, type_=None):  # pragma: no cover
    """Returns np.ndarray related to the *expectation_file_name*.

    Expectation file path is rooted at tests/expectations.
    """
    thisdir = os.path.dirname(__file__)
    expectation_file_path = os.path.abspath(
        os.path.join(thisdir, "expectations",
                     f"{expectation_file_name}.{type_}"))
    if type_ == "npy":
        expectation_data = np.load(expectation_file_path)
    elif type_ == "npz":
        expectation_data = sparse.load_npz(expectation_file_path)
    elif type_ == "png":
        expectation_data = Image.open(expectation_file_path)
    else:
        raise Exception("Type format not recognized")
    return expectation_data
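A usage sketch (the expectation file name is hypothetical):

expected = load_expectation('cooccurrence-matrix', type_='npz')
assert (actual == expected.todense()).all()  # 'actual' is whatever your test produced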
Example #22
def resolve_relations(db_file, rel_file, meta_file, id_file):
    """
	"""
    conn = open_db_connection(db_file)
    c = conn.cursor()

    # load or compute unique IDs
    if os.path.isfile(meta_file):
        meta = np.load(meta_file)
        off = meta[0]
        num_unique = meta[1]
        unique_ids = np.load(id_file)
    else:
        off = 0
        c.execute("SELECT DISTINCT event1_id FROM Relations;")
        event_ids = set(c.fetchall())
        for id2 in c.execute("SELECT event2_id FROM Relations;"):
            if id2 not in event_ids:
                event_ids.add(id2)
        unique_ids = np.char.array(list(event_ids))
        num_unique = len(event_ids)
        np.save(id_file, unique_ids)
        np.save(meta_file, np.array([off, num_unique]))

    id_lookup = dict()
    for i, id_entr in enumerate(unique_ids):
        id_lookup[id_entr[0]] = i

    # load or compute (compressed) relations
    if os.path.isfile(rel_file):
        relations = sparse.load_npz(rel_file)
    else:
        relations = sparse.DOK((num_unique, num_unique, RELATION_COUNT),
                               dtype=np.float32)
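        # DOK supports cheap element-wise writes while filling; it is converted
        # to COO below because save_npz cannot serialize a DOK array.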
        for row in c.execute("SELECT * FROM Relations;"):
            id_out = row[1]
            id_in = row[2]
            relations[id_lookup[id_out], id_lookup[id_in], :] = row[3:]
        relations = sparse.COO(relations)
        sparse.save_npz(rel_file, relations)

    conn.close()
Example #23
def cache_tm(n):
    """spin11 test indicates this leads to faster overall simsignals().

    11 spin x 6: 29.6 vs. 35.1 s
    8 spin x 60: 2.2 vs 3.0 s"""
    filename = f'transitions{n}.npz'
    try:
        T = sparse.load_npz(filename)
        return T
    except FileNotFoundError:
        print(f'creating {filename}')
        T = np.zeros((n, n))
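        # allow a transition between basis states whose indices differ by
        # exactly one bit, i.e. a single spin flip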
        for i in range(n - 1):
            for j in range(i + 1, n):
                if bin(i ^ j).count('1') == 1:
                    T[i, j] = 1
        # T = T + T.T
        T += T.T
        T_sparse = sparse.COO(T)
        sparse.save_npz(filename, T_sparse)
        return T_sparse
Example #24
File: qm.py Project: sametz/nmrsim
def _tm_cache(nspins):
    """
    Loads a saved sparse transition matrix if it exists, or creates and saves
    one if it is not.

    Parameters
    ----------
    nspins : int
        The number of spins in the spin system.

    Returns
    -------
    T_sparse : sparse.COO
        The sparse transition matrix.

    Side Effects
    ------------
    Saves a sparse array to the bin folder if the required array was not
    found there.
    """
    # Speed tests indicated that using sparse-array transition matrices
    # provides a modest speed improvement on larger spin systems.
    filename = f'T{nspins}.npz'
    # init_path_context = resources.path(nmrsim.bin, '__init__.py')
    # with init_path_context as p:
    #     init_path = p
    # print('path to init: ', init_path)
    # bin_path = init_path.parent
    bin_path = _bin_path()
    path = bin_path.joinpath(filename)
    try:
        T_sparse = sparse.load_npz(path)
        return T_sparse
    except FileNotFoundError:
        print(f'creating {filename}')
        T_sparse = _transition_matrix_dense(nspins)
        T_sparse = sparse.COO(T_sparse)
        print('_tm_cache will save on path: ', path)
        sparse.save_npz(path, T_sparse)
        return T_sparse
Example #25
    def __init__(self, path, neg_sample_ratio, min_freq, is_matrix=True):
        ndarray_file = 'matrix.npz' if is_matrix else 'tensor.npz'
        self.ndarray = sparse.load_npz(join(path, ndarray_file)).todense()
        # Creating positive instances
        nonzeros = (self.ndarray > min_freq).nonzero()
        print('# of non-zero cells is {}'.format(len(nonzeros[0])))
        print('Density is {}%'.format(100 * len(nonzeros[0]) / self.ndarray.size))
        pos_points = list(zip(*nonzeros))
        # Creating negative instances (if there are enough negatives)
        assert len(nonzeros[0]) * (neg_sample_ratio + 1) < self.ndarray.size
        neg_points, seen_points = [], set(pos_points)
        while len(neg_points) < len(nonzeros[0]) * neg_sample_ratio:
            sample = tuple(np.random.choice(d) for d in self.ndarray.shape)
            if sample not in seen_points:
                seen_points.add(sample)
                neg_points.append(sample)
        # Combining all points and labels
        self.points = [np.array(x) for x in (pos_points + neg_points)]
        self.labels = [1] * len(pos_points) + [0] * len(neg_points)
        # Shuffling the points
        zipped = list(zip(self.points, self.labels))
        random.shuffle(zipped)
        self.points, self.labels = zip(*zipped)
Example #26
np.savetxt('fig8_rayflare.txt', RAT['A_bulk'][0])

from angles import make_angle_vector
from config import results_path
from sparse import load_npz


_, _, angle_vector = make_angle_vector(options['n_theta_bins'], options['phi_symmetry'],
                                       options['c_azimuth'])

wl_to_plot = 1100e-9

wl_index = np.argmin(np.abs(wavelengths-wl_to_plot))

sprs = load_npz(os.path.join(results_path, options['project_name'], SC[0].name + 'rearRT.npz'))

full = sprs[wl_index].todense()

summat = theta_summary(full, angle_vector, options['n_theta_bins'], 'rear')

summat_r = summat[options['n_theta_bins']:, :]

summat_r = summat_r.rename({r'$\theta_{in}$': 'a', r'$\theta_{out}$': 'b'})

summat_r = summat_r.assign_coords(a=np.sin(summat_r.coords['a']).data,
                                  b=np.sin(summat_r.coords['b']).data)

summat_r = summat_r.rename({'a': r'$\sin(\theta_{in})$', 'b': r'$\sin(\theta_{out})$'})
Example #27
def matrix_multiplication(bulk_mats,
                          bulk_thick,
                          options,
                          layer_widths=[],
                          n_layers=[],
                          layer_names=[],
                          calc_prof_list=[]):
    n_bulks = len(bulk_mats)
    n_interfaces = n_bulks + 1

    theta_intv, phi_intv, angle_vector = make_angle_vector(
        options['n_theta_bins'], options['phi_symmetry'], options['c_azimuth'])
    n_a_in = int(len(angle_vector) / 2)

    num_wl = len(options['wavelengths'])

    #wls = np.linspace(600, 1100, num_wl)*1e-9
    #pol = 's'

    # bulk thickness in m

    thetas = angle_vector[:n_a_in, 1]

    v0 = make_v0(options['theta_in'], options['phi_in'], num_wl,
                 options['n_theta_bins'], options['c_azimuth'],
                 options['phi_symmetry'])

    up2down, down2up = out_to_in_matrix(options['phi_symmetry'], angle_vector,
                                        theta_intv, phi_intv)

    D = []
    for i1 in range(n_bulks):
        D.append(
            make_D(bulk_mats[i1].alpha(options['wavelengths']), bulk_thick[i1],
                   thetas))

    #unique_thetas = np.unique(thetas)

    # front incidence matrices
    Rf = []
    Tf = []
    Af = []
    Pf = []
    If = []
    side = 1

    for i1 in range(n_interfaces):
        mat_path = os.path.join(results_path, options['project_name'],
                                layer_names[i1] + 'frontRT.npz')
        absmat_path = os.path.join(results_path, options['project_name'],
                                   layer_names[i1] + 'frontA.npz')

        fullmat = load_npz(mat_path)
        absmat = load_npz(absmat_path)

        #if len(fullmat.shape) == 3:
        Rf.append(fullmat[:, :n_a_in, :])
        Tf.append(fullmat[:, n_a_in:, :])
        Af.append(absmat)

        #else:
        #    print(fullmat.shape)
        #    Rf.append(fullmat[:n_a_in, :])
        #    Tf.append(fullmat[n_a_in:, :])
        #    Af.append(absmat)

        if len(calc_prof_list[i1]) > 0:
            #profile, intgr = make_profile_data(options, unique_thetas, n_a_in, side,
            #                                   layer_names[i1], n_layers[i1], layer_widths[i1])
            profmat_path = os.path.join(results_path, options['project_name'],
                                        layer_names[i1] + 'frontprofmat.nc')
            prof_int = xr.load_dataset(profmat_path)
            profile = prof_int['profile']
            intgr = prof_int['intgr']
            Pf.append(profile)
            If.append(intgr)

        else:
            Pf.append([])
            If.append([])

    # rear incidence matrices
    Rb = []
    Tb = []
    Ab = []
    Pb = []
    Ib = []
    paramsb = []
    side = -1

    for i1 in range(n_interfaces - 1):
        mat_path = os.path.join(results_path, options['project_name'],
                                layer_names[i1] + 'rearRT.npz')
        absmat_path = os.path.join(results_path, options['project_name'],
                                   layer_names[i1] + 'rearA.npz')

        fullmat = load_npz(mat_path)
        absmat = load_npz(absmat_path)

        #if len(fullmat.shape) == 3:
        Rb.append(fullmat[:, :n_a_in, :])
        Tb.append(fullmat[:, n_a_in:, :])
        Ab.append(absmat)

        #else:
        #    Rb.append(fullmat[:n_a_in, :])
        #    Tb.append(fullmat[n_a_in:, :])
        #    Ab.append(absmat)

        if len(calc_prof_list[i1]) > 0:
            #profile, intgr = make_profile_data(options, unique_thetas, n_a_in, side,
            #                                   layer_names[i1], n_layers[i1], layer_widths[i1])
            profmat_path = os.path.join(results_path, options['project_name'],
                                        layer_names[i1] + 'rearprofmat.nc')
            prof_int = xr.load_dataset(profmat_path)
            profile = prof_int['profile']
            intgr = prof_int['intgr']
            Pb.append(profile)
            Ib.append(intgr)

        else:
            Pb.append([])
            Ib.append([])

    len_calcs = np.array([len(x) for x in calc_prof_list])
    print(len_calcs)
    print(np.any(len_calcs > 0))

    if np.any(len_calcs > 0):
        print('a')
        a = [[] for _ in range(n_interfaces)]
        a_prof = [[] for _ in range(n_interfaces)]
        vr = [[] for _ in range(n_bulks)]
        vt = [[] for _ in range(n_bulks)]
        A = [[] for _ in range(n_bulks)]
        A_prof = [[] for _ in range(n_bulks)]

        vf_1 = [[] for _ in range(n_interfaces)]
        vb_1 = [[] for _ in range(n_interfaces)]
        vf_2 = [[] for _ in range(n_interfaces)]
        vb_2 = [[] for _ in range(n_interfaces)]

        for i1 in range(n_bulks):

            #z = xr.DataArray(np.arange(0, bulk_thick[i1], options['nm_spacing']*1e-9), dims='z')
            # v0 is actually travelling down, but no reason to start in 'outgoing' ray format.
            vf_1[i1] = dot_wl(Tf[i1], v0)  # pass through front surface
            vr[i1].append(dot_wl(Rf[i1], v0))  # reflected from front surface
            a[i1].append(dot_wl(
                Af[i1], v0))  # absorbed in front surface at first interaction
            #print(v0)
            #print(If[i1])

            if len(If[i1]) > 0:
                v_xr = xr.DataArray(v0,
                                    dims=['wl', 'global_index'],
                                    coords={
                                        'wl': If[i1].coords['wl'],
                                        'global_index': np.arange(0, n_a_in)
                                    })
                int_power = xr.dot(v_xr, If[i1], dims='global_index')
                scale = (np.sum(dot_wl(Af[i1], v0), 1) / int_power).fillna(0)

                a_prof[i1].append(
                    (scale * xr.dot(v_xr, Pf[i1], dims='global_index')).data)

            power = np.sum(vf_1[i1], axis=1)

            # rep
            i2 = 1

            while np.any(power > options['I_thresh']):
                print(i2)
                #print(power)
                vf_1[i1] = dot_wl_u2d(down2up,
                                      vf_1[i1])  # outgoing to incoming
                vb_1[i1] = dot_wl(D[i1],
                                  vf_1[i1])  # pass through bulk, downwards
                # vb_1 already an incoming ray

                if len(If[i1 + 1]) > 0:

                    v_xr = xr.DataArray(vb_1[i1],
                                        dims=['wl', 'global_index'],
                                        coords={
                                            'wl': If[i1 + 1].coords['wl'],
                                            'global_index':
                                            np.arange(0, n_a_in)
                                        })
                    int_power = xr.dot(v_xr, If[i1 + 1], dims='global_index')
                    scale = (np.sum(dot_wl(Af[i1 + 1], vb_1[i1]), 1) /
                             int_power).fillna(0)
                    #('front profile')

                    a_prof[i1 + 1].append(
                        (scale *
                         xr.dot(v_xr, Pf[i1 + 1], dims='global_index')).data)

                #remaining_power.append(np.sum(vb_1, axis=1))
                A[i1].append(np.sum(vf_1[i1], 1) - np.sum(vb_1[i1], 1))

                nz_thetas = vf_1[i1] != 0

                vb_2[i1] = dot_wl(
                    Rf[i1 + 1],
                    vb_1[i1])  # reflect from back surface. incoming -> up

                vf_2[i1] = dot_wl(D[i1],
                                  vb_2[i1])  # pass through bulk, upwards

                #print('rear profile')
                if len(Ib[i1]) > 0:
                    v_xr = xr.DataArray(vf_2[i1],
                                        dims=['wl', 'global_index'],
                                        coords={
                                            'wl': Ib[i1].coords['wl'],
                                            'global_index':
                                            np.arange(0, n_a_in)
                                        })
                    int_power = xr.dot(v_xr, Ib[i1], dims='global_index')
                    scale = (np.sum(dot_wl(Ab[i1], vf_2[i1]), 1) /
                             int_power).fillna(0)
                    a_prof[i1].append(
                        (scale *
                         xr.dot(v_xr, Pb[i1], dims='global_index')).data)

                #remaining_power.append(np.sum(vf_2, axis=1))

                A[i1].append(np.sum(vb_2[i1], 1) - np.sum(vf_2[i1], 1))

                vf_2[i1] = dot_wl_u2d(up2down,
                                      vf_2[i1])  # prepare for rear incidence
                vf_1[i1] = dot_wl(Rb[i1],
                                  vf_2[i1])  # reflect from front surface
                power = np.sum(vf_1[i1], axis=1)

                # nz_thetas = vb_2[i1] != 0

                vr[i1].append(
                    dot_wl(Tb[i1], vf_2[i1])
                )  # matrix travelling up in medium 0, i.e. reflected overall by being transmitted through front surface
                vt[i1].append(
                    dot_wl(Tf[i1 + 1], vb_1[i1])
                )  # transmitted into medium below through back surface
                a[i1 + 1].append(dot_wl(Af[i1 + 1],
                                        vb_1[i1]))  # absorbed in 2nd surface
                a[i1].append(dot_wl(
                    Ab[i1],
                    vf_2[i1]))  # absorbed in 1st surface (from the back)

                i2 += 1

        vr = [np.array(item) for item in vr]
        vt = [np.array(item) for item in vt]
        a = [np.array(item) for item in a]
        A = [np.array(item) for item in A]
        a_prof = [np.array(item) for item in a_prof]

        results_per_pass = {'r': vr, 't': vt, 'a': a, 'A': A, 'a_prof': a_prof}

        # for i2 in range(3):
        #     for i1 in range(n_interfaces):
        #         plt.figure()
        #         z = np.arange(0, np.sum(layer_widths[i1]), options['nm_spacing'])
        #         plt.plot(z, a_prof[i1][i2, 0, :].T)
        #         plt.title(str(i2) + 'interface ' + str(i1))
        #         plt.show()
        sum_dims = ['bulk_index', 'wl']
        sum_coords = {
            'bulk_index': np.arange(0, n_bulks),
            'wl': options['wavelengths']
        }
        R = xr.DataArray(np.array([np.sum(item, (0, 2)) for item in vr]),
                         dims=sum_dims,
                         coords=sum_coords,
                         name='R')
        T = xr.DataArray(np.array([np.sum(item, (0, 2)) for item in vt]),
                         dims=sum_dims,
                         coords=sum_coords,
                         name='T')
        A_bulk = xr.DataArray(np.array([np.sum(item, 0) for item in A]),
                              dims=sum_dims,
                              coords=sum_coords,
                              name='A_bulk')

        A_interface = xr.DataArray(
            np.array([np.sum(item, (0, 2)) for item in a]),
            dims=['surf_index', 'wl'],
            coords={
                'surf_index': np.arange(0, n_interfaces),
                'wl': options['wavelengths']
            },
            name='A_interface')
        profile = []
        for j1, item in enumerate(a_prof):
            if len(item) > 0:
                profile.append(
                    xr.DataArray(np.sum(item, 0),
                                 dims=['wl', 'z'],
                                 coords={'wl': options['wavelengths']},
                                 name='A_profile' + str(j1))
                )  # not necessarily same number of z coords per layer stack

        bulk_profile = np.array(A_prof)

        RAT = xr.merge([R, A_bulk, A_interface, T])
        # for i2 in range(num_wl):
        #     plt.figure()
        #     for i1 in range(n_interfaces):
        #         z = np.arange(0, np.sum(layer_widths[i1]), options['nm_spacing'])
        #         plt.plot(z, profile[i1][i2].T)
        #
        #     plt.show()
        #
        # plt.figure()
        # for i2 in range(5):
        #     i1= 0
        #     z = np.arange(0, np.sum(layer_widths[i1]), options['nm_spacing'])
        #     plt.plot(z, a_prof[i1][i2, 0, :])
        #
        # plt.figure()
        # plt.plot(options['wavelengths'], R.T)
        # plt.plot(options['wavelengths'], T.T)
        # plt.plot(options['wavelengths'], A_interface.T)
        # plt.plot(options['wavelengths'], A_bulk.T)
        # plt.plot(options['wavelengths'], R[0] + T[0] + A_interface[0] + A_interface[1]+A_bulk[0])
        # plt.legend(['R', 'T', 'front', 'back', 'bulk'])
        # plt.show()

        #return R, T, A_bulk, A_interface, profile
        return RAT, results_per_pass, profile, bulk_profile

    else:
        print('b')
        a = [[] for _ in range(n_interfaces)]
        vr = [[] for _ in range(n_bulks)]
        vt = [[] for _ in range(n_bulks)]
        A = [[] for _ in range(n_bulks)]

        vf_1 = [[] for _ in range(n_interfaces)]
        vb_1 = [[] for _ in range(n_interfaces)]
        vf_2 = [[] for _ in range(n_interfaces)]
        vb_2 = [[] for _ in range(n_interfaces)]

        for i1 in range(n_bulks):

            vf_1[i1] = dot_wl(Tf[i1], v0)  # pass through front surface
            vr[i1].append(dot_wl(Rf[i1], v0))  # reflected from front surface
            a[i1].append(dot_wl(
                Af[i1], v0))  # absorbed in front surface at first interaction
            power = np.sum(vf_1[i1], axis=1)

            # rep
            i2 = 1

            while np.any(power > options['I_thresh']):
                print(i2)

                print('before d2u', np.sum(vf_1[i1]))
                vf_1[i1] = dot_wl_u2d(down2up,
                                      vf_1[i1])  # outgoing to incoming
                print('after 2du', np.sum(vf_1[i1]))
                #print('vf_1 after', vf_1[i1])
                vb_1[i1] = dot_wl(D[i1],
                                  vf_1[i1])  # pass through bulk, downwards
                print('before back ref', np.sum(vb_1[i1]))
                # remaining_power.append(np.sum(vb_1, axis=1))
                A[i1].append(np.sum(vf_1[i1], 1) - np.sum(vb_1[i1], 1))

                vb_2[i1] = dot_wl(Rf[i1 + 1],
                                  vb_1[i1])  # reflect from back surface
                print('after back ref', np.sum(vb_2[i1]))
                vf_2[i1] = dot_wl(D[i1],
                                  vb_2[i1])  # pass through bulk, upwards
                #print('vb_2', vb_2[i1])
                print('after u2d', np.sum(vf_2[i1]))
                vf_2[i1] = dot_wl_u2d(up2down,
                                      vf_2[i1])  # prepare for rear incidence
                print('after u2d/before front ref', np.sum(vf_2[i1]))
                vf_1[i1] = dot_wl(Rb[i1],
                                  vf_2[i1])  # reflect from front surface
                print('after front ref', np.sum(vf_1[i1]))
                #print('Rf, Rb, and vf2', Rf[i1][20].todense(), Rb[i1][20].todense(), vf_2[i1][20])
                #print('powersrem', np.sum(vb_2[i1], 1), np.sum(vf_2[i1], 1), np.sum(vf_1[i1], 1))
                # remaining_power.append(np.sum(vf_2, axis=1))
                A[i1].append(np.sum(vb_2[i1], 1) - np.sum(vf_2[i1], 1))
                power = np.sum(vf_1[i1], axis=1)
                #print('power', power)

                vr[i1].append(
                    dot_wl(Tb[i1], vf_2[i1])
                )  # matrix travelling up in medium 0, i.e. reflected overall by being transmitted through front surface
                print('lost in front ref', np.sum(vr[i1]))
                #print('Tf, vb1', Tf[i1 + 1][20].todense(), vb_1[i1][20])
                vt[i1].append(
                    dot_wl(Tf[i1 + 1], vb_1[i1])
                )  # transmitted into medium below through back surface
                print('lost in back ref', np.sum(vt[i1]))
                a[i1 + 1].append(dot_wl(Af[i1 + 1],
                                        vb_1[i1]))  # absorbed in 2nd surface
                a[i1].append(dot_wl(
                    Ab[i1],
                    vf_2[i1]))  # absorbed in 1st surface (from the back)

                i2 += 1

        vr = [np.array(item) for item in vr]
        vt = [np.array(item) for item in vt]
        a = [np.array(item) for item in a]
        A = [np.array(item) for item in A]

        results_per_pass = {'r': vr, 't': vt, 'a': a, 'A': A}

        sum_dims = ['bulk_index', 'wl']
        sum_coords = {
            'bulk_index': np.arange(0, n_bulks),
            'wl': options['wavelengths']
        }
        R = xr.DataArray(np.array([np.sum(item, (0, 2)) for item in vr]),
                         dims=sum_dims,
                         coords=sum_coords,
                         name='R')
        if i2 > 1:
            A_bulk = xr.DataArray(np.array([np.sum(item, 0) for item in A]),
                                  dims=sum_dims,
                                  coords=sum_coords,
                                  name='A_bulk')

            T = xr.DataArray(np.array([np.sum(item, (0, 2)) for item in vt]),
                             dims=sum_dims,
                             coords=sum_coords,
                             name='T')

            RAT = xr.merge([R, A_bulk, T])

        else:
            RAT = xr.merge([R])

        return RAT, results_per_pass
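dot_wl performs the per-wavelength redistribution products used throughout the loops above; a minimal sketch of what such a helper looks like, assuming a stack of per-wavelength sparse matrices (RayFlare's actual implementation may differ in detail):

import numpy as np

def dot_wl(mat, vec):
    # mat: (wl, out_angle, in_angle) stack of sparse matrices;
    # vec: dense (wl, in_angle) intensity array
    result = np.empty((vec.shape[0], mat.shape[1]))
    for i1 in range(vec.shape[0]):
        # one matrix-vector product per wavelength
        result[i1, :] = mat[i1].dot(vec[i1])
    return result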
Example #28
plt.plot(options['wavelengths'] * 1e9,
         TMM_res['A_per_layer'][len(front_materials) + 1],
         label='Ge')
plt.plot(options['wavelengths'] * 1e9,
         TMM_res['A_per_layer'][1] + TMM_res['A_per_layer'][2],
         label='ARC')
plt.plot(options['wavelengths'] * 1e9,
         TMM_res['A_per_layer'][3],
         label='InGaP')
plt.plot(options['wavelengths'] * 1e9, TMM_res['A_per_layer'][4], label='GaAs')
plt.plot(options['wavelengths'] * 1e9,
         TMM_res['A_per_layer'][5],
         label='SiGeSn')
plt.xlabel('Wavelength (nm)')
plt.ylabel('Reflection / Absorption')
#plt.legend()
plt.show()

from sparse import load_npz

RTmat = load_npz(
    '/home/phoebe/Documents/rayflare/results/test_matrix2/GaInP_GaAs_SiGeSn_RTfrontRT.npz'
)

TMMmat = load_npz(
    '/home/phoebe/Documents/rayflare/results/test_matrix2/GaInP_GaAs_SiGeSn_TMMfrontRT.npz'
)

RTmat_0 = RTmat[0].todense()

TMMmat_0 = TMMmat[0].todense()
Example #29

def save_test_predictions(y_true, y_score, model_name, save_dir):
#     import pathlib
#     pathlib.Path(save_dir).mkdir(parents=True, exist_ok=True)
    
    fname = save_dir + '{}.test.npz'.format(model_name)
    np.savez(
        open(fname, 'wb'),
        y_score = y_score,
        y_true  = y_true,
    )
    print('Test predictions saved to', fname)



import sparse
X = sparse.load_npz('output.clinical/X.npz').todense().squeeze()
s = sparse.load_npz('output.icd[0,1,2]/s.npz').todense()
X = np.concatenate((s, X), axis=1)

Xtr = X[df.partition=="train"]
ytr = df[df.partition=="train"]['label']
Xte = X[df.partition=="test"]
yte = df[df.partition=="test"]['label']
print(Xtr.shape, ytr.shape, Xte.shape, yte.shape)

train_model(Xtr, ytr, Xte, yte, 'LR', 'clinical+ICD[0,1,2]')
train_model(Xtr, ytr, Xte, yte, 'RF', 'clinical+ICD[0,1,2]')
Example #30
    'lookuptable_angles': 200,
    #'prof_layers': [1,2],
    'n_rays': 500000,
    'random_angles': False,
    'nx': 15,
    'ny': 15,
    'parallel': True,
    'n_jobs': -1,
    'phi_symmetry': np.pi / 2,
    'only_incidence_angle': True
}

Si = material('Si_OPTOS')()

sprs = load_npz(
    os.path.join(results_path, options['project_name'],
                 'planar_back' + str(options['n_rays']) + 'frontRT.npz'))

_, _, angle_vector = make_angle_vector(options['n_theta_bins'],
                                       options['phi_symmetry'],
                                       options['c_azimuth'])

R = sprs.todense()[:, 0:int(len(angle_vector) / 2), :]

a, unique_index = np.unique(angle_vector[:, 1], return_index=True)
unique_index = unique_index[unique_index < 1300]

only_one_theta = R[:, unique_index, unique_index]
plt.figure()
a = plt.imshow(only_one_theta, extent=[0, 1, 900, 1200], aspect='auto')
plt.colorbar(a)