def main():
    args = parse_arguments()

    # checking if the arguments are valid
    assert is_ndarray_folder(args.path), "Invalid path."

    # factorizing the data
    dims = [1] + list(range(5, args.max_dim, 5)) + [args.max_dim]
    table = []
    if args.matrix:
        ndarray = sparse.load_npz(join(args.path, 'matrix.npz')).todense()
    else:
        ndarray = sparse.load_npz(join(args.path, 'tensor.npz')).todense()
    for d in tqdm(dims):
        err_sum, max_err_sum, mean_abs_err_sum, mean_sq_err_sum = 0, 0, 0, 0
        args.dimensions = d
        for _ in range(args.iterations):
            err, max_err, mean_abs_err, mean_sq_err = \
                get_factorization_error(ndarray, args)
            err_sum += err
            max_err_sum += max_err
            mean_abs_err_sum += mean_abs_err
            mean_sq_err_sum += mean_sq_err
        table.append([
            d,
            err_sum / args.iterations,
            max_err_sum / args.iterations,
            mean_abs_err_sum / args.iterations,
            mean_sq_err_sum / args.iterations
        ])
    print(
        tabulate(table,
                 headers=['Dim', 'Err.', 'Max Err.', 'Mean Abs. Err.', 'Mean Sq. Err.'],
                 tablefmt='orgtbl'))

def run_cmtf(path, dimensions=20, is_matrix=True, name='current', noise='orig',
             iterations=1, fixed='False'):
    # checking if the arguments are valid
    assert is_ndarray_folder(path), "Not a valid path."
    assert '_' not in name, "Name cannot contain the '_' symbol."

    # creating some useful paths to store factorization results
    mat_path = join(
        path, 'mat_' + str(dimensions) + '_' + str(iterations) + '_' + name)
    ten_path = join(
        path, 'ten_' + str(dimensions) + '_' + str(iterations) + '_' + name)

    # loading the meta data
    with open(join(path, 'meta_data.json'), 'r') as json_file:
        meta_data = json.load(json_file)

    # removing old factorization with same name (if exists)
    delete_factorization_by_name(name, path)

    # factorizing the data
    start = time.time()
    if is_matrix:
        matrix = sparse.load_npz(join(path, 'matrix.npz')).todense()
        matrices = prepare_ndarrays(matrix, iterations, fixed, noise)
        create_folder_if_absent(mat_path)
        factorize_matrices(matrices, iterations, dimensions, mat_path, path)
    else:
        tensor = sparse.load_npz(join(path, 'tensor.npz')).todense()
        tensors = prepare_ndarrays(tensor, iterations, fixed, noise)
        create_folder_if_absent(ten_path)
        factorize_tensors(tensors, iterations, dimensions, ten_path, meta_data, path)
    end = time.time()
    print('Factorization completed in %d seconds' % (end - start))

def test_load_wrong_format_exception(tmp_path):
    x = np.array([1, 2, 3])
    filename = tmp_path / "mat.npz"
    np.savez(filename, x)
    with pytest.raises(RuntimeError):
        load_npz(filename)

def test_load_wrong_format_exception():
    with tempfile.TemporaryDirectory() as tmp_path_str:
        tmp_path = pathlib.Path(tmp_path_str)
        x = np.array([1, 2, 3])
        filename = tmp_path / "mat.npz"
        np.savez(filename, x)
        with pytest.raises(RuntimeError):
            load_npz(filename)

def test_load_wrong_format_exception():
    x = np.array([1, 2, 3])
    dir_name = tempfile.mkdtemp()
    filename = os.path.join(dir_name, 'mat.npz')
    np.savez(filename, x)
    with pytest.raises(RuntimeError):
        load_npz(filename)
    shutil.rmtree(dir_name)

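# Minimal sketch (assumes the pydata/sparse package, not part of the tests
# above): the three test variants all exercise the same behaviour -- np.savez
# writes a plain NumPy archive that sparse.load_npz rejects with RuntimeError,
# whereas an archive written with sparse.save_npz round-trips cleanly.
import numpy as np
import sparse

dense = np.array([[0, 1, 0], [2, 0, 0]])
coo = sparse.COO(dense)                  # convert to a sparse COO array
sparse.save_npz("example.npz", coo)      # sparse-specific .npz archive
restored = sparse.load_npz("example.npz")
assert (restored.todense() == dense).all()
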
def get_benchmark_test_theta(fuse=False, theta=None, batch_size=64):
    task = 'mortality'
    duration = 48
    timestep = 1.0
    df_label_all = pd.read_csv(
        data_path + 'population/{}_{}h.csv'.format(task, duration)
    ).rename(columns={'{}_LABEL'.format(task): 'LABEL'})
    df_label = pd.read_csv(
        data_path + 'population/pop.mortality_benchmark.csv'
    ).rename(columns={'{}_LABEL'.format(task): 'LABEL'})
    X = sparse.load_npz(
        data_path + 'features,comparison/theta={},outcome={},T={},dt={}/X.npz'.format(
            theta, task, duration, timestep)).todense()
    s = sparse.load_npz(
        data_path + 'features,comparison/theta={},outcome={},T={},dt={}/s.npz'.format(
            theta, task, duration, timestep)).todense()

    te_idx = [
        df_label_all[df_label_all['ICUSTAY_ID'] == ID].index.values[0]
        for ID in df_label[df_label['partition'] == 'test']['ID']
    ]

    def _select_examples(rows):
        return (
            X[rows],
            s[rows],
            df_label_all.iloc[rows][['LABEL']].values,
        )

    Xy_te = _select_examples(te_idx)
    print('ICU stay splits:', len(te_idx))

    te = EHRDataset(*Xy_te, fuse=fuse)
    num_workers = 1
    te_loader = DataLoader(te, batch_size=batch_size, shuffle=False,
                           num_workers=num_workers, pin_memory=True)

    print(te_loader.dataset.y.sum())
    print('')
    print('Time series shape, Static shape, Label shape, Class balance:')
    print('\t', 'te', te_loader.dataset.X.shape, te_loader.dataset.s.shape,
          te_loader.dataset.y.shape, te_loader.dataset.y.mean())
    if fuse:
        print('Fused dimensions:', te_loader.dataset[0][0].shape)
    return te_loader

def _so_sparse(nspins):
    """
    Either load a presaved set of spin operators as numpy arrays, or
    calculate them and save them if a presaved set wasn't found.

    Parameters
    ----------
    nspins : int
        the number of spins in the spin system

    Returns
    -------
    (Lz, Lproduct) : a tuple of:
        Lz : 3d sparse.COO array of shape (n, 2^n, 2^n) representing
            [Lz1, Lz2, ...Lzn]
        Lproduct : 4d sparse.COO array of shape (n, n, 2^n, 2^n), representing
            an n x n array (cartesian product) for all combinations of
            Lxa*Lxb + Lya*Lyb + Lza*Lzb, where 1 <= a, b <= n.

    Side Effect
    -----------
    Saves the results as .npz files to the bin directory if they were not
    found there.
    """
    # TODO: once nmrsim demonstrates installing via the PyPI *test* server,
    # need to determine how the saved solutions will be handled. For example,
    # part of the final build may be generating these files then testing.
    # Also, need to consider different users with different system capabilities
    # (e.g. at extreme, Raspberry Pi). Some way to let user select, or select
    # for user?
    filename_Lz = f'Lz{nspins}.npz'
    filename_Lproduct = f'Lproduct{nspins}.npz'
    bin_path = _bin_path()
    path_Lz = bin_path.joinpath(filename_Lz)
    path_Lproduct = bin_path.joinpath(filename_Lproduct)
    # with path_context_Lz as p:
    #     path_Lz = p
    # with path_context_Lproduct as p:
    #     path_Lproduct = p
    try:
        Lz = sparse.load_npz(path_Lz)
        Lproduct = sparse.load_npz(path_Lproduct)
        return Lz, Lproduct
    except FileNotFoundError:
        print('no SO file ', path_Lz, ' found.')
        print(f'creating {filename_Lz} and {filename_Lproduct}')
    Lz, Lproduct = _so_dense(nspins)
    Lz_sparse = sparse.COO(Lz)
    Lproduct_sparse = sparse.COO(Lproduct)
    sparse.save_npz(path_Lz, Lz_sparse)
    sparse.save_npz(path_Lproduct, Lproduct_sparse)
    return Lz_sparse, Lproduct_sparse

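# Minimal sketch (illustration only, not nmrsim's actual API): the Lz and
# Lproduct operators documented above are the building blocks of a spin
# Hamiltonian. Assuming dense arrays with Lz.shape == (n, 2**n, 2**n) and
# Lproduct.shape == (n, n, 2**n, 2**n), a frequency array v of length n and a
# scalar-coupling matrix J of shape (n, n), they would typically combine as:
import numpy as np

def example_hamiltonian(v, J, Lz, Lproduct):
    # Zeeman term: sum_i v[i] * Lz_i
    H = np.tensordot(v, Lz, axes=1)
    # coupling term: sum_{i,j} (J[i, j] / 2) * (Lx_i Lx_j + Ly_i Ly_j + Lz_i Lz_j)
    H = H + np.tensordot(J / 2, Lproduct, axes=2)
    return H
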
def load_one(path=PATH + "/" + SAVE_FILE_NAME):
    """
    Load the matrix from a single file.
    """
    data = sparse.load_npz(path).astype(np.int16).todense()
    print("Finished loading " + path)
    return data

def __init__(self, base_path, dataset_name, from_day_incl, dense=True):
    if not isinstance(base_path, pathlib.Path):
        base_path = pathlib.Path(base_path)
    self.from_day_incl = from_day_incl
    self.dataset_name = dataset_name
    self.imls_log_daily = sparse.load_npz(
        base_path / f"interactions_{dataset_name}_sparse.npz")
    self.dense = dense
    if self.dense:
        self.imls_log_daily = self.imls_log_daily.todense()
    self.num_days = self.imls_log_daily.shape[0]
    self.num_entities = self.imls_log_daily.shape[1]
    self.num_matrices = self.imls_log_daily.shape[-1]
    self.alive_df = pd.read_csv(
        base_path / f"alive_{dataset_name}.csv", parse_dates=["date_emerged"]
    )
    self.num_classes = len(self.alive_df.date_emerged.unique())
    indices_df = pd.read_csv(base_path / f"indices_{dataset_name}.csv")
    self.id_to_idx = dict(indices_df.values)
    self.idx_to_id = dict(indices_df.values[:, ::-1])
    self.bee_ages, self.valid_ages = self.parse_bee_ages(
        self.alive_df, from_day_incl, self.num_days, self.num_entities
    )
    self.labels = self.parse_labels(self.alive_df)

def cache_tm(nspins):
    """Load a cached sparse transition matrix if one exists, or create and
    save one if it does not.

    Parameters
    ----------
    nspins : int
        The number of spins in the spin system.

    Returns
    -------
    sparse.COO
        The sparse transition matrix.
    """
    # spin11 test indicates this leads to faster overall simsignals():
    #   11 spin x 6: 29.6 vs. 35.1 s
    #   8 spin x 60: 2.2 vs 3.0 s
    filename = f'T{nspins}.npz'
    bin_dir = os.path.join(os.path.dirname(__file__), 'bin')
    path = os.path.join(bin_dir, filename)
    try:
        T = sparse.load_npz(path)
        return T
    except FileNotFoundError:
        print(f'creating {filename}')
        T = transition_matrix_dense(nspins)
        T_sparse = sparse.COO(T)
        sparse.save_npz(path, T_sparse)
        return T_sparse

def load_each():
    """
    Load each file that has the name format containing the timestamps.
    """
    files = os.listdir(PATH)
    timestamps = []
    files.sort()
    prog = re.compile(r"20\d{6}\.\d{6}.npz$")
    for file in files:
        if prog.match(file):
            timestamps.append(file)  # to get in order of timestamps
    data = None
    for ts_idx in range(len(timestamps)):
        if (ts_idx + 1) % 10 == 0:
            # prints progress
            print(str(ts_idx + 1) + "/" + str(len(timestamps)) + " " + timestamps[ts_idx])
        try:
            record = sparse.load_npz(PATH + "/" + timestamps[ts_idx])
            record = record.todense().astype(np.int16)
            # record = record.astype(np.int16)
            # if FIELDS != ALL_FIELDS:
            #     record = record[:, :, FIELDS]
            if ts_idx == 0:
                data = np.zeros((len(timestamps), *record.shape), dtype=np.int16)
            data[ts_idx] += record
        except BadZipFile as e:
            # report the file that failed to load
            print(e, timestamps[ts_idx])
    print("Loaded each")
    return data

def extract_relation_ind(rel_file, rel_index_file):
    """
    Save index array for nonzero entries of the sparse relation tensor
    at rel_file.
    """
    relations = sparse.load_npz(rel_file)
    relation_ind = sparse.argwhere(relations > 0)
    np.save(rel_index_file, relation_ind)

def test_save_load_npz_file(tmp_path, compression, format):
    x = sparse.random((2, 3, 4, 5), density=0.25, format=format)
    y = x.todense()
    filename = tmp_path / "mat.npz"
    save_npz(filename, x, compressed=compression)
    z = load_npz(filename)
    assert_eq(x, z)
    assert_eq(y, z.todense())

def so_sparse(nspins):
    filename_Lz = f'sparse_Lz{nspins}.npz'
    filename_Lproduct = f'sparse_Lproduct{nspins}.npz'
    try:
        Lz = sparse.load_npz(filename_Lz)
        Lproduct = sparse.load_npz(filename_Lproduct)
        return Lz, Lproduct
    except FileNotFoundError:
        print(f'creating {filename_Lz} and {filename_Lproduct}')
    sigma_x = np.array([[0, 1 / 2], [1 / 2, 0]])
    sigma_y = np.array([[0, -1j / 2], [1j / 2, 0]])
    sigma_z = np.array([[1 / 2, 0], [0, -1 / 2]])
    unit = np.array([[1, 0], [0, 1]])

    L = np.empty((3, nspins, 2 ** nspins, 2 ** nspins),
                 dtype=np.complex128)  # consider other dtype?
    for n in range(nspins):
        Lx_current = 1
        Ly_current = 1
        Lz_current = 1
        for k in range(nspins):
            if k == n:
                Lx_current = np.kron(Lx_current, sigma_x)
                Ly_current = np.kron(Ly_current, sigma_y)
                Lz_current = np.kron(Lz_current, sigma_z)
            else:
                Lx_current = np.kron(Lx_current, unit)
                Ly_current = np.kron(Ly_current, unit)
                Lz_current = np.kron(Lz_current, unit)
        L[0][n] = Lx_current
        L[1][n] = Ly_current
        L[2][n] = Lz_current
    L_T = L.transpose(1, 0, 2, 3)
    Lproduct = np.tensordot(L_T, L, axes=((1, 3), (0, 2))).swapaxes(1, 2)
    Lz_sparse = sparse.COO(L[2])
    # for i in range(nspins):
    #     for j in range(nspins):
    #         Lproduct[i, j] = csr_matrix(Lproduct[i, j])
    Lproduct_sparse = sparse.COO(Lproduct)
    sparse.save_npz(filename_Lz, Lz_sparse)
    sparse.save_npz(filename_Lproduct, Lproduct_sparse)
    return Lz_sparse, Lproduct_sparse

def get_benchmark_splits_ordinal(fuse=False, batch_size=64):
    task = 'mortality'
    duration = 48.0
    timestep = 1.0
    df_label = pd.read_csv(
        '/data4/tangsp/FIDDLE/mimic3_experiments/data/processed/population/pop.mortality_benchmark.csv'
    ).rename(columns={'{}_LABEL'.format(task): 'LABEL'})
    X = sparse.load_npz(
        '../data/features,ablations/ordinal,benchmark,outcome={},T={},dt={}/X.npz'.format(
            task, duration, timestep)).todense()
    s = sparse.load_npz(
        '../data/features,ablations/ordinal,benchmark,outcome={},T={},dt={}/s.npz'.format(
            task, duration, timestep)).todense()

    tr_idx = df_label[df_label['partition'] == 'train'].index.values
    va_idx = df_label[df_label['partition'] == 'val'].index.values
    te_idx = df_label[df_label['partition'] == 'test'].index.values

    def _select_examples(rows):
        return (
            X[rows],
            s[rows],
            df_label.iloc[rows][['LABEL']].values,
        )

    Xy_tr = _select_examples(tr_idx)
    Xy_va = _select_examples(va_idx)
    Xy_te = _select_examples(te_idx)
    print('ICU stay splits:', len(tr_idx), len(va_idx), len(te_idx))

    te = EHRDataset(*Xy_te, fuse=fuse)
    va = EHRDataset(*Xy_va, fuse=fuse)
    tr = EHRDataset(*Xy_tr, fuse=fuse)

    num_workers = 1
    tr_loader = DataLoader(tr, batch_size=batch_size, shuffle=True,
                           num_workers=num_workers, pin_memory=True)
    va_loader = DataLoader(va, batch_size=batch_size, shuffle=False,
                           num_workers=num_workers, pin_memory=True)
    te_loader = DataLoader(te, batch_size=batch_size, shuffle=False,
                           num_workers=num_workers, pin_memory=True)

    print(tr_loader.dataset.y.sum() + va_loader.dataset.y.sum() + te_loader.dataset.y.sum(),
          '/', X.shape[0])
    print('')
    print('Time series shape, Static shape, Label shape, Class balance:')
    print('\t', 'tr', tr_loader.dataset.X.shape, tr_loader.dataset.s.shape,
          tr_loader.dataset.y.shape, tr_loader.dataset.y.mean())
    print('\t', 'va', va_loader.dataset.X.shape, va_loader.dataset.s.shape,
          va_loader.dataset.y.shape, va_loader.dataset.y.mean())
    print('\t', 'te', te_loader.dataset.X.shape, te_loader.dataset.s.shape,
          te_loader.dataset.y.shape, te_loader.dataset.y.mean())
    if fuse:
        print('Fused dimensions:', tr_loader.dataset[0][0].shape)
    return tr_loader, va_loader, te_loader

def so_sparse(nspins):
    """Either load a presaved set of spin operators as numpy arrays, or
    calculate them and save them if a presaved set wasn't found.

    Parameters
    ----------
    nspins : int
        the number of spins in the spin system

    Returns
    -------
    (Lz, Lproduct) : a tuple of:
        Lz : 3d sparse.COO array of shape (n, 2^n, 2^n) representing
            [Lz1, Lz2, ...Lzn]
        Lproduct : 4d sparse.COO array of shape (n, n, 2^n, 2^n), representing
            an n x n array (cartesian product) for all combinations of
            Lxa*Lxb + Lya*Lyb + Lza*Lzb, where 1 <= a, b <= n.

    Side Effect
    -----------
    Saves the results as .npz files to the bin directory if they were not
    found there.
    """
    filename_Lz = f'Lz{nspins}.npz'
    filename_Lproduct = f'Lproduct{nspins}.npz'
    bin_dir = os.path.join(os.path.dirname(__file__), 'bin')
    path_Lz = os.path.join(bin_dir, filename_Lz)
    path_Lproduct = os.path.join(bin_dir, filename_Lproduct)
    try:
        Lz = sparse.load_npz(path_Lz)
        Lproduct = sparse.load_npz(path_Lproduct)
        return Lz, Lproduct
    except FileNotFoundError:
        print('no SO file ', filename_Lz, ' found in: ', bin_dir)
        print(f'creating {filename_Lz} and {filename_Lproduct}')
    Lz, Lproduct = so_dense(nspins)
    Lz_sparse = sparse.COO(Lz)
    Lproduct_sparse = sparse.COO(Lproduct)
    sparse.save_npz(path_Lz, Lz_sparse)
    sparse.save_npz(path_Lproduct, Lproduct_sparse)
    return Lz_sparse, Lproduct_sparse

def test_save_load_npz_file(compression, format):
    with tempfile.TemporaryDirectory() as tmp_path_str:
        tmp_path = pathlib.Path(tmp_path_str)
        x = sparse.random((2, 3, 4, 5), density=0.25, format=format)
        y = x.todense()
        filename = tmp_path / "mat.npz"
        save_npz(filename, x, compressed=compression)
        z = load_npz(filename)
        assert_eq(x, z)
        assert_eq(y, z.todense())

def readMatrix(filename):
    try:
        # prefer the sparse .npz version of the file if it exists
        return sparse.load_npz(filename.replace(".txt", ".npz"))
    except Exception:
        # fall back to the text format with a "# Array shape: (...)" header
        with open(filename, 'r') as outfile:
            dims = outfile.readline().replace("# Array shape: (", "").replace(
                ")", "").replace("\n", "").split(", ")
            for i in range(len(dims)):
                dims[i] = int(dims[i])
            new_data = np.loadtxt(filename).reshape(dims)
            return new_data

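# Minimal sketch (hypothetical helper, not the project's own writer): produces
# the text format that readMatrix() falls back to -- a "# Array shape: (...)"
# header followed by the array flattened to 2-D for np.savetxt. np.loadtxt
# skips the '#' header line by default, and reshape(dims) restores the shape.
import numpy as np

def write_matrix_txt(filename, arr):
    with open(filename, 'w') as f:
        f.write("# Array shape: {}\n".format(arr.shape))
        np.savetxt(f, arr.reshape(-1, arr.shape[-1]))
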
def test_save_load_npz_file(compression):
    x = sparse.random((2, 3, 4, 5), density=.25)
    y = x.todense()
    dir_name = tempfile.mkdtemp()
    filename = os.path.join(dir_name, 'mat.npz')
    save_npz(filename, x, compressed=compression)
    z = load_npz(filename)
    assert_eq(x, z)
    assert_eq(y, z.todense())
    shutil.rmtree(dir_name)

def matrix_load(self, path):
    '''
    Loads a previously saved matrix from a .npz file (containing the matrix)
    and a .nfo file (containing the matrix tags).
    '''
    # load matrix
    matrix = sparse.load_npz(os.path.splitext(path)[0] + '.npz')
    matrix = sparse.DOK(matrix)  # convert to dict-of-keys for faster indexing
    # load matrix tags
    with open(os.path.splitext(path)[0] + '.nfo', 'rb') as f:
        tags = pickle.load(f)
    return matrix, tags

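# Minimal sketch (hypothetical counterpart, not the project's code): writes a
# matrix/tags pair in the layout matrix_load() expects -- the matrix as a
# sparse .npz file and the tags pickled to a .nfo file with the same stem.
import os
import pickle
import sparse

def matrix_save(matrix, tags, path):
    stem = os.path.splitext(path)[0]
    # DOK arrays are converted back to COO before saving
    sparse.save_npz(stem + '.npz', sparse.COO(matrix))
    with open(stem + '.nfo', 'wb') as f:
        pickle.dump(tags, f)
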
def load_expectation(expectation_file_name, type_=None):  # pragma: no cover
    """Returns np.ndarray related to the *expectation_file_name*.

    Expectation file path is rooted at tests/expectations.
    """
    thisdir = os.path.dirname(__file__)
    expectation_file_path = os.path.abspath(
        os.path.join(thisdir, "expectations", f"{expectation_file_name}.{type_}"))
    if type_ == "npy":
        expectation_data = np.load(expectation_file_path)
    elif type_ == "npz":
        expectation_data = sparse.load_npz(expectation_file_path)
    elif type_ == "png":
        expectation_data = Image.open(expectation_file_path)
    else:
        raise Exception("Type format not recognized")
    return expectation_data

def resolve_relations(db_file, rel_file, meta_file, id_file):
    """Build (or load) the sparse relation tensor for all event pairs in the database."""
    conn = open_db_connection(db_file)
    c = conn.cursor()

    # load or compute unique IDs
    if os.path.isfile(meta_file):
        meta = np.load(meta_file)
        off = meta[0]
        num_unique = meta[1]
        unique_ids = np.load(id_file)
    else:
        off = 0
        c.execute("SELECT DISTINCT event1_id FROM Relations;")
        event_ids = set(c.fetchall())
        for id2 in c.execute("SELECT event2_id FROM Relations;"):
            if id2 not in event_ids:
                event_ids.add(id2)
        unique_ids = np.char.array(list(event_ids))
        num_unique = len(event_ids)
        np.save(id_file, unique_ids)
        np.save(meta_file, np.array([off, num_unique]))

    id_lookup = dict()
    for i, id_entr in enumerate(unique_ids):
        id_lookup[id_entr[0]] = i

    # load or compute (compressed) relations
    if os.path.isfile(rel_file):
        relations = sparse.load_npz(rel_file)
    else:
        relations = sparse.DOK((num_unique, num_unique, RELATION_COUNT), dtype=np.float32)
        for row in c.execute("SELECT * FROM Relations;"):
            id_out = row[1]
            id_in = row[2]
            relations[id_lookup[id_out], id_lookup[id_in], :] = row[3:]
        relations = sparse.COO(relations)
        sparse.save_npz(rel_file, relations)

    conn.close()

def cache_tm(n):
    """spin11 test indicates this leads to faster overall simsignals().

    11 spin x 6: 29.6 vs. 35.1 s
    8 spin x 60: 2.2 vs 3.0 s
    """
    filename = f'transitions{n}.npz'
    try:
        T = sparse.load_npz(filename)
        return T
    except FileNotFoundError:
        print(f'creating {filename}')
        T = np.zeros((n, n))
        for i in range(n - 1):
            for j in range(i + 1, n):
                if bin(i ^ j).count('1') == 1:
                    T[i, j] = 1
        # T = T + T.T
        T += T.T
        T_sparse = sparse.COO(T)
        sparse.save_npz(filename, T_sparse)
        return T_sparse

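# Minimal sketch (illustration only, not the library's code): the
# bin(i ^ j).count('1') == 1 test above encodes the selection rule that two
# basis states are connected only when their indices differ in exactly one
# bit, i.e. by a single spin flip. The same rule written with NumPy
# broadcasting instead of the explicit double loop:
import numpy as np

def transition_matrix_example(n):
    i, j = np.meshgrid(np.arange(n), np.arange(n), indexing='ij')
    bits_changed = np.vectorize(lambda x: bin(x).count('1'))(i ^ j)
    # states are connected when exactly one bit (one spin) differs
    return (bits_changed == 1).astype(float)
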
def _tm_cache(nspins):
    """
    Loads a saved sparse transition matrix if it exists, or creates and saves
    one if it does not.

    Parameters
    ----------
    nspins : int
        The number of spins in the spin system.

    Returns
    -------
    T_sparse : sparse.COO
        The sparse transition matrix.

    Side Effects
    ------------
    Saves a sparse array to the bin folder if the required array was not
    found there.
    """
    # Speed tests indicated that using sparse-array transition matrices
    # provides a modest speed improvement on larger spin systems.
    filename = f'T{nspins}.npz'
    # init_path_context = resources.path(nmrsim.bin, '__init__.py')
    # with init_path_context as p:
    #     init_path = p
    # print('path to init: ', init_path)
    # bin_path = init_path.parent
    bin_path = _bin_path()
    path = bin_path.joinpath(filename)
    try:
        T_sparse = sparse.load_npz(path)
        return T_sparse
    except FileNotFoundError:
        print(f'creating {filename}')
        T_sparse = _transition_matrix_dense(nspins)
        T_sparse = sparse.COO(T_sparse)
        print('_tm_cache will save on path: ', path)
        sparse.save_npz(path, T_sparse)
        return T_sparse

def __init__(self, path, neg_sample_ratio, min_freq, is_matrix=True):
    ndarray_file = 'matrix.npz' if is_matrix else 'tensor.npz'
    self.ndarray = sparse.load_npz(join(path, ndarray_file)).todense()

    # Creating positive instances
    nonzeros = (self.ndarray > min_freq).nonzero()
    print('# of non-zero cells is {}'.format(len(nonzeros[0])))
    print('Density is {}%'.format(100 * len(nonzeros[0]) / self.ndarray.size))
    pos_points = list(zip(*nonzeros))

    # Creating negative instances (if there are enough negatives)
    assert len(nonzeros[0]) * (neg_sample_ratio + 1) < self.ndarray.size
    neg_points, seen_points = [], set(pos_points)
    while len(neg_points) < len(nonzeros[0]) * neg_sample_ratio:
        sample = tuple([np.random.choice(d) for d in self.ndarray.shape])
        if sample not in seen_points:
            seen_points.add(tuple(sample))
            neg_points.append(tuple(sample))

    # Combining all points and labels
    self.points = [np.array(x) for x in (pos_points + neg_points)]
    self.labels = [1] * len(pos_points) + [0] * len(neg_points)

    # Shuffling the points
    zipped = list(zip(self.points, self.labels))
    random.shuffle(zipped)
    self.points, self.labels = zip(*zipped)

np.savetxt('fig8_rayflare.txt', RAT['A_bulk'][0])

from angles import make_angle_vector
from config import results_path
from sparse import load_npz

_, _, angle_vector = make_angle_vector(options['n_theta_bins'], options['phi_symmetry'],
                                       options['c_azimuth'])

wl_to_plot = 1100e-9
wl_index = np.argmin(np.abs(wavelengths - wl_to_plot))

sprs = load_npz(os.path.join(results_path, options['project_name'],
                             SC[0].name + 'rearRT.npz'))

full = sprs[wl_index].todense()

summat = theta_summary(full, angle_vector, options['n_theta_bins'], 'rear')
summat_r = summat[options['n_theta_bins']:, :]
summat_r = summat_r.rename({r'$\theta_{in}$': 'a', r'$\theta_{out}$': 'b'})
summat_r = summat_r.assign_coords(a=np.sin(summat_r.coords['a']).data,
                                  b=np.sin(summat_r.coords['b']).data)
summat_r = summat_r.rename({'a': r'$\sin(\theta_{in})$', 'b': r'$\sin(\theta_{out})$'})

def matrix_multiplication(bulk_mats, bulk_thick, options, layer_widths=[],
                          n_layers=[], layer_names=[], calc_prof_list=[]):
    n_bulks = len(bulk_mats)
    n_interfaces = n_bulks + 1

    theta_intv, phi_intv, angle_vector = make_angle_vector(
        options['n_theta_bins'], options['phi_symmetry'], options['c_azimuth'])
    n_a_in = int(len(angle_vector) / 2)

    num_wl = len(options['wavelengths'])

    # wls = np.linspace(600, 1100, num_wl)*1e-9
    # pol = 's'

    # bulk thickness in m
    thetas = angle_vector[:n_a_in, 1]

    v0 = make_v0(options['theta_in'], options['phi_in'], num_wl,
                 options['n_theta_bins'], options['c_azimuth'], options['phi_symmetry'])

    up2down, down2up = out_to_in_matrix(options['phi_symmetry'], angle_vector,
                                        theta_intv, phi_intv)

    D = []
    for i1 in range(n_bulks):
        D.append(make_D(bulk_mats[i1].alpha(options['wavelengths']),
                        bulk_thick[i1], thetas))

    # unique_thetas = np.unique(thetas)

    # front incidence matrices
    Rf = []
    Tf = []
    Af = []
    Pf = []
    If = []
    side = 1

    for i1 in range(n_interfaces):
        mat_path = os.path.join(results_path, options['project_name'],
                                layer_names[i1] + 'frontRT.npz')
        absmat_path = os.path.join(results_path, options['project_name'],
                                   layer_names[i1] + 'frontA.npz')

        fullmat = load_npz(mat_path)
        absmat = load_npz(absmat_path)

        # if len(fullmat.shape) == 3:
        Rf.append(fullmat[:, :n_a_in, :])
        Tf.append(fullmat[:, n_a_in:, :])
        Af.append(absmat)
        # else:
        #     print(fullmat.shape)
        #     Rf.append(fullmat[:n_a_in, :])
        #     Tf.append(fullmat[n_a_in:, :])
        #     Af.append(absmat)

        if len(calc_prof_list[i1]) > 0:
            # profile, intgr = make_profile_data(options, unique_thetas, n_a_in, side,
            #                                    layer_names[i1], n_layers[i1], layer_widths[i1])
            profmat_path = os.path.join(results_path, options['project_name'],
                                        layer_names[i1] + 'frontprofmat.nc')
            prof_int = xr.load_dataset(profmat_path)
            profile = prof_int['profile']
            intgr = prof_int['intgr']
            Pf.append(profile)
            If.append(intgr)
        else:
            Pf.append([])
            If.append([])

    # rear incidence matrices
    Rb = []
    Tb = []
    Ab = []
    Pb = []
    Ib = []
    paramsb = []
    side = -1

    for i1 in range(n_interfaces - 1):
        mat_path = os.path.join(results_path, options['project_name'],
                                layer_names[i1] + 'rearRT.npz')
        absmat_path = os.path.join(results_path, options['project_name'],
                                   layer_names[i1] + 'rearA.npz')

        fullmat = load_npz(mat_path)
        absmat = load_npz(absmat_path)

        # if len(fullmat.shape) == 3:
        Rb.append(fullmat[:, :n_a_in, :])
        Tb.append(fullmat[:, n_a_in:, :])
        Ab.append(absmat)
        # else:
        #     Rb.append(fullmat[:n_a_in, :])
        #     Tb.append(fullmat[n_a_in:, :])
        #     Ab.append(absmat)

        if len(calc_prof_list[i1]) > 0:
            # profile, intgr = make_profile_data(options, unique_thetas, n_a_in, side,
            #                                    layer_names[i1], n_layers[i1], layer_widths[i1])
            profmat_path = os.path.join(results_path, options['project_name'],
                                        layer_names[i1] + 'rearprofmat.nc')
            prof_int = xr.load_dataset(profmat_path)
            profile = prof_int['profile']
            intgr = prof_int['intgr']
            Pb.append(profile)
            Ib.append(intgr)
        else:
            Pb.append([])
            Ib.append([])

    len_calcs = np.array([len(x) for x in calc_prof_list])
    print(len_calcs)
    print(np.any(len_calcs > 0))

    if np.any(len_calcs > 0):
        print('a')
        a = [[] for _ in range(n_interfaces)]
        a_prof = [[] for _ in range(n_interfaces)]
        vr = [[] for _ in range(n_bulks)]
        vt = [[] for _ in range(n_bulks)]
        A = [[] for _ in range(n_bulks)]
        A_prof = [[] for _ in range(n_bulks)]

        vf_1 = [[] for _ in range(n_interfaces)]
        vb_1 = [[] for _ in range(n_interfaces)]
        vf_2 = [[] for _ in range(n_interfaces)]
        vb_2 = [[] for _ in range(n_interfaces)]

        for i1 in range(n_bulks):
            # z = xr.DataArray(np.arange(0, bulk_thick[i1], options['nm_spacing']*1e-9), dims='z')
            # v0 is actually travelling down, but no reason to start in 'outgoing' ray format.
            vf_1[i1] = dot_wl(Tf[i1], v0)  # pass through front surface
            vr[i1].append(dot_wl(Rf[i1], v0))  # reflected from front surface
            a[i1].append(dot_wl(Af[i1], v0))  # absorbed in front surface at first interaction
            # print(v0)
            # print(If[i1])

            if len(If[i1]) > 0:
                v_xr = xr.DataArray(v0, dims=['wl', 'global_index'],
                                    coords={'wl': If[i1].coords['wl'],
                                            'global_index': np.arange(0, n_a_in)})
                int_power = xr.dot(v_xr, If[i1], dims='global_index')
                scale = (np.sum(dot_wl(Af[i1], v0), 1) / int_power).fillna(0)
                a_prof[i1].append((scale * xr.dot(v_xr, Pf[i1], dims='global_index')).data)

            power = np.sum(vf_1[i1], axis=1)

            # rep
            i2 = 1

            while np.any(power > options['I_thresh']):
                print(i2)
                # print(power)
                vf_1[i1] = dot_wl_u2d(down2up, vf_1[i1])  # outgoing to incoming
                vb_1[i1] = dot_wl(D[i1], vf_1[i1])  # pass through bulk, downwards
                # vb_1 already an incoming ray

                if len(If[i1 + 1]) > 0:
                    v_xr = xr.DataArray(vb_1[i1], dims=['wl', 'global_index'],
                                        coords={'wl': If[i1 + 1].coords['wl'],
                                                'global_index': np.arange(0, n_a_in)})
                    int_power = xr.dot(v_xr, If[i1 + 1], dims='global_index')
                    scale = (np.sum(dot_wl(Af[i1 + 1], vb_1[i1]), 1) / int_power).fillna(0)
                    # ('front profile')
                    a_prof[i1 + 1].append(
                        (scale * xr.dot(v_xr, Pf[i1 + 1], dims='global_index')).data)

                # remaining_power.append(np.sum(vb_1, axis=1))
                A[i1].append(np.sum(vf_1[i1], 1) - np.sum(vb_1[i1], 1))

                nz_thetas = vf_1[i1] != 0

                vb_2[i1] = dot_wl(Rf[i1 + 1], vb_1[i1])  # reflect from back surface. incoming -> up
                vf_2[i1] = dot_wl(D[i1], vb_2[i1])  # pass through bulk, upwards
                # print('rear profile')

                if len(Ib[i1]) > 0:
                    v_xr = xr.DataArray(vf_2[i1], dims=['wl', 'global_index'],
                                        coords={'wl': Ib[i1].coords['wl'],
                                                'global_index': np.arange(0, n_a_in)})
                    int_power = xr.dot(v_xr, Ib[i1], dims='global_index')
                    scale = (np.sum(dot_wl(Ab[i1], vf_2[i1]), 1) / int_power).fillna(0)
                    a_prof[i1].append(
                        (scale * xr.dot(v_xr, Pb[i1], dims='global_index')).data)

                # remaining_power.append(np.sum(vf_2, axis=1))
                A[i1].append(np.sum(vb_2[i1], 1) - np.sum(vf_2[i1], 1))

                vf_2[i1] = dot_wl_u2d(up2down, vf_2[i1])  # prepare for rear incidence
                vf_1[i1] = dot_wl(Rb[i1], vf_2[i1])  # reflect from front surface
                power = np.sum(vf_1[i1], axis=1)

                # nz_thetas = vb_2[i1] != 0

                vr[i1].append(dot_wl(Tb[i1], vf_2[i1]))
                # matrix travelling up in medium 0, i.e. reflected overall by
                # being transmitted through front surface
                vt[i1].append(dot_wl(Tf[i1 + 1], vb_1[i1]))
                # transmitted into medium below through back surface
                a[i1 + 1].append(dot_wl(Af[i1 + 1], vb_1[i1]))  # absorbed in 2nd surface
                a[i1].append(dot_wl(Ab[i1], vf_2[i1]))  # absorbed in 1st surface (from the back)

                i2 += 1

        vr = [np.array(item) for item in vr]
        vt = [np.array(item) for item in vt]
        a = [np.array(item) for item in a]
        A = [np.array(item) for item in A]
        a_prof = [np.array(item) for item in a_prof]

        results_per_pass = {'r': vr, 't': vt, 'a': a, 'A': A, 'a_prof': a_prof}

        # for i2 in range(3):
        #     for i1 in range(n_interfaces):
        #         plt.figure()
        #         z = np.arange(0, np.sum(layer_widths[i1]), options['nm_spacing'])
        #         plt.plot(z, a_prof[i1][i2, 0, :].T)
        #         plt.title(str(i2) + 'interface ' + str(i1))
        #         plt.show()

        sum_dims = ['bulk_index', 'wl']
        sum_coords = {'bulk_index': np.arange(0, n_bulks), 'wl': options['wavelengths']}

        R = xr.DataArray(np.array([np.sum(item, (0, 2)) for item in vr]),
                         dims=sum_dims, coords=sum_coords, name='R')
        T = xr.DataArray(np.array([np.sum(item, (0, 2)) for item in vt]),
                         dims=sum_dims, coords=sum_coords, name='T')
        A_bulk = xr.DataArray(np.array([np.sum(item, 0) for item in A]),
                              dims=sum_dims, coords=sum_coords, name='A_bulk')

        A_interface = xr.DataArray(np.array([np.sum(item, (0, 2)) for item in a]),
                                   dims=['surf_index', 'wl'],
                                   coords={'surf_index': np.arange(0, n_interfaces),
                                           'wl': options['wavelengths']},
                                   name='A_interface')
        profile = []
        for j1, item in enumerate(a_prof):
            if len(item) > 0:
                profile.append(
                    xr.DataArray(np.sum(item, 0),
                                 dims=['wl', 'z'],
                                 coords={'wl': options['wavelengths']},
                                 name='A_profile' + str(j1)))
                # not necessarily same number of z coords per layer stack

        bulk_profile = np.array(A_prof)

        RAT = xr.merge([R, A_bulk, A_interface, T])

        # for i2 in range(num_wl):
        #     plt.figure()
        #     for i1 in range(n_interfaces):
        #         z = np.arange(0, np.sum(layer_widths[i1]), options['nm_spacing'])
        #         plt.plot(z, profile[i1][i2].T)
        #     plt.show()
        #
        # plt.figure()
        # for i2 in range(5):
        #     i1 = 0
        #     z = np.arange(0, np.sum(layer_widths[i1]), options['nm_spacing'])
        #     plt.plot(z, a_prof[i1][i2, 0, :])
        #
        # plt.figure()
        # plt.plot(options['wavelengths'], R.T)
        # plt.plot(options['wavelengths'], T.T)
        # plt.plot(options['wavelengths'], A_interface.T)
        # plt.plot(options['wavelengths'], A_bulk.T)
        # plt.plot(options['wavelengths'], R[0] + T[0] + A_interface[0] + A_interface[1] + A_bulk[0])
        # plt.legend(['R', 'T', 'front', 'back', 'bulk'])
        # plt.show()

        # return R, T, A_bulk, A_interface, profile
        return RAT, results_per_pass, profile, bulk_profile

    else:
        print('b')
        a = [[] for _ in range(n_interfaces)]
        vr = [[] for _ in range(n_bulks)]
        vt = [[] for _ in range(n_bulks)]
        A = [[] for _ in range(n_bulks)]

        vf_1 = [[] for _ in range(n_interfaces)]
        vb_1 = [[] for _ in range(n_interfaces)]
        vf_2 = [[] for _ in range(n_interfaces)]
        vb_2 = [[] for _ in range(n_interfaces)]

        for i1 in range(n_bulks):
            vf_1[i1] = dot_wl(Tf[i1], v0)  # pass through front surface
            vr[i1].append(dot_wl(Rf[i1], v0))  # reflected from front surface
            a[i1].append(dot_wl(Af[i1], v0))  # absorbed in front surface at first interaction
            power = np.sum(vf_1[i1], axis=1)

            # rep
            i2 = 1

            while np.any(power > options['I_thresh']):
                print(i2)
                print('before d2u', np.sum(vf_1[i1]))
                vf_1[i1] = dot_wl_u2d(down2up, vf_1[i1])  # outgoing to incoming
                print('after 2du', np.sum(vf_1[i1]))
                # print('vf_1 after', vf_1[i1])
                vb_1[i1] = dot_wl(D[i1], vf_1[i1])  # pass through bulk, downwards
                print('before back ref', np.sum(vb_1[i1]))
                # remaining_power.append(np.sum(vb_1, axis=1))
                A[i1].append(np.sum(vf_1[i1], 1) - np.sum(vb_1[i1], 1))
                vb_2[i1] = dot_wl(Rf[i1 + 1], vb_1[i1])  # reflect from back surface
                print('after back ref', np.sum(vb_2[i1]))
                vf_2[i1] = dot_wl(D[i1], vb_2[i1])  # pass through bulk, upwards
                # print('vb_2', vb_2[i1])
                print('after u2d', np.sum(vf_2[i1]))
                vf_2[i1] = dot_wl_u2d(up2down, vf_2[i1])  # prepare for rear incidence
                print('after u2d/before front ref', np.sum(vf_2[i1]))
                vf_1[i1] = dot_wl(Rb[i1], vf_2[i1])  # reflect from front surface
                print('after front ref', np.sum(vf_1[i1]))
                # print('Rf, Rb, and vf2', Rf[i1][20].todense(), Rb[i1][20].todense(), vf_2[i1][20])
                # print('powersrem', np.sum(vb_2[i1], 1), np.sum(vf_2[i1], 1), np.sum(vf_1[i1], 1))
                # remaining_power.append(np.sum(vf_2, axis=1))
                A[i1].append(np.sum(vb_2[i1], 1) - np.sum(vf_2[i1], 1))

                power = np.sum(vf_1[i1], axis=1)
                # print('power', power)

                vr[i1].append(dot_wl(Tb[i1], vf_2[i1]))
                # matrix travelling up in medium 0, i.e. reflected overall by
                # being transmitted through front surface
                print('lost in front ref', np.sum(vr[i1]))
                # print('Tf, vb1', Tf[i1 + 1][20].todense(), vb_1[i1][20])
                vt[i1].append(dot_wl(Tf[i1 + 1], vb_1[i1]))
                # transmitted into medium below through back surface
                print('lost in back ref', np.sum(vt[i1]))
                a[i1 + 1].append(dot_wl(Af[i1 + 1], vb_1[i1]))  # absorbed in 2nd surface
                a[i1].append(dot_wl(Ab[i1], vf_2[i1]))  # absorbed in 1st surface (from the back)

                i2 += 1

        vr = [np.array(item) for item in vr]
        vt = [np.array(item) for item in vt]
        a = [np.array(item) for item in a]
        A = [np.array(item) for item in A]

        results_per_pass = {'r': vr, 't': vt, 'a': a, 'A': A}

        sum_dims = ['bulk_index', 'wl']
        sum_coords = {'bulk_index': np.arange(0, n_bulks), 'wl': options['wavelengths']}

        R = xr.DataArray(np.array([np.sum(item, (0, 2)) for item in vr]),
                         dims=sum_dims, coords=sum_coords, name='R')

        if i2 > 1:
            A_bulk = xr.DataArray(np.array([np.sum(item, 0) for item in A]),
                                  dims=sum_dims, coords=sum_coords, name='A_bulk')
            T = xr.DataArray(np.array([np.sum(item, (0, 2)) for item in vt]),
                             dims=sum_dims, coords=sum_coords, name='T')
            RAT = xr.merge([R, A_bulk, T])
        else:
            RAT = xr.merge([R])

        return RAT, results_per_pass

plt.plot(options['wavelengths'] * 1e9,
         TMM_res['A_per_layer'][len(front_materials) + 1], label='Ge')
plt.plot(options['wavelengths'] * 1e9,
         TMM_res['A_per_layer'][1] + TMM_res['A_per_layer'][2], label='ARC')
plt.plot(options['wavelengths'] * 1e9, TMM_res['A_per_layer'][3], label='InGaP')
plt.plot(options['wavelengths'] * 1e9, TMM_res['A_per_layer'][4], label='GaAs')
plt.plot(options['wavelengths'] * 1e9, TMM_res['A_per_layer'][5], label='SiGeSn')
plt.xlabel('Wavelength (nm)')
plt.ylabel('Reflection / Absorption')
# plt.legend()
plt.show()

from sparse import load_npz

RTmat = load_npz(
    '/home/phoebe/Documents/rayflare/results/test_matrix2/GaInP_GaAs_SiGeSn_RTfrontRT.npz')
TMMmat = load_npz(
    '/home/phoebe/Documents/rayflare/results/test_matrix2/GaInP_GaAs_SiGeSn_TMMfrontRT.npz')

RTmat_0 = RTmat[0].todense()
TMMmat_0 = TMMmat[0].todense()

    return clf


def save_test_predictions(y_true, y_score, model_name, save_dir):
    # import pathlib
    # pathlib.Path(save_dir).mkdir(parents=True, exist_ok=True)
    fname = save_dir + '{}.test.npz'.format(model_name)
    np.savez(
        open(fname, 'wb'),
        y_score=y_score,
        y_true=y_true,
    )
    print('Test predictions saved to', fname)


import sparse

X = sparse.load_npz('output.clinical/X.npz').todense().squeeze()
s = sparse.load_npz('output.icd[0,1,2]/s.npz').todense()
X = np.concatenate((s, X), axis=1)

Xtr = X[df.partition == "train"]
ytr = df[df.partition == "train"]['label']
Xte = X[df.partition == "test"]
yte = df[df.partition == "test"]['label']
print(Xtr.shape, ytr.shape, Xte.shape, yte.shape)

train_model(Xtr, ytr, Xte, yte, 'LR', 'clinical+ICD[0,1,2]')
train_model(Xtr, ytr, Xte, yte, 'RF', 'clinical+ICD[0,1,2]')

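# Minimal sketch (hypothetical helper, not from the source above): reads back
# the archive written by save_test_predictions(), using the same key names
# that np.savez was given (y_true, y_score).
import numpy as np

def load_test_predictions(model_name, save_dir):
    fname = save_dir + '{}.test.npz'.format(model_name)
    with np.load(fname) as data:
        return data['y_true'], data['y_score']
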
    'lookuptable_angles': 200,
    # 'prof_layers': [1, 2],
    'n_rays': 500000,
    'random_angles': False,
    'nx': 15,
    'ny': 15,
    'parallel': True,
    'n_jobs': -1,
    'phi_symmetry': np.pi / 2,
    'only_incidence_angle': True
}

Si = material('Si_OPTOS')()

sprs = load_npz(os.path.join(results_path, options['project_name'],
                             'planar_back' + str(options['n_rays']) + 'frontRT.npz'))

_, _, angle_vector = make_angle_vector(options['n_theta_bins'], options['phi_symmetry'],
                                       options['c_azimuth'])

R = sprs.todense()[:, 0:int(len(angle_vector) / 2), :]

a, unique_index = np.unique(angle_vector[:, 1], return_index=True)
unique_index = unique_index[unique_index < 1300]

only_one_theta = R[:, unique_index, unique_index]

plt.figure()
a = plt.imshow(only_one_theta, extent=[0, 1, 900, 1200], aspect='auto')
plt.colorbar(a)