# --- Dataset / tree bookkeeping -------------------------------------------
# NOTE(review): this section was reconstructed from a whitespace-collapsed
# source; statement nesting below is the most plausible reading -- confirm
# against the original file.

# Fixed enhancer ordering, taken from the dict's own iteration order.
enhancers_list = list(enhancers_dict)

# Feature rows and targets; only X is filled in the visible part of the script.
X = []
y = []

# Every entry of `nodes` literally named 'internal node' is only counted;
# every other name is kept as a tissue name.
tissue_names = []
num_internal_nodes = 0
for name in nodes:
    if name != 'internal node':
        tissue_names.append(name)
        continue
    num_internal_nodes += 1
print(tissue_names)

# build_parent_path_mat is defined elsewhere; here only its length is used,
# as the number of edges.
parent_path_mat = build_parent_path_mat(pc_mat)
num_edges = len(parent_path_mat)

# Zero-initialised parameters: a vector of length embedding_size, and a
# matrix with one column per edge.
root_vector = np.zeros(embedding_size)
delta_mat = np.zeros((embedding_size, num_edges))

# One feature table per tissue, indexed by cCRE_id and restricted to (and
# ordered by) enhancers_list. `.loc` with a list raises KeyError if an
# enhancer is missing from a table.
features_dir = os.path.join('data_files', 'CT_enhancer_features_matrices')
tissue_dfs = {}
for t in tissue_names:
    csv_path = os.path.join(features_dir, t + '_enhancer_features_matrix.csv')
    t_df = pd.read_csv(csv_path, index_col='cCRE_id').loc[enhancers_list]
    tissue_dfs[t] = t_df

# Encode each enhancer's dict value with get_one_hot_encoding (helper not
# visible here), in enhancers_list order.
# NOTE(review): read as a top-level loop, not nested in the tissue loop --
# confirm.
for enhancer in enhancers_list:
    X.append(get_one_hot_encoding(enhancers_dict[enhancer]))
# --- Build X / y and the example -> node mapping ---------------------------
# NOTE(review): this section was reconstructed from a whitespace-collapsed
# source; confirm the nesting against the original file.
#
# A sample row is used only if its ID appears in at least one entry of
# node_examples. Such a row contributes exactly one (features, phenotype)
# pair, plus one (example_number, node_index) mapping entry per node whose
# example list contains the ID.
for row in samples_df.itertuples():
    genome_id = row.ID
    matching_nodes = [
        node_idx
        for node_idx, examples_list in enumerate(node_examples)
        if genome_id in examples_list
    ]
    added_in_X_and_y = bool(matching_nodes)
    if not added_in_X_and_y:
        continue

    # NOTE(review): eval() executes whatever text is stored in the Phenotype
    # and Features cells. If samples_df can come from an untrusted file,
    # switch to ast.literal_eval after confirming the cells hold plain
    # Python literals.
    phenotype = eval(row.Phenotype)[0]  # target: first element of the parsed cell
    features = eval(row.Features)  # input: the whole parsed cell
    y.append(phenotype)
    X.append(features)

    for node_idx in matching_nodes:
        mapping.append((example_number, node_idx))
    example_number += 1

# Sizes derived from the data and from the parent/child structure.
# build_parent_path_mat is defined elsewhere; only its length is used here.
num_features = len(X[0])
num_nodes = len(parent_child[0])
parent_path_tensor = build_parent_path_mat(parent_child)
num_edges = len(parent_path_tensor)

# Zero-initialised parameters: a vector of length num_features, and a matrix
# with one column per edge.
root_weights = np.zeros(num_features)
edge_tensor_matrix = np.zeros((num_features, num_edges))

# Per-seed result accumulators.
test_auc_output = []
val_auc_output = []
average_time_seed = 0

for s in args.seeds:
    init_time = time.time()  # start timestamp for this seed
    print('New seed: ' + str(s))
if node_list[-1].left is not None: node_queue.append(node_list[-1].left) if node_list[-1].right is not None: node_queue.append(node_list[-1].right) num_nodes = len(node_list) num_edges = num_nodes - 1 # constructing the parent-child matrix, would be nice to find a faster way to do this parent_child_mat = np.zeros(shape=(num_nodes, num_nodes), dtype=np.float32) for child_idx in range(1, len(node_list)): # excluding the root parent_idx = node_list.index(node_list[child_idx].parent) parent_child_mat[parent_idx, child_idx] = 1.0 pp_mat = build_parent_path_mat(parent_child_mat, num_edges=num_edges) # split the leaves into train and test train_idx, valid_idx = split_indices(range(len(leaves))) # constructing train and valid x and y matrices train_col_idx = [leaves[i][0] for i in train_idx] valid_col_idx = [leaves[i][0] for i in valid_idx] train_col_idx_tensor = torch.tensor(train_col_idx, device=device) valid_col_idx_tensor = torch.tensor(valid_col_idx, device=device) train_x = torch.tensor(np.asarray( [leaves[i][1].x for i in train_idx]), device=device,