tissue_names = []  # every entry of `nodes` that is not the 'internal node' sentinel, in order
    num_internal_nodes = 0  # how many entries of `nodes` equal 'internal node'
    X = []
    y = []

    # Freeze the enhancer ids into a list so the same ordering is reused for
    # the DataFrame row selection and for building X below.
    enhancers_list = list(enhancers_dict.keys())

    # Split the node labels: the literal string 'internal node' is only
    # counted, any other label is collected as a tissue name.
    for name in nodes:
        if name == 'internal node':
            num_internal_nodes += 1
        else:
            tissue_names.append(name)

    print(tissue_names)

    # NOTE(review): build_parent_path_mat is defined outside this view; the
    # only thing relied on here is that len() of its result is the edge count.
    parent_path_mat = build_parent_path_mat(pc_mat)
    num_edges = len(parent_path_mat)
    # Zero-initialised parameters: one column per edge, plus one root vector.
    delta_mat = np.zeros(shape=(embedding_size, num_edges))
    root_vector = np.zeros(shape=embedding_size)

    tissue_dfs = {}  # tissue name -> DataFrame restricted to enhancers_list

    # Load one CSV per tissue, indexed by its 'cCRE_id' column, and keep only
    # the rows named in enhancers_list (in that order).
    # NOTE(review): .loc with a list of labels presumably raises KeyError when
    # an enhancer id is absent from the file -- confirm that is intended.
    for t in tissue_names:
        t_df = pd.read_csv(os.path.join('data_files', 'CT_enhancer_features_matrices',
                                        t + '_enhancer_features_matrix.csv'), index_col='cCRE_id')
        t_df = t_df.loc[enhancers_list]
        tissue_dfs[t] = t_df

    # One X entry per enhancer, in enhancers_list order.
    # NOTE(review): get_one_hot_encoding is defined outside this view.
    for enhancer in enhancers_list:
        X.append(get_one_hot_encoding(enhancers_dict[enhancer]))
    # For every sample row, find each node whose example list contains the
    # row's ID. The sample's features/phenotype are appended to X/y at most
    # once (on the first match); every match records an
    # (example index, node index) pair in `mapping`.
    # NOTE(review): `mapping` and `example_number` are not initialised in the
    # visible code -- they must be set up before this loop.
    for row in samples_df.itertuples():
        added_in_X_and_y = False  # guards against appending the same row twice
        genome_id = getattr(row, 'ID')
        for i, examples_list in enumerate(node_examples):
            if genome_id in examples_list:
                if not added_in_X_and_y:
                    # NOTE(review): eval() executes whatever expression is
                    # stored in the cell. If these columns only hold plain
                    # literals, ast.literal_eval would be the safer parser --
                    # confirm the data source before changing.
                    phenotype = eval(getattr(row, 'Phenotype'))[0]  # the y value
                    features = eval(getattr(row, 'Features'))  # the x value
                    y.append(phenotype)
                    X.append(features)
                    added_in_X_and_y = True
                mapping.append((example_number, i))
        # Advance the example index only for rows that were actually added,
        # so it stays aligned with positions in X and y.
        if added_in_X_and_y:
            example_number += 1

    # NOTE(review): build_parent_path_mat is defined outside this view; only
    # its length (used as the number of edges) is relied on here.
    parent_path_tensor = build_parent_path_mat(parent_child)
    num_features = len(X[0])  # taken from the first example; raises IndexError if X is empty
    num_nodes = len(parent_child[0])
    num_edges = len(parent_path_tensor)

    # Zero-initialised parameters: one weight per feature at the root and one
    # column of per-feature weights for each edge.
    root_weights = np.zeros(shape=num_features)
    edge_tensor_matrix = np.zeros(shape=(num_features, num_edges))

    test_auc_output = []
    val_auc_output = []
    average_time_seed = 0

    # One iteration per seed in args.seeds; the start time is captured first.
    for s in args.seeds:
        init_time = time.time()

        print('New seed: ' + str(s))
Example #3
0
                    # Enqueue whichever children exist on the node at the end
                    # of node_list (left before right).
                    if node_list[-1].left is not None:
                        node_queue.append(node_list[-1].left)
                    if node_list[-1].right is not None:
                        node_queue.append(node_list[-1].right)
                num_nodes = len(node_list)
                # One fewer edge than nodes, matching the loop below that
                # records one parent link for every node except index 0.
                num_edges = num_nodes - 1

                # constructing the parent-child matrix, would be nice to find a faster way to do this
                # Entry [p, c] is set to 1.0 when node_list[p] is the parent of
                # node_list[c]; all other entries stay 0.
                # NOTE(review): node_list.index() is called once per node, so
                # if node_list is a plain list this loop is quadratic in the
                # number of nodes.
                parent_child_mat = np.zeros(shape=(num_nodes, num_nodes),
                                            dtype=np.float32)
                for child_idx in range(1,
                                       len(node_list)):  # excluding the root
                    parent_idx = node_list.index(node_list[child_idx].parent)
                    parent_child_mat[parent_idx, child_idx] = 1.0

                # NOTE(review): build_parent_path_mat is defined outside this view.
                pp_mat = build_parent_path_mat(parent_child_mat,
                                               num_edges=num_edges)

                # split the leaves into train and test
                # NOTE(review): split_indices is defined outside this view; it
                # is given positions into `leaves` and its two results are
                # used as the train and validation position lists.
                train_idx, valid_idx = split_indices(range(len(leaves)))

                # constructing train and valid x and y matrices
                # Element 0 of each `leaves` entry is collected as a column
                # index, first as Python lists and then as tensors on `device`.
                train_col_idx = [leaves[i][0] for i in train_idx]
                valid_col_idx = [leaves[i][0] for i in valid_idx]
                train_col_idx_tensor = torch.tensor(train_col_idx,
                                                    device=device)
                valid_col_idx_tensor = torch.tensor(valid_col_idx,
                                                    device=device)
                train_x = torch.tensor(np.asarray(
                    [leaves[i][1].x for i in train_idx]),
                                       device=device,