is_first_degree = True
        is_landmark = False
elif args.nodes_rand > 0:
    is_first_degree = False
    is_landmark = False
    graph_name = "R-{}".format(args.nodes_rand)
else:
    is_first_degree = False
    is_landmark = False
    graph_name = "all_nodes"

# Read in data
# The requested dataset is validated with an explicit membership check rather
# than `assert`: assertions are stripped under `python -O`, which would let an
# unsupported value fall through every branch and leave `dataset` undefined.
import sys

if args.dataset not in ('tcga', 'gtex', 'geo'):
    print("Please enter a valid argument for the dataset. Valid options are tcga, gtex and geo")
    # Non-zero status so calling shells/schedulers see the run as failed.
    sys.exit(1)

try:
    if args.dataset == 'tcga':
        dataset = TCGADataset()
    elif args.dataset == 'gtex':
        dataset = GTexDataset()
    else:  # 'geo' -- the only value left after the membership check above
        dataset = GEODataset(file_path='/network/data1/genomics/D-GEX/bgedv2.hdf5',
                             seed=seed, load_full=False, nb_examples=40000)
except Exception:
    # Top-level boundary: the argument itself was valid, so whatever failed
    # happened while constructing the dataset. Print the full traceback and
    # say so, instead of blaming the command-line argument.
    tb = traceback.format_exc()
    print(tb)
    print("Failed to load the '{}' dataset; see the traceback above.".format(args.dataset))
    sys.exit(1)

# Create list of the genes to perform inference on
# If assessing first-degree neighbours, then train only for those genes
Example #2
0
from data.datasets import TCGADataset
from data.gene_graphs import GeneManiaGraph, RegNetGraph, HumanNetV2Graph, \
    FunCoupGraph
from data.utils import record_result
from tqdm import tqdm

# Absolute directory holding the BRCA sample list and label CSV files.
PATH = "/home/user/gil/Expression project/GCN_EXP_predict_TP53_protein_activity/data/gene_tumor_specific"

seed = 0
# Whether torch reports a usable CUDA device.
cuda = torch.cuda.is_available()

# Gene-graph classes keyed by the short name used to select them.
graph_dict = {
    "regnet": RegNetGraph,
    "genemania": GeneManiaGraph,
    "humannetv2": HumanNetV2Graph,
    "funcoup": FunCoupGraph,
}

# Read in data: TCGA
dataset = TCGADataset()

# TCGA BRCA samples only.
# The CSV's first row is the header and its first column becomes the index;
# the first remaining column supplies the sample identifiers.
nb_examples = pd.read_csv(PATH + "/BRCA_samples.csv", header=0, index_col=0)
_brca_sample_ids = nb_examples.T.iloc[0]
# reindex() orders the rows like the sample list; identifiers that are not
# present in dataset.df come back as all-NaN rows rather than raising.
dataset.df = dataset.df.reindex(_brca_sample_ids)
dataset.sample_names = _brca_sample_ids.tolist()

# labels: TCGA_BRCA (same CSV layout as the sample list)
labels = pd.read_csv(PATH + "/BRCA_labels.csv", header=0, index_col=0)
# Keep only the first data column, as a Series.
labels = labels.T.iloc[0]
}
# # Select graph and set variables
# if args.graph:
#     # Check graph arg is valid
#     assert args.graph in graph_dict.keys()
#     graph_name = args.graph
#     gene_graph = graph_dict[graph_name]()
#     is_first_degree = True
# else:
#     is_first_degree = False
#     graph_name = "all_nodes"
# # adj for GCN
# adj = gene_graph.adj()

# Read in data: TCGA
# Bound to its own name; `dataset` is left untouched by this section.
TCGA_dataset = TCGADataset()

# tuning
# Values are only assigned here; how each one is consumed is decided by code
# outside this section.
num_layer = 2
channels = 30
embedding = 40
batch_size = 10
dropout = False
is_first_degree = True

# load graph and create adj matrix.
graph = "funcoup"
# Look up the graph class by name, then instantiate it.
_graph_cls = graph_dict[graph]
gene_graph = _graph_cls()
# adj for GCN
adj = gene_graph.adj()
# GCN
# model_name = "GCN"
# graph = "humannetv2"
# # create gene graph
# gene_graph = graph_dict[graph]()
# # adj for GCN
# adj = gene_graph.adj()
# is_first_degree = True

# MLP
# Selecting the MLP configuration rebinds `graph` and `is_first_degree`,
# replacing the values assigned in the graph-loading section above.
model_name = "MLP"
graph = "all nodes"
is_first_degree = False

# Dataset: TCGA
dataset = TCGADataset()
# load TCGA BRCA samples
# PATH carries no trailing separator, so the "/" has to be part of the file
# name here (as in the earlier BRCA load); without it the file resolves to
# ".../gene_tumor_specificBRCA_samples.csv".
nb_examples = pd.read_csv(
    PATH + "/BRCA_samples.csv",
    header=0,  # first row is header.
    index_col=0)
# Select the rows named by the first data column; .loc raises KeyError if a
# listed sample is absent from dataset.df.
dataset.df = dataset.df.loc[nb_examples.T.iloc[0]]
# NOTE(review): the names come from column 'x' while the rows above come from
# the first data column -- presumably the same column; confirm against the CSV.
dataset.sample_names = nb_examples['x'].tolist()
# labels: load TCGA_BRCA
labels = pd.read_csv(
    PATH + "/BRCA_labels.csv",
    header=0,  # first row is header.
    index_col=0)
# Keep only the first data column, then attach its raw values to the dataset.
labels = labels.T.iloc[0]
dataset.labels = labels.values