is_first_degree = True is_landmark = False elif args.nodes_rand > 0: is_first_degree = False is_landmark = False graph_name = "R-{}".format(args.nodes_rand) else: is_first_degree = False is_landmark = False graph_name = "all_nodes" # Read in data try: assert args.dataset in ['tcga', 'gtex', 'geo'] if args.dataset == 'tcga': dataset = TCGADataset() elif args.dataset == 'gtex': dataset = GTexDataset() elif args.dataset == 'geo': dataset = GEODataset(file_path='/network/data1/genomics/D-GEX/bgedv2.hdf5', seed=seed, load_full=False, nb_examples=40000) except Exception: tb = traceback.format_exc() print(tb) print("Please enter a valid argument for the dataset. Valid options are tcga, gtex and geo") import sys sys.exit() # Create list of the genes to perform inference on # If assessing first-degree neighbours, then train only for those genes
import os

from data.datasets import TCGADataset
from data.gene_graphs import GeneManiaGraph, RegNetGraph, HumanNetV2Graph, \
    FunCoupGraph
from data.utils import record_result
from tqdm import tqdm

# Absolute directory that holds the BRCA sample-list and label CSV files.
PATH = "/home/user/gil/Expression project/GCN_EXP_predict_TP53_protein_activity/data/gene_tumor_specific"
seed = 0
cuda = torch.cuda.is_available()

# Maps a graph name to the class that is instantiated to build that graph.
graph_dict = {
    "regnet": RegNetGraph,
    "genemania": GeneManiaGraph,
    "humannetv2": HumanNetV2Graph,
    "funcoup": FunCoupGraph,
}

# Read in data: TCGA
dataset = TCGADataset()

# TCGA BRCA samples only
nb_examples = pd.read_csv(
    os.path.join(PATH, "BRCA_samples.csv"),
    header=0,  # first row is header.
    index_col=0,
)
# The first data column carries the sample IDs. Select it directly instead of
# transposing the whole frame, and reuse the same series for both assignments
# so the expression matrix and the name list cannot drift apart.
brca_sample_ids = nb_examples.iloc[:, 0]
# NOTE(review): reindex() keeps every requested ID and fills rows that are
# absent from dataset.df with NaN rather than raising - confirm that is the
# intended handling of unknown samples.
dataset.df = dataset.df.reindex(brca_sample_ids)
dataset.sample_names = brca_sample_ids.tolist()

# labels: TCGA_BRCA
labels = pd.read_csv(
    os.path.join(PATH, "BRCA_labels.csv"),
    header=0,  # first row is header.
    index_col=0,
)
# Keep only the first column; direct selection preserves that column's dtype.
labels = labels.iloc[:, 0]
} # # Select graph and set variables # if args.graph: # # Check graph arg is valid # assert args.graph in graph_dict.keys() # graph_name = args.graph # gene_graph = graph_dict[graph_name]() # is_first_degree = True # else: # is_first_degree = False # graph_name = "all_nodes" # # adj for GCN # adj = gene_graph.adj() # Read in data: TCGA TCGA_dataset = TCGADataset() # tuning num_layer = 2 channels = 30 embedding = 40 dropout = False batch_size = 10 is_first_degree = True # load graph and create adj matrix. graph = "funcoup" gene_graph = graph_dict[graph]() # adj for GCN adj = gene_graph.adj()
import os

# Two model configurations are kept here; only the MLP one is active.

# GCN (disabled)
# model_name = "GCN"
# graph = "humannetv2"
# # create gene graph
# gene_graph = graph_dict[graph]()
# # adj for GCN
# adj = gene_graph.adj()
# is_first_degree = True

# MLP
model_name = "MLP"
graph = "all nodes"
is_first_degree = False

# Dataset: TCGA
dataset = TCGADataset()

# load TCGA BRCA samples
nb_examples = pd.read_csv(
    # os.path.join inserts the directory separator when PATH has no trailing
    # slash; plain concatenation glued the file name onto the directory name.
    os.path.join(PATH, "BRCA_samples.csv"),
    header=0,  # first row is header.
    index_col=0)
# Use one series (the first data column) for both the row selection and the
# name list so the two always refer to the same samples in the same order.
brca_sample_ids = nb_examples.iloc[:, 0]
# .loc raises KeyError if a listed sample is missing from dataset.df.
dataset.df = dataset.df.loc[brca_sample_ids]
dataset.sample_names = brca_sample_ids.tolist()

# labels: load TCGA_BRCA
labels = pd.read_csv(
    os.path.join(PATH, "BRCA_labels.csv"),
    header=0,  # first row is header.
    index_col=0)
# Keep only the first column; direct selection preserves that column's dtype.
labels = labels.iloc[:, 0]
dataset.labels = labels.values