Python TCGADataset Examples

Programming Language: Python

Namespace/Package Name: data.datasets

Class/Type: TCGADataset

Examples at hotexamples.com: 4

Python TCGADataset - 4 examples found. These are the top-rated real-world Python examples of data.datasets.TCGADataset extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

labels(2)

df(2)

sample_names(2)

TCGADataset(1)

Example #1

Show file

File: single_gene_inference.py Project: jjc2718/gene-graph-conv

        is_first_degree = True
        is_landmark = False
elif args.nodes_rand > 0:
    is_first_degree = False
    is_landmark = False
    graph_name = "R-{}".format(args.nodes_rand)
else:
    is_first_degree = False
    is_landmark = False
    graph_name = "all_nodes"

# Read in data
# Build the dataset selected by ``args.dataset``. Any failure -- an unknown
# dataset name or an error raised while constructing the dataset object --
# prints the traceback plus a usage hint and terminates the script.
try:
    # Explicit check rather than ``assert``: assertions are stripped when
    # Python runs with -O, which would let an unknown name fall through and
    # leave ``dataset`` undefined.
    if args.dataset not in ('tcga', 'gtex', 'geo'):
        raise ValueError("Unknown dataset: {!r}".format(args.dataset))
    if args.dataset == 'tcga':
        dataset = TCGADataset()
    elif args.dataset == 'gtex':
        dataset = GTexDataset()
    elif args.dataset == 'geo':
        dataset = GEODataset(file_path='/network/data1/genomics/D-GEX/bgedv2.hdf5',
                             seed=seed, load_full=False, nb_examples=40000)

except Exception:
    tb = traceback.format_exc()
    print(tb)
    print("Please enter a valid argument for the dataset. Valid options are tcga, gtex and geo")
    import sys
    # Exit with a non-zero status so shells and job schedulers see the
    # failure; a bare sys.exit() would report success (status 0).
    sys.exit(1)

# Create list of the genes to perform inference on
# If assessing first-degree neighbours, then train only for those genes

Example #2

Show file

from data.datasets import TCGADataset
from data.gene_graphs import GeneManiaGraph, RegNetGraph, HumanNetV2Graph, \
    FunCoupGraph
from data.utils import record_result
from tqdm import tqdm

# Absolute directory holding the BRCA sample / label CSV files.
PATH = "/home/user/gil/Expression project/GCN_EXP_predict_TP53_protein_activity/data/gene_tumor_specific"

seed = 0
# True when a CUDA device is visible to torch.
cuda = torch.cuda.is_available()

# Graph name -> gene-graph class (classes are stored, not instantiated).
graph_dict = {
    "regnet": RegNetGraph,
    "genemania": GeneManiaGraph,
    "humannetv2": HumanNetV2Graph,
    "funcoup": FunCoupGraph,
}

# Load the TCGA dataset, then narrow it to the samples listed in
# BRCA_samples.csv.
dataset = TCGADataset()
# The CSV's first row is the header and its first column becomes the index.
nb_examples = pd.read_csv(PATH + "/BRCA_samples.csv", header=0, index_col=0)
# First remaining column of the CSV, used as the sample identifiers.
brca_sample_ids = nb_examples.T.iloc[0]
dataset.df = dataset.df.reindex(brca_sample_ids)
dataset.sample_names = brca_sample_ids.tolist()

# Labels come from BRCA_labels.csv, read with the same layout; only the
# first remaining column is kept.
labels = pd.read_csv(PATH + "/BRCA_labels.csv", header=0, index_col=0).T.iloc[0]

Example #3

Show file

File: GCN_based_on_gene_tumor_type.py Project: gilbenc/GCN_EXP_predict_TP53_protein_activity

}
# # Select graph and set variables
# if args.graph:
#     # Check graph arg is valid
#     assert args.graph in graph_dict.keys()
#     graph_name = args.graph
#     gene_graph = graph_dict[graph_name]()
#     is_first_degree = True
# else:
#     is_first_degree = False
#     graph_name = "all_nodes"
# # adj for GCN
# adj = gene_graph.adj()

# Load the TCGA dataset with its default constructor arguments.
TCGA_dataset = TCGADataset()

# Hand-set hyper-parameters; presumably consumed by the model built later
# in the script (not visible in this excerpt).
num_layer, channels, embedding = 2, 30, 40
batch_size = 10
dropout = False
is_first_degree = True

# Instantiate the gene graph registered under ``graph`` in ``graph_dict``
# and take its adjacency matrix for the GCN.
graph = "funcoup"
gene_graph = graph_dict[graph]()
adj = gene_graph.adj()

Example #4

Show file

File: GCN_num_samples.py Project: gilbenc/GCN_EXP_predict_TP53_protein_activity

# GCN
# model_name = "GCN"
# graph = "humannetv2"
# # create gene graph
# gene_graph = graph_dict[graph]()
# # adj for GCN
# adj = gene_graph.adj()
# is_first_degree = True

# MLP configuration: no gene graph is used, so the first-degree-neighbour
# restriction is switched off.
model_name, graph, is_first_degree = "MLP", "all nodes", False

# Load the TCGA dataset, then narrow it to the samples listed in
# BRCA_samples.csv.
# NOTE(review): PATH is defined outside this excerpt and is joined to the
# file name without a separator, so it presumably ends with "/" -- confirm.
dataset = TCGADataset()
# The CSV's first row is the header and its first column becomes the index.
nb_examples = pd.read_csv(PATH + "BRCA_samples.csv", header=0, index_col=0)
# Row selection uses the first remaining CSV column ...
dataset.df = dataset.df.loc[nb_examples.T.iloc[0]]
# ... while the sample names are read from the column named 'x'.
dataset.sample_names = nb_examples['x'].tolist()

# Labels come from BRCA_labels.csv, read with the same layout; the first
# remaining column is kept and its raw values are stored on the dataset.
labels = pd.read_csv(PATH + "BRCA_labels.csv", header=0, index_col=0).T.iloc[0]
dataset.labels = labels.values