def test_ARC2016_16(with_small, overwrite):
    """Call ``run`` on the two arc2016_16 signal tables (CNV, then methyl).

    pydream2015 is pointed at the ``test_input`` / ``test_output``
    directories that sit beside the package, then ``run`` is invoked once
    per signal table with ``ppiscore=950`` and ``alpha=0.95``.

    ``with_small`` and ``overwrite`` are not used in the body
    (presumably pytest fixtures -- confirm against conftest).
    """
    import pydream2015
    from os.path import split, join

    package_root = split(pydream2015.__file__)[0]
    input_dir = join(package_root, 'test_input')
    output_dir = join(package_root, 'test_output')
    pydream2015.initdatapath(input_dir, output_dir)

    ppiscore = 950
    alpha = 0.95
    ppifile = pydream2015.MYDATA_PPI_STRING_TRANSLATED
    gexfile = pydream2015.MYDATA_GENEEXPR_FILLED

    # (signal table relative to input_dir, result file name), in run order.
    jobs = (
        ('team/mschoi/arc2016_16_table(a)cnv.csv', 'smooth_cnv_str.csv'),
        ('team/mschoi/arc2016_16_table(b)methyl.csv', 'smooth_methyl_str.csv'),
    )
    for relative_signal, resfile in jobs:
        signalfile = join(input_dir, relative_signal)
        run(gexfile, ppifile, signalfile, resfile, ppiscore, alpha)
def test_divide_train_leaderboard():
    """Call ``pydream2015.util.divide_train_leaderboard`` on the test data.

    The data paths are initialised with the absolute ``test_input`` and
    ``test_output`` directories located beside the pydream2015 package.
    """
    sys.path.append('..')
    import pydream2015

    package_root = pathsplit(pydream2015.__file__)[0]
    indir = os.path.abspath(join(package_root, 'test_input'))
    outdir = os.path.abspath(join(package_root, 'test_output'))
    pydream2015.initdatapath(indir, outdir)

    # The unpacking fails unless exactly two values come back.
    trainids, testids = pydream2015.util.divide_train_leaderboard()
def test_calc_dd3d():
    """Build the converted 3D drug-descriptor file unless it already exists.

    Calls ``feature.calc_dd3d(in_dd3d, out_dd3d_conv)`` when the output
    path is missing, otherwise reports that the step was skipped. Either
    way the output file must exist when the test finishes.

    The data paths are relative to the current working directory.
    """
    sys.path.append('../..')
    import pydream2015
    from pydream2015 import feature

    pydream2015.initdatapath('../test_input', '../test_output')
    in_dd3d = pydream2015.DATA_DRUG_DESC_3D
    out_dd3d_conv = pydream2015.MYDATA_DRUG_DESC_3D_CONV

    if not exists(out_dd3d_conv):
        feature.calc_dd3d(in_dd3d, out_dd3d_conv)
    else:
        # One pre-formatted string prints the same text whether ``print``
        # is the Python 2 statement or the Python 3 function.
        print('>> %s - skipped' % (out_dd3d_conv,))

    assert exists(out_dd3d_conv)
def test_fill_missinggex():
    """Run ``fill_missinggex`` and check that the filled file exists.

    The data paths are relative to the current working directory.
    """
    sys.path.append('../..')
    import pydream2015

    pydream2015.initdatapath('../test_input', '../test_output')

    therapy_csv, gex_csv, cellinfo_csv = (
        pydream2015.DATA_COMBITHERAPY,
        pydream2015.DATA_GENEEXPR,
        pydream2015.DATA_CELLINFO,
    )
    filled_gex_csv = pydream2015.MYDATA_GENEEXPR_FILLED

    fill_missinggex(therapy_csv, gex_csv, cellinfo_csv, filled_gex_csv)
    assert exists(filled_gex_csv)
def test_divide_combi():
    """Split the combination-therapy table with ``divide_combi``.

    Reads ``pydream2015.DATA_COMBITHERAPY`` as CSV, calls
    ``pydream2015.util.divide_combi`` with ``ratio=0.3`` and prints the
    size of each returned id collection (train first, then test).
    """
    import pydream2015

    package_root = pathsplit(pydream2015.__file__)[0]
    indir = os.path.abspath(join(package_root, 'test_input'))
    outdir = os.path.abspath(join(package_root, 'test_output'))
    pydream2015.initdatapath(indir, outdir)

    combi_table = pd.read_csv(pydream2015.DATA_COMBITHERAPY)
    trainids, testids = pydream2015.util.divide_combi(combi_table, ratio=0.3)
    for id_group in (trainids, testids):
        print(len(id_group))
def test_make_biomartsdict():
    """Build the biomart dictionary file unless it already exists.

    Prints the output path, calls
    ``make_biomartsdict(in_ensemblid, out_dict, test=False)`` when the
    file is missing (otherwise prints ``- skipped``), and finally asserts
    that the output file exists.

    The data paths are relative to the current working directory.
    """
    sys.path.append('../..')
    import pydream2015

    pydream2015.initdatapath('../test_input', '../test_output')
    in_ensemblid = pydream2015.DATA_ENSEMBLID
    out_dict = pydream2015.MYDATA_DICT

    # A single formatted string prints ">> <path>" whether ``print`` is
    # the Python 2 statement or the Python 3 function; passing two
    # arguments in parentheses printed a tuple repr under Python 2.
    print('>> %s' % (out_dict,))
    if not exists(out_dict):
        make_biomartsdict(in_ensemblid, out_dict, test=False)
    else:
        print('- skipped')

    assert exists(out_dict)
def test_translate_STRING():
    """Run ``translate_STRING`` and check that its three outputs exist.

    The outputs checked are the processed, translated and histogram
    paths taken from pydream2015, in that order. The data paths are
    relative to the current working directory.
    """
    sys.path.append('../..')
    import pydream2015

    pydream2015.initdatapath('../test_input', '../test_output')

    ppi_source = pydream2015.DATA_PPI_STRING
    id_dict = pydream2015.MYDATA_DICT
    produced = (
        pydream2015.MYDATA_PPI_STRING_PROCESSED,
        pydream2015.MYDATA_PPI_STRING_TRANSLATED,
        pydream2015.MYDATA_PPI_STRING_HIST,
    )
    processed, translated, hist = produced

    translate_STRING(ppi_source, id_dict, processed, translated, hist,
                     test=False)

    for produced_path in produced:
        assert exists(produced_path)
# This file is part of Pydream2015.
#*************************************************************************
import os
import sys
import cPickle as pickle
import re
import json
import pandas as pd
import numpy as np
import pytest
import pydream2015
from sets import Set
from multiprocessing import Pool
from os.path import exists, split as pathsplit, join
from pdb import set_trace
from os.path import split, join

# Fixture directories live beside the pydream2015 package. A generator
# expression is used so no helper name is left behind in module scope.
indir, outdir = (
    join(split(pydream2015.__file__)[0], subdir)
    for subdir in ('test_input', 'test_output')
)
pydream2015.initdatapath(indir, outdir)

# Data-file locations, read only after initdatapath() has been called.
# NOTE(review): presumably initdatapath() is what sets these attributes
# -- confirm against the package before reordering.
therapy = pydream2015.DATA_COMBITHERAPY
therapy_test = pydream2015.DATA_COMBITHERAPY_TEST
therapy_leader = pydream2015.DATA_COMBITHERAPY_LEADER
gexdata = pydream2015.MYDATA_GENEEXPR_FILLED
mutdata = pydream2015.DATA_MUTATION
dd3dc = pydream2015.MYDATA_DRUG_DESC_3D_CONV
smoothed_CNV_STRING = pydream2015.MYDATA_SMOOTHED_CNV_STRING
def _dose_method(dose_a, dose_b):
    """Label a dose pair 'CONTROL', 'MONO' or 'COMBI' by its "0" doses."""
    zero_a = dose_a == "0"
    zero_b = dose_b == "0"
    if zero_a and zero_b:
        return 'CONTROL'
    if zero_a or zero_b:
        return 'MONO'
    return 'COMBI'


def _expand_dose_matrix(filedata, source_name):
    """Flatten one dose-response matrix into a list of row dicts.

    Dose labels are the index entries before '(=Agent 1)' and the column
    names before '(=Agent 2)'. Compound names, units and the title are
    read from the 'Agent1', 'Agent2', 'Unit1', 'Unit2' and 'Title' rows
    of the first dose column.
    """
    row_labels = filedata.index.values.tolist()
    col_labels = filedata.columns.values.tolist()
    agent1_doses = row_labels[:row_labels.index('(=Agent 1)')]
    agent2_doses = col_labels[:col_labels.index('(=Agent 2)')]

    meta_col = agent2_doses[0]
    lbl_agent_1 = filedata.loc['Agent1', meta_col]
    lbl_agent_2 = filedata.loc['Agent2', meta_col]
    unit_1 = filedata.loc['Unit1', meta_col]
    unit_2 = filedata.loc['Unit2', meta_col]
    title = filedata.loc['Title', meta_col]

    rows = []
    for agent1 in agent1_doses:
        for agent2 in agent2_doses:
            rows.append({
                'CELL_LINE': title,
                'COMBINATION_ID': lbl_agent_1 + '.' + lbl_agent_2,
                'COMPOUND_A': lbl_agent_1,
                'COMPOUND_B': lbl_agent_2,
                'DOSE_A': agent1,
                'DOSE_B': agent2,
                'UNIT_A': unit_1,
                'UNIT_B': unit_2,
                'RESPONSE': filedata.loc[agent1, agent2],
                'METHOD': _dose_method(agent1, agent2),
                'FILE_SOURCE': source_name,
            })
    return rows


def test_reshape_therapy(with_small, overwrite, outputfile='therapy_expanded.csv'):
    """Expand every raw combination-therapy matrix into one long CSV.

    Each ``*.csv`` file in ``pydream2015.DIR_TRAINING_COMBI_THERAPY`` is
    read and turned into one output row per (agent 1 dose, agent 2 dose)
    pair; the combined table is written to ``outputfile``.

    Parameters
    ----------
    with_small
        Not used in the body (presumably a pytest fixture -- confirm).
    overwrite
        When false and ``outputfile`` already exists, the function
        returns without doing anything.
    outputfile
        Path of the CSV to check for and to write.
    """
    if exists(outputfile) and not overwrite:
        return

    # ``glob`` is imported here because the module header does not
    # import it.
    import glob
    import sys
    sys.path.append('..')
    import pydream2015
    from pydream2015.util import update_progress

    package_root = pathsplit(pydream2015.__file__)[0]
    indir = os.path.abspath(join(package_root, 'test_input'))
    outdir = os.path.abspath(join(package_root, 'test_output'))
    pydream2015.initdatapath(indir, outdir)

    columns = ['CELL_LINE', 'COMBINATION_ID',
               'COMPOUND_A', 'COMPOUND_B', 'DOSE_A', 'DOSE_B', 'UNIT_A',
               'UNIT_B', 'RESPONSE', 'METHOD', 'FILE_SOURCE']

    # Get the directory contents.
    raw_data_dir = pydream2015.DIR_TRAINING_COMBI_THERAPY
    files = glob.glob(join(raw_data_dir, '*.csv'))

    rows = []
    for kk, thisfile in enumerate(files):
        update_progress(kk, len(files))
        filedata = pd.read_csv(thisfile, index_col='Unnamed: 0')
        rows.extend(_expand_dose_matrix(filedata, os.path.split(thisfile)[1]))

    # Build the frame once instead of enlarging it cell by cell;
    # dtype=object keeps every value exactly as it was read, with no
    # column-wise type inference.
    output_df = pd.DataFrame(rows, columns=columns, dtype=object)
    output_df.to_csv(outputfile, index=False)