# Closing keyword argument of a pandas table-read call that begins before this
# chunk (presumably loading the SNP-exclusion table) -- start not visible here.
index_col=None)
print 'Exclude:'
print mapper.exclude.head()
# The exclusion table must identify variants either by 'ID' or by 'CHR'+'bp'.
if 'ID' not in mapper.exclude.columns and (
        'CHR' not in mapper.exclude.columns or 'bp' not in mapper.exclude.columns):
    raise ValueError(
        '{} table does not have ID or CHR,bp columns'.format(
            args.snp_id_exc))
mapper.load(args.mapper)  # load the mapper files
mapper.load_flip(args.mapper, encode=args.encoded)  # args.encoded is often None
mapper.cluster = args.cluster  # 'n' by default (per original note)
mapper.node = args.node
Analyser = HaseAnalyser()
# Open one partial-derivative Reader per study, in args.study_name order,
# and time the whole setup.
pard = []
with Timer() as t:
    for i, j in enumerate(args.derivatives):
        pard.append(Reader('partial'))
        pard[i].start(j, study_name=args.study_name[i])
        pard[i].folder.load()
print "time to set PD is {}s".format(t.secs)
# PD[k] is True when study k provides b4 partial-derivative data.
PD = [
    False if isinstance(i.folder._data.b4, type(None)) else True
    for i in pard
]
# ARG_CHECKER.check(args, mode='meta-stage')  -- upstream argument check, disabled.
##### Init data readers #####
# Meta-stage analysis combines pre-computed partial derivatives from every
# study, so the -derivatives argument is mandatory here.
if args.derivatives is None:
    raise ValueError('For meta-stage analysis partial derivatives data are required!')

# Configure the variant mapper that aligns each study's SNPs to the reference.
mapper = Mapper()
mapper.chunk_size = MAPPER_CHUNK_SIZE
mapper.genotype_names = args.study_name
mapper.reference_name = args.ref_name
mapper.load(args.mapper)       # load the mapper files
mapper.load_flip(args.mapper)  # load allele-flip information
mapper.cluster = args.cluster
mapper.node = args.node

Analyser = HaseAnalyser()

# Open one partial-derivative Reader per study, in args.study_name order.
pard = []
for study_index, derivatives_path in enumerate(args.derivatives):
    reader = Reader('partial')
    reader.start(derivatives_path, study_name=args.study_name[study_index])
    reader.folder.load()
    pard.append(reader)

# PD[k] is True when study k provides b4 partial-derivative data.
# Studies must agree: either all of them ship b4 data, or none do.
PD = [reader.folder._data.b4 is not None for reader in pard]
n_studies_with_b4 = np.sum(PD)  # hoisted: the guard below needs it twice
if n_studies_with_b4 != len(pard) and n_studies_with_b4 != 0:
    raise ValueError('All study should have b4 data for partial derivatives!')
# Tail of a parser.add_argument("-o", ...) call that begins before this chunk.
"--out", type=str, required=True, help="path to save result folder")
parser.add_argument(
    "-df", type=float, default=None,
    help="degree of freedom = ( #subjects in study - #covariates - 1 )")
parser.add_argument("-N", type=int, default=None, help="file number to read")
# TODO (low) add reference panel
args = parser.parse_args()

Analyser = HaseAnalyser()
print args  # echo parsed arguments to the log
Analyser.DF = args.df
Analyser.result_path = args.r
Analyser.file_number = args.N

# Accumulators for the merged association results; keys become output columns.
# OrderedDict preserves the column order of the final table.
results = OrderedDict()
results['RSID'] = np.array([])
results['p_value'] = np.array([])
results['t-stat'] = np.array([])
results['phenotype'] = np.array([])
results['SE'] = np.array([])
results['MAF'] = np.array([])
results['BETA'] = np.array([])
# Entry script: analyse HASE association results (Python 2 -- print statements).
from hdgwas.tools import Timer,HaseAnalyser, Reference
import argparse
import pandas as pd
import numpy as np

if __name__=="__main__":
    # NOTE(review): relies on `os` being imported earlier in the file -- confirm.
    # HASEDIR points at the package root (two levels above this script).
    os.environ['HASEDIR']=os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    parser = argparse.ArgumentParser(description='Script analyse results of HASE')
    parser.add_argument("-r", required=True,help="path to hase results")
    parser.add_argument("-o", "--out", type=str, required=True,help="path to save result folder")
    parser.add_argument("-df", type=float,default=None,
                        help="degree of freedom = ( #subjects in study - #covariates - 1 )")
    # TODO (low) add reference panel
    args = parser.parse_args()
    Analyser=HaseAnalyser()
    print args  # echo parsed arguments to the log
    Analyser.DF=args.df
    Analyser.result_path=args.r
    # Accumulators for association results; keys become output columns.
    results={}
    results['RSID']=np.array([])
    results['p_value']=np.array([])
    results['t-stat']=np.array([])
    results['phenotype']=np.array([])
    results['SE']=np.array([])
    results['MAF']=np.array([])
    results['BETA'] = np.array([])
    while True:  # result-reading loop; body continues past this chunk