SNPs_index, keys = mapper.get() else: ch = mapper.chunk_pop() if ch is None: SNPs_index = None break SNPs_index, keys = mapper.get(chunk_number=ch) if isinstance(SNPs_index, type(None)): break Analyser.rsid = keys if np.sum(PD) == 0: genotype = np.array([]) with Timer() as t_g: genotype = merge_genotype(gen, SNPs_index, mapper) genotype = genotype[:, row_index[0]] print "Time to get G {}s".format(t_g.secs) #TODO (low) add interaction a_test = np.array([]) b_cov = np.array([]) C = np.array([]) a_cov = np.array([]) b4 = np.array([]) if args.protocol is not None: if protocol.enable: regression_model = protocol.regression_model() else: regression_model = None
def haseregression(phen, gen, cov, mapper, Analyser, maf, intercept=True, interaction=None):
    """Run the HASE regression for every genotype batch against every phenotype batch.

    The outer loop pulls genotype batches (through ``mapper`` when one is
    given), the inner loop pulls phenotype batches from ``phen``; for each
    pair the t-statistics and standard errors returned by ``HASE`` are stored
    on ``Analyser`` and written out with ``Analyser.save_result``.

    Args:
        phen: phenotype reader. Uses ``get_next(index=...)``, ``permutation``,
            ``folder.processed`` and ``folder._data`` (``names``, ``start``,
            ``finish``).
        gen: iterable of genotype readers; each ``folder._data`` is passed to
            ``study_indexes`` and the readers themselves to ``merge_genotype``.
        cov: covariates reader. Uses ``get_next(index=...)`` and
            ``folder._data``.
        mapper: object handing out SNP indexes/keys, or ``None``. When
            ``mapper.cluster == 'n'`` batches come from ``mapper.get()``;
            otherwise a chunk is popped with ``mapper.chunk_pop()`` and
            fetched with ``mapper.get(chunk_number=...)``.
        Analyser: result sink. This function sets ``rsid``, ``MAF``,
            ``t_stat``, ``SE`` and, depending on the mode, ``cluster``,
            ``chunk``, ``node`` and ``permutation`` on it.
        maf: frequency threshold. ``0`` disables filtering; otherwise only
            rows with ``maf < MAF < 1 - maf`` and ``MAF != 0.5`` are kept,
            where ``MAF = mean(genotype, axis=1) / 2``.
        intercept: forwarded to ``A_covariates``, ``B_covariates`` and
            ``A_tests``.
        interaction: accepted for interface compatibility; not used yet.

    Returns:
        None. Results are delivered through ``Analyser``.
    """
    g = tuple(i.folder._data for i in gen)
    row_index, _ids = study_indexes(phenotype=phen.folder._data, genotype=g,
                                    covariates=cov.folder._data)

    # Progress counters: rows read, rows that survived the MAF filter, total.
    n_read = 0
    n_processed = 0
    n_total = mapper.n_keys if mapper is not None else 'unknown'

    # The covariates part of the A matrix does not depend on the genotype
    # batch, so it is computed once up front.
    covariates = cov.get_next(index=row_index[2])
    a_cov = A_covariates(covariates, intercept=intercept)

    while True:
        gc.collect()

        if mapper is None:
            SNPs_index = None
        else:
            if mapper.cluster == 'n':
                SNPs_index, keys = mapper.get()
            else:
                ch = mapper.chunk_pop()
                if ch is None:
                    break  # no chunks left for this node
                SNPs_index, keys = mapper.get(chunk_number=ch)
            if SNPs_index is None:
                break  # mapper exhausted
            Analyser.rsid = keys

        with Timer() as t:
            genotype = merge_genotype(gen, SNPs_index, mapper)
        print('time to read and merge genotype {}s'.format(t.secs))
        gc.collect()

        if genotype is None:
            print('All genotype processed!')
            break

        n_read += genotype.shape[0]
        # Keep only the columns selected by study_indexes for the genotype data.
        genotype = genotype[:, row_index[0]]

        if mapper is None:
            # Without a mapper there are no keys; fall back to positional ids.
            Analyser.rsid = np.arange(genotype.shape[0])

        maf_values = np.mean(genotype, axis=1) / 2

        if maf != 0:
            maf_mask = (maf_values > maf) & (maf_values < 1 - maf) & (maf_values != 0.5)
            genotype = genotype[maf_mask, :]
            Analyser.MAF = maf_values[maf_mask]
            Analyser.rsid = Analyser.rsid[maf_mask]
            if genotype.shape[0] == 0:
                print('NO SNPs > MAF')
                continue
        else:
            Analyser.MAF = maf_values

        n_processed += genotype.shape[0]

        while True:
            phenotype = phen.get_next(index=row_index[1])
            if phenotype is None:
                # Rewind the phenotype reader for the next genotype batch.
                phen.folder.processed = 0
                print('All phenotypes processed!')
                break

            if phen.permutation:
                np.random.shuffle(phenotype)

            b_cov = B_covariates(covariates, phenotype, intercept=intercept)
            C = C_matrix(phenotype)

            # TODO: `interaction` is accepted but not applied yet.

            a_test = A_tests(covariates, genotype, intercept=intercept)
            a_inv = A_inverse(a_cov, a_test)
            N_con = a_inv.shape[1] - 1
            DF = phenotype.shape[0] - a_inv.shape[1]
            b4 = B4(phenotype, genotype)

            t_stat, SE = HASE(b4, a_inv, b_cov, C, N_con, DF)
            print('Read {}, processed {}, total {}'.format(n_read, n_processed, n_total))

            Analyser.t_stat = t_stat
            Analyser.SE = SE
            if mapper is not None and mapper.cluster == 'y':
                Analyser.cluster = True
                Analyser.chunk = ch
                Analyser.node = mapper.node[1]
            if phen.permutation:
                Analyser.permutation = True

            data = phen.folder._data
            Analyser.save_result(data.names[data.start:data.finish])

            # Drop every reference to the large intermediates before the next
            # batch is loaded, then force a collection.
            Analyser.t_stat = None
            del b4, C, b_cov, a_inv, a_test, t_stat
            gc.collect()

    if Analyser.cluster:
        np.save(os.path.join(Analyser.out, str(Analyser.node) + '_node_RSID.npy'),
                Analyser.rsid_dic)
# Export loop: fetch SNP indexes from the mapper (sequentially, or chunk by
# chunk in cluster mode), merge the genotype data for them and write each
# batch into its own HDF5 file under args.out.
# NOTE(review): block structure was reconstructed from a whitespace-collapsed
# source; confirm the nesting (in particular of `hdf5_iter += 1`) against the
# original file.
while True:
    if args.cluster == 'n':
        SNPs_index, keys = mapper.get_next()
    else:
        chunk = mapper.chunk_pop()
        if chunk is None:
            SNPs_index = None
            break
        print(chunk)
        SNPs_index, keys = mapper.get_chunk(chunk)

    if SNPs_index is None:
        break

    RSID.append(keys)
    data = merge_genotype(gen, SNPs_index)  # TODO (high) add mapper
    print(data.shape)

    # File name is "<counter>_<name>.h5" in sequential mode and
    # "<chunk[0]>_<chunk[1]>_<name>.h5" in cluster mode.
    if args.cluster == 'n':
        h5_gen_file = tables.openFile(
            os.path.join(args.out, str(hdf5_iter) + '_' + h5_name + '.h5'),
            'w', title=args.save_name)
    else:  # TODO (high) check!
        h5_gen_file = tables.openFile(
            os.path.join(args.out,
                         str(chunk[0]) + '_' + str(chunk[1]) + '_' + h5_name + '.h5'),
            'w', title=args.save_name)
    hdf5_iter += 1

    atom = tables.Int8Atom()  # TODO (low) check data format
    genotype = h5_gen_file.createCArray(h5_gen_file.root, 'genotype', atom,
                                        data.shape, title='Genotype',
                                        filters=pytable_filter)
    genotype[:] = data
def haseregression(phen, gen, cov, mapper, Analyser, maf, intercept=True):
    """Run the HASE regression for every genotype batch against every phenotype batch.

    Genotype batches are pulled in the outer loop (through ``mapper`` when one
    is given) and phenotype batches in the inner loop; for each pair the
    t-statistics and standard errors returned by ``HASE`` are stored on
    ``Analyser`` and written out with ``Analyser.save_result``.

    Args:
        phen: phenotype reader. Uses ``get_next(index=...)``, ``permutation``,
            ``folder.processed`` and ``folder._data`` (``names``, ``start``,
            ``finish``).
        gen: iterable of genotype readers; each ``folder._data`` is passed to
            ``study_indexes`` and the readers themselves to ``merge_genotype``.
        cov: covariates reader. Uses ``get_next(index=...)`` and
            ``folder._data``.
        mapper: object handing out SNP indexes/keys, or ``None``. When
            ``mapper.cluster == 'n'`` batches come from ``mapper.get()``;
            otherwise a chunk is popped with ``mapper.chunk_pop()`` and
            fetched with ``mapper.get(chunk_number=...)``.
        Analyser: result sink. This function sets ``rsid``, ``MAF``,
            ``t_stat``, ``SE`` and, depending on the mode, ``cluster``,
            ``chunk``, ``node`` and ``permutation`` on it.
        maf: frequency threshold. ``0`` disables filtering; otherwise only
            rows with ``maf < MAF < 1 - maf`` and ``MAF != 0.5`` are kept,
            where ``MAF = mean(genotype, axis=1) / 2``.
        intercept: forwarded to ``A_covariates``, ``B_covariates`` and
            ``A_tests``.

    Returns:
        None. Results are delivered through ``Analyser``.
    """
    g = tuple(i.folder._data for i in gen)
    row_index, _ids = study_indexes(phenotype=phen.folder._data, genotype=g,
                                    covariates=cov.folder._data)

    # Progress counters: rows read, rows that survived the MAF filter, total.
    n_read = 0
    n_processed = 0
    n_total = mapper.n_keys if mapper is not None else 'unknown'

    # The covariates part of the A matrix does not depend on the genotype
    # batch, so it is computed once up front.
    covariates = cov.get_next(index=row_index[2])
    a_cov = A_covariates(covariates, intercept=intercept)

    while True:
        gc.collect()

        if mapper is None:
            SNPs_index = None
        else:
            if mapper.cluster == 'n':
                SNPs_index, keys = mapper.get()
            else:
                ch = mapper.chunk_pop()
                if ch is None:
                    break  # no chunks left for this node
                print(ch)
                SNPs_index, keys = mapper.get(chunk_number=ch)
            if SNPs_index is None:
                break  # mapper exhausted
            Analyser.rsid = keys

        with Timer() as t:
            genotype = merge_genotype(gen, SNPs_index, mapper)
        print('time to read and merge genotype {}s'.format(t.secs))
        gc.collect()

        if genotype is None:
            print('All genotype processed!')
            break

        n_read += genotype.shape[0]
        # Keep only the columns selected by study_indexes for the genotype data.
        genotype = genotype[:, row_index[0]]

        if mapper is None:
            # Without a mapper there are no keys; fall back to positional ids.
            Analyser.rsid = np.arange(genotype.shape[0])

        maf_values = np.mean(genotype, axis=1) / 2

        if maf != 0:
            maf_mask = (maf_values > maf) & (maf_values < 1 - maf) & (maf_values != 0.5)
            genotype = genotype[maf_mask, :]
            Analyser.MAF = maf_values[maf_mask]
            Analyser.rsid = Analyser.rsid[maf_mask]
            if genotype.shape[0] == 0:
                print('NO SNPs > MAF')
                continue
        else:
            Analyser.MAF = maf_values

        n_processed += genotype.shape[0]

        while True:
            phenotype = phen.get_next(index=row_index[1])
            if phenotype is None:
                # Rewind the phenotype reader for the next genotype batch.
                phen.folder.processed = 0
                print('All phenotypes processed!')
                break

            if phen.permutation:
                np.random.shuffle(phenotype)

            b_cov = B_covariates(covariates, phenotype, intercept=intercept)
            C = C_matrix(phenotype)
            a_test = A_tests(covariates, genotype, intercept=intercept)
            a_inv = A_inverse(a_cov, a_test)
            N_con = a_inv.shape[1] - 1
            DF = phenotype.shape[0] - a_inv.shape[1]
            b4 = B4(phenotype, genotype)

            t_stat, SE = HASE(b4, a_inv, b_cov, C, N_con, DF)
            print('Read {}, processed {}, total {}'.format(n_read, n_processed, n_total))

            Analyser.t_stat = t_stat
            Analyser.SE = SE
            if mapper is not None and mapper.cluster == 'y':
                Analyser.cluster = True
                Analyser.chunk = ch
                Analyser.node = mapper.node[1]
            if phen.permutation:
                Analyser.permutation = True

            data = phen.folder._data
            Analyser.save_result(data.names[data.start:data.finish])

            # Drop every reference to the large intermediates before the next
            # batch is loaded, then force a collection.
            Analyser.t_stat = None
            del b4, C, b_cov, a_inv, a_test, t_stat
            gc.collect()

    if Analyser.cluster:
        np.save(os.path.join(Analyser.out, str(Analyser.node) + '_node_RSID.npy'),
                Analyser.rsid_dic)
SNPs_index, keys=mapper.get() else: ch=mapper.chunk_pop() if ch is None: SNPs_index=None break print ch SNPs_index, keys=mapper.get(chunk_number=ch) if isinstance(SNPs_index, type(None)): break Analyser.rsid=keys if np.sum(PD)==0: genotype=np.array([]) genotype=merge_genotype(gen, SNPs_index, mapper, flip_flag=False) genotype=genotype[:,row_index[0]] #TODO (low) add interaction a_test=np.array([]) b_cov=np.array([]) C=np.array([]) a_cov=np.array([]) b4=np.array([]) if args.protocol is not None: if protocol.enable: regression_model=protocol.regression_model() else: regression_model=None