def lmem(args, start, stop, output):
    """Fit a linear mixed model per phenotype for a range of SNPs and write a TSV.

    Parameters
    ----------
    args : object
        Must expose ``phenotype``, ``header``, ``genfile``, ``linebuffer``
        and ``kinship``; the paths are opened through ``iolib``.
    start, stop :
        SNP range, forwarded unchanged to ``iolib.genReader``.
    output :
        Destination passed to ``iolib.wopen``; receives one tab-separated
        row per SNP.

    Each output row is ``rsid1, pos, rsid2`` followed, for every phenotype
    in column order, by the null-model values ``beta_null``,
    ``sigma_g_null``, ``loglik_null``, then the per-SNP entries of the
    first ``fitlmm`` result (flattened with ``tolist()``) and the per-SNP
    entries of its second, third and fourth results.
    """
    print("Processing SNPs %s - %s ..." % (start, stop))
    print("Reading phenotypes and RRM...")
    Y = np.genfromtxt(iolib.ropen(args.phenotype), missing_values="NA",
                      dtype=np.float64, skip_header=args.header)
    if Y.ndim == 1:
        # A single phenotype column is returned 1-D; promote it to
        # (nsample, 1) so the per-phenotype loops below apply unchanged.
        Y = Y[:, np.newaxis]
    nsample, npheno = Y.shape
    genfile = iolib.genReader(args.genfile, nsample, args.linebuffer,
                              start, stop)

    # Keep the handle so it can be closed (and its buffer flushed) even if
    # fitting fails part-way through.
    # NOTE(review): assumes iolib.wopen returns a file-like object with
    # close() -- confirm.
    out_handle = iolib.wopen(output)
    try:
        outf = csv.writer(out_handle, delimiter="\t")
        K = np.genfromtxt(iolib.ropen(args.kinship), dtype=np.float64)
        print("K.shape = %s" % (K.shape,))
        print("Y.shape = %s" % (Y.shape,))
        print("%s samples and %s phenotypes" % (nsample, npheno))

        observed = []    # per phenotype: True where the sample has a value
        null_model = []  # per phenotype: result of lmem_func.get_delta
        print("Calculating rotation matrices...")
        for mpheno in range(npheno):
            mask = np.logical_not(np.isnan(Y[:, mpheno]))
            observed.append(mask)
            # Restrict the kinship matrix and phenotype to observed samples.
            null_model.append(
                lmem_func.get_delta(K[mask][:, mask], Y[mask, mpheno]))
        print("done")

        print("Fitting LMMs...")
        time0 = time.time()
        processed = 0
        for rsid1, rsid2, pos, G in genfile:
            nsnp = G.shape[1]
            fits = []
            for mask, null in zip(observed, null_model):
                fits.append(lmem_func.fitlmm(
                    null['offset'], null['Ut'], null['denom'],
                    G[mask], null['uty']))
            for i in range(nsnp):
                row = [rsid1[i], pos[i], rsid2[i]]
                for null, fit in zip(null_model, fits):
                    row.extend([null['beta_null'], null['sigma_g_null'],
                                null['loglik_null']])
                    row.extend(fit[0][i].tolist())
                    row.extend([fit[1][i], fit[2][i], fit[3][i]])
                outf.writerow(row)
            processed += nsnp
            # NOTE(review): the original indentation was lost; progress is
            # assumed to be reported once per chunk -- confirm.
            print("%s loci processed" % (processed,))
        print("Took %s seconds" % (time.time() - time0,))
    finally:
        out_handle.close()
def lmem(args, start, stop, output):
    """Fit a linear mixed model per phenotype for a range of SNPs and write a TSV.

    Parameters
    ----------
    args : object
        Must expose ``phenotype``, ``header``, ``genfile``, ``linebuffer``
        and ``kinship``; the paths are opened through ``iolib``.
    start, stop :
        SNP range, forwarded unchanged to ``iolib.genReader``.
    output :
        Destination passed to ``iolib.wopen``; receives one tab-separated
        row per SNP.

    Each output row is ``rsid1, pos, rsid2`` followed, for every phenotype
    in column order, by the null-model values ``beta_null``,
    ``sigma_g_null``, ``loglik_null``, then the per-SNP entries of the
    first ``fitlmm`` result (flattened with ``tolist()``) and the per-SNP
    entries of its second, third and fourth results.
    """
    print("Processing SNPs %s - %s ..." % (start, stop))
    print("Reading phenotypes and RRM...")
    Y = np.genfromtxt(iolib.ropen(args.phenotype), missing_values="NA",
                      dtype=np.float64, skip_header=args.header)
    if Y.ndim == 1:
        # A single phenotype column is returned 1-D; promote it to
        # (nsample, 1) so the per-phenotype loops below apply unchanged.
        Y = Y[:, np.newaxis]
    nsample, npheno = Y.shape
    genfile = iolib.genReader(args.genfile, nsample, args.linebuffer,
                              start, stop)

    # Keep the handle so it can be closed (and its buffer flushed) even if
    # fitting fails part-way through.
    # NOTE(review): assumes iolib.wopen returns a file-like object with
    # close() -- confirm.
    out_handle = iolib.wopen(output)
    try:
        outf = csv.writer(out_handle, delimiter="\t")
        K = np.genfromtxt(iolib.ropen(args.kinship), dtype=np.float64)
        print("K.shape = %s" % (K.shape,))
        print("Y.shape = %s" % (Y.shape,))
        print("%s samples and %s phenotypes" % (nsample, npheno))

        observed = []    # per phenotype: True where the sample has a value
        null_model = []  # per phenotype: result of lmem_func.get_delta
        print("Calculating rotation matrices...")
        for mpheno in range(npheno):
            mask = np.logical_not(np.isnan(Y[:, mpheno]))
            observed.append(mask)
            # Restrict the kinship matrix and phenotype to observed samples.
            null_model.append(
                lmem_func.get_delta(K[mask][:, mask], Y[mask, mpheno]))
        print("done")

        print("Fitting LMMs...")
        time0 = time.time()
        processed = 0
        for rsid1, rsid2, pos, G in genfile:
            nsnp = G.shape[1]
            fits = []
            for mask, null in zip(observed, null_model):
                fits.append(lmem_func.fitlmm(
                    null['offset'], null['Ut'], null['denom'],
                    G[mask], null['uty']))
            for i in range(nsnp):
                row = [rsid1[i], pos[i], rsid2[i]]
                for null, fit in zip(null_model, fits):
                    row.extend([null['beta_null'], null['sigma_g_null'],
                                null['loglik_null']])
                    row.extend(fit[0][i].tolist())
                    row.extend([fit[1][i], fit[2][i], fit[3][i]])
                outf.writerow(row)
            processed += nsnp
            # NOTE(review): the original indentation was lost; progress is
            # assumed to be reported once per chunk -- confirm.
            print("%s loci processed" % (processed,))
        print("Took %s seconds" % (time.time() - time0,))
    finally:
        out_handle.close()
def gen2dosage(fname, nsample):
    """Read a per-SNP genotype table and collapse it to one dosage per sample.

    Every record is parsed as five leading fields (``rsid1``, ``rsid2``,
    ``pos``, ``ref``, ``alt``) followed by three float columns per sample.
    The dosage of sample ``i`` is its second column plus twice its third
    column; the first column of each triple is not used.
    NOTE(review): the triples are presumably genotype probabilities --
    confirm against the file format.

    Parameters
    ----------
    fname :
        Path handed to ``iolib.ropen``.
    nsample : int
        Number of samples, i.e. number of column triples per record.

    Returns
    -------
    tuple
        ``(rsid1, rsid2, pos, dosage)`` where the first three are the
        corresponding columns of the parsed table and ``dosage`` is a
        float32 array of shape ``(n_records, nsample)``.
    """
    leading_fields = [
        ('rsid1', np.str_, 50),
        ('rsid2', np.str_, 50),
        ('pos', np.uint),
        ('ref', np.str_, 1),
        ('alt', np.str_, 1),
    ]
    value_fields = [("g" + str(idx), np.float64)
                    for idx in range(3 * nsample)]
    gen_dt = np.dtype(leading_fields + value_fields)

    # genfromtxt squeezes a one-record file to a 0-d array; force 1-D so
    # the shape lookup and column slicing below also work for a single SNP.
    tmp = np.atleast_1d(np.genfromtxt(iolib.ropen(fname), dtype=gen_dt))

    dosage = np.empty((tmp.shape[0], nsample), np.float32)
    for i in range(nsample):
        second = "g" + str(i * 3 + 1)
        third = "g" + str(i * 3 + 2)
        dosage[:, i] = tmp[second] + 2 * tmp[third]
    return tmp['rsid1'], tmp['rsid2'], tmp['pos'], dosage
print processed, "loci processed" print "Took", time.time() - time0, "seconds" if __name__ == '__main__': # logger = mp.log_to_stderr() # logger.setLevel(logging.INFO) assert args.header >= 0 iolib.checkfile(args.kinship) iolib.checkfile(args.genfile) if args.output[-3:] == ".gz": args.output = args.output[:-3] outf = csv.writer(iolib.wopen(args.output + ".gz"), delimiter="\t") nl = iolib.nlines(iolib.ropen(args.genfile)) print nl, "SNPs" neach = int(math.ceil(nl / args.nprocess)) chunks = range(0, nl, neach) + [nl] print chunks pool = mp.Pool(processes=args.nprocess) Y = np.genfromtxt(iolib.ropen(args.phenotype), missing_values="NA", dtype=np.float, skip_header=args.header) if len(Y.shape) == 1: nsample = Y.shape[0] npheno = 1
print processed,"loci processed" print "Took",time.time() - time0,"seconds" if __name__ == '__main__': # logger = mp.log_to_stderr() # logger.setLevel(logging.INFO) assert args.header>=0 iolib.checkfile(args.kinship) iolib.checkfile(args.genfile) if args.output[-3:]==".gz": args.output = args.output[:-3] outf = csv.writer(iolib.wopen(args.output+".gz"),delimiter="\t") nl = iolib.nlines(iolib.ropen(args.genfile)) print nl,"SNPs" neach = int(math.ceil(nl/args.nprocess)) chunks = range(0,nl,neach) + [nl] print chunks pool = mp.Pool(processes=args.nprocess) Y = np.genfromtxt(iolib.ropen(args.phenotype),missing_values="NA",dtype=np.float,skip_header=args.header) if len(Y.shape)==1: nsample = Y.shape[0] npheno = 1 else: nsample,npheno = Y.shape