def lmem(args, start, stop, output):
    """Fit one linear mixed model per SNP and phenotype and write the results.

    Reads the phenotype matrix (``args.phenotype``) and the kinship/RRM matrix
    (``args.kinship``), fits a null model for every phenotype column using only
    the samples whose phenotype is not NaN, then iterates over the genotype
    batches produced by ``iolib.genReader`` and writes one tab-delimited row
    per SNP.

    Each row is ``rsid1, pos, rsid2`` followed, for every phenotype in column
    order, by ``beta_null, sigma_g_null, loglik_null``, the list form of
    element 0 of the ``fitlmm`` result for that SNP, and elements 1, 2 and 3
    of the ``fitlmm`` result for that SNP.

    Args:
        args: Parsed options; ``phenotype``, ``header``, ``genfile``,
            ``linebuffer`` and ``kinship`` are read here.
        start: Lower SNP bound, passed through to ``iolib.genReader``.
        stop: Upper SNP bound, passed through to ``iolib.genReader``.
        output: Destination handed to ``iolib.wopen``.
    """
    print("Processing SNPs %s - %s ..." % (start, stop))
    print("Reading phenotypes and RRM...")
    # The builtin ``float`` is what the deprecated ``np.float`` alias pointed to.
    Y = np.genfromtxt(iolib.ropen(args.phenotype), missing_values="NA",
                      dtype=float, skip_header=args.header)
    if len(Y.shape) == 1:
        # A single phenotype comes back 1-D; promote it to one column.
        Y = Y.reshape(-1, 1)
    nsample, npheno = Y.shape

    genfile = iolib.genReader(args.genfile, nsample, args.linebuffer, start, stop)
    # NOTE(review): assumes iolib.wopen returns a file-like object with
    # close() -- confirm. Closing explicitly guarantees buffered (possibly
    # compressed) output is flushed even when fitting raises.
    out_handle = iolib.wopen(output)
    try:
        outf = csv.writer(out_handle, delimiter="\t")
        K = np.genfromtxt(iolib.ropen(args.kinship), dtype=float)
        print("K.shape = %s" % (K.shape,))
        print("Y.shape = %s" % (Y.shape,))
        print("%s samples and %s phenotypes" % (nsample, npheno))

        observed = []    # per phenotype: boolean mask of samples with a value
        null_model = []  # per phenotype: mapping returned by get_delta
        print("Calculating rotation matrices...")
        for mpheno in range(npheno):
            keep = np.logical_not(np.isnan(Y[:, mpheno]))
            observed.append(keep)
            # Restrict both the kinship matrix and the phenotype vector to the
            # samples that were actually observed for this phenotype.
            null_model.append(
                lmem_func.get_delta(K[keep][:, keep], Y[keep, mpheno]))
        print("done")

        print("Fitting LMMs...")
        time0 = time.time()
        processed = 0
        for rsid1, rsid2, pos, G in genfile:
            nsnp = G.shape[1]
            # One fitlmm result per phenotype for this batch of SNPs.
            fits = []
            for null, keep in zip(null_model, observed):
                fits.append(lmem_func.fitlmm(null['offset'], null['Ut'],
                                             null['denom'], G[keep],
                                             null['uty']))
            for i in range(nsnp):
                row = [rsid1[i], pos[i], rsid2[i]]
                for null, fit in zip(null_model, fits):
                    row.extend((null['beta_null'], null['sigma_g_null'],
                                null['loglik_null']))
                    row.extend(fit[0][i].tolist())
                    row.extend((fit[1][i], fit[2][i], fit[3][i]))
                outf.writerow(row)
            processed += nsnp
            print("%s loci processed" % processed)
        print("Took %s seconds" % (time.time() - time0))
    finally:
        out_handle.close()
def lmem(args, start, stop, output):
    """Run the per-SNP linear mixed model scan for one range of SNPs.

    The phenotype table (``args.phenotype``) and kinship/RRM matrix
    (``args.kinship``) are loaded, a null model is computed for every
    phenotype column on the samples whose value is not NaN, and every genotype
    batch yielded by ``iolib.genReader`` is then tested against each
    phenotype. Results are written as tab-delimited rows, one per SNP:
    ``rsid1, pos, rsid2`` and then, per phenotype, ``beta_null``,
    ``sigma_g_null``, ``loglik_null``, the list form of ``fitlmm`` element 0
    for that SNP, and ``fitlmm`` elements 1, 2 and 3 for that SNP.

    Args:
        args: Parsed options; uses ``phenotype``, ``header``, ``genfile``,
            ``linebuffer`` and ``kinship``.
        start: Lower SNP bound forwarded to ``iolib.genReader``.
        stop: Upper SNP bound forwarded to ``iolib.genReader``.
        output: Destination forwarded to ``iolib.wopen``.
    """
    from contextlib import closing

    print("Processing SNPs %s - %s ..." % (start, stop))
    print("Reading phenotypes and RRM...")
    # ``float`` replaces the deprecated ``np.float`` alias (same dtype).
    Y = np.genfromtxt(
        iolib.ropen(args.phenotype),
        missing_values="NA",
        dtype=float,
        skip_header=args.header,
    )
    if len(Y.shape) == 1:
        # Single-phenotype input is 1-D; make it an (nsample, 1) column.
        Y = Y.reshape(-1, 1)
    nsample, npheno = Y.shape
    genfile = iolib.genReader(args.genfile, nsample, args.linebuffer, start, stop)

    # NOTE(review): assumes the object returned by iolib.wopen has close() --
    # confirm. closing() makes sure the output is flushed and closed on both
    # the success and the error path instead of relying on garbage collection.
    with closing(iolib.wopen(output)) as sink:
        outf = csv.writer(sink, delimiter="\t")
        K = np.genfromtxt(iolib.ropen(args.kinship), dtype=float)
        print("K.shape = %s" % (K.shape,))
        print("Y.shape = %s" % (Y.shape,))
        print("%s samples and %s phenotypes" % (nsample, npheno))

        print("Calculating rotation matrices...")
        # Boolean masks of non-missing samples, one per phenotype column.
        present = [np.logical_not(np.isnan(Y[:, j])) for j in range(npheno)]
        # Null model per phenotype, fitted on the observed samples only.
        null_model = [
            lmem_func.get_delta(K[mask][:, mask], Y[mask, j])
            for j, mask in enumerate(present)
        ]
        print("done")

        print("Fitting LMMs...")
        time0 = time.time()
        processed = 0
        for rsid1, rsid2, pos, G in genfile:
            nsnp = G.shape[1]
            # Kept under its own name so the ``output`` argument is not shadowed.
            results = [
                lmem_func.fitlmm(nm['offset'], nm['Ut'], nm['denom'],
                                 G[mask], nm['uty'])
                for nm, mask in zip(null_model, present)
            ]
            for i in range(nsnp):
                row = [rsid1[i], pos[i], rsid2[i]]
                for nm, res in zip(null_model, results):
                    row += [nm['beta_null'], nm['sigma_g_null'],
                            nm['loglik_null']]
                    row += res[0][i].tolist()
                    row += [res[1][i], res[2][i], res[3][i]]
                outf.writerow(row)
            processed += nsnp
            print("%s loci processed" % processed)
        print("Took %s seconds" % (time.time() - time0))
#!/usr/bin/python -O
import sys
import time
import os
import glob
import pickle
import cPickle
import csv
import gzip
import resource
import argparse
import string

import numpy as np

import iolib


def _str2bool(text):
    """Parse a command-line boolean.

    ``type=bool`` would turn every non-empty string, including "False", into
    True, so the value is matched against explicit spellings instead.
    """
    lowered = text.strip().lower()
    if lowered in ('1', 'true', 't', 'yes', 'y'):
        return True
    if lowered in ('', '0', 'false', 'f', 'no', 'n'):
        return False
    raise argparse.ArgumentTypeError("expected a boolean value, got %r" % text)


# Command-line interface: a plink binary file set plus optional SNP subset,
# output prefix and the --ibs switch.
parser = argparse.ArgumentParser(description='calculates the RRM on plink binary file set')
parser.add_argument('plinkfile', metavar='plinkfile', type=str,
                    help='a binary plink file set')
parser.add_argument('-snps', metavar='snps', type=str, default='',
                    help='list of snp ids to use for rrm calculation')
parser.add_argument('-out', metavar='out', type=str, default='',
                    help='outfile')
parser.add_argument('--ibs', metavar='ibs', type=_str2bool, default=False,
                    help='use kinship coefficient rather than realised relationship')
args = parser.parse_args()

# The result goes to "<prefix>.rrm.gz"; the prefix defaults to the plink file
# set name when -out is not given.
out_prefix = args.out if args.out != '' else args.plinkfile
outfile = csv.writer(iolib.wopen(out_prefix + ".rrm.gz"), delimiter="\t")

# Optional restriction of the calculation to a list of SNP ids.
if args.snps != '':
    snps = iolib.scan(args.snps)
    print("Calculating RRM from subset of %s SNPs" % len(snps))
else:
    snps = None

infile = iolib.plinkReader(args.plinkfile, snps=snps)
n = infile.nsample
# Accumulators sized by the number of samples; the builtin ``float`` is the
# dtype the deprecated ``np.float`` alias referred to.
rrm = np.zeros((n, n), float)
rrm_diag = np.zeros(n, float)
print("Calculating RRM...")
processed += nsnp print processed, "loci processed" print "Took", time.time() - time0, "seconds" if __name__ == '__main__': # logger = mp.log_to_stderr() # logger.setLevel(logging.INFO) assert args.header >= 0 iolib.checkfile(args.kinship) iolib.checkfile(args.genfile) if args.output[-3:] == ".gz": args.output = args.output[:-3] outf = csv.writer(iolib.wopen(args.output + ".gz"), delimiter="\t") nl = iolib.nlines(iolib.ropen(args.genfile)) print nl, "SNPs" neach = int(math.ceil(nl / args.nprocess)) chunks = range(0, nl, neach) + [nl] print chunks pool = mp.Pool(processes=args.nprocess) Y = np.genfromtxt(iolib.ropen(args.phenotype), missing_values="NA", dtype=np.float, skip_header=args.header) if len(Y.shape) == 1:
processed+=nsnp print processed,"loci processed" print "Took",time.time() - time0,"seconds" if __name__ == '__main__': # logger = mp.log_to_stderr() # logger.setLevel(logging.INFO) assert args.header>=0 iolib.checkfile(args.kinship) iolib.checkfile(args.genfile) if args.output[-3:]==".gz": args.output = args.output[:-3] outf = csv.writer(iolib.wopen(args.output+".gz"),delimiter="\t") nl = iolib.nlines(iolib.ropen(args.genfile)) print nl,"SNPs" neach = int(math.ceil(nl/args.nprocess)) chunks = range(0,nl,neach) + [nl] print chunks pool = mp.Pool(processes=args.nprocess) Y = np.genfromtxt(iolib.ropen(args.phenotype),missing_values="NA",dtype=np.float,skip_header=args.header) if len(Y.shape)==1: nsample = Y.shape[0] npheno = 1 else: