Ejemplo n.º 1
0
def lmem(args, start, stop, output):
    """Run the LMM association scan for one slice of SNPs and write the results.

    Loads the phenotype table (``args.phenotype``; "NA" is treated as missing
    and ``args.header`` leading lines are skipped) and the kinship matrix
    (``args.kinship``).  For every phenotype column a null model is obtained
    from ``lmem_func.get_delta`` using only the samples whose phenotype is not
    NaN.  Genotype batches yielded by ``iolib.genReader`` are then handed to
    ``lmem_func.fitlmm`` and one tab-delimited row per SNP is written.

    Each row holds ``rsid1, pos, rsid2`` followed, for every phenotype in
    column order, by the null model's ``beta_null``, ``sigma_g_null`` and
    ``loglik_null``, the values of ``fitlmm`` result ``[0][i]`` and the
    entries ``[1][i]``, ``[2][i]`` and ``[3][i]``.

    Args:
        args: Object exposing the ``phenotype``, ``header``, ``genfile``,
            ``linebuffer`` and ``kinship`` attributes.
        start: First SNP bound, forwarded unchanged to ``iolib.genReader``.
        stop: Second SNP bound, forwarded unchanged to ``iolib.genReader``.
        output: Destination handed to ``iolib.wopen``.
    """
    # Single-argument print calls emit the same text under Python 2 and 3.
    print("Processing SNPs %s - %s ..." % (start, stop))
    print("Reading phenotypes and RRM...")

    # Builtin float replaces the np.float alias (removed in NumPy 1.24).
    Y = np.genfromtxt(iolib.ropen(args.phenotype),
                      missing_values="NA",
                      dtype=float,
                      skip_header=args.header)
    if Y.ndim == 1:
        # A single phenotype is read as a vector; make it one column.
        Y = Y.reshape(-1, 1)
    nsample, npheno = Y.shape

    genfile = iolib.genReader(args.genfile, nsample, args.linebuffer, start,
                              stop)
    out_handle = iolib.wopen(output)
    try:
        outf = csv.writer(out_handle, delimiter="\t")
        K = np.genfromtxt(iolib.ropen(args.kinship), dtype=float)
        print("K.shape = %s" % (K.shape,))
        print("Y.shape = %s" % (Y.shape,))
        print("%s samples and %s phenotypes" % (nsample, npheno))

        print("Calculating rotation matrices...")
        observed = []    # per phenotype: True where the sample has a value
        null_model = []  # per phenotype: result of lmem_func.get_delta
        for mpheno in range(npheno):
            ii = np.logical_not(np.isnan(Y[:, mpheno]))
            observed.append(ii)
            # Restrict kinship rows and columns to the observed samples.
            null_model.append(
                lmem_func.get_delta(K[ii][:, ii], Y[ii, mpheno]))
        print("done")

        print("Fitting LMMs...")
        time0 = time.time()
        processed = 0
        for rsid1, rsid2, pos, G in genfile:
            nsnp = G.shape[1]

            # Kept separate from the ``output`` argument, which the original
            # code rebound to this list.
            results = []
            for ii, null in zip(observed, null_model):
                # G[ii] keeps only the rows for samples with this phenotype.
                results.append(
                    lmem_func.fitlmm(null['offset'], null['Ut'],
                                     null['denom'], G[ii], null['uty']))

            for i in range(nsnp):
                row = [rsid1[i], pos[i], rsid2[i]]
                for null, fit in zip(null_model, results):
                    row.extend([null['beta_null'], null['sigma_g_null'],
                                null['loglik_null']])
                    row.extend(fit[0][i].tolist())
                    row.extend([fit[1][i], fit[2][i], fit[3][i]])
                outf.writerow(row)

            processed += nsnp
            print("%s loci processed" % processed)
        print("Took %s seconds" % (time.time() - time0))
    finally:
        # Close explicitly so buffered rows are flushed even when fitting
        # fails part-way through.
        out_handle.close()
Ejemplo n.º 2
0
Archivo: lmem.py Proyecto: jaredo/gwas
def lmem(args, start, stop, output):
    """Run the LMM association scan for one slice of SNPs and write the results.

    Loads the phenotype table (``args.phenotype``; "NA" is treated as missing
    and ``args.header`` leading lines are skipped) and the kinship matrix
    (``args.kinship``).  For every phenotype column a null model is obtained
    from ``lmem_func.get_delta`` using only the samples whose phenotype is not
    NaN.  Genotype batches yielded by ``iolib.genReader`` are then handed to
    ``lmem_func.fitlmm`` and one tab-delimited row per SNP is written.

    Each row holds ``rsid1, pos, rsid2`` followed, for every phenotype in
    column order, by the null model's ``beta_null``, ``sigma_g_null`` and
    ``loglik_null``, the values of ``fitlmm`` result ``[0][i]`` and the
    entries ``[1][i]``, ``[2][i]`` and ``[3][i]``.

    Args:
        args: Object exposing the ``phenotype``, ``header``, ``genfile``,
            ``linebuffer`` and ``kinship`` attributes.
        start: First SNP bound, forwarded unchanged to ``iolib.genReader``.
        stop: Second SNP bound, forwarded unchanged to ``iolib.genReader``.
        output: Destination handed to ``iolib.wopen``.
    """
    # Single-argument print calls emit the same text under Python 2 and 3.
    print("Processing SNPs %s - %s ..." % (start, stop))
    print("Reading phenotypes and RRM...")

    # Builtin float replaces the np.float alias (removed in NumPy 1.24).
    Y = np.genfromtxt(iolib.ropen(args.phenotype), missing_values="NA",
                      dtype=float, skip_header=args.header)
    if Y.ndim == 1:
        # A single phenotype is read as a vector; make it one column.
        Y = Y.reshape(-1, 1)
    nsample, npheno = Y.shape

    genfile = iolib.genReader(args.genfile, nsample, args.linebuffer, start, stop)
    out_handle = iolib.wopen(output)
    try:
        outf = csv.writer(out_handle, delimiter="\t")
        K = np.genfromtxt(iolib.ropen(args.kinship), dtype=float)
        print("K.shape = %s" % (K.shape,))
        print("Y.shape = %s" % (Y.shape,))
        print("%s samples and %s phenotypes" % (nsample, npheno))

        print("Calculating rotation matrices...")
        observed = []    # per phenotype: True where the sample has a value
        null_model = []  # per phenotype: result of lmem_func.get_delta
        for mpheno in range(npheno):
            ii = np.logical_not(np.isnan(Y[:, mpheno]))
            observed.append(ii)
            # Restrict kinship rows and columns to the observed samples.
            null_model.append(lmem_func.get_delta(K[ii][:, ii], Y[ii, mpheno]))
        print("done")

        print("Fitting LMMs...")
        time0 = time.time()
        processed = 0
        for rsid1, rsid2, pos, G in genfile:
            nsnp = G.shape[1]

            # Kept separate from the ``output`` argument, which the original
            # code rebound to this list.
            results = []
            for ii, null in zip(observed, null_model):
                # G[ii] keeps only the rows for samples with this phenotype.
                results.append(lmem_func.fitlmm(null['offset'], null['Ut'],
                                                null['denom'], G[ii],
                                                null['uty']))

            for i in range(nsnp):
                row = [rsid1[i], pos[i], rsid2[i]]
                for null, fit in zip(null_model, results):
                    row.extend([null['beta_null'], null['sigma_g_null'],
                                null['loglik_null']])
                    row.extend(fit[0][i].tolist())
                    row.extend([fit[1][i], fit[2][i], fit[3][i]])
                outf.writerow(row)

            processed += nsnp
            print("%s loci processed" % processed)
        print("Took %s seconds" % (time.time() - time0))
    finally:
        # Close explicitly so buffered rows are flushed even when fitting
        # fails part-way through.
        out_handle.close()
Ejemplo n.º 3
0
def gen2dosage(fname, nsample):
    """Read a genotype file and collapse each sample's columns into a dosage.

    Every input row is parsed as five leading fields (``rsid1``, ``rsid2``,
    ``pos``, ``ref``, ``alt``) followed by ``3 * nsample`` float columns.
    For sample ``i`` the dosage is column ``3*i + 1`` plus twice column
    ``3*i + 2``; column ``3*i`` is not used.
    NOTE(review): this looks like the three genotype probabilities of the
    Oxford GEN layout -- confirm against the files actually used.

    Args:
        fname: Location handed to ``iolib.ropen``.
        nsample: Number of samples, i.e. a third of the genotype columns.

    Returns:
        Tuple ``(rsid1, rsid2, pos, dosage)`` where the first three are the
        per-row field arrays and ``dosage`` is a float32 array with one row
        per input line and one column per sample.
    """
    # Identifiers are capped at 50 characters and alleles at 1 by this dtype.
    # Builtin float replaces the np.float alias (removed in NumPy 1.24).
    fields = [('rsid1', np.str_, 50), ('rsid2', np.str_, 50),
              ('pos', np.uint), ('ref', np.str_, 1), ('alt', np.str_, 1)]
    fields += [("g" + str(idx), float) for idx in range(3 * nsample)]
    gen_dt = np.dtype(fields)

    # genfromtxt squeezes a one-line file to a 0-d array; atleast_1d restores
    # the row axis so shape[0] and the column slicing below stay valid.
    tmp = np.atleast_1d(np.genfromtxt(iolib.ropen(fname), dtype=gen_dt))

    dosage = np.empty((tmp.shape[0], nsample), np.float32)
    for i in range(nsample):
        second = "g" + str(i * 3 + 1)
        third = "g" + str(i * 3 + 2)
        dosage[:, i] = tmp[second] + 2 * tmp[third]
    return tmp['rsid1'], tmp['rsid2'], tmp['pos'], dosage
Ejemplo n.º 4
0
        print processed, "loci processed"
    print "Took", time.time() - time0, "seconds"


if __name__ == '__main__':
    # Script entry point.  ``args`` is a module-level object defined outside
    # this excerpt; only its header, kinship, genfile, output, nprocess and
    # phenotype attributes are read here.
    # logger = mp.log_to_stderr()
    # logger.setLevel(logging.INFO)
    assert args.header >= 0
    iolib.checkfile(args.kinship)
    iolib.checkfile(args.genfile)

    # Drop a trailing ".gz" so it is not doubled when re-appended below.
    if args.output.endswith(".gz"):
        args.output = args.output[:-3]

    outf = csv.writer(iolib.wopen(args.output + ".gz"), delimiter="\t")

    nl = iolib.nlines(iolib.ropen(args.genfile))
    print("%s SNPs" % nl)
    # Slice the nl SNPs into contiguous ranges of about nl / nprocess loci.
    # The quotient is computed in floating point: with two ints Python 2
    # floors it before math.ceil runs, which gave neach == 0 (and a
    # ValueError from range) whenever nl < nprocess.  max(1, ...) keeps the
    # step valid for an empty genotype file as well.
    neach = max(1, int(math.ceil(nl / float(args.nprocess))))
    chunks = list(range(0, nl, neach)) + [nl]
    print(chunks)

    pool = mp.Pool(processes=args.nprocess)

    # Builtin float replaces the np.float alias (removed in NumPy 1.24).
    Y = np.genfromtxt(iolib.ropen(args.phenotype),
                      missing_values="NA",
                      dtype=float,
                      skip_header=args.header)

    # A one-dimensional result means the file held a single phenotype.
    if len(Y.shape) == 1:
        nsample = Y.shape[0]
        npheno = 1
Ejemplo n.º 5
0
Archivo: lmem.py Proyecto: jaredo/gwas
        print processed,"loci processed"
    print "Took",time.time() - time0,"seconds"


if __name__ == '__main__':
    # Script entry point.  ``args`` is a module-level object defined outside
    # this excerpt; only its header, kinship, genfile, output, nprocess and
    # phenotype attributes are read here.
    # logger = mp.log_to_stderr()
    # logger.setLevel(logging.INFO)
    assert args.header >= 0
    iolib.checkfile(args.kinship)
    iolib.checkfile(args.genfile)

    # Drop a trailing ".gz" so it is not doubled when re-appended below.
    if args.output.endswith(".gz"):
        args.output = args.output[:-3]

    outf = csv.writer(iolib.wopen(args.output + ".gz"), delimiter="\t")

    nl = iolib.nlines(iolib.ropen(args.genfile))
    print("%s SNPs" % nl)
    # Slice the nl SNPs into contiguous ranges of about nl / nprocess loci.
    # The quotient is computed in floating point: with two ints Python 2
    # floors it before math.ceil runs, which gave neach == 0 (and a
    # ValueError from range) whenever nl < nprocess.  max(1, ...) keeps the
    # step valid for an empty genotype file as well.
    neach = max(1, int(math.ceil(nl / float(args.nprocess))))
    chunks = list(range(0, nl, neach)) + [nl]
    print(chunks)

    pool = mp.Pool(processes=args.nprocess)

    # Builtin float replaces the np.float alias (removed in NumPy 1.24).
    Y = np.genfromtxt(iolib.ropen(args.phenotype), missing_values="NA",
                      dtype=float, skip_header=args.header)

    # A one-dimensional result means the file held a single phenotype.
    if len(Y.shape) == 1:
        nsample = Y.shape[0]
        npheno = 1
    else:
        nsample, npheno = Y.shape