Example #1
0
    # Per-gene analysis loop: for every gene, create an output HDF5 group,
    # load its genotypes and phenotype, optionally permute the genotype rows,
    # and run a single-trait LMM association test.
    # NOTE(review): this fragment is truncated -- the code that consumes `RV`
    # is not visible here.
    for gene in genes:

        print '.. Analyzing gene %s' % gene
        # One output group per gene, created before any data is loaded.
        gene_group = fout.create_group(gene)

        print '   .. Importing data'
        try:
            Xc, info = data.getGenotypes(gene, return_info=True)
        except:
            # NOTE(review): bare except -- every failure (including
            # KeyboardInterrupt) is reported as "no SNPs found in cis".
            # The gene is skipped, but the group created above stays in the
            # output file, empty.
            print 'Error: no SNPs found in cis'
            continue
        Y = data.getPhenotypes(gene, peer=opt.peer, gauss=True)
        # Store the SNP annotation returned alongside the genotypes.
        o = gene_group.create_group('snp_info')
        smartDumpDictHdf5(info, o)

        if opt.perm:
            # Permutation mode: shuffle the rows (axis 0) of the genotype
            # matrix. Y is left untouched.
            # NOTE(review): the seed is re-applied on every gene, so with a
            # fixed seed each gene whose Xc has the same number of rows gets
            # the same permutation -- confirm this is intended.
            if opt.seed is not None:
                sp.random.seed(opt.seed)
            idxs = sp.random.permutation(Xc.shape[0])
            Xc = Xc[idxs, :]

        # Always-true guard; presumably a manual on/off switch for this step.
        if 1:
            print "   .. single trait analysis"
            # K is defined outside this fragment.
            lmm = QTL.test_lmm(Xc, Y, K=K)
            pv = lmm.getPv()
            # Collect p-values, q-values computed from them, and SNP effect
            # sizes for this gene.
            RV = {}
            RV['pv'] = pv
            RV['qv'] = FDR.qvalues(pv)
            RV['beta'] = lmm.getBetaSNP()
                except:
                    # NOTE(review): the matching `try` is outside this
                    # fragment. Bare except: any failure is reported for the
                    # gene and the gene is skipped.
                    print geneID, 'failed'
                    continue
                #append the temp table into the big table
                for key in temp.keys():
                    smartAppend(table, key, temp[key])
            f.close()

        # Convert every accumulated column of the table to an array.
        for key in table.keys():
            table[key] = sp.array(table[key])

        print '.. correct for multiple testing'
        # Cap the values stored under 'pv_bonf' at 1 (presumably
        # Bonferroni-adjusted p-values, which can exceed 1 -- confirm).
        table['pv_bonf'][table['pv_bonf'] > 1] = 1.
        # q-values computed over all entries of 'pv_bonf'.
        table['qv_all'] = FDR.qvalues(table['pv_bonf'])
        # Prints the count of entries with q-value below 0.10 and the total
        # number of entries ('no' presumably abbreviates "number of").
        print 'no eQTLs at FDR 0.10:', (table['qv_all'] < 0.10).sum()
        print 'no genes:', table['qv_all'].shape[0]

        # Write the summary table to out_file, overwriting any existing file.
        fout = h5py.File(out_file, 'w')
        smartDumpDictHdf5(table, fout)
        fout.close()

    else:
        # NOTE(review): the `if` this branch belongs to is outside this
        # fragment. This branch reads an existing out_file instead of
        # recomputing it.

        f = h5py.File(out_file, 'r')
        # Load every top-level entry of the file into memory.
        R = {}
        for key in f.keys():
            R[key] = f[key][:]
        f.close()

        # Drops into the interactive debugger with R loaded; execution stops
        # here until the user continues.
        pdb.set_trace()
Example #3
0
    # Load the tab-separated gene-location table as strings/objects and make
    # sure its gene IDs (first column, header row skipped) line up row by row
    # with those of the expression table M.
    Mloc = sp.loadtxt(f_geneloc, dtype=object, delimiter='\t')
    assert (Mloc[1:, 0] == M[1:, 0]).all(), 'gene id do not match'

    # Gene filter -- a gene is kept when both conditions hold:
    # 1. its chromosome label is not 'M', 'X' or 'Y'
    # 2. its value exceeds `cut1` in more than a fraction `cut2` of the
    #    samples (cut1 and cut2 are defined outside this fragment)
    chrom = Mloc[1:, 1]
    Iaut = sp.array([label not in ('M', 'X', 'Y') for label in chrom])
    # M holds genes in rows and samples in columns (first row and column are
    # headers); transpose so that expr is samples x genes.
    expr = M[1:, 1:].astype(float).T
    Iexpr = (expr > cut1).mean(0) > cut2
    Igene = sp.logical_and(Iaut, Iexpr)

    # Assemble the export: filtered matrix plus per-gene (column) and
    # per-sample (row) annotations. The chromosome labels are cast to float,
    # which relies on the filter above having removed the non-numeric labels.
    RV = {
        'matrix': expr[:, Igene],
        'col_header': {
            'gene_ID': M[1:, 0][Igene],
            'gene_chrom': chrom[Igene].astype(float),
            'gene_start': Mloc[1:, 2][Igene].astype(float),
            'gene_end': Mloc[1:, 3][Igene].astype(float),
        },
        'row_header': {
            'sample_ID': M[0, 1:],
        },
    }

    # The context manager closes the output file even if the dump raises.
    with h5py.File(out_file, 'w') as fout:
        smartDumpDictHdf5(RV, fout)
Example #4
0
    fout = h5py.File(out_file, 'w')
    genoGroup = fout.create_group('genotypes')

    RV = {}

    n_chroms = 22
    for chrom_i in range(1, n_chroms+1):

        print '.. copying chromosome %d'%chrom_i

        # load genotype
        in_file = os.path.join(in_dir, 'chrom%d.h5' % chrom_i)
        fin = h5py.File(in_file, 'r')
        chromGroup = genoGroup.create_group('chrom%d' % chrom_i)
        chromGroup.create_dataset('matrix',data=fin['genotypes']['matrix'][:].T)
        chromGroup.create_dataset('RRM',data=fin['RRM'][:])
        h5py.h5o.copy(fin['genotypes'].id, 'col_headers', chromGroup.id, 'col_headers')
        h5py.h5o.copy(fin['genotypes'].id, 'row_headers', chromGroup.id, 'row_headers')

        # summing up Kpop
        if 'RRM' not in RV.keys():
            RV['RRM']  = fin['RRM'][:]
        else:
            RV['RRM'] += fin['RRM'][:]

        fin.close()

    smartDumpDictHdf5(RV, genoGroup)
    fout.close()