Esempio n. 1
0
 def read_dist_out(self, SNP_Z):
     """Collect weighted (SNP, weight) pairs per key from ``P.sed_file``.

     The file is consumed two lines at a time and each line is split on
     whitespace. Within a line, field 1 is an integer record type, field 2
     is the SNP identifier, field 3 is the dictionary key (presumably a
     gene identifier -- verify against the file format) and field 5 is an
     integer passed to ``U.weight_f``.

     "Recording" a line makes sure its key exists in the result and, when
     its SNP is in ``SNP_Z``, appends ``(snp, U.weight_f(int(field 5)))``
     to that key's list. For each pair of lines:

     * if the first line has type 1 or 2, both lines are recorded;
     * if the first line has type 3, the first line is recorded;
     * if the second line has type 3, the second line is recorded.

     Args:
         SNP_Z: container supporting ``in``; only SNPs found in it are
             appended (keys are created regardless).

     Returns:
         dict mapping each key to a list of ``(SNP id, weight)`` tuples.

     Raises:
         IndexError: if a line has too few fields, which includes a blank
             line or a file with an odd number of lines.
         ValueError: if field 1 or field 5 is not an integer literal.
     """
     gene_SNP_out_w = dict()
     path = P.sed_file

     def record(fields):
         # The key is created even when the SNP is filtered out by SNP_Z.
         hits = gene_SNP_out_w.setdefault(fields[3], [])
         if fields[2] in SNP_Z:
             hits.append((fields[2], U.weight_f(int(fields[5]))))

     # The context manager closes the file even if a malformed line raises.
     with open(path) as in_:
         for ln in in_:
             sln1 = ln.split()
             # Second line of the pair; '' (hence []) if the file ends early.
             sln2 = next(in_, '').split()
             kind1 = int(sln1[1])
             if kind1 in {1, 2}:
                 record(sln1)
                 record(sln2)
             if kind1 == 3:
                 record(sln1)
             # NOTE(review): when the first line has type 1 or 2 and the
             # second has type 3, the second line is recorded twice; this
             # mirrors the original logic -- confirm it is intended.
             if int(sln2[1]) == 3:
                 record(sln2)
     print('Distances SNP to exons, SNP outside exons and inside genes, from', path)
     return gene_SNP_out_w
Esempio n. 2
0
 def create_seq(self):
     """Populate ``seq_pos``, ``SNP_pos`` and ``SNP_io`` on the instance.

     ``seq_pos`` starts as the result of ``U.read_exon()``; ``SNP_pos`` is
     the first element returned by ``U.read_SNP()``. For each index 0..23
     (presumably one per chromosome -- verify) the SNP entries are appended
     to the matching ``seq_pos`` list, which is then sorted in place.
     Finally ``SNP_io`` is set from ``self.SNP_in_gene()``.
     """
     self.seq_pos = U.read_exon()
     snp_positions, _ignored = U.read_SNP()
     self.SNP_pos = snp_positions
     for chrom in range(24):
         merged = self.seq_pos[chrom]
         merged.extend(self.SNP_pos[chrom])
         merged.sort()
     self.SNP_io = self.SNP_in_gene()
Esempio n. 3
0
 def fit_distance(self):
     """Fit ``U.gamma_cdf`` to the cumulative counts of SNP distances.

     Distances come from ``self.read_distance()``; ``U.cumul_number`` turns
     them into the (x, y) series, ``U.curve_fit`` estimates the parameters
     and ``U.gamma_draw_fit`` draws the result. Nothing is returned.
     """
     print('Distance Between SNP, all SNP')
     cx, cy = U.cumul_number(self.read_distance())
     fitted = U.curve_fit(cx, cy, U.gamma_cdf)
     plot_title = 'Fit distance between SNP, ' + P.datas[P.data_src]
     U.gamma_draw_fit(cx, cy, U.gamma_cdf, fitted, title=plot_title)
Esempio n. 4
0
 def fit_lin_log(self, inf_pv, sup_pv):
     """Regress log10 cumulative count on log10 p-value inside a window.

     ``self.SNP_pv`` is sorted in place, ``U.cumul_number`` gives the
     (cx, cy) series and its first point is discarded. The window runs from
     the first index where ``cx > inf_pv`` up to, but not including, the
     last index where ``cx < sup_pv``. ``scipy.stats.linregress`` is run on
     the log10 of both series, the data and the fitted line are plotted on
     a new figure, and the fit statistics are printed. Nothing is returned
     and the figure is not shown here.

     Args:
         inf_pv: lower p-value bound (exclusive) of the window.
         sup_pv: upper p-value bound (exclusive) of the window.

     Raises:
         IndexError: if no ``cx`` value satisfies one of the bounds.
     """
     print(
         '\nStep 2- Compute correlation coefficient between log of SNP p-values'
     )
     # Report the limits actually applied below (the arguments), not the
     # module-level P.inf_pv / P.sup_pv, which may differ from them.
     print('Fixed limits of p-values', inf_pv, sup_pv)
     self.SNP_pv.sort()
     cx, cy = U.cumul_number(self.SNP_pv)
     # Drop the first point (presumably to keep log10 finite -- TODO confirm).
     cx, cy = cx[1:], cy[1:]
     # assumes cx is a NumPy array so the comparisons are element-wise
     i_min = np.where(cx > inf_pv)[0][0]
     i_max = np.where(cx < sup_pv)[0][-1]
     # NOTE(review): the slice excludes index i_max, while the "Real limits"
     # message below prints cx[i_max] -- confirm which bound is intended.
     lx = np.log10(cx[i_min:i_max])
     ly = np.log10(cy[i_min:i_max])
     plt.figure()
     plt.title(
         P.data +
         ' linear adjustment of log p-value and log cumulative number')
     plt.xticks(ticks=[], labels=[])
     plt.yticks(ticks=[], labels=[])
     plt.plot(lx, ly)
     slope, intercept, r_value, p_value, std_err = stats.linregress(lx, ly)
     plt.plot(lx,
              lx * slope + intercept,
              label='R=' + '{:.4f}'.format(r_value))
     plt.legend()
     print('number of p-values', len(lx), 'out of a total of',
           len(self.SNP_pv))
     print('Real limits of p_values: [',
           cx[i_min],
           ', ',
           cx[i_max],
           ']',
           sep='')
     print('r_value:', r_value)
     print('p_value:', p_value)
     print('std_err:', std_err)
Esempio n. 5
0
 def outliers(self):
     """Print quantiles of ``self.SNP_pv`` and plot its histogram.

     The banner lines mention ``P.inf_pv`` and ``P.sup_pv``; the quantiles
     themselves are computed with ``np.quantile`` over all of
     ``self.SNP_pv``. The histogram is drawn by ``U.plot_histo`` with 1000
     as its second argument. Nothing is returned.
     """
     pvalues = self.SNP_pv
     levels = (
         0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.15, 0.2, 0.8,
         0.85, 0.9, 0.95, 0.99, 0.995, 0.999,
     )

     print(
         'Step 1- Choice of fork of p-values to eliminate outlier p-values')
     print('Quantiles are computed for p-values in', '[', P.inf_pv, ', ',
           P.sup_pv, ']')
     print(
         'Once choosen, update the parameter to compute correlation coefficient at step 2'
     )
     print(
         'Warning: Python cannot compute the Z-score for p-value < 5.552e-17'
     )

     # Tab-separated table: one row per quantile level.
     print('quantile\tvalue')
     for level in levels:
         value = np.quantile(pvalues, level)
         print(level, value, sep='\t')

     histo_title = P.data + ' Histogram of p-values'
     U.plot_histo(pvalues, 1000, title=histo_title)
Esempio n. 6
0
 def SNP_in_gene(self):
     """Return, for each index 0..23, the open-interval set seen at each SNP.

     The lists from ``U.read_gene()`` are extended with ``self.SNP_pos`` and
     sorted in place, then swept in order. An entry whose element 1 is 1
     adds its element 2 to the current set, an entry whose element 1 is 2
     removes it, and any other entry (presumably a SNP -- verify) stores a
     copy of the current set under its element 2.

     Returns:
         list of 24 dicts mapping element 2 of each "other" entry to a set.

     Raises:
         KeyError: if a type-2 entry removes an identifier that is not in
             the current set.
     """
     seq_pos = U.read_gene()
     SNP_io = []
     for chrom in range(24):
         events = seq_pos[chrom]
         events.extend(self.SNP_pos[chrom])
         events.sort()

         open_ids = set()
         snapshot_by_id = {}
         for event in events:
             kind = event[1]
             ident = event[2]
             if kind == 1:
                 open_ids.add(ident)
             elif kind == 2:
                 open_ids.remove(ident)
             else:
                 # Copy so later add/remove calls do not alter this snapshot.
                 snapshot_by_id[ident] = set(open_ids)
         SNP_io.append(snapshot_by_id)
     return SNP_io
Esempio n. 7
0
def SNP_compute_Z():
    """Write a Z-score for each SNP p-value to ``P.pvZ_file``.

    P-values are the second element returned by ``U.read_SNP``. For each
    SNP, ``z = stats.norm.ppf(1 - p)``. Finite results are written as
    ``snp<TAB>p<TAB>z`` lines; infinite results are not written but are
    printed and counted by sign. A summary line is printed at the end.
    """
    # log = True to get details of excluded p-values
    log = False
    _, SNP_pv = U.read_SNP(log)
    inf_pos, inf_neg = 0, 0
    # The context manager closes (and flushes) the file even if a p-value
    # or the write raises part-way through.
    with open(P.pvZ_file, mode='w') as out:
        print('Infinity result of computing Z score')
        for snp, pv in SNP_pv.items():
            # NOTE(review): 1 - pv rounds to 1.0 for very small pv, giving
            # +inf; stats.norm.isf(pv) would avoid that but changes the
            # values written -- confirm before switching.
            z = stats.norm.ppf(1 - pv)
            if math.isinf(z):
                if z == math.inf:
                    inf_pos += 1
                else:
                    inf_neg += 1
                print(snp, z, pv, sep='\t')
            else:
                out.write(snp + '\t' + str(pv) + '\t' + str(z) + '\n')
    print('Z computed from p-values of SNP:', inf_pos, '+inf, ', inf_neg, '-inf /', len(SNP_pv))
Esempio n. 8
0
 def __init__(self):
     """Store the SNP p-values as a list in ``self.SNP_pv``.

     The values come from the second element returned by ``U.read_SNP``.
     """
     # NOTE(review): `log` is not defined in this method; it must resolve
     # to a module-level name -- confirm it exists where this class lives.
     _positions, pv_by_snp = U.read_SNP(log=log)
     self.SNP_pv = [*pv_by_snp.values()]