Exemple #1
0
def randomsample_qtl(null,n_jobs,rep=20000):
    """Draw ``rep`` random samples in parallel and return them sorted.

    Args:
        null: Object handed to every worker through ``init_child`` (passed
            as the pool initializer argument).
        n_jobs: Number of worker processes in the pool.
        rep: Number of replicates; ``ransample_qtl`` is called once for
            each value in ``range(rep)``.

    Returns:
        A sorted list holding one ``ransample_qtl`` result per replicate.
    """
    screen_output.run_out('random sampling...')
    pool = Pool(processes=n_jobs, initializer=init_child, initargs=(null,))
    try:
        # Chunk by the size of *this* pool (not a global setting) and never
        # below 1: Pool.imap rejects a chunksize of 0, which the plain
        # floor division yields whenever rep < n_jobs.
        result = pool.imap(ransample_qtl,
                           range(rep),
                           chunksize=max(1, rep // n_jobs))
    finally:
        # Always release the workers, even if task submission failed.
        pool.close()
        pool.join()
    # imap buffers finished results, so they can be drained after join().
    return sorted(result)
Exemple #2
0
 def null_perfect(self, n_jobs, perfect_max, update_null=True, verbose=1):
     """Build the null IBD distribution under the perfect-data approximation.

     Every inheritance-vector index (or a random subset of ``perfect_max``
     of them when there are more than that) is evaluated by ``null_inv``
     in a worker pool.  The rounded results populate ``self.pinv_sall``
     and the four null mean/std attributes; ``self.null_ibd`` is replaced
     only when ``update_null`` is true.
     """
     if verbose == 1:
         screen_output.run_out(
             "calculating null distribution using perfect data approximation..."
         )
     pool = Pool(processes=n_jobs)
     total_inv = 2 ** (2 * len(self.family.invnf))
     if total_inv <= perfect_max:
         candidates = range(total_inv)
     else:
         candidates = random.sample(xrange(total_inv), perfect_max)
     # Reset before the tasks are dispatched so workers see an empty table.
     self.pinv_sall = []
     n_candidates = len(candidates)
     if n_candidates >= n_jobs:
         per_worker = n_candidates // n_jobs
     else:
         per_worker = 1
     tasks = product(candidates, [(self.family, self)])
     outcome = pool.imap(null_inv, tasks, chunksize=per_worker)
     pool.close()
     pool.join()

     # Each record is (pair IBD, all-sample IBD, inheritance-vector index).
     records = [(float('%.9f' % rec[0]), float('%.9f' % rec[1]), int(rec[2]))
                for rec in outcome]
     null_ibd = [(pair_val, all_val) for pair_val, all_val, _ in records]
     invs = [inv for _, _, inv in records]

     if self.pinv_sall == []:
         self.pinv_sall = [0] * total_inv
         for (_, all_val), inv in zip(null_ibd, invs):
             self.pinv_sall[inv] = all_val
     if update_null:
         self.null_ibd = null_ibd

     # Expectation and standard deviation of both statistics.
     pair_vals = [entry[0] for entry in null_ibd]
     all_vals = [entry[1] for entry in null_ibd]
     self.null_mean = float('%.9f' % (sum(pair_vals) / len(pair_vals)))
     self.null_std = self.std(pair_vals, self.null_mean, False)
     self.sall_null_mean = float('%.9f' % (sum(all_vals) / len(all_vals)))
     self.sall_null_std = self.std(all_vals, self.sall_null_mean, False)
Exemple #3
0
 def null_permute(self, n_jobs, perfect_max, conditional_prob=None):
     """Accumulate expected pairwise IBD over founder genotype configurations.

     For every founder genotype configuration (weighted by its
     probability) a set of inheritance vectors is evaluated with
     ``null_generator_pairibd`` in a worker pool.  The results are folded
     into two attributes of ``self.family``:

     * ``expect_pair_ibd[i]`` accumulates ``weight * ibd_i``;
     * ``prior[i][j]`` accumulates ``weight * ibd_i * ibd_j`` and is kept
       symmetric.

     Args:
         n_jobs: Number of worker processes.
         perfect_max: Budget of inheritance vectors, split evenly across
             the founder genotype configurations (at least one each).
         conditional_prob: Optional override for
             ``self.family.conditional_prob``; ``None`` or ``[]`` selects
             the family's own value.  Presumably a mapping whose
             ``'~combined'`` entry is ``(genotype -> probability, ids)``
             -- confirm against the caller.
     """
     from collections import Counter

     screen_output.run_out(
         "calculating theoretical null distribution for rvibd...")
     family = self.family
     paircount = len(family.pairs)
     family.expect_pair_ibd = [0 for x in range(paircount)]
     family.prior = [[0 for y in range(paircount)]
                     for x in range(paircount)]
     rep = pow(2, 2 * len(family.nonfounder))

     # Observed founder alleles at the marker of interest, two per founder.
     foundergt_list = []
     foundergt_dic = {}
     if conditional_prob is None or conditional_prob == []:
         conditional_prob = family.conditional_prob
     for f in family.founder:
         foundergt_list += family.fam_dict[f]['gt'][2 * family.mid:
                                                    2 * family.mid + 2]
     try:
         # With missing founders: enumerate their possible genotypes and
         # merge configurations that agree once each founder's two alleles
         # are sorted.
         gt_id = conditional_prob['~combined'][1]
         missing_founders = set(family.missing_all) & set(family.founder)
         for gt, prob in conditional_prob['~combined'][0].iteritems():
             for f in missing_founders:
                 f_idx = family.founder.index(f)
                 idx = gt_id.index(f)
                 # Alleles are encoded as letters: 'a' -> 1, 'b' -> 2, ...
                 foundergt_list[2 * f_idx:2 * f_idx + 2] = [
                     ord(x) - 96 for x in gt[2 * idx:2 * idx + 2]
                 ]
             key_fgt = []
             for i in range(len(family.founder)):
                 key_fgt.extend(sorted(foundergt_list[2 * i:2 * i + 2]))
             key_fgt = tuple(key_fgt)
             if key_fgt not in foundergt_dic:
                 foundergt_dic[key_fgt] = prob
             else:
                 foundergt_dic[key_fgt] += prob
     except Exception:
         # Best-effort fallback (e.g. no '~combined' entry): treat the
         # current founder genotypes as the single certain configuration.
         # Unlike a bare ``except`` this lets KeyboardInterrupt/SystemExit
         # propagate.
         foundergt_dic[tuple(foundergt_list)] = 1

     # Give every configuration at least one inheritance vector; a budget
     # of 0 would produce an empty sample and fail further down.
     local_perfect_max = max(1, int(perfect_max / len(foundergt_dic)))

     p = Pool(processes=n_jobs)
     try:
         for fgt, prob in foundergt_dic.iteritems():
             if len(set(fgt)) == 1:
                 # All founder alleles identical: one evaluation suffices.
                 full_permutation_inv = [(0, list(fgt))]
             else:
                 if rep > local_perfect_max:
                     inv_ids = random.sample(xrange(rep), local_perfect_max)
                 else:
                     inv_ids = xrange(rep)
                 full_permutation_inv = list(product(inv_ids, [list(fgt)]))
             len_total = len(full_permutation_inv)
             csize = max(1, len_total // n_jobs)
             result = p.imap(null_generator_pairibd,
                             [(i, self) for i in full_permutation_inv],
                             chunksize=csize)
             # Count identical outcomes in one pass rather than calling
             # list.count() once per distinct outcome.
             for tmp_pair_ibd, count in Counter(result).items():
                 # 1.0 forces true division even without
                 # ``from __future__ import division`` on Python 2.
                 tmp_prob = 1.0 / len_total * count * prob
                 for pair_idx in range(paircount):
                     t_ibd = tmp_pair_ibd[pair_idx] * tmp_prob
                     if t_ibd != 0:
                         family.expect_pair_ibd[pair_idx] += t_ibd
                         for pair_jdx in range(pair_idx, paircount):
                             family.prior[pair_idx][
                                 pair_jdx] += t_ibd * tmp_pair_ibd[pair_jdx]
                             # Mirror into the lower triangle.
                             family.prior[pair_jdx][
                                 pair_idx] = family.prior[pair_idx][pair_jdx]
     finally:
         # Release the workers even when an evaluation raised.
         p.close()
         p.join()
Exemple #4
0
 def null_perfect_rvibd(self,
                        n_jobs,
                        perfect_max,
                        sall_flag=False,
                        infer_flag=2,
                        verbose=1):
     """Estimate the null IBD mean/std by mixing over founder genotypes.

     For each founder genotype configuration (weighted by its
     probability) a batch of inheritance vectors is pushed through
     ``n_jobs`` ``myProcess`` workers.  The per-configuration means,
     variances and distributions are then combined.

     Sets ``self.dist_s``, ``self.all_ibd``, ``self.null_mean``,
     ``self.sall_null_mean``, ``self.null_std`` and ``self.sall_null_std``.
     The "all" statistics stay 0 unless ``sall_flag`` is true.

     Args:
         n_jobs: Number of worker processes started per configuration.
         perfect_max: Budget of inheritance vectors, divided by the number
             of configurations (and capped at 5 per configuration when
             ``self.family.simple`` is false).
         sall_flag: Also compute the second ("all") statistic.
         infer_flag: Forwarded unchanged to ``myProcess``.
         verbose: Print a progress message when equal to 1.
     """
     #null distribution of IBDs under perfect data
     if verbose == 1:
         screen_output.run_out(
             "calculating theoretical null distribution for rvibd...")
     rep = pow(2, 2 * len(self.family.nonfounder))
     #print rep
     #Get all possible parental genotype configurations
     foundergt_list = []
     foundergt_dic = {}
     # Two alleles per founder, taken at position self.family.mid.
     for f in self.family.founder:
         foundergt_list += self.family.fam_dict[f]['gt'][2 *
                                                         self.family.mid:2 *
                                                         self.family.mid +
                                                         2]
     try:
         #if there are missing
         gt_id = self.family.conditional_prob['~combined'][1]
         for gt, prob in self.family.conditional_prob['~combined'][
                 0].iteritems():
             for f in set(self.family.missing_all) & set(
                     self.family.founder):
                 f_idx = self.family.founder.index(f)
                 idx = gt_id.index(f)
                 # ord(x) - 96 maps 'a' -> 1, 'b' -> 2, ...
                 # (assumes gt is a string of lowercase letters -- TODO confirm)
                 foundergt_list[2 * f_idx:2 * f_idx + 2] = [
                     ord(x) - 96 for x in gt[2 * idx:2 * idx + 2]
                 ]
             # Sort each founder's allele pair so equivalent configurations
             # share one dictionary key and their probabilities add up.
             key_fgt = []
             for i in range(len(self.family.founder)):
                 key_fgt.extend(sorted(foundergt_list[2 * i:2 * i + 2]))
             key_fgt = tuple(key_fgt)
             if key_fgt not in foundergt_dic:
                 foundergt_dic[key_fgt] = prob
             else:
                 foundergt_dic[key_fgt] += prob
     except:
         # Any failure above falls back to the current founder genotypes as
         # a single configuration with weight 1.
         # NOTE(review): the bare except also hides unrelated errors and
         # keeps any entries already added before the failure.
         foundergt_dic[tuple(foundergt_list)] = 1
     mean_pair, mean_all, var_pair, var_all = 0, 0, 0, 0
     # Containers handed to the worker processes.
     all_ibd = manager.dict(self.all_ibd)
     if self.pinv_sall == []:
         pinv_sall = Array(
             c_double,
             [0 for x in xrange(pow(2, 2 * len(self.family.invnf)))])
     else:
         pinv_sall = (c_double * len(self.pinv_sall))(*self.pinv_sall)
     pinv_key_dict = manager.dict(self.pinv_key_dict)
     pinv_pair_dict = manager.dict(self.pinv_pair_dict)
     combined_dist = {}
     prob_and_mean = []
     if self.family.simple:
         local_perfect_max = int(perfect_max / len(foundergt_dic.keys()))
     else:
         # Non-simple families evaluate at most 5 vectors per configuration.
         local_perfect_max = min(
             int(perfect_max / len(foundergt_dic.keys())), 5)
     for fgt, prob in foundergt_dic.iteritems():
         #For each possible parental genotypes
         #Calculate corresponding IBD given inheritance vector
         #full_permutation=list(permutations(list(fgt),len(self.founder)*2))
         # sample_flag is never set to True here (the assignment below is
         # commented out), so self.std is always called with False.
         sample_flag = False
         full_permutation = [list(fgt)]
         if len(set(fgt)) == 1:
             # All founder alleles identical: a single task is enough.
             full_permutation_inv_raw = [(0, list(fgt))]
         else:
             full_permutation_inv_raw = list(
                 product(xrange(rep), full_permutation))
         ###parallel processing###
         inqueue = multiprocessing.Queue()
         null_ibd = manager.list([])
         if len(full_permutation_inv_raw) > local_perfect_max:
             #sample_flag=True
             full_permutation_inv = random.sample(full_permutation_inv_raw,
                                                  local_perfect_max)
         else:
             full_permutation_inv = full_permutation_inv_raw
         len_total = len(full_permutation_inv)
         for i in full_permutation_inv:
             inqueue.put(i)
         #start n seperate processes
         procs = []
         for proc in range(n_jobs):
             p = myProcess(proc,self.family,self,inqueue,all_ibd,\
                 null_ibd,pinv_sall,pinv_key_dict,pinv_pair_dict,sall_flag,infer_flag)
             p.start()
             procs.append(p)
             # One None per worker; presumably the stop sentinel that
             # myProcess looks for -- verify against its run() method.
             inqueue.put(None)
         TIMEOUT = 3600
         start = time.time()
         # Poll until every worker has exited.  Because of the ``or``, the
         # loop keeps waiting past TIMEOUT while fewer than two results
         # have been collected.
         while time.time() - start <= TIMEOUT or len(null_ibd) < 2:
             if any(p.is_alive() for p in procs):
                 time.sleep(.1)  # Just to avoid hogging the CPU
             else:
                 break
         else:
             # Reached only when the loop condition turned false (timeout),
             # not on ``break``: forcibly stop whatever is still running.
             try:
                 #print("timed out, killing all processes")
                 for p in procs:
                     p.terminate()
                     p.join()
             except:
                 pass
         # Discard anything left in the queue (unconsumed tasks/sentinels).
         while not inqueue.empty():
             inqueue.get()
         tmp_ibd_pair = [x[0] for x in null_ibd]
         # NOTE(review): the mean divides by the number of queued tasks, not
         # len(null_ibd); the two differ if workers were terminated early.
         tmp_mean_pair = float('%.9f' % (sum(tmp_ibd_pair) / len_total))
         tmp_std_pair = self.std(tmp_ibd_pair, tmp_mean_pair, sample_flag)
         mean_pair += prob * tmp_mean_pair
         # Within-configuration part of the variance.
         var_pair += prob * tmp_std_pair**2
         tmp_mean_all, tmp_std_all = 0, 0
         if sall_flag:
             tmp_ibd_all = [x[1] for x in null_ibd]
             tmp_mean_all = float('%.9f' % (sum(tmp_ibd_all) / len_total))
             tmp_std_all = self.std(tmp_ibd_all, tmp_mean_all, sample_flag)
             mean_all += prob * tmp_mean_all
             var_all += prob * tmp_std_all**2
         prob_and_mean.append((prob, tmp_mean_pair, tmp_mean_all))
         dist_s = self.distribution(pall_flag=sall_flag, null_ibd=null_ibd)
         #if len(set(fgt))==1:
         #    print tmp_mean_pair, tmp_mean_all, tmp_std_pair, tmp_std_all, dist_s
         # dist_s[1] holds the IBD values and dist_s[0] the matching weights
         # (as indexed below); merge them weighted by this configuration's
         # probability.
         for v_idx, ibd_v in enumerate(dist_s[1]):
             if ibd_v in combined_dist:
                 combined_dist[ibd_v] += dist_s[0][v_idx] * prob
             else:
                 combined_dist[ibd_v] = dist_s[0][v_idx] * prob
     ibd_keys = combined_dist.keys()
     self.dist_s = ([combined_dist[k] for k in ibd_keys], ibd_keys)
     self.all_ibd = all_ibd
     # Between-configuration part of the variance: weighted squared
     # deviation of each configuration mean from the overall mean.
     for tmp_ele in prob_and_mean:
         var_pair += tmp_ele[0] * (tmp_ele[1] - mean_pair)**2
         if sall_flag:
             var_all += tmp_ele[0] * (tmp_ele[2] - mean_all)**2
     self.null_mean = mean_pair
     self.sall_null_mean = mean_all
     self.null_std = math.sqrt(var_pair)
     self.sall_null_std = math.sqrt(var_all)
Exemple #5
0
 def nullibd(self,
             rep,
             n_jobs,
             sall_flag=False,
             infer_flag=2,
             simple=False,
             verbose=1):
     """Simulate the null IBD distribution with ``rep`` replicates.

     Replicate indices ``0 .. rep-1`` are queued and processed by
     ``n_jobs`` ``myProcess`` workers.  Afterwards ``self.null_ibd``,
     ``self.pinv_pair_dict`` and ``self.all_ibd`` are replaced by the
     containers the workers used.

     Args:
         rep: Number of replicate indices to queue.
         n_jobs: Number of worker processes to start.
         sall_flag: Also store ``self.pinv_sall`` / ``self.pinv_key_dict``
             and, unless ``simple``, the "all" mean and std.
         infer_flag: Forwarded unchanged to ``myProcess``.
         simple: When true, skip the mean/std computation.
         verbose: Print a progress message when equal to 1.
     """
     #calculate expected mean and std for IBD under H0
     if verbose == 1:
         screen_output.run_out("calculating null distribution...")
     #the number of nonfounders that should be included in inheritance vector
     # NOTE(review): founderid is assigned but not used in this method.
     founderid = self.family.founder
     ###parallel processing###
     inqueue = multiprocessing.Queue()
     all_ibd = manager.dict(self.all_ibd)
     # Seeded with the existing results, so its length may already be
     # non-zero before any worker runs.
     null_ibd = manager.list(self.null_ibd)
     if self.pinv_sall == []:
         pinv_sall = Array(
             c_double,
             [0 for x in xrange(pow(2, 2 * len(self.family.invnf)))])
     else:
         pinv_sall = (c_double * len(self.pinv_sall))(*self.pinv_sall)
     pinv_key_dict = manager.dict(self.pinv_key_dict)
     pinv_pair_dict = manager.dict(self.pinv_pair_dict)
     for i in xrange(rep):
         inqueue.put(i)
     #start n seperate processes
     procs = []
     for proc in range(n_jobs):
         p = myProcess(proc,self.family,self,inqueue,all_ibd,\
                 null_ibd,pinv_sall,pinv_key_dict,pinv_pair_dict,sall_flag,infer_flag)
         p.start()
         procs.append(p)
         # One None per worker; presumably the stop sentinel that myProcess
         # looks for -- verify against its run() method.
         inqueue.put(None)
     TIMEOUT = 3600  #10000 if sall_flag else 3600
     start = time.time()
     # Counts polls during which exactly rep - 1 results were present.
     last_flag = 0
     while time.time() - start <= TIMEOUT:
         if any(p.is_alive() for p in procs):
             if last_flag > 10:
                 # Workers are still alive after more than 10 polls at
                 # rep - 1 results: stop them and use what was collected.
                 for p in procs:
                     p.terminate()
                     p.join()
                 break
             elif len(null_ibd) == rep - 1:
                 # NOTE(review): this branch does not sleep, so the counter
                 # can pass 10 almost immediately.
                 last_flag += 1
             else:
                 time.sleep(.1)  # Just to avoid hogging the CPU
         else:
             break
     else:
         # Reached only when the loop condition turned false (timeout), not
         # on ``break``: forcibly stop whatever is still running.
         try:
             #print("timed out, killing all processes")
             for p in procs:
                 p.terminate()
                 p.join()
         except:
             pass
     # Discard anything left in the queue (unconsumed tasks/sentinels).
     while not inqueue.empty():
         inqueue.get()
     self.null_ibd = null_ibd
     self.pinv_pair_dict = pinv_pair_dict
     self.all_ibd = all_ibd
     if sall_flag:
         self.pinv_sall = [x for x in pinv_sall]
         self.pinv_key_dict = pinv_key_dict
     if not simple:
         #expectation and standard deviation
         ibd_pair = [x[0] for x in self.null_ibd]
         self.null_mean = float('%.9f' % (sum(ibd_pair) / len(ibd_pair)))
         self.null_std = self.std(ibd_pair, self.null_mean)
         if sall_flag:
             # Zero entries are excluded from the "all" statistic.
             ibd_all = [x[1] for x in self.null_ibd if x[1] != 0]
             self.sall_null_mean = float('%.9f' %
                                         (sum(ibd_all) / len(ibd_all)))
             self.sall_null_std = self.std(ibd_all, self.sall_null_mean)