win_start = start while win_start + width - gap < min(L, end): win_end = min(win_start + width, end, L) if VERBOSE >= 1: print patient.code, win_start, win_end if VERBOSE >= 2: print 'Get region haplotypes' try: datum = patient.get_local_haplotype_count_trajectories(\ 'genomewide', start=win_start, end=win_end, filters=['noN', 'mincount='+str(countmin), 'freqmin='+str(freqmin), ], VERBOSE=VERBOSE, align=True, return_dict=True) except RoiError: win_start += gap continue if not len(datum['ind']): win_start += gap continue datum['times'] = patient.times[datum['ind']] datum['pcode'] = patient.code
for pname, patient in patients.iterrows(): patient = Patient(patient) patient.discard_nonsequenced_samples() if VERBOSE >= 1: print patient.name, roi if VERBOSE >= 2: print 'Get haplotype trajectories' try: (ht, indt, htseqs) = patient.get_region_count_trajectories(roi[0], VERBOSE=VERBOSE) except IOError: (ht, indt, htseqs) = patient.get_local_haplotype_count_trajectories(roi, VERBOSE=VERBOSE) if VERBOSE >= 2: print 'Align haplotypes and delete rare ones' indht = (ht > 5).any(axis=0) ht = ht[:, indht] htseqs = htseqs[indht] # Eliminate time points that are empty after this filter ind_keep = ht.any(axis=1) indt = indt[ind_keep] ht = ht[ind_keep] # Prepare data structures, both the matrix with sequences and the hft hft = (1.0 * ht.T / ht.sum(axis=1)).T alim = np.array(build_msa(htseqs))
win_start = start while win_start + width - gap < min(L, end): win_end = min(win_start + width, end, L) if VERBOSE >= 1: print patient.code, win_start, win_end if VERBOSE >= 2: print 'Get region haplotypes' try: datum = patient.get_local_haplotype_count_trajectories(\ 'genomewide', start=win_start, end=win_end, filters=['noN', 'mincount='+str(countmin), 'freqmin='+str(freqmin), ], VERBOSE=VERBOSE, align=True, return_dict=True) except RoiError: win_start += gap continue if not len(datum['ind']): win_start += gap continue datum['times'] = patient.times[datum['ind']] datum['pcode'] = patient.code
patient = Patient(patient) patient.discard_nonsequenced_samples() if VERBOSE >= 1: print patient.name, roi if VERBOSE >= 2: print 'Get haplotype trajectories' try: (ht, indt, htseqs) = patient.get_region_count_trajectories(roi[0], VERBOSE=VERBOSE) except IOError: (ht, indt, htseqs) = patient.get_local_haplotype_count_trajectories( roi, VERBOSE=VERBOSE) if VERBOSE >= 2: print 'Align haplotypes and delete rare ones' indht = (ht > 5).any(axis=0) ht = ht[:, indht] htseqs = htseqs[indht] # Eliminate time points that are empty after this filter ind_keep = ht.any(axis=1) indt = indt[ind_keep] ht = ht[ind_keep] # Prepare data structures, both the matrix with sequences and the hft hft = (1.0 * ht.T / ht.sum(axis=1)).T alim = np.array(build_msa(htseqs))