# Scan the region in windows of at most `width` positions, beginning at
# `start`, and fetch the local haplotype count trajectories of each window.
# NOTE(review): Python 2 code (bare `print` statements). `L`, `start`, `end`,
# `width`, `gap`, `countmin`, `freqmin`, `patient` and `VERBOSE` are defined
# outside the visible fragment; `L` is presumably a sequence length - confirm.
win_start = start
        # Continue while (win_start + width - gap) is still below the smaller
        # of `L` and `end`.
        # NOTE(review): the regular advance of win_start is not visible in this
        # fragment; presumably it also steps by `gap` further down - confirm.
        while win_start + width - gap < min(L, end):
            # Clip the window so it never extends past `end` or `L`
            win_end = min(win_start + width, end, L)

            if VERBOSE >= 1:
                print patient.code, win_start, win_end
    
            if VERBOSE >= 2:
                print 'Get region haplotypes'
            try:
                # return_dict=True: the result is used as a mapping below
                # (it is read via datum['ind'] and extended with new keys).
                datum = patient.get_local_haplotype_count_trajectories(\
                               'genomewide',
                               start=win_start,
                               end=win_end,
                               # Filter strings are assembled from countmin and
                               # freqmin; their meaning is defined by the
                               # Patient method, not here.
                               filters=['noN',
                                        'mincount='+str(countmin),
                                        'freqmin='+str(freqmin),
                                       ],
                               VERBOSE=VERBOSE,
                               align=True,
                               return_dict=True)
            except RoiError:
                # This window cannot be retrieved: skip it. win_start must be
                # advanced before `continue`, or the same window would be
                # retried forever.
                win_start += gap
                continue

            # Nothing selected in datum['ind'] for this window: skip it too
            if not len(datum['ind']):
                win_start += gap
                continue

            # Annotate the window data with the patient's times at the selected
            # indices and with the patient code
            datum['times'] = patient.times[datum['ind']]
            datum['pcode'] = patient.code
Esempio n. 2
0
    # For every patient row: load haplotype count trajectories for `roi`, drop
    # rare haplotypes and empty time points, build frequencies and an
    # alignment, then scan the region window by window.
    # NOTE(review): Python 2 code (bare `print` statements). `patients`, `roi`,
    # `L`, `start`, `end`, `width`, `gap`, `countmin` and `freqmin` come from
    # outside the visible fragment.
    for pname, patient in patients.iterrows():

        # Rebind `patient` from the table row to a Patient object
        patient = Patient(patient)
        patient.discard_nonsequenced_samples()

        if VERBOSE >= 1:
            print patient.name, roi
    
        if VERBOSE >= 2:
            print 'Get haplotype trajectories'
        try:
            # First attempt: region-level trajectories keyed by roi[0]
            (ht, indt, htseqs) = patient.get_region_count_trajectories(roi[0],
                                                                    VERBOSE=VERBOSE)
        except IOError:
            # Fallback on IOError: query the local haplotypes with the full roi
            (ht, indt, htseqs) = patient.get_local_haplotype_count_trajectories(roi,
                                                                    VERBOSE=VERBOSE)
    
        if VERBOSE >= 2:
            print 'Align haplotypes and delete rare ones'
        # Keep only the columns of ht (one per entry of htseqs) in which some
        # row exceeds 5
        indht = (ht > 5).any(axis=0)
        ht = ht[:, indht]
        htseqs = htseqs[indht]
        
        # Eliminate time points that are empty after this filter
        ind_keep = ht.any(axis=1)
        indt = indt[ind_keep]
        ht = ht[ind_keep]

        # Prepare data structures, both the matrix with sequences and the hft
        # hft: every row of ht divided by its own row sum. All-zero rows were
        # removed just above, so the sum is nonzero (assuming ht holds
        # non-negative counts - confirm).
        hft = (1.0 * ht.T / ht.sum(axis=1)).T
        alim = np.array(build_msa(htseqs))
        # Window scan: windows of at most `width` positions starting at `start`
        win_start = start
        # NOTE(review): the regular advance of win_start is not visible in this
        # fragment; presumably it also steps by `gap` further down - confirm.
        while win_start + width - gap < min(L, end):
            # Clip the window so it never extends past `end` or `L`
            win_end = min(win_start + width, end, L)

            if VERBOSE >= 1:
                print patient.code, win_start, win_end

            if VERBOSE >= 2:
                print 'Get region haplotypes'
            try:
                # return_dict=True: the result is used as a mapping below
                # (it is read via datum['ind'] and extended with new keys).
                datum = patient.get_local_haplotype_count_trajectories(\
                               'genomewide',
                               start=win_start,
                               end=win_end,
                               # Filter strings are assembled from countmin and
                               # freqmin; their meaning is defined by the
                               # Patient method, not here.
                               filters=['noN',
                                        'mincount='+str(countmin),
                                        'freqmin='+str(freqmin),
                                       ],
                               VERBOSE=VERBOSE,
                               align=True,
                               return_dict=True)
            except RoiError:
                # This window cannot be retrieved: skip it. win_start must be
                # advanced before `continue`, or the same window would be
                # retried forever.
                win_start += gap
                continue

            # Nothing selected in datum['ind'] for this window: skip it too
            if not len(datum['ind']):
                win_start += gap
                continue

            # Annotate the window data with the patient's times at the selected
            # indices and with the patient code
            datum['times'] = patient.times[datum['ind']]
            datum['pcode'] = patient.code
Esempio n. 4
0
        # Per-patient preparation: load haplotype count trajectories for `roi`,
        # drop rare haplotypes and empty time points, then build frequencies
        # and an alignment.
        # NOTE(review): Python 2 code (bare `print` statements). The enclosing
        # loop header is outside the visible fragment; `patient` is rebound
        # here from whatever it held before to a Patient object.
        patient = Patient(patient)
        patient.discard_nonsequenced_samples()

        if VERBOSE >= 1:
            print patient.name, roi

        if VERBOSE >= 2:
            print 'Get haplotype trajectories'
        try:
            # First attempt: region-level trajectories keyed by roi[0]
            (ht, indt,
             htseqs) = patient.get_region_count_trajectories(roi[0],
                                                             VERBOSE=VERBOSE)
        except IOError:
            # Fallback on IOError: query the local haplotypes with the full roi
            (ht, indt,
             htseqs) = patient.get_local_haplotype_count_trajectories(
                 roi, VERBOSE=VERBOSE)

        if VERBOSE >= 2:
            print 'Align haplotypes and delete rare ones'
        # Keep only the columns of ht (one per entry of htseqs) in which some
        # row exceeds 5
        indht = (ht > 5).any(axis=0)
        ht = ht[:, indht]
        htseqs = htseqs[indht]

        # Eliminate time points that are empty after this filter
        ind_keep = ht.any(axis=1)
        indt = indt[ind_keep]
        ht = ht[ind_keep]

        # Prepare data structures, both the matrix with sequences and the hft
        # hft: every row of ht divided by its own row sum. All-zero rows were
        # removed just above, so the sum is nonzero (assuming ht holds
        # non-negative counts - confirm).
        hft = (1.0 * ht.T / ht.sum(axis=1)).T
        alim = np.array(build_msa(htseqs))