Exemple #1
0
 def sample_pat(self):
     '''Return the patient sample this sequencing sample belongs to.

     The SamplePat object is built on first access and stored in
     self._sample_pat; as long as that attribute is not None, later calls
     return the stored object without reloading the sample table.
     '''
     cached = self._sample_pat
     if cached is not None:
         return cached

     # Imported here rather than at module level.
     # NOTE(review): presumably to avoid a circular import - confirm.
     from hivwholeseq.patients.patients import load_samples_sequenced as lssp
     from hivwholeseq.patients.patients import SamplePat

     # The table is loaded with include_wrong=True and the row is selected
     # by the value stored under this sample's 'patient sample' key.
     table = lssp(include_wrong=True)
     self._sample_pat = SamplePat(table.loc[self['patient sample']])
     return self._sample_pat
    else:
        samples_focal = samples

    if VERBOSE >= 2:
        print "samples", samples_focal.index.tolist()

    if not fragments:
        fragments = ["F" + str(i) for i in xrange(1, 7)]
    if VERBOSE >= 3:
        print "fragments", fragments

    if submit:
        for fragment in fragments:
            for samplename, sample in samples_focal.iterrows():

                sample = SamplePat(sample)
                if PCR is None:
                    PCRs_sample = (1, 2)
                else:
                    PCRs_sample = [PCR]
                for PCR_sample in PCRs_sample:
                    bamfilename = sample.get_mapped_filtered_filename(fragment, PCR=PCR_sample, decontaminated=False)
                    if not os.path.isfile(bamfilename):
                        continue

                    # if check_already_decontaminated(sample, fragment, PCR_sample):
                    #    continue

                    fork_self(samplename, fragment, VERBOSE=VERBOSE, maxreads=maxreads, summary=summary, PCR=PCR_sample)

        sys.exit()
    counts_all = []
    for fragment in fragments:
        counts = []
        for samplename, sample in samples.iterrows():
            if VERBOSE >= 1:
                print samplename, fragment,
                if VERBOSE >= 2:
                    print ''

            if submit:
                fork_self(samplename, fragment, VERBOSE=VERBOSE, PCR=PCR,
                          block_len=block_len, n_reads_per_ali=n_reads_per_ali)
                continue

            sample = SamplePat(sample)
            pname = sample.patient
            refseq = SeqIO.read(get_initial_reference_filename(pname, fragment), 'fasta')
            refm = np.array(refseq)
            len_reference = len(refseq)

            # NOTE: we need consensi to decontaminate, so
            bamfilename = sample.get_mapped_filtered_filename(fragment,
                                            PCR=PCR,
                                            decontaminated=(not use_raw_reads))
            if not os.path.isfile(bamfilename):
                continue
            
            if VERBOSE >= 1:
                print 'PCR', PCR,
                if VERBOSE >= 2:
Exemple #4
0
def print_info_genomewide(p, title, name, method, VERBOSE=0, require_all=True):
    '''Pretty printer for patient pipeline info'''

    mod_dates = p.mod_dates

    def check_requisite_genomewide(md,
                                   name_requisite,
                                   samplename,
                                   mod_dates,
                                   require_all=True):
        '''Check requisites for genomewide observables'''
        stati = []
        fragments = ['F' + str(i + 1) for i in xrange(6)]
        for fragment in fragments:
            if (name_requisite, fragment, samplename) not in mod_dates:
                stati.append('MISS')
            elif md < mod_dates[(name_requisite, fragment, samplename)]:
                stati.append('OLD')
            else:
                stati.append('OK')

        if 'OLD' in stati:
            return 'OLD'
        else:
            if require_all:
                if 'MISS' in stati:
                    return 'MISS'
                else:
                    return 'OK'
            else:
                if 'OK' in stati:
                    return 'OK'
                else:
                    return 'MISS'

    def check_contamination_genomewide(sample):
        '''Check whether any of the fragment samples is contaminated'''
        fragments = ['F' + str(i + 1) for i in xrange(6)]
        for fragment in fragments:
            if 'contaminated' in sample[fragment]:
                return True
        return False

    import os, sys
    from hivwholeseq.patients.samples import SamplePat

    # NOTE: this function is used to check both entire patients and single samples
    if isinstance(p, SamplePat):
        sample_iter = [(p.name, p)]
    else:
        sample_iter = p.samples.iterrows()

    stati = set()
    line = ('{:<' + str(title_len) + '}').format(title + ':')
    print line
    for samplename, sample in sample_iter:
        sample = SamplePat(sample)
        title = sample.name
        line = ('{:<' + str(title_len) + '}').format(title + ':')

        if isinstance(method, basestring) and hasattr(sample, method):
            fun = getattr(sample, method)
            fn = fun('genomewide')
        else:
            fn = method(sample.patient, samplename, 'genomewide')
        if os.path.isfile(fn):
            md = modification_date(fn)
            mod_dates[(name, 'genomewide', samplename)] = md

            if name is None:
                status = 'OK'

            elif check_contamination_genomewide(sample):
                status = 'CONT'

            else:
                status = check_requisite_genomewide(md,
                                                    name,
                                                    samplename,
                                                    mod_dates,
                                                    require_all=require_all)

        else:
            status = 'MISS'

        # Check the number of reads if requested
        if (status == 'OK') and (fn[-3:] == 'bam') and (VERBOSE >= 3):
            status = str(get_number_reads(fn))

        stati.add(status)
        line = line + ('{:<' + str(cell_len) + '}').format(status)
        print line

    if 'OLD' in stati:
        raise ValueError('OLD status found')
    samplenames = args.samples
    VERBOSE = args.verbose
    use_save = args.save

    samples_pat = lssp()
    if pnames is not None:
        samples_pat = samples_pat.loc[samples_pat.patient.isin(pnames)]
    elif samplenames is not None:
        samples_pat = samples_pat.loc[samples_pat.index.isin(samplenames)]

    if VERBOSE >= 2:
        print 'samples', samples_pat.index.tolist()

    consensi = []
    for samplename_pat, sample_pat in samples_pat.iterrows():
        sample_pat = SamplePat(sample_pat)
        if VERBOSE >= 1:
            print sample_pat.name,

        consensi_pat = {}
        try:
            for i in xrange(6):
                fragment = 'F'+str(i+1)
                consensi_pat[fragment] = sample_pat.get_consensus(fragment)

        except IOError:
            if VERBOSE >= 1:
                print 'warning: some consensus not found: skipping'
            continue

        except ValueError:
Exemple #6
0
def print_info(p, title, name, method, name_requisite=None, VERBOSE=0):
    '''Pretty printer for patient pipeline info'''
    import os, sys
    from hivwholeseq.patients.samples import SamplePat
    from hivwholeseq.utils.mapping import get_number_reads

    mod_dates = p.mod_dates

    # NOTE: this function is used to check both entire patients and single samples
    if isinstance(p, SamplePat):
        sample_iter = [(p.name, p)]
    else:
        sample_iter = p.samples.iterrows()

    fragments = ['F' + str(i + 1) for i in xrange(6)]

    stati = set()
    line = ('{:<' + str(title_len) + '}').format(title + ':')
    print line
    for samplename, sample in sample_iter:
        sample = SamplePat(sample)
        title = sample.name
        line = ('{:<' + str(title_len) + '}').format(title + ':')

        for fragment in fragments:
            if isinstance(method, basestring) and hasattr(sample, method):
                fun = getattr(sample, method)
                fn = fun(fragment)
            else:
                fn = method(sample.patient, samplename, fragment)

            if os.path.isfile(fn):
                md = modification_date(fn)
                mod_dates[(name, fragment, samplename)] = md

                if name_requisite is None:
                    status = 'OK'

                elif ((name_requisite, fragment, samplename) in mod_dates):
                    if md > mod_dates[(name_requisite, fragment, samplename)]:
                        status = 'OK'
                    else:
                        status = 'OLD'
                        print fn, md, mod_dates[(name_requisite, fragment,
                                                 samplename)]

                elif ((name_requisite, fragment) in mod_dates):
                    if md > mod_dates[(name_requisite, fragment)]:
                        status = 'OK'
                    else:
                        status = 'OLD'

                        # NOTE: on Nov 13, 2014 I updated the mod dates of all
                        # references by mistake, without actually changing the
                        # sequences (ironically, probably testing a backup system
                        # for the refs themselves). So if the requisite is a ref
                        # seq and the date is this one, it's OK
                        if ((name_requisite == 'reference') and
                            mod_dates[(name_requisite, fragment)].date() == \
                            datetime.date(2014, 11, 13)):
                            status = 'OK'

                elif 'contaminated' in sample[fragment]:
                    status = 'CONT'

                else:
                    status = 'ERROR'

            else:
                status = 'MISS'

            # Check the number of reads if requested
            if (status == 'OK') and (fn[-3:] == 'bam') and (VERBOSE >= 3):
                status = str(get_number_reads(fn))

            stati.add(status)
            line = line+fragment+': '+\
                ('{:>'+str(cell_len - len(fragment) - 1)+'}').format(status)+'  '
        print line

    if 'OLD' in stati:
        raise ValueError('OLD status found')
    else:
        samples_focal = samples

    if VERBOSE >= 2:
        print 'samples', samples_focal.index.tolist()

    if not fragments:
        fragments = ['F' + str(i) for i in xrange(1, 7)]
    if VERBOSE >= 3:
        print 'fragments', fragments

    if submit:
        for fragment in fragments:
            for samplename, sample in samples_focal.iterrows():

                sample = SamplePat(sample)
                if PCR is None:
                    PCRs_sample = (1, 2)
                else:
                    PCRs_sample = [PCR]
                for PCR_sample in PCRs_sample:
                    bamfilename = sample.get_mapped_filtered_filename(
                        fragment, PCR=PCR_sample, decontaminated=False)
                    if not os.path.isfile(bamfilename):
                        continue

                    #if check_already_decontaminated(sample, fragment, PCR_sample):
                    #    continue

                    fork_self(samplename,
                              fragment,
Exemple #8
0
    parser.add_argument('--samples', nargs='+', help='Samples to analyze')
    parser.add_argument('--verbose',
                        type=int,
                        default=0,
                        help='Verbosity level [0-3]')

    args = parser.parse_args()
    VERBOSE = args.verbose
    samplenames = args.samples

    samples = load_samples_sequenced()
    if samplenames is not None:
        samples = samples.loc[samplenames]

    for samplename, sample in samples.iterrows():
        sample = SamplePat(sample)

        mod_dates = {}

        pretty_print_info(
            sample,
            'Map + filter',
            'filter',
            'get_mapped_filtered_filename',
            None,  #'reference',
            mod_dates,
            VERBOSE=VERBOSE)

        pretty_print_info(sample,
                          'Decontaminate',
                          'decontaminate',
Exemple #9
0
        for samplename, sample in samples.iterrows():
            if VERBOSE >= 1:
                print samplename, fragment,
                if VERBOSE >= 2:
                    print ''

            if submit:
                fork_self(samplename,
                          fragment,
                          VERBOSE=VERBOSE,
                          PCR=PCR,
                          block_len=block_len,
                          n_reads_per_ali=n_reads_per_ali)
                continue

            sample = SamplePat(sample)
            pname = sample.patient
            refseq = SeqIO.read(
                get_initial_reference_filename(pname, fragment), 'fasta')
            refm = np.array(refseq)
            len_reference = len(refseq)

            # NOTE: we need consensi to decontaminate, so
            bamfilename = sample.get_mapped_filtered_filename(
                fragment, PCR=PCR, decontaminated=(not use_raw_reads))
            if not os.path.isfile(bamfilename):
                continue

            if VERBOSE >= 1:
                print 'PCR', PCR,
                if VERBOSE >= 2:
Exemple #10
0
    samplenames = args.samples
    VERBOSE = args.verbose
    use_save = args.save

    samples_pat = lssp()
    if pnames is not None:
        samples_pat = samples_pat.loc[samples_pat.patient.isin(pnames)]
    elif samplenames is not None:
        samples_pat = samples_pat.loc[samples_pat.index.isin(samplenames)]

    if VERBOSE >= 2:
        print 'samples', samples_pat.index.tolist()

    consensi = []
    for samplename_pat, sample_pat in samples_pat.iterrows():
        sample_pat = SamplePat(sample_pat)
        if VERBOSE >= 1:
            print sample_pat.name,

        consensi_pat = {}
        try:
            for i in xrange(6):
                fragment = 'F' + str(i + 1)
                consensi_pat[fragment] = sample_pat.get_consensus(fragment)

        except IOError:
            if VERBOSE >= 1:
                print 'warning: some consensus not found: skipping'
            continue

        except ValueError: