Example #1
0
    def print_info_references(p):
        '''Print the status of the patient references, stopping at the first issue

        Three lines are written to stdout, in this order:
          - 'References': one cell per amplicon F1..F6, 'OK' if the file
            returned by p.get_reference_filename(fragment) exists, 'MISS'
            otherwise;
          - 'Genome ref': same check for the genomewide 'fasta' reference;
          - 'Annotated': 'OK' if the genomewide 'gb' reference exists and is
            not older than the fasta one, 'OLD' if it is older, 'MISS' if it
            does not exist.

        The modification date of every amplicon and fasta reference found is
        stored in p.mod_dates under ('reference', <fragment>) and
        ('reference', 'genomewide'). check_reference_overlap(p) is called
        between the second and the third check.

        Raises:
           PipelineError: as soon as one of the three lines is not entirely
           'OK' (an empty line is printed just before raising).
        '''
        def titled(label):
            '''Left-aligned title cell, padded to title_len characters'''
            return '{:<{width}}'.format(label + ':', width=title_len)

        # Amplicon references, one cell per fragment
        row = titled('References')
        found = []
        for number in range(1, 7):
            fragment = 'F' + str(number)
            path = p.get_reference_filename(fragment)
            present = os.path.isfile(path)
            if present:
                p.mod_dates[('reference', fragment)] = modification_date(path)
            found.append(present)
            row += fragment + ': ' + '{:>{width}}'.format(
                'OK' if present else 'MISS',
                width=cell_len - len(fragment) - 1) + '  '
        print(row)

        if not all(found):
            print('')
            raise PipelineError('Amplicon reference failed!')

        # Genomewide reference (fasta)
        row = titled('Genome ref')
        path = p.get_reference_filename('genomewide', 'fasta')
        genome_found = os.path.isfile(path)
        if genome_found:
            p.mod_dates[('reference', 'genomewide')] = modification_date(path)
        print(row + '{:<{width}}'.format('OK' if genome_found else 'MISS',
                                         width=cell_len))

        if not genome_found:
            print('')
            raise PipelineError('Genomewide reference failed!')

        check_reference_overlap(p)

        # Annotated genomewide reference: must be at least as recent as the
        # fasta it was derived from
        row = titled('Annotated')
        path = p.get_reference_filename('genomewide', 'gb')
        if not os.path.isfile(path):
            state = 'MISS'
        elif modification_date(path) >= p.mod_dates[('reference', 'genomewide')]:
            state = 'OK'
        else:
            state = 'OLD'
        print(row + '{:<{width}}'.format(state, width=cell_len))

        if state != 'OK':
            print('')
            raise PipelineError('Annotated reference failed!')
    def print_info_references(p):
        '''Print the status of the patient references, stopping at the first issue

        Three lines are written to stdout, in this order:
          - 'References': one cell per amplicon F1..F6, 'OK' if the file
            returned by p.get_reference_filename(fragment) exists, 'MISS'
            otherwise;
          - 'Genome ref': same check for the genomewide 'fasta' reference;
          - 'Annotated': 'OK' if the genomewide 'gb' reference exists and is
            not older than the fasta one, 'OLD' if it is older, 'MISS' if it
            does not exist.

        The modification date of every amplicon and fasta reference found is
        stored in p.mod_dates under ('reference', <fragment>) and
        ('reference', 'genomewide'). check_reference_overlap(p) is called
        between the second and the third check.

        Raises:
           PipelineError: as soon as one of the three lines is not entirely
           'OK' (an empty line is printed just before raising).
        '''
        def titled(label):
            '''Left-aligned title cell, padded to title_len characters'''
            return '{:<{width}}'.format(label + ':', width=title_len)

        # Amplicon references, one cell per fragment
        row = titled('References')
        found = []
        for number in range(1, 7):
            fragment = 'F' + str(number)
            path = p.get_reference_filename(fragment)
            present = os.path.isfile(path)
            if present:
                p.mod_dates[('reference', fragment)] = modification_date(path)
            found.append(present)
            row += fragment + ': ' + '{:>{width}}'.format(
                'OK' if present else 'MISS',
                width=cell_len - len(fragment) - 1) + '  '
        print(row)

        if not all(found):
            print('')
            raise PipelineError('Amplicon reference failed!')

        # Genomewide reference (fasta)
        row = titled('Genome ref')
        path = p.get_reference_filename('genomewide', 'fasta')
        genome_found = os.path.isfile(path)
        if genome_found:
            p.mod_dates[('reference', 'genomewide')] = modification_date(path)
        print(row + '{:<{width}}'.format('OK' if genome_found else 'MISS',
                                         width=cell_len))

        if not genome_found:
            print('')
            raise PipelineError('Genomewide reference failed!')

        check_reference_overlap(p)

        # Annotated genomewide reference: must be at least as recent as the
        # fasta it was derived from
        row = titled('Annotated')
        path = p.get_reference_filename('genomewide', 'gb')
        if not os.path.isfile(path):
            state = 'MISS'
        elif modification_date(path) >= p.mod_dates[('reference', 'genomewide')]:
            state = 'OK'
        else:
            state = 'OLD'
        print(row + '{:<{width}}'.format(state, width=cell_len))

        if state != 'OK':
            print('')
            raise PipelineError('Annotated reference failed!')
def print_info_patient(p, title, name, method, name_requisite=None,
                       VERBOSE=0):
    '''Pretty printer for whole-patient info, fragment by fragment

    Prints one line made of the title followed by one status cell for each
    of the fragments F1..F6.

    Parameters:
       p: patient object; its mod_dates dict is read and updated, and its
          name attribute is passed to method when method is a callable
       title: label printed at the beginning of the line
       name: not used by this function
       method: either the name of a method of p, called as
               p.<method>(fragment), or a callable, called as
               method(p.name, fragment); both must return a filename
       name_requisite: if not None, the file must be strictly newer than
                       mod_dates[(name_requisite, fragment)] to be 'OK'
       VERBOSE: not used by this function

    The status of each fragment is one of:
       'MISS':  the file does not exist
       'OK':    the file exists and, if a requisite is requested, is newer
       'OLD':   the file exists but is not newer than its requisite
       'ERROR': a requisite is requested but its date is not in mod_dates
    '''
    import os

    mod_dates = p.mod_dates

    # The way to get the filename does not depend on the fragment
    if isinstance(method, basestring):
        get_filename = getattr(p, method)
    else:
        get_filename = lambda fragment: method(p.name, fragment)

    line = ('{:<'+str(title_len)+'}').format(title+':')
    for fragment in ('F'+str(i+1) for i in xrange(6)):
        fn = get_filename(fragment)

        if not os.path.isfile(fn):
            status = 'MISS'

        else:
            md = modification_date(fn)
            # NOTE(review): the date is stored under the fixed key 'refmap'
            # rather than under `name` -- confirm this is intended
            mod_dates[('refmap', fragment)] = md

            if name_requisite is None:
                status = 'OK'

            elif (name_requisite, fragment) not in mod_dates:
                # Without this branch status was left unassigned (or stale
                # from the previous fragment); same fallback as print_info
                status = 'ERROR'

            elif md > mod_dates[(name_requisite, fragment)]:
                status = 'OK'

            else:
                status = 'OLD'

        line = line + fragment + ': ' + ('{:>'+str(cell_len - len(fragment) - 1)+'}').format(status) + '  '
    print(line)
Example #4
0
def print_info_patient(p, title, name, method, name_requisite=None, VERBOSE=0):
    '''Pretty printer for whole-patient info, fragment by fragment

    Prints one line made of the title followed by one status cell for each
    of the fragments F1..F6.

    Parameters:
       p: patient object; its mod_dates dict is read and updated, and its
          name attribute is passed to method when method is a callable
       title: label printed at the beginning of the line
       name: not used by this function
       method: either the name of a method of p, called as
               p.<method>(fragment), or a callable, called as
               method(p.name, fragment); both must return a filename
       name_requisite: if not None, the file must be strictly newer than
                       mod_dates[(name_requisite, fragment)] to be 'OK'
       VERBOSE: not used by this function

    The status of each fragment is one of:
       'MISS':  the file does not exist
       'OK':    the file exists and, if a requisite is requested, is newer
       'OLD':   the file exists but is not newer than its requisite
       'ERROR': a requisite is requested but its date is not in mod_dates
    '''
    import os

    mod_dates = p.mod_dates

    # The way to get the filename does not depend on the fragment
    if isinstance(method, basestring):
        get_filename = getattr(p, method)
    else:
        get_filename = lambda fragment: method(p.name, fragment)

    line = ('{:<' + str(title_len) + '}').format(title + ':')
    for fragment in ('F' + str(i + 1) for i in xrange(6)):
        fn = get_filename(fragment)

        if not os.path.isfile(fn):
            status = 'MISS'

        else:
            md = modification_date(fn)
            # NOTE(review): the date is stored under the fixed key 'refmap'
            # rather than under `name` -- confirm this is intended
            mod_dates[('refmap', fragment)] = md

            if name_requisite is None:
                status = 'OK'

            elif (name_requisite, fragment) not in mod_dates:
                # Without this branch status was left unassigned (or stale
                # from the previous fragment); same fallback as print_info
                status = 'ERROR'

            elif md > mod_dates[(name_requisite, fragment)]:
                status = 'OK'

            else:
                status = 'OLD'

        line = line + fragment + ': ' + ('{:>' +
                                         str(cell_len - len(fragment) - 1) +
                                         '}').format(status) + '  '
    print(line)
Example #5
0
def check_status(sample, step, detail=1):
    '''Check for a sample a certain step of the pipeline at a certain detail

    Parameters:
       sample: object providing the get_*_filename methods and the
               regions_complete / regions_generic attributes used below
       step: 'premapped', 'divided', 'consensus', 'mapped', 'filtered',
             'mapped_initial' or 'mapped_filtered'
       detail: 1 only checks that the files exist; 3 reports the number of
               reads of existing files (the sequence length for
               'consensus'); 2 behaves like 3 for 'filtered' and
               'consensus' and like 1 for any other step

    Returns:
       A one-element list for 'premapped', a list of (region, value) pairs
       for the other steps. value is False for a missing file and True (or
       the count, at detail 3) for an existing one. 'mapped_initial' and
       'mapped_filtered' are always checked at detail 1; for the latter
       value is 'OLD' when the filtered file is older than an existing
       mapped-to-initial file. None is returned for an unknown step or
       detail.
    '''
    def region_source():
        '''Regions and filename getter of a region-wise step, else None'''
        if step == 'divided':
            return (sample.regions_complete,
                    lambda fr: sample.get_divided_filename(fr))
        if step == 'consensus':
            return (sample.regions_generic,
                    lambda fr: sample.get_consensus_filename(fr))
        if step == 'mapped':
            return (sample.regions_generic,
                    lambda fr: sample.get_mapped_filename(fr, filtered=False))
        if step == 'filtered':
            return (sample.regions_generic,
                    lambda fr: sample.get_mapped_filename(fr, filtered=True))
        if step == 'mapped_initial':
            return (sample.regions_generic,
                    lambda fr: sample.get_mapped_to_initial_filename(fr))
        return None

    if detail == 2:
        finer = 3 if step in ('filtered', 'consensus') else 1
        return check_status(sample, step, detail=finer)

    if detail == 1:
        if step == 'premapped':
            return [os.path.isfile(sample.get_premapped_filename())]

        if step == 'mapped_filtered':
            # Check whether the mapped filtered is older than the mapped_initial
            from hivwholeseq.utils.generic import modification_date
            report = []
            for fr in sample.regions_generic:
                fn_initial = sample.get_mapped_to_initial_filename(fr)
                fn_filtered = sample.get_mapped_filtered_filename(fr)
                if not os.path.isfile(fn_filtered):
                    value = False
                elif not os.path.isfile(fn_initial):
                    value = True
                else:
                    date_initial = modification_date(fn_initial)
                    date_filtered = modification_date(fn_filtered)
                    value = 'OLD' if date_filtered < date_initial else True
                report.append((fr, value))
            return report

        source = region_source()
        if source is None:
            return None
        regions, filename_of = source
        return [(fr, os.path.isfile(filename_of(fr))) for fr in regions]

    if detail == 3:
        # TODO: add mapped_to_initial and downstream
        if step in ('mapped_initial', 'mapped_filtered'):
            return check_status(sample, step, detail=1)

        if step == 'premapped':
            fn = sample.get_premapped_filename()
            if not os.path.isfile(fn):
                return [False]
            return [get_number_reads(fn)]

        source = region_source()
        if source is None:
            return None
        regions, filename_of = source

        if step == 'consensus':
            measure = lambda fn: len(SeqIO.read(fn, 'fasta'))
        else:
            measure = lambda fn: get_number_reads(fn)

        report = []
        for fr in regions:
            fn = filename_of(fr)
            report.append((fr, measure(fn) if os.path.isfile(fn) else False))
        return report

    return None
Example #6
0
def print_info_genomewide(p, title, name, method, VERBOSE=0, require_all=True):
    '''Print the status of a genomewide observable, one line per sample

    Parameters:
       p: either a SamplePat (a single line is printed) or an object whose
          samples attribute provides iterrows(); its mod_dates dict is read
          and updated
       title: header printed on its own line before the samples
       name: name of the observable; the date of each file found is stored
             in mod_dates[(name, 'genomewide', samplename)] and compared
             with the fragment-wise dates mod_dates[(name, Fi, samplename)]
       method: name of a sample method, called as sample.<method>('genomewide'),
               or a callable, called as
               method(sample.patient, samplename, 'genomewide'); both must
               return a filename
       VERBOSE: from 3 on, the number of reads is shown instead of 'OK' for
                filenames ending in 'bam'
       require_all: if True all six fragments are required for an 'OK',
                    otherwise a single one is enough

    The status of each sample is one of 'MISS' (no file, or fragment
    requisites missing), 'CONT' (some fragment is flagged 'contaminated'),
    'OLD' (the file is older than one of its fragments), 'OK' or a number
    of reads.

    Raises:
       ValueError: after printing all lines, if any sample was 'OLD'.
    '''
    mod_dates = p.mod_dates

    import os
    from hivwholeseq.patients.samples import SamplePat

    fragments = ['F' + str(number) for number in range(1, 7)]

    def titled(label):
        '''Left-aligned title cell, padded to title_len characters'''
        return '{:<{width}}'.format(label + ':', width=title_len)

    def requisite_status(md, samplename):
        '''Summarise the fragment-wise requisites into a single status'''
        states = []
        for fragment in fragments:
            key = (name, fragment, samplename)
            if key not in mod_dates:
                states.append('MISS')
            elif md < mod_dates[key]:
                states.append('OLD')
            else:
                states.append('OK')

        # A single outdated fragment wins over everything else
        if 'OLD' in states:
            return 'OLD'
        if require_all:
            return 'MISS' if 'MISS' in states else 'OK'
        return 'OK' if 'OK' in states else 'MISS'

    def is_contaminated(sample):
        '''Check whether any of the fragment samples is contaminated'''
        return any('contaminated' in sample[fragment]
                   for fragment in fragments)

    # NOTE: this function is used to check both entire patients and single samples
    if isinstance(p, SamplePat):
        sample_iter = [(p.name, p)]
    else:
        sample_iter = p.samples.iterrows()

    found_old = False
    print(titled(title))
    for samplename, sample in sample_iter:
        sample = SamplePat(sample)
        row = titled(sample.name)

        if isinstance(method, basestring) and hasattr(sample, method):
            fn = getattr(sample, method)('genomewide')
        else:
            fn = method(sample.patient, samplename, 'genomewide')

        if not os.path.isfile(fn):
            status = 'MISS'
        else:
            md = modification_date(fn)
            mod_dates[(name, 'genomewide', samplename)] = md

            if name is None:
                status = 'OK'
            elif is_contaminated(sample):
                status = 'CONT'
            else:
                status = requisite_status(md, samplename)

        if status == 'OLD':
            found_old = True

        # Check the number of reads if requested
        if (status == 'OK') and fn.endswith('bam') and (VERBOSE >= 3):
            status = str(get_number_reads(fn))

        print(row + '{:<{width}}'.format(status, width=cell_len))

    if found_old:
        raise ValueError('OLD status found')
Example #7
0
def print_info(p, title, name, method, name_requisite=None, VERBOSE=0):
    '''Print the status of a fragment-wise observable, one line per sample

    Parameters:
       p: either a SamplePat (a single line is printed) or an object whose
          samples attribute provides iterrows(); its mod_dates dict is read
          and updated
       title: header printed on its own line before the samples
       name: name of the observable; the date of each file found is stored
             in mod_dates[(name, fragment, samplename)]
       method: name of a sample method, called as sample.<method>(fragment),
               or a callable, called as
               method(sample.patient, samplename, fragment); both must
               return a filename
       name_requisite: if not None, name of the observable the file must be
                       strictly newer than; its date is looked up first per
                       sample, (name_requisite, fragment, samplename), then
                       per fragment only, (name_requisite, fragment)
       VERBOSE: from 3 on, the number of reads is shown instead of 'OK' for
                filenames ending in 'bam'

    The status of each fragment is one of 'MISS' (no file), 'OK', 'OLD'
    (not newer than the requisite), 'CONT' (no requisite date and the
    fragment is flagged 'contaminated'), 'ERROR' (no requisite date
    otherwise) or a number of reads.

    Raises:
       ValueError: after printing all lines, if any fragment was 'OLD'.
    '''
    import os
    from hivwholeseq.patients.samples import SamplePat
    from hivwholeseq.utils.mapping import get_number_reads

    mod_dates = p.mod_dates

    def titled(label):
        '''Left-aligned title cell, padded to title_len characters'''
        return '{:<{width}}'.format(label + ':', width=title_len)

    # NOTE: this function is used to check both entire patients and single samples
    if isinstance(p, SamplePat):
        sample_iter = [(p.name, p)]
    else:
        sample_iter = p.samples.iterrows()

    fragments = ['F' + str(number) for number in range(1, 7)]

    found_old = False
    print(titled(title))
    for samplename, sample in sample_iter:
        sample = SamplePat(sample)
        row = titled(sample.name)

        for fragment in fragments:
            if isinstance(method, basestring) and hasattr(sample, method):
                fn = getattr(sample, method)(fragment)
            else:
                fn = method(sample.patient, samplename, fragment)

            if not os.path.isfile(fn):
                status = 'MISS'

            else:
                md = modification_date(fn)
                mod_dates[(name, fragment, samplename)] = md

                key_sample = (name_requisite, fragment, samplename)
                key_shared = (name_requisite, fragment)

                if name_requisite is None:
                    status = 'OK'

                elif key_sample in mod_dates:
                    if md > mod_dates[key_sample]:
                        status = 'OK'
                    else:
                        status = 'OLD'
                        print('%s %s %s' % (fn, md, mod_dates[key_sample]))

                elif key_shared in mod_dates:
                    requisite_date = mod_dates[key_shared]
                    if md > requisite_date:
                        status = 'OK'

                    # NOTE: on Nov 13, 2014 I updated the mod dates of all
                    # references by mistake, without actually changing the
                    # sequences (ironically, probably testing a backup system
                    # for the refs themselves). So if the requisite is a ref
                    # seq and the date is this one, it's OK
                    elif ((name_requisite == 'reference') and
                          (requisite_date.date() ==
                           datetime.date(2014, 11, 13))):
                        status = 'OK'

                    else:
                        status = 'OLD'

                elif 'contaminated' in sample[fragment]:
                    status = 'CONT'

                else:
                    status = 'ERROR'

            if status == 'OLD':
                found_old = True

            # Check the number of reads if requested
            if (status == 'OK') and fn.endswith('bam') and (VERBOSE >= 3):
                status = str(get_number_reads(fn))

            row += fragment + ': ' + '{:>{width}}'.format(
                status, width=cell_len - len(fragment) - 1) + '  '
        print(row)

    if found_old:
        raise ValueError('OLD status found')
def print_info_genomewide(p, title, name, method, VERBOSE=0, require_all=True):
    '''Print the status of a genomewide observable, one line per sample

    Parameters:
       p: either a SamplePat (a single line is printed) or an object whose
          samples attribute provides iterrows(); its mod_dates dict is read
          and updated
       title: header printed on its own line before the samples
       name: name of the observable; the date of each file found is stored
             in mod_dates[(name, 'genomewide', samplename)] and compared
             with the fragment-wise dates mod_dates[(name, Fi, samplename)]
       method: name of a sample method, called as sample.<method>('genomewide'),
               or a callable, called as
               method(sample.patient, samplename, 'genomewide'); both must
               return a filename
       VERBOSE: from 3 on, the number of reads is shown instead of 'OK' for
                filenames ending in 'bam'
       require_all: if True all six fragments are required for an 'OK',
                    otherwise a single one is enough

    The status of each sample is one of 'MISS' (no file, or fragment
    requisites missing), 'CONT' (some fragment is flagged 'contaminated'),
    'OLD' (the file is older than one of its fragments), 'OK' or a number
    of reads.

    Raises:
       ValueError: after printing all lines, if any sample was 'OLD'.
    '''
    mod_dates = p.mod_dates

    import os
    from hivwholeseq.patients.samples import SamplePat

    fragments = ['F' + str(number) for number in range(1, 7)]

    def titled(label):
        '''Left-aligned title cell, padded to title_len characters'''
        return '{:<{width}}'.format(label + ':', width=title_len)

    def requisite_status(md, samplename):
        '''Summarise the fragment-wise requisites into a single status'''
        states = []
        for fragment in fragments:
            key = (name, fragment, samplename)
            if key not in mod_dates:
                states.append('MISS')
            elif md < mod_dates[key]:
                states.append('OLD')
            else:
                states.append('OK')

        # A single outdated fragment wins over everything else
        if 'OLD' in states:
            return 'OLD'
        if require_all:
            return 'MISS' if 'MISS' in states else 'OK'
        return 'OK' if 'OK' in states else 'MISS'

    def is_contaminated(sample):
        '''Check whether any of the fragment samples is contaminated'''
        return any('contaminated' in sample[fragment]
                   for fragment in fragments)

    # NOTE: this function is used to check both entire patients and single samples
    if isinstance(p, SamplePat):
        sample_iter = [(p.name, p)]
    else:
        sample_iter = p.samples.iterrows()

    found_old = False
    print(titled(title))
    for samplename, sample in sample_iter:
        sample = SamplePat(sample)
        row = titled(sample.name)

        if isinstance(method, basestring) and hasattr(sample, method):
            fn = getattr(sample, method)('genomewide')
        else:
            fn = method(sample.patient, samplename, 'genomewide')

        if not os.path.isfile(fn):
            status = 'MISS'
        else:
            md = modification_date(fn)
            mod_dates[(name, 'genomewide', samplename)] = md

            if name is None:
                status = 'OK'
            elif is_contaminated(sample):
                status = 'CONT'
            else:
                status = requisite_status(md, samplename)

        if status == 'OLD':
            found_old = True

        # Check the number of reads if requested
        if (status == 'OK') and fn.endswith('bam') and (VERBOSE >= 3):
            status = str(get_number_reads(fn))

        print(row + '{:<{width}}'.format(status, width=cell_len))

    if found_old:
        raise ValueError('OLD status found')
def print_info(p, title, name, method, name_requisite=None, VERBOSE=0):
    '''Print the status of a fragment-wise observable, one line per sample

    Parameters:
       p: either a SamplePat (a single line is printed) or an object whose
          samples attribute provides iterrows(); its mod_dates dict is read
          and updated
       title: header printed on its own line before the samples
       name: name of the observable; the date of each file found is stored
             in mod_dates[(name, fragment, samplename)]
       method: name of a sample method, called as sample.<method>(fragment),
               or a callable, called as
               method(sample.patient, samplename, fragment); both must
               return a filename
       name_requisite: if not None, name of the observable the file must be
                       strictly newer than; its date is looked up first per
                       sample, (name_requisite, fragment, samplename), then
                       per fragment only, (name_requisite, fragment)
       VERBOSE: from 3 on, the number of reads is shown instead of 'OK' for
                filenames ending in 'bam'

    The status of each fragment is one of 'MISS' (no file), 'OK', 'OLD'
    (not newer than the requisite), 'CONT' (no requisite date and the
    fragment is flagged 'contaminated'), 'ERROR' (no requisite date
    otherwise) or a number of reads.

    Raises:
       ValueError: after printing all lines, if any fragment was 'OLD'.
    '''
    import os
    from hivwholeseq.patients.samples import SamplePat
    from hivwholeseq.utils.mapping import get_number_reads

    mod_dates = p.mod_dates

    def titled(label):
        '''Left-aligned title cell, padded to title_len characters'''
        return '{:<{width}}'.format(label + ':', width=title_len)

    # NOTE: this function is used to check both entire patients and single samples
    if isinstance(p, SamplePat):
        sample_iter = [(p.name, p)]
    else:
        sample_iter = p.samples.iterrows()

    fragments = ['F' + str(number) for number in range(1, 7)]

    found_old = False
    print(titled(title))
    for samplename, sample in sample_iter:
        sample = SamplePat(sample)
        row = titled(sample.name)

        for fragment in fragments:
            if isinstance(method, basestring) and hasattr(sample, method):
                fn = getattr(sample, method)(fragment)
            else:
                fn = method(sample.patient, samplename, fragment)

            if not os.path.isfile(fn):
                status = 'MISS'

            else:
                md = modification_date(fn)
                mod_dates[(name, fragment, samplename)] = md

                key_sample = (name_requisite, fragment, samplename)
                key_shared = (name_requisite, fragment)

                if name_requisite is None:
                    status = 'OK'

                elif key_sample in mod_dates:
                    if md > mod_dates[key_sample]:
                        status = 'OK'
                    else:
                        status = 'OLD'
                        print('%s %s %s' % (fn, md, mod_dates[key_sample]))

                elif key_shared in mod_dates:
                    requisite_date = mod_dates[key_shared]
                    if md > requisite_date:
                        status = 'OK'

                    # NOTE: on Nov 13, 2014 I updated the mod dates of all
                    # references by mistake, without actually changing the
                    # sequences (ironically, probably testing a backup system
                    # for the refs themselves). So if the requisite is a ref
                    # seq and the date is this one, it's OK
                    elif ((name_requisite == 'reference') and
                          (requisite_date.date() ==
                           datetime.date(2014, 11, 13))):
                        status = 'OK'

                    else:
                        status = 'OLD'

                elif 'contaminated' in sample[fragment]:
                    status = 'CONT'

                else:
                    status = 'ERROR'

            if status == 'OLD':
                found_old = True

            # Check the number of reads if requested
            if (status == 'OK') and fn.endswith('bam') and (VERBOSE >= 3):
                status = str(get_number_reads(fn))

            row += fragment + ': ' + '{:>{width}}'.format(
                status, width=cell_len - len(fragment) - 1) + '  '
        print(row)

    if found_old:
        raise ValueError('OLD status found')