def print_info_references(p):
    '''Report the state of the amplicon, genomewide and annotated references.

    One row is printed per kind of reference. The modification dates of the
    files found are stored in p.mod_dates under ('reference', <fragment>)
    and ('reference', 'genomewide').

    Raises:
       PipelineError: as soon as one kind of reference is not 'OK'. The
          annotated reference also fails when it is older than the
          genomewide one ('OLD').
    '''
    def heading(label):
        '''Row title, left-aligned and padded to the title column'''
        return '{:<{width}}'.format(label + ':', width=title_len)

    # Amplicon references: one right-aligned cell per fragment
    row = heading('References')
    fragment_states = []
    for fragment in ['F' + str(n) for n in range(1, 7)]:
        fn = p.get_reference_filename(fragment)
        if not os.path.isfile(fn):
            state = 'MISS'
        else:
            state = 'OK'
            p.mod_dates[('reference', fragment)] = modification_date(fn)
        fragment_states.append(state)
        cell = '{:>{width}}'.format(state,
                                    width=cell_len - len(fragment) - 1)
        row = row + fragment + ': ' + cell + ' '
    print(row)
    if frozenset(fragment_states) != frozenset(['OK']):
        print('')
        raise PipelineError('Amplicon reference failed!')

    # Genomewide reference (FASTA)
    row = heading('Genome ref')
    fn = p.get_reference_filename('genomewide', 'fasta')
    if not os.path.isfile(fn):
        state = 'MISS'
    else:
        state = 'OK'
        p.mod_dates[('reference', 'genomewide')] = modification_date(fn)
    print(row + '{:<{width}}'.format(state, width=cell_len))
    if state != 'OK':
        print('')
        raise PipelineError('Genomewide reference failed!')

    check_reference_overlap(p)

    # Annotated genomewide reference (GenBank): must not predate the FASTA
    row = heading('Annotated')
    fn = p.get_reference_filename('genomewide', 'gb')
    if not os.path.isfile(fn):
        state = 'MISS'
    elif modification_date(fn) >= p.mod_dates[('reference', 'genomewide')]:
        state = 'OK'
    else:
        state = 'OLD'
    print(row + '{:<{width}}'.format(state, width=cell_len))
    if state != 'OK':
        print('')
        raise PipelineError('Annotated reference failed!')
def print_info_references(p):
    '''Report the state of the amplicon, genomewide and annotated references.

    One row is printed per kind of reference. The modification dates of the
    files found are stored in p.mod_dates under ('reference', <fragment>)
    and ('reference', 'genomewide').

    Raises:
       PipelineError: as soon as one kind of reference is not 'OK'. The
          annotated reference also fails when it is older than the
          genomewide one ('OLD').
    '''
    def heading(label):
        '''Row title, left-aligned and padded to the title column'''
        return '{:<{width}}'.format(label + ':', width=title_len)

    # Amplicon references: one right-aligned cell per fragment
    row = heading('References')
    fragment_states = []
    for fragment in ['F' + str(n) for n in range(1, 7)]:
        fn = p.get_reference_filename(fragment)
        if not os.path.isfile(fn):
            state = 'MISS'
        else:
            state = 'OK'
            p.mod_dates[('reference', fragment)] = modification_date(fn)
        fragment_states.append(state)
        cell = '{:>{width}}'.format(state,
                                    width=cell_len - len(fragment) - 1)
        row = row + fragment + ': ' + cell + ' '
    print(row)
    if frozenset(fragment_states) != frozenset(['OK']):
        print('')
        raise PipelineError('Amplicon reference failed!')

    # Genomewide reference (FASTA)
    row = heading('Genome ref')
    fn = p.get_reference_filename('genomewide', 'fasta')
    if not os.path.isfile(fn):
        state = 'MISS'
    else:
        state = 'OK'
        p.mod_dates[('reference', 'genomewide')] = modification_date(fn)
    print(row + '{:<{width}}'.format(state, width=cell_len))
    if state != 'OK':
        print('')
        raise PipelineError('Genomewide reference failed!')

    check_reference_overlap(p)

    # Annotated genomewide reference (GenBank): must not predate the FASTA
    row = heading('Annotated')
    fn = p.get_reference_filename('genomewide', 'gb')
    if not os.path.isfile(fn):
        state = 'MISS'
    elif modification_date(fn) >= p.mod_dates[('reference', 'genomewide')]:
        state = 'OK'
    else:
        state = 'OLD'
    print(row + '{:<{width}}'.format(state, width=cell_len))
    if state != 'OK':
        print('')
        raise PipelineError('Annotated reference failed!')
def print_info_patient(p, title, name, method, name_requisite=None, VERBOSE=0):
    '''Print one status row for a whole patient, one cell per fragment.

    Parameters:
       p: patient-like object; its `mod_dates` dict is read and updated,
          and `p.name` is used when `method` is a callable.
       title: label printed at the start of the row.
       name: not used by this function; the modification date of each file
          found is always stored under ('refmap', fragment).
       method: either the name of a method of `p`, called as
          p.<method>(fragment), or a callable invoked as
          method(p.name, fragment). Both must return a filename.
       name_requisite: if not None, the file only counts as 'OK' when it is
          strictly newer than mod_dates[(name_requisite, fragment)].
       VERBOSE: not used by this function.

    Cell values:
       'OK'    file present (and newer than its requisite, if any)
       'OLD'   file present but not newer than its requisite
       'MISS'  file absent
       'ERROR' file present but no date was recorded for the requisite
    '''
    import os

    mod_dates = p.mod_dates

    row = ('{:<' + str(title_len) + '}').format(title + ':')
    for fragment in ['F' + str(n) for n in range(1, 7)]:
        if isinstance(method, basestring):
            fn = getattr(p, method)(fragment)
        else:
            fn = method(p.name, fragment)

        if not os.path.isfile(fn):
            status = 'MISS'
        else:
            md = modification_date(fn)
            mod_dates[('refmap', fragment)] = md

            if name_requisite is None:
                status = 'OK'
            elif (name_requisite, fragment) not in mod_dates:
                # Without this branch `status` stayed unbound (NameError on
                # the first fragment) or silently kept the value computed
                # for the previous fragment.
                status = 'ERROR'
            elif md > mod_dates[(name_requisite, fragment)]:
                status = 'OK'
            else:
                status = 'OLD'

        cell = ('{:>' + str(cell_len - len(fragment) - 1) + '}').format(status)
        row = row + fragment + ': ' + cell + ' '
    print(row)
def print_info_patient(p, title, name, method, name_requisite=None, VERBOSE=0):
    '''Print one status row for a whole patient, one cell per fragment.

    Parameters:
       p: patient-like object; its `mod_dates` dict is read and updated,
          and `p.name` is used when `method` is a callable.
       title: label printed at the start of the row.
       name: not used by this function; the modification date of each file
          found is always stored under ('refmap', fragment).
       method: either the name of a method of `p`, called as
          p.<method>(fragment), or a callable invoked as
          method(p.name, fragment). Both must return a filename.
       name_requisite: if not None, the file only counts as 'OK' when it is
          strictly newer than mod_dates[(name_requisite, fragment)].
       VERBOSE: not used by this function.

    Cell values:
       'OK'    file present (and newer than its requisite, if any)
       'OLD'   file present but not newer than its requisite
       'MISS'  file absent
       'ERROR' file present but no date was recorded for the requisite
    '''
    import os

    mod_dates = p.mod_dates

    row = ('{:<' + str(title_len) + '}').format(title + ':')
    for fragment in ['F' + str(n) for n in range(1, 7)]:
        if isinstance(method, basestring):
            fn = getattr(p, method)(fragment)
        else:
            fn = method(p.name, fragment)

        if not os.path.isfile(fn):
            status = 'MISS'
        else:
            md = modification_date(fn)
            mod_dates[('refmap', fragment)] = md

            if name_requisite is None:
                status = 'OK'
            elif (name_requisite, fragment) not in mod_dates:
                # Without this branch `status` stayed unbound (NameError on
                # the first fragment) or silently kept the value computed
                # for the previous fragment.
                status = 'ERROR'
            elif md > mod_dates[(name_requisite, fragment)]:
                status = 'OK'
            else:
                status = 'OLD'

        cell = ('{:>' + str(cell_len - len(fragment) - 1) + '}').format(status)
        row = row + fragment + ': ' + cell + ' '
    print(row)
def check_status(sample, step, detail=1):
    '''Report the state of the output files of one pipeline step of a sample.

    Parameters:
       sample: object providing the get_*_filename methods and the
          regions_complete / regions_generic sequences used below.
       step: one of 'premapped', 'divided', 'consensus', 'mapped',
          'filtered', 'mapped_initial', 'mapped_filtered'.
       detail: 1 reports file existence; 3 reports the number of reads (the
          sequence length for 'consensus') where implemented and falls back
          to level 1 otherwise; 2 means level 3 for 'filtered' and
          'consensus' and level 1 for everything else.

    Returns:
       A one-element list for 'premapped', a list of (region, status) pairs
       for the other steps. status is False for a missing file, otherwise
       True, a count (detail 3) or 'OLD' ('mapped_filtered' only: the
       filtered file is older than the mapped-to-initial one).
       None if the step or the detail level is not recognised.
    '''
    def existence(regions, filename_of):
        '''Pair every region with the existence flag of its file'''
        return [(fr, os.path.isfile(filename_of(fr))) for fr in regions]

    def measured(regions, filename_of, measure):
        '''Pair every region with measure(file), or False if it is missing'''
        report = []
        for fr in regions:
            fn = filename_of(fr)
            report.append((fr, measure(fn) if os.path.isfile(fn) else False))
        return report

    def n_reads(fn):
        '''Number of reads; the counter is only looked up when needed'''
        return get_number_reads(fn)

    def mapped_filename(filtered):
        '''Filename getter for mapped reads, filtered or not'''
        return lambda fr: sample.get_mapped_filename(fr, filtered=filtered)

    if detail == 2:
        finer = 3 if step in ('filtered', 'consensus') else 1
        return check_status(sample, step, detail=finer)

    if detail == 1:
        if step == 'premapped':
            return [os.path.isfile(sample.get_premapped_filename())]
        if step == 'divided':
            return existence(sample.regions_complete,
                             sample.get_divided_filename)
        if step == 'consensus':
            return existence(sample.regions_generic,
                             sample.get_consensus_filename)
        if step == 'mapped':
            return existence(sample.regions_generic, mapped_filename(False))
        if step == 'filtered':
            return existence(sample.regions_generic, mapped_filename(True))
        if step == 'mapped_initial':
            return existence(sample.regions_generic,
                             sample.get_mapped_to_initial_filename)
        if step == 'mapped_filtered':
            # The filtered reads must not be older than the reads mapped to
            # the initial reference
            from hivwholeseq.utils.generic import modification_date

            report = []
            for fr in sample.regions_generic:
                fn_initial = sample.get_mapped_to_initial_filename(fr)
                fn_filtered = sample.get_mapped_filtered_filename(fr)
                if not os.path.isfile(fn_filtered):
                    state = False
                elif not os.path.isfile(fn_initial):
                    state = True
                else:
                    md_initial = modification_date(fn_initial)
                    md_filtered = modification_date(fn_filtered)
                    state = 'OLD' if md_filtered < md_initial else True
                report.append((fr, state))
            return report
        return None

    if detail == 3:
        if step == 'premapped':
            fn = sample.get_premapped_filename()
            return [n_reads(fn) if os.path.isfile(fn) else False]
        if step == 'divided':
            return measured(sample.regions_complete,
                            sample.get_divided_filename, n_reads)
        if step == 'consensus':
            return measured(sample.regions_generic,
                            sample.get_consensus_filename,
                            lambda fn: len(SeqIO.read(fn, 'fasta')))
        if step == 'mapped':
            return measured(sample.regions_generic,
                            mapped_filename(False), n_reads)
        if step == 'filtered':
            return measured(sample.regions_generic,
                            mapped_filename(True), n_reads)
        # TODO: add mapped_to_initial and downstream
        if step in ('mapped_initial', 'mapped_filtered'):
            return check_status(sample, step, detail=1)

    return None
def print_info_genomewide(p, title, name, method, VERBOSE=0, require_all=True):
    '''Print the status of a genomewide observable, one row per sample.

    Parameters:
       p: a patient (iterated via p.samples.iterrows()) or a single
          SamplePat; its `mod_dates` dict is read and updated.
       title: header printed on a row of its own before the samples.
       name: key under which the date of each genomewide file is stored, as
          (name, 'genomewide', samplename). The per-fragment dates stored
          under the same name are the requisites. None skips the checks.
       method: name of a sample method called as <method>('genomewide'), or
          a callable invoked as method(patient, samplename, 'genomewide').
       VERBOSE: at 3 or more, 'OK' is replaced by the number of reads for
          files whose name ends in 'bam'.
       require_all: if True all six fragments must have a recorded date for
          'OK'; if False a single up-to-date fragment is enough.

    Raises:
       ValueError: after all rows are printed, if any sample was 'OLD'.
    '''
    mod_dates = p.mod_dates
    fragment_names = ['F' + str(n) for n in range(1, 7)]

    def requisite_state(md, name_requisite, samplename, mod_dates,
                        require_all=True):
        '''Compare the genomewide date with the dates of its fragments'''
        states = []
        for fragment in fragment_names:
            key = (name_requisite, fragment, samplename)
            if key not in mod_dates:
                states.append('MISS')
            elif md < mod_dates[key]:
                states.append('OLD')
            else:
                states.append('OK')

        # A single outdated fragment wins over everything else
        if 'OLD' in states:
            return 'OLD'
        if require_all:
            return 'MISS' if 'MISS' in states else 'OK'
        return 'OK' if 'OK' in states else 'MISS'

    def any_contaminated(sample):
        '''True if at least one fragment of the sample is flagged'''
        return any('contaminated' in sample[fragment]
                   for fragment in fragment_names)

    import os
    from hivwholeseq.patients.samples import SamplePat

    # NOTE: both whole patients and single samples are accepted
    if isinstance(p, SamplePat):
        sample_iter = [(p.name, p)]
    else:
        sample_iter = p.samples.iterrows()

    seen = set()
    print('{:<{width}}'.format(title + ':', width=title_len))
    for samplename, sample in sample_iter:
        sample = SamplePat(sample)
        row = '{:<{width}}'.format(sample.name + ':', width=title_len)

        if isinstance(method, basestring) and hasattr(sample, method):
            fn = getattr(sample, method)('genomewide')
        else:
            fn = method(sample.patient, samplename, 'genomewide')

        if not os.path.isfile(fn):
            status = 'MISS'
        else:
            md = modification_date(fn)
            mod_dates[(name, 'genomewide', samplename)] = md
            if name is None:
                status = 'OK'
            elif any_contaminated(sample):
                status = 'CONT'
            else:
                status = requisite_state(md, name, samplename, mod_dates,
                                         require_all=require_all)

        # Show the number of reads instead of a bare OK if requested
        if status == 'OK' and fn[-3:] == 'bam' and VERBOSE >= 3:
            status = str(get_number_reads(fn))

        seen.add(status)
        print(row + '{:<{width}}'.format(status, width=cell_len))

    if 'OLD' in seen:
        raise ValueError('OLD status found')
def print_info(p, title, name, method, name_requisite=None, VERBOSE=0):
    '''Print the status of one pipeline step, one row per sample.

    Parameters:
       p: a patient (iterated via p.samples.iterrows()) or a single
          SamplePat; its `mod_dates` dict is read and updated.
       title: header printed on a row of its own before the samples.
       name: key under which the date of each file found is stored, as
          (name, fragment, samplename).
       method: name of a sample method called as <method>(fragment), or a
          callable invoked as method(patient, samplename, fragment).
       name_requisite: if not None, a file is only 'OK' when strictly newer
          than the date stored for the requisite, looked up first per
          sample and then per patient.
       VERBOSE: at 3 or more, 'OK' is replaced by the number of reads for
          files whose name ends in 'bam'.

    Cell values: 'OK' (or a read count), 'OLD', 'MISS' (file absent),
    'CONT' (no requisite date, sample flagged as contaminated), 'ERROR'
    (no requisite date and not flagged).

    Raises:
       ValueError: after all rows are printed, if any cell was 'OLD'.
    '''
    import os
    from hivwholeseq.patients.samples import SamplePat
    from hivwholeseq.utils.mapping import get_number_reads

    mod_dates = p.mod_dates

    # NOTE: both whole patients and single samples are accepted
    if isinstance(p, SamplePat):
        sample_iter = [(p.name, p)]
    else:
        sample_iter = p.samples.iterrows()

    fragment_names = ['F' + str(n) for n in range(1, 7)]

    seen = set()
    print('{:<{width}}'.format(title + ':', width=title_len))
    for samplename, sample in sample_iter:
        sample = SamplePat(sample)
        cells = []
        for fragment in fragment_names:
            if isinstance(method, basestring) and hasattr(sample, method):
                fn = getattr(sample, method)(fragment)
            else:
                fn = method(sample.patient, samplename, fragment)

            if not os.path.isfile(fn):
                status = 'MISS'
            else:
                md = modification_date(fn)
                mod_dates[(name, fragment, samplename)] = md

                sample_key = (name_requisite, fragment, samplename)
                patient_key = (name_requisite, fragment)
                if name_requisite is None:
                    status = 'OK'

                elif sample_key in mod_dates:
                    if md > mod_dates[sample_key]:
                        status = 'OK'
                    else:
                        status = 'OLD'
                        # Show what is outdated with respect to what
                        print('%s %s %s' % (fn, md, mod_dates[sample_key]))

                elif patient_key in mod_dates:
                    requisite_md = mod_dates[patient_key]
                    if md > requisite_md:
                        status = 'OK'
                    elif ((name_requisite == 'reference') and
                          requisite_md.date() == datetime.date(2014, 11, 13)):
                        # NOTE: on Nov 13, 2014 the modification dates of all
                        # references were updated by mistake, without any
                        # change to the sequences. A reference requisite
                        # carrying that date is therefore not really newer.
                        status = 'OK'
                    else:
                        status = 'OLD'

                elif 'contaminated' in sample[fragment]:
                    status = 'CONT'

                else:
                    status = 'ERROR'

            # Show the number of reads instead of a bare OK if requested
            if status == 'OK' and fn[-3:] == 'bam' and VERBOSE >= 3:
                status = str(get_number_reads(fn))

            seen.add(status)
            cell = '{:>{width}}'.format(status,
                                        width=cell_len - len(fragment) - 1)
            cells.append(fragment + ': ' + cell + ' ')

        row_title = '{:<{width}}'.format(sample.name + ':', width=title_len)
        print(row_title + ''.join(cells))

    if 'OLD' in seen:
        raise ValueError('OLD status found')
def print_info_genomewide(p, title, name, method, VERBOSE=0, require_all=True):
    '''Print the status of a genomewide observable, one row per sample.

    Parameters:
       p: a patient (iterated via p.samples.iterrows()) or a single
          SamplePat; its `mod_dates` dict is read and updated.
       title: header printed on a row of its own before the samples.
       name: key under which the date of each genomewide file is stored, as
          (name, 'genomewide', samplename). The per-fragment dates stored
          under the same name are the requisites. None skips the checks.
       method: name of a sample method called as <method>('genomewide'), or
          a callable invoked as method(patient, samplename, 'genomewide').
       VERBOSE: at 3 or more, 'OK' is replaced by the number of reads for
          files whose name ends in 'bam'.
       require_all: if True all six fragments must have a recorded date for
          'OK'; if False a single up-to-date fragment is enough.

    Raises:
       ValueError: after all rows are printed, if any sample was 'OLD'.
    '''
    mod_dates = p.mod_dates
    fragment_names = ['F' + str(n) for n in range(1, 7)]

    def requisite_state(md, name_requisite, samplename, mod_dates,
                        require_all=True):
        '''Compare the genomewide date with the dates of its fragments'''
        states = []
        for fragment in fragment_names:
            key = (name_requisite, fragment, samplename)
            if key not in mod_dates:
                states.append('MISS')
            elif md < mod_dates[key]:
                states.append('OLD')
            else:
                states.append('OK')

        # A single outdated fragment wins over everything else
        if 'OLD' in states:
            return 'OLD'
        if require_all:
            return 'MISS' if 'MISS' in states else 'OK'
        return 'OK' if 'OK' in states else 'MISS'

    def any_contaminated(sample):
        '''True if at least one fragment of the sample is flagged'''
        return any('contaminated' in sample[fragment]
                   for fragment in fragment_names)

    import os
    from hivwholeseq.patients.samples import SamplePat

    # NOTE: both whole patients and single samples are accepted
    if isinstance(p, SamplePat):
        sample_iter = [(p.name, p)]
    else:
        sample_iter = p.samples.iterrows()

    seen = set()
    print('{:<{width}}'.format(title + ':', width=title_len))
    for samplename, sample in sample_iter:
        sample = SamplePat(sample)
        row = '{:<{width}}'.format(sample.name + ':', width=title_len)

        if isinstance(method, basestring) and hasattr(sample, method):
            fn = getattr(sample, method)('genomewide')
        else:
            fn = method(sample.patient, samplename, 'genomewide')

        if not os.path.isfile(fn):
            status = 'MISS'
        else:
            md = modification_date(fn)
            mod_dates[(name, 'genomewide', samplename)] = md
            if name is None:
                status = 'OK'
            elif any_contaminated(sample):
                status = 'CONT'
            else:
                status = requisite_state(md, name, samplename, mod_dates,
                                         require_all=require_all)

        # Show the number of reads instead of a bare OK if requested
        if status == 'OK' and fn[-3:] == 'bam' and VERBOSE >= 3:
            status = str(get_number_reads(fn))

        seen.add(status)
        print(row + '{:<{width}}'.format(status, width=cell_len))

    if 'OLD' in seen:
        raise ValueError('OLD status found')
def print_info(p, title, name, method, name_requisite=None, VERBOSE=0):
    '''Print the status of one pipeline step, one row per sample.

    Parameters:
       p: a patient (iterated via p.samples.iterrows()) or a single
          SamplePat; its `mod_dates` dict is read and updated.
       title: header printed on a row of its own before the samples.
       name: key under which the date of each file found is stored, as
          (name, fragment, samplename).
       method: name of a sample method called as <method>(fragment), or a
          callable invoked as method(patient, samplename, fragment).
       name_requisite: if not None, a file is only 'OK' when strictly newer
          than the date stored for the requisite, looked up first per
          sample and then per patient.
       VERBOSE: at 3 or more, 'OK' is replaced by the number of reads for
          files whose name ends in 'bam'.

    Cell values: 'OK' (or a read count), 'OLD', 'MISS' (file absent),
    'CONT' (no requisite date, sample flagged as contaminated), 'ERROR'
    (no requisite date and not flagged).

    Raises:
       ValueError: after all rows are printed, if any cell was 'OLD'.
    '''
    import os
    from hivwholeseq.patients.samples import SamplePat
    from hivwholeseq.utils.mapping import get_number_reads

    mod_dates = p.mod_dates

    # NOTE: both whole patients and single samples are accepted
    if isinstance(p, SamplePat):
        sample_iter = [(p.name, p)]
    else:
        sample_iter = p.samples.iterrows()

    fragment_names = ['F' + str(n) for n in range(1, 7)]

    seen = set()
    print('{:<{width}}'.format(title + ':', width=title_len))
    for samplename, sample in sample_iter:
        sample = SamplePat(sample)
        cells = []
        for fragment in fragment_names:
            if isinstance(method, basestring) and hasattr(sample, method):
                fn = getattr(sample, method)(fragment)
            else:
                fn = method(sample.patient, samplename, fragment)

            if not os.path.isfile(fn):
                status = 'MISS'
            else:
                md = modification_date(fn)
                mod_dates[(name, fragment, samplename)] = md

                sample_key = (name_requisite, fragment, samplename)
                patient_key = (name_requisite, fragment)
                if name_requisite is None:
                    status = 'OK'

                elif sample_key in mod_dates:
                    if md > mod_dates[sample_key]:
                        status = 'OK'
                    else:
                        status = 'OLD'
                        # Show what is outdated with respect to what
                        print('%s %s %s' % (fn, md, mod_dates[sample_key]))

                elif patient_key in mod_dates:
                    requisite_md = mod_dates[patient_key]
                    if md > requisite_md:
                        status = 'OK'
                    elif ((name_requisite == 'reference') and
                          requisite_md.date() == datetime.date(2014, 11, 13)):
                        # NOTE: on Nov 13, 2014 the modification dates of all
                        # references were updated by mistake, without any
                        # change to the sequences. A reference requisite
                        # carrying that date is therefore not really newer.
                        status = 'OK'
                    else:
                        status = 'OLD'

                elif 'contaminated' in sample[fragment]:
                    status = 'CONT'

                else:
                    status = 'ERROR'

            # Show the number of reads instead of a bare OK if requested
            if status == 'OK' and fn[-3:] == 'bam' and VERBOSE >= 3:
                status = str(get_number_reads(fn))

            seen.add(status)
            cell = '{:>{width}}'.format(status,
                                        width=cell_len - len(fragment) - 1)
            cells.append(fragment + ': ' + cell + ' ')

        row_title = '{:<{width}}'.format(sample.name + ':', width=title_len)
        print(row_title + ''.join(cells))

    if 'OLD' in seen:
        raise ValueError('OLD status found')