def sample_pat(self):
    '''Return the patient sample of this sequencing sample, building it once.

    The object is created on first access from the row of the sequenced
    samples table indexed by self['patient sample'] and then cached in
    self._sample_pat, so later calls return the very same object.
    '''
    # Fast path: the patient sample has been built already
    if self._sample_pat is not None:
        return self._sample_pat

    # Imported here rather than at module level, as in the original code
    from hivwholeseq.patients.patients import load_samples_sequenced as lssp
    from hivwholeseq.patients.patients import SamplePat

    table = lssp(include_wrong=True)
    row = table.loc[self['patient sample']]
    self._sample_pat = SamplePat(row)
    return self._sample_pat
def print_info_genomewide(p, title, name, method, VERBOSE=0, require_all=True): '''Pretty printer for patient pipeline info''' mod_dates = p.mod_dates def check_requisite_genomewide(md, name_requisite, samplename, mod_dates, require_all=True): '''Check requisites for genomewide observables''' stati = [] fragments = ['F' + str(i + 1) for i in xrange(6)] for fragment in fragments: if (name_requisite, fragment, samplename) not in mod_dates: stati.append('MISS') elif md < mod_dates[(name_requisite, fragment, samplename)]: stati.append('OLD') else: stati.append('OK') if 'OLD' in stati: return 'OLD' else: if require_all: if 'MISS' in stati: return 'MISS' else: return 'OK' else: if 'OK' in stati: return 'OK' else: return 'MISS' def check_contamination_genomewide(sample): '''Check whether any of the fragment samples is contaminated''' fragments = ['F' + str(i + 1) for i in xrange(6)] for fragment in fragments: if 'contaminated' in sample[fragment]: return True return False import os, sys from hivwholeseq.patients.samples import SamplePat # NOTE: this function is used to check both entire patients and single samples if isinstance(p, SamplePat): sample_iter = [(p.name, p)] else: sample_iter = p.samples.iterrows() stati = set() line = ('{:<' + str(title_len) + '}').format(title + ':') print line for samplename, sample in sample_iter: sample = SamplePat(sample) title = sample.name line = ('{:<' + str(title_len) + '}').format(title + ':') if isinstance(method, basestring) and hasattr(sample, method): fun = getattr(sample, method) fn = fun('genomewide') else: fn = method(sample.patient, samplename, 'genomewide') if os.path.isfile(fn): md = modification_date(fn) mod_dates[(name, 'genomewide', samplename)] = md if name is None: status = 'OK' elif check_contamination_genomewide(sample): status = 'CONT' else: status = check_requisite_genomewide(md, name, samplename, mod_dates, require_all=require_all) else: status = 'MISS' # Check the number of reads if requested if (status == 'OK') 
and (fn[-3:] == 'bam') and (VERBOSE >= 3): status = str(get_number_reads(fn)) stati.add(status) line = line + ('{:<' + str(cell_len) + '}').format(status) print line if 'OLD' in stati: raise ValueError('OLD status found')
else: samples_focal = samples if VERBOSE >= 2: print 'samples', samples_focal.index.tolist() if not fragments: fragments = ['F' + str(i) for i in xrange(1, 7)] if VERBOSE >= 3: print 'fragments', fragments if submit: for fragment in fragments: for samplename, sample in samples_focal.iterrows(): sample = SamplePat(sample) if PCR is None: PCRs_sample = (1, 2) else: PCRs_sample = [PCR] for PCR_sample in PCRs_sample: bamfilename = sample.get_mapped_filtered_filename( fragment, PCR=PCR_sample, decontaminated=False) if not os.path.isfile(bamfilename): continue #if check_already_decontaminated(sample, fragment, PCR_sample): # continue fork_self(samplename, fragment,
def print_info(p, title, name, method, name_requisite=None, VERBOSE=0): '''Pretty printer for patient pipeline info''' import os, sys from hivwholeseq.patients.samples import SamplePat from hivwholeseq.utils.mapping import get_number_reads mod_dates = p.mod_dates # NOTE: this function is used to check both entire patients and single samples if isinstance(p, SamplePat): sample_iter = [(p.name, p)] else: sample_iter = p.samples.iterrows() fragments = ['F' + str(i + 1) for i in xrange(6)] stati = set() line = ('{:<' + str(title_len) + '}').format(title + ':') print line for samplename, sample in sample_iter: sample = SamplePat(sample) title = sample.name line = ('{:<' + str(title_len) + '}').format(title + ':') for fragment in fragments: if isinstance(method, basestring) and hasattr(sample, method): fun = getattr(sample, method) fn = fun(fragment) else: fn = method(sample.patient, samplename, fragment) if os.path.isfile(fn): md = modification_date(fn) mod_dates[(name, fragment, samplename)] = md if name_requisite is None: status = 'OK' elif ((name_requisite, fragment, samplename) in mod_dates): if md > mod_dates[(name_requisite, fragment, samplename)]: status = 'OK' else: status = 'OLD' print fn, md, mod_dates[(name_requisite, fragment, samplename)] elif ((name_requisite, fragment) in mod_dates): if md > mod_dates[(name_requisite, fragment)]: status = 'OK' else: status = 'OLD' # NOTE: on Nov 13, 2014 I updated the mod dates of all # references by mistake, without actually changing the # sequences (ironically, probably testing a backup system # for the refs themselves). 
So if the requisite is a ref # seq and the date is this one, it's OK if ((name_requisite == 'reference') and mod_dates[(name_requisite, fragment)].date() == \ datetime.date(2014, 11, 13)): status = 'OK' elif 'contaminated' in sample[fragment]: status = 'CONT' else: status = 'ERROR' else: status = 'MISS' # Check the number of reads if requested if (status == 'OK') and (fn[-3:] == 'bam') and (VERBOSE >= 3): status = str(get_number_reads(fn)) stati.add(status) line = line+fragment+': '+\ ('{:>'+str(cell_len - len(fragment) - 1)+'}').format(status)+' ' print line if 'OLD' in stati: raise ValueError('OLD status found')
samplenames = args.samples VERBOSE = args.verbose use_save = args.save samples_pat = lssp() if pnames is not None: samples_pat = samples_pat.loc[samples_pat.patient.isin(pnames)] elif samplenames is not None: samples_pat = samples_pat.loc[samples_pat.index.isin(samplenames)] if VERBOSE >= 2: print 'samples', samples_pat.index.tolist() consensi = [] for samplename_pat, sample_pat in samples_pat.iterrows(): sample_pat = SamplePat(sample_pat) if VERBOSE >= 1: print sample_pat.name, consensi_pat = {} try: for i in xrange(6): fragment = 'F' + str(i + 1) consensi_pat[fragment] = sample_pat.get_consensus(fragment) except IOError: if VERBOSE >= 1: print 'warning: some consensus not found: skipping' continue except ValueError: