def sample_pat(self):
    '''Patient sample this sequencing sample belongs to (loaded once, then cached)

    The first access looks up the row named by self['patient sample'] in the
    table of sequenced patient samples (loaded with include_wrong=True), wraps
    it in a SamplePat and stores it in self._sample_pat. Later accesses return
    the stored object.
    '''
    # Fast path: the lookup has already been done
    if self._sample_pat is not None:
        return self._sample_pat

    # Imported at call time rather than at module import time
    from hivwholeseq.patients.patients import (
        load_samples_sequenced as lssp, SamplePat)

    table = lssp(include_wrong=True)
    row = table.loc[self['patient sample']]
    self._sample_pat = SamplePat(row)
    return self._sample_pat
else: samples_focal = samples if VERBOSE >= 2: print "samples", samples_focal.index.tolist() if not fragments: fragments = ["F" + str(i) for i in xrange(1, 7)] if VERBOSE >= 3: print "fragments", fragments if submit: for fragment in fragments: for samplename, sample in samples_focal.iterrows(): sample = SamplePat(sample) if PCR is None: PCRs_sample = (1, 2) else: PCRs_sample = [PCR] for PCR_sample in PCRs_sample: bamfilename = sample.get_mapped_filtered_filename(fragment, PCR=PCR_sample, decontaminated=False) if not os.path.isfile(bamfilename): continue # if check_already_decontaminated(sample, fragment, PCR_sample): # continue fork_self(samplename, fragment, VERBOSE=VERBOSE, maxreads=maxreads, summary=summary, PCR=PCR_sample) sys.exit()
counts_all = [] for fragment in fragments: counts = [] for samplename, sample in samples.iterrows(): if VERBOSE >= 1: print samplename, fragment, if VERBOSE >= 2: print '' if submit: fork_self(samplename, fragment, VERBOSE=VERBOSE, PCR=PCR, block_len=block_len, n_reads_per_ali=n_reads_per_ali) continue sample = SamplePat(sample) pname = sample.patient refseq = SeqIO.read(get_initial_reference_filename(pname, fragment), 'fasta') refm = np.array(refseq) len_reference = len(refseq) # NOTE: we need consensi to decontaminate, so bamfilename = sample.get_mapped_filtered_filename(fragment, PCR=PCR, decontaminated=(not use_raw_reads)) if not os.path.isfile(bamfilename): continue if VERBOSE >= 1: print 'PCR', PCR, if VERBOSE >= 2:
def print_info_genomewide(p, title, name, method, VERBOSE=0, require_all=True): '''Pretty printer for patient pipeline info''' mod_dates = p.mod_dates def check_requisite_genomewide(md, name_requisite, samplename, mod_dates, require_all=True): '''Check requisites for genomewide observables''' stati = [] fragments = ['F' + str(i + 1) for i in xrange(6)] for fragment in fragments: if (name_requisite, fragment, samplename) not in mod_dates: stati.append('MISS') elif md < mod_dates[(name_requisite, fragment, samplename)]: stati.append('OLD') else: stati.append('OK') if 'OLD' in stati: return 'OLD' else: if require_all: if 'MISS' in stati: return 'MISS' else: return 'OK' else: if 'OK' in stati: return 'OK' else: return 'MISS' def check_contamination_genomewide(sample): '''Check whether any of the fragment samples is contaminated''' fragments = ['F' + str(i + 1) for i in xrange(6)] for fragment in fragments: if 'contaminated' in sample[fragment]: return True return False import os, sys from hivwholeseq.patients.samples import SamplePat # NOTE: this function is used to check both entire patients and single samples if isinstance(p, SamplePat): sample_iter = [(p.name, p)] else: sample_iter = p.samples.iterrows() stati = set() line = ('{:<' + str(title_len) + '}').format(title + ':') print line for samplename, sample in sample_iter: sample = SamplePat(sample) title = sample.name line = ('{:<' + str(title_len) + '}').format(title + ':') if isinstance(method, basestring) and hasattr(sample, method): fun = getattr(sample, method) fn = fun('genomewide') else: fn = method(sample.patient, samplename, 'genomewide') if os.path.isfile(fn): md = modification_date(fn) mod_dates[(name, 'genomewide', samplename)] = md if name is None: status = 'OK' elif check_contamination_genomewide(sample): status = 'CONT' else: status = check_requisite_genomewide(md, name, samplename, mod_dates, require_all=require_all) else: status = 'MISS' # Check the number of reads if requested if (status == 'OK') 
and (fn[-3:] == 'bam') and (VERBOSE >= 3): status = str(get_number_reads(fn)) stati.add(status) line = line + ('{:<' + str(cell_len) + '}').format(status) print line if 'OLD' in stati: raise ValueError('OLD status found')
samplenames = args.samples VERBOSE = args.verbose use_save = args.save samples_pat = lssp() if pnames is not None: samples_pat = samples_pat.loc[samples_pat.patient.isin(pnames)] elif samplenames is not None: samples_pat = samples_pat.loc[samples_pat.index.isin(samplenames)] if VERBOSE >= 2: print 'samples', samples_pat.index.tolist() consensi = [] for samplename_pat, sample_pat in samples_pat.iterrows(): sample_pat = SamplePat(sample_pat) if VERBOSE >= 1: print sample_pat.name, consensi_pat = {} try: for i in xrange(6): fragment = 'F'+str(i+1) consensi_pat[fragment] = sample_pat.get_consensus(fragment) except IOError: if VERBOSE >= 1: print 'warning: some consensus not found: skipping' continue except ValueError:
def print_info(p, title, name, method, name_requisite=None, VERBOSE=0): '''Pretty printer for patient pipeline info''' import os, sys from hivwholeseq.patients.samples import SamplePat from hivwholeseq.utils.mapping import get_number_reads mod_dates = p.mod_dates # NOTE: this function is used to check both entire patients and single samples if isinstance(p, SamplePat): sample_iter = [(p.name, p)] else: sample_iter = p.samples.iterrows() fragments = ['F' + str(i + 1) for i in xrange(6)] stati = set() line = ('{:<' + str(title_len) + '}').format(title + ':') print line for samplename, sample in sample_iter: sample = SamplePat(sample) title = sample.name line = ('{:<' + str(title_len) + '}').format(title + ':') for fragment in fragments: if isinstance(method, basestring) and hasattr(sample, method): fun = getattr(sample, method) fn = fun(fragment) else: fn = method(sample.patient, samplename, fragment) if os.path.isfile(fn): md = modification_date(fn) mod_dates[(name, fragment, samplename)] = md if name_requisite is None: status = 'OK' elif ((name_requisite, fragment, samplename) in mod_dates): if md > mod_dates[(name_requisite, fragment, samplename)]: status = 'OK' else: status = 'OLD' print fn, md, mod_dates[(name_requisite, fragment, samplename)] elif ((name_requisite, fragment) in mod_dates): if md > mod_dates[(name_requisite, fragment)]: status = 'OK' else: status = 'OLD' # NOTE: on Nov 13, 2014 I updated the mod dates of all # references by mistake, without actually changing the # sequences (ironically, probably testing a backup system # for the refs themselves). 
So if the requisite is a ref # seq and the date is this one, it's OK if ((name_requisite == 'reference') and mod_dates[(name_requisite, fragment)].date() == \ datetime.date(2014, 11, 13)): status = 'OK' elif 'contaminated' in sample[fragment]: status = 'CONT' else: status = 'ERROR' else: status = 'MISS' # Check the number of reads if requested if (status == 'OK') and (fn[-3:] == 'bam') and (VERBOSE >= 3): status = str(get_number_reads(fn)) stati.add(status) line = line+fragment+': '+\ ('{:>'+str(cell_len - len(fragment) - 1)+'}').format(status)+' ' print line if 'OLD' in stati: raise ValueError('OLD status found')
else: samples_focal = samples if VERBOSE >= 2: print 'samples', samples_focal.index.tolist() if not fragments: fragments = ['F' + str(i) for i in xrange(1, 7)] if VERBOSE >= 3: print 'fragments', fragments if submit: for fragment in fragments: for samplename, sample in samples_focal.iterrows(): sample = SamplePat(sample) if PCR is None: PCRs_sample = (1, 2) else: PCRs_sample = [PCR] for PCR_sample in PCRs_sample: bamfilename = sample.get_mapped_filtered_filename( fragment, PCR=PCR_sample, decontaminated=False) if not os.path.isfile(bamfilename): continue #if check_already_decontaminated(sample, fragment, PCR_sample): # continue fork_self(samplename, fragment,
parser.add_argument('--samples', nargs='+', help='Samples to analyze') parser.add_argument('--verbose', type=int, default=0, help='Verbosity level [0-3]') args = parser.parse_args() VERBOSE = args.verbose samplenames = args.samples samples = load_samples_sequenced() if samplenames is not None: samples = samples.loc[samplenames] for samplename, sample in samples.iterrows(): sample = SamplePat(sample) mod_dates = {} pretty_print_info( sample, 'Map + filter', 'filter', 'get_mapped_filtered_filename', None, #'reference', mod_dates, VERBOSE=VERBOSE) pretty_print_info(sample, 'Decontaminate', 'decontaminate',
for samplename, sample in samples.iterrows(): if VERBOSE >= 1: print samplename, fragment, if VERBOSE >= 2: print '' if submit: fork_self(samplename, fragment, VERBOSE=VERBOSE, PCR=PCR, block_len=block_len, n_reads_per_ali=n_reads_per_ali) continue sample = SamplePat(sample) pname = sample.patient refseq = SeqIO.read( get_initial_reference_filename(pname, fragment), 'fasta') refm = np.array(refseq) len_reference = len(refseq) # NOTE: we need consensi to decontaminate, so bamfilename = sample.get_mapped_filtered_filename( fragment, PCR=PCR, decontaminated=(not use_raw_reads)) if not os.path.isfile(bamfilename): continue if VERBOSE >= 1: print 'PCR', PCR, if VERBOSE >= 2:
samplenames = args.samples VERBOSE = args.verbose use_save = args.save samples_pat = lssp() if pnames is not None: samples_pat = samples_pat.loc[samples_pat.patient.isin(pnames)] elif samplenames is not None: samples_pat = samples_pat.loc[samples_pat.index.isin(samplenames)] if VERBOSE >= 2: print 'samples', samples_pat.index.tolist() consensi = [] for samplename_pat, sample_pat in samples_pat.iterrows(): sample_pat = SamplePat(sample_pat) if VERBOSE >= 1: print sample_pat.name, consensi_pat = {} try: for i in xrange(6): fragment = 'F' + str(i + 1) consensi_pat[fragment] = sample_pat.get_consensus(fragment) except IOError: if VERBOSE >= 1: print 'warning: some consensus not found: skipping' continue except ValueError: