def get_initial_allele_counts(self, fragment):
    '''Get allele counts from the initial time point

    The rows of self.samples are scanned in their stored order and the
    counts of the first sample whose allele count file for this fragment
    exists on disk are returned.

    Parameters:
       fragment: fragment identifier, passed unchanged to
         SamplePat.get_allele_counts_filename and
         SamplePat.get_allele_counts.

    Returns:
       the result of SamplePat.get_allele_counts for the first sample that
       has an allele count file, or None if no sample has one.
    '''
    import os
    from hivwholeseq.patients.samples import SamplePat

    # Iterate over the rows themselves instead of positional indices; each
    # row is the same Series that self.samples.iloc[i] would return.
    for _, row in self.samples.iterrows():
        sample = SamplePat(row)
        if os.path.isfile(sample.get_allele_counts_filename(fragment)):
            return sample.get_allele_counts(fragment)

    # No sample has allele counts on disk for this fragment
    return None
print protein, samplename sample = SamplePat(sample) # NOTE: How do we find what fragment covers the protein? Well, a # protein can happily cross fragments. Since each # codon is independent, we should iterate over codons. We do not # do that for efficiency reasons. Instead, we identify all potential # fragments and split the protein into full codon chunks covered by # a single fragment. fragment_rois = sample.get_fragments_covered( protein, include_coordinates=True) refseq = sample.get_reference(protein) fn_out = sample.get_allele_counts_filename(protein, PCR=PCR, qual_min=qual_min, type='aa') from hivwholeseq.utils.sequence import alphaa count = np.zeros((len(alphaa), len(refseq) // 3), int) for frroi in fragment_rois: fragment = frroi['name'] start_fr, end_fr = frroi['fragment'] start, end = frroi['roi'] # Check that we align with codons rf = start % 3 if rf: start_fr += 3 - rf start += 3 - rf rf = end % 3
sample = SamplePat(sample) pname = sample.patient conss_genomewide = SeqIO.read( get_initial_reference_filename(pname, 'genomewide'), 'fasta') # Collect the allele counts (where possible) acs = [] for fragment in ['F' + str(i) for i in xrange(1, 7)]: try: ref = ''.join( SeqIO.read(get_initial_reference_filename(pname, fragment), 'fasta')) ac = sample.get_allele_counts(fragment, merge_read_types=False) acs.append((fragment, ref, ac)) except IOError: continue if not len(acs): if VERBOSE >= 1: print 'No data found: skipping' continue # Merge allele counts ac = merge_allele_counts(conss_genomewide, acs, VERBOSE=VERBOSE) if save_to_file: fn_out = sample.get_allele_counts_filename('genomewide') np.save(fn_out, ac) if VERBOSE >= 1: print 'Genomewide allele counts saved to:', fn_out
for fragment in fragments: counts = [] for samplename, sample in samples.iterrows(): if submit: fork_self(samplename, fragment, VERBOSE=VERBOSE, qual_min=qual_min) continue if VERBOSE >= 1: print fragment, samplename sample = SamplePat(sample) pname = sample.patient refseq = SeqIO.read(get_initial_reference_filename(pname, fragment), 'fasta') fn = sample.get_mapped_filtered_filename(fragment, PCR=PCR) if not os.path.isfile(fn): warn('No BAM file found', NoDataWarning) continue count, _ = gac(fn, len(refseq), qual_min=qual_min, VERBOSE=VERBOSE) counts.append(count) if save_to_file: fn_out = sample.get_allele_counts_filename(fragment, PCR=PCR, qual_min=qual_min) count.dump(fn_out) if VERBOSE >= 2: print 'Allele counts saved:', samplename, fragment
fork_self(samplename, fragment, VERBOSE=VERBOSE, qual_min=qual_min) continue if VERBOSE >= 1: print fragment, samplename sample = SamplePat(sample) pname = sample.patient refseq = SeqIO.read( get_initial_reference_filename(pname, fragment), 'fasta') fn = sample.get_mapped_filtered_filename(fragment, PCR=PCR) if not os.path.isfile(fn): warn('No BAM file found', NoDataWarning) continue count, _ = gac(fn, len(refseq), qual_min=qual_min, VERBOSE=VERBOSE) counts.append(count) if save_to_file: fn_out = sample.get_allele_counts_filename(fragment, PCR=PCR, qual_min=qual_min) count.dump(fn_out) if VERBOSE >= 2: print 'Allele counts saved:', samplename, fragment
print protein, samplename sample = SamplePat(sample) # NOTE: How do we find what fragment covers the protein? Well, a # protein can happily cross fragments. Since each # codon is independent, we should iterate over codons. We do not # do that for efficiency reasons. Instead, we identify all potential # fragments and split the protein into full codon chunks covered by # a single fragment. fragment_rois = sample.get_fragments_covered(protein, include_coordinates=True) refseq = sample.get_reference(protein) fn_out = sample.get_allele_counts_filename(protein, PCR=PCR, qual_min=qual_min, type='aa') from hivwholeseq.utils.sequence import alphaa count = np.zeros((len(alphaa), len(refseq) // 3), int) for frroi in fragment_rois: fragment = frroi['name'] start_fr, end_fr = frroi['fragment'] start, end = frroi['roi'] # Check that we align with codons rf = start % 3 if rf: start_fr += 3 - rf start += 3 - rf rf = end % 3
for samplename, sample in samples.iterrows(): if VERBOSE >= 1: print samplename sample = SamplePat(sample) pname = sample.patient conss_genomewide = SeqIO.read(get_initial_reference_filename(pname, 'genomewide'), 'fasta') # Collect the allele counts (where possible) acs = [] for fragment in ['F'+str(i) for i in xrange(1, 7)]: try: ref = ''.join(SeqIO.read(get_initial_reference_filename(pname, fragment), 'fasta')) ac = sample.get_allele_counts(fragment, merge_read_types=False) acs.append((fragment, ref, ac)) except IOError: continue if not len(acs): if VERBOSE >= 1: print 'No data found: skipping' continue # Merge allele counts ac = merge_allele_counts(conss_genomewide, acs, VERBOSE=VERBOSE) if save_to_file: fn_out = sample.get_allele_counts_filename('genomewide') np.save(fn_out, ac) if VERBOSE >= 1: print 'Genomewide allele counts saved to:', fn_out