def get_reads():
    """Yield a `fastq.Read` for each entry of the 'non_long_polyA' counts.

    Takes no arguments; `self` is read from the enclosing scope. The
    `(seq, count)` pairs are visited in the order produced by
    `.most_common()`. Each read is named '<index>_<count>' and every base
    is given a quality value of 40.
    """
    unmapped_counts = self.read_file('common_unmapped')['non_long_polyA']
    for rank, (sequence, occurrences) in enumerate(unmapped_counts.most_common()):
        read_name = '{0}_{1}'.format(rank, occurrences)
        # Uniform quality: one value of 40 per base of the sequence.
        qualities = fastq.encode_sanger([40] * len(sequence))
        yield fastq.Read(read_name, sequence, qualities)
def make_artificial_reads(
    transcript,
    fragment_length,
    read_length,
    adapter_sequence,
    region_fetcher,
    common_buffer,
):
    """Yield one artificial read per start position along a transcript.

    For every position from `-common_buffer` up to (but excluding)
    `transcript.CDS_length + common_buffer`, a fragment of `fragment_length`
    bases is sliced from the retrieved transcript sequence, the adapter is
    appended, and the result is truncated to `read_length`.

    Args:
        transcript: object providing `retrieve_sequence`, `CDS_length` and
            `name`.
        fragment_length: number of transcript bases in each fragment.
        read_length: maximum length of each emitted read.
        adapter_sequence: sequence appended after each fragment.
        region_fetcher: passed through to `transcript.retrieve_sequence`.
        common_buffer: number of positions to extend on both sides of the
            CDS; also used as the left/right buffer of the retrieved
            sequence.

    Yields:
        `fastq.Read` whose name is the identifier of the artificial
        annotation for that position, and whose quality string always has
        the same length as its sequence.
    """
    transcript_sequence = transcript.retrieve_sequence(
        region_fetcher,
        left_buffer=common_buffer,
        right_buffer=common_buffer + fragment_length,
    )

    # Needs to include one non-Solexa value for automatic encoding recognition.
    high_quals = fastq.encode_sanger([25] + [30] * (read_length - 1))

    positions = range(-common_buffer, transcript.CDS_length + common_buffer)
    for i, transcript_position in enumerate(positions):
        annotation = artifical_annotation(
            transcript_name=transcript.name,
            position=transcript_position,
        )

        fragment_sequence = transcript_sequence[i:i + fragment_length]
        if '-' in fragment_sequence:
            # skip fragments that run off the edge of a reference sequence
            continue

        read_sequence = (fragment_sequence + adapter_sequence)[:read_length]
        # The fragment plus adapter can be shorter than read_length (short
        # adapter, or read_length <= 0); trim the qualities from the end so
        # the record stays well-formed. The leading non-Solexa value is kept.
        read_quals = high_quals[:len(read_sequence)]

        yield fastq.Read(annotation.identifier, read_sequence, read_quals)
def get_reads():
    """Yield a `fastq.Read` for each entry of the unmapped-sequence counts.

    Takes no arguments; `counts` and `unmapped_fn` are read from the
    surrounding scope. The `(seq, count)` pairs are visited in the order
    produced by `.most_common()`. Each read is named '<index>_<count>' and
    every base is given a quality value of 40.
    """
    ranked_sequences = counts.read_file(unmapped_fn).most_common()
    for rank, (sequence, occurrences) in enumerate(ranked_sequences):
        read_name = '{0}_{1}'.format(rank, occurrences)
        # Uniform quality: one value of 40 per base of the sequence.
        qualities = fastq.encode_sanger([40] * len(sequence))
        yield fastq.Read(read_name, sequence, qualities)
def make_artificial_reads(transcript,
                          fragment_length,
                          read_length,
                          adapter_sequence,
                          region_fetcher,
                          common_buffer,
                          ):
    """Generate artificial reads tiling a transcript, one per start position.

    Start positions run from `-common_buffer` up to (but excluding)
    `transcript.CDS_length + common_buffer`. At each one, `fragment_length`
    bases are taken from the retrieved transcript sequence, followed by
    `adapter_sequence`, and the total is cut to at most `read_length` bases.

    Args:
        transcript: object providing `retrieve_sequence`, `CDS_length` and
            `name`.
        fragment_length: number of transcript bases in each fragment.
        read_length: maximum length of each emitted read.
        adapter_sequence: sequence appended after each fragment.
        region_fetcher: passed through to `transcript.retrieve_sequence`.
        common_buffer: number of positions to extend on both sides of the
            CDS; also used as the left/right buffer of the retrieved
            sequence.

    Yields:
        `fastq.Read` named by the identifier of the artificial annotation
        for the position; its sequence and quality string are always the
        same length.
    """
    transcript_sequence = transcript.retrieve_sequence(
        region_fetcher,
        left_buffer=common_buffer,
        right_buffer=common_buffer + fragment_length,
    )

    # Needs to include one non-Solexa value for automatic encoding recognition.
    high_quals = fastq.encode_sanger([25] + [30]*(read_length - 1))

    first_position = -common_buffer
    end_position = transcript.CDS_length + common_buffer
    for i, transcript_position in enumerate(range(first_position, end_position)):
        annotation = artifical_annotation(transcript_name=transcript.name,
                                          position=transcript_position,
                                          )

        fragment_sequence = transcript_sequence[i:i + fragment_length]
        if '-' in fragment_sequence:
            # skip fragments that run off the edge of a reference sequence
            continue

        full_sequence = fragment_sequence + adapter_sequence
        trimmed_sequence = full_sequence[:read_length]
        # When fragment + adapter is shorter than read_length (or
        # read_length <= 0) the precomputed qualities would be longer than
        # the sequence; cut them to match so the record is well-formed.
        trimmed_quals = high_quals[:len(trimmed_sequence)]

        yield fastq.Read(annotation.identifier, trimmed_sequence, trimmed_quals)