def read_sequences(filename, qualities=False, genbank_callback=None):
    """
    Read fasta or illumina sequences, possibly compressed.

    filename may carry post-reading filters separated by '~~': the first
    part is the file to read, and every further part must start
    (case-insensitively) with one of the prefixes in FILTERS. The text
    after the prefix is handed to that filter as its argument.

    Valid values for qualities:
        False      - if the format carries qualities, the result is
                     passed through filter_no_qualities
        True       - the result is returned as read
        'required' - as True, but grace.Error is raised when the format
                     carries no qualities

    genbank_callback is passed through to read_genbank_sequence.

    Raises grace.Error for an unrecognized file format, for missing
    qualities when they are required, and for an unrecognized filter.
    """
    assert qualities in (False, True, 'required')

    parts = filename.split('~~')
    source = parts[0]
    info = get_file_info(source)

    have_qualities = False
    if 'type-empty' in info:
        # An empty file is treated as having qualities, so it also
        # satisfies qualities='required'.
        have_qualities = True
        result = read_empty(source)
    elif 'type-fasta' in info:
        result = read_fasta(source)
    elif 'type-genbank' in info:
        result = read_genbank_sequence(source, genbank_callback)
    elif 'type-fastq' in info:
        have_qualities = True
        result = read_illumina_with_quality(source)
    elif 'type-gff' in info:
        result = read_gff3_sequence(source)
    elif 'type-sff' in info:
        # No file handle is opened in this function, so there is nothing
        # to close before handing the file to the external converter.
        grace.require_sff2fastq()
        have_qualities = True
        process = run(['sff2fastq', source])
        result = read_illumina_with_quality(process.stdout)
    else:
        raise grace.Error('Unrecognized file format for '+filename)

    if qualities == 'required' and not have_qualities:
        raise grace.Error('Need base qualities in '+filename)

    for part in parts[1:]:
        lowered = part.lower()
        for prefix in FILTERS:
            if lowered.startswith(prefix):
                # The argument keeps its original case; only the prefix
                # match is case-insensitive.
                result = FILTERS[prefix](result, part[len(prefix):])
                break
        else:
            raise grace.Error('Unrecognized filter: '+part)

    if have_qualities and not qualities:
        result = filter_no_qualities(result)

    return result
def read_sequences(filename, qualities=False, genbank_callback=None):
    """
    Read fasta or illumina sequences, possibly compressed.

    The filename can be followed by post-reading filters, each introduced
    by '~~'. The part before the first '~~' names the file; each later
    part is matched case-insensitively against the prefixes in FILTERS
    and the matching filter is applied to the result, receiving the rest
    of the part as its argument.

    Valid values for qualities:
        False      - if the format carries qualities, the result is
                     passed through filter_no_qualities
        True       - the result is returned as read
        'required' - as True, but grace.Error is raised when the format
                     carries no qualities

    genbank_callback is passed through to read_genbank_sequence.

    Raises grace.Error for an unrecognized file format, for missing
    qualities when they are required, and for an unrecognized filter.
    """
    assert qualities in (False, True, 'required')

    parts = filename.split('~~')
    path = parts[0]
    filter_specs = parts[1:]

    info = get_file_info(path)

    have_qualities = False
    if 'type-empty' in info:
        # Empty input counts as having qualities, so it passes the
        # 'required' check below.
        have_qualities = True
        result = read_empty(path)
    elif 'type-fasta' in info:
        result = read_fasta(path)
    elif 'type-genbank' in info:
        result = read_genbank_sequence(path, genbank_callback)
    elif 'type-fastq' in info:
        have_qualities = True
        result = read_illumina_with_quality(path)
    elif 'type-gff' in info:
        result = read_gff3_sequence(path)
    elif 'type-sff' in info:
        # This function never opens the file itself, so no handle needs
        # closing before the external sff2fastq process reads it.
        grace.require_sff2fastq()
        have_qualities = True
        process = run(['sff2fastq', path])
        result = read_illumina_with_quality(process.stdout)
    else:
        raise grace.Error('Unrecognized file format for ' + filename)

    if qualities == 'required' and not have_qualities:
        raise grace.Error('Need base qualities in ' + filename)

    for spec in filter_specs:
        spec_lower = spec.lower()
        for prefix in FILTERS:
            if spec_lower.startswith(prefix):
                # Pass the argument in its original case.
                result = FILTERS[prefix](result, spec[len(prefix):])
                break
        else:
            raise grace.Error('Unrecognized filter: ' + spec)

    if have_qualities and not qualities:
        result = filter_no_qualities(result)

    return result
def read_sequences(filename, qualities=False, genbank_callback=None):
    """
    Read fasta or illumina sequences, possibly compressed.

    The format is chosen from the first few characters of the file:
    nothing (empty file), '>', 'LOCUS', '@', '##gff' or '.sff'.

    filename may carry post-reading filters separated by '~~': the first
    part is the file to read, and every further part must start
    (case-insensitively) with one of the prefixes in FILTERS. The text
    after the prefix is handed to that filter as its argument.

    If qualities is false and the format carries qualities ('@' and
    '.sff' input), the result is passed through filter_no_qualities.

    genbank_callback is passed through to read_genbank_sequence.

    Raises grace.Error for an unrecognized file format or filter.
    """
    parts = filename.split('~~')
    source = parts[0]

    # Peek at the start of the file to sniff its format. The handle is
    # closed even if the read fails.
    f = open_possibly_compressed_file(source)
    try:
        peek = f.read(8)
    finally:
        f.close()

    have_qualities = False
    if not peek:
        result = read_empty(source)
    elif peek.startswith('>'):
        result = read_fasta(source)
    elif peek.startswith('LOCUS'):
        result = read_genbank_sequence(source, genbank_callback)
    elif peek.startswith('@'):
        have_qualities = True
        result = read_illumina_with_quality(source)
    elif peek.startswith('##gff'):
        result = read_gff3_sequence(source)
    elif peek.startswith('.sff'):
        # The peek handle is already closed above, so the external
        # converter can read the file directly.
        grace.require_sff2fastq()
        have_qualities = True
        process = run(['sff2fastq', source])
        result = read_illumina_with_quality(process.stdout)
    else:
        raise grace.Error('Unrecognized file format for ' + filename)

    for part in parts[1:]:
        lowered = part.lower()
        for prefix in FILTERS:
            if lowered.startswith(prefix):
                # The argument keeps its original case; only the prefix
                # match is case-insensitive.
                result = FILTERS[prefix](result, part[len(prefix):])
                break
        else:
            raise grace.Error('Unrecognized filter: ' + part)

    if have_qualities and not qualities:
        result = filter_no_qualities(result)

    return result
def read_sequences(filename, qualities=False, genbank_callback=None):
    """
    Read fasta or illumina sequences, possibly compressed.

    The file type is sniffed from its leading characters: an empty
    read means an empty file, otherwise the start is compared against
    '>', 'LOCUS', '@', '##gff' and '.sff' in that order.

    The filename can be followed by post-reading filters, each introduced
    by '~~'. The part before the first '~~' names the file; each later
    part is matched case-insensitively against the prefixes in FILTERS
    and the matching filter is applied to the result, receiving the rest
    of the part as its argument.

    If qualities is false and the format carries qualities ('@' and
    '.sff' input), the result is passed through filter_no_qualities.

    genbank_callback is passed through to read_genbank_sequence.

    Raises grace.Error for an unrecognized file format or filter.
    """
    parts = filename.split('~~')
    path = parts[0]
    filter_specs = parts[1:]

    # Read just enough of the file to identify it, making sure the
    # handle is released even when the read raises.
    f = open_possibly_compressed_file(path)
    try:
        peek = f.read(8)
    finally:
        f.close()

    have_qualities = False
    if not peek:
        result = read_empty(path)
    elif peek.startswith('>'):
        result = read_fasta(path)
    elif peek.startswith('LOCUS'):
        result = read_genbank_sequence(path, genbank_callback)
    elif peek.startswith('@'):
        have_qualities = True
        result = read_illumina_with_quality(path)
    elif peek.startswith('##gff'):
        result = read_gff3_sequence(path)
    elif peek.startswith('.sff'):
        # No second close is needed here: the sniffing handle was
        # already closed right after the peek.
        grace.require_sff2fastq()
        have_qualities = True
        process = run(['sff2fastq', path])
        result = read_illumina_with_quality(process.stdout)
    else:
        raise grace.Error('Unrecognized file format for '+filename)

    for spec in filter_specs:
        spec_lower = spec.lower()
        for prefix in FILTERS:
            if spec_lower.startswith(prefix):
                # Pass the argument in its original case.
                result = FILTERS[prefix](result, spec[len(prefix):])
                break
        else:
            raise grace.Error('Unrecognized filter: '+spec)

    if have_qualities and not qualities:
        result = filter_no_qualities(result)

    return result