def main():
    """Convert a FASTQ file to FASTA, driven by command-line arguments.

    Arguments are read from ``sys.argv``:

    1. path of the FASTQ input file;
    2. path of the FASTA output file;
    3. (optional) FASTQ variant, passed as ``format`` to ``fastqReader``.
       Falls back to ``'sanger'`` when the argument is omitted or empty.

    Every record yielded by the reader is handed to the writer, then a
    one-line summary of how many reads were converted is printed.

    Raises:
        IndexError: if the input or output path argument is missing.
    """
    input_filename = sys.argv[1]
    output_filename = sys.argv[2]
    # Input type should ordinarily be unnecessary, so accept it being
    # absent altogether as well as being passed as an empty string.
    input_type = (sys.argv[3] if len(sys.argv) > 3 else "") or "sanger"

    # Stays None when the reader yields nothing, which is how the
    # "no reads" case is told apart from "one read" (index 0) below.
    num_reads = None
    out = fastaWriter(path=output_filename, format="fasta")
    try:
        for num_reads, fastq_read in enumerate(
            fastqReader(path=input_filename, format=input_type)
        ):
            out.write(fastq_read)
    finally:
        # Close the output even if reading or writing fails part-way.
        out.close()

    if num_reads is None:
        print("No valid FASTQ reads could be processed.")
    else:
        # enumerate() is zero-based, so the last index + 1 is the count.
        print("%i FASTQ reads were converted to FASTA." % (num_reads + 1))
record.quality = record.quality[:cut] clipped += 1 writer.write(record) else: short_clipped += 1 elif keep_negatives: if len(record) >= min_len: negs += 1 writer.write(record) else: short_negs += 1 elif seq_format.lower() == "fasta": in_handle = open(in_file, "rU") out_handle = open(out_file, "w") reader = fastaReader(in_handle) writer = fastaWriter(out_handle) #Following code is identical to that for FASTQ but without editing qualities if forward: for record in reader: seq = record.sequence.upper() result = primer.search(seq) if result: #Forward primer, take everything after it cut = result.end() record.sequence = seq[cut:] if len(record.sequence) >= min_len: clipped += 1 writer.write(record) else: short_clipped += 1 elif keep_negatives:
manifest = ReadRocheXmlManifest(in_handle) except ValueError: manifest = None out_handle = open(out_file, "wb") writer = SffWriter(out_handle, xml=manifest) in_handle.seek(0) #start again after getting manifest count = writer.write_file(rename_seqrecords(SffIterator(in_handle), rename)) out_handle.close() in_handle.close() else: #Use Galaxy for FASTA, QUAL or FASTQ if seq_format.lower() in ["fasta", "csfasta"] \ or seq_format.lower().startswith("qual"): from galaxy_utils.sequence.fasta import fastaReader, fastaWriter reader = fastaReader(open(in_file, "rU")) writer = fastaWriter(open(out_file, "w")) marker = ">" elif seq_format.lower().startswith("fastq"): from galaxy_utils.sequence.fastq import fastqReader, fastqWriter reader = fastqReader(open(in_file, "rU")) writer = fastqWriter(open(out_file, "w")) marker = "@" else: sys.exit("Unsupported file type %r" % seq_format) #Now do the renaming count = 0 renamed = 0 for record in reader: #The [1:] is because the fastaReader leaves the > on the identifier, #likewise the fastqReader leaves the @ on the identifier try:
record.quality = record.quality[:cut] clipped += 1 writer.write(record) else: short_clipped += 1 elif keep_negatives: if len(record) >= min_len: negs += 1 writer.write(record) else: short_neg += 1 elif seq_format.lower()=="fasta": in_handle = open(in_file, "rU") out_handle = open(out_file, "w") reader = fastaReader(in_handle) writer = fastaWriter(out_handle) #Following code is identical to that for FASTQ but without editing qualities if forward: for record in reader: seq = record.sequence.upper() result = primer.search(seq) if result: #Forward primer, take everything after it cut = result.end() record.sequence = seq[cut:] if len(record.sequence) >= min_len: clipped += 1 writer.write(record) else: short_clipped += 1 elif keep_negatives:
manifest = None out_handle = open(out_file, "wb") writer = SffWriter(out_handle, xml=manifest) in_handle.seek(0) # start again after getting manifest count = writer.write_file(rename_seqrecords(SffIterator(in_handle), rename)) out_handle.close() in_handle.close() else: # Use Galaxy for FASTA, QUAL or FASTQ if seq_format.lower() in ["fasta", "csfasta"] or seq_format.lower().startswith( "qual" ): from galaxy_utils.sequence.fasta import fastaReader, fastaWriter reader = fastaReader(open(in_file, "rU")) writer = fastaWriter(open(out_file, "w")) marker = ">" elif seq_format.lower().startswith("fastq"): from galaxy_utils.sequence.fastq import fastqReader, fastqWriter reader = fastqReader(open(in_file, "rU")) writer = fastqWriter(open(out_file, "w")) marker = "@" else: sys.exit("Unsupported file type %r" % seq_format) # Now do the renaming count = 0 renamed = 0 for record in reader: # The [1:] is because the fastaReader leaves the > on the identifier, # likewise the fastqReader leaves the @ on the identifier
print("Using %i IDs from %i columns of tabular file" % (len(ids), len(columns))) else: # Single column, special case speed up col = columns[0] for line in handle: if not line.startswith("#"): ids.add(line.rstrip("\n").split("\t")[col]) print("Using %i IDs from tabular file" % (len(ids))) handle.close() # Write filtered FASTA file based on IDs from tabular file reader = fastaReader(open(in_file, "rU")) if out_positive_file != "-" and out_negative_file != "-": print("Generating two FASTA files") positive_writer = fastaWriter(open(out_positive_file, "w")) negative_writer = fastaWriter(open(out_negative_file, "w")) for record in reader: # The [1:] is because the fastaReader leaves the > on the identifer. if record.identifier and record.identifier.split()[0][1:] in ids: positive_writer.write(record) else: negative_writer.write(record) positive_writer.close() negative_writer.close() elif out_positive_file != "-": print("Generating matching FASTA file") positive_writer = fastaWriter(open(out_positive_file, "w")) for record in reader: # The [1:] is because the fastaReader leaves the > on the identifer. if record.identifier and record.identifier.split()[0][1:] in ids: