def action(arguments):
    """
    Run the back-translation: project a protein alignment onto the
    corresponding nucleotide sequences and write the result as FASTA.
    """
    # Ignore SIGPIPE so output piped into e.g. `head` exits cleanly.
    common.exit_on_sigpipe()
    logging.basicConfig()

    # NOTE(review): the open *handle* is passed to fileformat.from_filename
    # here, while the sibling action uses from_handle — confirm from_filename
    # accepts a handle.
    proteins = SeqIO.parse(arguments.protein_align,
                           fileformat.from_filename(arguments.protein_align))
    nucleotides = SeqIO.parse(arguments.nucl_align,
                              fileformat.from_filename(arguments.nucl_align))

    mapper = AlignmentMapper(TRANSLATION_TABLES[arguments.translation_table])
    mapped = mapper.map_all(proteins, nucleotides)
    SeqIO.write(mapped, arguments.out_file, 'fasta')
def action(arguments): """ Trim the alignment as specified """ # Determine file format for input and output source_format = (arguments.source_format or fileformat.from_filename(arguments.source_file.name)) output_format = (arguments.output_format or fileformat.from_filename(arguments.output_file.name)) # Load the alignment with arguments.source_file: sequences = SeqIO.parse(arguments.source_file, source_format, alphabet=Alphabet.Gapped(Alphabet.single_letter_alphabet)) # Locate primers (forward_start, forward_end), (reverse_start, reverse_end) = \ locate_primers(sequences, arguments.forward_primer, arguments.reverse_primer, arguments.reverse_complement, arguments.max_hamming_distance) # Generate slice indexes if arguments.include_primers: start = forward_start end = reverse_end + 1 else: start = forward_end + 1 end = reverse_start # Rewind the input file arguments.source_file.seek(0) sequences = SeqIO.parse(arguments.source_file, source_format, alphabet=Alphabet.Gapped(Alphabet.single_letter_alphabet)) # Apply the transformation prune_action = _ACTIONS[arguments.prune_action] transformed_sequences = prune_action(sequences, start, end) with arguments.output_file: SeqIO.write(transformed_sequences, arguments.output_file, output_format)
def action(arguments):
    """
    Write the identifier (or, with --include-description, the full
    description line) of every input sequence to the output file.
    """
    common.exit_on_sigpipe()

    # Determine input format: explicit flag wins, else sniff the file name.
    fmt = (arguments.source_format or
           fileformat.from_filename(arguments.sequence_file.name))

    with arguments.sequence_file:
        records = SeqIO.parse(arguments.sequence_file, fmt)
        attr = 'description' if arguments.include_description else 'id'
        labels = (getattr(record, attr) for record in records)

        # Consume the lazy generator while the input file is still open.
        with arguments.output_file:
            for label in labels:
                arguments.output_file.write('%s\n' % label)
def summarize_sequence_file(source_file, file_type=None):
    """
    Summarizes a sequence file, returning a tuple containing the name,
    whether the file is an alignment, minimum sequence length,
    maximum sequence length, average length, number of sequences.

    :param source_file: path to the sequence file
    :param file_type: explicit sequence format; guessed from the file
        name via ``fileformat.from_filename`` when not given
    :returns: a ``_SeqFileInfo`` tuple
    """
    is_alignment = True
    min_length = sys.maxint
    max_length = 0
    total_length = 0
    sequence_count = 0
    prev_length = None

    if not file_type:
        file_type = fileformat.from_filename(source_file)

    # Get an iterator and analyze the data.
    for record in SeqIO.parse(source_file, file_type):
        sequence_count += 1
        sequence_length = len(record)

        # A file counts as an alignment only when every sequence has the
        # same length.  Comparing against the previous record (rather than
        # the running maximum) also handles zero-length sequences, which a
        # max-based sentinel would mistake for "no sequence seen yet".
        if prev_length is not None and sequence_length != prev_length:
            is_alignment = False
        prev_length = sequence_length

        min_length = min(min_length, sequence_length)
        max_length = max(max_length, sequence_length)
        total_length += sequence_length

    if sequence_count:
        # Exact mean from the running total: avoids the accumulated float
        # error of an incremental mean.
        avg_length = total_length / float(sequence_count)
    else:
        # Handle an empty file.
        min_length = max_length = avg_length = 0

    return _SeqFileInfo(source_file, str(is_alignment).upper(), min_length,
                        max_length, avg_length, sequence_count)
def action(arguments):
    """
    Run the back-translation: project a protein alignment onto the
    corresponding nucleotide sequences, writing in the format implied
    by the output file name.
    """
    # Ignore SIGPIPE so output piped into e.g. `head` exits cleanly.
    common.exit_on_sigpipe()
    logging.basicConfig()

    proteins = SeqIO.parse(arguments.protein_align,
                           fileformat.from_handle(arguments.protein_align))
    nucleotides = SeqIO.parse(arguments.nucl_align,
                              fileformat.from_handle(arguments.nucl_align))

    mapper = AlignmentMapper(TRANSLATION_TABLES[arguments.translation_table],
                             arguments.fail_action)
    out_format = fileformat.from_filename(arguments.out_file.name)
    SeqIO.write(mapper.map_all(proteins, nucleotides), arguments.out_file,
                out_format)
def action(arguments):
    """
    Report molecular weight and isoelectric point for each input protein
    sequence as tab-separated lines, optionally sorted by
    name/length/mass/pi (asc or desc, e.g. ``mass-desc``).
    """
    common.exit_on_sigpipe()

    # Determine file format for input
    source_format = (arguments.source_format or
                     fileformat.from_filename(arguments.sequence_file.name))

    # Parse the sort specification once (the original split it twice).
    sort_on = None
    reverse = False
    if arguments.sort:
        sort_on, direction = arguments.sort.split("-")
        reverse = direction == "desc"

    with arguments.sequence_file:
        sequences = SeqIO.parse(arguments.sequence_file, source_format)

        # Name/length sorts are delegated to the transform helpers, which
        # re-read the source file themselves.
        sorters = {"length": transform.sort_length,
                   "name": transform.sort_name}
        directions = {"asc": 1, "desc": 0}
        if sort_on in ("length", "name"):
            sequences = sorters[sort_on](
                source_file=arguments.sequence_file,
                source_file_type=source_format,
                direction=directions[direction])

        stats = []
        for record in sequences:
            params = ProtParam.ProteinAnalysis(str(record.seq))
            stats.append((record, molecular_weight(record.seq),
                          params.isoelectric_point()))

        # Mass/pI sorts operate on the computed statistics.
        if sort_on == "mass":
            stats.sort(key=lambda row: row[1], reverse=reverse)
        elif sort_on == "pi":
            stats.sort(key=lambda row: row[2], reverse=reverse)

        if arguments.include_description:
            out = ((row[0].description, row[1], row[2]) for row in stats)
        else:
            out = ((row[0].id, row[1], row[2]) for row in stats)

        with arguments.output_file:
            for row in out:
                arguments.output_file.write("%s\t%.2f\t%.2f\n" % row)
def action(arguments):
    """
    Given parsed arguments, filter input files.

    Reads FASTQ (or FASTA + qual) records, pushes them through a chain of
    quality/length/ambiguity/barcode filters, writes survivors to the
    output file, and emits a per-filter summary report.
    """
    if arguments.quality_window_mean_qual and not arguments.quality_window:
        raise ValueError("--quality-window-mean-qual specified without "
                         "--quality-window")

    # Optional failure log: a daemon thread drains records rejected by the
    # filters from this queue and writes them to --failure-out.
    queue = None
    if arguments.failure_out:
        queue = Queue()
        t = FailureReportWriter(queue, arguments.failure_out)
        t.setDaemon(True)
        t.start()

    # Always filter with a quality score
    qfilter = QualityScoreFilter(arguments.min_mean_quality)
    filters = [qfilter]

    output_type = fileformat.from_filename(arguments.output_file.name)
    with arguments.input_fastq as fp:
        # Quality scores come either paired (FASTA + .qual) or inline (FASTQ).
        if arguments.input_qual:
            sequences = QualityIO.PairedFastaQualIterator(fp,
                                                          arguments.input_qual)
        else:
            sequences = SeqIO.parse(fp, 'fastq')

        # Add filters selected by the command-line flags.
        if arguments.max_length:
            max_length_filter = MaxLengthFilter(arguments.max_length)
            filters.append(max_length_filter)
        if arguments.min_length:
            min_length_filter = MinLengthFilter(arguments.min_length)
            filters.append(min_length_filter)
        # `is not None` so an explicit 0 (allow no ambiguous bases) counts.
        if arguments.max_ambiguous is not None:
            max_ambig_filter = MaxAmbiguousFilter(arguments.max_ambiguous)
            filters.append(max_ambig_filter)
        if arguments.ambiguous_action:
            ambiguous_filter = AmbiguousBaseFilter(
                arguments.ambiguous_action)
            filters.append(ambiguous_filter)
        if arguments.quality_window:
            # Window mean quality defaults to the overall minimum mean quality.
            min_qual = arguments.quality_window_mean_qual or \
                arguments.min_mean_quality
            window_filter = WindowQualityScoreFilter(arguments.quality_window,
                                                     min_qual)
            # Inserted at the front: window trimming runs before all other
            # filters.
            filters.insert(0, window_filter)

        if arguments.barcode_file:
            with arguments.barcode_file:
                barcodes = parse_barcode_file(arguments.barcode_file,
                                              arguments.barcode_header)
            f = PrimerBarcodeFilter(arguments.primer or '', barcodes,
                                    arguments.map_out,
                                    quoting=getattr(csv, arguments.quoting))
            filters.append(f)

        # Chain the filters: each wraps the previous iterator lazily, so the
        # whole pipeline streams record-by-record.
        for f in filters:
            sequences = f.filter_records(sequences, queue)

        with arguments.output_file:
            SeqIO.write(sequences, arguments.output_file, output_type)

    rpt_rows = (f.report_dict() for f in filters)

    # Write report
    with arguments.report_out as fp:
        writer = csv.DictWriter(fp, BaseFilter.report_fields,
                                lineterminator='\n', delimiter='\t')
        writer.writeheader()
        writer.writerows(rpt_rows)

    # Wait for the failure-report thread to finish draining the queue.
    if queue:
        queue.join()