Example #1
def action(arguments):
    """
    Run
    """
    # Ignore SIGPIPE, for head support
    common.exit_on_sigpipe()
    logging.basicConfig()

    prot_sequences = SeqIO.parse(arguments.protein_align,
            fileformat.from_filename(arguments.protein_align))
    nucl_sequences = SeqIO.parse(arguments.nucl_align,
            fileformat.from_filename(arguments.nucl_align))

    instance = AlignmentMapper(TRANSLATION_TABLES[arguments.translation_table])

    SeqIO.write(instance.map_all(prot_sequences, nucl_sequences),
            arguments.out_file, 'fasta')
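
A note on the helper used above: fileformat.from_filename is not shown in this
excerpt. A minimal sketch of such a helper, assuming a simple extension-to-format
map (the mapping and error handling here are illustrative assumptions, not the
project's actual implementation):

# Hypothetical sketch of an extension-to-format helper.
import os

_EXTENSION_TO_FORMAT = {  # assumed mapping, for illustration only
    '.fasta': 'fasta', '.fa': 'fasta', '.fna': 'fasta',
    '.fastq': 'fastq', '.fq': 'fastq',
    '.sto': 'stockholm', '.phy': 'phylip', '.nex': 'nexus',
}

def from_filename(file_name):
    """Guess a Biopython SeqIO format string from the file extension."""
    extension = os.path.splitext(file_name)[1].lower()
    try:
        return _EXTENSION_TO_FORMAT[extension]
    except KeyError:
        raise ValueError("Unknown file extension: %s" % extension)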
Example #2
def action(arguments):
    """
    Trim the alignment as specified
    """
    # Determine file format for input and output
    source_format = (arguments.source_format or
            fileformat.from_filename(arguments.source_file.name))
    output_format = (arguments.output_format or
            fileformat.from_filename(arguments.output_file.name))

    # Load the alignment
    with arguments.source_file:
        sequences = SeqIO.parse(arguments.source_file, source_format,
                alphabet=Alphabet.Gapped(Alphabet.single_letter_alphabet))

        # Locate primers
        (forward_start, forward_end), (reverse_start, reverse_end) = \
                locate_primers(sequences, arguments.forward_primer,
                        arguments.reverse_primer, arguments.reverse_complement,
                        arguments.max_hamming_distance)

        # Generate slice indexes
        if arguments.include_primers:
            start = forward_start
            end = reverse_end + 1
        else:
            start = forward_end + 1
            end = reverse_start

        # Rewind the input file
        arguments.source_file.seek(0)
        sequences = SeqIO.parse(arguments.source_file,
                source_format,
                alphabet=Alphabet.Gapped(Alphabet.single_letter_alphabet))

        # Apply the transformation
        prune_action = _ACTIONS[arguments.prune_action]
        transformed_sequences = prune_action(sequences, start, end)

        with arguments.output_file:
            SeqIO.write(transformed_sequences, arguments.output_file,
                    output_format)
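
The _ACTIONS table is not shown in this excerpt. As a sketch, one plausible prune
action (its name and exact semantics are assumptions, not the project's actual
entries) is a generator that slices each record to the computed column range,
relying on Biopython's SeqRecord slicing:

# Hypothetical prune action: keep only the columns in [start, end).
def trim_to_region(records, start, end):
    """Yield each alignment record sliced to the [start, end) column range."""
    for record in records:
        yield record[start:end]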
Example #3
def action(arguments):
    """
    Print the ID (or full description) of each input sequence, one per line.
    """
    common.exit_on_sigpipe()

    # Determine file format for input and output
    source_format = (arguments.source_format or
            fileformat.from_filename(arguments.sequence_file.name))

    with arguments.sequence_file:
        sequences = SeqIO.parse(arguments.sequence_file, source_format)
        if arguments.include_description:
            ids = (sequence.description for sequence in sequences)
        else:
            ids = (sequence.id for sequence in sequences)
        with arguments.output_file:
            for i in ids:
                print >> arguments.output_file, i
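
The print >> handle form above is Python 2 syntax. Under Python 3 the same loop
would use the print function; a self-contained sketch of the equivalent:

# Python 3 equivalent of the id-printing loop (sketch, not the project's code).
def write_ids(ids, out_handle):
    """Write one identifier per line to an open text handle."""
    for i in ids:
        print(i, file=out_handle)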
Example #4
def summarize_sequence_file(source_file, file_type=None):
    """
    Summarizes a sequence file, returning a tuple containing the name,
    whether the file is an alignment, minimum sequence length, maximum
    sequence length, average length, number of sequences.
    """
    is_alignment = True
    avg_length = None
    min_length = sys.maxint
    max_length = 0
    sequence_count = 0
    if not file_type:
        file_type = fileformat.from_filename(source_file)

    # Get an iterator and analyze the data.
    for record in SeqIO.parse(source_file, file_type):
        sequence_count += 1
        sequence_length = len(record)
        if max_length != 0:
            # If even one sequence is not the same length as the others,
            # we don't consider this an alignment.
            if sequence_length != max_length:
                is_alignment = False

        # Lengths
        if sequence_length > max_length:
            max_length = sequence_length
        if sequence_length < min_length:
            min_length = sequence_length

        # Average length
        if sequence_count == 1:
            avg_length = float(sequence_length)
        else:
            avg_length = avg_length + ((sequence_length - avg_length) /
                                       sequence_count)

    # Handle an empty file:
    if avg_length is None:
        min_length = max_length = avg_length = 0

    return _SeqFileInfo(source_file, str(is_alignment).upper(), min_length,
            max_length, avg_length, sequence_count)
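
The average length above is maintained with an incremental (running) mean, which
avoids keeping a separate sum of all lengths. The update step in isolation, as a
small self-contained sketch:

def running_mean(values):
    """Return the mean of an iterable using the incremental update
    avg += (x - avg) / n, as in summarize_sequence_file above."""
    avg = None
    count = 0
    for x in values:
        count += 1
        if count == 1:
            avg = float(x)
        else:
            avg += (x - avg) / count
    return avg

# e.g. running_mean([3, 5, 10]) returns 6.0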
Example #5
def action(arguments):
    """
    Run
    """
    # Ignore SIGPIPE, for head support
    common.exit_on_sigpipe()
    logging.basicConfig()

    prot_sequences = SeqIO.parse(
        arguments.protein_align,
        fileformat.from_handle(arguments.protein_align))
    nucl_sequences = SeqIO.parse(arguments.nucl_align,
                                 fileformat.from_handle(arguments.nucl_align))

    instance = AlignmentMapper(TRANSLATION_TABLES[arguments.translation_table],
                               arguments.fail_action)

    SeqIO.write(instance.map_all(prot_sequences, nucl_sequences),
                arguments.out_file,
                fileformat.from_filename(arguments.out_file.name))
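
TRANSLATION_TABLES is not defined in this excerpt. One plausible shape for it
(an assumption; the real keys and contents may differ) is a mapping from
user-facing names to NCBI codon tables, which Biopython exposes via
Bio.Data.CodonTable:

# Assumed structure for TRANSLATION_TABLES, for illustration only.
from Bio.Data import CodonTable

TRANSLATION_TABLES = {
    'standard': CodonTable.unambiguous_dna_by_id[1],
    'vertebrate-mito': CodonTable.unambiguous_dna_by_id[2],
}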
Example #6
def action(arguments):
    """
    Report the molecular weight and isoelectric point of each sequence,
    optionally sorted by name, length, mass, or isoelectric point.
    """
    common.exit_on_sigpipe()

    # Determine file format for input and output
    source_format = (arguments.source_format or
            fileformat.from_filename(arguments.sequence_file.name))

    with arguments.sequence_file:
        sequences = SeqIO.parse(arguments.sequence_file, source_format)

        # sort based on name or length
        sorters = {"length": transform.sort_length, "name": transform.sort_name}
        directions = {"asc": 1, "desc": 0}
        if arguments.sort:
            sort_on, direction = arguments.sort.split("-")
            reverse = direction == "desc"
            if sort_on in ("length", "name"):
                # Sorted iterator over the whole input file
                sequences = sorters[sort_on](
                    source_file=arguments.sequence_file,
                    source_file_type=source_format,
                    direction=directions[direction],
                )

        stats = []
        for s in sequences:
            params = ProtParam.ProteinAnalysis(str(s.seq))

            stats.append((s, molecular_weight(s.seq), params.isoelectric_point()))

        # Sort by mass or isoelectric point once all records have been read,
        # rather than re-sorting on every iteration of the loop above.
        if arguments.sort and sort_on == "mass":
            stats.sort(key=lambda row: row[1], reverse=reverse)
        elif arguments.sort and sort_on == "pi":
            stats.sort(key=lambda row: row[2], reverse=reverse)

        if arguments.include_description:
            out = ((s[0].description, s[1], s[2]) for s in stats)
        else:
            out = ((s[0].id, s[1], s[2]) for s in stats)

        with arguments.output_file:
            for l in out:
                print >>arguments.output_file, "%s\t%.2f\t%.2f" % (l)
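
Bio.SeqUtils.ProtParam.ProteinAnalysis, used above, can compute both statistics on
its own; a standalone usage sketch (note it uses ProteinAnalysis's own
molecular_weight method, which may differ from the molecular_weight helper called
in the loop above):

from Bio.SeqUtils.ProtParam import ProteinAnalysis

analysis = ProteinAnalysis("MKTAYIAKQR")  # arbitrary example sequence
mass = analysis.molecular_weight()        # molecular weight in Daltons
pi = analysis.isoelectric_point()         # estimated isoelectric point
print("%s\t%.2f\t%.2f" % ("example", mass, pi))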
Example #7
def action(arguments):
    """
    Given parsed arguments, filter input files.
    """
    if arguments.quality_window_mean_qual and not arguments.quality_window:
        raise ValueError("--quality-window-mean-qual specified without "
                "--quality-window")

    queue = None
    if arguments.failure_out:
        queue = Queue()
        t = FailureReportWriter(queue, arguments.failure_out)
        t.setDaemon(True)
        t.start()

    # Always filter with a quality score
    qfilter = QualityScoreFilter(arguments.min_mean_quality)
    filters = [qfilter]

    output_type = fileformat.from_filename(arguments.output_file.name)
    with arguments.input_fastq as fp:
        if arguments.input_qual:
            sequences = QualityIO.PairedFastaQualIterator(fp,
                    arguments.input_qual)
        else:
            sequences = SeqIO.parse(fp, 'fastq')

        # Add filters
        if arguments.max_length:
            max_length_filter = MaxLengthFilter(arguments.max_length)
            filters.append(max_length_filter)
        if arguments.min_length:
            min_length_filter = MinLengthFilter(arguments.min_length)
            filters.append(min_length_filter)
        if arguments.max_ambiguous is not None:
            max_ambig_filter = MaxAmbiguousFilter(arguments.max_ambiguous)
            filters.append(max_ambig_filter)
        if arguments.ambiguous_action:
            ambiguous_filter = AmbiguousBaseFilter(
                    arguments.ambiguous_action)
            filters.append(ambiguous_filter)
        if arguments.quality_window:
            min_qual = arguments.quality_window_mean_qual or \
                    arguments.min_mean_quality
            window_filter = WindowQualityScoreFilter(arguments.quality_window,
                    min_qual)
            filters.insert(0, window_filter)

        if arguments.barcode_file:
            with arguments.barcode_file:
                barcodes = parse_barcode_file(arguments.barcode_file,
                        arguments.barcode_header)
            f = PrimerBarcodeFilter(arguments.primer or '', barcodes,
                    arguments.map_out,
                    quoting=getattr(csv, arguments.quoting))
            filters.append(f)

        for f in filters:
            sequences = f.filter_records(sequences, queue)

        with arguments.output_file:
            SeqIO.write(sequences, arguments.output_file, output_type)

    rpt_rows = (f.report_dict() for f in filters)

    # Write report
    with arguments.report_out as fp:
        writer = csv.DictWriter(fp, BaseFilter.report_fields,
                lineterminator='\n', delimiter='\t')
        writer.writeheader()
        writer.writerows(rpt_rows)

    if queue:
        queue.join()
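
Each filter above exposes a filter_records method that wraps the record iterator,
so the for-loop over filters builds a lazy generator pipeline. A minimal sketch of
that pattern (the class below is hypothetical; the real BaseFilter subclasses also
implement report_dict() and failure reporting):

class MinLengthSketchFilter(object):
    """Toy filter illustrating the generator-chaining pattern."""

    def __init__(self, min_length):
        self.min_length = min_length
        self.passed = 0
        self.failed = 0

    def filter_records(self, records, queue=None):
        """Yield only records meeting the minimum length."""
        for record in records:
            if len(record) >= self.min_length:
                self.passed += 1
                yield record
            else:
                self.failed += 1
                # a real implementation might report failures via `queue`

# usage: sequences = MinLengthSketchFilter(100).filter_records(sequences)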