def check_create_read(test, values):
    """Assert that a read built from ``values`` describes itself as expected.

    ``test`` is a label used only in the failure message.  ``values`` is an
    indexable sequence of SAM-like fields; every element except the last is
    forwarded to ``build_samline``.  Fields used for the expectation:
    ``values[0]`` (flag; 4 means a non-aligning read), ``values[1]``
    (chromosome), ``values[7]`` (strand), ``values[5]`` / ``values[6]``
    (abundance / mappings).  ``values[2]`` and ``values[4]`` are presumably
    the leftmost position and the aligned length -- TODO confirm against
    ``build_samline``.
    """
    # work out the description the read is expected to print
    if int(values[0]) != 4:
        leftmost = int(values[2])
        rightmost = leftmost + int(values[4]) - 1
        # a minus-strand read is reported from its right end to its left end
        if values[7] == "-":
            first, last = rightmost, leftmost
        else:
            first, last = leftmost, rightmost
        weight = float(values[5]) / float(values[6])  # abundance / mappings
        expected = "Read at {0}:{1}-{2} on {3} strand; counts for {4:2.3f}:".format(
            values[1], first, last, values[7], weight)
    else:
        expected = "Non-aligning read"

    # build the SAM line (the final value is not part of it) and create the read
    samline = build_samline(*values[0:-1])
    _created, read = Read.create_from_sam(samline, chromosome_conversion.values(), count_method='all')
    # only the text before the first tab is compared
    output = str(read).partition("\t")[0]

    # assemble the diagnostic report shown when the assertion fails
    report_lines = [
        "\nTest:    \t{}\n".format(test),
        "Abundance:\t{}\n".format(Read.has_sam_tag["NA"]),
        "Mappings:\t{}\n".format(Read.has_sam_tag["NH"]),
        "Sam Line:\t{}\n".format(samline),
        "Expected:\t{}\n".format(expected),
        "Position:\t{}\n".format(output),
    ]
    test_description = "".join(report_lines)
    assert output == expected, "{}Error:   \tDid not create expected read.".format(test_description)
Example #2
0
def check_create_read(test, values):
    """Verify that ``Read.create_from_sam`` produces the expected description.

    ``test`` labels the case in the failure report.  ``values`` holds the
    SAM-like fields: all but the last element are passed to
    ``build_samline``.  The expectation is derived from ``values[0]`` (flag;
    the value 4 marks a non-aligning read), ``values[1]`` (chromosome),
    ``values[7]`` (strand) and ``values[5]`` / ``values[6]`` (abundance /
    mappings).  ``values[2]`` and ``values[4]`` look like the leftmost
    position and the aligned length -- TODO confirm against
    ``build_samline``.
    """
    # derive the expected description
    if int(values[0]) != 4:
        low = int(values[2])
        high = low + int(values[4]) - 1
        # on the minus strand the read starts at its high coordinate
        if values[7] == "-":
            begin, finish = high, low
        else:
            begin, finish = low, high
        share = float(values[5]) / float(values[6])  # abundance / mappings
        expected = "Read at {0}:{1}-{2} on {3} strand; counts for {4:2.3f}:".format(
            values[1], begin, finish, values[7], share)
    else:
        expected = "Non-aligning read"

    # create the read from a SAM line built without the final value
    samline = build_samline(*values[0:-1])
    _created, read = Read.create_from_sam(
        samline,
        chromosome_conversion.values(),
        count_method='all')
    # compare only the part of the string form before the first tab
    output = str(read).partition("\t")[0]

    # failure report, built up front so the assert message stays short
    test_description = "".join([
        "\nTest:    \t{}\n".format(test),
        "Abundance:\t{}\n".format(Read.has_sam_tag["NA"]),
        "Mappings:\t{}\n".format(Read.has_sam_tag["NH"]),
        "Sam Line:\t{}\n".format(samline),
        "Expected:\t{}\n".format(expected),
        "Position:\t{}\n".format(output),
    ])
    assert output == expected, "{}Error:   \tDid not create expected read.".format(
        test_description)
def metagene_count():
    """Chain of command for metagene_count analysis.

    Steps, in order:

    1. Read the run configuration from ``get_arguments()``.
    2. Configure the ``Read`` and ``Feature`` classes from the alignment file
       (chromosome sizes, chromosome-name conversion, NA/NH SAM tags).
    3. Build the ``Metagene`` template and determine the feature file format.
    4. Unless ``interval_variable`` is set, (re)create
       ``<output_prefix>.metagene_counts.csv`` with a header.
    5. For every non-blank, non-comment line of the feature file, pull the
       overlapping alignments with ``samtools view``, count them against the
       feature and append the feature's metagene to the output file.

    Raises:
        MetageneError: if the metagene template or the feature format cannot
            be set up, or if the alignments for a feature cannot be pulled
            from the BAM file.
    """
    arguments = get_arguments()
    # confirm BAM file and extract chromosome sizes
    Read.set_chromosome_sizes(arguments.alignment)
    ##TODO: create a list of chromosomes to analyze and/or exclude
    # create chromosome conversion dictionary for feature (GFF/BED) to alignment (BAM)
    Feature.set_chromosome_conversion(arguments.chromosome_names, Read.chromosome_sizes.keys())

    # define has_abundance and has_mappings tags for Read class
    # (raw strings so that \d reaches the regex engine without an invalid-escape warning)
    Read.set_sam_tag(arguments.extract_abundance, arguments.alignment, r"NA:i:(\d+)")
    Read.set_sam_tag(arguments.extract_mappings, arguments.alignment, r"NH:i:(\d+)")

    # define the metagene array shape (left padding, start, internal, end, right padding)
    # metagene = padding ---- internal region ---- padding
    # print(<single expression>) behaves the same on Python 2 and Python 3
    try:
        metagene = Metagene(arguments.interval_size, arguments.padding, arguments.padding)
        print("Metagene definition:\t{}".format(metagene))
    except MetageneError as err:
        print(err)
        raise MetageneError("Unable to create the metagene template")

    try:
        Feature.set_format(arguments.feature)  # assign file format for the feature file
        print("Reading feature file as {} format".format(Feature.format))
    except MetageneError as err:
        print(err)
        raise MetageneError("Unable to create the feature object")

    # the output path does not change between features, so build it once
    output_path = "{}.metagene_counts.csv".format(arguments.output_prefix)

    # print out the header line...
    # NOTE(review): when interval_variable is set the file is never truncated, so
    # rows are appended to whatever an earlier run left behind -- confirm intended.
    if not arguments.interval_variable:
        with open(output_path, 'w') as output_file:
            output_file.write("# Metagene:\t{}\n".format(metagene))  # define for plotting later
            output_file.write(metagene.print_full())

    # for each feature
    with open(arguments.feature, 'r') as feature_file:
        for feature_line in read_chunk(feature_file, 1024):
            # skip blank lines (indexing an empty line would raise IndexError) and comment lines
            if not feature_line or feature_line[0] == "#":
                continue

            # change creation with feature_method
            feature = Feature.create(arguments.feature_count, metagene, feature_line, arguments.count_splicing,
                                     arguments.ignore_strand)

            # pull out sam file lines; it is important to use Feature.get_samtools_region(chromosome_lengths) rather
            # than Feature.get_chromosome_region() because only the first ensures that the interval does not
            # extend beyond the length of the chromosome which makes samtools view return no reads
            # NOTE(review): the command is assembled by string formatting; an alignment path containing
            # spaces or shell metacharacters may be mis-parsed -- confirm how run_pipe tokenizes it.
            (run_pipe_worked, sam_sample) = run_pipe(['samtools view {} {}'.format(
                arguments.alignment,
                feature.get_samtools_region())])
            if not run_pipe_worked:
                raise MetageneError("Could not pull chromosomal region {} for feature {} from BAM file {}.".format(
                    feature.get_chromosome_region(),
                    feature.name,
                    arguments.alignment))

            for samline in sam_sample:
                if len(samline) == 0:
                    continue
                # create Read feature
                (created_read, read) = Read.create_from_sam(samline,
                                                            Feature.chromosome_conversion.values(),
                                                            arguments.count_method,
                                                            arguments.uniquely_mapping,
                                                            arguments.ignore_strand,
                                                            arguments.count_secondary_alignments,
                                                            arguments.count_failed_quality_control,
                                                            arguments.count_PCR_optical_duplicate,
                                                            arguments.count_supplementary_alignment)

                # count read (if it exists)
                if created_read:
                    feature.count_read(read, arguments.count_method, arguments.count_splicing,
                                       arguments.count_partial_reads, arguments.ignore_strand)

            # output the resulting metagene
            with open(output_path, 'a') as output_file:
                output_file.write(
                    "{}\n".format(feature.print_metagene(interval_override=arguments.interval_variable)))
Example #4
0
def metagene_count():
    """Chain of command for metagene_count analysis.

    Steps, in order:

    1. Read the run configuration from ``get_arguments()``.
    2. Configure the ``Read`` and ``Feature`` classes from the alignment file
       (chromosome sizes, chromosome-name conversion, NA/NH SAM tags).
    3. Build the ``Metagene`` template and determine the feature file format.
    4. Unless ``interval_variable`` is set, (re)create
       ``<output_prefix>.metagene_counts.csv`` with a header.
    5. For every non-blank, non-comment line of the feature file, pull the
       overlapping alignments with ``samtools view``, count them against the
       feature and append the feature's metagene to the output file.

    Raises:
        MetageneError: if the metagene template or the feature format cannot
            be set up, or if the alignments for a feature cannot be pulled
            from the BAM file.
    """
    arguments = get_arguments()
    # confirm BAM file and extract chromosome sizes
    Read.set_chromosome_sizes(arguments.alignment)
    ##TODO: create a list of chromosomes to analyze and/or exclude
    # create chromosome conversion dictionary for feature (GFF/BED) to alignment (BAM)
    Feature.set_chromosome_conversion(arguments.chromosome_names,
                                      Read.chromosome_sizes.keys())

    # define has_abundance and has_mappings tags for Read class
    # (raw strings so that \d reaches the regex engine without an invalid-escape warning)
    Read.set_sam_tag(arguments.extract_abundance, arguments.alignment,
                     r"NA:i:(\d+)")
    Read.set_sam_tag(arguments.extract_mappings, arguments.alignment,
                     r"NH:i:(\d+)")

    # define the metagene array shape (left padding, start, internal, end, right padding)
    # metagene = padding ---- internal region ---- padding
    # print(<single expression>) behaves the same on Python 2 and Python 3
    try:
        metagene = Metagene(arguments.interval_size, arguments.padding,
                            arguments.padding)
        print("Metagene definition:\t{}".format(metagene))
    except MetageneError as err:
        print(err)
        raise MetageneError("Unable to create the metagene template")

    try:
        # assign file format for the feature file
        Feature.set_format(arguments.feature)
        print("Reading feature file as {} format".format(Feature.format))
    except MetageneError as err:
        print(err)
        raise MetageneError("Unable to create the feature object")

    # the output path does not change between features, so build it once
    output_path = "{}.metagene_counts.csv".format(arguments.output_prefix)

    # print out the header line...
    # NOTE(review): when interval_variable is set the file is never truncated, so
    # rows are appended to whatever an earlier run left behind -- confirm intended.
    if not arguments.interval_variable:
        with open(output_path, 'w') as output_file:
            # define for plotting later
            output_file.write("# Metagene:\t{}\n".format(metagene))
            output_file.write(metagene.print_full())

    # for each feature
    with open(arguments.feature, 'r') as feature_file:
        for feature_line in read_chunk(feature_file, 1024):
            # skip blank lines (indexing an empty line would raise IndexError) and comment lines
            if not feature_line or feature_line[0] == "#":
                continue

            # change creation with feature_method
            feature = Feature.create(arguments.feature_count, metagene,
                                     feature_line,
                                     arguments.count_splicing,
                                     arguments.ignore_strand)

            # pull out sam file lines; it is important to use Feature.get_samtools_region(chromosome_lengths) rather
            # than Feature.get_chromosome_region() because only the first ensures that the interval does not
            # extend beyond the length of the chromosome which makes samtools view return no reads
            # NOTE(review): the command is assembled by string formatting; an alignment path containing
            # spaces or shell metacharacters may be mis-parsed -- confirm how run_pipe tokenizes it.
            (run_pipe_worked, sam_sample) = run_pipe([
                'samtools view {} {}'.format(arguments.alignment,
                                             feature.get_samtools_region())
            ])
            if not run_pipe_worked:
                raise MetageneError(
                    "Could not pull chromosomal region {} for feature {} from BAM file {}."
                    .format(feature.get_chromosome_region(), feature.name,
                            arguments.alignment))

            for samline in sam_sample:
                if len(samline) == 0:
                    continue
                # create Read feature
                (created_read, read) = Read.create_from_sam(
                    samline,
                    Feature.chromosome_conversion.values(),
                    arguments.count_method,
                    arguments.uniquely_mapping,
                    arguments.ignore_strand,
                    arguments.count_secondary_alignments,
                    arguments.count_failed_quality_control,
                    arguments.count_PCR_optical_duplicate,
                    arguments.count_supplementary_alignment)

                # count read (if it exists)
                if created_read:
                    feature.count_read(
                        read, arguments.count_method,
                        arguments.count_splicing,
                        arguments.count_partial_reads,
                        arguments.ignore_strand)

            # output the resulting metagene
            with open(output_path, 'a') as output_file:
                output_file.write("{}\n".format(
                    feature.print_metagene(
                        interval_override=arguments.interval_variable)))