def check_create_read(test, values):
    """Assert that Read.create_from_sam builds the read described by *values*.

    Args:
        test: label for the test case; only used in the failure message.
        values: indexable test-case fields.  As used here:
            values[0] is compared against 4 to detect a non-aligning read
            (NOTE(review): presumably the SAM flag -- confirm against
            build_samline), values[1] is the chromosome, values[2] the
            position, values[4] is added to the position to find the far end
            (presumably the read length -- confirm), values[5] the abundance,
            values[6] the number of mappings and values[7] the strand.
            Everything except the final element is passed to build_samline.

    Raises:
        AssertionError: if the first tab-separated field of str(read) differs
            from the expected description.
    """
    # work out the text the created read should start with
    if int(values[0]) == 4:
        expected = "Non-aligning read"
    else:
        near_end = int(values[2])
        far_end = int(values[2]) + int(values[4]) - 1
        strand = values[7]
        if strand == "-":
            # minus-strand reads are described from the far end back to values[2]
            near_end, far_end = far_end, near_end
        expected = "Read at {0}:{1}-{2} on {3} strand; counts for {4:2.3f}:".format(
            values[1],  # chromosome
            near_end,
            far_end,
            strand,
            float(values[5]) / float(values[6]))  # abundance / mappings

    # build the SAM line (final value is not part of it) and create the read
    samline = build_samline(*values[0:-1])
    (_created, read) = Read.create_from_sam(samline,
                                            chromosome_conversion.values(),
                                            count_method='all')
    output = str(read).partition("\t")[0]

    # assemble the diagnostic report shown when the assertion fails
    report_parts = [
        "\nTest: \t{}\n".format(test),
        "Abundance:\t{}\n".format(Read.has_sam_tag["NA"]),
        "Mappings:\t{}\n".format(Read.has_sam_tag["NH"]),
        "Sam Line:\t{}\n".format(samline),
        "Expected:\t{}\n".format(expected),
        "Position:\t{}\n".format(output),
    ]
    test_description = "".join(report_parts)

    assert output == expected, "{}Error: \tDid not create expected read.".format(test_description)
def check_create_read(test, values):
    """Check one Read.create_from_sam test case and assert on the result.

    The expected description is derived from *values*, a SAM line is built
    from all but the last element of *values*, and the first tab-separated
    field of the created read's string form is compared with the expectation.

    Args:
        test: name of the test case, echoed in the failure message.
        values: indexable test-case fields; indices 0, 1, 2, 4, 5, 6 and 7
            are read here (0 is compared with 4 to detect a non-aligning
            read, 1 = chromosome, 2 = position, 4 = span added to the
            position, 5 = abundance, 6 = mappings, 7 = strand).
            NOTE(review): the meaning of indices 0 and 4 is inferred from
            how they are used -- confirm against build_samline.

    Raises:
        AssertionError: when the created read does not match the expectation.
    """
    non_aligning = int(values[0]) == 4
    if non_aligning:
        expected = "Non-aligning read"
    else:
        start = int(values[2])
        end = int(values[2]) + int(values[4]) - 1
        on_minus_strand = values[7] == "-"
        if on_minus_strand:
            # reverse the coordinates so the description runs end -> position
            start, end = end, start
        template = "Read at {0}:{1}-{2} on {3} strand; counts for {4:2.3f}:"
        expected = template.format(values[1],  # chromosome
                                   start,
                                   end,
                                   values[7],  # strand
                                   float(values[5]) / float(values[6]))  # abundance / mappings

    # input for the code under test; the final value is excluded
    sam_fields = values[0:-1]
    samline = build_samline(*sam_fields)
    (_, read) = Read.create_from_sam(samline, chromosome_conversion.values(), count_method='all')
    read_text = str(read)
    output = read_text.split("\t")[0]

    # description printed only if the assertion below fails
    test_description = ("\nTest: \t{}\n"
                        "Abundance:\t{}\n"
                        "Mappings:\t{}\n"
                        "Sam Line:\t{}\n"
                        "Expected:\t{}\n"
                        "Position:\t{}\n").format(test,
                                                  Read.has_sam_tag["NA"],
                                                  Read.has_sam_tag["NH"],
                                                  samline,
                                                  expected,
                                                  output)

    assert output == expected, "{}Error: \tDid not create expected read.".format(
        test_description)
def metagene_count():
    """Chain of command for metagene_count analysis.

    Steps, in order:
      1. Read the run configuration from get_arguments().
      2. Register chromosome sizes from the alignment file on Read and set up
         the feature-to-alignment chromosome conversion on Feature.
      3. Register the NA (abundance) and NH (mappings) SAM tag patterns.
      4. Build the Metagene template and set the feature-file format.
      5. Unless arguments.interval_variable is set, (re)create
         "<output_prefix>.metagene_counts.csv" with the metagene header.
      6. For every non-blank, non-comment feature line: create the Feature,
         fetch its reads with `samtools view`, count them, and append the
         feature's metagene row to the output file.

    Raises:
        MetageneError: if the metagene template or the feature format cannot
            be set up, or if the samtools call fails for a feature's region.
    """
    arguments = get_arguments()

    # confirm BAM file and extract chromosome sizes
    Read.set_chromosome_sizes(arguments.alignment)

    # TODO: create a list of chromosomes to analyze and/or exclude
    # create chromosome conversion dictionary for feature (GFF/BED) to alignment (BAM)
    Feature.set_chromosome_conversion(arguments.chromosome_names,
                                      Read.chromosome_sizes.keys())

    # define has_abundance and has_mappings tags for Read class; raw strings
    # keep the regex escape \d literal instead of relying on Python passing an
    # unrecognised string escape through unchanged
    Read.set_sam_tag(arguments.extract_abundance, arguments.alignment, r"NA:i:(\d+)")
    Read.set_sam_tag(arguments.extract_mappings, arguments.alignment, r"NH:i:(\d+)")

    # define the metagene array shape (left padding, start, internal, end, right padding)
    # metagene = padding ---- internal region ---- padding
    # (single-argument print(...) behaves the same as a statement and as a function)
    try:
        metagene = Metagene(arguments.interval_size, arguments.padding, arguments.padding)
        print("Metagene definition:\t{}".format(metagene))
    except MetageneError as err:
        print(err)
        raise MetageneError("Unable to create the metagene template")

    try:
        Feature.set_format(arguments.feature)  # assign file format for the feature file
        print("Reading feature file as {} format".format(Feature.format))
    except MetageneError as err:
        print(err)
        raise MetageneError("Unable to create the feature object")

    # the same output file is used for the header and for every feature row
    output_path = "{}.metagene_counts.csv".format(arguments.output_prefix)

    # print out the header line...
    # NOTE(review): when interval_variable is set the file is never truncated,
    # so rows are appended to any existing file with this name -- confirm intended.
    if not arguments.interval_variable:
        with open(output_path, 'w') as output_file:
            output_file.write("# Metagene:\t{}\n".format(metagene))  # define for plotting later
            output_file.write(metagene.print_full())

    # for each feature
    with open(arguments.feature, 'r') as feature_file:
        for feature_line in read_chunk(feature_file, 1024):
            # skip blank lines (indexing them would raise IndexError) and comment lines
            if not feature_line or feature_line[0] == "#":
                continue

            # change creation with feature_method
            feature = Feature.create(arguments.feature_count,
                                     metagene,
                                     feature_line,
                                     arguments.count_splicing,
                                     arguments.ignore_strand)

            # pull out sam file lines; it is important to use Feature.get_samtools_region()
            # rather than Feature.get_chromosome_region() because only the first ensures
            # that the interval does not extend beyond the length of the chromosome, which
            # makes samtools view return no reads
            # NOTE(review): the command is assembled by plain string formatting; if
            # run_pipe hands it to a shell, paths containing spaces or shell
            # metacharacters will be misinterpreted -- confirm how run_pipe executes it.
            samtools_command = 'samtools view {} {}'.format(arguments.alignment,
                                                            feature.get_samtools_region())
            (run_pipe_worked, sam_sample) = run_pipe([samtools_command])

            if not run_pipe_worked:
                raise MetageneError(
                    "Could not pull chromosomal region {} for feature {} from BAM file {}.".format(
                        feature.get_chromosome_region(),
                        feature.name,
                        arguments.alignment))

            for samline in sam_sample:
                if len(samline) == 0:
                    continue

                # create Read feature
                (created_read, read) = Read.create_from_sam(
                    samline,
                    Feature.chromosome_conversion.values(),
                    arguments.count_method,
                    arguments.uniquely_mapping,
                    arguments.ignore_strand,
                    arguments.count_secondary_alignments,
                    arguments.count_failed_quality_control,
                    arguments.count_PCR_optical_duplicate,
                    arguments.count_supplementary_alignment)

                # count read (if it exists)
                if created_read:
                    feature.count_read(read,
                                       arguments.count_method,
                                       arguments.count_splicing,
                                       arguments.count_partial_reads,
                                       arguments.ignore_strand)

            # output the resulting metagene; reopening per feature means each
            # completed row is closed out to disk before the next feature starts
            with open(output_path, 'a') as output_file:
                output_file.write("{}\n".format(
                    feature.print_metagene(interval_override=arguments.interval_variable)))
def metagene_count():
    """Chain of command for metagene_count analysis.

    Runs the whole counting pipeline from the parsed command-line arguments:
    configures the Read and Feature classes from the alignment file, builds
    the Metagene template, optionally writes the header of
    "<output_prefix>.metagene_counts.csv", then processes the feature file
    line by line, counting the reads returned by `samtools view` for each
    feature and appending one metagene row per feature to the output file.

    Raises:
        MetageneError: if the metagene template cannot be created, the feature
            format cannot be set, or samtools fails for a feature's region.
    """
    arguments = get_arguments()

    # confirm BAM file and extract chromosome sizes
    Read.set_chromosome_sizes(arguments.alignment)

    # TODO: create a list of chromosomes to analyze and/or exclude
    # create chromosome conversion dictionary for feature (GFF/BED) to alignment (BAM)
    Feature.set_chromosome_conversion(arguments.chromosome_names,
                                      Read.chromosome_sizes.keys())

    # define has_abundance and has_mappings tags for Read class
    # (raw strings: "\d" is not a valid string escape, so spell the regex literally)
    Read.set_sam_tag(arguments.extract_abundance, arguments.alignment, r"NA:i:(\d+)")
    Read.set_sam_tag(arguments.extract_mappings, arguments.alignment, r"NH:i:(\d+)")

    # define the metagene array shape (left padding, start, internal, end, right padding)
    # metagene = padding ---- internal region ---- padding
    try:
        metagene = Metagene(arguments.interval_size, arguments.padding, arguments.padding)
        # print(...) with one argument works as a statement and as a function call
        print("Metagene definition:\t{}".format(metagene))
    except MetageneError as err:
        print(err)
        raise MetageneError("Unable to create the metagene template")

    try:
        Feature.set_format(arguments.feature)  # assign file format for the feature file
        print("Reading feature file as {} format".format(Feature.format))
    except MetageneError as err:
        print(err)
        raise MetageneError("Unable to create the feature object")

    # header and per-feature rows all go to this one file
    counts_path = "{}.metagene_counts.csv".format(arguments.output_prefix)

    # print out the header line...
    # NOTE(review): with interval_variable set, no header is written and the
    # file is not truncated, so new rows follow whatever the file already
    # contains -- confirm this is intended.
    if not arguments.interval_variable:
        with open(counts_path, 'w') as output_file:
            output_file.write("# Metagene:\t{}\n".format(metagene))  # define for plotting later
            output_file.write(metagene.print_full())

    # for each feature
    with open(arguments.feature, 'r') as feature_file:
        for feature_line in read_chunk(feature_file, 1024):
            if not feature_line:
                # an empty line has no first character to inspect; skip it
                continue
            if feature_line[0] == "#":
                # skip comment lines
                continue

            # change creation with feature_method
            feature = Feature.create(arguments.feature_count, metagene, feature_line,
                                     arguments.count_splicing, arguments.ignore_strand)

            # pull out sam file lines; it is important to use Feature.get_samtools_region()
            # rather than Feature.get_chromosome_region() because only the first ensures
            # that the interval does not extend beyond the length of the chromosome, which
            # makes samtools view return no reads
            # NOTE(review): alignment path and region are interpolated unquoted; if
            # run_pipe executes through a shell this is fragile for unusual file
            # names -- confirm against run_pipe.
            (run_pipe_worked, sam_sample) = run_pipe([
                'samtools view {} {}'.format(arguments.alignment,
                                             feature.get_samtools_region())
            ])

            if not run_pipe_worked:
                raise MetageneError(
                    "Could not pull chromosomal region {} for feature {} from BAM file {}."
                    .format(feature.get_chromosome_region(), feature.name,
                            arguments.alignment))

            for samline in sam_sample:
                if len(samline) > 0:
                    # create Read feature
                    (created_read, read) = Read.create_from_sam(
                        samline,
                        Feature.chromosome_conversion.values(),
                        arguments.count_method,
                        arguments.uniquely_mapping,
                        arguments.ignore_strand,
                        arguments.count_secondary_alignments,
                        arguments.count_failed_quality_control,
                        arguments.count_PCR_optical_duplicate,
                        arguments.count_supplementary_alignment)

                    # count read (if it exists)
                    if created_read:
                        feature.count_read(
                            read, arguments.count_method,
                            arguments.count_splicing,
                            arguments.count_partial_reads,
                            arguments.ignore_strand)

            # output the resulting metagene (appended once per feature)
            with open(counts_path, 'a') as output_file:
                output_file.write("{}\n".format(
                    feature.print_metagene(
                        interval_override=arguments.interval_variable)))