def main(args):
    """Create a fold change TES heatmap image from command line arguments.

    The arguments are parsed by get_args, the fold change matrix is built by
    get_fold_change_matrix, the matrix is rendered by generate_heatmap and the
    result is joined with a tick mark image by combine_images. The temporary
    files are removed at the end.

    :param args: arguments handed straight to get_args
    """
    (numerator_seq_files_data, denominator_seq_files_data, matrix_params,
     heatmap_params, filenames, max_threads) = get_args(args)

    # Quantify both conditions and reduce them to one fold change matrix
    fold_change_matrix = get_fold_change_matrix(
        numerator_seq_files_data, denominator_seq_files_data, matrix_params,
        filenames, max_threads)

    # height is unpacked but not needed for plotting here
    (bp_width, width, height, max_log2_fc, interval_size, minor_ticks,
     major_ticks) = heatmap_params

    # The output prefix is the last entry of filenames
    output_filename = "".join([
        filenames[-1], "_max_", str(max_log2_fc), "_width_", str(bp_width),
        "bp_fold_change_TES_heatmap"
    ])

    heatmap_only_image = generate_random_filename(".tiff")

    # Mirror the maximum around zero; a falsy maximum leaves the minimum unset
    if max_log2_fc:
        negative_log2_value = -1 * max_log2_fc
    else:
        negative_log2_value = None

    # NOTE(review): 2.2 is presumably the gamma value - confirm against generate_heatmap
    generate_heatmap(fold_change_matrix, 'red/blue', heatmap_only_image, 2.2,
                     negative_log2_value, max_log2_fc)

    ticks_image = make_ticks_image(width, interval_size,
                                   (minor_ticks, major_ticks))

    combine_images(ticks_image, heatmap_only_image, output_filename)

    # Clean up every intermediate file
    remove_files(fold_change_matrix, ticks_image, heatmap_only_image)
    def test_arguments(self):
        """parse_args exits on incomplete argument lists and parses a valid one."""
        # Each of these argument lists must make parse_args exit
        rejected_arg_lists = ([], ["max_tss_file"], ["max_tss_file", 'left'])

        for rejected_args in rejected_arg_lists:
            with self.assertRaises(SystemExit):
                with Quieter():
                    sequence_from_region_around_max_tss.parse_args(
                        rejected_args)

        max_tss_file = generate_random_filename()

        with open(max_tss_file, 'w') as file:
            file.write(
                "\t".join(["chr16", "53607", "53608", "POLR3K", "0", "-"]) +
                "\n" +
                "\t".join(["chr16", "53872", "53873", "SNRNP25", "0", "+"]) +
                "\n")

        # Registered cleanups run even when an assertion below fails, so the
        # temporary file is never left behind
        self.addCleanup(remove_files, max_tss_file)

        result = sequence_from_region_around_max_tss.parse_args(
            [max_tss_file, '-5', '10'])
        search = [["-", 5], ['+', 10]]
        self.assertEqual(result, (False, max_tss_file, search))
def run_divergent_pileup_plots(regions_filename, sequencing_files_list,
                               region_length, max_threads):
    """Compute pileups for every sequencing file and output the metaplot data.

    :param regions_filename: regions file; it is also used to build the
        opposite stranded regions file
    :param sequencing_files_list: sequencing files, one get_pileups job each
    :param region_length: passed to get_pileups and output_metaplot_data
    :param max_threads: number of worker processes in the pool
    """
    # The opposite stranded copy of the regions
    rev_region_filename = make_rev_region_file(regions_filename)

    # Both region files are split by strand
    region_files = split_bed_file(regions_filename)
    rev_region_files = split_bed_file(rev_region_filename)

    # Split each sequencing file and build its job in the same pass
    stranded_seq_files = []
    pileup_jobs = []
    for seq_filename in sequencing_files_list:
        split_files = split_bed_file(seq_filename)
        stranded_seq_files.append(split_files)
        pileup_jobs.append((region_files, rev_region_files, split_files,
                            region_length, seq_filename))

    with multiprocessing.Pool(processes=max_threads) as pool:
        averages = pool.starmap(get_pileups, pileup_jobs)

    # All of the split and reversed files are temporary
    remove_files(region_files, rev_region_files, rev_region_filename,
                 stranded_seq_files)

    output_metaplot_data(averages, region_length, "")
def make_rgb_heatmap(fold_change_matrix_filename, heatmap_params,
                     output_filename_prefix):
    """Render a fold change matrix as a red/blue heatmap with tick marks.

    The heatmap and the tick marks are drawn as separate temporary images and
    merged by combine_images. The matrix file and both temporary images are
    removed afterwards.

    :param fold_change_matrix_filename: matrix file handed to generate_heatmap
        (it is deleted at the end)
    :param heatmap_params: 8-tuple of (bp_width, width, height, gamma,
        max_fold_change, interval_size, minor_ticks_bp, major_ticks_bp)
    :param output_filename_prefix: start of the output filename
    """
    # height is part of the tuple but is not needed here
    (bp_width, width, _height, gamma, max_fold_change, interval_size,
     minor_ticks_bp, major_ticks_bp) = heatmap_params

    tick_params = minor_ticks_bp, major_ticks_bp

    only_heatmap_filename = generate_random_filename(extension=".tiff")

    # Mirror the maximum around zero. None is tested by identity so that only
    # a missing maximum leaves the minimum unset.
    if max_fold_change is not None:
        negative_max_fold_change = -1 * max_fold_change
    else:
        negative_max_fold_change = None

    generate_heatmap(fold_change_matrix_filename,
                     'red/blue',
                     only_heatmap_filename,
                     gamma,
                     negative_max_fold_change,
                     max_fold_change,
                     ticks=None)

    ticks_image_filename = make_ticks_image(width, interval_size, tick_params)

    # Combine the two images together
    output_filename = output_filename_prefix + "_max_" + str(
        max_fold_change) + "_width_" + str(
            bp_width) + "bp_gene_body_fold_change_heatmap"

    combine_images(ticks_image_filename, only_heatmap_filename,
                   output_filename)

    remove_files(fold_change_matrix_filename, ticks_image_filename,
                 only_heatmap_filename)
def make_ticks_image(width, interval_size, tick_params):
    """Draw the tick marks as an image and return the image filename.

    The tick matrix is written to a temporary tab delimited file, rendered by
    the Rscript at generate_heatmap_location, and the temporary matrix file is
    removed again.

    :param width: width passed to make_ticks_matrix
    :param interval_size: divisor converting the tick spacing in bp
    :param tick_params: pair of (minor_ticks_bp, major_ticks_bp)
    :return: filename of the generated ".tiff" ticks image
    """
    minor_ticks_bp, major_ticks_bp = tick_params

    tick_marks = Ticks(
        minor_tick_mark_interval_size=(minor_ticks_bp / interval_size),
        major_tick_mark_interval_size=(major_ticks_bp / interval_size))

    # A height of 50 px and a max black value of 1
    ticks_matrix = make_ticks_matrix(width, 50, 1, tick_marks)

    # One tab delimited line per matrix row
    ticks_matrix_filename = generate_random_filename()
    with open(ticks_matrix_filename, 'w') as matrix_file:
        for matrix_row in ticks_matrix:
            matrix_file.write("\t".join(map(str, matrix_row)) + "\n")

    ticks_image_filename = generate_random_filename(".tiff")

    rscript_arguments = [
        ticks_matrix_filename, "gray", ticks_image_filename, "2.2"
    ]
    command = "/usr/bin/Rscript " + generate_heatmap_location + " " + \
        " ".join(rscript_arguments)
    os.system(command)

    remove_files(ticks_matrix_filename)

    return ticks_image_filename
예제 #6
0
def gather_data(sequencing_file, blacklist_filename, annotated_dataset,
                region_filenames, truQuant_regions_dict):
    """Collect the paused region and gene body counts for one sequencing file.

    :param sequencing_file: sequencing file to quantify
    :param blacklist_filename: passed to run_subtract together with
        rna_blacklist_file
    :param annotated_dataset: when truthy, region data is also collected for
        every gene in truQuant_regions_dict
    :param region_filenames: pair of (paused region file, gene body region file)
    :param truQuant_regions_dict: per gene dictionary with a "Pause" entry
    :return: tuple of (sequencing_file, per gene counts dictionary, region
        data dictionary); the last one is empty unless annotated_dataset is
        truthy
    """
    paused_region_filename, gene_body_region_filename = region_filenames

    # The counting is done on the output of run_subtract, not on the raw file
    blacklisted_sequencing_filename = generate_random_filename()

    run_subtract(sequencing_file,
                 rna_blacklist_file,
                 blacklist_filename,
                 strand_specific=False,
                 output_filename=blacklisted_sequencing_filename)

    indv_gene_counts_dict = get_counts_in_paused_region(
        paused_region_filename, blacklisted_sequencing_filename)

    # The return value is ignored here; the same dictionary is returned below
    get_counts_in_gene_bodies(gene_body_region_filename,
                              blacklisted_sequencing_filename,
                              indv_gene_counts_dict)

    region_data_dict = {}

    # Region data is only gathered for the dataset which was annotated
    if annotated_dataset:
        five_prime_counts_dict = build_counts_dict(sequencing_file, "five")

        region_data_dict = {
            gene: get_region_data(truQuant_regions_dict[gene]["Pause"],
                                  five_prime_counts_dict)
            for gene in truQuant_regions_dict
        }

    remove_files(blacklisted_sequencing_filename)

    return sequencing_file, indv_gene_counts_dict, region_data_dict
def run_read_through_transcription(regions_filename, tsr_file,
                                   upstream_distance, downstream_distance,
                                   interval_size, sequencing_files,
                                   max_threads):
    """Quantify the sequencing files over incremented regions and output the data.

    :param regions_filename: regions handed to make_incremented_regions
    :param tsr_file: TSR file used for blacklisting, or the string 'no' to
        skip the blacklisting step
    :param upstream_distance: passed to make_incremented_regions and output_data
    :param downstream_distance: passed to make_incremented_regions
    :param interval_size: passed to make_incremented_regions and output_data
    :param sequencing_files: sequencing files to quantify
    :param max_threads: passed to get_coverage_files
    """
    # The region intervals file everything is quantified over
    incremented_regions_filename = make_incremented_regions(
        regions_filename, upstream_distance, downstream_distance,
        interval_size)

    # The TSRs are only blacklisted when a TSR file was given
    blacklisting_requested = tsr_file != 'no'

    if blacklisting_requested:
        files_to_quantify = blacklist_tsrs(sequencing_files, tsr_file)
    else:
        files_to_quantify = sequencing_files

    coverage_files = get_coverage_files(files_to_quantify,
                                        incremented_regions_filename,
                                        max_threads)

    # The blacklisted copies are temporary, the original files are not
    if blacklisting_requested:
        remove_files(files_to_quantify)

    combined_dict = coverage_files_to_dictionary(coverage_files,
                                                 sequencing_files)

    output_data(combined_dict, sequencing_files, upstream_distance,
                interval_size)

    # Everything else created here is temporary as well
    remove_files(incremented_regions_filename, coverage_files)
예제 #8
0
    def test_get_pausing_distances_helper(self):
        """get_pausing_distances_helper returns one distance per gene on both strands."""
        region_filename = generate_random_filename()

        with open(region_filename, 'w') as file:
            file.write(
                "\t".join(["chr1", "100", "101", "positive_gene", "0", "+"]) + "\n" +
                "\t".join(["chr1", "9999", "10000", "negative_gene", "0", "-"]) + "\n"
            )

        # Registered cleanups run even when the call or the assertion below
        # fails, so the temporary file is never left behind
        self.addCleanup(remove_files, region_filename)

        transcripts_dict = {
            "chr1": {
                "+": {
                    100: {200: 3, 300: 1, 800: 1, 283: 1}
                },
                "-": {
                    9999: {9000: 3, 8000: 1, 7050: 1, 6542: 1}
                }
            }
        }

        result = tps_distance_per_gene.get_pausing_distances_helper(
            region_filename, transcripts_dict, 1)

        expected = {
            "positive_gene": 101,
            "negative_gene": 999
        }

        self.assertDictEqual(result, expected)
예제 #9
0
    def test_incorrect_number_of_arguments(self):
        """parse_args exits on invalid argument lists and parses a valid one."""
        # Each of these argument lists must make parse_args exit
        rejected_arg_lists = (
            [],
            ["max_tss_file"],
            ["max_tss_file", "region_width"],
            ["max_tss_file", "region_width", "vertical_average", "extra"],
        )

        for rejected_args in rejected_arg_lists:
            with self.assertRaises(SystemExit):
                with Quieter():
                    nucleotide_heatmap.parse_args(rejected_args)

        max_tss_file = generate_random_filename()
        with open(max_tss_file, 'w') as file:
            file.write(
                "\t".join(['chr1', '1', '2', 'name', '0', '+'])
            )

        # Registered cleanups run even when an assertion below fails, so the
        # temporary file is never left behind
        self.addCleanup(remove_files, max_tss_file)

        result = nucleotide_heatmap.parse_args([max_tss_file, '50', '2000', '2'])
        self.assertEqual(result, (max_tss_file, 50, 2000, 2))
예제 #10
0
    def test_positive_read_three(self, stdout):
        """A single + strand read is counted once at its 3' end."""
        region_filename = generate_random_filename()
        seq_filename = generate_random_filename()

        with open(region_filename, 'w') as region_file:
            region_file.write("\t".join(["chr1", "0", "10", "name", "0", "+"]))

        with open(seq_filename, 'w') as seq_file:
            seq_file.write("\t".join(["chr1", "2", "9", "name", "0", "+"]))

        metaplot.main(['three', region_filename, seq_filename])

        # The first printed line is the tab delimited header, every other
        # non-empty line is a row of numbers
        printed_lines = [
            line for line in stdout.getvalue().split("\n") if line
        ]
        result = [printed_lines[0].split("\t")]
        for data_line in printed_lines[1:]:
            result.append([float(val) for val in data_line.split()])

        seq_file_basename = seq_filename.split("/")[-1]

        expected_header = [
            "Position", seq_file_basename + " 3' sense strand",
            seq_file_basename + " 3' divergent strand"
        ]
        expected_rows = [[-5, 0, 0], [-4, 0, 0], [-3, 0, 0], [-2, 0, 0],
                         [-1, 0, 0], [1, 0, 0], [2, 0, 0], [3, 0, 0],
                         [4, 1, 0], [5, 0, 0]]
        expected = [expected_header] + expected_rows

        # Remove the files first so a failing assertion does not leave them behind
        remove_files(region_filename, seq_filename)
        self.assertEqual(result, expected)
예제 #11
0
def get_matrix(seq_files_data, matrix_params, filenames, threads):
    """Build one matrix per sequencing file in parallel and add them together.

    :param seq_files_data: iterable of (sequencing filename, spike in) pairs
    :param matrix_params: 6-tuple of (upstream_distance, downstream_distance,
        bp_width, width, height, interval_size)
    :param filenames: 3-tuple of (truQuant output file, TSR file, output
        filename prefix); the prefix is not used here
    :param threads: number of worker processes in the pool
    :return: the result of add_matrices over the individual matrices
    """
    (upstream_distance, downstream_distance, bp_width, width, height,
     interval_size) = matrix_params
    truQuant_output_file, tsr_file, output_filename_prefix = filenames

    blacklist_regions_file = blacklist_extended_gene_bodies(
        tsr_file, downstream_distance)

    # The intervals file which will be quantified
    intervals_filename = make_incremented_regions(truQuant_output_file,
                                                  downstream_distance,
                                                  upstream_distance, bp_width,
                                                  interval_size)

    dimensions = width, height

    # One get_individual_matrix job per dataset
    matrix_jobs = [
        ([seq_filename, blacklist_regions_file, intervals_filename],
         dimensions, spike_in)
        for seq_filename, spike_in in seq_files_data
    ]

    with multiprocessing.Pool(threads) as pool:
        individual_matrices = pool.starmap(get_individual_matrix, matrix_jobs)

    combined_matrix = add_matrices(individual_matrices)
    remove_files(blacklist_regions_file, intervals_filename)

    return combined_matrix
예제 #12
0
    def test_complete_run(self, stdout):
        """metaplot.main in 'whole' mode prints the expected per position values."""
        reads_filename = generate_random_filename()

        with open(reads_filename, 'w') as reads_file:
            reads_file.write("chr1\t1\t10\tname\t0\t+\n")
            reads_file.write("chr1\t10\t20\tname\t0\t-\n")

        regions_filename = generate_random_filename()

        with open(regions_filename, 'w') as regions_file:
            regions_file.write("chr1\t0\t20\tname\t0\t+\n")

        metaplot.main(['whole', regions_filename, reads_filename])

        # Skip the header line and parse every non-empty line into floats
        data_lines = stdout.getvalue().split("\n")[1:]
        result = [
            tuple([float(val) for val in data_line.split()])
            for data_line in data_lines if data_line
        ]

        # Remove the files first so a failing assertion does not leave them behind
        remove_files(reads_filename, regions_filename)

        # Positions run from -10 to 10 and skip 0
        position = list(range(-10, 0)) + list(range(1, 11))
        fw_expected = [0] + [1] * 9 + [0] * 10
        rv_expected = [0] * 10 + [-1] * 10

        expected = list(zip(position, fw_expected, rv_expected))

        self.assertEqual(result, expected)
예제 #13
0
def get_individual_matrix(regions_filename, seq_file_data, end,
                          repeat_amounts):
    """Build the heatmap matrix for a single sequencing file.

    :param regions_filename: regions passed to get_original_matrix
    :param seq_file_data: pair of (sequencing file, normalization factor)
    :param end: passed through to get_original_matrix
    :param repeat_amounts: pair of (repeat amount, vertical averaging)
    :return: the result of average_matrix on the expanded matrix file
    """
    repeat_amount, vertical_averaging = repeat_amounts
    seq_file, norm_factor = seq_file_data

    original_matrix = get_original_matrix(regions_filename, seq_file,
                                          norm_factor, end)

    # The expanded matrix goes into a temporary file for average_matrix
    matrix_filename = generate_random_filename(".matrix")

    with open(matrix_filename, 'w') as matrix_file:
        for matrix_row in original_matrix:
            # Every value is written repeat_amount times in a row
            expanded_row = [
                str(val) for val in matrix_row for _ in range(repeat_amount)
            ]
            matrix_file.write("\t".join(expanded_row) + "\n")

    # Do the vertical averaging
    heatmap_matrix = average_matrix(matrix_filename, vertical_averaging)
    remove_files(matrix_filename)

    return heatmap_matrix
예제 #14
0
def build_matrix(seq_file_data, matrix_params, filenames, threads):
    """Build one matrix per sequencing file in parallel and add them together.

    :param seq_file_data: iterable of (sequencing filename, spike in) pairs
    :param matrix_params: 5-tuple of (upstream_distance, distance_past_tes,
        width, height, interval_size)
    :param filenames: 3-tuple of (truQuant output file, TSR file, output
        filename prefix); the prefix is not used here
    :param threads: number of worker processes in the pool
    :return: the result of add_matrices over the individual matrices
    """
    truQuant_output_file, tsr_file, output_filename_prefix = filenames
    (upstream_distance, distance_past_tes, width, height,
     interval_size) = matrix_params

    blacklist_regions_file = blacklist_extended_gene_bodies(
        tsr_file, distance_past_tes)

    # The regions which will be quantified
    intervals_filename = make_incremented_regions(truQuant_output_file,
                                                  distance_past_tes,
                                                  interval_size,
                                                  upstream_distance)

    dimensions = width, height

    # One build_individual_matrix job per sequencing file
    matrix_jobs = [
        ([seq_filename, blacklist_regions_file, intervals_filename],
         dimensions, spike_in)
        for seq_filename, spike_in in seq_file_data
    ]

    with multiprocessing.Pool(threads) as pool:
        individual_matrices = pool.starmap(build_individual_matrix,
                                           matrix_jobs)

    combined_matrix = add_matrices(individual_matrices)

    # The individual matrices are handed to remove_files along with the
    # other temporary files
    remove_files(individual_matrices, blacklist_regions_file,
                 intervals_filename)

    return combined_matrix
    def test_get_regions_file(self):
        """get_regions_file writes the region around each max TSS."""
        max_tss_file = generate_random_filename()

        with open(max_tss_file, 'w') as file:
            file.write(
                "\t".join(["chr16", "53607", "53608", "POLR3K", "0", "-"]) +
                "\n" +
                "\t".join(["chr16", "53872", "53873", "SNRNP25", "0", "+"]) +
                "\n")

        # Registered cleanups run even when a later step fails, so the
        # temporary input file is never left behind
        self.addCleanup(remove_files, max_tss_file)

        search = [["-", 5], ["+", 5]]

        chrom_sizes = {}
        with open(hg38_chrom_sizes_random_file) as file:
            for line in file:
                chrom, size = line.split()
                chrom_sizes[chrom] = int(size)

        region_file, gene_names = sequence_from_region_around_max_tss.get_regions_file(
            max_tss_file, search, chrom_sizes)

        # The generated regions file is temporary as well
        self.addCleanup(remove_files, region_file)

        with open(region_file) as file:
            result = [line.split() for line in file]

        expected = [["chr16", "53603", "53613", "POLR3K", "0", "-"],
                    ["chr16", "53867", "53877", "SNRNP25", "0", "+"]]

        self.assertEqual(result, expected)
예제 #16
0
    def test_expand_region(self):
        """expand_region writes a region of region_width around each max TSS."""
        max_tss_file = generate_random_filename()

        with open(max_tss_file, 'w') as file:
            file.write(
                "\t".join(["chr1", "925739", "925740", "SAMD11", "0", "+"]) + "\n" +
                "\t".join(["chr1", "959255", "959256", "NOC2L", "0", "-"]) + "\n"
            )

        # Registered cleanups run even when a later step fails, so the
        # temporary input file is never left behind
        self.addCleanup(remove_files, max_tss_file)

        region_width = 20
        expanded_region = nucleotide_heatmap.expand_region(max_tss_file, region_width)

        # The expanded regions file is temporary as well
        self.addCleanup(remove_files, expanded_region)

        with open(expanded_region) as file:
            result = [line.split() for line in file]

        expected = [
            ["chr1", "925729", "925749", "SAMD11", "0", "+"],
            ["chr1", "959246", "959266", "NOC2L", "0", "-"]
        ]

        self.assertEqual(result, expected)
def get_fold_change_matrix(numerator_seq_files_data,
                           denominator_seq_files_data, matrix_params,
                           filenames, max_threads):
    """Build the numerator and denominator matrices and their log2 fold change.

    :param numerator_seq_files_data: data handed to TES_heatmap.get_matrix for
        the numerator
    :param denominator_seq_files_data: the same for the denominator
    :param matrix_params: passed through to TES_heatmap.get_matrix
    :param filenames: passed through to TES_heatmap.get_matrix
    :param max_threads: size of the outer pool; each matrix job gets half
    :return: the result of make_log_two_fold_change_matrix
    """
    # Two matrices are built at once, so each one gets half of the threads.
    # A result of 0 (a single thread was requested) is bumped up to 1.
    threads_per_heatmap = int(max_threads / 2) or 1

    matrix_jobs = [
        (seq_files_data, matrix_params, filenames, threads_per_heatmap)
        for seq_files_data in (numerator_seq_files_data,
                               denominator_seq_files_data)
    ]

    with NestedPool(max_threads) as pool:
        numerator_matrix_filename, denominator_matrix_filename = pool.starmap(
            TES_heatmap.get_matrix, matrix_jobs)

    log_two_fold_change_matrix_filename = make_log_two_fold_change_matrix(
        numerator_matrix_filename, denominator_matrix_filename)

    # Only the fold change matrix is needed from here on
    remove_files(numerator_matrix_filename, denominator_matrix_filename)

    return log_two_fold_change_matrix_filename
예제 #18
0
def run_read_end_fold_change_heatmap(args):
    """Create a log2 fold change region heatmap from command line arguments.

    The numerator and denominator matrices are built in parallel by
    region_heatmap.get_matrix, turned into a log2 fold change matrix and
    rendered by make_heatmap. The intermediate matrices are removed.

    :param args: arguments handed straight to parse_input
    """
    (end, filenames, max_log2_fc, repeat_amounts, numerator_seq_files_data,
     denominator_seq_files_data, threads, tick_parameters) = parse_input(args)

    regions_file, output_prefix = filenames

    # Two matrices are built at once, so each one gets half of the threads.
    # A result of 0 (a single thread was requested) is bumped up to 1.
    threads_per_heatmap = int(threads / 2) or 1

    # One job for the numerator and one for the denominator
    matrix_jobs = [
        (regions_file, seq_files_data, end, repeat_amounts,
         threads_per_heatmap)
        for seq_files_data in (numerator_seq_files_data,
                               denominator_seq_files_data)
    ]

    with NestedPool(threads) as pool:
        numerator_matrix, denominator_matrix = pool.starmap(
            region_heatmap.get_matrix, matrix_jobs)

    log2_matrix = make_log_two_fold_change_matrix(numerator_matrix,
                                                  denominator_matrix)
    remove_files(numerator_matrix, denominator_matrix)

    px_per_bp, vertical_averaging = repeat_amounts

    # Any ".tiff" in the prefix is dropped before the parameters are appended
    output_filename = "".join([
        output_prefix.replace(".tiff", ""), "_max_", str(max_log2_fc),
        "_vertical_averaging_", str(vertical_averaging), "_px_per_bp_",
        str(px_per_bp), "_region_heatmap.tiff"
    ])

    make_heatmap(log2_matrix, max_log2_fc, tick_parameters, output_filename,
                 px_per_bp)
    remove_files(log2_matrix)
예제 #19
0
    def test_parse_input(self):
        """parse_input exits without arguments and parses valid command lines."""
        # No arguments throws error
        with self.assertRaises(SystemExit):
            with Quieter():
                metaplot.parse_input([])

        # Needs a region file and at least one seq file
        regions_file = generate_random_filename()
        with open(regions_file, 'w') as file:
            file.write("\t".join(['chr1', '1', '3', 'name', '0', '+']))

        # Registered cleanups run even when an assertion below fails, so the
        # temporary file is never left behind
        self.addCleanup(remove_files, regions_file)

        # The single region above spans 1 to 3
        region_length = 2
        max_threads = multiprocessing.cpu_count()

        # Without a thread option the expected thread count is the CPU count
        for read_end in ('five', 'three'):
            result = metaplot.parse_input([read_end, regions_file, 'seq_file'])
            self.assertEqual(result, (read_end, regions_file, ['seq_file'],
                                      region_length, max_threads))

        # Test the threading is working, with the short and the long option
        result = metaplot.parse_input(
            ['five', regions_file, 'seq_file', '-t', '4'])
        self.assertEqual(
            result, ('five', regions_file, ['seq_file'], region_length, 4))

        result = metaplot.parse_input(
            ['three', regions_file, 'seq_file', '--threads', '4'])
        self.assertEqual(
            result, ('three', regions_file, ['seq_file'], region_length, 4))
예제 #20
0
    def test_get_counts(self):
        """Paused region and gene body counts are collected per gene."""
        pause_regions_filename = generate_random_filename()

        with open(pause_regions_filename, 'w') as file:
            file.write(
                "\t".join(["chr1", "100", "250", "positive_gene", "90", "+"]) +
                "\n" + "\t".join(
                    ["chr1", "700", "850", "negative_gene", "1523", "-"]) +
                "\n")

        gene_body_filename = generate_random_filename()

        with open(gene_body_filename, 'w') as file:
            file.write(
                "\t".join(["chr1", "251", "750", "positive_gene", "90", "+"]) +
                "\n" + "\t".join(
                    ["chr1", "200", "700", "negative_gene", "1523", "-"]) +
                "\n")

        blacklisted_sequencing_file = generate_random_filename()

        reads = [
            ["chr1", "80", "220", "5'not_counted", "0", "+"],
            ["chr1", "259", "285", "5'not_counted", "0", "+"],
            ["chr1", "132", "220", "5'count", "0", "+"],
            ["chr1", "132", "800", "5'count", "0", "-"],
            ["chr1", "750", "783", "5'count", "0", "-"],
            ["chr1", "750", "900", "5'not_counted", "0", "-"],
            ["chr1", "500", "600", "5'not_counted", "0", "-"],
        ]

        with open(blacklisted_sequencing_file, 'w') as file:
            for read in reads:
                file.write("\t".join(read) + "\n")

        # Registered cleanups run even when a call or the assertion below
        # fails, so the temporary files are never left behind
        self.addCleanup(remove_files, pause_regions_filename,
                        gene_body_filename, blacklisted_sequencing_file)

        indv_gene_counts_dict = truQuant.get_counts_in_paused_region(
            pause_regions_filename, blacklisted_sequencing_file)

        indv_gene_counts_dict = truQuant.get_counts_in_gene_bodies(
            gene_body_filename, blacklisted_sequencing_file,
            indv_gene_counts_dict)

        expected_indv_gene_counts_dict = {
            "positive_gene": {
                "Pause": 1,
                "Body": 1
            },
            "negative_gene": {
                "Pause": 2,
                "Body": 1
            }
        }

        self.assertDictEqual(indv_gene_counts_dict,
                             expected_indv_gene_counts_dict)
예제 #21
0
    def test_map_tsrs_to_search_regions(self):
        """TSRs are mapped to their search region or reported as flow through."""
        # Test # TSRs
        # One that is contained in the search region, one with partial overlap in the 5' end,
        # one with partial overlap in the 3' end. One with no overlap before the TSR. One with no overlap after the TSR
        # One on the opposite strand

        # Need to define a TSR file and a search regions dict
        search_regions_dict = {
            "chr1": [["chr1", "100", "200", "positive_strand_test", "0", "+"],
                     ["chr1", "500", "600", "negative_strand_test", "0", "-"]]
        }

        tsr_filename = generate_random_filename(".tab")

        additional_columns = [
            "tss_left", "tss_right", "tss_strength", "avg_tss"
        ]

        tsr_rows = [
            ["chr1", "40", "60", "no_overlap", "0", "+"],
            ["chr1", "300", "320", "no_overlap2", "0", "+"],
            ["chr1", "90", "110", "partial_overlap_5'", "0", "+"],
            ["chr1", "190", "210", "partial_overlap_3'", "0", "+"],
            ["chr1", "140", "160", "complete_overlap", "0", "+"],
            ["chr1", "140", "160", "opposite_strand", "0", "-"],
        ]

        # Every TSR line carries the same four additional columns
        with open(tsr_filename, 'w') as file:
            for tsr_row in tsr_rows:
                file.write("\t".join(tsr_row + additional_columns) + "\n")

        # Registered cleanups run even when the call or an assertion below
        # fails, so the temporary file is never left behind
        self.addCleanup(remove_files, tsr_filename)

        gene_tsr_dict, flow_through_tsrs = truQuant.map_tsrs_to_search_regions(
            tsr_filename, search_regions_dict)

        expected_gene_tsr_dict = {
            "positive_strand_test":
            [["chr1", "90", "110", "partial_overlap_5'", "0", "+", "avg_tss"],
             ["chr1", "190", "210", "partial_overlap_3'", "0", "+", "avg_tss"],
             ["chr1", "140", "160", "complete_overlap", "0", "+", "avg_tss"]]
        }

        expected_flow_through_tsrs = [
            ["chr1", "40", "60", "no_overlap", "0", "+", "avg_tss"],
            ["chr1", "300", "320", "no_overlap2", "0", "+", "avg_tss"],
            ["chr1", "140", "160", "opposite_strand", "0", "-", "avg_tss"]
        ]

        self.assertDictEqual(gene_tsr_dict, expected_gene_tsr_dict)
        self.assertEqual(flow_through_tsrs, expected_flow_through_tsrs)
    def test_with_n_in_sequence(self, stdout):
        """Run base_distribution.main on a single 10 bp region of chr1."""
        region_file = generate_random_filename()

        with open(region_file, 'w') as file:
            file.write("chr1\t1\t11\tname\t0\t+")

        # Registered cleanups run even when main raises, so the temporary
        # file is never left behind
        self.addCleanup(remove_files, region_file)

        # NOTE(review): this assertion reads stdout before main has run, so it
        # does not look at anything main prints - confirm whether it was meant
        # to come after the call below
        self.assertFalse(self.get_sequence(stdout.getvalue()))

        base_distribution.main([region_file])
예제 #23
0
def main(args):
    """Build the combined matrix from the arguments and render it as a heatmap.

    :param args: arguments handed straight to get_args
    """
    (seq_files_data, matrix_params, heatmap_params, filenames,
     threads) = get_args(args)

    # The output prefix is the last entry of filenames
    prefix_for_output = filenames[-1]

    combined_matrix = build_matrix(seq_files_data, matrix_params, filenames,
                                   threads)

    make_heatmap(combined_matrix, heatmap_params, prefix_for_output)

    # The matrix is only an intermediate file
    remove_files(combined_matrix)
예제 #24
0
    def test_two_sequencing_files(self, stdout):
        """metaplot.main prints one sense/divergent column pair per sequencing file."""
        reads_filename = generate_random_filename()
        reads_filename_two = generate_random_filename()

        with open(reads_filename, 'w') as file:
            file.write("chr1\t1\t10\tname\t0\t+\n")
            file.write("chr1\t10\t20\tname\t0\t-\n")

        # The second file has the same reads on the opposite strands
        with open(reads_filename_two, 'w') as file:
            file.write("chr1\t1\t10\tname\t0\t-\n")
            file.write("chr1\t10\t20\tname\t0\t+\n")

        regions_filename = generate_random_filename()

        with open(regions_filename, 'w') as file:
            file.write("chr1\t0\t20\tname\t0\t+\n")

        # Registered cleanups run even when main or an assertion below fails
        # (the header check used to run before the files were removed)
        self.addCleanup(remove_files, reads_filename, regions_filename,
                        reads_filename_two)

        metaplot.main(
            ['whole', regions_filename, reads_filename, reads_filename_two])

        output = stdout.getvalue().split("\n")
        header = output[0].split("\t")

        reads_basename = reads_filename.split("/")[-1]
        reads_basename_two = reads_filename_two.split("/")[-1]

        expected_header = [
            "Position", reads_basename + " whole sense strand",
            reads_basename + " whole divergent strand",
            reads_basename_two + " whole sense strand",
            reads_basename_two + " whole divergent strand"
        ]

        self.assertEqual(header, expected_header)

        # Every non-empty line after the header is a row of numbers
        result = [
            tuple([float(val) for val in line.split()])
            for line in output[1:] if line
        ]

        # Positions run from -10 to 10 and skip 0
        position = list(range(-10, 0)) + list(range(1, 11))
        fw_expected = [0] + [1] * 9 + [0] * 10
        rv_expected = [0] * 10 + [-1] * 10

        fw_expected_two = [0] * 10 + [1] * 10
        rv_expected_two = [0] + [-1] * 9 + [0] * 10

        expected = list(
            zip(position, fw_expected, rv_expected, fw_expected_two,
                rv_expected_two))

        self.assertEqual(result, expected)
예제 #25
0
def combine_images(ticks_image_filename, only_heatmap_filename, output_filename):
    """Stack the heatmap on top of the tick marks and save the result as a TIFF.

    The heatmap is pasted at the top left corner and the ticks image directly
    below it. The canvas takes its width from the ticks image. Both input
    images are removed afterwards.

    :param ticks_image_filename: image holding the tick marks (deleted at the end)
    :param only_heatmap_filename: image holding the heatmap (deleted at the end)
    :param output_filename: output path without extension; ".tiff" is appended
    """
    # The context managers close both input files before they are deleted
    # below; previously the handles were never closed explicitly
    with Image.open(ticks_image_filename) as ticks_image, \
            Image.open(only_heatmap_filename) as heatmap_image:
        final_image = Image.new(
            'RGB',
            (ticks_image.width, ticks_image.height + heatmap_image.height))

        final_image.paste(heatmap_image, (0, 0))
        final_image.paste(ticks_image, (0, heatmap_image.height))

    final_image.save(output_filename + ".tiff")
    remove_files(ticks_image_filename, only_heatmap_filename)
예제 #26
0
def run_base_distribution(regions_file, region_length):
    """Compute the averages over the sequences of the regions and output them.

    :param regions_file: regions handed to run_getfasta
    :param region_length: passed to output_data together with the averages
    """
    # The fasta file is only needed until the sequences are read
    temporary_fasta = run_getfasta(regions_file)
    region_sequences = read_fasta(temporary_fasta)
    remove_files(temporary_fasta)

    output_data(calculate_averages(region_sequences), region_length)
def get_coverage_files_helper(filename, region_intervals_file):
    """Run coverage of the 'three' read ends of a file over the region intervals.

    :param filename: file handed to make_read_end_file
    :param region_intervals_file: intervals passed to run_coverage
    :return: filename of the coverage output
    """
    # Coverage is run on the read end file, not on the file itself
    read_end_filename = make_read_end_file(filename, 'three')

    coverage_output = generate_random_filename()
    run_coverage(region_intervals_file,
                 read_end_filename,
                 output_filename=coverage_output)

    # The read end file is only an intermediate
    remove_files(read_end_filename)

    return coverage_output
예제 #28
0
def run_region_heatmap(args):
    """Parse the arguments, build the combined matrix and draw the heatmap.

    :param args: raw arguments, interpreted by ``parse_input``
    """
    parsed = parse_input(args)
    (end, filenames, seq_files_data, heatmap_parameters, repeat_amounts,
     tick_parameters, threads) = parsed
    regions_filename, output_prefix = filenames

    matrix_file = get_matrix(
        regions_filename,
        seq_files_data,
        end,
        repeat_amounts,
        threads,
    )

    make_heatmap(
        matrix_file,
        output_prefix,
        heatmap_parameters,
        tick_parameters,
    )

    # The matrix is only an intermediate product; discard it once plotted
    remove_files(matrix_file)
예제 #29
0
    def test_make_search_regions(self):
        # Tests both positive and negative strands with differing extensions

        regions_filename = generate_random_filename()

        with open(regions_filename, 'w') as file:
            file.write("\t".join([
                "chr1", "100", "200", "+", "positive_strand_test", "108", "111"
            ]) + "\n")
            file.write("\t".join([
                "chr1", "2", "505", "-", "negative_strand_test", "498", "501"
            ]) + "\n")

        # The annotations are expected to be identical for every extension
        expected_annotations_dict = {
            "positive_strand_test":
            ["chr1", "100", "200", "positive_strand_test", "0", "+"],
            "negative_strand_test":
            ["chr1", "2", "505", "negative_strand_test", "0", "-"]
        }

        # (extension, expected search regions) pairs, checked in this order
        cases = [
            (10, {
                "chr1":
                [["chr1", "90", "108", "positive_strand_test", "0", "+"],
                 ["chr1", "501", "515", "negative_strand_test", "0", "-"]]
            }),
            (100, {
                "chr1":
                [["chr1", "0", "108", "positive_strand_test", "0", "+"],
                 ["chr1", "501", "605", "negative_strand_test", "0", "-"]]
            }),
        ]

        try:
            for extension, expected_search_regions_dict in cases:
                search_regions_dict, annotations_dict = truQuant.make_search_regions(
                    regions_filename, extension)

                self.assertDictEqual(search_regions_dict,
                                     expected_search_regions_dict)
                self.assertDictEqual(annotations_dict,
                                     expected_annotations_dict)
        finally:
            # Delete the temporary input even when an assertion fails, so a
            # failing run does not leave stray files behind.
            remove_files(regions_filename)
예제 #30
0
    def test_make_incremented_regions(self):
        # Intended to check TES_heatmap.make_incremented_regions against a
        # small two-gene truQuant output file.
        # NOTE(review): the unconditional return below disables this test;
        # everything after it is unreachable. The reason is not visible here.
        return

        truQuant_output_file = generate_random_filename('-truQuant_output.txt')

        tQ_text = """  Gene    Chromosome      Pause Region Left       Pause Region Right      Strand  Total 5' Reads  MaxTSS  MaxTSS 5' Reads Weighted Pause Region Center    STDEV of TSSs   Gene Body Left  Gene Body Right Gene Body Distance      seq_file.bed Pause Region   seq_file.bed Gene Body
  negative_gene   chr1    5000  5150  -       194     5100  46      5100  13.306459171023036      4000  5000  600   194     18
  positive_gene  chr1    1000  1150  +       234     1100  27      1100  25.417791063821863      1150  2000  850    234     17"""

        # Break every non-empty line into whitespace-separated fields.
        # NOTE(review): split() without arguments splits on every run of
        # whitespace, so multi-word header names such as "Pause Region Left"
        # end up as several separate fields in the written header row.
        tQ_text = [line.split() for line in tQ_text.split("\n") if line]

        # Write the fields back out tab-delimited, one input line per row
        with open(truQuant_output_file, 'w') as file:
            for line in tQ_text:
                file.write("\t".join(line) + "\n")

        # Parameters passed to make_incremented_regions
        downstream_distance = 0
        upstream_distance = 200
        bp_width = 1000
        interval_size = 200

        incremented_regions_file = TES_heatmap.make_incremented_regions(truQuant_output_file,
                                                                        downstream_distance,
                                                                        upstream_distance,
                                                                        bp_width,
                                                                        interval_size)


        # Read the produced file back as lists of whitespace-separated fields
        result = []
        with open(incremented_regions_file) as file:
            for line in file:
                result.append(line.split())

        # Five 200 bp intervals per gene; each row carries the gene name, the
        # value 46 / 27 (the "MaxTSS 5' Reads" figure in the input rows) and
        # the strand.
        expected = [
            # negative_gene: intervals listed from 5300 down to 4300
            ["chr1", "5100", "5300", "negative_gene", "46", "-"],
            ["chr1", "4900", "5100", "negative_gene", "46", "-"],
            ["chr1", "4700", "4900", "negative_gene", "46", "-"],
            ["chr1", "4500", "4700", "negative_gene", "46", "-"],
            ["chr1", "4300", "4500", "negative_gene", "46", "-"],

            # positive_gene: intervals listed from 900 up to 1900
            ["chr1", "900", "1100", "positive_gene", "27", "+"],
            ["chr1", "1100", "1300", "positive_gene", "27", "+"],
            ["chr1", "1300", "1500", "positive_gene", "27", "+"],
            ["chr1", "1500", "1700", "positive_gene", "27", "+"],
            ["chr1", "1700", "1900", "positive_gene", "27", "+"],
        ]

        # NOTE(review): only the input file is removed here;
        # incremented_regions_file is not passed to remove_files.
        remove_files(truQuant_output_file)
        self.assertEqual(result, expected)