Exemple #1
0
def output_data(data):
    """
    Print a tab-delimited table with one row per TSR and one value column per file.

    :param data: sequence of (filename, file_data) pairs, where file_data is indexed by TSR
    :return: None, every row is emitted through print_tab_delimited
    """
    # Header: the five fixed region columns followed by each file's basename
    header = ["Chromosome", "Left", "Right", "Name", "Strand"]
    for path, _ in data:
        header.append(path.split("/")[-1])
    print_tab_delimited(header)

    # The first file's data decides which TSRs are written out.
    # NOTE(review): each item iterated here must unpack into a pair whose
    # second element is the TSR - confirm against the producer of `data`.
    first_file_data = data[0][1]
    for _, tsr in first_file_data:
        row = list(tsr)
        for _, file_data in data:
            row.append(file_data[tsr])
        print_tab_delimited(row)
Exemple #2
0
def output_pausing_distances(pausing_distances, sequencing_files):
    """
    Print one row per index with one column for each entry of pausing_distances.

    :param pausing_distances: sequence of indexable sequences; the first one sets the row count
    :param sequencing_files: file paths whose basenames are used as column headers
    :return: None, every row is emitted through print_tab_delimited
    """
    # Header row: a label column followed by the basename of every file
    header = ["Transcript Length"]
    header.extend(path.split("/")[-1] for path in sequencing_files)
    print_tab_delimited(header)

    # Row i holds the index itself and then element i of every sequence
    row_count = len(pausing_distances[0])
    for index in range(row_count):
        row = [index]
        row.extend(per_file[index] for per_file in pausing_distances)
        print_tab_delimited(row)
def output_data(combined_dict, sequencing_files, upstream_distance,
                interval_size):
    """
    Print the combined values as a table with one row per dictionary entry.

    :param combined_dict: dictionary whose values are lists (they are appended to the row)
    :param sequencing_files: file paths whose basenames are used as column headers
    :param upstream_distance: the first row is labelled with the negative of this value
    :param interval_size: amount added to the position label after every row
    :return: None, every row is emitted through print_tab_delimited
    """
    # Header row first
    basenames = [path.split("/")[-1] for path in sequencing_files]
    print_tab_delimited(["Position"] + basenames)

    # The dictionary keys are not printed; the label is rebuilt by stepping
    # from -upstream_distance in increments of interval_size.
    current_position = -1 * upstream_distance
    for values in combined_dict.values():
        print_tab_delimited([current_position] + values)
        current_position += interval_size
def output_data(avgs_dict, region_length):
    """
    Print the per-position averages, one column for each key of avgs_dict.

    :param avgs_dict: dictionary mapping a column name to an indexable sequence of averages
    :param region_length: number of rows to print
    :return: None, every row is emitted through print_tab_delimited
    """
    # Header row: the dictionary keys become the column names
    print_tab_delimited(["Position"] + list(avgs_dict))

    # Labels start half a region below zero. Under Python 3 the "/" makes
    # this a float, so the labels are floats as well.
    current = region_length / 2 * -1

    for index in range(region_length):
        # There is no position 0: the label jumps from -1 straight to 1
        if current == 0:
            current += 1

        row = [current]
        row.extend(series[index] for series in avgs_dict.values())
        print_tab_delimited(row)

        current += 1
def run_sequence_searches(regions_file, searching_sequences, region_length):
    """
    Search the sequence of every region for the requested motifs and print a table.

    :param regions_file: path to a bed file with exactly six whitespace-separated
        columns per line (chromosome, left, right, gene name, score, strand)
    :param searching_sequences: iterable of three-element search tuples whose first
        element is the sequence used as the column header
    :param region_length: passed through unchanged to find_sequences
    :return: None, the table is emitted through print_tab_delimited
    """
    # Keys are gene names. Each value holds the region's "Sequence", its split
    # bed fields under "Region" and a "Motifs" dict of sequence -> flag.
    master_dict = {}

    # 1. Read in the contents of the bed file
    with open(regions_file) as bed_file:
        bed_lines = bed_file.readlines()

    fasta_file = run_getfasta(regions_file)
    try:
        fasta_sequences = read_fasta(fasta_file)
    finally:
        # Remove the intermediate fasta file even when reading it fails
        remove_files(fasta_file)

    # 2. Fill the dictionary. The i-th fasta sequence is paired with the i-th
    # bed line; a repeated gene name replaces the earlier entry.
    for i, line in enumerate(bed_lines):
        fields = line.split()
        # Unpacking enforces the six-column layout; only the name is needed
        _, _, _, gene_name, _, _ = fields

        master_dict[gene_name] = {
            "Sequence": fasta_sequences[i],
            "Region": fields,
            # Every motif starts as "not found"
            "Motifs": {sequence: False for sequence, _, _ in searching_sequences},
        }

    # 3. Run every search on every region.
    # NOTE(review): find_sequences presumably updates the "Motifs" flags in
    # place - confirm against its definition.
    for entry in master_dict.values():
        for search in searching_sequences:
            find_sequences(entry, search, region_length)

    # 4. Output the results
    print_tab_delimited(
        ["Chromosome", "Left", "Right", "Gene", "Score", "Strand"] +
        [search[0] for search in searching_sequences])

    for entry in master_dict.values():
        print_tab_delimited(entry["Region"] + list(entry["Motifs"].values()))
def output_data(pausing_distances):
    """
    Print one row per gene with one value column per sequencing file.

    :param pausing_distances: iterable of (ret_dict, seq_filename) pairs, where
        ret_dict maps a gene name to the value for that file
    :return: None, every row is emitted through print_tab_delimited
    """
    # Regroup the per-file results so each gene maps to {sequencing file: value}
    by_gene = {}
    for ret_dict, seq_filename in pausing_distances:
        for gene_name, value in ret_dict.items():
            by_gene.setdefault(gene_name, {})[seq_filename] = value

    header_pending = True
    for gene_name, per_file in by_gene.items():
        if header_pending:
            # The header is taken from the files recorded for the first gene
            # and is only printed when there is at least one gene.
            print_tab_delimited(
                ["Gene"] + [path.split("/")[-1] for path in per_file])
            header_pending = False

        print_tab_delimited([gene_name] + list(per_file.values()))
Exemple #7
0
def output_metaplot_data(averages, region_length, prime_name):
    """
    Print the metaplot averages as a tab-delimited table.

    :param averages: averages list from the metaplots programs, as (data, filename) pairs
    :type averages: list
    :param region_length: length of the region
    :type region_length: int
    :param prime_name: either "five prime" or "three prime"; a falsy value leaves it
        out of the column names
    :type prime_name: str
    :return: None, every row is emitted through print_tab_delimited
    """
    # Split the pairs into the data of every file and the matching filenames
    avgs_data, files = zip(*averages)

    # For each position, flatten the values of all files into a single row
    rows = [list(chain.from_iterable(group)) for group in zip(*avgs_data)]

    # Two columns per file: sense strand first, then divergent strand
    header = ["Position"]
    for path in files:
        label = path.split("/")[-1]
        if prime_name:
            # A space separates the file name from the prime name
            label = label + " " + prime_name
        header.append(label + " sense strand")
        header.append(label + " divergent strand")

    print_tab_delimited(header)

    for index, values in enumerate(rows):
        # Centre the labels on the middle of the region; non-negative offsets
        # are shifted up by one so no row is labelled 0.
        position = index - (region_length / 2)
        if position >= 0:
            position += 1

        print_tab_delimited([position] + values)
Exemple #8
0
def output_data(expanded_regions):
    """
    Print every expanded region as one tab-delimited line.

    :param expanded_regions: iterable of rows, each passed as-is to print_tab_delimited
    :return: None
    """
    for fields in expanded_regions:
        print_tab_delimited(fields)