コード例 #1
0
ファイル: histogram.py プロジェクト: Fa-bula/diploma
def draw_all_plots(dataDir, outputDir):
    """Draw one bar chart per data file in dataDir and save it to outputDir.

    Every file returned by core.get_only_files(dataDir) is read line by line,
    each line converted to float. The first value is dropped (presumably a
    header/total entry -- TODO confirm against the data format) and the
    remaining values are drawn as bars of width 10 at x = 0, 10, ..., 80.
    The image is written to outputDir under the data file's base name.

    NOTE(review): `plt` is assumed to be matplotlib.pyplot, imported at
    file level outside this block.
    """
    fileNames = core.get_only_files(dataDir)
    for fileName in fileNames:
        with open(fileName) as readFile:
            y = [float(line) for line in readFile]
        del y[0]
        x = [10 * i for i in range(9)]
        # pyplot keeps drawing onto the current figure; clear it first so a
        # saved image does not also contain the bars of the files that were
        # processed before this one.
        plt.clf()
        plt.bar(x, y, width=10)
        plt.savefig(os.path.join(outputDir,
                                 os.path.basename(fileName)))
コード例 #2
0
ファイル: split.py プロジェクト: Fa-bula/diploma
#!/usr/bin/python
""" Splits replication timings of APOBEC-motif in bins with equal number
of points in each bin"""
import numpy as np
from scipy import stats
import sys
import core

if __name__ == '__main__':
    if len(sys.argv) != 3:
        sys.exit("Usage: {0} motifRepTimeDir outFile".format(sys.argv[0]))

    replication_timings = np.array([])
    file_names = core.get_only_files(sys.argv[1])
    for name in file_names:
        with open(name) as fin:
            l = np.array(map(float, fin))
            l = l[l != -1]
            replication_timings = np.concatenate((replication_timings,
                                                  l), axis=1)
            print "{0}:\n".format(name)
            temp = l[l < 0]
            print(len(temp))
    sys.exit()
    borders = np.linspace(0, 1, num=core.BIN_QUANTITY + 1, endpoint=True)
    bin_borders = stats.mstats.mquantiles(replication_timings, borders)
    with open(sys.argv[2], 'w') as fout:
        fout.write('bin_start\tbin_end\motifs\n')
        for i in range(len(bin_borders) - 1):
            print "Bin #{0}".format(i)
            point_number = sum(map(lambda x: bin_borders[i] < x and
コード例 #3
0
ファイル: frequency.py プロジェクト: Fa-bula/diploma
    )
    frequency = 1.0 * attempts_in_bin["mutations"] / attempts_in_bin["motifs"]
    attempts_in_bin["frequency"] = pandas.Series(frequency, index=attempts_in_bin.index)
    return attempts_in_bin


def estimate_conditional_probability(bin_borders, event_positions):
    """Estimate P{event occurred in bin[i] | event occurred} for every bin.

    bin_borders - bin borders, passed through to core.split_to_bins
    event_positions - positions of the events to distribute over the bins

    Returns a list with one float per bin: the share of all events that
    core.split_to_bins counted in that bin. If no event was counted at all,
    every entry is 0.0 instead of raising ZeroDivisionError.
    """
    events_in_bin = core.split_to_bins(event_positions, bin_borders)
    # Convert the total to float so the division below is a true division
    # even on Python 2, where int / int would truncate almost every share
    # to 0.
    number_of_events = float(sum(events_in_bin))
    if number_of_events == 0:
        return [0.0] * len(events_in_bin)
    return [count / number_of_events for count in events_in_bin]


if __name__ == "__main__":
    # Command line: <motifRepTimeBins> <mutationRepTimeDir> <outDir>
    if len(sys.argv) != 4:
        sys.exit("Usage: {0} motifRepTimeBins mutationRepTimeDir " "outDir".format(sys.argv[0]))
    OUT_DIR = sys.argv[3]
    # Tab-separated table read from the first argument.
    motifs_in_bin = pandas.read_csv(sys.argv[1], sep="\t")

    mutation_rep_time_files = core.get_only_files(sys.argv[2])
    for mutation_file in mutation_rep_time_files:
        with open(mutation_file) as f:
            # Build the list while the file is still open: on Python 3
            # `map(float, f)` is lazy and would only be consumed after the
            # `with` block has already closed the file.
            replication_timings = [float(line) for line in f]
        frequency = calculate_frequency(motifs_in_bin, replication_timings)
        # One tab-separated output file per input file, same base name.
        outFile = os.path.join(OUT_DIR, os.path.basename(mutation_file))
        frequency.to_csv(outFile, sep="\t")