Esempio n. 1
0
 def __init__(self, graph_name="graph.obg"):
     """Run the simulated peak-calling evaluation from start to finish.

     Loads the graph stored at ``self.data_folder + graph_name``, builds
     an interval collection from ``self.generate_read_set(135, 36)`` and
     2000 intervals drawn from an ``IntervalSimulator``, calls
     ``self.map()``, runs ``CallPeaks`` on the intervals read back from
     "mapped_reads.gam", and finally prints how many of the resulting
     max paths satisfy ``self.check_peak`` out of the total.

     :param graph_name: file name of the graph, relative to
         ``self.data_folder``.
     """
     self.graph = obg.GraphWithReversals.from_file(
         self.data_folder + graph_name)

     reads = obg.IntervalCollection(list(self.generate_read_set(135, 36)))

     # Append 2000 simulated intervals to the generated read set.
     simulator = IntervalSimulator(self.graph, 36)
     noise_reads = [simulator.generate_interval() for _ in range(2000)]
     reads.intervals += noise_reads
     print(len(reads.intervals))

     for read in reads:
         read.graph = self.graph

     info = ExperimentInfo(None, 135, 36)
     self.intervals = reads.intervals
     # NOTE(review): presumably self.map() produces "mapped_reads.gam"
     # from self.intervals -- confirm against its definition.
     self.map()

     # The gam file is parsed twice on purpose so that CallPeaks receives
     # two independent collection objects (a single parsed collection may
     # not be safe to share -- TODO confirm).
     mapped_reads = vg_gam_file_to_interval_collection(
         None, "mapped_reads.gam", self.graph)
     mapped_reads_again = vg_gam_file_to_interval_collection(
         None, "mapped_reads.gam", self.graph)
     caller = CallPeaks(
         self.graph,
         mapped_reads,
         mapped_reads_again,
         info,
         has_control=False,
         out_file_base_name="simulated_",
         linear_map="../tests/haplo1kg50-mhc.lm")
     caller.run()
     called_peaks = caller.q_value_peak_caller.max_paths

     for summit in self.summits:
         print(summit)

     # Count the called peaks accepted by check_peak.
     matched = sum(1 for peak in called_peaks if self.check_peak(peak))
     print("%s/%s" % (matched, len(called_peaks)))
Esempio n. 2
0
    def from_vg_json_reads_and_graph(cls, json_file_name, graph_file_name):
        """Alternate constructor from a vg JSON read file and a graph file.

        The graph is loaded with ``GraphWithReversals.from_numpy_file``.
        The reads are parsed once to build a ``HaploTyper`` and obtain its
        maximum interval through the graph, then parsed a second time to
        collect each read's ``start_position``.

        :param json_file_name: vg JSON file holding the mapped reads.
        :param graph_file_name: numpy-serialised graph file.
        :return: ``cls(positions, indexed_interval)``, where ``positions``
            is a lazy generator of start positions.
        """
        logging.info("Reading graph %s", graph_file_name)
        graph = obg.GraphWithReversals.from_numpy_file(graph_file_name)

        logging.info("Getting indexed interval through graph")
        typer = HaploTyper(
            graph,
            obg.IntervalCollection(
                vg_json_file_to_intervals(json_file_name, graph)))
        typer.build()
        indexed_interval = typer.get_maximum_interval_through_graph()

        # Parse the file again rather than reusing the first result
        # (presumably the parser yields a one-shot iterator -- confirm).
        start_positions = (
            read.start_position
            for read in vg_json_file_to_intervals(json_file_name, graph))

        return cls(start_positions, indexed_interval)
Esempio n. 3
0
 def write_sequence_and_intervals(self, n=100):
     """Simulate ``n`` intervals and write them and their sequences to disk.

     Stores the simulated intervals on ``self.intervals`` and writes them
     to "simulated_intervals.py"; stores a ``SequenceRetriever`` on
     ``self.retriever`` and writes one four-line FASTQ record per
     interval to "simulated_sequences.fq".

     :param n: number of intervals to simulate.
     """
     logging.info("Reading graph")
     logging.info("Simulating intervals")
     simulator = IntervalSimulator(self.graph, 36)
     self.intervals = [simulator.generate_interval() for _ in range(n)]
     simulated = obg.IntervalCollection(self.intervals)
     simulated.to_file("simulated_intervals.py")

     logging.info("Getting sequences")
     self.retriever = SequenceRetriever.from_vg_graph(
         "../tests/haplo1kg50-mhc.vg")
     read_sequences = [
         self.retriever.get_interval_sequence(interval)
         for interval in self.intervals
     ]

     # NOTE(review): the quality line is fixed at 36 characters, the same
     # number passed to IntervalSimulator above; presumably every
     # sequence is 36 bases long -- confirm.
     quality_line = "~"*36 + "\n"
     with open("simulated_sequences.fq", "w") as fastq:
         for index, sequence in enumerate(read_sequences):
             fastq.writelines((
                 "@sim" + str(index) + "\n",
                 sequence + "\n",
                 "+\n",
                 quality_line,
             ))
Esempio n. 4
0
def get_linear_paths_in_graph(ob_graph, vg_graph, write_to_file_name=None):
    """Convert every path of ``vg_graph`` to an interval, keyed by name.

    Each path is converted with ``path.to_obg(ob_graph=ob_graph)``. Paths
    whose conversion is falsy are logged and skipped; every other result
    gets the path's name assigned and is stored under that name.

    :param ob_graph: graph handed to ``to_obg``; must not be None.
    :param vg_graph: object exposing an iterable ``paths`` attribute.
    :param write_to_file_name: when given, the collected intervals are
        also written to this file as a text-format IntervalCollection.
    :return: dict mapping interval name to the converted interval.
    """
    assert ob_graph is not None

    paths_by_name = {}
    for vg_path in vg_graph.paths:
        converted = vg_path.to_obg(ob_graph=ob_graph)
        if converted:
            converted.name = vg_path.name
            print("Path name: %s" % vg_path.name)
            # Key on the name as stored on the interval itself.
            paths_by_name[converted.name] = converted
        else:
            logging.info("OBG interval for path " + vg_path.name +
                         " is False. Skipping.")

    if write_to_file_name is None:
        return paths_by_name

    logging.info("Writing linear path to %s" % write_to_file_name)
    obg.IntervalCollection(paths_by_name.values()).to_file(
        write_to_file_name, text_file=True)
    return paths_by_name
Esempio n. 5
0
def vg_json_file_to_interval_collection(vg_mapping_file_name,
                                        offset_based_graph=None):
    """Wrap the intervals parsed from a vg JSON file in an IntervalCollection.

    :param vg_mapping_file_name: vg JSON mapping file to parse.
    :param offset_based_graph: graph forwarded unchanged to
        ``vg_json_file_to_intervals`` (may be None).
    :return: ``obg.IntervalCollection`` built from the parsed intervals.
    """
    parsed_intervals = vg_json_file_to_intervals(
        vg_mapping_file_name, offset_based_graph)
    return obg.IntervalCollection(parsed_intervals)
Esempio n. 6
0
    def test_simulations(self, n=5):
        """Simulate ``n`` reads, map them, and compare with the originals.

        Writes the simulated sequences via
        ``self.write_sequence_and_intervals``, maps
        "simulated_sequences.fq" into "mapped_reads.gam", reads the mapped
        intervals back, and hands both interval lists to
        ``self.compare_intervals``.

        :param n: number of intervals to simulate.
        """
        # Aliased on import so the builtin ``map`` is not shadowed here.
        from pyvg.mapping import map as map_reads
        from pyvg.util import vg_gam_file_to_interval_collection

        self.write_sequence_and_intervals(n)
        graph = obg.GraphWithReversals.from_file("../tests/graph.obg")

        gam_file_name = "mapped_reads.gam"
        map_reads(
            "simulated_sequences.fq",
            "../tests/vgdata/haplo1kg50-mhc.xg",
            "../tests/vgdata/haplo1kg50-mhc.gcsa",
            gam_file_name)

        mapped_intervals = vg_gam_file_to_interval_collection(
            None, gam_file_name, graph)
        simulated = list(self.intervals)
        mapped = list(mapped_intervals)
        self.compare_intervals(simulated, mapped)

if __name__ == "__main__":
    # Seed the global ``random`` generator with a fixed value so repeated
    # runs draw the same pseudo-random numbers.
    random.seed(2000)
    # NOTE(review): presumably the constructor runs the whole evaluation
    # -- confirm against the EvaluateSimulations class.
    EvaluateSimulations()
    # Terminate explicitly. The original called the ``site``-provided
    # ``exit()`` helper (intended for interactive use) and then carried a
    # stale, unreachable CallPeaks experiment after it; that dead code has
    # been removed and the exit no longer depends on ``site``.
    raise SystemExit