def create_linear_peaks_from_bed(linear_sequence_fasta_file, peaks_bed_file,
                                 obg_graph_file_name, vg_graph_file_name,
                                 start_node, region):
    """Find a linear path in the graph and write BED peaks mapped onto it.

    The whole content of ``linear_sequence_fasta_file`` is used as the search
    sequence for a graph traversal starting at ``start_node``. The interval
    found is written to ``linear_path.intervalcollection`` and a few of its
    properties are printed. The peaks in ``peaks_bed_file`` are then turned
    into a ``PeakCollection`` relative to that interval and written to
    ``linear_peaks.intervalcollection``.

    Args:
        linear_sequence_fasta_file: Path of the file holding the sequence to
            search for; it is read in full, exactly as stored.
        peaks_bed_file: Path of the BED file with the linear peaks.
        obg_graph_file_name: Path of the offset-based graph file.
        vg_graph_file_name: Path of the vg graph used to retrieve sequences.
        start_node: Node the sequence search starts from.
        region: Object exposing ``start`` and ``end``, forwarded to the peak
            conversion.

    Returns:
        None. Results are written to files in the current working directory.
    """
    ob_graph = obg.GraphWithReversals.from_file(obg_graph_file_name)

    # Use a context manager so the file handle is closed deterministically
    # instead of being left to the garbage collector.
    with open(linear_sequence_fasta_file) as sequence_file:
        search_sequence = sequence_file.read()

    sequence_retriever = SequenceRetriever.from_vg_graph(vg_graph_file_name)
    traverser = GraphTraverserUsingSequence(ob_graph, search_sequence,
                                            sequence_retriever)
    traverser.search_from_node(start_node)
    linear_path_interval = traverser.get_interval_found()
    IntervalCollection([linear_path_interval
                        ]).to_file("linear_path.intervalcollection",
                                   text_file=True)

    # Diagnostic output describing the path that was found.
    print("Length")
    print(linear_path_interval.length())
    print(linear_path_interval.region_paths[0])
    print(linear_path_interval.start_position)
    print(linear_path_interval.end_position)

    linear_peaks = PeakCollection.create_from_linear_intervals_in_bed_file(
        obg_graph_file_name, linear_path_interval, peaks_bed_file,
        region.start, region.end)

    linear_peaks.to_file("linear_peaks.intervalcollection", text_file=True)
Exemple #2
0
class TestSequences(unittest.TestCase):
    """Sequence lookups of ``SequenceRetriever`` on a three-node fixture.

    Negative node ids in the intervals below are expected to yield the
    reverse complement of the corresponding node sequence.
    """

    def _test_from_vg_graph(self):
        # The name does not start with "test", so unittest does not collect
        # this method; it needs the external file "cactus-mhc.vg".
        vg_retriever = SequenceRetriever.from_vg_graph("cactus-mhc.vg")

    def setUp(self):
        """Create the node sequences and a retriever backed by them."""
        node_sequences = {1: "AAG", 2: "GAA", 3: "AGA"}
        self.nodes = node_sequences
        self.retriever = SequenceRetriever(node_sequences)

    def _assert_interval_sequence(self, interval, expected):
        """Assert that the fixture retriever yields ``expected``."""
        found = self.retriever.get_interval_sequence(interval)
        self.assertEqual(found, expected)

    def test_single_node_interval(self):
        """A full single-node interval gives the whole node sequence."""
        self._assert_interval_sequence(Interval(0, 3, [1]), self.nodes[1])

    def test_single_node_interval_with_offset(self):
        """A start offset trims the beginning of the node."""
        self._assert_interval_sequence(Interval(1, 3, [1]), "AG")

    def test_single_node_interval_with_dual_offset(self):
        """Start and end offsets trim both ends of the node."""
        self._assert_interval_sequence(Interval(1, 2, [1]), "A")

    def test_reversed_single_node_interval(self):
        """A negative node id gives the reverse complement."""
        self._assert_interval_sequence(Interval(0, 3, [-1]), "CTT")

    def test_reversed_single_node_interval_with_dual_offsetl(self):
        """Offsets on a reversed node apply to the reversed sequence."""
        self._assert_interval_sequence(Interval(1, 2, [-3]), "C")

    def test_multiple_nodes_interval(self):
        """Two forward nodes are concatenated in order."""
        self._assert_interval_sequence(Interval(0, 3, [1, 2]), "AAGGAA")

    def test_multiple_nodes_interval_second_rp_reversed(self):
        """A forward node followed by a reversed node."""
        self._assert_interval_sequence(Interval(0, 3, [1, -2]), "AAGTTC")

    def test_long_interval(self):
        """All nodes forward, then all nodes reversed, with offsets."""
        there_and_back = Interval(1, 1, [1, 2, 3, -3, -2, -1])
        self._assert_interval_sequence(there_and_back, "AGGAAAGATCTTTCC")

    def test_from_vg_json_graph(self):
        """A retriever built from a vg JSON graph returns node sequence."""
        json_retriever = SequenceRetriever.from_vg_json_graph(
            "tests/simple_graph.json")
        node_sequence = json_retriever.get_sequence(1, 0, 7)
        self.assertEqual(node_sequence, "tttcccc")
Exemple #3
0
 def write_sequence_and_intervals(self, n=100):
     """Write the sequence of every interval in ``self.intervals`` to disk.

     A sequence retriever is built from "../tests/haplo1kg50-mhc.vg" and
     stored on ``self.retriever``. Each interval's sequence becomes one
     four-line record in "simulated_sequences.fq": an "@sim<index>" header,
     the sequence, a "+" line and a line of 36 "~" characters.

     ``n`` is accepted but not used by this method.
     """
     # NOTE: writing the intervals themselves (to "simulated_intervals.py")
     # is currently disabled.
     logging.info("Getting sequences")
     self.retriever = SequenceRetriever.from_vg_graph(
         "../tests/haplo1kg50-mhc.vg")

     # Retrieve every sequence before the output file is opened.
     fetch_sequence = self.retriever.get_interval_sequence
     sequences = list(map(fetch_sequence, self.intervals))

     quality_line = "~"*36 + "\n"
     with open("simulated_sequences.fq", "w") as fastq:
         for read_number, read_sequence in enumerate(sequences):
             fastq.write("@sim" + str(read_number) + "\n")
             fastq.write(read_sequence + "\n")
             fastq.write("+\n")
             fastq.write(quality_line)
Exemple #4
0
def find_linear_path_through_chromosome(chromosome, chromend, fasta_file_name,
                                        ob_graph_file_name,
                                        vg_graph_file_name):
    """Find the graph path spelling a chromosome's linear sequence.

    The lower-cased sequence of ``chromosome`` is read from the FASTA file
    and searched for in the graph, starting at the graph's single first
    block. The interval found is written to ``22_path.intervalcollection``.

    Args:
        chromosome: Key of the record to read from the FASTA file.
        chromend: Currently unused (see the review note in the body).
        fasta_file_name: Path of the FASTA file holding the linear sequence.
        ob_graph_file_name: Path of the numpy-backed offset-based graph.
        vg_graph_file_name: Path of the vg JSON graph used for sequences.

    Returns:
        None. The path is written to a file in the working directory.

    Raises:
        AssertionError: If the graph does not have exactly one first block.
    """
    genome = Fasta(fasta_file_name)
    # NOTE(review): the slice end is hard-coded and `chromend` is ignored;
    # confirm with callers whether the slice should end at `chromend`.
    seq = str(genome[chromosome][0:50818468]).lower()

    logging.info("Creating sequence retriever")
    sequence_retriever = SequenceRetriever.from_vg_json_graph(
        vg_graph_file_name)

    graph = GraphWithReversals.from_numpy_file(ob_graph_file_name)

    start_nodes = graph.get_first_blocks()
    # Format the count, not the list itself: "%d" applied to a list raises
    # TypeError and would hide the intended assertion message.
    assert len(start_nodes) == 1, \
        "Found %d start nodes" % len(start_nodes)
    start_node = start_nodes[0]

    traverser = GraphTraverserUsingSequence(graph, seq, sequence_retriever)
    traverser.search_from_node(start_node)
    path = traverser.get_interval_found()
    # The collection holds a sequence of intervals, so the single interval
    # found is wrapped in a list.
    path = IntervalCollection([path])
    path.to_file("22_path.intervalcollection", text_file=True)
    logging.info("Done")
Exemple #5
0
    def _create_data(self):
        """Build and store a three-node chain graph per chromosome.

        For every entry of ``self.chromosomes`` this creates a graph of
        three blocks of size 10 linked in a chain, writes its linear map,
        registers an all-"A" sequence retriever, creates reads, and finally
        writes the graph and an empty sequence graph to files named after
        the chromosome. Node ids continue across chromosomes (1-3, 4-6, ...).
        """
        nodes_per_graph = 3
        first_node = 1
        for chrom_index, chrom_name in enumerate(self.chromosomes):
            node_ids = list(range(first_node, first_node + nodes_per_graph))
            blocks = {node_id: Block(10) for node_id in node_ids}
            # Each node links to its successor; the last node has no edge.
            edges = {source: [target]
                     for source, target in zip(node_ids, node_ids[1:])}
            graph = Graph(blocks, edges)

            linear_map = LinearMap.from_graph(graph)
            map_file_name = "linear_map_%s.npz" % chrom_name
            linear_map.to_file(map_file_name)
            self.linear_maps.append(map_file_name)

            node_sequences = {node_id: "A" * 10 for node_id in node_ids}
            self.sequence_retrievers.append(SequenceRetriever(node_sequences))

            self._create_reads(chrom_index, chrom_name, graph)
            first_node += nodes_per_graph

            # The graph is converted only after the reads were created.
            graph.convert_to_numpy_backend()
            empty_sequences = SequenceGraph.create_empty_from_ob_graph(graph)
            empty_sequences.to_file(chrom_name + ".nobg.sequences")
            graph.to_file(chrom_name + ".nobg")
0
 def _test_from_vg_graph(self):
     """Build a retriever from the vg graph file "cactus-mhc.vg".

     The name does not start with "test", so unittest's default loader
     does not collect this method.
     """
     vg_retriever = SequenceRetriever.from_vg_graph("cactus-mhc.vg")
Exemple #7
0
 def test_from_vg_json_graph(self):
     """A retriever built from a vg JSON graph returns the stored sequence."""
     json_retriever = SequenceRetriever.from_vg_json_graph(
         "tests/simple_graph.json")
     # Presumably (node id, start offset, end offset) - confirm against
     # SequenceRetriever.get_sequence.
     node_sequence = json_retriever.get_sequence(1, 0, 7)
     self.assertEqual(node_sequence, "tttcccc")
Exemple #8
0
 def setUp(self):
     """Create the node-sequence fixture and a retriever backed by it."""
     node_sequences = {1: "AAG", 2: "GAA", 3: "AGA"}
     self.nodes = node_sequences
     # The retriever receives the very same dict object stored on self.nodes.
     self.retriever = SequenceRetriever(node_sequences)