def test_old_parser(self):
    """Pull the first record from parse_data and check its name and sequence.

    Also asserts that ``self.big_data`` has length 0 once the first record
    has been fetched.
    """
    expected_sequence = 'ATGGCAGTCCGAGTTCACGAACCGAATACGTTTAATAGGTAGTCGCCACCACTTAGACGGGTTCTCGCCTATAGGGAACATTAAAGGCGTGGAATTCG'
    records = parse_data(self.big_data)
    first_name, first_sequence = next(records)
    self.assertEqual(first_name, 'Rosalind_3496')
    self.assertEqual(first_sequence, expected_sequence)
    # The test expects the input list to be empty at this point.
    self.assertEqual(len(self.big_data), 0)
def test_old_parser(self):
    """Verify the first (name, sequence) pair produced by parse_data.

    After one record is taken, ``self.big_data`` is asserted to be empty.
    """
    record_iter = parse_data(self.big_data)
    head = next(record_iter)
    # The record must unpack into exactly a name and a sequence.
    label, dna = head
    self.assertEqual(label, 'Rosalind_3496')
    self.assertEqual(
        dna,
        'ATGGCAGTCCGAGTTCACGAACCGAATACGTTTAATAGGTAGTCGCCACCACTTAGACGGGTTCTCGCCTATAGGGAACATTAAAGGCGTGGAATTCG',
    )
    remaining = len(self.big_data)
    self.assertEqual(remaining, 0)
def setUp(cls):
    """Load both chunk fixtures and the expected superstrings.

    Sets four attributes on ``cls``:

    * ``matches`` -- compare_all_pairs_both_ways over 'CENPA_3chunks.txt'
    * ``more_matches`` -- the same over 'CENPA_8chunks.txt'
    * ``ss`` -- stripped first line of 'CA_superstring3_expected.txt'
    * ``ss8`` -- all stripped lines of 'CA_superstring8_expected.txt', joined
    """
    fixtures = (
        ('CENPA_3chunks.txt', 'matches'),
        ('CENPA_8chunks.txt', 'more_matches'),
    )
    for path, attribute in fixtures:
        with open(path, 'r') as handle:
            raw_lines = handle.readlines()
        parsed = list(parse_data(raw_lines))
        setattr(cls, attribute, compare_all_pairs_both_ways(parsed))

    with open('CA_superstring3_expected.txt', 'r') as handle:
        first_line = handle.readline()
    cls.ss = first_line.strip()

    with open('CA_superstring8_expected.txt', 'r') as handle:
        # The expected superstring is wrapped over several lines in the file.
        cls.ss8 = ''.join(row.strip() for row in handle.readlines())
def get_rescore_from_labeled(rescore, data):
    """
    Collect the parsed entries for every name mentioned in rescore.

    This should not be needed.

    :param rescore: list of (name,name) tuples
    :param data: raw data
    :return: list of (name, value) tuples, one per distinct name in rescore,
        in order of first appearance; value is None when the name is not
        present in the parsed data
    """
    # Items 0 and 1 of each parsed record are used as key and value
    # (presumably name and sequence -- confirm against parse_data).
    labeled_dict = {record[0]: record[1] for record in parse_data(data)}

    losers = {}
    for pair in rescore:
        # Look names up in the dict built from `data`, not in any
        # module-level `labeled` list (a list has no .get()).
        losers[pair[0]] = labeled_dict.get(pair[0])
        losers[pair[1]] = labeled_dict.get(pair[1])

    # convert back to tuples for rescoring
    return list(losers.items())
def test_multiple_sequences(self):
    """Parse ``self.big_data`` and check the reported name and GC percent."""
    parsed = parse_data(self.big_data)
    # parse_data's result must unpack into exactly two values here.
    label, dna = parsed
    self.assertEqual(label, 'Rosalind_0808')

    result = gc_content(label, dna, debug=True)
    label, gc_percent = result
    self.assertEqual(gc_percent, 60.919540)
def test_raw_parsing_one(self):
    """Check the parsed sequence length matches the raw lines it came from."""
    # Raw lines 1 and 2, stripped and concatenated, total 87 characters.
    raw_sequence = self.data[1].strip() + self.data[2].strip()
    self.assertEqual(len(raw_sequence), 87)

    label, dna = parse_data(self.data)
    self.assertEqual(label, 'Rosalind_0808')
    self.assertEqual(len(dna), 87)
def test_gc_content(self):
    """Feed the parsed record to gc_content and check the returned percent."""
    label, dna = parse_data(self.data)
    outcome = gc_content(label, dna, debug=True)
    # gc_content's result is unpacked as (name, percent).
    label, gc_percent = outcome
    self.assertEqual(gc_percent, 60.919540)
try: newedge = next(edges) listofedges.append(newedge) except StopIteration: break return listofedges if __name__=='__main__': with open('CENPA_8chunks.txt', 'r') as f: data = f.readlines() labeled = list(parse_data(data)) expected_pairs=8 matches = compare_all_pairs_both_ways(labeled) if len(matches) != expected_pairs: print("warning! expected {} but found {}" .format(expected_pairs, len(matches))) listofedges = make_listofedges(matches) newgraph = Graph(listofedges) newgraph.sort_edges() superstring = newgraph.flatten_graph(matches) print(superstring)
def setUp(cls):
    """Read 'CENPA_3chunks.txt' and store the parsed records on ``cls.labeled``."""
    with open('CENPA_3chunks.txt', 'r') as handle:
        raw_lines = handle.readlines()
    parsed_records = parse_data(raw_lines)
    cls.labeled = list(parsed_records)
def setUp(cls):
    """Parse 'CENPA_3chunks.txt' and store its pairwise matches on ``cls.matches``."""
    with open('CENPA_3chunks.txt', 'r') as handle:
        raw_lines = handle.readlines()
    parsed_records = list(parse_data(raw_lines))
    cls.matches = compare_all_pairs_both_ways(parsed_records)
if result[1] not in matches[result[0]]: matches[result[0]].append((result[1], result[2])) if debug == True: for k, v in matches.items(): print(k[0], [(x[0][0], x[1]) for x in v]) return matches if __name__ == '__main__': with open('CENPA_3chunks.txt', 'r') as f: data = f.readlines() labeled = list(parse_data(data)) expected_pairs = 3 matches = compare_all_pairs_both_ways(labeled, debug=True) #matches = itertools_combinations(labeled) if len(matches) != expected_pairs: print("warning! expected {} but found {}".format( expected_pairs, len(matches))) #to make a graph with Gephi: matches_to_graph(matches) #to make results file for Rosalind matches_to_rosalind(matches)
def test_old_parser(self):
    """Check that parse_data yields 100 records from ``self.big_data``."""
    records = parse_data(self.big_data)
    record_count = len(list(records))
    self.assertEqual(record_count, 100)