def run_test(self, seq_name, expected_name):
    """Build a graph from the sequences in seq_name and compare its
    formatted edge lines, order-insensitively, against expected_name."""
    with open(util.find_file(seq_name, __file__), "r") as handle:
        sequences = handle.read().splitlines()
    with open(util.find_file(expected_name, __file__), "r") as handle:
        expected = handle.read().splitlines()
    graph = main.build_graph(sequences)
    actual = list(debruijn.format_graph(graph))
    self.assertItemsEqual(actual, expected)
def run_test(self, sample_name, expected_name):
    """Find a cycle in the graph read from sample_name and verify it is a
    rotation of the cycle in expected_name: same length, and the expected
    text occurs somewhere in the doubled actual cycle."""
    with open(util.find_file(sample_name, __file__), "r") as handle:
        graph = debruijn.read_adjacency_list(handle)
    with open(util.find_file(expected_name, __file__), "r") as handle:
        expected = handle.readline().strip()
    formatted = eulerian.format_path(eulerian.find_cycle(graph))
    # Appending the cycle text (minus its leading node) to itself makes
    # every rotation of the cycle appear as a contiguous substring.
    doubled = formatted + formatted[formatted.find("-"):]
    self.assertEqual(len(formatted), len(expected))
    self.assertTrue(doubled.find(expected) != -1)
def main(fname): with open(util.find_file(sys.argv[1]), "r") as fp: k = int(fp.readline()) seq = fp.readline().strip() for k in sorted(generate_kmers(seq, k)): print k
def main(fname): weights = [] with open(util.find_file(fname), "r") as fp: for line in fp: weights.append(float(line)) protein = infer_protein(weights) print protein
def main(fname): with open(util.find_file(fname), "r") as fp: s1 = map(float, fp.readline().split()) s2 = map(float, fp.readline().split()) num, big = convolve(s1, s2) print num print big
def run_test(self, k, seq_name, expected_name):
    """Generate all k-mers of the sequence in seq_name and compare them,
    order-insensitively, against the list in expected_name."""
    with open(util.find_file(seq_name, __file__), "r") as handle:
        handle.readline()  # first line is a count; not needed here
        sequence = handle.readline().strip()
    expected = seqio.read_list(expected_name, __file__)
    self.assertItemsEqual(main.generate_kmers(sequence, k), expected)
def test_sample2(self):
    """find_encodings on sample2.txt must yield exactly the entries
    listed in expected2.txt, in any order."""
    with open(util.find_file("sample2.txt", __file__), "r") as handle:
        dna = handle.readline().strip()
        protein = handle.readline().strip()
    expected = seqio.read_list("expected2.txt", __file__)
    self.assertItemsEqual(main.find_encodings(dna, protein), expected)
def run_test(self, fname, expected):
    """Read one float per line from fname, pass the first value and the
    remainder to infer_peptide, and check the result against expected.

    Fix: assertEquals is a deprecated unittest alias -- use assertEqual.
    """
    values = []
    with open(util.find_file(fname, __file__), "r") as fp:
        for line in fp:
            values.append(float(line))
    actual = main.infer_peptide(values[0], values[1:])
    self.assertEqual(actual, expected)
def main(fname): seqs = [] with open(util.find_file(sys.argv[1]), "r") as fp: for line in fp: seqs.append(line.strip()) result = reconstruct_string(seqs) print result
def main(fname): with open(util.find_file(fname), "r") as fp: k, n = map(int, fp.readline().split()) elems = [] for i in xrange(k): arr = map(int, fp.readline().split()) elems.append(find_majority(arr)) print " ".join(map(str, elems))
def main(fname): with open(util.find_file(sys.argv[1]), "r") as fp: seqs = fp.read().splitlines() graph = build_graph(seqs) for line in debruijn.format_graph(graph): print line
def main(fname): seqs, _ = fasta.read(util.find_file(fname)) print "s:", seqs[0] print "t:", seqs[1] align, os, ot = edit_distance_alignment(seqs[0], seqs[1]) print align print os print ot
def test_sample2(self):
    """infer_protein on the weights in sample2.txt must reproduce the
    known protein string.

    Fix: assertEquals is a deprecated unittest alias -- use assertEqual.
    """
    weights = []
    with open(util.find_file("sample2.txt", __file__), "r") as fp:
        for line in fp:
            weights.append(float(line))
    expected = "NKKHNAARMINKKEVYEWDSPIDEPIMVCSVRYFNTQWIRGYKVMIKDKNNKFKAECGVPIHWIAFRDVTYSYVAHDQHCID"  # noqa
    self.assertEqual(main.infer_protein(weights), expected)
def main(fname): with open(util.find_file(fname), "r") as fp: n = int(fp.readline()) pi = map(int, fp.readline().split()) liss = longest_increasing_subsequence(n, pi) ldss = longest_decreasing_subsequence(n, pi) print " ".join(map(str, liss)) print " ".join(map(str, ldss))
def main(fname): with open(util.find_file(sys.argv[1]), "r") as fp: k = int(fp.readline()) seq = fp.readline().strip() graph = build_graph(seq, k) for line in debruijn.format_graph(graph): print line
def main(fname): with open(util.find_file(fname), "r") as fp: distances = [] while True: a = map(int, fp.readline().split()) b = map(int, fp.readline().split()) distances.append(reversal_distance(a, b)) if fp.readline() == '': break print " ".join(map(str, distances))
def main(fname): with open(util.find_file(fname), "r") as fp: bits = fp.readline().split() N = int(bits[0]) x = float(bits[1]) dna = fp.readline().strip() prob = compute_probability(N, x, dna) print round(prob, 3)
def run_test(self, sample_name, expected_name):
    """Compare build_overlap_graph's output against the pairs read from
    expected_name, ignoring order."""
    seqs = seqio.read_list(sample_name, __file__)
    expected = []
    with open(util.find_file(expected_name, __file__), "r") as handle:
        for line in handle:
            fields = line.strip().split()
            # Only columns 0 and 2 of each line are compared; the middle
            # column is skipped.
            expected.append((fields[0], fields[2]))
    self.assertItemsEqual(main.build_overlap_graph(seqs), expected)
def main(): if len(sys.argv) != 2: print "Please enter the name of the data file to load!" sys.exit(1) with open(util.find_file(sys.argv[1]), "r") as fp: fp.readline() # skip n A = map(int, fp.readline().split()) partition(A, 0, len(A) - 1) print " ".join(map(str, A))
def main(fname): # Read in the graph with open(util.find_file(fname), "r") as fp: node_dict = debruijn.read_edge_list(fp) # List of distances to each node dist = [-1 for i in node_dict.values()] dist[0] = 0 # Set up a queue of all nodes do_bfs(node_dict['1'], dist) # Print the result print " ".join(map(str, dist))
def read_input(fname):
    """Parse the FASTA-style file fname and return a list of sequences.

    A line starting with '>' ends the current record; all other lines
    are concatenated into the current sequence.  Header text itself is
    discarded.
    """
    sequences = []
    current = ""
    with open(util.find_file(fname), "r") as fp:
        for raw in fp:
            stripped = raw.strip()
            # [:1] is safe on empty lines, unlike indexing [0].
            if stripped[:1] == '>':
                if current:
                    sequences.append(current)
                current = ""
            else:
                current += stripped
    # Flush the final record, which has no trailing '>' to close it.
    if current:
        sequences.append(current)
    return sequences
def main(fname): # Read in the graph with open(util.find_file(fname), "r") as fp: node_dict = debruijn.read_edge_list(fp) # Build a sorted list out of the nodes nodes = node_dict.values() nodes.sort(key=lambda x: int(x.label)) # Go through each node and sum the degree of its neighbors counts = [] for n in nodes: counts.append(neighbor_degree(n)) # Print the result print " ".join(map(str, counts))
def read_data(): if len(sys.argv) != 2: print "You must enter the name of the file to load!" sys.exit(1) seqs = [] lineno = 0 with open(util.find_file(sys.argv[1])) as fp: for line in fp: lineno += 1 if lineno == 1: k, d = map(int, line.split()) else: seq = line.strip() seqs.append(seq) return (k, d, seqs)
def main(fname): # Read in the graph with open(util.find_file(fname), "r") as fp: node_dict = debruijn.read_edge_list(fp) # Set up a set to keep track of the nodes that have been visited visited = set() # Set up a queue of all nodes queue = [] for n in node_dict.values(): queue.append(n) # Count the connected components components = 0 while len(queue) > 0: n = queue.pop() if n.label not in visited: components += 1 search(visited, n) # Print the result print components
def main(fname): with open(util.find_file(fname), "r") as fp: dna = fp.readline().strip() protein = fp.readline().strip() for e in find_encodings(dna, protein): print e
def main(fname): seqs, _ = fasta.read(util.find_file(fname)) print count_matchings(seqs[0])
import sys from rosalind.common import util if len(sys.argv) != 2: print "Please enter the name of the data file to load!" sys.exit(1) nums = [] with open(util.find_file(sys.argv[1]), "r") as fp: n, m = map(int, fp.readline().split()) for line in fp.readlines(): a, b = map(int, line.split()) nums.append(a) nums.append(b) deg = [0] * n for i in xrange(len(nums)): deg[nums[i] - 1] += 1 print " ".join(map(str, deg))
def main(fname): seqs, _ = fasta.read(util.find_file(fname)) corrections = find_corrections(seqs) for c in corrections: print c
def main(fname): seqs, _ = fasta.read(util.find_file(fname)) lcs = find_longest_common_subsequence(seqs[0], seqs[1]) print lcs
def main(fname): with open(util.find_file(sys.argv[1]), "r") as fp: graph = debruijn.read_adjacency_list(fp) path = eulerian.find_cycle(graph) print eulerian.format_path(path)