def foun(f):
    '''Read the problem input from f and write a log10 matrix to stdout.

    The first input line holds two integers (N, m); the second holds a list
    of integers. Column j of the output is log10 of the first component of
    each of the first m items yielded by ro.wf_pmf(2*N, a[j]).
    NOTE(review): wf_pmf is presumably a Wright-Fisher PMF generator --
    confirm against rosutil.
    '''
    lines = ro.read_lines(f)
    N, m = map(int, lines[0].split())
    counts = map(int, lines[1].split())
    log_probs = np.zeros((m, len(counts)))
    for col, count in enumerate(counts):
        # Only the first m items of the generator are consumed.
        first_m = it.islice(ro.wf_pmf(2*N, count), m)
        log_probs[:, col] = [np.log10(item[0]) for item in first_m]
    np.savetxt(sys.stdout, log_probs, '%f')
def test_ksim(f, debug=0, impl=max_repeats): '''Main driver to solve the LOCA problem.''' lines = ro.read_lines(f) k, s, t = int(lines[0]), lines[1], lines[2] a_bf = sorted(max_repeats_bf(t, s, k)) a = sorted(impl(t, s, k, debug=debug)) if debug >= 1: print f print 'actual', a print 'bf ', a_bf print len(a_bf) assert_equal(a, a_bf, err_msg='Wrong maximum repeats set for file_name %s' % (f,))
def conv(f, tol=1e-10):
    '''Parse two numeric rows from f and return max_multiplicity's result.

    Each of the first two input lines is parsed with np.loadtxt. The values
    returned by max_multiplicity are joined with newlines.
    NOTE(review): tol is accepted but not used in this function.
    '''
    lines = ro.read_lines(f)
    first = np.loadtxt(StringIO.StringIO(lines[0]))
    second = np.loadtxt(StringIO.StringIO(lines[1]))
    result = max_multiplicity(first, second)
    return ro.join_list(result, delimiter='\n')
def lexv(f): lines = list(ro.read_lines(f)) a, n = lines[0].split(), int(lines[1]) for x in sorted(it.chain.from_iterable(it.product(xrange(len(a)), repeat=r) for r in xrange(1, n + 1))): print ''.join(a[i] for i in x)
def refs(f):
    '''Return the 'Count' field of an Entrez nucleotide search built from f.

    The input file must contain exactly four lines: organism, start, stop
    and before, which are substituted into the search term below. The
    search handle is closed once the result has been parsed.
    '''
    org, start, stop, before = ro.read_lines(f)
    term = '%s[Organism] AND srcdb_refseq[PROP] AND %s:%s[Sequence length] AND 1986/01/01:%s[dp]' % (org, start, stop, before)
    handle = Entrez.esearch(db='nucleotide', term=term)
    try:
        record = Entrez.read(handle)
    finally:
        # Release the network handle even if parsing fails.
        handle.close()
    return int(record['Count'])
def scsp(f):
    '''Pass the lines of f as positional arguments to shortest_supersequence.'''
    strings = ro.read_lines(f)
    return shortest_supersequence(*strings)
def prob(f):
    '''Return space-separated log10 values computed from the input in f.

    Line 0 is a string s; line 1 is a list of floats a. For each value the
    result is gc*log10(a/2) + at*log10((1-a)/2), where gc = ro.gc_count(s)
    and at = len(s) - gc.
    '''
    lines = ro.read_lines(f)
    s = lines[0]
    a = np.array(map(float, lines[1].split()))
    gc = ro.gc_count(s)
    at = len(s) - gc
    log_probs = gc*np.log10(0.5*a) + at*np.log10(0.5*(1-a))
    return ' '.join(str(value) for value in log_probs)
def itwv(f): '''Main driver to solve this problem.''' lines = ro.read_lines(f) m = itwv_matrix(lines[0], lines[1:]) for j in xrange(len(m)): print ' '.join('%d' % (m[j][k],) for k in xrange(len(m[j])))
def cntq(f):
    '''Return the pair (cntq_ncgll(n), num_quartets(tree)) for the input f.

    Line 0 of the input is the integer n; line 1 is a Newick string parsed
    with rt.read_newick. The file is read once and both values come from
    the same set of lines.
    '''
    lines = ro.read_lines(f)
    n = int(lines[0])
    tree = rt.read_newick(StringIO.StringIO(lines[1]))
    return cntq_ncgll(n), num_quartets(tree)
def gbk(f):
    '''Return rd.num_records for a nucleotide query built from f.

    The input file must contain exactly three lines: organism, start and
    stop, which are substituted into the query term.
    '''
    org, start, stop = ro.read_lines(f)
    term = '%s[Organism] AND %s:%s[dp]' % (org, start, stop)
    return rd.num_records('nucleotide', term)
def lexf(f):
    '''Return all length-n strings over the alphabet in f, one per line.

    Line 0 of the input is the whitespace-separated alphabet, line 1 is n.
    Strings appear in the order produced by itertools.product.
    '''
    lines = list(ro.read_lines(f))
    alphabet = lines[0].split()
    n = int(lines[1])
    words = ("".join(letters) for letters in it.product(alphabet, repeat=n))
    return "\n".join(words)
def one_d(f):
    '''Return the space-joined result of clumps(s, k, L, t) for the input f.

    Line 0 of the input is the string s; line 1 holds the three integers
    k, L and t.
    '''
    lines = ro.read_lines(f)
    text = lines[0]
    k, L, t = map(int, lines[1].split())
    found = clumps(text, k, L, t)
    return ' '.join(found)
def rstr(f):
    '''Return p(int, float, string) for the values read from f.

    Line 0 of the input holds an integer followed by a float; line 1 is
    passed through unchanged as the third argument.
    '''
    lines = ro.read_lines(f)
    fields = lines[0].split()
    count = int(fields[0])
    value = float(fields[1])
    return p(count, value, lines[1])
def chbp(f):
    '''Build the arguments for to_newick_tree from f and return its result.

    Line 0 of the input is a whitespace-separated list of labels. Every
    following line is converted character by character to integers, giving
    one matrix row per line.
    '''
    lines = ro.read_lines(f)
    labels = np.array(lines[0].split())
    rows = [[int(ch) for ch in line] for line in lines[1:]]
    table = np.array(rows)
    return to_newick_tree(table, labels)
''' ============================================================ http://rosalind.info/problems/hamm Given: Two DNA strings s and t of equal length (not exceeding 1 kbp). Return: The Hamming distance dH(s,t). ============================================================ ''' import rosalind.rosutil as ro if __name__ == "__main__": # import doctest # doctest.testmod() print ro.hamm(*ro.read_lines('rosalind_hamm_sample.dat')) print ro.hamm(*ro.read_lines('rosalind_hamm.dat'))
''' ============================================================ http://rosalind.info/problems/spec The prefix spectrum of a weighted string is the collection of all its prefix weights. Given: A list L of n (n<=100) positive real numbers. Return: A protein string of length n-1 whose prefix spectrum is equal to L (if multiple solutions exist, you may output any one of them). Consult the monoisotopic mass table. ============================================================ ''' import rosalind.rosutil as ro, numpy as np spec = lambda f: ''.join(map(ro.aa_of_mass, np.diff(map(float, ro.read_lines(f))))) if __name__ == "__main__": print spec('rosalind_spec_sample.dat') print spec('rosalind_spec.dat')
def one_f(f):
    '''Return ro.join_list of apm(s, p, d) for the three lines of f.

    The input file must contain exactly three lines, in the order p, s, d.
    d is converted to an integer.
    '''
    pattern, text, distance = ro.read_lines(f)
    matches = apm(text, pattern, int(distance))
    return ro.join_list(matches)
def one_c(f):
    '''Return the space-joined results of ro.find_all(text, pattern).

    The input file must contain exactly two lines: the pattern, then the text.
    '''
    pattern, text = ro.read_lines(f)
    hits = ro.find_all(text, pattern)
    return ' '.join(str(hit) for hit in hits)
def bins(f): '''Main driver to solve this problem.''' lines = ro.read_lines(f) a, k = ro.to_int_list(lines[2]), ro.to_int_list(lines[3]) for v in k: print '%s ' % (to_one_based(bin_search(a, v))), print ''
def grep(f):
    '''Return the results of possible_assemblies on the lines of f, one per line.'''
    reads = ro.read_lines(f)
    assemblies = possible_assemblies(reads)
    return ro.join_list(assemblies, delimiter='\n')
''' ============================================================ http://rosalind.info/problems/subs Given: Two DNA strings s and t (each of length at most 1 kbp). Return: All locations of t as a substring of s. (1-based) ============================================================ ''' from rosalind.rosutil import read_lines def subs(s, t): n, t0, last = len(t), t[0], len(s) - len(t) # @UnusedVariable return [j + 1 for j in (j for (j, sj) in enumerate(s) if j <= last and sj == t0) if s[j:j + n] == t] if __name__ == "__main__": # import doctest # doctest.testmod() lines = read_lines('rosalind_subs_sample.dat') print ' '.join(map(str, subs(lines[0], lines[1]))) lines = read_lines('rosalind_subs.dat') print ' '.join(map(str, subs(lines[0], lines[1])))
def test_possible_assemblies(file_name):
    '''Compare possible_assemblies on <file_name>.dat with <file_name>.out.

    Both files are looked up under ro.ROSALIND_HOME. The computed and the
    expected lists are sorted before they are compared.
    '''
    reads = ro.read_lines('%s/%s.dat' % (ro.ROSALIND_HOME, file_name))
    actual = possible_assemblies(reads)
    expected = ro.read_lines('%s/%s.out' % (ro.ROSALIND_HOME, file_name))
    assert_equal(sorted(actual), sorted(expected), 'Wrong assembly set')
def gasm(f):
    '''Return min_superstring applied to the lines of f.'''
    reads = ro.read_lines(f)
    return min_superstring(reads)
def ebin(f):
    '''Return ro.join_list of n*p for every float p on line 1 of f.

    Line 0 of the input is the integer n; line 1 is a whitespace-separated
    list of floats.
    '''
    lines = ro.read_lines(f)
    n = int(lines[0])
    values = map(float, lines[1].split())
    scaled = (n * value for value in values)
    return ro.join_list(scaled)
def asmq(f): '''Main driver to solve this problem.''' S = ro.read_lines(f) print N(S, 0.5), N(S, 0.75)
def ksim(f, debug=0): '''Main driver to solve the LOCA problem.''' lines = ro.read_lines(f) for i, j, _ in max_repeats(lines[2], lines[1], int(lines[0]), debug=debug): print i + 1, j - i
def one_h(f):
    '''Return the most frequent keys after adding reverse-complement counts.

    Line 0 of the input is the string s; line 1 holds the integers k and d.
    The counter from ro.possible_kmers_counter(s, k, d) is added to a copy
    of itself whose keys are mapped through ro.revc. ro.most_frequent then
    selects from the combined counter.
    '''
    lines = ro.read_lines(f)
    s = lines[0]
    k, d = map(int, lines[1].split())
    forward = ro.possible_kmers_counter(s, k, d)
    reverse = Counter(dict((ro.revc(kmer), count) for kmer, count in forward.iteritems()))
    return ro.join_list(ro.most_frequent(forward + reverse))
def qrtd(f):
    '''Return dq applied to input lines 1 and 2 of f (line 0 is not used here).'''
    lines = ro.read_lines(f)
    first, second = lines[1], lines[2]
    return dq(first, second)
''' ============================================================ http://rosalind.info/problems/dbru Given: A collection of up to 1000 DNA strings of equal length (not exceeding 50 bp) corresponding to a set S of (k+1)-mers. Return: The adjacency list corresponding to the de Bruijn graph corresponding to S U Src. ============================================================ ''' import rosalind.rosutil as ro dbru = lambda f: '\n'.join('(%s, %s)' % (x[0], x[1]) for x in ro.de_bruijn_adj_list(ro.read_lines(f))) if __name__ == "__main__": # print dbru('rosalind_dbru_sample.dat') print dbru('rosalind_dbru.dat')
def ptra(f):
    '''Pass the lines of f as positional arguments to translate_table_index.'''
    args = ro.read_lines(f)
    return translate_table_index(*args)