Ejemplos de read_str en Python, ejemplos de rosalind.rosutil.read_str en Python

Ejemplo n.º 1

0

Mostrar archivo

Archivo: rosalind_frmt.py Proyecto: orenlivne/euler

def frmt(f):
    '''Print, in FASTA format, the shortest record among the IDs listed in f.

    The whitespace-separated IDs obtained via ro.read_str(f) are fetched from
    the Entrez 'nucleotide' database as FASTA and parsed with SeqIO. The
    record with the smallest length is printed as a '>' description line
    followed by its sequence.

    When several records share the minimum length, the first one returned by
    Entrez is chosen. (Ranking (length, record) tuples instead would fall
    back to comparing the record objects themselves on a tie, which is not a
    meaningful ordering.)

    Raises ValueError if no records were returned.
    '''
    ids = ' '.join(ro.read_str(f).split())
    handle = Entrez.efetch(db='nucleotide', id=ids, rettype='fasta')
    try:
        # Materialize the records before the handle is released.
        records = list(SeqIO.parse(handle, 'fasta'))
    finally:
        handle.close()
    shortest = min(records, key=len)
    print('>' + shortest.description)
    print(shortest.seq)

Ejemplo n.º 2

0

Mostrar archivo

Archivo: rosalind_suff.py Proyecto: pombredanne/euler

def test_suffix_tree_weights(file_name_prefix):
    """Check rm.suffix_tree_weights() against a stored reference output.

    The input string is read from '<ROSALIND_HOME>/<prefix>.dat' and the
    expected values from '<ROSALIND_HOME>/<prefix>.out'. The sorted computed
    values are also written to '<ROSALIND_HOME>/<prefix>.mine.out' so they can
    be inspected by hand. The comparison is order-insensitive: both lists are
    sorted before being compared.
    """
    base_path = "%s/%s" % (ro.ROSALIND_HOME, file_name_prefix)

    text = ro.read_str(base_path + ".dat")
    actual = np.array(list(rm.suffix_tree_weights(text)))
    expected = np.loadtxt(base_path + ".out", dtype=str)

    actual_sorted = sorted(actual)
    np.savetxt(base_path + ".mine.out", np.array(actual_sorted), fmt="%s")
    assert_equal(actual_sorted, sorted(expected), "Wrong suffix tree weight list")

Ejemplo n.º 3

0

Mostrar archivo

Archivo: rosalind_orfr.py Proyecto: orenlivne/euler

def orfr(f):
    '''Main driver: return the longest candidate for the input read from f.

    Candidates are whatever distinct_protein_strings() yields for the string
    obtained via ro.read_str(f). Among candidates of equal length, the one
    that compares greatest is returned.
    '''
    candidates = distinct_protein_strings(ro.read_str(f))
    return max(candidates, key=lambda p: (len(p), p))

Ejemplo n.º 4

0

Mostrar archivo

Archivo: rosalind_1e.py Proyecto: orenlivne/euler

def one_e(f):
    '''Main driver: apply prefix_skew_argmax() to the string read from f and
    return the result formatted by ro.join_list().'''
    text = ro.read_str(f)
    result = prefix_skew_argmax(text)
    return ro.join_list(result)

Ejemplo n.º 5

0

Mostrar archivo

Archivo: rosalind_prot.py Proyecto: orenlivne/euler

'''
============================================================
http://rosalind.info/problems/prot

The Genetic Code

Problem

The 20 commonly occurring amino acids are abbreviated by using 20 letters from the English alphabet (all letters except for B, J, O, U, X, and Z). Protein strings are constructed from these 20 symbols. Henceforth, the term genetic string will incorporate protein strings along with DNA strings and RNA strings.

The RNA codon table dictates the details regarding the encoding of specific codons into the amino acid alphabet.

Given: An RNA string s corresponding to a strand of mRNA (of length at most 10 kbp).

Return: The protein string encoded by s.
============================================================
'''
from rosalind.rosutil import read_str, RNA_TRANSLATION, STOP_VALUE
from itertools import takewhile

def mrna_to_protein(s):
    '''Convert an mRNA string to a protein string.

    s is read three characters at a time from position 0; each codon is
    looked up in RNA_TRANSLATION and the results are concatenated.
    Translation stops at the first codon whose translation equals STOP_VALUE
    (which is not included in the result); anything after it is never looked
    up. One or two trailing characters that do not form a complete codon are
    ignored.

    Raises KeyError if a complete codon before the stop is not a key of
    RNA_TRANSLATION.
    '''
    # Stop at len(s) - 2 so that only complete 3-character codons are produced.
    codons = (s[i:i + 3] for i in range(0, len(s) - 2, 3))
    # Lazy lookup: codons after the stop codon are never translated.
    amino_acids = (RNA_TRANSLATION[codon] for codon in codons)
    # A single join avoids rebuilding the growing string for every residue.
    return ''.join(takewhile(lambda v: v != STOP_VALUE, amino_acids))

if __name__ == "__main__":
    # Translate the sample data set first, then the full data set.
    for data_file in ('rosalind_prot_sample.dat', 'rosalind_prot.dat'):
        print(mrna_to_protein(read_str(data_file)))

Ejemplo n.º 6

0

Mostrar archivo

Archivo: rosalind_ling.py Proyecto: orenlivne/euler

def ling(f):
    '''Main driver: return ling_complexity() of the string read from f.'''
    text = ro.read_str(f)
    return ling_complexity(text)

Ejemplo n.º 7

0

Mostrar archivo

Archivo: rosalind_dbpr.py Proyecto: pombredanne/euler

def dbpr(f):
    """Main driver: return the items produced by protein_biological_processes()
    for the string read from f, one per line."""
    protein_id = ro.read_str(f)
    processes = protein_biological_processes(protein_id)
    return "\n".join(processes)

Ejemplo n.º 8

0

Mostrar archivo

Archivo: rosalind_mrep.py Proyecto: orenlivne/euler

def mrep(f):
    '''Main driver: print each item yielded by maximal_prefixes() for the
    string read from f, called with a second argument of 20.'''
    text = ro.read_str(f)
    for item in maximal_prefixes(text, 20):
        print(item)

Ejemplo n.º 9

0

Mostrar archivo

Archivo: rosalind_need.py Proyecto: orenlivne/euler

def need(f):
    '''Print the two sequences named by the two IDs stored in f.

    The file content must split into exactly two whitespace-separated IDs
    (otherwise unpacking raises ValueError). Both sequences are looked up
    with rd.dna_seq_of_id() before anything is printed.
    '''
    first_id, second_id = ro.read_str(f).split()
    sequences = [rd.dna_seq_of_id(first_id), rd.dna_seq_of_id(second_id)]
    for sequence in sequences:
        print(sequence)

Ejemplo n.º 10

0

Mostrar archivo

Archivo: rosalind_dna.py Proyecto: orenlivne/euler

#!/usr/bin/env python
'''
============================================================
http://rosalind.info/problems/dna/

Given: A DNA string s of length at most 1000 nt.

Return: Four integers (separated by spaces) counting the respective number of times that the symbols 'A', 'C', 'G', and 'T' occur in s.
============================================================
'''
from rosalind.rosutil import read_str

def histogram(s):
    '''Return the counts of 'A', 'C', 'G' and 'T' in s as a space-separated string.

    The counts are reported in the fixed order A, C, G, T. A symbol that does
    not occur in s is reported as 0, so an empty string gives '0 0 0 0'.
    Symbols other than A, C, G and T are tolerated and do not appear in the
    result.
    '''
    # Seed the four reported symbols with 0 so absent ones never raise KeyError.
    counts = dict.fromkeys('ACGT', 0)
    for x in s:
        counts[x] = counts.get(x, 0) + 1
    return ' '.join(str(counts[base]) for base in 'ACGT')
         
if __name__ == "__main__":
    # Report the nucleotide counts for the full data set, then for the sample.
    for data_file in ('rosalind_dna.dat', 'rosalind_dna_sample.dat'):
        print(histogram(read_str(data_file)))

Ejemplo n.º 11

0

Mostrar archivo

Archivo: rosalind_swat.py Proyecto: orenlivne/euler

def swat(f):
    '''Print strings for local alignment score.

    The file content must split into exactly two whitespace-separated IDs
    (otherwise unpacking raises ValueError). The `sequence` attribute of each
    rd.protein_record() result is printed on its own line; both records are
    fetched before anything is printed.
    '''
    first_id, second_id = ro.read_str(f).split()
    sequences = [rd.protein_record(first_id).sequence,
                 rd.protein_record(second_id).sequence]
    for sequence in sequences:
        print(sequence)

Ejemplo n.º 12

0

Mostrar archivo

Archivo: rosalind_suff.py Proyecto: pombredanne/euler

def suff(f):
    """Main driver: print, one per line, every item yielded by
    rm.suffix_tree_weights() for the string read from f."""
    text = ro.read_str(f)
    for item in rm.suffix_tree_weights(text):
        print(item)

Ejemplo n.º 13

0

Mostrar archivo

Archivo: rosalind_prtm.py Proyecto: orenlivne/euler

'''
============================================================
http://rosalind.info/problems/prtm

Chaining the Amino Acids

Problem

In a weighted alphabet, every symbol is assigned a positive real number called a weight. A string formed from a weighted alphabet is called a weighted string, and its weight is equal to the sum of the weights of its symbols.

The standard weight assigned to each member of the 20-symbol amino acid alphabet is the monoisotopic mass of the corresponding amino acid.

Given: A protein string P of length at most 1000 aa.

Return: The total weight of P. Consult the monoisotopic mass table.
============================================================
'''
from rosalind.rosutil import read_str, aa_mass

def prtm(s):
    '''Return the total amino acid mass of the protein whose string is s.

    Each symbol of s is looked up in aa_mass and the values are added up;
    a symbol missing from aa_mass raises KeyError.
    '''
    total = 0
    for x in s:
        total += aa_mass[x]
    return total

if __name__ == "__main__":
    # Full data set first (821.392 was the value noted for it), then the sample.
    for data_file in ('rosalind_prtm.dat', 'rosalind_prtm_sample.dat'):
        print(prtm(read_str(data_file)))

Ejemplo n.º 14

0

Mostrar archivo

Archivo: rosalind_mrna.py Proyecto: orenlivne/euler

Problem

For positive integers a and n, a modulo n (written a mod n in shorthand) is the remainder when a is divided by n. For example, 29 mod 11 = 7 because 29 = 11 x 2 + 7.

Modular arithmetic is the study of addition, subtraction, multiplication, and division with respect to the modulo operation. We say that a and b are congruent modulo n if a mod n = b mod n; in this case, we use the notation a = b mod n.

Two useful facts in modular arithmetic are that if a = b mod n and c = d mod n, then a + c = b + d mod n and a x c = b x d mod n. To check your understanding of these rules, you may wish to verify these relationships for a = 29, b = 73, c = 10, d = 32, and n = 11.

As you will see in this exercise, some Rosalind problems will ask for a (very large) integer solution modulo a smaller number to avoid the computational pitfalls that arise with storing such large numbers.

Given: A protein string of length at most 1000 aa.

Return: The total number of different RNA strings from which the protein could have been translated, modulo 1,000,000. (Don't neglect the importance of the stop codon in protein translation.)
============================================================
'''
import rosalind.rosutil as ro
from itertools import chain

# Inverse of ro.RNA_TRANSLATION: maps each translation value to the list of
# codons (keys of RNA_TRANSLATION) that translate to it.
INV_CODON = {}
for k, v in ro.RNA_TRANSLATION.iteritems():
    if v not in INV_CODON:
        INV_CODON[v] = []
    INV_CODON[v].append(k)

def mrna(s, r=1000000):
    '''Combine, via ro.prod_mod(..., r), the number of codons for each symbol of s.

    For every symbol of s, followed by ro.STOP_VALUE, the number of codons
    listed for it in INV_CODON is passed to ro.prod_mod() together with r
    (presumably the product of those counts modulo r -- confirm against
    rosutil). A symbol missing from INV_CODON raises KeyError.
    '''
    # The terminating stop is appended so its codon choices are counted too.
    symbols = chain(s, [ro.STOP_VALUE])
    codon_counts = (len(INV_CODON[x]) for x in symbols)
    return ro.prod_mod(codon_counts, r)

if __name__ == "__main__":
    # Solve the sample data set first, then the full data set.
    for data_file in ('rosalind_mrna_sample.dat', 'rosalind_mrna.dat'):
        print(mrna(ro.read_str(data_file)))

Ejemplo n.º 15

0

Mostrar archivo

Archivo: rosalind_rnas.py Proyecto: orenlivne/euler

'''
============================================================
http://rosalind.info/problems/rnas

Given an RNA string s, we will augment the bonding graph of s by adding basepair edges connecting all occurrences of 'U' to all occurrences of 'G' in order to represent possible wobble base pairs.

We say that a matching in the bonding graph for s is valid if it is noncrossing (to prevent pseudoknots) and has the property that a basepair edge in the matching cannot connect symbols sj and sk unless k>=j+4 (to prevent nearby nucleotides from base pairing).

See Figure 1 for an example of a valid matching if we allow wobble base pairs. In this problem, we will wish to count all possible valid matchings in a given bonding graph; see Figure 2 for all possible valid matchings in a small bonding graph, assuming that we allow wobble base pairing.

Given: An RNA string s (of length at most 200 bp).

Return: The total number of distinct valid matchings of basepair edges in the bonding graph of s. Assume that wobble base pairing is allowed.
============================================================
'''
import rosalind.rosutil as ro

# For each base, the bases it can bind to, assuming wobble bonding.
_BONDING = {
    'A': 'U',
    'U': 'AG',
    'C': 'G',
    'G': 'CU',
}
def _wobb(s, w):
    '''Return the number of non-crossing wobble matchings in the string s,
    where the first base may only pair with a base at index w or later.

    Recursion is on the first base of s: either it stays unpaired, or it pairs
    with a compatible base s[i], which splits the rest into the independent
    segments s[1:i] and s[i + 1:].
    '''
    if not s:
        # The empty string has exactly one matching: the empty one.
        return 1
    # Matchings in which the first base is left unpaired.
    count = wobb(s[1:], w)
    # Matchings in which the first base is paired with s[i].
    for i in range(w, len(s)):
        if s[i] in _BONDING[s[0]]:
            count += wobb(s[1:i], w) * wobb(s[i + 1:], w)
    return count

# The recursion calls this wrapped version; ro.memoize presumably caches
# results per (s, w) -- confirm against rosutil.
wobb = ro.memoize(_wobb)
def rnas(f):
    '''Main driver to solve this problem: return wobb() of the string read
    from f, with a minimum pairing distance of 4.'''
    return wobb(ro.read_str(f), 4)

if __name__ == "__main__":
    # Run the two sample data sets, then the full data set.
    data_files = (
        'rosalind_rnas_sample1.dat',
        'rosalind_rnas_sample.dat',
        'rosalind_rnas.dat',
    )
    for data_file in data_files:
        print(rnas(data_file))

Ejemplo n.º 16

0

Mostrar archivo

Archivo: rosalind_rna.py Proyecto: pombredanne/euler

#!/usr/bin/env python
"""
============================================================
http://rosalind.info/problems/rna/

Given: A DNA string t having length at most 1000 nt.

Return: The transcribed RNA string of t.
============================================================
"""
from rosalind.rosutil import read_str

def transcribe(s):
    """Return s with every "T" symbol replaced by "U"; other symbols are kept.

    s may be any iterable of strings; the symbols are concatenated in order.
    """
    symbols = []
    for x in s:
        symbols.append(x if x != "T" else "U")
    return "".join(symbols)
if __name__ == "__main__":
    # Transcribe the sample data set first, then the full data set.
    for data_file in ("rosalind_rna_sample.dat", "rosalind_rna.dat"):
        print(transcribe(read_str(data_file)))

Ejemplo n.º 17

0

Mostrar archivo

Archivo: rosalind_mend_manual.py Proyecto: orenlivne/euler

def mend(f):
    '''Main driver to solve this problem.

    The string read from f is parsed with rt.parse_newick(), passed to
    geno_prob(), and the result is formatted by ro.join_list().
    '''
    tree = rt.parse_newick(ro.read_str(f))
    result = geno_prob(tree)
    return ro.join_list(result)

Ejemplo n.º 18

0

Mostrar archivo

Archivo: rosalind_revc.py Proyecto: orenlivne/euler

'''
============================================================
http://rosalind.info/problems/revc

Given: A DNA string s of length at most 1000 bp.
Return: The reverse complement sc of s.
============================================================
'''
from rosalind.rosutil import read_str, revc

if __name__ == "__main__":
    # Only the '1b' data set is run here. The doctest run and the
    # 'rosalind_revc_sample.dat' / 'rosalind_revc.dat' runs are disabled.
    data_file = 'rosalind_revc_1b.dat'
    print(revc(read_str(data_file)))

Ejemplo n.º 19

0

Mostrar archivo

Archivo: rosalind_eubt.py Proyecto: orenlivne/euler

def eubt(f):
    '''Main driver to solve this problem.

    The whitespace-separated labels read from f are passed to
    enumerate_trees(); each resulting item is printed on its own line after
    conversion with to_newick_str().
    '''
    labels = ro.read_str(f).split()
    for tree in enumerate_trees(labels):
        print(to_newick_str(tree))