コード例 #1
0
ファイル: cons.py プロジェクト: ajdust/euler
def cons(stream):
    """Tally nucleotides per position and derive a consensus string.

    Every record yielded by ``fasta(stream)`` contributes its ``content`` to
    four per-position counters, one for each of ``A``, ``C``, ``G`` and ``T``.
    The counters are sized from the first record whose content is non-empty;
    symbols other than those four letters are ignored.

    Args:
        stream: Passed unchanged to ``fasta``.

    Returns:
        A dict with the keys ``"A"``, ``"C"``, ``"G"`` and ``"T"`` (lists of
        per-position counts) plus ``"Consensus"`` (a string holding the most
        frequent letter of every position; ties go to the earliest letter in
        ``A, C, G, T`` order).  Without any record the lists and the string
        are empty.

    Raises:
        IndexError: If a later record carries a counted letter at a position
            beyond the length the counters were sized to.
    """
    letters = "ACGT"
    # One counter list per letter; they stay empty until a record fixes the
    # profile width.
    profile = {letter: [] for letter in letters}

    for chunk in fasta(stream):
        content = chunk.content
        if not profile["A"]:
            profile = {letter: [0] * len(content) for letter in letters}

        for i, symbol in enumerate(content):
            tally = profile.get(symbol)
            if tally is not None:
                tally[i] += 1

    # Walk the profile column by column.  ``index(max(...))`` picks the first
    # maximal count, which keeps the A, C, G, T precedence on ties.
    columns = zip(*(profile[letter] for letter in letters))
    consensus = "".join(
        letters[column.index(max(column))] for column in columns
    )

    return {**profile, "Consensus": consensus}
コード例 #2
0
ファイル: tran.py プロジェクト: zzggbb/rosalind
#!/bin/env python3

import sys
import util
"""
Given: Two DNA strings of equal length.
Return: The transition/transversion ratio.

transitions: A<>G, C<>T
transversions: A<>C, A<>T, G<>C, G<>T
"""

# Read the two sequences; the unpacking fails unless exactly two values
# come back from util.fasta.
s1, s2 = util.fasta(sys.stdin.readlines()).values()

assert len(s1) == len(s2)

n = len(s1)

# A transition pairs A with G or C with T.  Such a pair is recognised by the
# sum of its two code points, which does not depend on the order of the pair:
#   ord('A') + ord('G') == 136
#   ord('C') + ord('T') == 151
# Every other mismatch is counted as a transversion.
transitions = transversions = 0
for i in range(n):
    c1, c2 = s1[i], s2[i]
    if c1 != c2:
        if ord(c1) + ord(c2) in (136, 151):
            transitions += 1
        else:
            transversions += 1

# NOTE(review): the visible code only counts; the ratio named in the problem
# statement is neither computed nor printed here -- confirm whether the rest
# of the script is missing.
コード例 #3
0
ファイル: gc.py プロジェクト: zzggbb/rosalind
#!/bin/env python3

import sys
import util
"""
Given: At most 10 DNA strings in FASTA format.
Return: The ID of the string having the highest GC-content, followed by
the GC-content of that string.
"""

# Mapping of record ID -> sequence as returned by util.fasta.
data = util.fasta(sys.stdin.readlines())

# Track the best score seen so far.  max_id starts as None so that input
# without any usable record is reported cleanly instead of failing with a
# NameError at the first print below.
max_id = None
max_gc = -1
for k, v in data.items():
    gc = util.gc(v)
    # Strict comparison: on equal scores the earliest record wins.
    if gc > max_gc:
        max_gc = gc
        max_id = k

if max_id is None:
    sys.exit("no FASTA record with a usable GC-content found on stdin")

print(max_id)
# The score is scaled by 100 before printing -- presumably util.gc returns a
# fraction and the output is a percentage; confirm against util.gc.
print("{:.6f}".format(max_gc * 100))
コード例 #4
0
ファイル: rosalindgc.py プロジェクト: ajdust/euler
def gcPerFasta(stream):
    """Yield a ``(ratio, record)`` pair for every record of ``fasta(stream)``.

    For each record, ``dna`` is called with a one-element list holding the
    record's ``content``.  The ratio is the sum of the result's ``G`` and
    ``C`` attributes divided by its ``total()``.

    NOTE(review): presumably ``G``/``C`` are nucleotide counts and the ratio
    is the GC fraction of the record -- confirm against ``dna``.
    """
    for record in fasta(stream):
        tally = dna([record.content])
        gc_sum = tally.G + tally.C
        yield (gc_sum / tally.total(), record)
コード例 #5
0
#!/bin/env python3

import sys
from util import fasta
"""
Given: A collection of 'k' DNA strings each in FASTA format.
Return: A longest common substring of the collection. If multiple solutions
        exist, you may return any single solution.

Notes:
Start with the longest candidates, and work towards shorter ones.
Once a candidate is a solution, the search is done.
Need a way of knowing if a given substring exists in a given string.
"""

# Collect the values returned by fasta for the stdin lines into a list.
records = fasta(sys.stdin.readlines())
strings = list(records.values())

# NOTE(review): only the parsed input is printed; the search outlined in the
# notes above is not implemented in the visible code.
print(strings)
コード例 #6
0
#!/bin/env python3

import sys
import util

"""
Given: A DNA string in FASTA format.
Return: The position and length of every reverse palindrome in the
string having length between 4 and 12.
"""

# Bounds (inclusive) on the length of the windows that are examined.
MIN_LEN = 4
MAX_LEN = 12

# Only the first value returned by util.fasta is examined.
dna_string = list(util.fasta(sys.stdin.readlines()).values())[0]
dna_len = len(dna_string)

# Try every start offset that still leaves room for a MIN_LEN window.
for start in range(dna_len - MIN_LEN + 1):
    # A window may neither exceed MAX_LEN nor run past the end of the string.
    longest = min(MAX_LEN, dna_len - start)
    for length in range(MIN_LEN, longest + 1):
        # Index of the window's final character; presumably the helper takes
        # an inclusive end index -- confirm against util.reverse_palindrome.
        last = start + length - 1
        if util.reverse_palindrome(dna_string, start, last):
            # Report the start position 1-based, followed by the length.
            print(start + 1, length)
コード例 #7
0
ファイル: splc.py プロジェクト: zzggbb/rosalind
#!/bin/env python3

import sys
import util
"""
Given: A DNA string 's' and a collection of substrings of 's' acting
as introns.  All strings are given in FASTA format.
Return: A protein string resulting from transcribing and translating
the exons of 's'.
"""

# The first value is the full DNA string; all remaining values are introns.
data = list(util.fasta(sys.stdin.readlines()).values())
dna_string, introns = data[0], data[1:]

# Cut every occurrence of each intron out of the string, one intron at a
# time and in input order.
for intron in introns:
    dna_string = dna_string.replace(intron, '')

# Feed what is left through the util helpers: rna, then codons, then
# protein.  The pieces returned by util.protein are joined for output.
protein = util.protein(util.codons(util.rna(dna_string)))
print(''.join(protein))