コード例 #1
0
    print(path)
    print(matrix)
    product = 0.5  # probability of the first letter being A or B
    for i, k in zip(path[0::1], path[1::1]):
        if i == 'A' and k == 'A':
            product *= matrix.item(0)
        elif i == 'A' and k == 'B':
            product *= matrix.item(1)
        elif i == 'B' and k == 'A':
            product *= matrix.item(2)
        elif i == 'B' and k == 'B':
            product *= matrix.item(3)

    return product


if __name__ == '__main__':
    # Built-in sample, used when no dataset file is given on the command line.
    path = 'AABBBAABABAAAABBBBAABBABABBBAABBAAAABABAABBABABBAB'
    transition_matrix = np.matrix([[0.194, 0.806], [0.273, 0.727]])

    # With a dataset argument: the first line is the path, and the numeric
    # fields of the final two lines form the transition matrix.
    if len(sys.argv) >= 2:
        file_lines = utils.read_file_lines(sys.argv[1])
        line_count = len(file_lines)
        path = file_lines[0]

        # Each row drops its first whitespace-separated field and converts
        # the remaining fields to floats.
        penultimate_row = np.array(
            file_lines[line_count - 2].split()[1:]).astype(float)
        final_row = np.array(
            file_lines[line_count - 1].split()[1:]).astype(float)

        # NOTE(review): column_stack puts each file row into a *column* of
        # the result, whereas the built-in sample above is written row by
        # row. If the file rows are "from-state" rows this is a transpose -
        # confirm against the dataset format.
        transition_matrix = np.column_stack((penultimate_row, final_row))

    probability = path_probability(path, transition_matrix)
    print(format(probability, '.11e'))
コード例 #2
0
    """get_uniprot_fasta(uniprot_id) - makes a network GET request to the uniprot server using an id which returns a fasta file.  This data is decoded into a 'utf-8' string."""
    data = ''
    with urllib.request.urlopen('http://www.uniprot.org/uniprot/{}.fasta'.format(uniprot_id)) as response:
        data = response.read().decode('utf-8')
    return data


def find_locations(id):
    """Return the 1-based start positions of motif matches for a protein id.

    The FASTA text for ``id`` is fetched with ``get_uniprot_fasta`` and
    parsed with ``utils.parse_fasta``; the first parsed entry is converted
    to a string and scanned for ``N[^P][ST][^P]``. The pattern is wrapped in
    a lookahead so overlapping occurrences are all reported.

    Returns a list of integer positions, which is empty when nothing matches.
    """
    records = utils.parse_fasta(get_uniprot_fasta(id))
    sequence = str(records[0])
    motif = re.compile(r'(?=(N[^P][ST][^P]))')
    # match.start(0) is 0-based; shift by one to report 1-based positions.
    return [hit.start(0) + 1 for hit in motif.finditer(sequence)]


if __name__ == '__main__':
    # Default ids, replaced by the lines of the dataset file when one is
    # given as the first command-line argument.
    dataset = ['A2Z669', 'B5ZC00', 'P07204_TRBM_HUMAN', 'P20840_SAG1_YEAST']
    if len(sys.argv) >= 2:
        dataset = utils.read_file_lines(sys.argv[1])

    # For each id in the dataset, look up the motif locations in the remote
    # fasta and print the id followed by its locations when any exist.
    for access_id in dataset:
        access_id = access_id.strip()
        # Skip blank lines. This must be a value test: the previous
        # `is not ''` compared object identity with a literal, which is
        # implementation-dependent and warns on Python 3.8+.
        if not access_id:
            continue
        locations = find_locations(access_id)
        if locations:
            print(access_id)
            print(' '.join(map(str, locations)))
コード例 #3
0
import sys
import dna.utils as utils


def suffix_array(text):
    """Return the suffix array of ``text`` as a list of decimal strings.

    Entry ``k`` of the result is the start index (as ``str``) of the k-th
    smallest suffix of ``text`` under Python's default string ordering.
    An empty ``text`` yields an empty list.
    """
    # Sort the start positions by the suffix they begin. The suffixes are
    # taken from the ``text`` argument itself, not from a module-level
    # variable, so the function works for any caller.
    order = sorted(range(len(text)), key=lambda start: text[start:])
    return [str(start) for start in order]


if __name__ == '__main__':
    # Sample text, replaced by the stripped first line of the dataset file
    # when a path is given on the command line.
    s = 'AACGATAGCGGTAGA$'
    if len(sys.argv) >= 2:
        s = utils.read_file_lines(sys.argv[1])[0].strip()

    # Build the comma-separated answer once and reuse it for both outputs.
    joined = ', '.join(suffix_array(s))
    print(joined)

    with open('output/answer_21.txt', 'w') as out:
        print(joined, file=out)
        print('Written to {}'.format(out.name))

    # Compare the file just written with the stored expected answer.
    expected = utils.read_file('output/expected_suff_arr.txt')
    actual = utils.read_file('output/answer_21.txt')
    print(actual == expected)
コード例 #4
0
import sys
import dna.utils as utils


def remaining_rabbits(n, m):
    """Return the n-th term of a windowed Fibonacci-style sequence.

    The sequence starts ``1, 1``. While the 0-based month index is below
    ``m`` the next term is the sum of the previous two terms; from then on
    it is the sum of the ``m - 1`` terms that end one position before the
    most recent term. (Presumably this is the "mortal rabbits" recurrence
    with a lifespan of ``m`` months - confirm against the problem statement.)

    For ``n <= 2`` no terms are added and the result is ``1``.
    """
    pairs = [1, 1]

    # ``month`` is the 0-based index of the term being computed.
    for month in range(2, n):
        if month < m:
            nxt = pairs[month - 2] + pairs[month - 1]
        else:
            # The window excludes the most recent term (index month - 1).
            nxt = sum(pairs[month - m:month - 1])
        pairs.append(nxt)

    return pairs[-1]


if __name__ == '__main__':
    n = 6
    m = 3
    if len(sys.argv) >= 2:
        n, m = utils.read_file_lines(sys.argv[1])[0].split()
        n = int(n)
        m = int(m)
    print(remaining_rabbits(n, m))
コード例 #5
0
        given an integer 'n', calculates all the possible permutations from
        1 to 'n' without repeating signs"""
    permutations = list(it.permutations(range(1, n + 1)))
    prefixes = list(it.product('-+', repeat=n))
    results = list(it.product(permutations, prefixes))
    enumerations = []
    for product in results:
        numbers, signs = product
        for i, number in enumerate(numbers):
            sign = signs[i]
            number = int(sign + str(number))
            enumerations.append(number)
    return enumerations


if __name__ == '__main__':
    # Default size, replaced by the integer on the first line of the dataset
    # file when a path is given on the command line.
    d = 5

    if len(sys.argv) >= 2:
        # Pass the dataset path (argv[1]); the whole argv list was being
        # handed to the reader before.
        d = int(utils.read_file_lines(sys.argv[1])[0])

    gene_orders = enumerate_oriented_gene_orders(d)

    # The result is consumed as a flat sequence: every run of d consecutive
    # values is one ordering, so the number of orderings is the length
    # divided by d. Integer division keeps the count exact.
    print(len(gene_orders) // d)
    for start in range(0, len(gene_orders), d):
        print(' '.join(map(str, gene_orders[start:start + d])))