Ejemplo n.º 1
0
def solution(dataset: list) -> str:
    """Return a longest common subsequence of the first two parsed sequences.

    Fills the usual dynamic-programming table, where ``table[j][i]`` is the
    LCS length of ``seq1[:i]`` and ``seq2[:j]``, then walks back from the
    bottom-right corner to recover one subsequence of that length.

    Args:
        dataset: Input lines, passed unchanged to ``read_fasta``.

    Returns:
        One longest common subsequence, or an empty string when the two
        sequences have no symbol in common.
    """
    fasta = read_fasta(lines=dataset)
    seq1, seq2 = fasta[0], fasta[1]
    width, height = len(seq1), len(seq2)

    # Row 0 and column 0 stay zero: they represent the empty prefix and act
    # as sentinels for both the fill and the traceback below.
    table = [[0] * (width + 1) for _ in range(height + 1)]
    for j, jbase in enumerate(seq2, 1):
        row, above = table[j], table[j - 1]
        for i, ibase in enumerate(seq1, 1):
            if ibase == jbase:
                row[i] = above[i - 1] + 1
            else:
                row[i] = max(above[i], row[i - 1])

    # Trace back on the padded table with 1-based indices. Keeping the zero
    # border means ``i - 1`` / ``j - 1`` never become negative, so lookups
    # cannot wrap around to the far side of the table.
    i, j = width, height
    lcs = []
    while i > 0 and j > 0:
        if seq1[i - 1] == seq2[j - 1]:
            lcs.append(seq1[i - 1])
            i -= 1
            j -= 1
        elif table[j][i - 1] == table[j][i]:
            i -= 1
        else:
            # On a mismatch the cell equals the max of its left and upper
            # neighbours; left did not match, so the value came from above.
            j -= 1

    # Symbols were collected from the end towards the start.
    lcs.reverse()
    return "".join(lcs)
Ejemplo n.º 2
0
def slow_solution(dataset: list) -> str:
    """List every directed overlap edge by testing all ordered pairs.

    An edge ``head -> tail`` is reported when the last three symbols of
    ``head`` equal the first three symbols of ``tail``. Pairs whose two
    members compare equal are skipped, so no record links to itself.

    Args:
        dataset: Input lines, passed unchanged to ``read_fasta``.

    Returns:
        One ``"<head id> <tail id>"`` line per edge, joined with newlines.
    """
    overlap = 3
    records = read_fasta(lines=dataset)
    # Quadratic on purpose: every record is compared against every record.
    pairs = [
        (head.id, tail.id)
        for head in records
        for tail in records
        if not (head == tail) and head[-overlap:] == tail[:overlap]
    ]
    return "\n".join(f"{src} {dst}" for src, dst in pairs)
Ejemplo n.º 3
0
def solution(dataset: list) -> str:
    """Build a consensus string and an A/C/G/T profile for the sequences.

    The sequences are read column by column (``zip`` stops at the shortest
    one). For each column the most common symbol goes into the consensus,
    and the per-symbol counts for A, C, G and T go into the profile.

    Args:
        dataset: Input lines, passed unchanged to ``read_fasta``.

    Returns:
        The consensus on the first line, followed by one
        ``"<symbol>: <counts separated by spaces>"`` line per symbol.
    """
    alphabet = ["A", "C", "G", "T"]
    records = read_fasta(lines=dataset)

    # One tally per alignment column.
    column_tallies = [Counter(column) for column in zip(*records)]

    consensus = "".join(
        tally.most_common(1)[0][0] for tally in column_tallies
    )

    # Column-major counts, transposed below into one row per symbol.
    per_column = [
        [str(tally[symbol]) for symbol in alphabet]
        for tally in column_tallies
    ]
    rows = []
    for symbol, counts in zip(alphabet, zip(*per_column)):
        rows.append(f"{symbol}: {' '.join(counts)}")
    profile_matrix = "\n".join(rows)

    return f"{consensus}\n{profile_matrix}"
Ejemplo n.º 4
0
def solution(dataset: list) -> str:
    """List directed overlap edges, comparing each pair of records once.

    Records are added to the graph one at a time; each new record is checked
    in both directions against the records already present. An edge
    ``a -> b`` is reported when the last ``k`` symbols of ``a`` equal the
    first ``k`` symbols of ``b`` (``k`` is 3).

    Args:
        dataset: Input lines, passed unchanged to ``read_fasta``.

    Returns:
        One ``"<tail id> <head id>"`` line per edge, joined with newlines.
    """
    sequences = read_fasta(lines=dataset)
    k = 3
    edges = []
    nodes = []
    # Still O(n**2) overall, but each unordered pair is visited once
    # (n * (n - 1) / 2 iterations) instead of twice.
    for s in sequences:
        s_prefix = s[:k]
        # A prefix shorter than k cannot form a length-k overlap; without
        # this guard ``endswith`` would accept it (and "" matches anything).
        s_prefix_ok = len(s_prefix) == k
        # Check which existing nodes s connects to, in either direction.
        for t in nodes:
            t_prefix = t[:k]
            if len(t_prefix) == k and s.endswith(t_prefix):
                edges.append((s.id, t.id))
            if s_prefix_ok and t.endswith(s_prefix):
                edges.append((t.id, s.id))
        # Only now add s, so it is never compared against itself.
        nodes.append(s)
    return "\n".join(f"{tail} {head}" for tail, head in edges)
Ejemplo n.º 5
0
def solution(dataset: list) -> str:
    """Return 1-based positions in the first sequence that spell the second.

    Each symbol of the second sequence is searched for in the first one,
    starting just after the position used for the previous symbol.

    Args:
        dataset: Input lines, passed unchanged to ``read_fasta``.

    Returns:
        The positions, in order, separated by single spaces.
    """
    sequences = read_fasta(lines=dataset)
    dna = sequences[0]
    subsequence = sequences[1]

    positions = []
    offset = 0
    for base in subsequence:
        # NOTE(review): ``find_one`` comes from the sequence type returned by
        # ``read_fasta``; presumably it yields the 0-based index of ``base``
        # within the slice -- confirm, including what it does when the
        # symbol is absent.
        loc = dna[offset:].find_one(base)
        # Convert the slice-relative hit to a 1-based absolute position,
        # which is also where the next search starts.
        offset = loc + offset + 1
        positions.append(offset)

    # Sanity check: report any position that does not hold the expected
    # symbol. An explicit comparison is used so the check still runs under
    # ``python -O`` and does not swallow unrelated exceptions.
    for base, position in zip(subsequence, positions):
        found = dna[position - 1]
        if base != found:
            print(f"{base} != {found} at {position}")

    return " ".join(str(position) for position in positions)