Beispiel #1
0
def find_synteny_region(query, sbed, data, window, cutoff, colinear=False):
    """
    Get all synteny blocks for a query, algorithm is single linkage
    anchors are a window centered on query

    Two categories of syntenic regions depending on what query is:
    (Syntelog): syntenic region is denoted by the syntelog
    (Gray gene): syntenic region is marked by the closest flanker
    """
    regions = []
    ysorted = sorted(data, key=lambda x: x[1])
    g = Grouper()

    a, b = tee(ysorted)
    next(b, None)
    for ia, ib in zip(a, b):
        pos1, pos2 = ia[1], ib[1]
        if pos2 - pos1 < window and sbed[pos1].seqid == sbed[pos2].seqid:
            g.join(ia, ib)

    for group in sorted(g):
        (qflanker, syntelog), (far_flanker, far_syntelog), flanked = \
            get_flanker(group, query)

        # run a mini-dagchainer here, take the direction that gives us most anchors
        if colinear:
            y_indexed_group = [(y, i) for i, (x, y) in enumerate(group)]
            lis = longest_increasing_subsequence(y_indexed_group)
            lds = longest_decreasing_subsequence(y_indexed_group)

            if len(lis) >= len(lds):
                track = lis
                orientation = "+"
            else:
                track = lds
                orientation = "-"

            group = [group[i] for (y, i) in track]

        xpos, ypos = zip(*group)
        score = min(len(set(xpos)), len(set(ypos)))

        if qflanker == query:
            gray = "S"
        else:
            gray = "G" if not flanked else "F"
            score -= 1  # slight penalty for not finding syntelog

        if score < cutoff:
            continue

        # y-boundary of the block
        left, right = group[0][1], group[-1][1]
        # this characterizes a syntenic region (left, right).
        # syntelog is -1 if it's a gray gene
        syn_region = (syntelog, far_syntelog, left, right, gray, orientation,
                      score)
        regions.append(syn_region)

    return sorted(regions, key=lambda x: -x[-1])  # decreasing synteny score
Beispiel #2
0
def find_synteny_region(query, sbed, data, window, cutoff, colinear=False):
    """
    Get all synteny blocks for a query, algorithm is single linkage
    anchors are a window centered on query

    Two categories of syntenic regions depending on what query is:
    (Syntelog): syntenic region is denoted by the syntelog
    (Gray gene): syntenic region is marked by the closest flanker
    """
    regions = []
    ysorted = sorted(data, key=lambda x: x[1])
    g = Grouper()

    a, b = tee(ysorted)
    next(b, None)
    for ia, ib in izip(a, b):
        pos1, pos2 = ia[1], ib[1]
        if pos2 - pos1 < window and sbed[pos1].seqid == sbed[pos2].seqid:
            g.join(ia, ib)

    for group in sorted(g):
        (qflanker, syntelog), (far_flanker, far_syntelog), flanked = \
            get_flanker(group, query)

        # run a mini-dagchainer here, take the direction that gives us most anchors
        if colinear:
            y_indexed_group = [(y, i) for i, (x, y) in enumerate(group)]
            lis = longest_increasing_subsequence(y_indexed_group)
            lds = longest_decreasing_subsequence(y_indexed_group)

            if len(lis) >= len(lds):
                track = lis
                orientation = "+"
            else:
                track = lds
                orientation = "-"

            group = [group[i] for (y, i) in track]

        xpos, ypos = zip(*group)
        score = min(len(set(xpos)), len(set(ypos)))

        if qflanker == query:
            gray = "S"
        else:
            gray = "G" if not flanked else "F"
            score -= 1  # slight penalty for not finding syntelog

        if score < cutoff:
            continue

        # y-boundary of the block
        left, right = group[0][1], group[-1][1]
        # this characterizes a syntenic region (left, right).
        # syntelog is -1 if it's a gray gene
        syn_region = (syntelog, far_syntelog, left,
                      right, gray, orientation, score)
        regions.append(syn_region)

    return sorted(regions, key=lambda x: -x[-1])  # decreasing synteny score
Beispiel #3
0
def main(arg):
    fp = open(arg)
    N = int(fp.readline().strip())
    a = [int(x) for x in fp.readline().split()]
    assert N == len(a)

    lis = longest_increasing_subsequence(a)
    lds = longest_decreasing_subsequence(a)
    print " ".join(str(x) for x in lis)
    print " ".join(str(x) for x in lds)
Beispiel #4
0
def test_longest_decreasing_subsequence(input_array, expected):
    from jcvi.algorithms.lis import longest_decreasing_subsequence

    assert longest_decreasing_subsequence(input_array) == expected