Beispiel #1
0
def generate(X, Y):
    """Yield the features of X that overlap a window built from Y.

    Consecutive Y features are merged into one window as long as the next
    feature does not start after the current window end. Every X feature
    that starts before the window end and ends after the window start is
    yielded unchanged.

    Both inputs are iterables of features indexed as (start, end, ...).
    NOTE(review): the sweep presumably expects both inputs sorted by
    start -- confirm against the callers.
    """
    end_marker = (sys.maxint, sys.maxint)
    X = common.sentinelize(X, end_marker)
    Y = common.sentinelize(Y, end_marker)
    x = next(X)
    y = next(Y)
    # Nothing can overlap when either input is empty #
    running = not (x == end_marker or y == end_marker)
    while running:
        window_start, window_end = y[0], y[1]
        # Grow the window while the following Y features still touch it #
        while y != end_marker:
            y = next(Y)
            if y[0] > window_end:
                break
            window_end = max(window_end, y[1])
        # Consume X features until one starts at or after the window end #
        while x[0] < window_end:
            if x[1] > window_start:
                yield x
            x = next(X)
            if x == end_marker:
                running = False
                break
Beispiel #2
0
def generate(X, Y, l):
    """Yield one new feature for every overlapping pair between X and Y.

    Inspired from:
    fjoin: Simple and Efficient Computation of Feature Overlap
    Journal of Computational Biology, 13(8), Oct. 2006, pp 1457-1464

    X and Y are iterables of features indexed as (start, end, ...).
    NOTE(review): the sweep presumably expects both inputs sorted by
    start -- confirm against the callers.
    `l` is accepted but not used by this function.
    Each overlapping pair is converted by `make_feature(current, other)`,
    where `current` is the feature being scanned and `other` comes from the
    opposite window.
    """
    # Preparation #
    sentinel = (sys.maxint, sys.maxint)
    X = common.sentinelize(X, sentinel)
    Y = common.sentinelize(Y, sentinel)
    x = next(X)
    y = next(Y)
    Wx = []
    Wy = []
    # Core loop stops when both x and y are at the sentinel.
    # The whole feature is compared: comparing only x[0] (a number) with the
    # sentinel tuple is always unequal, which left the loop relying on
    # StopIteration from the exhausted iterator to terminate.
    while x != sentinel or y != sentinel:
        # Take the leftmost current feature and scan it against the other window
        if x[0] < y[0]:
            # Remove features from the y window that are left of x
            Wy = [f for f in Wy if f[1] > x[0]]
            # Yield new features with all overlaps of x in Wy
            # (f[1] > x[0] already holds for everything kept above)
            for f in Wy:
                if x[1] > f[0]:
                    yield make_feature(x, f)
            # Put x in the window only if it is not left of y
            if x[1] >= y[0]:
                Wx.append(x)
            # Advance current x feature
            x = next(X)
        else:
            # Remove features from the x window that are left of y
            Wx = [f for f in Wx if f[1] > y[0]]
            # Yield new features with all overlaps of y in Wx
            for f in Wx:
                if y[1] > f[0]:
                    yield make_feature(y, f)
            # Put y in the window only if it is not left of x
            if y[1] >= x[0]:
                Wy.append(y)
            # Advance current y feature
            y = next(Y)
def generate(X, Y):
    """Replace the score of each Y feature by the mean X score over it.

    X is an iterable of (start, end, score) features and Y an iterable of
    tuple features whose first two items are (start, end). For each y the
    overlap length with every buffered X feature is multiplied by that
    feature's score, summed, and divided by the length of y. The result is
    yielded as y with its third item replaced by that value.

    NOTE(review): presumably both inputs are sorted by start -- confirm.
    """
    # Sentinel #
    stop_marker = (sys.maxint, sys.maxint, 0.0)
    X = common.sentinelize(X, stop_marker)
    # Buffer of X features; seeded with a dummy lying left of everything #
    pending = [(-sys.maxint, -sys.maxint, 0.0)]
    # --- Core loop --- #
    for y in Y:
        y_start, y_end = y[0], y[1]
        # Read X until a feature starting at or after the end of y is seen #
        latest = pending[-1]
        while latest[0] < y_end:
            latest = next(X)
            if latest[1] > y_start:
                pending.append(latest)
        # Drop the leading features that end at or before the start of y #
        first_kept = 0
        while pending[first_kept][1] <= y_start:
            first_kept += 1
        pending = pending[first_kept:]
        # Accumulate overlap length times score #
        total = 0.0
        for f in pending:
            if f[0] >= y_end:
                continue
            overlap_start = max(f[0], y_start)
            overlap_end = min(f[1], y_end)
            total += (overlap_end - overlap_start) * f[2]
        # Emit a feature #
        yield y[0:2] + (total / (y_end - y_start),) + y[3:]
Beispiel #4
0
def generate(n_tracks, geometric=False):
    """Merge several score tracks into one track of combined scores.

    n_tracks is an iterable of tracks, each an iterable of
    (start, end, score) features. The tracks are cut at every feature
    boundary, and for each resulting segment covered by at least one feature
    a tuple (start, end, value) is yielded.

    With geometric=False the value is the sum of the covering scores divided
    by the total number of input tracks (empty tracks included). With
    geometric=True the sum is raised to the power 1 / number_of_tracks.

    Yields nothing when n_tracks is empty.
    """
    # Get all iterators #
    sentinel = (sys.maxint, sys.maxint, 0.0)
    tracks = [common.sentinelize(x, sentinel) for x in n_tracks]
    # Without any track there is nothing to merge; bail out before the
    # division below, which would raise ZeroDivisionError.
    if not tracks:
        return
    elements = [next(x) for x in tracks]
    tracks_denom = 1.0 / len(tracks)
    # Choose meaning function #
    # NOTE(review): a geometric mean is normally a *product* raised to 1/n;
    # this raises the *sum* to 1/n. Left unchanged -- confirm the intent.
    if geometric:
        mean_fn = lambda x: sum(x) ** tracks_denom
    else:
        mean_fn = lambda x: sum(x) * tracks_denom
    # Check empty #
    for i in range(len(tracks) - 1, -1, -1):
        if elements[i] == sentinel:
            tracks.pop(i)
            elements.pop(i)
    # Core loop #
    while tracks:
        # Find the next boundaries #
        start = min([x[0] for x in elements])
        end = min([x[0] for x in elements if x[0] > start] +
                  [x[1] for x in elements])
        # Scores between boundaries #
        scores = [x[2] for x in elements if x[1] > start and x[0] < end]
        if scores:
            yield (start, end, mean_fn(scores))
        # Iterate over elements (backwards, so popping keeps indices valid) #
        for i in range(len(tracks) - 1, -1, -1):
            # Trim the part of the element that was just consumed #
            if elements[i][0] < end:
                elements[i] = (end, elements[i][1], elements[i][2])
            # Advance the elements that need to be advanced #
            if elements[i][1] <= end:
                elements[i] = next(tracks[i])
            # Pop sentinels #
            if elements[i] == sentinel:
                tracks.pop(i)
                elements.pop(i)
Beispiel #5
0
def generate(X, Y):
    """Yield each Y feature with its score replaced by the mean X score.

    X is an iterable of (start, end, score) features; Y is an iterable of
    tuple features starting with (start, end). For every y, the portion of
    each buffered X feature lying inside y contributes length * score; the
    sum is divided by the length of y and stored as the third item of the
    yielded tuple.

    NOTE(review): presumably both inputs are sorted by start -- confirm.
    """
    # Sentinel #
    last_item = (sys.maxint, sys.maxint, 0.0)
    X = common.sentinelize(X, last_item)
    # Sliding buffer of X features, seeded with a far-left placeholder #
    buffered = [(-sys.maxint, -sys.maxint, 0.0)]
    # --- Core loop --- #
    for y in Y:
        lo, hi = y[0], y[1]
        # Pull X features until one starts at or beyond the end of y #
        head = buffered[-1]
        while head[0] < hi:
            head = next(X)
            if head[1] > lo:
                buffered.append(head)
        # Discard leading features that finish at or before the start of y #
        skip = 0
        while buffered[skip][1] <= lo:
            skip += 1
        buffered = buffered[skip:]
        # Weighted sum of the scores over the overlapping stretches #
        weighted = 0.0
        for f in buffered:
            if f[0] >= hi:
                continue
            weighted += (min(f[1], hi) - max(f[0], lo)) * f[2]
        # Emit a feature #
        yield y[0:2] + (weighted / (hi - lo),) + y[3:]
Beispiel #6
0
def meta_generate(n_tracks, fn, win_size):
    """Combine several tracks with a boolean function of their activity.

    Each track in n_tracks is first passed through `fusion.generate`. The
    start and end positions of all features are then swept in increasing
    order while a list of booleans records, per track, whether a feature is
    currently open. Before each position is consumed, `fn(activity)` is
    called with that list; when it returns a true value the interval since
    the previous position is yielded as (start, position, '', 0.0, 0).

    Features are read in chunks: in round k every remaining track is read
    until a feature ending at or beyond k * win_size is seen.

    Yields nothing when every track is empty.
    """
    import fusion
    sentinel = (sys.maxint, sys.maxint, 0.0)
    tracks = [fusion.generate(t) for t in n_tracks]
    tracks = [common.sentinelize(t, sentinel) for t in tracks]

    # First feature of every track; empty tracks are dropped #
    init = [next(t) for t in tracks]
    for i in range(len(tracks) - 1, -1, -1):
        if init[i] == sentinel:  # empty track
            tracks.pop(i)
            init.pop(i)
    N = len(tracks)
    # Nothing to sweep: indexing the empty event list below would raise #
    if not N:
        return
    available_tracks = list(range(N - 1, -1, -1))
    activity = [False] * N
    # Event list of (position, track index), for both starts and ends #
    current = [(feature[0], i) for i, feature in enumerate(init)]
    current.extend([(feature[1], i) for i, feature in enumerate(init)])
    current.sort()

    # Open every track that has an event at the very first position #
    start = current[0][0]
    while current and current[0][0] == start:
        activity[current[0][1]] = True
        current.pop(0)

    k = 1
    while available_tracks:
        # load *win_size* bp in memory
        limit = k * win_size
        to_remove = []
        for i in available_tracks:
            a = [0, 0]
            while a[1] < limit:
                a = next(tracks[i])
                if a == sentinel:
                    to_remove.append(i)
                    break
                current.append((a[0], i))
                current.append((a[1], i))
        for i in to_remove:
            available_tracks.remove(i)
        current.sort()

        # When the last track has just been exhausted there will be no
        # further round, so every queued event must be flushed now;
        # otherwise features lying beyond this window would be lost.
        if not available_tracks:
            limit = sys.maxint

        # calculate boolean values for start-position interval
        while current and current[0][0] < limit:
            position = current[0][0]
            if fn(activity):
                yield (start, position, '', 0.0, 0)
            # Toggle every track that has an event at this position #
            while current and current[0][0] == position:
                i = current[0][1]
                activity[i] = not activity[i]
                current.pop(0)
            start = position
        k += 1
Beispiel #7
0
def meta_generate(n_tracks, fn, win_size):
    """Combine several tracks with a boolean function of their activity.

    Each track in n_tracks is first passed through `fusion`. The start and
    end positions of all features are then swept in increasing order while a
    list of booleans records, per track, whether a feature is currently
    open. Before each position is consumed, `fn(activity)` is called with
    that list; when it returns a true value the interval since the previous
    position is yielded as (start, position, '', 0.0, 0).

    Features are read in chunks: in round k every remaining track is read
    until a feature ending at or beyond k * win_size is seen.

    Yields nothing when every track is empty.
    """
    from gMiner.manipulate import fusion
    sentinel = (sys.maxint, sys.maxint, 0.0)
    tracks = [fusion(t) for t in n_tracks]
    tracks = [common.sentinelize(t, sentinel) for t in tracks]

    # First feature of every track; empty tracks are dropped #
    init = [next(t) for t in tracks]
    for i in range(len(tracks) - 1, -1, -1):
        if init[i] == sentinel: # empty track
            tracks.pop(i)
            init.pop(i)
    N = len(tracks)
    # Nothing to sweep: indexing the empty event list below would raise #
    if not N:
        return
    available_tracks = list(range(N - 1, -1, -1))
    activity = [False] * N
    # Event list of (position, track index), for both starts and ends #
    current = [(feature[0], i) for i, feature in enumerate(init)]
    current.extend([(feature[1], i) for i, feature in enumerate(init)])
    current.sort()

    # Open every track that has an event at the very first position #
    start = current[0][0]
    while current and current[0][0] == start:
        activity[current[0][1]] = True
        current.pop(0)

    k = 1
    while available_tracks:
        # load *win_size* bp in memory
        limit = k * win_size
        to_remove = []
        for i in available_tracks:
            a = [0, 0]
            while a[1] < limit:
                a = next(tracks[i])
                if a == sentinel:
                    to_remove.append(i)
                    break
                current.append((a[0], i))
                current.append((a[1], i))
        for i in to_remove:
            available_tracks.remove(i)
        current.sort()

        # When the last track has just been exhausted there will be no
        # further round, so every queued event must be flushed now;
        # otherwise features lying beyond this window would be lost.
        if not available_tracks:
            limit = sys.maxint

        # calculate boolean values for start-position interval
        while current and current[0][0] < limit:
            position = current[0][0]
            if fn(activity):
                yield (start, position, '', 0.0, 0)
            # Toggle every track that has an event at this position #
            while current and current[0][0] == position:
                i = current[0][1]
                activity[i] = not activity[i]
                current.pop(0)
            start = position
        k += 1
Beispiel #8
0
def generate(X, L, l):
    """Smooth the scores of X with a moving window of 2 * L + 1 positions.

    X is an iterable of (start, end, score) features. The position p is
    advanced one step at a time; at each step the score of the feature
    entering the window on the right is added (times 1 / (2 * L + 1)) and
    the score of the feature leaving it on the left is subtracted. Whenever
    the running mean changes, the stretch over which the previous mean held
    is yielded as (start, end, mean), provided that mean is not zero.

    Iteration stops once p reaches `l`.
    NOTE(review): `l` is presumably the sequence length -- confirm.
    Yields nothing when X is empty.
    """
    # Sentinel #
    sentinel = (sys.maxint, sys.maxint, 0.0)
    X = common.sentinelize(X, sentinel)
    # Growing and shrinking list of features around our moving window #
    F = []
    # Current position counting on nucleotides (first nucleotide is zero) #
    p = -L - 2
    # Position since which the mean hasn't changed #
    same_since = -L - 3
    # The current mean and the next mean #
    curt_mean = 0
    next_mean = 0
    # Multiplication factor instead of division #
    f = 1.0 / (2 * L + 1)
    # First feature if it exists #
    F.append(next(X))
    if F == [sentinel]:
        return
    # Core loop #
    while True:
        # Advance one #
        p += 1
        # Window start and stop #
        s = p - L
        e = p + L + 1
        # Scores entering window #
        if F[-1][1] < e:
            F.append(next(X))
        if F[-1][0] < e:
            next_mean += F[-1][2] * f
        # Scores exiting window #
        if F[0][1] < s:
            F.pop(0)
        if F[0][0] < s:
            next_mean -= F[0][2] * f
        # Border condition on the left #
        if p < 0:
            curt_mean = 0
            same_since = p
            continue
        # Border condition on the right #
        # `>=` rather than `==`: with a negative or non-integral `l` an
        # equality test is never satisfied and the loop would never end.
        if p >= l:
            if curt_mean != 0:
                yield (same_since, p, curt_mean)
            break
        # Maybe emit a feature #
        if next_mean != curt_mean:
            if curt_mean != 0:
                yield (same_since, p, curt_mean)
            curt_mean = next_mean
            same_since = p
Beispiel #9
0
def generate(X, Y, l):
    """Yield one new feature for every overlapping pair between X and Y.

    Inspired from:
    fjoin: Simple and Efficient Computation of Feature Overlap
    Journal of Computational Biology, 13(8), Oct. 2006, pp 1457-1464

    X and Y are iterables of features indexed as (start, end, ...).
    NOTE(review): the sweep presumably expects both inputs sorted by
    start -- confirm against the callers.
    `l` is accepted but not used by this function.
    Each overlapping pair is converted by `make_feature(current, other)`,
    where `current` is the feature being scanned and `other` comes from the
    opposite window.
    """
    # Preparation #
    sentinel = (sys.maxint, sys.maxint)
    X = common.sentinelize(X, sentinel)
    Y = common.sentinelize(Y, sentinel)
    x = next(X)
    y = next(Y)
    Wx = []
    Wy = []
    # Core loop stops when both x and y are at the sentinel.
    # Whole features are compared here; testing x[0] (a number) against the
    # sentinel tuple never matches, so the loop could only end through a
    # StopIteration raised by the exhausted iterator.
    while x != sentinel or y != sentinel:
        # Take the leftmost current feature and scan it against the other window
        if x[0] < y[0]:
            # Remove features from the y window that are left of x
            Wy = [f for f in Wy if f[1] > x[0]]
            # Yield new features with all overlaps of x in Wy
            # (the filter above already guarantees f[1] > x[0])
            for f in Wy:
                if x[1] > f[0]:
                    yield make_feature(x, f)
            # Put x in the window only if it is not left of y
            if x[1] >= y[0]:
                Wx.append(x)
            # Advance current x feature
            x = next(X)
        else:
            # Remove features from the x window that are left of y
            Wx = [f for f in Wx if f[1] > y[0]]
            # Yield new features with all overlaps of y in Wx
            for f in Wx:
                if y[1] > f[0]:
                    yield make_feature(y, f)
            # Put y in the window only if it is not left of x
            if y[1] >= x[0]:
                Wy.append(y)
            # Advance current y feature
            y = next(Y)
Beispiel #10
0
def generate(X, L, l):
    """Smooth the scores of X with a moving window of 2 * L + 1 positions.

    X is an iterable of (start, end, score) features. The position p is
    advanced one step at a time; at each step the score of the feature
    entering the window on the right is added (times 1 / (2 * L + 1)) and
    the score of the feature leaving it on the left is subtracted. Whenever
    the running mean changes, the stretch over which the previous mean held
    is yielded as (start, end, mean), provided that mean is not zero.

    Iteration stops once p reaches `l`.
    NOTE(review): `l` is presumably the sequence length -- confirm.
    Yields nothing when X is empty.
    """
    # Sentinel #
    sentinel = (sys.maxint, sys.maxint, 0.0)
    X = common.sentinelize(X, sentinel)
    # Growing and shrinking list of features around our moving window #
    F = []
    # Current position counting on nucleotides (first nucleotide is zero) #
    p = -L - 2
    # Position since which the mean hasn't changed #
    same_since = -L - 3
    # The current mean and the next mean #
    curt_mean = 0
    next_mean = 0
    # Multiplication factor instead of division #
    f = 1.0 / (2 * L + 1)
    # First feature if it exists #
    F.append(next(X))
    if F == [sentinel]: return
    # Core loop #
    while True:
        # Advance one #
        p += 1
        # Window start and stop #
        s = p - L
        e = p + L + 1
        # Scores entering window #
        if F[-1][1] < e: F.append(next(X))
        if F[-1][0] < e: next_mean += F[-1][2] * f
        # Scores exiting window #
        if F[0][1] < s: F.pop(0)
        if F[0][0] < s: next_mean -= F[0][2] * f
        # Border condition on the left #
        if p < 0:
            curt_mean = 0
            same_since = p
            continue
        # Border condition on the right #
        # `>=` instead of `==` so that a negative or non-integral `l`
        # still ends the loop instead of iterating forever.
        if p >= l:
            if curt_mean != 0: yield (same_since, p, curt_mean)
            break
        # Maybe emit a feature #
        if next_mean != curt_mean:
            if curt_mean != 0: yield (same_since, p, curt_mean)
            curt_mean = next_mean
            same_since = p
Beispiel #11
0
def generate(n_tracks):
    """Interleave the features of several tracks into one stream.

    n_tracks is an iterable of tracks, each an iterable of features indexed
    as (start, end, ...). At every step the pending feature with the smallest
    (start, end) pair is yielded and replaced by the next feature of the
    same track; a track is dropped once it is exhausted. Ties go to the
    track with the lowest current index.
    """
    # Get all iterators #
    sentinel = (sys.maxint, sys.maxint, 0.0)
    tracks = [common.sentinelize(t, sentinel) for t in n_tracks]
    # One pending feature per track; filled in before it is ever read #
    heads = [None] * len(tracks)

    def refill(index):
        """Load the next feature of a track, dropping the track when done."""
        heads[index] = next(tracks[index])
        if heads[index] == sentinel:
            del tracks[index]
            del heads[index]

    # Prime every track, last to first so deletions keep indices valid #
    for index in reversed(range(len(tracks))):
        refill(index)
    # Core loop #
    while tracks:
        lowest = min(range(len(heads)),
                     key=lambda k: (heads[k][0], heads[k][1]))
        yield heads[lowest]
        refill(lowest)
Beispiel #12
0
def generate(n_tracks, geometric=False):
    """Merge several score tracks into one track of combined scores.

    n_tracks is an iterable of tracks, each an iterable of
    (start, end, score) features. The tracks are cut at every feature
    boundary, and for each resulting segment covered by at least one feature
    a tuple (start, end, value) is yielded.

    With geometric=False the value is the sum of the covering scores divided
    by the total number of input tracks (empty tracks included). With
    geometric=True the sum is raised to the power 1 / number_of_tracks.

    Yields nothing when n_tracks is empty.
    """
    # Get all iterators #
    sentinel = (sys.maxint, sys.maxint, 0.0)
    tracks = [common.sentinelize(x, sentinel) for x in n_tracks]
    # With no track at all there is nothing to merge, and the division
    # below would raise ZeroDivisionError.
    if not tracks:
        return
    elements = [next(x) for x in tracks]
    tracks_denom = 1.0 / len(tracks)
    # Choose meaning function #
    # NOTE(review): a geometric mean is normally a *product* raised to 1/n;
    # this raises the *sum* to 1/n. Left unchanged -- confirm the intent.
    if geometric:
        mean_fn = lambda x: sum(x) ** tracks_denom
    else:
        mean_fn = lambda x: sum(x) * tracks_denom
    # Check empty #
    for i in range(len(tracks) - 1, -1, -1):
        if elements[i] == sentinel:
            tracks.pop(i)
            elements.pop(i)
    # Core loop #
    while tracks:
        # Find the next boundaries #
        start = min([x[0] for x in elements])
        end = min([x[0] for x in elements if x[0] > start] + [x[1] for x in elements])
        # Scores between boundaries #
        scores = [x[2] for x in elements if x[1] > start and x[0] < end]
        if scores:
            yield (start, end, mean_fn(scores))
        # Iterate over elements (backwards, so popping keeps indices valid) #
        for i in range(len(tracks) - 1, -1, -1):
            # Trim the part of the element that was just consumed #
            if elements[i][0] < end:
                elements[i] = (end, elements[i][1], elements[i][2])
            # Advance the elements that need to be advanced #
            if elements[i][1] <= end:
                elements[i] = next(tracks[i])
            # Pop sentinels #
            if elements[i] == sentinel:
                tracks.pop(i)
                elements.pop(i)