def generate(X, Y):
    """Yield every feature of X that overlaps a merged window of Y.

    Features are indexed as ``f[0]`` (start) and ``f[1]`` (end).  Both
    streams are wrapped by ``common.sentinelize`` with a
    ``(sys.maxint, sys.maxint)`` marker; the equality tests below assume
    that marker is handed back once a stream runs out -- TODO confirm
    against ``common``.  The single forward pass presumably relies on both
    inputs being sorted by start.
    """
    end_marker = (sys.maxint, sys.maxint)
    X = common.sentinelize(X, end_marker)
    Y = common.sentinelize(Y, end_marker)
    x = X.next()
    y = Y.next()
    # With either stream empty there is nothing to report.
    if x == end_marker or y == end_marker:
        return
    while True:
        window_start, window_end = y[0], y[1]
        # Grow the window with every following Y feature that starts at or
        # before the current window end.
        while not y == end_marker:
            y = Y.next()
            if y[0] > window_end:
                break
            if y[1] > window_end:
                window_end = y[1]
        # Consume X until a feature starts at or after the window end,
        # emitting those that reach past the window start.
        while x[0] < window_end:
            if x[1] > window_start:
                yield x
            x = X.next()
            if x == end_marker:
                return
def generate(X, Y, l):
    """Yield ``make_feature(a, b)`` for every overlapping pair between X and Y.

    Inspired from:
    fjoin: Simple and Efficient Computation of Feature Overlap
    Journal of Computational Biology, 13(8), Oct. 2006, pp 1457-1464

    Features are indexed as ``f[0]`` (start) and ``f[1]`` (end).  Both
    streams are wrapped by ``common.sentinelize`` with a
    ``(sys.maxint, sys.maxint)`` marker, assumed to be returned once a
    stream is exhausted.  The sweep presumably expects both inputs sorted
    by start.

    ``l`` is not used by this function; it is kept so the signature stays
    the same for existing callers.
    """
    # Preparation #
    sentinel = (sys.maxint, sys.maxint)
    X = common.sentinelize(X, sentinel)
    Y = common.sentinelize(Y, sentinel)
    x = X.next()
    y = Y.next()
    Wx = []
    Wy = []
    # Core loop stops when both x and y are at the sentinel.  The start
    # coordinate is compared with the sentinel's start coordinate: comparing
    # it with the whole sentinel tuple is always unequal, so the loop would
    # never terminate on its own condition.
    while x[0] != sentinel[0] or y[0] != sentinel[0]:
        # Take the leftmost current feature and scan it against the other window
        if x[0] < y[0]:
            # Remove features from the y window that are left of x
            Wy = [f for f in Wy if f[1] > x[0]]
            # Yield new features with all overlaps of x in Wy
            for f in Wy:
                if x[1] > f[0]:
                    yield make_feature(x, f)
            # Put x in the window only if it is not left of y
            if x[1] >= y[0]:
                Wx.append(x)
            # Advance current x feature
            x = X.next()
        else:
            # Remove features from the x window that are left of y
            Wx = [f for f in Wx if f[1] > y[0]]
            # Yield new features with all overlaps of y in Wx
            for f in Wx:
                if y[1] > f[0]:
                    yield make_feature(y, f)
            # Put y in the window only if it is not left of x
            if y[1] >= x[0]:
                Wy.append(y)
            # Advance current y feature
            y = Y.next()
def generate(X, Y):
    """Yield each feature of Y with its third field replaced by a mean of X.

    For every ``y`` in Y, the scores (``x[2]``) of the X features that
    intersect ``[y[0], y[1])`` are weighted by the length of the
    intersection, summed, and divided by the length of ``y``.  Stretches
    of ``y`` not covered by X therefore count as zero.  Fields of ``y``
    after the third are passed through unchanged.

    X is wrapped by ``common.sentinelize``; the marker
    ``(sys.maxint, sys.maxint, 0.0)`` is assumed to be returned when X is
    exhausted.  Both inputs are presumably sorted by start.
    """
    end_marker = (sys.maxint, sys.maxint, 0.0)
    X = common.sentinelize(X, end_marker)
    # Sliding buffer of X features; the placeholder lies left of any real
    # coordinate, which forces a read on the first feature of Y.
    pending = [(-sys.maxint, -sys.maxint, 0.0)]
    for y in Y:
        y_start, y_end = y[0], y[1]
        # Read X until a feature starts at or after the end of y, keeping
        # the ones that end after the start of y.
        last_read = pending[-1]
        while last_read[0] < y_end:
            last_read = X.next()
            if last_read[1] > y_start:
                pending.append(last_read)
        # Drop the leading features that end at or before the start of y.
        skip = 0
        while pending[skip][1] <= y_start:
            skip += 1
        pending = pending[skip:]
        # Length-weighted sum of the scores over the intersections.
        total = 0.0
        for x in pending:
            if x[0] < y_end:
                total += (min(x[1], y_end) - max(x[0], y_start)) * x[2]
        yield y[0:2] + (total / (y_end - y_start),) + y[3:]
def generate(n_tracks, geometric=False):
    """Merge several scored tracks into one track of per-interval means.

    Each track yields ``(start, end, score)`` features and is wrapped by
    ``common.sentinelize``; the marker ``(sys.maxint, sys.maxint, 0.0)``
    is assumed to be returned when a track is exhausted.  The coordinate
    axis is cut at every feature boundary, and for each resulting interval
    covered by at least one track a ``(start, end, mean)`` tuple is
    yielded.

    The mean is always taken over the total number of input tracks, not
    over the number of tracks covering the interval:

    * arithmetic (default): ``sum(scores) / len(n_tracks)``
    * geometric: ``product(scores) ** (1.0 / len(n_tracks))``

    :param n_tracks: iterable of feature iterators.
    :param geometric: use the geometric instead of the arithmetic mean.
    """
    # Get all iterators #
    sentinel = (sys.maxint, sys.maxint, 0.0)
    tracks = [common.sentinelize(x, sentinel) for x in n_tracks]
    # Without any track there is nothing to merge; returning here also
    # avoids a division by zero when computing the denominator.
    if not tracks:
        return
    elements = [x.next() for x in tracks]
    tracks_denom = 1.0 / len(tracks)
    # Choose meaning function #
    if geometric:
        def mean_fn(scores):
            # A geometric mean is the n-th root of the product, not of
            # the sum.
            product = 1.0
            for score in scores:
                product *= score
            return product ** tracks_denom
    else:
        def mean_fn(scores):
            return sum(scores) * tracks_denom
    # Check empty #
    for i in xrange(len(tracks) - 1, -1, -1):
        if elements[i] == sentinel:
            tracks.pop(i)
            elements.pop(i)
    # Core loop #
    while tracks:
        # Find the next boundaries #
        start = min([x[0] for x in elements])
        end = min([x[0] for x in elements if x[0] > start] +
                  [x[1] for x in elements])
        # Scores between boundaries #
        scores = [x[2] for x in elements if x[1] > start and x[0] < end]
        if scores:
            yield (start, end, mean_fn(scores))
        # Iterate over elements (backwards, so popping keeps indices valid) #
        for i in xrange(len(tracks) - 1, -1, -1):
            # Change all starts #
            if elements[i][0] < end:
                elements[i] = (end, elements[i][1], elements[i][2])
            # Advance the elements that need to be advanced #
            if elements[i][1] <= end:
                elements[i] = tracks[i].next()
            # Pop sentinels #
            if elements[i] == sentinel:
                tracks.pop(i)
                elements.pop(i)
def generate(X, Y):
    """Re-score every feature of Y with the mean X score over its span.

    The third field of each ``y`` is replaced by the sum of
    ``overlap_length * x[2]`` over the X features intersecting
    ``[y[0], y[1])``, divided by ``y[1] - y[0]``.  The first two fields
    and everything from the fourth field on are copied from ``y``.

    X goes through ``common.sentinelize``; the marker
    ``(sys.maxint, sys.maxint, 0.0)`` is assumed to come back once X is
    exhausted.  The one-pass scan presumably requires both inputs to be
    sorted by start.
    """
    stop = (sys.maxint, sys.maxint, 0.0)
    X = common.sentinelize(X, stop)
    # Buffer of X features still relevant; seeded with an entry far to the
    # left so that the first feature of Y triggers reading from X.
    buffered = [(-sys.maxint, -sys.maxint, 0.0)]
    for y in Y:
        # Make sure every X feature starting before the end of y is read.
        newest = buffered[-1]
        while newest[0] < y[1]:
            newest = X.next()
            if newest[1] > y[0]:
                buffered.append(newest)
        # Forget the features lying entirely left of y.
        first_kept = 0
        while buffered[first_kept][1] <= y[0]:
            first_kept += 1
        buffered = buffered[first_kept:]
        # Accumulate score times overlap length.
        weighted_sum = 0.0
        for x in buffered:
            if y[1] <= x[0]:
                continue
            overlap_start = y[0] if x[0] < y[0] else x[0]
            overlap_end = y[1] if y[1] < x[1] else x[1]
            weighted_sum += (overlap_end - overlap_start) * x[2]
        # Emit the feature with its new score.
        mean_score = weighted_sum / (y[1] - y[0])
        yield y[0:2] + (mean_score,) + y[3:]
def meta_generate(n_tracks, fn, win_size):
    """Combine several tracks with a boolean function of their coverage.

    Every track is passed through ``fusion.generate`` and
    ``common.sentinelize`` (the marker ``(sys.maxint, sys.maxint, 0.0)``
    is assumed to be returned when a track is exhausted).  Feature starts
    and ends are turned into ``(position, track_index)`` events; between
    two consecutive event positions ``fn`` is called with the list of
    per-track activity flags and, when it returns a true value,
    ``(start, end, '', 0.0, 0)`` is yielded.

    Events are loaded window by window (``win_size`` positions at a time)
    so that only a bounded stretch of each track is held in memory.

    NOTE(review): tracks that are empty from the start are dropped before
    the activity list is built, so ``fn`` receives one flag per non-empty
    track and the positions of later tracks shift -- confirm this is what
    callers expect.

    :param n_tracks: iterable of feature iterators.
    :param fn: callable taking a list of booleans, one per non-empty track.
    :param win_size: number of positions loaded per round.
    """
    import fusion
    sentinel = (sys.maxint, sys.maxint, 0.0)
    tracks = [fusion.generate(t) for t in n_tracks]
    tracks = [common.sentinelize(t, sentinel) for t in tracks]
    init = [t.next() for t in tracks]
    # Drop empty tracks (backwards, so popping keeps indices valid).
    for i in xrange(len(tracks) - 1, -1, -1):
        if init[i] == sentinel:
            tracks.pop(i)
            init.pop(i)
    N = len(tracks)
    # With no feature at all there is no event to start from.
    if N == 0:
        return
    available_tracks = list(range(N - 1, -1, -1))
    activity = [False] * N
    current = [(feature[0], i) for i, feature in enumerate(init)]
    current.extend([(feature[1], i) for i, feature in enumerate(init)])
    current.sort()
    # Switch on the tracks that begin at the very first position.
    start = current[0][0]
    while current and current[0][0] == start:
        activity[current[0][1]] = True
        current.pop(0)
    k = 1
    while available_tracks:
        # load *win_size* bp in memory
        limit = k * win_size
        to_remove = []
        for i in available_tracks:
            a = [0, 0]
            while a[1] < limit:
                a = tracks[i].next()
                if a == sentinel:
                    to_remove.append(i)
                    break
                current.append((a[0], i))
                current.append((a[1], i))
        for i in to_remove:
            available_tracks.remove(i)
        if not available_tracks:
            # Every event is loaded now.  Lift the limit so the events at
            # or beyond the current window are flushed too; otherwise the
            # loop would exit and silently drop them.
            limit = sys.maxint
        current.sort()
        # calculate boolean values for start-position interval
        while current and current[0][0] < limit:
            position = current[0][0]
            if fn(activity):
                yield (start, position, '', 0.0, 0)
            # Toggle every track that has an event at this position.
            while current and current[0][0] == position:
                i = current[0][1]
                activity[i] = not activity[i]
                current.pop(0)
            start = position
        k += 1
def meta_generate(n_tracks, fn, win_size):
    """Combine several tracks with a boolean function of their coverage.

    Every track is passed through ``fusion`` (imported from
    ``gMiner.manipulate``) and ``common.sentinelize`` (the marker
    ``(sys.maxint, sys.maxint, 0.0)`` is assumed to be returned when a
    track is exhausted).  Feature starts and ends are turned into
    ``(position, track_index)`` events; between two consecutive event
    positions ``fn`` is called with the list of per-track activity flags
    and, when it returns a true value, ``(start, end, '', 0.0, 0)`` is
    yielded.

    Events are loaded window by window (``win_size`` positions at a time)
    so that only a bounded stretch of each track is held in memory.

    NOTE(review): tracks that are empty from the start are dropped before
    the activity list is built, so ``fn`` receives one flag per non-empty
    track and the positions of later tracks shift -- confirm this is what
    callers expect.

    :param n_tracks: iterable of feature iterators.
    :param fn: callable taking a list of booleans, one per non-empty track.
    :param win_size: number of positions loaded per round.
    """
    from gMiner.manipulate import fusion
    sentinel = (sys.maxint, sys.maxint, 0.0)
    tracks = [fusion(t) for t in n_tracks]
    tracks = [common.sentinelize(t, sentinel) for t in tracks]
    init = [t.next() for t in tracks]
    # Drop empty tracks (backwards, so popping keeps indices valid).
    for i in xrange(len(tracks) - 1, -1, -1):
        if init[i] == sentinel:
            tracks.pop(i)
            init.pop(i)
    N = len(tracks)
    # With no feature at all there is no event to start from.
    if N == 0:
        return
    available_tracks = list(range(N - 1, -1, -1))
    activity = [False] * N
    current = [(feature[0], i) for i, feature in enumerate(init)]
    current.extend([(feature[1], i) for i, feature in enumerate(init)])
    current.sort()
    # Switch on the tracks that begin at the very first position.
    start = current[0][0]
    while current and current[0][0] == start:
        activity[current[0][1]] = True
        current.pop(0)
    k = 1
    while available_tracks:
        # load *win_size* bp in memory
        limit = k * win_size
        to_remove = []
        for i in available_tracks:
            a = [0, 0]
            while a[1] < limit:
                a = tracks[i].next()
                if a == sentinel:
                    to_remove.append(i)
                    break
                current.append((a[0], i))
                current.append((a[1], i))
        for i in to_remove:
            available_tracks.remove(i)
        if not available_tracks:
            # Every event is loaded now.  Lift the limit so the events at
            # or beyond the current window are flushed too; otherwise the
            # loop would exit and silently drop them.
            limit = sys.maxint
        current.sort()
        # calculate boolean values for start-position interval
        while current and current[0][0] < limit:
            position = current[0][0]
            if fn(activity):
                yield (start, position, '', 0.0, 0)
            # Toggle every track that has an event at this position.
            while current and current[0][0] == position:
                i = current[0][1]
                activity[i] = not activity[i]
                current.pop(0)
            start = position
        k += 1
def generate(X, L, l):
    """Yield ``(start, end, mean)`` runs of a sliding-window mean over X.

    The window spans ``2 * L + 1`` positions centred on the current
    position, which advances one step at a time.  The running mean is
    updated incrementally: the score (``x[2]``) of the newest buffered
    feature is added when it covers the position entering the window, and
    the score of the oldest buffered feature is subtracted when it covers
    the position leaving it.  A tuple is emitted whenever the mean changes
    and the previous mean was non-zero; iteration stops once the position
    equals ``l``.

    X is wrapped by ``common.sentinelize``; the marker
    ``(sys.maxint, sys.maxint, 0.0)`` is assumed to be returned when X is
    exhausted.
    """
    end_marker = (sys.maxint, sys.maxint, 0.0)
    X = common.sentinelize(X, end_marker)
    # Start left of the origin so the window fills up before position 0.
    pos = -L - 2
    run_start = -L - 3
    current_mean = 0
    upcoming_mean = 0
    # Multiply by the inverse window length instead of dividing each time.
    weight = 1.0 / (2 * L + 1)
    # Buffer of the features around the window; nothing to do if X is empty.
    window = [X.next()]
    if window[0] == end_marker:
        return
    while True:
        pos += 1
        left = pos - L
        right = pos + L + 1
        # Scores entering the window on the right.
        if window[-1][1] < right:
            window.append(X.next())
        if window[-1][0] < right:
            upcoming_mean += window[-1][2] * weight
        # Scores leaving the window on the left.
        if window[0][1] < left:
            window.pop(0)
        if window[0][0] < left:
            upcoming_mean -= window[0][2] * weight
        # Left border: nothing is reported before position zero.
        if pos < 0:
            current_mean = 0
            run_start = pos
            continue
        # Right border, or a change of the mean: close the current run.
        reached_end = pos == l
        if reached_end or upcoming_mean != current_mean:
            if current_mean != 0:
                yield (run_start, pos, current_mean)
            if reached_end:
                break
            current_mean = upcoming_mean
            run_start = pos
def generate(n_tracks):
    """Merge several tracks into one stream ordered by ``(start, end)``.

    At every step the pending feature with the smallest ``(f[0], f[1])``
    pair is yielded (on ties, the one from the lowest-indexed remaining
    track) and replaced by the next feature of the same track.  Tracks are
    wrapped by ``common.sentinelize``; a track is discarded as soon as it
    returns the marker ``(sys.maxint, sys.maxint, 0.0)``.
    """
    end_marker = (sys.maxint, sys.maxint, 0.0)
    tracks = [common.sentinelize(t, end_marker) for t in n_tracks]
    # One pending feature per track; filled in by the priming loop below.
    heads = [None] * len(tracks)

    def advance(idx):
        # Load the next feature of track idx, dropping the track when done.
        heads[idx] = tracks[idx].next()
        if heads[idx] == end_marker:
            tracks.pop(idx)
            heads.pop(idx)

    def position(idx):
        # Ordering key of the pending feature of track idx.
        return (heads[idx][0], heads[idx][1])

    # Prime every track, backwards so that popping keeps indices valid.
    for idx in reversed(range(len(tracks))):
        advance(idx)
    while tracks:
        lowest = min(range(len(heads)), key=position)
        yield heads[lowest]
        advance(lowest)
def generate(n_tracks, geometric=False):
    """Merge several scored tracks into one track of per-interval means.

    Each track yields ``(start, end, score)`` features and is wrapped by
    ``common.sentinelize``; the marker ``(sys.maxint, sys.maxint, 0.0)``
    is assumed to be returned when a track is exhausted.  The coordinate
    axis is cut at every feature boundary, and for each resulting interval
    covered by at least one track a ``(start, end, mean)`` tuple is
    yielded.

    The mean is always taken over the total number of input tracks, not
    over the number of tracks covering the interval:

    * arithmetic (default): ``sum(scores) / len(n_tracks)``
    * geometric: ``product(scores) ** (1.0 / len(n_tracks))``

    :param n_tracks: iterable of feature iterators.
    :param geometric: use the geometric instead of the arithmetic mean.
    """
    # Get all iterators #
    sentinel = (sys.maxint, sys.maxint, 0.0)
    tracks = [common.sentinelize(x, sentinel) for x in n_tracks]
    # Without any track there is nothing to merge; returning here also
    # avoids a division by zero when computing the denominator.
    if not tracks:
        return
    elements = [x.next() for x in tracks]
    tracks_denom = 1.0 / len(tracks)
    # Choose meaning function #
    if geometric:
        def mean_fn(scores):
            # A geometric mean is the n-th root of the product, not of
            # the sum.
            product = 1.0
            for score in scores:
                product *= score
            return product ** tracks_denom
    else:
        def mean_fn(scores):
            return sum(scores) * tracks_denom
    # Check empty #
    for i in xrange(len(tracks) - 1, -1, -1):
        if elements[i] == sentinel:
            tracks.pop(i)
            elements.pop(i)
    # Core loop #
    while tracks:
        # Find the next boundaries #
        start = min([x[0] for x in elements])
        end = min([x[0] for x in elements if x[0] > start] +
                  [x[1] for x in elements])
        # Scores between boundaries #
        scores = [x[2] for x in elements if x[1] > start and x[0] < end]
        if scores:
            yield (start, end, mean_fn(scores))
        # Iterate over elements (backwards, so popping keeps indices valid) #
        for i in xrange(len(tracks) - 1, -1, -1):
            # Change all starts #
            if elements[i][0] < end:
                elements[i] = (end, elements[i][1], elements[i][2])
            # Advance the elements that need to be advanced #
            if elements[i][1] <= end:
                elements[i] = tracks[i].next()
            # Pop sentinels #
            if elements[i] == sentinel:
                tracks.pop(i)
                elements.pop(i)