def bpms():
    '''
    Builds the list of BPMs in two parallel stages.

    Stage one maps 'localmaxcut' over the indices 0 .. conf.M - 1 to produce
    the happy bipartitions, which are stored in the module-level name
    'happyparts'. Stage two maps 'group_genes' over the enumerated
    'geneinter.genes'; the result of that second map is returned.
    '''
    # 'happyparts' is deliberately published as a global instead of being
    # curried into 'group_genes'. Per the original author's note, currying
    # it stopped multiprocessing from keeping all of the cores busy.
    global happyparts

    partition_indices = xrange(conf.M)
    happyparts = parallel.pmap(localmaxcut, partition_indices)

    indexed_genes = enumerate(geneinter.genes)
    return parallel.pmap(group_genes, indexed_genes)
def prune(bpms):
    '''
    Applies up to two pruning passes to the generated BPMs and returns the
    surviving (A, B) pairs as a list.

    The first pass runs only when conf.min_size or conf.max_size is
    positive: every BPM is checked with 'satisfy_min_max' and dropped if
    the check fails. (A bound of 0 is meant to disable that constraint;
    the per-bound handling lives in 'satisfy_min_max'.)

    The second pass runs only when conf.pruning is set. The interaction
    weight of each BPM is computed in parallel (see 'interweight') and the
    BPMs are sorted by that weight in descending order. Walking the sorted
    list from the front, a BPM is added to the final set if and only if the
    Jaccard index of its gene set (A union B) with the gene set of every
    BPM already in the final set is less than conf.jaccard.

    bpms is an iterable of (A, B) pairs whose elements support '.union'.
    '''
    if conf.min_size > 0 or conf.max_size > 0:
        # A comprehension replaces filter() with a tuple-unpacking lambda,
        # which is a syntax error on Python 3.
        bpms = [(A, B) for A, B in bpms if satisfy_min_max(A, B)]

    # If pruning is disabled, exit now.
    if not conf.pruning:
        return bpms

    # 'interweight' yields (weight, (A, B)) pairs; sort on the weight only,
    # so ties keep the relative order produced by pmap (sorted is stable).
    weighted = parallel.pmap(interweight, bpms)
    ranked = sorted(weighted, key=lambda scored: scored[0], reverse=True)

    pruned = []
    # Gene-set unions of the BPMs accepted so far, kept alongside 'pruned'
    # so each union is computed once instead of once per later candidate.
    pruned_genes = []
    for _weight, (A, B) in ranked:
        genes = A.union(B)
        # The generator lets all() stop at the first kept BPM that is too
        # similar, instead of scoring the candidate against every one.
        if all(jaccard_index(genes, kept) < conf.jaccard
               for kept in pruned_genes):
            pruned.append((A, B))
            pruned_genes.append(genes)
    return pruned