Esempio n. 1
0
def bpms():
    '''
    Builds the list of BPMs in two parallel passes.

    The first pass calls 'localmaxcut' once for every index in
    [0, conf.M) and stores the results (the "happy bipartitions") in the
    module-level name 'happyparts'. The second pass calls 'group_genes'
    on every (index, gene) pair of 'geneinter.genes'; the list produced by
    that pass is the return value.
    '''

    # NOTE: 'happyparts' is intentionally a module-level global. Currying it
    # into 'group_genes' was tried before, but with that approach
    # multiprocessing no longer kept every core busy. Presumably
    # 'group_genes' reads this global directly.
    global happyparts

    happyparts = parallel.pmap(localmaxcut, xrange(conf.M))

    indexed_genes = enumerate(geneinter.genes)
    return parallel.pmap(group_genes, indexed_genes)
Esempio n. 2
0
def bpms():
    '''
    Builds the list of BPMs in two parallel passes.

    The first pass calls 'localmaxcut' once for every index in
    [0, conf.M) and stores the results (the "happy bipartitions") in the
    module-level name 'happyparts'. The second pass calls 'group_genes'
    on every (index, gene) pair of 'geneinter.genes'; the list produced by
    that pass is the return value.
    '''

    # NOTE: 'happyparts' is intentionally a module-level global. Currying it
    # into 'group_genes' was tried before, but with that approach
    # multiprocessing no longer kept every core busy. Presumably
    # 'group_genes' reads this global directly.
    global happyparts

    happyparts = parallel.pmap(localmaxcut, xrange(conf.M))

    indexed_genes = enumerate(geneinter.genes)
    return parallel.pmap(group_genes, indexed_genes)
Esempio n. 3
0
def prune(bpms):
    '''
    Applies up to two pruning passes to the generated BPMs and returns the
    surviving BPMs as a list.

    bpms is an iterable of (A, B) module pairs. A and B must support
    '.union' (presumably they are sets of genes).

    The first pass runs only when 'conf.min_size' or 'conf.max_size' is
    positive. It keeps the BPMs for which 'satisfy_min_max(A, B)' is true;
    the size test itself (module smaller than the minimum or larger than
    the maximum, a bound of 0 meaning "no constraint") is delegated to
    that helper.

    The second pass runs only when 'conf.pruning' is set; otherwise the
    size-filtered BPMs are returned as they are. 'interweight' is computed
    for every BPM in parallel, each result being unpacked as
    (weight, (A, B)). The results are sorted by weight in descending order
    and then, starting from the heaviest, a BPM is added to the final list
    if and only if the Jaccard index of its genes (A union B) with the
    genes of every BPM already in the final list is less than
    'conf.jaccard'.
    '''
    if conf.min_size > 0 or conf.max_size > 0:
        # A comprehension instead of 'filter' with a tuple-unpacking lambda:
        # tuple parameters are a syntax error on Python 3, and 'filter' is
        # lazy there, which would break the list returned just below.
        bpms = [bpm for bpm in bpms if satisfy_min_max(*bpm)]

    # If pruning is disabled, exit now.
    if not conf.pruning:
        return bpms

    # Heaviest BPMs first. The sort is stable, so BPMs of equal weight keep
    # the order in which 'parallel.pmap' produced them.
    withI = sorted(parallel.pmap(interweight, bpms),
                   key=lambda weighted: weighted[0], reverse=True)

    pruned = []
    # pruned_genes[i] caches A.union(B) of pruned[i] so that the union is
    # not rebuilt for every later candidate.
    pruned_genes = []
    for _, (A, B) in withI:
        genes = A.union(B)
        # The generator lets 'all' stop at the first BPM that overlaps too
        # much instead of computing every Jaccard index up front.
        if all(jaccard_index(genes, kept) < conf.jaccard
               for kept in pruned_genes):
            pruned.append((A, B))
            pruned_genes.append(genes)

    return pruned
Esempio n. 4
0
def prune(bpms):
    '''
    Applies up to two pruning passes to the generated BPMs and returns the
    surviving BPMs as a list.

    bpms is an iterable of (A, B) module pairs. A and B must support
    '.union' (presumably they are sets of genes).

    The first pass runs only when 'conf.min_size' or 'conf.max_size' is
    positive. It keeps the BPMs for which 'satisfy_min_max(A, B)' is true;
    the size test itself (module smaller than the minimum or larger than
    the maximum, a bound of 0 meaning "no constraint") is delegated to
    that helper.

    The second pass runs only when 'conf.pruning' is set; otherwise the
    size-filtered BPMs are returned as they are. 'interweight' is computed
    for every BPM in parallel, each result being unpacked as
    (weight, (A, B)). The results are sorted by weight in descending order
    and then, starting from the heaviest, a BPM is added to the final list
    if and only if the Jaccard index of its genes (A union B) with the
    genes of every BPM already in the final list is less than
    'conf.jaccard'.
    '''
    if conf.min_size > 0 or conf.max_size > 0:
        # A comprehension instead of 'filter' with a tuple-unpacking lambda:
        # tuple parameters are a syntax error on Python 3, and 'filter' is
        # lazy there, which would break the list returned just below.
        bpms = [bpm for bpm in bpms if satisfy_min_max(*bpm)]

    # If pruning is disabled, exit now.
    if not conf.pruning:
        return bpms

    # Heaviest BPMs first. The sort is stable, so BPMs of equal weight keep
    # the order in which 'parallel.pmap' produced them.
    withI = sorted(parallel.pmap(interweight, bpms),
                   key=lambda weighted: weighted[0], reverse=True)

    pruned = []
    # pruned_genes[i] caches A.union(B) of pruned[i] so that the union is
    # not rebuilt for every later candidate.
    pruned_genes = []
    for _, (A, B) in withI:
        genes = A.union(B)
        # The generator lets 'all' stop at the first BPM that overlaps too
        # much instead of computing every Jaccard index up front.
        if all(jaccard_index(genes, kept) < conf.jaccard
               for kept in pruned_genes):
            pruned.append((A, B))
            pruned_genes.append(genes)

    return pruned