Esempio n. 1
0
def group_genes(indexed_gene):
    '''
    Builds one BPM (a pair of modules) seeded from a single gene.

    group_genes is applied to every gene, and a BPM is generated from *every*
    gene. For each gene g2 in geneinter.genes, the fraction of the conf.M
    happy bipartitions in which g2 shares a set with the seed gene is
    computed. If that fraction is at least conf.C, g2 goes in the first
    module; otherwise, if the complementary fraction is at least conf.C, g2
    goes in the second module. A gene satisfying both goes in the first only.

    :param indexed_gene: A two-tuple (i, g1) where 'g1' is the seed gene.
                         'i' is accepted for the caller's convenience and is
                         not used here.
    :return: A two-tuple of sets (mod1, mod2).
    '''
    # Explicit unpacking instead of a tuple parameter: tuple parameters were
    # removed from the language by PEP 3113, and this form runs on both
    # Python 2 and Python 3. Callers still pass one (i, g1) tuple.
    _, g1 = indexed_gene

    # Which side(s) of each bipartition hold g1 does not depend on g2, so it
    # is resolved once instead of once per candidate gene.
    seeded = [(A, B, g1 in A, g1 in B) for A, B in happyparts]

    mod1, mod2 = set(), set()
    for g2 in geneinter.genes:
        # Count the number of times g2 is in the same set as g1
        freqsame = sum(
            1 for A, B, g1_in_a, g1_in_b in seeded
            if (g1_in_a and g2 in A) or (g1_in_b and g2 in B)
        )

        # float() keeps this a true division under Python 2 as well.
        ratio = float(freqsame) / conf.M
        if ratio >= conf.C:
            mod1.add(g2)
        elif (1 - ratio) >= conf.C:
            mod2.add(g2)

    parallel.inc_counter()
    parallel.print_progress()

    return mod1, mod2
Esempio n. 2
0
def group_genes(indexed_gene):
    '''
    Generates a BPM (two modules of genes) from one seed gene.

    group_genes is applied to every gene, and a BPM is generated from *every*
    gene. Given the happy bipartitions in 'happyparts', the first module
    collects every gene that shared a set with the seed gene in at least a
    conf.C fraction of the conf.M bipartitions; the second module collects
    every remaining gene that was *not* in the same set for at least a conf.C
    fraction of them.

    :param indexed_gene: A two-tuple (i, g1). Only the gene 'g1' is used;
                         the index 'i' is ignored.
    :return: A two-tuple (mod1, mod2) of sets of genes.
    '''
    # The original signature used a tuple parameter, which is a syntax error
    # on Python 3 (PEP 3113). Unpacking in the body is equivalent for callers
    # that pass a single (i, g1) tuple and works on Python 2 too.
    _, g1 = indexed_gene

    # Record g1's membership per bipartition up front; it is the same for
    # every g2 examined below.
    parts = [(A, B, g1 in A, g1 in B) for A, B in happyparts]

    mod1, mod2 = set(), set()
    for g2 in geneinter.genes:
        # Count the number of times g2 is in the same set as g1
        freqsame = sum(
            1 for A, B, in_a, in_b in parts
            if (in_a and g2 in A) or (in_b and g2 in B)
        )

        # float() avoids integer division on Python 2.
        ratio = float(freqsame) / conf.M
        if ratio >= conf.C:
            mod1.add(g2)
        elif (1 - ratio) >= conf.C:
            mod2.add(g2)

    parallel.inc_counter()
    parallel.print_progress()

    return mod1, mod2
Esempio n. 3
0
def localmaxcut(m):
    '''
    Generates a random bipartition and makes the bipartition 'happy' by
    applying 'Weighted-Flip' (from Leiserson et al., 2011) until there are no
    unhappy genes left.

    :param m: Not used in the body. Presumably the index of the bipartition
              being generated, supplied by a mapping caller -- confirm
              against the call site.
    :return: A two-tuple (A, B) holding the two sides of the bipartition
             once get_unhappy reports no unhappy genes.
    '''
    A, B = random_bipartition()

    def same_set(g1, g2):
        '''
        True when both genes currently sit on the same side. Reads A and B
        at call time, so it reflects flips made in the loop below.
        '''
        return (g1 in A and g2 in A) or (g1 in B and g2 in B)

    def weights(g1):
        '''
        Calculates the total neighboring weight of 'g1': the sum of
        interaction scores with genes on the same side as g1 and the sum
        with genes on the opposite side.

        The result is a dictionary with keys 'same' and 'other' rather than
        a tuple because the values need to be mutable; they change as
        vertices move between the partitions.
        '''
        ws = {'same': 0, 'other': 0}
        for g2 in geneinter.genes:
            w = geneinter.gi(g1, g2)
            if same_set(g1, g2):
                ws['same'] += w
            else:
                ws['other'] += w
        return ws

    nweights = {g: weights(g) for g in geneinter.genes}
    unhappy = get_unhappy(nweights)

    while unhappy:
        v = random.choice(unhappy)

        # Flip v to the other side.
        if v in A:
            A.remove(v)
            B.add(v)
        else:
            A.add(v)
            B.remove(v)

        # Update the cached weights incrementally instead of calling
        # 'weights' for every gene again: one pass over the genes per flip,
        # at the cost of clarity.
        #
        # The idea is to modify the weights of every other interacting gene
        # and to switch the 'same' and 'other' scores of the gene that was
        # flipped.
        #
        # .items() rather than .iteritems(): the latter does not exist on
        # Python 3. The inner dictionaries are mutated in place, so
        # iterating over a copy of the pairs on Python 2 is still correct.
        for g, nw in nweights.items():
            if g == v:
                nw['same'], nw['other'] = nw['other'], nw['same']
                continue

            # The interaction score between this gene and the gene that
            # was flipped.
            w = geneinter.gi(v, g)

            # If the two genes are now in the same set, then 'g' gets a boost
            # to its happiness. Otherwise, 'g' becomes more unhappy.
            if same_set(v, g):
                nw['same'] += w
                nw['other'] -= w
            else:
                nw['same'] -= w
                nw['other'] += w

        # Refresh the unhappy list
        unhappy = get_unhappy(nweights)

    parallel.inc_counter()
    parallel.print_progress()

    return A, B
Esempio n. 4
0
def enrich(modulecnt, module):
    '''
    Looks up the GO terms for one module via faread.functionate (a request
    to Funcassociate) and returns the module together with those terms.

    :param modulecnt: The total number of modules in the BPM file. It is
                      clamped to the range [1000, 10000] before being passed
                      as the second argument to faread.functionate.
                      NOTE(review): the meaning of that argument is defined
                      by faread.functionate and is not visible here.
    :param module: A three-tuple (bpmi, modi, genes) representing a module.
                   'bpmi' is the BPM index number, 'modi' is the module index
                   number, and 'genes' is a list of gene names in the module.
    :return: A four-tuple (bpmi, modi, genes, goterms) of the input module
             and its associated go terms.
    '''
    # Unpack in the body rather than in the signature: tuple parameters are
    # a syntax error on Python 3 (PEP 3113). Positional callers are
    # unaffected.
    bpmi, modi, genes = module

    clamped_count = min(10000, max(1000, modulecnt))
    goterms = faread.functionate(genes, clamped_count)

    parallel.inc_counter()
    parallel.print_progress()

    return bpmi, modi, genes, goterms


def sortgo(goterms):
    '''
    Sorts the keys of a goterms dictionary according to the current
    configuration.

    NOTE(review): only the selection of the ordering parameters is visible
    in this excerpt; the sorting step and the return value are not shown.
    '''
    # With no configuration loaded, fall back to reverse=False and the 'p'
    # field (presumably the p-value -- confirm against the goterms schema).
    if conf is None:
        reverse = False
        sort_by = 'p'
    else:
        # Reverse order only when the configuration says 'desc'.
        reverse = conf.order_go == 'desc'
        sort_by = conf.sort_go_by
Esempio n. 5
0
def enrich(modulecnt, module):
    '''
    Initiates a request to Funcassociate (through faread.functionate) for
    the genes of one module and returns the module with its goterms.

    :param modulecnt: The total number of modules in the BPM file. Values
                      below 1000 are raised to 1000 and values above 10000
                      are lowered to 10000 before the call.
                      NOTE(review): what faread.functionate does with this
                      number is not visible from here.
    :param module: A tuple (bpmi, modi, genes) representing a module. 'bpmi'
                   is the BPM index number, 'modi' is the module index
                   number, and 'genes' is a list of gene names in the module.
    :return: A four-tuple of the input module and its associated go terms:
             (bpmi, modi, genes, goterms).
    '''
    # The signature previously destructured the tuple itself, which Python 3
    # rejects (PEP 3113). Destructuring here keeps the call convention.
    bpmi, modi, genes = module

    goterms = faread.functionate(genes, min(10000, max(1000, modulecnt)))

    parallel.inc_counter()
    parallel.print_progress()

    return bpmi, modi, genes, goterms

def sortgo(goterms):
    '''
    Sorts the keys of a goterms dictionary according to the current
    configuration.

    NOTE(review): this excerpt ends after the ordering parameters are
    chosen; the code that performs the sort is not visible here.
    '''
    # No configuration available: use reverse=False and sort on 'p'
    # (presumably a p-value field -- verify against the goterms entries).
    if conf is None:
        reverse = False
        sort_by = 'p'
    else:
        # 'desc' in the configuration is the only value that reverses.
        reverse = conf.order_go == 'desc'
        sort_by = conf.sort_go_by
Esempio n. 6
0
def localmaxcut(m):
    '''
    Generates a random bipartition and makes the bipartition 'happy' by
    applying 'Weighted-Flip' (from Leiserson et al., 2011) until there are no
    unhappy genes left.

    :param m: Ignored by this function. NOTE(review): looks like an index
              handed in by whatever maps this function over a range --
              verify against the caller.
    :return: The two sides (A, B) of the bipartition after the last flip.
    '''
    A, B = random_bipartition()

    def same_set(g1, g2):
        '''
        Reports whether g1 and g2 are on the same side of the bipartition
        as it stands when called.
        '''
        return (g1 in A and g2 in A) or (g1 in B and g2 in B)

    def weights(g1):
        '''
        Calculates the total neighboring weight of 'g1'. The total
        neighboring weight is the sum of interactions in the same set as g1
        and the sum of interactions in the opposite set as g1.

        It is represented by a dictionary with keys 'same' and 'other'. A
        dictionary is used because the values need to be mutable; they
        change as we move vertices between the partitions.
        '''
        ws = {'same': 0, 'other': 0}
        for g2 in geneinter.genes:
            w = geneinter.gi(g1, g2)
            if same_set(g1, g2):
                ws['same'] += w
            else:
                ws['other'] += w
        return ws

    nweights = {g: weights(g) for g in geneinter.genes}
    unhappy = get_unhappy(nweights)

    while unhappy:
        v = random.choice(unhappy)

        # Move v across the cut.
        if v in A:
            A.remove(v)
            B.add(v)
        else:
            A.add(v)
            B.remove(v)

        # This loop eliminates the need to recalculate 'weights' for every
        # gene again after each flip. It makes a single pass over the genes
        # but comes at the cost of clarity.
        #
        # The idea is to modify the weights of every other interacting gene
        # and to switch the 'same' and 'other' scores of the gene that was
        # just moved.
        #
        # dict.iteritems() was removed in Python 3; dict.items() behaves the
        # same for this loop on both major versions because only the inner
        # dictionaries are mutated, never the set of keys.
        for g, nw in nweights.items():
            if g == v:
                nw['same'], nw['other'] = nw['other'], nw['same']
                continue

            # The interaction score between this gene and the gene that
            # was just moved.
            w = geneinter.gi(v, g)

            # If the two genes are now in the same set, then 'g' gets a boost
            # to its happiness. Otherwise, 'g' becomes more unhappy.
            if same_set(v, g):
                nw['same'] += w
                nw['other'] -= w
            else:
                nw['same'] -= w
                nw['other'] += w

        # Refresh the unhappy list
        unhappy = get_unhappy(nweights)

    parallel.inc_counter()
    parallel.print_progress()

    return A, B