Example #1
0
def subprocessPerLayer(layer_names, parm):

    # Spawn the subprocesses to calculate stats for each layer
    # The number of pairs to compare including compare to self
    pairCount = len(parm['statsLayers']) ** 2
    pool.hostProcessorMsg()
    print 'Starting to build', pairCount, 'layer pairs...'

    """
    # TODO easy testing without subprocesses
    for layerA in parm['statsLayers']:
        parm['layerA'] = layerA
        parm['layerIndex'] = layer_names.index(layerA)
        oneLayer = ForEachLayer(parm)
        oneLayer()
    """
    
    # Handle the stats for each layer, in parallel
    allLayers = []
    for layerA in parm['statsLayers']:
        parm['layerA'] = layerA
        parm['layerIndex'] = layer_names.index(layerA)
        allLayers.append(ForEachLayer(parm))

    print pool.hostProcessorMsg()
    print len(parm['statsLayers']), 'subprocesses to run, one per layer.'
    pool.runSubProcesses(allLayers)
def subprocessPerLayer(layer_names, parm):
    # NOTE(review): this is a near byte-for-byte duplicate of the
    # subprocessPerLayer defined earlier in this file; at import time this
    # later definition silently shadows the earlier one. One of the two
    # should be removed.

    # Spawn the subprocesses to calculate stats for each layer
    # The number of pairs to compare including compare to self
    pairCount = len(parm['statsLayers'])**2
    pool.hostProcessorMsg()
    print 'Starting to build', pairCount, 'layer pairs...'
    """
    # TODO easy testing without subprocesses
    for layerA in parm['statsLayers']:
        parm['layerA'] = layerA
        parm['layerIndex'] = layer_names.index(layerA)
        oneLayer = ForEachLayer(parm)
        oneLayer()
    """

    # Handle the stats for each layer, in parallel
    allLayers = []
    for layerA in parm['statsLayers']:
        # 'layerA'/'layerIndex' are rewritten each iteration; ForEachLayer
        # presumably copies what it needs at construction -- TODO confirm.
        parm['layerA'] = layerA
        parm['layerIndex'] = layer_names.index(layerA)
        allLayers.append(ForEachLayer(parm))

    print pool.hostProcessorMsg()
    print len(parm['statsLayers']), 'subprocesses to run, one per layer.'
    pool.runSubProcesses(allLayers)
Example #3
0
def normalized_pearson_statistics(layers, layerNames, nodes_multiple, ctx, options):

    # For every binary layer and for every layout, we need a file called
    # layer_<layer number>_<layout number>_region.tab with scores associating
    # that layer with other layers of its type. This uses a normalized pearson
    # correlation to produce a score between -1 and 1, where -1 is the most 
    # anticorrelated, 0 is no correlation and 1 is the most correlated.
    # 
    # For binary data we are using a normalized pearson correlation that
    # accounts for sample biases. We are using these logical steps to get there:
    #
    # 1. Use the extents of the node positions as the first window.
    #
    # 2. Divide the window into a 2 X 2 grid containing 4 new equal-sized windows.
    # 
    # 3. Scan across all nodes, placing them into the appropiate window. These
    #    are stored as a vector of windows, with a vector of node names in each
    #    window. For the toy example, we will show node counts rather than names.
    #             toy: C = [2, 1, 3, 5]
    # 
    # 4. Drop any windows from the vector whose node count is below the
    #    lower_threshold.
    #             toy: with a lower_threshold of 2:
    #             C1 = [2, 3, 5]
    #
    # 5. Repeat Steps 2 - 4 for each window containing more nodes than the upper
    #    threshold
    # 
    # 6. Normalize this vector of counts. For each element:
    #     a. divide by the sum of the elements.
    #             toy: C2 =  [1/5, 3/5, 1]
    #     b. multiply by the pseudocount of 5
    #             toy: C3 = [1, 3, 5]
    #
    # 7. Look at each attribute compared with each other attribute. For each
    #    pair of attributes:
    #     a. For attribute A, create a vector with counts of the number
    #        of nodes in which attribute A is 1. Create an element in A for each
    #        window in C1.
    #             toy: A = [1, 2, 2]
    # 
    #     b. Do the same for attribute B.
    #             toy: B = [0, 1, 1]
    # 
    #     c. Compare vectors A and B. For each node in a window that
    #        has a value of one for both A and B, decrement that window count
    #        in both vectors.
    #             toy: if the 2nd window has one node that is in A & B:
    #             A1 = [1, 1, 2],  B1 = [0, 0, 1]
    # 
    #     d. For each element of A1 & B2 add the corresponding element of C3.
    #             toy: A2 = [2, 4, 4],  B2 = [1, 3, 6]
    # 
    #     e. Correlate vectors A and B with the Pearson method which returns an
    #        R correlation and a P value.

    for layout in ctx.all_hexagons.iterkeys():
        # We look at all layouts for this.

        # Create the windows containing lists of node names in each window.
        # Following our naming scheme above, assign C to the curated windows
        # Note we find the windows even if we are not computing layout-aware
        # stats so we can use the windowing later for dynamic stats.
        C = window_tool(
            options.directory,
            nodes_multiple[layout],
            options.mi_window_threshold,
            options.mi_window_threshold_upper,
            layout,
        )

        # Transform the nodes lists in C to a list of node counts
        C1 = map(lambda x: len(x), C)

        # Normalize the node counts to create the windows addtives:
        # divide by the sum of the counts and multiply by the pseudocount.
        Sum = sum(C1)
        C2 = map(lambda x: float(x) * PSEUDOCOUNT / Sum, C1)

        # Write the window node counts and additives to a file for use in
        # dynamic stats initiated by the client
        filename = os.path.join(options.directory,
            'windows_' + str(layout) + '.tab')
        with open(filename, 'w') as f:
            f = csv.writer(f, delimiter='\t')
            i = 0
            for nodes in C:
                line = [C2[i]]
                for node in nodes:
                    line.append(node)
                f.writerow(line)
                i += 1

        if not options.mutualinfo:
            print 'Skipping sort stats for layout-aware'
            continue

        if ctx.binary_layers == 0:
            print 'No binary layers for layout-aware stats to process'
            continue

        # The number of pairs to compare without compare to self
        pairCount = len(ctx.binary_layers) ** 2 - len(ctx.binary_layers)
        print 'Starting to build', pairCount, 'layer pairs...'

        # Create the stats parameters
        parm = {
            'directory': options.directory,
            'layers': layers,
            'layout': str(layout),
            'statsLayers': ctx.binary_layers,
            'windowAdditives': C2,
            'windowNodes': C,
        }

        """
        # TODO easy testing without subprocesses
        for layerA in parm['statsLayers']:
            parm['layerA'] = layerA
            parm['layerIndex'] = layerNames.index(layerA)
            oneLayer = ForEachLayer(parm)
            oneLayer()
        """
        
        # Handle the stats for each layer, in parallel
        allLayers = []
        for layer in parm['statsLayers']:
            parm['layerA'] = layer
            parm['layerIndex'] = layerNames.index(layer)
            allLayers.append(ForEachLayer(parm))

        print pool.hostProcessorMsg()
        print len(ctx.binary_layers), 'subprocesses to run, one per layer.'

        pool.runSubProcesses(allLayers)

        print timestamp(), 'Stats complete for layout:', layout