from __future__ import division

import math
from microarray import DataSet

# `inputs` and `outputs` are assumed to be module-level lists of path
# templates, each containing a '%s' placeholder for the stressor name
# (defined elsewhere in the original module).

def make_pass(stressor):
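    """
    Normalize and filter the microarray data for the given stressor.

    Each dataset is centered so that its overall red/green intensity ratio
    is one, expressed relative to the first (reference) dataset, pruned of
    unusable features, and saved back to disk.
    """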

    #print "Parsing data for '%s'...\n" % stressor

    experiments = [ DataSet.load(input % stressor) for input in inputs ]
    reference = experiments[0]

    for data in experiments:

        # Find the ratio between the amount of red and green fluorescence that was
        # detected.  This ratio is assumed to be one for most data analysis
        # purposes, so the raw data needs to be corrected.

        green, red = 0, 0
        for feature in data:
            red += feature.signal.red.intensity
            green += feature.signal.green.intensity

        data.intensity_ratio = red / green
        data.log_ratio = math.log(data.intensity_ratio, 2)

        def correction(feature):
            feature.log_ratio -= data.log_ratio
            return feature

        data.apply(correction)

    for data in experiments:

        for feature, zero in zip(data, reference):
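            # Express each feature relative to the corresponding feature in
            # the reference (first) dataset.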
            feature.normed_ratio = feature.log_ratio - zero.log_ratio

    for data in experiments:
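        # Discard features that can't be meaningfully analyzed: undefined
        # ratios, low signal-to-noise, missing names, or poor regressions.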

        def irrational(feature):
            return math.isnan(feature.normed_ratio)

        def noisy(feature):
            return (feature.signal.red.signal_to_noise < 1 or
                    feature.signal.green.signal_to_noise < 1)

        def unnamed(feature):
            return feature.name in ('None', 'EMPTY')

        # This filter was proposed by team JKRW.
        def inconsistent(feature):
            return feature.regression_quality < 0.5

        data.prune(irrational)
        data.prune(noisy)
        data.prune(unnamed)
        data.prune(inconsistent)

    for data, output in zip(experiments, outputs):
        print "Saving %d features for '%s'." % (len(data), stressor)
        data.save(output % stressor)

    print


def find_interesting_genes(inputs, threshold):
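    """
    Merge the given pickled datasets into one, keeping only the features
    whose normalized expression ratio meets the given threshold.
    """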

    print "  Restoring pickled data (%d)..." % len(inputs)
    experiment = [ DataSet.restore(input) for input in inputs ]
    uninteresting = lambda feature: abs(feature.normed_ratio) < threshold

    print "  Pruning uninteresting data (%d)..." % len(inputs)
    for timepoint in experiment:
        timepoint.prune(uninteresting)

    print "  Flattening all timepoints (%d)...\n" % len(inputs)
    target, others = experiment[0], experiment[1:]
    target.union(*others)

    return target
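
# A minimal sketch of how these helpers might be driven, assuming the
# module-level `inputs` and `outputs` templates are defined; the stressor
# name, pickle paths, and threshold below are illustrative only.
if __name__ == '__main__':
    make_pass('A+D')
    pickles = [
            'pickles/A+D.000.pkl', 'pickles/A+D.030.pkl',
            'pickles/A+D.060.pkl', 'pickles/A+D.180.pkl' ]
    hits = find_interesting_genes(pickles, threshold=1.0)
    print "Found %d interesting genes." % len(hits)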
Example #3
#!/usr/bin/env python
# vim: tw=0

import sys
from microarray import DataSet

try:
    stress, ours, theirs = sys.argv[1:]
except ValueError:
    print "Usage: display.py <stress> <ours> <theirs>"
    sys.exit(1)

input = 'pickles/{}/ours={},theirs={}.pkl'.format(stress, ours, theirs)
output = 'output/{}/ours={},theirs={}.txt'.format(stress, ours, theirs)

# Display a number of useful parameters:
#header = '{:<15}{:<15}{:<20}{:<20}{:<20}'.format("Gene_ID", "Gene_Name", "Expression_Level", "Signal_Quality", "Regression")
#template = '{0.id:<15}{0.name:<15}{0.normed_ratio:<20}{0.signal.red.signal_to_noise:<20}{0.regression_quality}'

# Produce only raw output (for use in database queries):
header = ""
template = '{0.name}'

data = DataSet.restore(input)
data.display(template, header, output)

print "Formatting %d genes." % len(data)
Example #4
#!/usr/bin/env python
# vim: tw=0

from microarray import DataSet

inputs = [
        'pickles/control.1/000.pkl', 'pickles/control.1/030.pkl', 
        'pickles/control.1/060.pkl', 'pickles/control.1/180.pkl' ] 

experiments = [
        DataSet.restore(input)
        for input in inputs ]

header = '{0.path}'
template = '{0.id}\t{0.normed_ratio}'

DataSet.tabulate(header, template, *experiments)
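# tabulate() presumably prints one row per feature, with each timepoint's
# normalized ratio in its own column under the dataset's path.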

Example #5
#!/usr/bin/env python

from __future__ import division
from microarray import DataSet

inputs = [
        'data/A+D.000.gpr',
        'data/A+D.030.gpr',
        'data/A+D.060.gpr',
        'data/A+D.180.gpr' ]

for input in inputs:
    data = DataSet.load(input)
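    # Report how many features were parsed from this GenePix results file.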
    print len(data)
Example #6
#!/usr/bin/env python

from __future__ import division

import math
from microarray import DataSet

pickles = [
        'pickles/A+D.000.pkl', 'pickles/A+D.030.pkl',
        'pickles/A+D.060.pkl', 'pickles/A+D.180.pkl' ]


def log_ratio(feature):
    return feature.log_ratio


def too_extreme(feature):
    return abs(feature.log_ratio) > 15


header = "{0.path} (R/G = {0.intensity_ratio})"
feature = "{0.id:<15} {0.log_ratio}"

timepoints = [DataSet.restore(path) for path in pickles]

for timepoint in timepoints:
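    # Drop features with implausibly extreme log ratios, then keep only the
    # 50 features with the largest remaining ratios.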
    timepoint.prune(too_extreme)
    timepoint.sort(log_ratio, reverse=True)
    timepoint.truncate(50)

DataSet.tabulate(header, feature, *timepoints)