Ejemplo n.º 1
0
def fProximity(A, B=None, zeroDiag=True):
    '''Return the proximity (similarity x correlation) between vectors.

    For a pair of vectors ``a`` and ``b`` of length ``d`` the proximity is
    ``(1 - sum(|a - b|) / d) * cos(a, b)``. Where the cosine term is not
    finite (for instance when one vector is all zeros) only the first factor
    is returned.

    Supported calls:
    - ``fProximity(A)`` with a 2D array: 2D array of proximities between the
      rows of ``A``; only the strict upper triangle is filled, the diagonal
      and the symmetric (lower) terms are zero.
    - one 1D array and one 2D array (either order): 1D array with one value
      per row of the 2D operand.
    - two 2D arrays: 2D array with one row per row of ``A`` and one column
      per row of ``B``; ``zeroDiag`` times the main-diagonal entries is
      subtracted from the main diagonal (so the default zeroes it).
    '''
    # Test identity, not equality: ``B == None`` is elementwise on arrays,
    # and B must not be dereferenced before this check.
    if B is None:
        n_rows = A.shape[0]
        corr = zeros((n_rows, n_rows))
        for i in range(n_rows):
            corr[i, i + 1:] = fProximity(A[i], A[i + 1:])
        return corr

    if A.ndim == 1 or B.ndim == 1:
        # The 1D operand provides the vector length used to normalise the
        # absolute difference; the other operand is the left side of the dot.
        vec, other = (A, B) if A.ndim == 1 else (B, A)
        dif = 1. - abs(A - B).sum(axis=-1) / (1. * vec.shape[0])
        sim = (other.dot(vec) / (A**2).sum(axis=-1)**(0.5) /
               (B**2).sum(axis=-1)**(0.5))
        return where(isfinite(sim), dif * sim, dif)

    corr = zeros((A.shape[0], B.shape[0]))
    for i in range(A.shape[0]):
        corr[i] = fProximity(A[i], B)

    # Touch only the main-diagonal entries. Subtracting diag(diag(corr)) from
    # the whole matrix breaks (or silently broadcasts) when corr is not square.
    n_diag = min(corr.shape)
    if n_diag:
        idx = list(range(n_diag))
        corr[idx, idx] -= zeroDiag * corr[idx, idx]
    return corr
Ejemplo n.º 2
0
def get_auto_data(data):
    """
    Return the rows of *data* that belong to auto mode.

    Rows are kept when ``STAT_MainState`` equals 3 and both ``GPSP_Lat`` and
    ``GPSP_Lon`` are finite once converted to float. The filters are applied
    one after the other, each on the result of the previous one.

    Raises RuntimeError when no row survives the filtering.
    """
    auto_rows = data[data['STAT_MainState'] == 3]
    for column in ('GPSP_Lat', 'GPSP_Lon'):
        is_finite = pl.isfinite(auto_rows[column].astype(float))
        auto_rows = auto_rows[is_finite]
    if not len(auto_rows):
        raise RuntimeError('no auto mode detected')
    return auto_rows
Ejemplo n.º 3
0
def get_float_data(dataframe):
    """
    Return the float-like columns of *dataframe* as a float frame.

    Rows whose ``TIME_StartTime`` is not finite are dropped first. A column
    is kept when ``isfloatarray`` accepts its values; the selection is cast
    to float.
    """
    has_start_time = pl.isfinite(dataframe.TIME_StartTime)
    rows = dataframe[has_start_time]

    # One flag per column, in column order.
    float_cols = []
    for column_values in rows.values.T:
        float_cols.append(isfloatarray(column_values))

    # Select on the transposed frame so the boolean list picks columns.
    selected = rows.T[float_cols]
    return selected.T.astype(float)
Ejemplo n.º 4
0
    def apply(self, sim):
        """Distribute the current day's tests over the population.

        ``self.daily_tests`` holds the number of tests per simulation day. A
        pandas Series/DataFrame is converted to a NumPy array on first use,
        reindexed from ``sim['start_day']`` with missing days filled with 0.
        Nothing happens when the day is past the end of the schedule or the
        day's count is zero or not finite. Otherwise the count is added to
        ``sim.results['new_tests']`` and people are drawn with weights scaled
        by ``sympt_test`` (symptomatic) and ``quar_test`` (quarantined), and
        set to zero for people already diagnosed.
        """
        day = sim.t

        # Done here rather than at construction because the sim is needed.
        if isinstance(self.daily_tests, (pd.Series, pd.DataFrame)):
            first_date = sim['start_day']
            last_date = self.daily_tests.index[-1]
            all_dates = pd.date_range(first_date, last_date)
            self.daily_tests = self.daily_tests.reindex(all_dates, fill_value=0).to_numpy()

        # Past the end of the schedule: no tests left.
        if not day < len(self.daily_tests):
            return

        n_tests = self.daily_tests[day]
        if not n_tests or not pl.isfinite(n_tests):
            return  # No tests today
        sim.results['new_tests'][day] += n_tests

        # Everyone starts with the same weight.
        weights = np.ones(sim.n)
        people = sim.people
        symptomatic = cvu.true(people.symptomatic)
        quarantined = cvu.true(people.quarantined)
        diagnosed = cvu.true(people.diagnosed)
        weights[symptomatic] *= self.sympt_test
        weights[quarantined] *= self.quar_test
        weights[diagnosed] = 0.

        chosen = cvu.choose_w(probs=weights, n=n_tests, unique=False)
        sim.people.test(chosen, self.sensitivity, loss_prob=self.loss_prob, test_delay=self.test_delay)
        return
Ejemplo n.º 5
0
    def apply(self, sim):
        """Distribute the current day's (rescaled) tests over the population.

        Does nothing before ``self.start_day``, after ``self.end_day`` (when
        set), past the end of ``self.daily_tests``, or when the day's value
        divided by ``sim.rescale_vec[t]`` is not finite or truncates to zero.
        Otherwise the integer count is added to ``sim.results['new_tests']``
        and people are drawn with weights scaled by ``symp_test``
        (symptomatic) and ``quar_test`` (quarantined), and set to zero for
        people already diagnosed.
        """
        t = sim.t
        if t < self.start_day:
            return
        if self.end_day is not None and t > self.end_day:
            return

        # Check that there are still tests
        rel_t = t - self.start_day
        if rel_t >= len(self.daily_tests):
            return

        # Validate before truncating: int() raises ValueError on NaN and
        # OverflowError on infinity, so the finite check must come first.
        scaled_tests = self.daily_tests[rel_t] / sim.rescale_vec[t]
        if not pl.isfinite(scaled_tests):
            return
        n_tests = int(scaled_tests)  # Number of tests for this day -- rescaled
        if not n_tests:  # If there are no tests today, abort early
            return
        sim.results['new_tests'][t] += n_tests

        test_probs = np.ones(sim.n)  # Begin by assigning equal testing probability to everyone
        symp_inds = cvu.true(sim.people.symptomatic)
        quar_inds = cvu.true(sim.people.quarantined)
        diag_inds = cvu.true(sim.people.diagnosed)
        test_probs[symp_inds] *= self.symp_test
        test_probs[quar_inds] *= self.quar_test
        test_probs[diag_inds] = 0.

        test_inds = cvu.choose_w(probs=test_probs, n=n_tests, unique=False)

        sim.people.test(test_inds, self.sensitivity, loss_prob=self.loss_prob, test_delay=self.test_delay)

        return
Ejemplo n.º 6
0
def pos_analysis(data):
    """
    Analyze position.

    Builds a map with ``create_map`` from the GPS longitude/latitude columns,
    passes the GPS, GPOS and GPSP longitude/latitude columns through it and
    plots the three results ('GPS', 'est' and 'cmd' respectively) on the
    current pylab axes.

    Returns ``locals()``, i.e. a dict of every local variable of this
    function (``tmerc_map``, ``gps_x``, ``gpsp_ok``, ...).
    """
    tmerc_map = create_map(data.GPS_Lon.values, data.GPS_Lat.values)
    gps_y, gps_x = tmerc_map(data.GPS_Lon.values, data.GPS_Lat.values)
    gpos_y, gpos_x = tmerc_map(data.GPOS_Lon.values, data.GPOS_Lat.values)

    # Use ONE mask for both coordinates. Filtering longitude and latitude
    # independently yields arrays of different length (or mismatched pairs)
    # whenever only one of the two is non-finite in a row.
    gpsp_ok = (pl.isfinite(data.GPSP_Lon.values)
               & pl.isfinite(data.GPSP_Lat.values))
    gpsp_y, gpsp_x = tmerc_map(
        data.GPSP_Lon[gpsp_ok].values,
        data.GPSP_Lat[gpsp_ok].values)

    pl.plot(gpos_y, gpos_x, '.', label='est')

    pl.plot(gps_y, gps_x, 'x', label='GPS')

    pl.plot(gpsp_y, gpsp_x, 'ro', label='cmd')

    pl.xlabel('E, m')
    pl.ylabel('N, m')
    pl.grid()
    pl.autoscale(True, 'both', True)
    pl.legend(loc='best')
    return locals()
Ejemplo n.º 7
0
    def read_alberty_mathematica(self, fname):
        """
            Parse a Mathematica-style data file into pseudoisomer maps.

            example line:
            acetatesp={{-369.31,-486.01,-1,3},{-396.45,-485.76,0,4}};

            the order of values is: (dG0, dH0, z, nH)

            Lines without '=' are skipped. For names containing "coA", 32 is
            added to nH. nMg is always 0.

            Returns a tuple (alberty_name_to_pmap, alberty_name_to_hmap): two
            dicts keyed by the text preceding 'sp=', mapping to a
            PseudoisomerMap filled with dG0 values and, where dH0 could be
            parsed as a finite float, dH0 values.

            Raises ValueError for a line that contains '=' but no 'sp=', for
            a group that does not have four comma-separated values, or for a
            value that cannot be converted.
        """
        alberty_name_to_pmap = {}
        alberty_name_to_hmap = {}  # same as pmap but for dH of formation
        # Raw string: the pattern contains backslash escapes that are not
        # valid escapes in a normal string literal.
        token_re = re.compile(r"{([0-9\-\.\,_\s]+)}")

        # The context manager closes the file even if parsing raises.
        with open(fname, 'r') as fp:
            for line in fp:
                line = line.rstrip()
                if line.find('=') == -1:
                    continue
                alberty_name, separator, values = line.partition('sp=')
                if not separator:
                    # Same exception type as the tuple-unpacking failure this
                    # replaces, but with a message pointing at the bad line.
                    raise ValueError("Syntax error at: " + line)
                for token in token_re.findall(values):
                    val_list = token.split(',', 3)
                    if len(val_list) != 4:
                        raise ValueError("Syntax error at: " + line)
                    dG0 = float(val_list[0])
                    try:
                        dH0 = float(val_list[1])
                    except ValueError:
                        # Unparseable dH0: mark as missing (builtin NaN, no
                        # dependency on a star-imported alias).
                        dH0 = float('nan')
                    z = int(val_list[2])
                    nH = int(val_list[3])
                    if alberty_name.find("coA") != -1:
                        nH += 32
                    nMg = 0

                    alberty_name_to_pmap.setdefault(
                        alberty_name, pseudoisomer.PseudoisomerMap())
                    alberty_name_to_pmap[alberty_name].Add(nH,
                                                           z,
                                                           nMg,
                                                           dG0,
                                                           ref='Alberty 2006')
                    if isfinite(dH0):
                        alberty_name_to_hmap.setdefault(
                            alberty_name, pseudoisomer.PseudoisomerMap())
                        alberty_name_to_hmap[alberty_name].Add(nH,
                                                               z,
                                                               nMg,
                                                               dH0,
                                                               ref='Alberty 2006')

        return alberty_name_to_pmap, alberty_name_to_hmap
Ejemplo n.º 8
0
    def apply(self, sim):
        """Distribute the current day's tests, one person at a time.

        ``self.daily_tests`` holds the number of tests per simulation day. A
        pandas Series/DataFrame is converted to a NumPy array on first use,
        reindexed from ``sim['start_day']`` with missing days filled with 0.
        Nothing happens when the day is past the end of the schedule or the
        day's count is zero or not finite. Otherwise the count is added to
        ``sim.results['new_tests']``, every person is checked for a new
        diagnosis (summed into ``sim.results['new_diagnoses']``) and given a
        testing weight, and the drawn people are tested.
        """
        day = sim.t

        # Done here rather than at construction because the sim is needed.
        if isinstance(self.daily_tests, (pd.Series, pd.DataFrame)):
            first_date = sim['start_day']
            last_date = self.daily_tests.index[-1]
            all_dates = pd.date_range(first_date, last_date)
            self.daily_tests = self.daily_tests.reindex(all_dates, fill_value=0).to_numpy()

        # Past the end of the schedule: no tests left.
        if not day < len(self.daily_tests):
            return

        n_tests = self.daily_tests[day]
        if not n_tests or not pl.isfinite(n_tests):
            return  # No tests today
        sim.results['new_tests'][day] += n_tests

        weights = np.ones(sim.n)
        diagnosed_today = 0

        for idx, person in enumerate(sim.people):
            diagnosed_today += person.check_diagnosed(day)

            # Independent checks on purpose: a person can be diagnosed and
            # symptomatic/quarantined at the same time, and the diagnosed
            # rule must win by being applied last.
            if person.symptomatic:
                weights[idx] *= self.sympt_test
            if person.quarantine:
                weights[idx] *= self.quar_test
            if person.diagnosed:
                weights[idx] = 0.0

        chosen = cv.choose_weighted(probs=weights, n=n_tests, normalize=True, unique=False)
        sim.results['new_diagnoses'][day] += diagnosed_today

        for chosen_ind in chosen:
            sim.people[chosen_ind].test(day, self.sensitivity, test_delay=self.test_delay)

        return
Ejemplo n.º 9
0
    def apply(self, sim):
        """Distribute the current day's tests, one person at a time.

        Nothing happens when the day is past the end of ``self.daily_tests``
        or when the day's count is zero or not finite. Otherwise the count is
        added to ``sim.results['new_tests']``, every person is checked for a
        new diagnosis (summed into ``sim.results['new_diagnoses']``) and
        given a testing weight, and the drawn people are tested.
        """
        t = sim.t

        # Check that there are still tests
        if t >= len(self.daily_tests):
            return
        n_tests = self.daily_tests[t]  # Number of tests for this day

        # If there are no tests today, abort early. This must happen BEFORE
        # the count is recorded: adding a NaN/inf entry to the results would
        # corrupt the 'new_tests' series for that day.
        if not (n_tests and pl.isfinite(n_tests)):
            return
        sim.results['new_tests'][t] += n_tests

        test_probs = np.ones(sim.n)
        new_diagnoses = 0

        for i, person in enumerate(sim.people):

            new_diagnoses += person.check_diagnosed(t)

            # Adjust testing probability based on what's happened to the person
            # NB, these need to be separate if statements, because a person can be both diagnosed and infectious/symptomatic
            if person.symptomatic:
                test_probs[i] *= self.sympt_test  # They're symptomatic
            if person.known_contact:
                test_probs[i] *= self.trace_test  # They've had contact with a known positive
            if person.diagnosed:
                test_probs[i] = 0.0

        test_inds = cv.choose_weighted(probs=test_probs,
                                       n=n_tests,
                                       normalize=True)
        sim.results['new_diagnoses'][t] += new_diagnoses

        for test_ind in test_inds:
            person = sim.people[test_ind]
            person.test(t, self.sensitivity, test_delay=self.test_delay)

        return
Ejemplo n.º 10
0
def astausgleich(ab2org, mn2org, rhoaorg):
    """Shift the branches of a dc sounding to generate a matching curve.

    The readings are grouped into branches by their unique ``mn2`` value.
    Going through the branches in increasing ``mn2`` order, each branch is
    multiplied by the mean ratio between the previous (already shifted)
    branch and itself, taken at the ``ab2`` values both branches share.
    Branches without shared ``ab2`` values, or with a non-finite or
    non-positive factor, are left unchanged.

    Parameters
    ----------
    ab2org, mn2org, rhoaorg : array_like
        ab2 values, mn2 values and apparent resistivities, one entry per
        reading. None of the inputs is modified.

    Returns
    -------
    numpy.ndarray
        Float array with the shifted apparent resistivities.
    """
    ab2 = P.asarray(ab2org)
    mn2 = P.asarray(mn2org)
    # Float COPY: asarray would alias an ndarray argument, so the in-place
    # scaling below would overwrite the caller's data, and an integer array
    # cannot take the in-place multiplication by a float factor at all.
    rhoa = P.array(rhoaorg, dtype=float)
    um = P.unique(mn2)
    for lower, upper in zip(um[:-1], um[1:]):
        in_lower = mn2 == lower
        in_upper = mn2 == upper
        ac = P.intersect1d(ab2[in_lower], ab2[in_upper])
        if len(ac) == 0:
            continue

        # First reading of each branch at every shared ab2 value.
        r0 = [rhoa[(ab2 == a) & in_lower][0] for a in ac]
        r1 = [rhoa[(ab2 == a) & in_upper][0] for a in ac]

        fak = P.mean(P.array(r0) / P.array(r1))
        print(fak)  # kept: existing callers may rely on this console output
        if P.isfinite(fak) and fak > 0.:
            rhoa[in_upper] *= fak

    return rhoa  # formerly pg as vector
Ejemplo n.º 11
0
def astausgleich(ab2org, mn2org, rhoaorg):
    """Shift the branches of a dc sounding to generate a matching curve.

    The readings are grouped into branches by their unique ``mn2`` value.
    Going through the branches in increasing ``mn2`` order, each branch is
    multiplied by the mean ratio between the previous (already shifted)
    branch and itself, taken at the ``ab2`` values both branches share.
    Branches without shared ``ab2`` values, or with a non-finite or
    non-positive factor, are left unchanged.

    Parameters
    ----------
    ab2org, mn2org, rhoaorg : array_like
        ab2 values, mn2 values and apparent resistivities, one entry per
        reading. None of the inputs is modified.

    Returns
    -------
    numpy.ndarray
        Float array with the shifted apparent resistivities.
    """
    ab2 = P.asarray(ab2org)
    mn2 = P.asarray(mn2org)
    # Float COPY: asarray would alias an ndarray argument, so the in-place
    # scaling below would overwrite the caller's data, and an integer array
    # cannot take the in-place multiplication by a float factor at all.
    rhoa = P.array(rhoaorg, dtype=float)
    um = P.unique(mn2)
    for lower, upper in zip(um[:-1], um[1:]):
        in_lower = mn2 == lower
        in_upper = mn2 == upper
        ac = P.intersect1d(ab2[in_lower], ab2[in_upper])
        if len(ac) == 0:
            continue

        # First reading of each branch at every shared ab2 value.
        r0 = [rhoa[(ab2 == a) & in_lower][0] for a in ac]
        r1 = [rhoa[(ab2 == a) & in_upper][0] for a in ac]

        fak = P.mean(P.array(r0) / P.array(r1))
        print(fak)  # kept: existing callers may rely on this console output
        if P.isfinite(fak) and fak > 0.:
            rhoa[in_upper] *= fak

    return rhoa  # formerly pg as vector
Ejemplo n.º 12
0
 def read_alberty_mathematica(self, fname):
     """
         Parse a Mathematica-style data file into pseudoisomer maps.

         example line:
         acetatesp={{-369.31,-486.01,-1,3},{-396.45,-485.76,0,4}};

         the order of values is: (dG0, dH0, z, nH)

         Lines without '=' are skipped. For names containing "coA", 32 is
         added to nH. nMg is always 0.

         Returns a tuple (alberty_name_to_pmap, alberty_name_to_hmap): two
         dicts keyed by the text preceding 'sp=', mapping to a
         PseudoisomerMap filled with dG0 values and, where dH0 could be
         parsed as a finite float, dH0 values.

         Raises ValueError for a line that contains '=' but no 'sp=', for a
         group that does not have four comma-separated values, or for a
         value that cannot be converted.
     """
     alberty_name_to_pmap = {}
     alberty_name_to_hmap = {} # same as pmap but for dH of formation
     # Raw string: the pattern contains backslash escapes that are not valid
     # escapes in a normal string literal.
     token_re = re.compile(r"{([0-9\-\.\,_\s]+)}")

     # The context manager closes the file even if parsing raises.
     with open(fname, 'r') as fp:
         for line in fp:
             line = line.rstrip()
             if line.find('=') == -1:
                 continue
             alberty_name, separator, values = line.partition('sp=')
             if not separator:
                 # Same exception type as the tuple-unpacking failure this
                 # replaces, but with a message pointing at the bad line.
                 raise ValueError("Syntax error at: " + line)
             for token in token_re.findall(values):
                 val_list = token.split(',', 3)
                 if len(val_list) != 4:
                     raise ValueError("Syntax error at: " + line)
                 dG0 = float(val_list[0])
                 try:
                     dH0 = float(val_list[1])
                 except ValueError:
                     # Unparseable dH0: mark as missing (builtin NaN, no
                     # dependency on a star-imported alias).
                     dH0 = float('nan')
                 z = int(val_list[2])
                 nH = int(val_list[3])
                 if alberty_name.find("coA") != -1:
                     nH += 32
                 nMg = 0

                 alberty_name_to_pmap.setdefault(alberty_name, pseudoisomer.PseudoisomerMap())
                 alberty_name_to_pmap[alberty_name].Add(nH, z, nMg, dG0,
                                                        ref='Alberty 2006')
                 if isfinite(dH0):
                     alberty_name_to_hmap.setdefault(alberty_name, pseudoisomer.PseudoisomerMap())
                     alberty_name_to_hmap[alberty_name].Add(nH, z, nMg, dH0,
                                                            ref='Alberty 2006')

     return alberty_name_to_pmap, alberty_name_to_hmap
Ejemplo n.º 13
0
    def apply(self, sim):
        """Distribute the current day's (rescaled) tests over the population.

        Does nothing before ``self.start_day``, after ``self.end_day`` (when
        set), past the end of ``self.daily_tests``, or when the day's value
        divided by ``sim.rescale_vec[t]`` is not finite or truncates to zero.
        Otherwise the integer count is added to ``sim.results['new_tests']``
        and people are drawn with weights built from symptoms (including
        randomly assigned ILI symptoms when ``self.ili_prev`` is given),
        quarantine status and the optional ``self.subtarget``; people
        already diagnosed get weight zero.
        """
        t = sim.t
        if t < self.start_day:
            return
        if self.end_day is not None and t > self.end_day:
            return

        # Check that there are still tests
        rel_t = t - self.start_day
        if rel_t >= len(self.daily_tests):
            return

        # Validate before truncating: int() raises ValueError on NaN and
        # OverflowError on infinity, so the finite check must come first.
        scaled_tests = self.daily_tests[rel_t] / sim.rescale_vec[t]
        if not pl.isfinite(scaled_tests):
            return
        n_tests = int(scaled_tests)  # Number of tests for this day -- rescaled
        if not n_tests:  # If there are no tests today, abort early
            return
        sim.results['new_tests'][t] += n_tests

        # Begin by assigning equal testing probability to everyone
        test_probs = np.ones(sim.n)

        # Handle symptomatic testing, taking into account prevalence of ILI symptoms
        symp_inds = cvu.true(sim.people.symptomatic)
        if self.ili_prev is not None and rel_t < len(self.ili_prev):
            # Number with ILI symptoms on this day
            n_ili = int(self.ili_prev[rel_t] * sim['pop_size'])
            # Give some people some symptoms. Assuming that this is
            # independent of COVID symptomaticity...
            ili_inds = cvu.choose(sim['pop_size'], n_ili)
            # unique() so that nobody is weighted twice
            symp_inds = np.unique(np.concatenate((symp_inds, ili_inds)))
        test_probs[symp_inds] *= self.symp_test

        # Handle quarantine testing
        quar_inds = cvu.true(sim.people.quarantined)
        test_probs[quar_inds] *= self.quar_test

        # Handle any other user-specified testing criteria
        if self.subtarget is not None:
            subtarget_inds, subtarget_vals = cv.get_subtargets(
                self.subtarget, sim)
            test_probs[subtarget_inds] = test_probs[subtarget_inds] * subtarget_vals

        # Don't re-diagnose people
        diag_inds = cvu.true(sim.people.diagnosed)
        test_probs[diag_inds] = 0.

        # Now choose who gets tested and test them
        test_inds = cvu.choose_w(probs=test_probs, n=n_tests, unique=False)
        sim.people.test(test_inds,
                        self.sensitivity,
                        loss_prob=self.loss_prob,
                        test_delay=self.test_delay)

        return
Ejemplo n.º 14
0
    def apply(self, sim):
        """Distribute the current day's (rescaled) tests over the population.

        Returns without doing anything before ``self.start_day``, after
        ``self.end_day`` (when set), past the end of ``self.daily_tests``, or
        when the rounded number of tests is zero or not finite. Otherwise the
        number of tests is added to ``sim.results['new_tests'][t]``, a
        testing weight is built for every person (symptoms, optional
        onset-to-swab delay distribution ``self.pdf``, optional ILI
        prevalence ``self.ili_prev``, quarantine policy, optional
        ``self.subtarget``, zero for already diagnosed people), and the
        people drawn without replacement are passed to ``sim.people.test``.

        NOTE(review): the random draws (randround, choose, choose_w) happen
        in a fixed order; reordering statements would presumably change
        results for a given seed -- confirm before refactoring.
        """

        t = sim.t
        if t < self.start_day:
            return
        elif self.end_day is not None and t > self.end_day:
            return

        # Check that there are still tests
        rel_t = t - self.start_day
        if rel_t < len(self.daily_tests):
            n_tests = cvu.randround(
                self.daily_tests[rel_t] / sim.rescale_vec[t]
            )  # Correct for scaling that may be applied by rounding to the nearest number of tests
            if not (n_tests and pl.isfinite(n_tests)
                    ):  # If there are no tests today, abort early
                return
            else:
                sim.results['new_tests'][t] += n_tests
        else:
            return

        test_probs = np.ones(
            sim.n
        )  # Begin by assigning equal testing weight (converted to a probability) to everyone

        # Calculate test probabilities for people with symptoms
        symp_inds = cvu.true(sim.people.symptomatic)
        symp_test = self.symp_test
        if self.pdf:  # Handle the onset to swab delay
            symp_time = cvd.default_int(t -
                                        sim.people.date_symptomatic[symp_inds]
                                        )  # Find time since symptom onset
            inv_count = (
                np.bincount(symp_time) / len(symp_time)
            )  # Fraction of symptomatic people at each time since onset
            count = np.nan * np.ones(inv_count.shape)  # Initialize the count
            count[inv_count != 0] = 1 / inv_count[
                inv_count != 0]  # Invert the fractions where defined; the rest stays NaN
            # NOTE(review): if self.symp_test were a NumPy array, `*=` would
            # modify it in place; presumably it is a scalar -- confirm.
            symp_test *= self.pdf.pdf(symp_time) * count[
                symp_time]  # Put it all together

        test_probs[symp_inds] *= symp_test  # Update the test probabilities

        # Handle symptomatic testing, taking into account prevalence of ILI symptoms
        if self.ili_prev is not None:
            if rel_t < len(self.ili_prev):
                n_ili = int(
                    self.ili_prev[rel_t] *
                    sim['pop_size'])  # Number with ILI symptoms on this day
                ili_inds = cvu.choose(
                    sim['pop_size'], n_ili
                )  # Give some people some symptoms. Assuming that this is independent of COVID symptomaticity...
                # People already weighted as symptomatic above are not weighted a second time
                ili_inds = np.setdiff1d(ili_inds, symp_inds)
                test_probs[ili_inds] *= self.symp_test

        # Handle quarantine testing
        quar_test_inds = get_quar_inds(self.quar_policy, sim)
        test_probs[quar_test_inds] *= self.quar_test

        # Handle any other user-specified testing criteria
        if self.subtarget is not None:
            subtarget_inds, subtarget_vals = get_subtargets(
                self.subtarget, sim)
            test_probs[
                subtarget_inds] = test_probs[subtarget_inds] * subtarget_vals

        # Don't re-diagnose people
        diag_inds = cvu.true(sim.people.diagnosed)
        test_probs[diag_inds] = 0.0

        # With dynamic rescaling, we have to correct for uninfected people outside of the population who would test
        if sim.rescale_vec[t] / sim[
                'pop_scale'] < 1:  # We still have rescaling to do
            in_pop_tot_prob = test_probs.sum() * sim.rescale_vec[
                t]  # Total "testing weight" of people in the subsampled population
            out_pop_tot_prob = sim.scaled_pop_size - sim.rescale_vec[t] * sim[
                'pop_size']  # Find out how many people are missing and assign them each weight 1
            in_frac = in_pop_tot_prob / (
                in_pop_tot_prob + out_pop_tot_prob
            )  # Fraction of tests which should fall in the sample population
            n_tests = cvu.randround(n_tests *
                                    in_frac)  # Recompute the number of tests

        # Now choose who gets tested and test them
        n_tests = min(
            n_tests, (test_probs != 0).sum()
        )  # Don't try to test more people than have nonzero testing probability
        test_inds = cvu.choose_w(probs=test_probs, n=n_tests,
                                 unique=True)  # Choose who actually tests
        sim.people.test(test_inds,
                        self.sensitivity,
                        loss_prob=self.loss_prob,
                        test_delay=self.test_delay)

        return
Ejemplo n.º 15
0
def thermodynamic_pathway_analysis(S, rids, fluxes, cids, thermodynamics,
                                   html_writer):
    """Run the pathway optimizations and report them as plots and tables.

    Runs ``find_pCr`` and ``find_mtdf`` on the pathway, plots the cumulative
    reaction energy profile (standard conditions plus one curve per
    optimization), plots the concentrations of every optimization, and
    writes formation/reaction energy tables. Everything is emitted through
    ``html_writer``. Global ``pylab.rcParams`` entries are overwritten.

    Args:
        S: 2D stoichiometric matrix with one row per reaction and one column
            per compound.
        rids: integer KEGG reaction IDs, one per row of ``S``.
        fluxes: one flux per reaction; only the absolute values are used.
        cids: integer KEGG compound IDs, one per column of ``S``.
        thermodynamics: object providing ``GetTransformedFormationEnergies``,
            ``bounds``, ``c_mid`` and ``c_range``.
        html_writer: object providing ``write`` and
            ``embed_matplotlib_figure``.

    Returns:
        dict mapping the optimization name ('pCr', 'MTDF') to its
        ``(dG_f, conc, score)`` result. Entries are missing when
        ``LinProgNoSolutionException`` was raised; entries whose score is
        ``None`` are returned but not plotted or tabulated.
    """
    Nr, Nc = S.shape

    # Only the flux magnitudes are used below. Materialise the list: map()
    # returns a one-shot iterator on Python 3 and the values are indexed.
    fluxes = list(map(abs, fluxes))
    kegg = Kegg.getInstance()

    dG0_f = thermodynamics.GetTransformedFormationEnergies(cids)
    bounds = [thermodynamics.bounds.get(cid, (None, None)) for cid in cids]
    res = {}
    try:
        c_mid = thermodynamics.c_mid
        c_range = thermodynamics.c_range
        res['pCr'] = find_pCr(S, dG0_f, c_mid=c_mid, ratio=3.0, bounds=bounds)
        res['MTDF'] = find_mtdf(S, dG0_f, c_range=c_range, bounds=bounds)
    except LinProgNoSolutionException:
        html_writer.write(
            '<b>No feasible solution found, cannot calculate the Margin</b>')

    # plot the profile graph
    pylab.rcParams['text.usetex'] = False
    pylab.rcParams['legend.fontsize'] = 10
    pylab.rcParams['font.family'] = 'sans-serif'
    pylab.rcParams['font.size'] = 12
    pylab.rcParams['lines.linewidth'] = 2
    pylab.rcParams['lines.markersize'] = 5
    pylab.rcParams['figure.figsize'] = [8.0, 6.0]
    pylab.rcParams['figure.dpi'] = 100

    # plot the thermodynamic profile in standard conditions

    # No Figure.hold(True) call: holding was the default and the method no
    # longer exists in current matplotlib releases.
    profile_fig = pylab.figure()

    pylab.title('Thermodynamic profile', figure=profile_fig)
    pylab.ylabel('cumulative dG [kJ/mol]', figure=profile_fig)
    pylab.xlabel('Reaction KEGG ID', figure=profile_fig)
    pylab.xticks(pylab.arange(1, Nr + 1),
                 ['R%05d' % rids[i] for i in range(Nr)],
                 fontproperties=FontProperties(size=8),
                 rotation=30)

    # Standard reaction energies: stoichiometry times formation energies,
    # restricted to the compounds that take part in each reaction.
    dG0_r = pylab.zeros((Nr, 1))
    for r in range(Nr):
        # flatnonzero is the NumPy equivalent of the removed pylab.find
        reactants = pylab.flatnonzero(S[r, :])
        dG0_r[r, 0] = pylab.dot(S[r, reactants], dG0_f[reactants])

    nan_indices = pylab.flatnonzero(pylab.isnan(dG0_r))
    finite_indices = pylab.flatnonzero(pylab.isfinite(dG0_r))
    if (len(nan_indices) > 0):
        # Reactions with unknown energy contribute zero to the profile.
        dG0_r_finite = pylab.zeros((Nr, 1))
        dG0_r_finite[finite_indices] = dG0_r[finite_indices]
        cum_dG0_r = pylab.cumsum(
            [0] + [dG0_r_finite[r, 0] * fluxes[r] for r in range(Nr)])
    else:
        cum_dG0_r = pylab.cumsum([0] +
                                 [dG0_r[r, 0] * fluxes[r] for r in range(Nr)])
    pylab.plot(pylab.arange(0.5, Nr + 1),
               cum_dG0_r,
               figure=profile_fig,
               label='Standard [1M]')

    # plot the thermodynamic profile for the different optimization schemes

    pylab.grid(True, figure=profile_fig)
    for optimization in res.keys():
        dG_f, conc, score = res[optimization]
        if score is None:
            continue

        dG_r = pylab.dot(S, dG_f)
        cum_dG_r = pylab.cumsum([0] +
                                [dG_r[i, 0] * fluxes[i] for i in range(Nr)])
        pylab.plot(pylab.arange(0.5, Nr + 1),
                   cum_dG_r,
                   figure=profile_fig,
                   label='%s = %.1f' % (optimization, score))

    pylab.legend()
    html_writer.embed_matplotlib_figure(profile_fig, width=480, height=360)

    # plot the optimal metabolite concentrations for the different optimization schemes
    ind_nan = pylab.flatnonzero(pylab.isnan(dG0_f))
    y_positions = list(range(Nc, 0, -1))  # first compound at the top
    for optimization in res.keys():
        dG_f, conc, score = res[optimization]
        if score is None:
            continue

        # give all compounds with unknown dG0_f the middle concentration value
        conc[ind_nan] = thermodynamics.c_mid

        conc_fig = pylab.figure()
        conc_fig.suptitle('Concentrations (%s = %.1f)' % (optimization, score))
        pylab.xscale('log', figure=conc_fig)
        pylab.ylabel('Compound KEGG ID', figure=conc_fig)
        pylab.xlabel('Concentration [M]', figure=conc_fig)
        pylab.yticks(y_positions, ["C%05d" % cid for cid in cids],
                     fontproperties=FontProperties(size=8))
        pylab.plot(conc, y_positions, '*b', figure=conc_fig)

        x_min = conc.min() / 10
        x_max = conc.max() * 10
        y_min = 0
        y_max = Nc + 1

        for c in range(Nc):
            pylab.text(conc[c, 0] * 1.1, Nc - c, kegg.cid2name(cids[c]),
                       figure=conc_fig, fontsize=6, rotation=0)
            # An open bound is drawn out to the edge of the plot.
            b_low, b_up = bounds[c]
            if b_low is None:
                b_low = x_min
            if b_up is None:
                b_up = x_max
            pylab.plot([b_low, b_up], [Nc - c, Nc - c], '-k', linewidth=0.4)

        if optimization.startswith('pCr'):
            c_range_opt = pC_to_range(score,
                                      c_mid=thermodynamics.c_mid,
                                      ratio=3.0)
            pylab.axvspan(c_range_opt[0],
                          c_range_opt[1],
                          facecolor='g',
                          alpha=0.3,
                          figure=conc_fig)
        else:
            pylab.axvspan(thermodynamics.c_range[0],
                          thermodynamics.c_range[1],
                          facecolor='r',
                          alpha=0.3,
                          figure=conc_fig)
        pylab.axis([x_min, x_max, y_min, y_max], figure=conc_fig)
        try:
            html_writer.embed_matplotlib_figure(conc_fig,
                                                width=420,
                                                height=360)
        except AttributeError:
            html_writer.write('<b>Failed to generate concentration figure</b>')

    # write all the results in tables as well

    for optimization in res.keys():
        (dG_f, conc, score) = res[optimization]
        if score is None:
            # Same rule as the plots above; '%.1f' % None would raise.
            continue

        html_writer.write(
            '<p>Biochemical Compound Formation Energies (%s = %.1f)<br>\n' %
            (optimization, score))
        html_writer.write('<table border="1">\n')
        html_writer.write('  ' + '<td>%s</td>' * 5 %
                          ("KEGG CID", "Compound Name", "Concentration [M]",
                           "dG'0_f [kJ/mol]", "dG'_f [kJ/mol]") + '\n')
        for c in range(Nc):
            cid = cids[c]
            name = kegg.cid2name(cid)

            if (pylab.isnan(dG0_f[c, 0])):
                html_writer.write(
                    '<tr><td><a href="%s">C%05d</a></td><td>%s</td><td>%s</td><td>%s</td><td>%s</td></tr>\n' %
                    (kegg.cid2link(cid), cid, name, "N/A", "N/A", "N/A"))
            else:
                html_writer.write(
                    '<tr><td><a href="%s">C%05d</a></td><td>%s</td><td>%.2g</td><td>%.2f</td><td>%.2f</td></tr>\n' %
                    (kegg.cid2link(cid), cid, name, conc[c, 0], dG0_f[c, 0], dG_f[c, 0]))
        html_writer.write('</table></p>\n')

        html_writer.write(
            '<p>Biochemical Reaction Energies (%s = %.1f)<br>\n' %
            (optimization, score))
        html_writer.write('<table border="1">\n')
        html_writer.write('  ' + '<td>%s</td>' * 3 %
                          ("KEGG RID", "dG'0_r [kJ/mol]", "dG'_r [kJ/mol]") +
                          '\n')
        dG_r = pylab.dot(S, dG_f)
        for r in range(Nr):
            rid = rids[r]
            if (pylab.isnan(dG0_r[r, 0])):
                html_writer.write(
                    '<tr><td><a href="%s" title="%s">R%05d</a></td><td>%s</td><td>%.2f</td></tr>\n' %
                    (kegg.rid2link(rid), kegg.rid2name(rid), rid, "N/A", dG_r[r, 0]))
            else:
                html_writer.write(
                    '<tr><td><a href="%s" title="%s">R%05d</a></td><td>%.2f</td><td>%.2f</td></tr>\n' %
                    (kegg.rid2link(rid), kegg.rid2name(rid), rid, dG0_r[r, 0], dG_r[r, 0]))
        html_writer.write('</table></p>\n')

    return res
Ejemplo n.º 16
0
def log_probfn(theta, x, y, xerr, yerr):
    """Return ``log_prior(theta) + log_likelihood(theta, x, y, xerr, yerr)``.

    When the log prior is not finite, ``-inf`` is returned and the
    likelihood is not evaluated.
    """
    lp = log_prior(theta)
    if not pyl.isfinite(lp):
        return -pyl.inf
    # Reuse the prior computed above instead of evaluating it a second time.
    return lp + log_likelihood(theta, x, y, xerr, yerr)
Ejemplo n.º 17
0
def thermodynamic_pathway_analysis(S, rids, fluxes, cids, thermodynamics, html_writer):
    """Optimize a pathway thermodynamically and report the results as HTML.

    Runs ``find_pCr`` and ``find_mtdf`` on the pathway, plots the cumulative
    dG profile and the per-compound concentrations for every optimization
    that produced a score, and writes formation-energy and reaction-energy
    tables through ``html_writer``.

    Args:
        S: 2-D array of shape (Nr, Nc); row ``r`` is indexed as reaction ``r``
            and column ``c`` as compound ``c``.
        rids: reaction IDs, indexed per row of S and formatted with 'R%05d'.
        fluxes: indexable sequence with one value per reaction; only the
            absolute values are used.
        cids: compound IDs, indexed per column of S and formatted with 'C%05d'.
        thermodynamics: object providing ``GetTransformedFormationEnergies``,
            a ``bounds`` mapping with ``get``, ``c_mid`` and ``c_range``.
        html_writer: object providing ``write`` and
            ``embed_matplotlib_figure``.

    Returns:
        dict mapping the optimization name ('pCr', 'MTDF') to the value the
        optimizer returned, which is unpacked here as (dG_f, conc, score).
        Entries are missing when LinProgNoSolutionException was raised before
        they were computed.  For entries with a score, ``conc`` is modified in
        place: positions whose dG0_f is NaN are set to ``thermodynamics.c_mid``.
    """
    Nr, Nc = S.shape

    # Only the magnitudes of the fluxes are used below.  A real list is
    # required because fluxes is indexed by reaction number later on.
    fluxes = [abs(flux) for flux in fluxes]
    kegg = Kegg.getInstance()
    
    #kegg.write_reactions_to_html(html_writer, S, rids, fluxes, cids, show_cids=False)
    dG0_f = thermodynamics.GetTransformedFormationEnergies(cids)
    bounds = [thermodynamics.bounds.get(cid, (None, None)) for cid in cids]
    res = {}
    try:
        c_mid = thermodynamics.c_mid
        c_range = thermodynamics.c_range
        res['pCr'] = find_pCr(S, dG0_f, c_mid=c_mid, ratio=3.0, bounds=bounds)
        #res['PCR2'] = find_unfeasible_concentrations(S, dG0_f, c_range, c_mid=c_mid, bounds=bounds)
        res['MTDF'] = find_mtdf(S, dG0_f, c_range=c_range, bounds=bounds)
        
        #path = pathway_modelling.Pathway(S, dG0_f)
        #res['pCr_regularized'] = path.FindPcr_OptimizeConcentrations(
        #    c_mid=c_mid, ratio=3.0, bounds=bounds)
        #res['pCr_regularized (dGr < -2.7)'] = path.FindPcr_OptimizeConcentrations(
        #    c_mid=c_mid, ratio=3.0, bounds=bounds, max_reaction_dg=-2.7)
        #res['MTDF_regularized'] = path.FindMTDF_OptimizeConcentrations(
        #    c_range=c_range, bounds=bounds, c_mid=c_mid)
        
        
        #costs = []
        #for max_dg in pylab.arange(0.0,-4.25,-0.25):
        #    c = path.FindPcrEnzymeCost(c_mid=c_mid,
        #                               ratio=3.0,
        #                               bounds=bounds,
        #                               max_reaction_dg=max_dg,
        #                               fluxes=fluxes)
        #    costs.append(str(c))
        
        #print ', '.join(costs)
            
        
    except LinProgNoSolutionException:
        # Keep whatever was already stored in res and carry on with the report.
        html_writer.write('<b>No feasible solution found, cannot calculate the Margin</b>')
    
    # plot the profile graph
    pylab.rcParams['text.usetex'] = False
    pylab.rcParams['legend.fontsize'] = 10
    pylab.rcParams['font.family'] = 'sans-serif'
    pylab.rcParams['font.size'] = 12
    pylab.rcParams['lines.linewidth'] = 2
    pylab.rcParams['lines.markersize'] = 5
    pylab.rcParams['figure.figsize'] = [8.0, 6.0]
    pylab.rcParams['figure.dpi'] = 100

    # plot the thermodynamic profile in standard conditions
    
    profile_fig = pylab.figure()
    profile_fig.hold(True)

    pylab.title('Thermodynamic profile', figure=profile_fig)
    pylab.ylabel('cumulative dG [kJ/mol]', figure=profile_fig)
    pylab.xlabel('Reaction KEGG ID', figure=profile_fig)
    pylab.xticks(pylab.arange(1, Nr + 1), ['R%05d' % rids[i] for i in range(Nr)], fontproperties=FontProperties(size=8), rotation=30)

    # Standard reaction energy of each reaction, using only the compounds
    # with a non-zero coefficient in that row of S.
    dG0_r = pylab.zeros((Nr, 1))
    for r in range(Nr):
        reactants = pylab.find(S[r, :])
        dG0_r[r, 0] = pylab.dot(S[r, reactants], dG0_f[reactants])

    nan_indices = pylab.find(pylab.isnan(dG0_r))
    finite_indices = pylab.find(pylab.isfinite(dG0_r))
    if len(nan_indices) > 0:
        # Non-finite reaction energies contribute 0 to the cumulative profile.
        dG0_r_finite = pylab.zeros((Nr, 1))
        dG0_r_finite[finite_indices] = dG0_r[finite_indices]
        cum_dG0_r = pylab.cumsum([0] + [dG0_r_finite[r, 0] * fluxes[r] for r in range(Nr)])
    else:
        cum_dG0_r = pylab.cumsum([0] + [dG0_r[r, 0] * fluxes[r] for r in range(Nr)])
    pylab.plot(pylab.arange(0.5, Nr + 1), cum_dG0_r, figure=profile_fig, label='Standard [1M]')
    
    # plot the thermodynamic profile for the different optimization schemes
    
    pylab.grid(True, figure=profile_fig)
    for optimization, (dG_f, conc, score) in res.items():
        if score is None:
            continue

        dG_r = pylab.dot(S, dG_f)
        cum_dG_r = pylab.cumsum([0] + [dG_r[i, 0] * fluxes[i] for i in range(Nr)])
        pylab.plot(pylab.arange(0.5, Nr + 1), cum_dG_r, figure=profile_fig, label='%s = %.1f' % (optimization, score))

    pylab.legend()
    html_writer.embed_matplotlib_figure(profile_fig, width=480, height=360)

    # plot the optimal metabolite concentrations for the different optimization schemes
    ind_nan = pylab.find(pylab.isnan(dG0_f))
    for optimization, (dG_f, conc, score) in res.items():
        if score is None:
            continue

        conc[ind_nan] = thermodynamics.c_mid # give all compounds with unknown dG0_f the middle concentration value

        conc_fig = pylab.figure()
        conc_fig.suptitle('Concentrations (%s = %.1f)' % (optimization, score))
        pylab.xscale('log', figure=conc_fig)
        pylab.ylabel('Compound KEGG ID', figure=conc_fig)
        pylab.xlabel('Concentration [M]', figure=conc_fig)
        pylab.yticks(range(Nc, 0, -1), ["C%05d" % cid for cid in cids], fontproperties=FontProperties(size=8))
        pylab.plot(conc, range(Nc, 0, -1), '*b', figure=conc_fig)

        x_min = conc.min() / 10
        x_max = conc.max() * 10
        y_min = 0
        y_max = Nc + 1
        
        for c in range(Nc):
            pylab.text(conc[c, 0] * 1.1, Nc - c, kegg.cid2name(cids[c]),
                       figure=conc_fig, fontsize=6, rotation=0)
            # A missing bound is drawn out to the edge of the plotted x range.
            b_low, b_up = bounds[c]
            if b_low is None:
                b_low = x_min
            if b_up is None:
                b_up = x_max
            pylab.plot([b_low, b_up], [Nc - c, Nc - c], '-k', linewidth=0.4)

        if optimization.startswith('pCr'):
            c_range_opt = pC_to_range(score, c_mid=thermodynamics.c_mid, ratio=3.0)
            pylab.axvspan(c_range_opt[0], c_range_opt[1], facecolor='g', alpha=0.3, figure=conc_fig)
        else:
            pylab.axvspan(thermodynamics.c_range[0], thermodynamics.c_range[1], facecolor='r', alpha=0.3, figure=conc_fig)
        pylab.axis([x_min, x_max, y_min, y_max], figure=conc_fig)
        try:
            html_writer.embed_matplotlib_figure(conc_fig, width=420, height=360)
        except AttributeError:
            html_writer.write('<b>Failed to generate concentration figure</b>')

    # write all the results in tables as well

    for optimization, (dG_f, conc, score) in res.items():
        # Same guard as the plotting loops: '%.1f' % None would raise
        # TypeError, so optimizations without a score get no tables.
        if score is None:
            continue

        html_writer.write('<p>Biochemical Compound Formation Energies (%s = %.1f)<br>\n' % (optimization, score))
        html_writer.write('<table border="1">\n')
        html_writer.write('  ' + '<td>%s</td>'*5 % ("KEGG CID", "Compound Name", "Concentration [M]", "dG'0_f [kJ/mol]", "dG'_f [kJ/mol]") + '\n')
        for c in range(Nc):
            cid = cids[c]
            name = kegg.cid2name(cid)

            if pylab.isnan(dG0_f[c, 0]):
                html_writer.write('<tr><td><a href="%s">C%05d</a></td><td>%s</td><td>%s</td><td>%s</td><td>%s</td></tr>\n' %
                                  (kegg.cid2link(cid), cid, name, "N/A", "N/A", "N/A"))
            else:
                html_writer.write('<tr><td><a href="%s">C%05d</a></td><td>%s</td><td>%.2g</td><td>%.2f</td><td>%.2f</td></tr>\n' %
                                  (kegg.cid2link(cid), cid, name, conc[c, 0], dG0_f[c, 0], dG_f[c, 0]))
        html_writer.write('</table></p>\n')

        html_writer.write('<p>Biochemical Reaction Energies (%s = %.1f)<br>\n' % (optimization, score))
        html_writer.write('<table border="1">\n')
        html_writer.write('  ' + '<td>%s</td>'*3 % ("KEGG RID", "dG'0_r [kJ/mol]", "dG'_r [kJ/mol]") + '\n')
        dG_r = pylab.dot(S, dG_f)
        for r in range(Nr):
            rid = rids[r]
            if pylab.isnan(dG0_r[r, 0]):
                html_writer.write('<tr><td><a href="%s" title="%s">R%05d</a></td><td>%s</td><td>%.2f</td></tr>\n' %
                                  (kegg.rid2link(rid), kegg.rid2name(rid), rid, "N/A", dG_r[r, 0]))
            else:
                html_writer.write('<tr><td><a href="%s" title="%s">R%05d</a></td><td>%.2f</td><td>%.2f</td></tr>\n' %
                                  (kegg.rid2link(rid), kegg.rid2name(rid), rid, dG0_r[r, 0], dG_r[r, 0]))
        html_writer.write('</table></p>\n')
        
    return res
Ejemplo n.º 18
0
    def CompareMtdf(self, target_mtdf=None):
        """Write an MTDF report (HTML plus two figures) for every pathway.

        For each ``(name, pathway_data)`` in ``self.pathways`` the method
        writes the reactions to HTML, computes a score with
        ``pathway_modelling.Pathway``, and embeds a thermodynamic-profile
        figure and a concentration figure.

        Args:
            target_mtdf: when not None it is passed as both ``min_mtdf`` and
                ``max_mtdf`` to ``FindMtdf_Regularized``; otherwise
                ``FindMTDF_OptimizeConcentrations`` is used.

        Returns:
            None (there is no return statement).

        NOTE(review): the body reads ``dG_f``, ``conc``, ``kegg`` and a bare
        ``html_writer`` that are never assigned in this method.  Unless they
        exist as module-level globals these raise NameError -- confirm.
        """
        # NOTE(review): n_pathways is not used in the rest of this method.
        n_pathways = len(self.pathways)
        for i, (name, pathway_data) in enumerate(self.pathways.iteritems()):
            logging.info('Analyzing pathway %s', name)
            # Opens an outer <div>; it is closed at the end of the iteration.
            self.html_writer.write('<div margin="20px"><div><b>%s</b></div>' %
                                   name)
            self.GetConditions(pathway_data)
            S, rids, fluxes, cids = self.GetReactions(name, pathway_data)
            self.WriteReactionsToHtml(S, rids, fluxes, cids, show_cids=False)

            # Bounds on concentrations.
            bounds = [
                self.thermo.bounds.get(cid, (None, None)) for cid in cids
            ]

            # All fluxes are forwards
            fluxes = map(abs, fluxes)
            dG0_f = self.thermo.GetTransformedFormationEnergies(cids)
            c_mid = self.thermo.c_mid
            c_range = self.thermo.c_range

            path = pathway_modelling.Pathway(S, dG0_f)

            if target_mtdf is not None:
                _ln_conc, score = path.FindMtdf_Regularized(
                    c_range,
                    bounds,
                    c_mid,
                    min_mtdf=target_mtdf,
                    max_mtdf=target_mtdf)
            else:
                _ln_conc, score = path.FindMTDF_OptimizeConcentrations(
                    c_range, bounds, c_mid)
            if score is None:
                logging.error('No MTDF score for %s', name)
                # This skips the closing '</div>' written at the end of the loop body.
                continue

            Nr, Nc = S.shape
            profile_fig = pylab.figure()
            profile_fig.hold(True)
            pylab.title('Thermodynamic Profile', figure=profile_fig)
            pylab.ylabel('Cumulative dG [kJ/mol]', figure=profile_fig)
            pylab.xlabel('Reaction KEGG ID', figure=profile_fig)
            pylab.grid(True, figure=profile_fig)

            # rids is rebound to a list of strings, used for tick labels and the <ol> below.
            rids = ['%s' % rids[i] for i in xrange(Nr)]
            pylab.xticks(pylab.arange(1, Nr + 1),
                         rids,
                         fontproperties=FontProperties(size=8),
                         rotation=30)
            # Per-reaction standard energy from the non-zero entries of each row of S.
            dG0_r = pylab.zeros((Nr, 1))
            for r in range(Nr):
                reactants = pylab.find(S[r, :])
                dG0_r[r, 0] = pylab.dot(S[r, reactants], dG0_f[reactants])

            nan_indices = pylab.find(pylab.isnan(dG0_r))
            finite_indices = pylab.find(pylab.isfinite(dG0_r))
            if (len(nan_indices) > 0):
                # Non-finite reaction energies are replaced by 0 before the cumulative sum.
                dG0_r_finite = pylab.zeros((Nr, 1))
                dG0_r_finite[finite_indices] = dG0_r[finite_indices]
                cum_dG0_r = pylab.cumsum(
                    [0] + [dG0_r_finite[r, 0] * fluxes[r] for r in range(Nr)])
            else:
                cum_dG0_r = pylab.cumsum(
                    [0] + [dG0_r[r, 0] * fluxes[r] for r in range(Nr)])
            pylab.plot(pylab.arange(0.5, Nr + 1),
                       cum_dG0_r,
                       'g--',
                       label='Standard [1M]',
                       figure=profile_fig)

            # plot the thermodynamic profile for the different optimization schemes
            # NOTE(review): dG_f is not assigned in this method (only _ln_conc and score are) -- verify.
            dG_r = pylab.dot(S, dG_f)
            self.html_writer.write('<ol>')
            # This loop rebinds the outer loop variable i.
            for i, dG in enumerate(dG_r):
                self.html_writer.write('<li>%s: %.2f' % (rids[i], dG))
            self.html_writer.write('</ol>')

            cum_dG_r = pylab.cumsum(
                [0] + [dG_r[i, 0] * fluxes[i] for i in range(Nr)])
            pylab.plot(pylab.arange(0.5, Nr + 1),
                       cum_dG_r,
                       figure=profile_fig,
                       label='%s MTDF = %.1f' % (name, score))

            # The explicit label list here takes the place of the label= values given to plot above.
            pylab.legend(['Standard conditions', 'MTDF'], 'lower left')
            fname = '%s-profile-fig' % name

            # NOTE(review): bare html_writer, whereas the writes above use self.html_writer -- confirm.
            html_writer.embed_matplotlib_figure(profile_fig,
                                                width=640,
                                                height=480,
                                                name=fname)

            # Give all compounds with unknown dG0_f the middle concentration value
            # NOTE(review): conc is not assigned in this method, and nan_indices was
            # computed from dG0_r (per reaction), not from dG0_f -- verify.
            conc[nan_indices] = self.thermo.c_mid

            # Collect the compounds that have neither a lower nor an upper bound.
            unconstrained_cs = []
            unconstrained_cids = []
            for i, bound in enumerate(bounds):
                b_low, b_up = bound
                if b_low is None and b_up is None:
                    unconstrained_cs.append(conc[i, 0])
                    unconstrained_cids.append(cids[i])

            # Despite its name, this is the number of *unconstrained* compounds.
            n_constrained = len(unconstrained_cs)
            conc_fig = pylab.figure()
            conc_fig.suptitle('Concentrations %s (MTDF = %.1f)' %
                              (name, score))
            pylab.xscale('log', figure=conc_fig)
            pylab.ylabel('Compound KEGG ID', figure=conc_fig)
            pylab.xlabel('Concentration [M]', figure=conc_fig)
            cids_names = ["C%05d" % cid for cid in unconstrained_cids]
            pylab.yticks(range(n_constrained, 0, -1),
                         cids_names,
                         fontproperties=FontProperties(size=8))
            pylab.plot(unconstrained_cs,
                       range(n_constrained, 0, -1),
                       '*b',
                       figure=conc_fig)

            x_min = self.thermo.c_range[0] / 10
            x_max = self.thermo.c_range[1] * 50
            y_min = 0
            y_max = n_constrained + 1

            # Label each point with the compound name and draw a thin horizontal guide line.
            for i, concentration in enumerate(unconstrained_cs):
                # NOTE(review): kegg is not assigned in this method -- verify it is a global.
                pylab.text(concentration * 1.1,
                           n_constrained - i,
                           kegg.cid2name(unconstrained_cids[i]),
                           figure=conc_fig,
                           fontsize=6,
                           rotation=0)
                y_val = n_constrained - i
                pylab.plot([x_min, x_max], [y_val, y_val], '-k', linewidth=0.4)

            # Shade the span between the smallest and largest unconstrained concentration.
            pylab.axvspan(min(unconstrained_cs),
                          max(unconstrained_cs),
                          facecolor='g',
                          alpha=0.3,
                          figure=conc_fig)
            pylab.axis([x_min, x_max, y_min, y_max], figure=conc_fig)

            fname = '%s-mtdf-conc-fig' % name
            html_writer.embed_matplotlib_figure(conc_fig,
                                                width=640,
                                                height=480,
                                                name=fname)

            self.html_writer.write('</div>')
Ejemplo n.º 19
0
    def CompareMtdf(self, target_mtdf=None):
        """Write an MTDF report (HTML plus two figures) for every pathway.

        For each ``(name, pathway_data)`` in ``self.pathways`` the method
        writes the reactions to HTML, computes a score with
        ``pathway_modelling.Pathway``, and embeds a thermodynamic-profile
        figure and a concentration figure.

        Args:
            target_mtdf: when not None it is passed as both ``min_mtdf`` and
                ``max_mtdf`` to ``FindMtdf_Regularized``; otherwise
                ``FindMTDF_OptimizeConcentrations`` is used.

        Returns:
            None (there is no return statement).

        NOTE(review): the body reads ``dG_f``, ``conc``, ``kegg`` and a bare
        ``html_writer`` that are never assigned in this method.  Unless they
        exist as module-level globals these raise NameError -- confirm.
        """
        # NOTE(review): n_pathways is not used in the rest of this method.
        n_pathways = len(self.pathways)
        for i, (name, pathway_data) in enumerate(self.pathways.iteritems()):
            logging.info('Analyzing pathway %s', name)
            # Opens an outer <div>; it is closed at the end of the iteration.
            self.html_writer.write('<div margin="20px"><div><b>%s</b></div>' % name)
            self.GetConditions(pathway_data)
            S, rids, fluxes, cids = self.GetReactions(name, pathway_data)
            self.WriteReactionsToHtml(S, rids, fluxes, cids, show_cids=False)

            # Bounds on concentrations.
            bounds = [self.thermo.bounds.get(cid, (None, None))
                      for cid in cids]

            # All fluxes are forwards
            fluxes = map(abs, fluxes)
            dG0_f = self.thermo.GetTransformedFormationEnergies(cids)
            c_mid = self.thermo.c_mid
            c_range = self.thermo.c_range

            path = pathway_modelling.Pathway(S, dG0_f)

            if target_mtdf is not None:
                _ln_conc, score = path.FindMtdf_Regularized(
                    c_range, bounds, c_mid,
                    min_mtdf=target_mtdf,
                    max_mtdf=target_mtdf)
            else:
                _ln_conc, score = path.FindMTDF_OptimizeConcentrations(
                    c_range, bounds, c_mid)
            if score is None:
                logging.error('No MTDF score for %s', name)
                # This skips the closing '</div>' written at the end of the loop body.
                continue

            Nr, Nc = S.shape
            profile_fig = pylab.figure()
            profile_fig.hold(True)
            pylab.title('Thermodynamic Profile',
                        figure=profile_fig)
            pylab.ylabel('Cumulative dG [kJ/mol]', figure=profile_fig)
            pylab.xlabel('Reaction KEGG ID', figure=profile_fig)
            pylab.grid(True, figure=profile_fig)

            # rids is rebound to a list of strings, used for tick labels and the <ol> below.
            rids = ['%s' % rids[i] for i in xrange(Nr)]
            pylab.xticks(pylab.arange(1, Nr + 1), rids,
                         fontproperties=FontProperties(size=8),
                         rotation=30)
            # Per-reaction standard energy from the non-zero entries of each row of S.
            dG0_r = pylab.zeros((Nr, 1))
            for r in range(Nr):
                reactants = pylab.find(S[r,:])
                dG0_r[r, 0] = pylab.dot(S[r, reactants], dG0_f[reactants])

            nan_indices = pylab.find(pylab.isnan(dG0_r))
            finite_indices = pylab.find(pylab.isfinite(dG0_r))
            if (len(nan_indices) > 0):
                # Non-finite reaction energies are replaced by 0 before the cumulative sum.
                dG0_r_finite = pylab.zeros((Nr, 1))
                dG0_r_finite[finite_indices] = dG0_r[finite_indices]
                cum_dG0_r = pylab.cumsum([0] + [dG0_r_finite[r, 0] * fluxes[r] for r in range(Nr)])
            else:
                cum_dG0_r = pylab.cumsum([0] + [dG0_r[r, 0] * fluxes[r] for r in range(Nr)])
            pylab.plot(pylab.arange(0.5, Nr + 1), cum_dG0_r, 'g--', label='Standard [1M]', figure=profile_fig)

            # plot the thermodynamic profile for the different optimization schemes
            # NOTE(review): dG_f is not assigned in this method (only _ln_conc and score are) -- verify.
            dG_r = pylab.dot(S, dG_f)
            self.html_writer.write('<ol>')
            # This loop rebinds the outer loop variable i.
            for i, dG in enumerate(dG_r):
                self.html_writer.write('<li>%s: %.2f' % (rids[i], dG))
            self.html_writer.write('</ol>')

            cum_dG_r = pylab.cumsum([0] + [dG_r[i, 0] * fluxes[i] for i in range(Nr)])
            pylab.plot(pylab.arange(0.5, Nr + 1), cum_dG_r, figure=profile_fig, label='%s MTDF = %.1f' % (name, score))

            # The explicit label list here takes the place of the label= values given to plot above.
            pylab.legend(['Standard conditions', 'MTDF'], 'lower left')
            fname = '%s-profile-fig' % name

            # NOTE(review): bare html_writer, whereas the writes above use self.html_writer -- confirm.
            html_writer.embed_matplotlib_figure(profile_fig, width=640, height=480,
                                                name=fname)

            # Give all compounds with unknown dG0_f the middle concentration value
            # NOTE(review): conc is not assigned in this method, and nan_indices was
            # computed from dG0_r (per reaction), not from dG0_f -- verify.
            conc[nan_indices] = self.thermo.c_mid

            # Collect the compounds that have neither a lower nor an upper bound.
            unconstrained_cs = []
            unconstrained_cids = []
            for i, bound in enumerate(bounds):
                b_low, b_up = bound
                if b_low is None and b_up is None:
                    unconstrained_cs.append(conc[i, 0])
                    unconstrained_cids.append(cids[i])

            # Despite its name, this is the number of *unconstrained* compounds.
            n_constrained = len(unconstrained_cs)
            conc_fig = pylab.figure()
            conc_fig.suptitle('Concentrations %s (MTDF = %.1f)' % (name, score))
            pylab.xscale('log', figure=conc_fig)
            pylab.ylabel('Compound KEGG ID', figure=conc_fig)
            pylab.xlabel('Concentration [M]', figure=conc_fig)
            cids_names = ["C%05d" % cid for cid in unconstrained_cids]
            pylab.yticks(range(n_constrained, 0, -1), cids_names,
                         fontproperties=FontProperties(size=8))
            pylab.plot(unconstrained_cs, range(n_constrained, 0, -1),
                       '*b', figure=conc_fig)

            x_min = self.thermo.c_range[0] / 10
            x_max = self.thermo.c_range[1] * 50
            y_min = 0
            y_max = n_constrained + 1

            # Label each point with the compound name and draw a thin horizontal guide line.
            for i, concentration in enumerate(unconstrained_cs):
                # NOTE(review): kegg is not assigned in this method -- verify it is a global.
                pylab.text(concentration * 1.1, n_constrained - i,
                           kegg.cid2name(unconstrained_cids[i]),
                           figure=conc_fig, fontsize=6, rotation=0)
                y_val = n_constrained - i
                pylab.plot([x_min, x_max], [y_val, y_val], '-k', linewidth=0.4)

            # Shade the span between the smallest and largest unconstrained concentration.
            pylab.axvspan(min(unconstrained_cs), max(unconstrained_cs),
                          facecolor='g', alpha=0.3, figure=conc_fig)
            pylab.axis([x_min, x_max, y_min, y_max], figure=conc_fig)

            fname = '%s-mtdf-conc-fig' % name
            html_writer.embed_matplotlib_figure(conc_fig, width=640, height=480,
                                                name=fname)

            self.html_writer.write('</div>')
Ejemplo n.º 20
0
# Figure 3: power against frequency.
figid = pylab.figure(num=3, figsize=(8, 6))
pn = figid.add_subplot(1, 1, 1)
pn.plot(freq, power)
pn.set_title('Power Spectrum of Sunspot Data')
pn.set_xlabel('Frequency (cycles/Year)')
pn.set_ylabel('|FFT(f(t))|$^2$')

# NOTE(review): any zero entry in freq gives an infinite period here -- confirm
# whether freq includes 0.
period = 1. / freq

# New figure: the same power values against period, limited to 0-100 on x.
fig = pylab.figure(figsize=(8, 6))
pylab.plot(period, power)
pylab.xlim(0., 100.)
pylab.title('Power Spectrum of Sunspot Data')
pylab.xlabel('Period (Years/cycle)')
pylab.ylabel('|FFT(f(t))|^2')

# Find finite values
indFinite = pylab.where(pylab.isfinite(power))[0]

# Find index where |F|^2 is maximum
maxpower = power[indFinite].max()
ind = pylab.where(power[indFinite] == maxpower)[0]

# ind is an array and holds several entries when the maximum is tied; the
# '%8.3f' formatting below needs a single value, so annotate the first one.
peak = indFinite[ind[0]]

# Peak value
pylab.plot(period[peak], power[peak], marker='o')

# Annotation
pylab.text(period[peak] + 3, power[peak],
           'Period = %8.3f years' % period[peak], color='k')
Ejemplo n.º 21
0
def weightedQuantilesByGroup(pandasDF,quantilesOf,byGroup=None,weightVar='weight',varPrefix='qtl_', varsByQuantile=None):#,suffix='',skipPlots=True,rankfileprefix=None,ginifileprefix=None,returnFilenamesOnly=False,forceUpdate=False,groupNames=None,ginisOf=None,loadAll=None,parallelSafe=None):
    #
    ##########################################################################
    ##########################################################################
    """

2013 Jan: This is derived from pystata's generateRankingData() for stata data, but this one takes pandas DataFrame instead. And we don't include ginis! (ugh). And leave plotting to a separate function, since we could return data.

e.g.: generateRankingData(pandasDF,'income', varsByQuantile=None,byGroup='year PRuid',weightVar='weight',suffix='',skipPlots=True,rankfileprefix=None,returnFilenamesOnly=False,forceUpdate=False,groupNames=None,parallelSafe=None):

As for the "varsByQuantile", those can easily be done in pandas using cut and groupby etc... not done yet.

This no longer creates files. It returns an augmented DataFrame.
Does not allow more than one variable for quantilesOf.
    """

    df=pandasDF
    from scipy import stats
    assert quantilesOf in df
    if isinstance(byGroup,str):
        byGroup=byGroup.split(' ')

    import numpy as np

    newvar=varPrefix+quantilesOf
    df[newvar]=np.nan
    def getq(adf):
        # If I remove the .values from the following, it fails to preserve order.
        ww=weightedQuantile(adf[quantilesOf].values,adf[weightVar].values)
        adf[newvar]=ww
        assert ww is np.nan or len(ww)==len(adf)
        return(adf)

    print 'Calculating quantiles...', #,end=' ')
    withquantiles=df.groupby(byGroup,group_keys=False).apply(getq)
    print(' [Done]')
    return(withquantiles)
    # 2013 Feb. Also calculate varsByQuantile, if desired.
    if varsByQuantile==None:
        varsByQuantile==[]
    assert all(vbq in df for vbq in varsByQuantile)
    assert not varsByQuantile
    if 0: # NOT WRITTEN YET!!!!!!!!!!!!!!!!!!!!!!!
        for iv,vname in enumerate(varsByQuantile+[quantilesOf]):
            # Use values with weights:
            vvww=[  finiteValues(array([respondent[vname] for respondent in byQtl[qtl]]),
				   array([respondent[weightVar] for respondent in byQtl[qtl]])
				   ) for qtl in pQtl]

            #qtlStats['uw_'+vname]=[np.mean(
            #            finiteValues(array([respondent[vname] for respondent in byQtl[qtl]]))
            # )                    for qtl in pQtl]
            qtlStats[vname]=[wtmean(vv,weights=ww) for vv,ww in vvww]
            #qtlStats['uw_se'+vname]=[stats.sem(
            #            finiteValues(array([respondent[vname] for respondent in byQtl[qtl]]))
            #            )             for qtl in pQtl]
            qtlStats['se'+vname]=[wtsem(vv,ww) for vv,ww in vvww]

	    # Ugly kludge:
	    if vname in ['SWL','lifeToday']:

                vvall,wwall=finiteValues(array([respondent[vname] for respondent in groupDfinite]),
				   array([respondent[weightVar] for respondent in groupDfinite]))
		from pylab import histogram,array
                qtlStats['hist'+vname]=histogram(vvall,bins=-0.5+array([0,1,2,3,4,5,6,7,8,9,10,11]),weights=wwall)


	    # Shall I also calculate Gini here? It seems it may be much faster than Stata's version. #:(, Though I won't have a standard error for it.
	    if doGini and (ginisOf is None or vname in ginisOf):
                # n.b. I don't just want the ones with finite rankVar. So go back to groupD:
                xxV=array([respondent[vname] for respondent in groupD])
		macroInequalities[agroup]['gini'+vname]= cpblGini(weightD,xxV)


		#print "             %s=%s: Gini=%f"%(byGroup,agroup,inequality.Gini)

            # ne=where(logical_and(logical_and(isfinite(x),isfinite(y)),logical_and(isfinite(yLow),isfinite(yHigh))))


            #vQtl=array([stats.mean(finiteValues(
            #            vv[find(logical_and(y<=yQtl[iq] , y>=([min(y)]+yQtl)[iq]))]      )) for iq in range(len(yQtl))])
            #sevQtl=array([stats.sem(finiteValues(
            #            vv[find(logical_and(y<=yQtl[iq] , y>=([min(y)]+yQtl)[iq]))]      )) for iq in range(len(yQtl))])

























    return(withquantiles) 





    if 0:
        def assignQs(x,w):#adf, xv,wv)
            from scipy import interpolate
            import numpy as np
            #w,x=adf[wv],adf[xv]
            CDF=np.cumsum(w)*1.0/sum(w)
            # interp1d returns a function...
            qinterp=interpolate.interp1d(np.array(CDF),np.array(x))
            return([np.nan if np.isnan(xi) else qinterp(xi) for xi in x])
        # else: # Return a value for quantile q
        #            return(interpolate.interp1d(array(CDF),array(x))(q))

    #quantiles=df.groupby(byGroup).apply(lambda adf: assignQs(adf[quantilesOf],adf[weightVar]))








    bb=quantiles0[quantiles0['PRuid']==24]
    plt.plot(bb['qtl_lnHHincome'],bb['lnHHincome'],'.')   
    plt.show()
    iuiui
    # as_index=False makes it so that the eventual returned value is not grouped!
    print 'Calculating quantiles...', #,end=' ')
    quantiles=df.groupby(byGroup, as_index=False).apply(lambda adf: weightedQuantile(adf[quantilesOf],adf[weightVar]))
    print(' [Done]')





    quantilesi=df.groupby(byGroup, group_keys=False).apply(lambda adf: weightedQuantile(adf[quantilesOf],adf[weightVar]))

    xdf=df.groupby(byGroup).transform(lambda adf: weightedQuantile(adf[quantilesOf],adf[weightVar]))
    #df.merge(quantilesi
    ###links2=links.merge(pd.DataFrame(fuelByStateYear),how='left',left_on=['MIN_AGE','state'],right_on=['year','state'])



    fooo
    # OLD FUNCTION BELOW

    from pylab import figure,plot,show,clf,arange,floor,array,find,logical_and,where,isfinite,xlabel,ylabel,cumsum,subplot,rcParams
    rcParams.update({'text.usetex': False,}) #Grrr. need it for plusminus sign, but can't deal with all foreign characters in country and region names?!
    import numpy as np
    from cpblUtilities import plotWithEnvelope,transLegend,savefigall,sortDictsIntoQuantiles,finiteValues,shelfSave,shelfLoad
    # Because numpy and scipy don't have basic weight option in mean, sem !!!
    from cpblUtilities import wtmean,wtsem,wtvar
    from inequality import ineq,cpblGini



    if byGroup==None:
        byGroup=''
    if varsByQuantile==None:
        varsByQuantile==[]
    if suffix:
        suffix='-'+suffix
    assert isinstance(byGroup,str)
    #tsvFile=WP+stripWPdta(stataFile)+'-qtlInput'+suffix+'.tsv'
    microQuantFile=WP+stripWPdta(stataFile)+'-qtlData'+suffix+'.tsv'
    macroQuantFileShelf=WP+stripWPdta(stataFile)+'-qtlData-'+byGroup+suffix+'.pyshelf'
    macroQuantFile=WP+stripWPdta(stataFile)+'-qtlData-'+byGroup+suffix+'.tsv'
    macroGiniFile=WP+stripWPdta(stataFile)+'-gini-'+byGroup+suffix+'.tsv'
    plotfileprefix=WP+'graphics/TMPRANK'
    if rankfileprefix:
        microQuantFile=rankfileprefix+'-'+byGroup+'.tsv'
        macroQuantFileShelf=rankfileprefix+'-'+byGroup+'.pyshelf'
        macroQuantFile=rankfileprefix+'-'+byGroup+'.tsv'
	plotfileprefix=WP+'graphics/'+stripWPdta(rankfileprefix)+byGroup
    if ginifileprefix:
	    macroGiniFile=ginifileprefix+'-'+byGroup+'.tsv'
    if not fileOlderThan([microQuantFile,macroQuantFileShelf]+doGini*[macroGiniFile],WPdta(stataFile)) and not forceUpdate:
        print '    (Skipping generateRankingData; no need to update %s/%s from %s...)'%(microQuantFile,macroQuantFileShelf,stataFile)
        return(os.path.splitext(microQuantFile)[0],os.path.splitext(macroQuantFileShelf)[0])
        #return(microQuantFile,macroQuantFileShelf)

    # Suffix is used in following to ensure that different calls to this function get the correct result exported from Stata, etc, (see notes in fcn below).
    # Caution! if
    onlyVars=None
    if not loadAll:
        onlyVars=' '.join(uniqueInOrder(inVars+[byGroup, quantilesOf]+varsByQuantile+[weightVar]))
    # If parallelSafe, Make the following force-updated, to avoid using shelve/shelf files simultanously by different processes!!
    dddT=loadStataDataForPlotting(stataFile,onlyVars=onlyVars,treeKeys=[byGroup],forceUpdate='parallel' if parallelSafe else forceUpdate,suffix=suffix)#vectors=True)#False,forceUpdate=False,singletLeaves=False):

    # Testing functionality aug 2012 to make this robust to weight variable not existing for all in dataset:
    for kk in dddT:
        plen=len(dddT[kk])
        dddT[kk]=[rrrr for rrrr in dddT[kk] if isfinite(rrrr[weightVar])]
        if not len(dddT[kk])==plen:
            print('CAUTION: I found and ditched some (%d/%d) individuals without weight %s for group %s in generateRankingData'%(plen-len(dddT[kk]),plen,weightVar,kk))

    if 0:
        from dictTrees import dictTree
        kk=ddd.keys()
        #for byKey in byGroup
        print 'Sorting by key...'
        dddT=dictTree([dict([[akey,ddd[akey][irow]] for akey in kk]) for irow in range(len(ddd[kk[0]]))],[byGroup])

    # Now.. Order these and assign ranking (between 0 and 1):  This should take into account the weights, properly.
    print '%d elements have no group (%s).'%(len(dddT.get('',[])),byGroup)
    rankGroups=[]
    macroStats=[]
    macroInequalities={}
    if not skipPlots:
        figure(126)
        clf()
        figure(124)
    for agroup in sorted(dddT.keys()):#.keys()[0:10]:
        if not agroup:
            continue
        groupD=dddT[agroup]
        weightD=array([respondent[weightVar] for respondent in groupD])
        groupDfinite=[xx for xx in groupD if isfinite(xx[quantilesOf]) ]
        # Hm, does the following fail if I include the nan's!?
        groupDfinite.sort(key=lambda x:x[quantilesOf])
	if doGini:
            macroInequalities[agroup]={byGroup:agroup}

        if 0: # I'm eliminating the following, unweighted ranking for now.
            if len(groupDfinite)==0:
                continue
            if len(groupDfinite)==1:
                groupDfinite[0]['rank'+quantilesOf]=0.5
            else:
                for iRank,respondent in enumerate(groupDfinite):
                    # THIS IS WRONG!!!!!!!!!! IT IGNORES WEIGHT. I SHOULD BE USING WEIGHTED RANK. I DO THIS BELOW. CANNOT FIND scipy ROUTINE TO DO QUANTILES WITH SAMPLE WEIGHTS.
                    respondent['rank'+quantilesOf]=iRank*1.0/(len(groupDfinite)-1)
                    x=array([respondent['rank'+quantilesOf] for respondent in groupDfinite])
        y=array([respondent[quantilesOf] for respondent in groupDfinite])
        w=array([respondent[weightVar] for respondent in groupDfinite])


        # Now, I also need to section these up into groups, in order to calculate other variables by quantile. How to do this? I could use a kernel smoothing, to estimate y(I), where, e.g. y is SWB and I is income.  OR I could calculate quantiles. e.g. qtlY(I) would be the mean y amongst all those in the ith quantile. I'll do the latter. This means that curves will NOT represent y(I), since it's mean(y) but i<I.
        minN=20
        nQuantiles=min(25,floor(len(y)/minN))
        pQtl=(1.0+1.0*arange(nQuantiles))/nQuantiles
        assert len(pQtl)==nQuantiles

        assert all(isfinite(w))  # Really? Couldn't I make this robust... [aug2012: okay, i have, above, by modifying ddTT]

        # Use my nifty sort-into-quantiles function
        minN=20
        if len(y)<minN/2:
            print ' SKIPPING '+agroup+' with only %d respondents...'%len(y)
            continue
        nQuantiles=max(2,min(25,floor(len(y)/minN)))
        # The following function ALSO fills in a new element of the weighted rank of each individual.
        byQtl=sortDictsIntoQuantiles(groupD,sortkey=quantilesOf,weightkey=weightVar,approxN=25,)#nQuantiles=min(25,floor(len(y)/minN)))
        pQtl=sorted(byQtl.keys())
        print '   Quantiles: parsing for group %s=%20s,\t with %d respondents,\t with %d having rank variable;\t therefore using %d quantiles...'%(byGroup,agroup,len(groupDfinite),len(finiteValues(y)),len(pQtl))


        # So since sortDictsIntoQ... filled in individual ranks, I can now plot these:
        x=array([respondent['rank'+quantilesOf[0].upper()+quantilesOf[1:]] for respondent in groupDfinite])
        if not skipPlots:
            figure(126)
            clf()
            subplot(121)
            plot(y,x,hold=True)
            xlabel(substitutedNames(quantilesOf))
            ylabel('Quantile')
	    print 'More up to date plots are made by a custom function using the .shelf data, in regressionsInequality'

        #print [stats.mean([gg['lnHHincome'] for gg in byQtl[qq]])  for qq in pQtl]
        #print [stats.mean([gg['lifeToday'] for gg in byQtl[qq]])  for qq in pQtl]


        # Cool! That worked nicely, and is even quite efficient.

        # I wonder how byQtl.keys() compares with the unweighted measure below...    (uses approximately quantile unbiased (Cunnane) parameters)
        yQtl2=stats.mstats.mquantiles(y, prob=pQtl, alphap=0.40000000000000002, betap=0.40000000000000002, axis=None, limit=())


        # Now calculate weighted means for variables of interest within each quantile group:
        qtlStats={'qtl':pQtl,'group':agroup}
        # Also save in the output any variables which are uniform within this group (ie markers of a group in which this is a subgroup):
        if 0:
            for vvv in [vv for vv in inVars if vv not in [byGroup]]:
                if all(array([respondent[vvv] for respondent in groupDfinite])==groupDfinite[0][vvv]): # ah, this variable is uniform within the group
                    qtlStats[vvv]=groupDfinite[0][vvv]

        qtlStats['n']=[ len(
                        finiteValues(array([respondent[quantilesOf] for respondent in byQtl[qtl]]))
                        )             for qtl in pQtl]
        for iv,vname in enumerate(varsByQuantile+[quantilesOf]):
            # Use values with weights:
            vvww=[  finiteValues(array([respondent[vname] for respondent in byQtl[qtl]]),
				   array([respondent[weightVar] for respondent in byQtl[qtl]])
				   ) for qtl in pQtl]

            #qtlStats['uw_'+vname]=[np.mean(
            #            finiteValues(array([respondent[vname] for respondent in byQtl[qtl]]))
            # )                    for qtl in pQtl]
            qtlStats[vname]=[wtmean(vv,weights=ww) for vv,ww in vvww]
            #qtlStats['uw_se'+vname]=[stats.sem(
            #            finiteValues(array([respondent[vname] for respondent in byQtl[qtl]]))
            #            )             for qtl in pQtl]
            qtlStats['se'+vname]=[wtsem(vv,ww) for vv,ww in vvww]

	    # Ugly kludge:
	    if vname in ['SWL','lifeToday']:

                vvall,wwall=finiteValues(array([respondent[vname] for respondent in groupDfinite]),
				   array([respondent[weightVar] for respondent in groupDfinite]))
		from pylab import histogram,array
                qtlStats['hist'+vname]=histogram(vvall,bins=-0.5+array([0,1,2,3,4,5,6,7,8,9,10,11]),weights=wwall)


	    # Shall I also calculate Gini here? It seems it may be much faster than Stata's version. #:(, Though I won't have a standard error for it.
	    if doGini and (ginisOf is None or vname in ginisOf):
                # n.b. I don't just want the ones with finite rankVar. So go back to groupD:
                xxV=array([respondent[vname] for respondent in groupD])
		macroInequalities[agroup]['gini'+vname]= cpblGini(weightD,xxV)


		#print "             %s=%s: Gini=%f"%(byGroup,agroup,inequality.Gini)

            # ne=where(logical_and(logical_and(isfinite(x),isfinite(y)),logical_and(isfinite(yLow),isfinite(yHigh))))


            #vQtl=array([stats.mean(finiteValues(
            #            vv[find(logical_and(y<=yQtl[iq] , y>=([min(y)]+yQtl)[iq]))]      )) for iq in range(len(yQtl))])
            #sevQtl=array([stats.sem(finiteValues(
            #            vv[find(logical_and(y<=yQtl[iq] , y>=([min(y)]+yQtl)[iq]))]      )) for iq in range(len(yQtl))])


            if (not skipPlots) and vname in varsByQuantile:
                figure(126)
                subplot(122)
                colors='rgbckm'
                vQtl= array(qtlStats[vname])
                sevQtl= array(qtlStats['se'+vname])
                pQtl=array(pQtl)
                plotWithEnvelope(pQtl,vQtl,vQtl+sevQtl,vQtl-sevQtl,linestyle='.-',linecolor=None,facecolor=colors[iv],alpha=0.5,label=None,lineLabel=None,patchLabel=vname,laxSkipNaNsSE=True,laxSkipNaNsXY=True,ax=None,skipZeroSE=True) # Why do I seem to need both lax flags?
                plot(pQtl,vQtl,'.',color=colors[iv],alpha=0.5)
                xlabel(substitutedNames(quantilesOf) +' quantile')

            ##ylabel(vname)
        from cpblUtilities import str2pathname
        if not skipPlots:
            transLegend(comments=[groupNames.get(agroup,agroup),r'$\pm$1s.e.'],loc='lower right')
            savefigall(plotfileprefix+'-'+str2pathname(agroup))
        rankGroups+=groupDfinite
        macroStats+=[qtlStats]


	if 0*'doRankCoefficients':
		groupVectors=dict([[kk,[gd[kk] for gd in groupDfinite ]] for kk in groupDfinite[0].keys()])
		from cpblUtilities import cpblOLS
		x=cpblOLS('lifeToday',groupVectors,rhsOnly=[ 'rankHHincome'],betacoefs=False,weights=groupVectors['weight'])
		foioi

        # assert not 'afg: Kabul' in agroup
        # Add the quantile info for this group to the data. Also, compile the summary stats for it.

#[, 0.25, 0.5, 0.75]
        # Centre a series of quantiles
        """
	No. Create 20 quantiles. Assign. if none there, weight nearest?

	e.g. 1  2 10 13


	scipy.stats.mstats.mquantiles

	scipy.stats.mstats.mquantiles(data, prob=[, 0.25, 0.5, 0.75], alphap=0.40000000000000002, betap=0.40000000000000002, axis=None, limit=())

	"""


    from cpblUtilities import dictToTsv
    dictToTsv(rankGroups,microQuantFile)
    tsv2dta(microQuantFile)
    if doGini:
        dictToTsv(macroInequalities.values(),macroGiniFile)
	tsv2dta(macroGiniFile)

    shelfSave(macroQuantFileShelf,macroStats)
    if 0: # whoooo... i think this was totally misguided. it's not a macro file..
        dictToTsv(macroStats,macroQuantFile)
        tsv2dta(macroQuantFile)

    #vectorsToTsv(qtlStats,macroQuantFile)
    #tsv2dta(macroQuantFile)

    #inequality,redundancy,equality,variation,thesum,absolute=ineq(zip(popn,wealth))

    return(os.path.splitext(microQuantFile)[0],os.path.splitext(macroQuantFileShelf)[0])
Ejemplo n.º 22
0
def filter_finite(data):
    """
    Index ``data`` by its finite entries and reduce the result with ``all``.

    The underlying values of ``data`` are cast to float and tested
    element-wise with ``pl.isfinite``; the resulting boolean array is used to
    index ``data``, and ``pl.all`` is applied along axis 1 of that selection.

    NOTE(review): ``data`` presumably is a pandas DataFrame (it must expose
    ``.values`` and accept a boolean array as an index) -- confirm with callers.
    """
    as_float = data.values.astype(float)
    finite_mask = pl.isfinite(as_float)
    selected = data[finite_mask]
    return pl.all(selected, axis=1)
Ejemplo n.º 23
0
    def apply(self, sim):
        """
        Distribute the current day's tests over the population and run them.

        The method returns immediately, without changing anything, when:

        * ``sim.t`` is before ``self.start_day`` or (if ``self.end_day`` is
          set) after ``self.end_day``;
        * ``self.daily_tests`` has no entry for the current relative day;
        * the day's rescaled number of tests is not finite (NaN/inf) or
          truncates to zero.

        Otherwise the number of tests is added to
        ``sim.results['new_tests'][t]``, a per-person weight vector is built
        (symptomatic people, optional ILI symptoms, quarantine, optional
        subtargeting, already-diagnosed people get zero weight), people are
        drawn with ``cvu.choose_w`` and passed to ``sim.people.test``.

        Args:
            sim: the simulation object; this method reads ``sim.t``,
                ``sim.rescale_vec``, ``sim.n``, ``sim['pop_size']`` and
                ``sim.people``, and updates ``sim.results['new_tests']``.

        Returns:
            None
        """
        t = sim.t
        if t < self.start_day:
            return
        if self.end_day is not None and t > self.end_day:
            return

        # Check that there are still tests
        rel_t = t - self.start_day
        if rel_t >= len(self.daily_tests):
            return

        # Number of tests for this day -- rescaled. The finiteness check has to
        # happen on the float value: int(nan) raises ValueError and int(inf)
        # raises OverflowError, so checking after the conversion is too late.
        raw_tests = self.daily_tests[rel_t] / sim.rescale_vec[t]
        if not np.isfinite(raw_tests):
            return
        n_tests = int(raw_tests)
        if not n_tests:  # If there are no tests today, abort early
            return
        sim.results['new_tests'][t] += n_tests

        # Begin by assigning equal testing probability to everyone
        test_probs = np.ones(sim.n)

        # Calculate test probabilities for people with symptoms
        symp_inds = cvu.true(sim.people.symptomatic)
        symp_test = self.symp_test
        if self.pdf:  # Handle the onset to swab delay
            # Time since symptom onset for each symptomatic person
            symp_time = cvd.default_int(
                t - sim.people.date_symptomatic[symp_inds])
            # Fraction of symptomatic people at each time-since-onset value
            inv_count = np.bincount(symp_time) / len(symp_time)
            # Reciprocal of that fraction where it is non-zero, NaN elsewhere
            count = np.nan * np.ones(inv_count.shape)
            nonzero = inv_count != 0
            count[nonzero] = 1 / inv_count[nonzero]
            # Non-in-place multiply so that self.symp_test is never mutated,
            # even if it happens to be an array rather than a scalar.
            symp_test = symp_test * (self.pdf.pdf(symp_time) * count[symp_time])

        test_probs[symp_inds] *= symp_test  # Update the test probabilities

        # Handle symptomatic testing, taking into account prevalence of ILI symptoms
        if self.ili_prev is not None:
            if rel_t < len(self.ili_prev):
                # Number with ILI symptoms on this day
                n_ili = int(self.ili_prev[rel_t] * sim['pop_size'])
                # Give some people some symptoms. Assuming that this is
                # independent of COVID symptomaticity...
                ili_inds = cvu.choose(sim['pop_size'], n_ili)
                ili_inds = np.setdiff1d(ili_inds, symp_inds)
                test_probs[ili_inds] *= self.symp_test

        # Handle quarantine testing
        quar_inds = get_quar_inds(self.quar_policy, sim)
        test_probs[quar_inds] *= self.quar_test

        # Handle any other user-specified testing criteria
        if self.subtarget is not None:
            subtarget_inds, subtarget_vals = get_subtargets(
                self.subtarget, sim)
            test_probs[subtarget_inds] = (
                test_probs[subtarget_inds] * subtarget_vals)

        # Don't re-diagnose people
        diag_inds = cvu.true(sim.people.diagnosed)
        test_probs[diag_inds] = 0.

        # Now choose who gets tested and test them
        test_inds = cvu.choose_w(probs=test_probs, n=n_tests, unique=False)
        sim.people.test(test_inds,
                        self.sensitivity,
                        loss_prob=self.loss_prob,
                        test_delay=self.test_delay)

        return
Ejemplo n.º 24
0
     data[i_scan] = pl.append(data[i_scan], 0)
     lentghs = map(len, data)
     dims = set(lentghs)
 dims = map(pl.shape, data)
 data = pl.vstack(data).T
 channels = pl.arange(data.shape[0])
 if doBGcorrect:
     for l in xrange(data.shape[1]):
         indf = data[:,l] < (pl.median(data[:,l]))
         poly = rt.PolynomialFit(channels, data[:,l], indf=indf)
         data[:,l] -= poly
 data /= mon
 if "3d" in colname and not saveonly:
     if "-log" in colname:
         imdata = pl.log(data)
         ind = pl.isfinite(imdata)
     else:
         imdata = data
         ind = slice(None)
     vmin[j] = 0#min(vmin[j], imdata[ind].min())
     vmax[j] = max(vmax[j], imdata[ind].max())
     thisax.imshow(pl.flipud(imdata), aspect="auto",
                      vmin=vmin[j], vmax=vmax[j],
                      extent=(x[0], x[-1], 0, data.shape[0]-1))
     thisax.set_title("#%i:  %s"%(i, scan.command()))
     thisax.grid(False)
 elif "2d" in colname:
     if "roi" in colname:
         imax = data.sum(1).argmax()
         ind = slice(imax - nint, imax + nint)
         print("Channels in roi: %i...%i"%(imax - nint, imax + nint))