Example #1
def trainStep(fnn, trainer, trndata, tstdata):
    trainer.trainEpochs(1)
    trnresult = percentError(trainer.testOnClassData(), trndata["class"])
    tstresult = percentError(trainer.testOnClassData(dataset=tstdata), tstdata["class"])

    print "epoch: %4d" % trainer.totalepochs, "  train error: %5.2f%%" % trnresult, "  test error: %5.2f%%" % tstresult

    out = fnn.activateOnDataset(griddata)
    out = out.argmax(axis=1)  # the highest output activation gives the class
    out = out.reshape(X.shape)

    figure(1)
    ioff()  # interactive graphics off
    clf()  # clear the plot
    hold(True)  # overplot on
    for c in [0, 1, 2]:
        here, _ = where(trndata["class"] == c)
        plot(trndata["input"][here, 0], trndata["input"][here, 1], "o")
    if out.max() != out.min():  # safety check against flat field
        contourf(X, Y, out)  # plot the contour
    ion()  # interactive graphics on
    draw()  # update the plot

    figure(2)
    ioff()  # interactive graphics off
    clf()  # clear the plot
    hold(True)  # overplot on
    for c in [0, 1, 2]:
        here, _ = where(tstdata["class"] == c)
        plot(tstdata["input"][here, 0], tstdata["input"][here, 1], "o")
    if out.max() != out.min():  # safety check against flat field
        contourf(X, Y, out)  # plot the contour
    ion()  # interactive graphics on
    draw()  # update the plot
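
trainStep relies on module-level objects that are not shown here (fnn, trainer, trndata, tstdata, griddata, X, Y, plus percentError and the pylab plotting helpers). A minimal sketch of how they could be set up, assuming PyBrain's standard ClassificationDataSet/BackpropTrainer API; the sample points are invented for illustration:

from pylab import meshgrid, arange
from pybrain.datasets import ClassificationDataSet
from pybrain.tools.shortcuts import buildNetwork
from pybrain.structure.modules import SoftmaxLayer
from pybrain.supervised.trainers import BackpropTrainer

# tiny illustrative training/test sets: 2 inputs, 1 class label, 3 classes
trndata = ClassificationDataSet(2, 1, nb_classes=3)
tstdata = ClassificationDataSet(2, 1, nb_classes=3)
for x, y, klass in [(0.0, 0.0, 0), (1.0, 1.0, 1), (2.0, 0.0, 2)]:
    trndata.addSample([x, y], [klass])
    tstdata.addSample([x, y], [klass])
trndata._convertToOneOfMany()
tstdata._convertToOneOfMany()

# grid used by trainStep for the contour plot
ticks = arange(-3.0, 6.0, 0.2)
X, Y = meshgrid(ticks, ticks)
griddata = ClassificationDataSet(2, 1, nb_classes=3)
for i in range(X.size):
    griddata.addSample([X.ravel()[i], Y.ravel()[i]], [0])
griddata._convertToOneOfMany()

fnn = buildNetwork(trndata.indim, 5, trndata.outdim, outclass=SoftmaxLayer)
trainer = BackpropTrainer(fnn, dataset=trndata, momentum=0.1, weightdecay=0.01)
for _ in range(20):
    trainStep(fnn, trainer, trndata, tstdata)
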
Example #2
def get_indices(arr, vals, disp=False):
    """

    Get the indices of all the elements between vals[0] and vals[1].
    Alternatively also between vals[2] and vals[3] if they are given.

    Input:
        arr  : the array in which to look for the elements
        vals : a list with either 2 or 4 values giving the limits
               between which the indices of the elements are returned

    Optional argument(s):
        disp : Boolean parameter; if True it displays the start and end
               index and the number of channels in between. Only works
               for value lists of length 2.

    Assumes the values in 'arr' are the channel mid values and that the
    array is evenly spaced.

    ********************** Important! **********************************
    The output indices are Python friendly, i.e. they are 0-based. Take
    care when using the indices in other software, e.g. GILDAS or MIRIAD, which
    are 1-based.

    --------------------------------------------------------------------

                            oOO Changelog OOo

    *2012/02
        Added more documentation, "important" notice about indexing
    *2011/07
        Removed +1 in the output indices to be compatible with rest of
        module, where Python's 0-based indexing is used.
    *2010/12
        Doc written
    *2010/06
        Function created
    """

    from scipy import concatenate, where, array, diff

    dx = abs(0.5 * diff(arr)[0])
    if len(vals) == 4:
        v1, v2, v3, v4 = vals + array([-1, 1, -1, 1]) * dx
        # if the user wants two velocity areas to calculate noise
        low = where((arr >= v1) * (arr <= v2))[0]
        high = where((arr >= v3) * (arr <= v4))[0]
        channels = concatenate((low, high))
    elif len(vals) == 2:
        v1, v2 = vals + array([-1, 1]) * dx
        # channels = where((arr>=v1)*(arr<v2))[0]+1
        # with the +1 the indices were FITS/Fortran (1-based) safe
        # changed: removed +1 for consistency in program
        channels = where((arr >= v1) * (arr <= v2))[0]
    #
    if disp and len(vals) == 2:
        first, last = channels.min(), channels.max()
        n = last - first + 1
        print "\nFirst: %d,\n Last: %d\n Nchan: %d\n" % (first, last, n)
    return channels
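
A minimal usage sketch for get_indices, assuming the function above is in scope; the velocity axis and limits are invented for illustration:

from scipy import linspace

varr = linspace(-20.0, 20.0, 81)   # evenly spaced channel mid values, 0.5 units apart

# all channels between -5 and 5 (disp=True prints first/last index and Nchan)
line_chans = get_indices(varr, [-5.0, 5.0], disp=True)

# two separate windows, e.g. line-free ranges used for a noise estimate
noise_chans = get_indices(varr, [-18.0, -10.0, 10.0, 18.0])
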
Example #3
def make_introns_feasible(introns, genes, CFG):
# introns = make_introns_feasible(introns, genes, CFG)

    tmp1 = sp.array([x.shape[0] for x in introns[:, 0]])
    tmp2 = sp.array([x.shape[0] for x in introns[:, 1]])
    
    unfeas = sp.where((tmp1 > 200) | (tmp2 > 200))[0]
    print >> CFG['fd_log'], 'found %i unfeasible genes' % unfeas.shape[0]

    while unfeas.shape[0] > 0:
        ### make filter more stringent
        CFG['read_filter']['exon_len'] = min(36, CFG['read_filter']['exon_len'] + 4)
        CFG['read_filter']['mincount'] = 2 * CFG['read_filter']['mincount']
        CFG['read_filter']['mismatch'] = max(CFG['read_filter']['mismatch'] - 1, 0)

        ### get new intron counts
        tmp_introns = get_intron_list(genes[unfeas], CFG)
        introns[unfeas, :] = tmp_introns

        ### still unfeasible?
        tmp1 = sp.array([x.shape[0] for x in introns[:, 0]])
        tmp2 = sp.array([x.shape[0] for x in introns[:, 1]])

        still_unfeas = sp.where((tmp1 > 200) | (tmp2 > 200))[0]
        idx = sp.where(~sp.in1d(unfeas, still_unfeas))[0]

        for i in unfeas[idx]:
            print >> CFG['fd_log'], '[feasibility] set criteria for gene %s to: min_ex %i, min_conf %i, max_mism %i' % (genes[i].name, CFG['read_filter']['exon_len'], CFG['read_filter']['mincount'], CFG['read_filter']['mismatch'])
        unfeas = still_unfeas

    return introns
Example #4
def rectangle(iterable):
  """Turn the masks of an accessor iterable into the coordinates of an rectangle
  surrounding all the segements points.

  The coordinates are normalized to the interval [-1, 1]."""
  # TODO: make this work for multiclass data.
  for sample, target in iterable:
    orig_shape = target.shape[1], target.shape[2]
    target.shape = scipy.size(target) / 2, 2
    classes = target[:, 0].copy()
    classes.shape = orig_shape

    indices = scipy.where(classes.sum(axis=0) >= 1)
    min0, max0 = indices[0][0], indices[0][-1]
    indices = scipy.where(classes.sum(axis=1) >= 1)
    min1, max1 = indices[0][0], indices[0][-1]
    print min0, max0, min1, max1

    # Normalize.
    normalize = lambda x, rng: 2. * x / rng - 1
    size0, size1 = classes.shape[0], classes.shape[1]
    min0 = normalize(min0, size0)
    max0 = normalize(max0, size0)
    min1 = normalize(min1, size1)
    max1 = normalize(max1, size1)

    target = scipy.array((min0, max0, min1, max1))
    print target
    yield sample, target
def get_concentration_functions(composition_table_dict):

    meta = composition_table_dict['meta']
    composition_table = Table.from_dict(composition_table_dict['data'])
    elements = [col for col in composition_table.columns if col not in meta]
    x = composition_table["X"].values
    y = composition_table["Y"].values
    cats = composition_table["X"].unique()
    concentration, conc, d, y_c, functions = {}, {}, {}, {}, RecursiveDict()

    for el in elements:
        concentration[el] = to_numeric(composition_table[el].values)/100.
        conc[el], d[el], y_c[el] = {}, {}, {}

        if meta['X'] == 'category':
            for i in cats:
                k = '{:06.2f}'.format(float(i))
                y_c[el][k] = to_numeric(y[where(x==i)])
                conc[el][k] = to_numeric(concentration[el][where(x==i)])
                d[el][k] = interp1d(y_c[el][k], conc[el][k])

            functions[el] = lambda a, b, el=el: d[el][a](b)

        else:
            functions[el] = interp2d(float(x), float(y), concentration[el])

    return functions
 def __init__(self, which_case, LUT, RandomSamples, interp_type):
     print 'SciPy Interpolating ', which_case
     
     select = {\
     "rhoe":('Density','StaticEnergy'),\
     "PT":('Pressure','Temperature'),\
     "Prho":('Pressure','Density'),\
     "rhoT":('Density','Temperature'),\
     "Ps":('Pressure','Entropy'),\
     "hs":('Enthalpy','Entropy')\
     }
     
     thermo1, thermo2, = select[which_case]
     x =getattr(LUT,thermo1)
     y =getattr(LUT,thermo2)
     samples_x = getattr(RandomSamples,thermo1)
     samples_y = getattr(RandomSamples,thermo2)
     setattr(self,thermo1, samples_x)
     setattr(self,thermo2, samples_y)
     
     variables = sp.array(['Temperature','Density','Enthalpy','StaticEnergy',\
     'Entropy','Pressure','SoundSpeed2','dPdrho_e','dPde_rho',\
     'dTdrho_e','dTde_rho','Cp','Mu','Kt']);
     
     for var in variables[sp.where((variables!=thermo1) * (variables!=thermo2))]:
         z = getattr(LUT,var)            
         interp_func = sp.interpolate.griddata((x,y),z,sp.column_stack((samples_x,samples_y)),\
         method=interp_type) 
         nan_index = sp.where(sp.isnan(interp_func))
         interp_func[nan_index]= sp.interpolate.griddata((x,y),z,\
         sp.column_stack((samples_x[nan_index],samples_y[nan_index])),\
         method='nearest') 
         setattr(self,var,interp_func)
         
     return  
Example #7
def getEncodedData(filename,encoding="additive",phenotype_id=None,maf=0.0):
    f = h5py.File(filename,'r')
    if phenotype_id is not None:
        phenotype_id = str(phenotype_id)
        sample_ids = f['Genotype/sample_ids'][:]
        p_sample_ids = f['Phenotypes'][phenotype_id]['sample_ids'][:]
        y = f['Phenotypes'][phenotype_id]['y'][:]
        ind = sp.where(~sp.isnan(y))[0]
        y = y[ind]
        p_sample_ids = p_sample_ids[ind]
        ind = (sp.reshape(sample_ids,(sample_ids.shape[0],1))==p_sample_ids).nonzero()
        raw = f['Genotype/raw'][:]
        raw = raw[ind[0],:]
        [encoded,maf_v] = encodeHeterozygousData(raw)
        ind = sp.where(maf_v>=maf)[0]
        encoded = encoded[:,ind]
        identifiers = f['Genotype/identifiers'][:]
        identifiers = identifiers[ind]
        maf_v = maf_v[ind]
        f.close()
        return [encoded,maf_v,identifiers]
    if encoding=="additive":
        if 'encoded_additive' in f['Genotype'].keys():
            encoded = f['Genotype/encoded_additive'][:]
            maf_v = f['Genotype/global_maf'][:]
        else:
            [encoded,maf_v] = encodeHeterozygousData(f['Genotype/raw'][:])
    identifiers = f['Genotype/identifiers'][:]
    f.close()
    return [encoded,maf_v,identifiers]
Example #8
  def scanSound(self, source, minnotel):
    binarized = source
    scale = 60. / self.wavetempo * (binarized[0].size / self.duration)
    noise_length = scale*minnotel

    antinoised = sp.zeros_like(binarized)

    for i in range(sp.shape(binarized)[0]):
      new_line = binarized[i, :].copy()
      diffed = sp.diff(new_line)
      ones_keys = sp.where(diffed == 1)[0]
      minus_keys = sp.where(diffed == -1)[0]
      
      if(ones_keys.size != 0 and minus_keys.size != 0):
        if(ones_keys[0] > minus_keys[0]):
          new_line = self.cutNoise(
              (0, minus_keys[0]), noise_length, new_line)
          minus_keys = sp.delete(minus_keys, 0)

        if(ones_keys[-1] > minus_keys[-1]):
          new_line = self.cutNoise(
              (ones_keys[-1], new_line.size-1), noise_length, new_line)
          ones_keys = sp.delete(ones_keys, -1)

        for j in range(sp.size(ones_keys)):
          new_line = self.cutNoise(
              (ones_keys[j], minus_keys[j]), noise_length, new_line)

        antinoised[i, :] = new_line

    return antinoised
Example #9
 def cryptoInternal(self, data, base):
     addresses = scipy.array(range(base, base + (len(data) * 2), 2), scipy.uint32)
     for mask, xorVal in self.XOR_TABLE1:
         data = scipy.where((addresses & mask) == mask, data ^ xorVal, data)
     for mask, xorVal in self.XOR_TABLE2:
         data = scipy.where((addresses & mask) != 0,    data ^ xorVal, data)
     return data
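
XOR_TABLE1 and XOR_TABLE2 are class attributes that are not shown; a self-contained sketch of the same where-on-address-mask pattern, with made-up tables and data:

import scipy

XOR_TABLE1 = [(0x04, 0x12), (0x08, 0x34)]   # hypothetical (mask, xor value) pairs
XOR_TABLE2 = [(0x10, 0x56)]

data = scipy.array([0x00, 0x11, 0x22, 0x33], scipy.uint32)
base = 0x100
addresses = scipy.array(range(base, base + (len(data) * 2), 2), scipy.uint32)

for mask, xorVal in XOR_TABLE1:
    # xor only the words whose address has *all* bits of mask set
    data = scipy.where((addresses & mask) == mask, data ^ xorVal, data)
for mask, xorVal in XOR_TABLE2:
    # xor the words whose address has *any* bit of mask set
    data = scipy.where((addresses & mask) != 0, data ^ xorVal, data)
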
Example #10
 def smart_threshold(self):
     self.median = numpy.median(self.data)
     self.std = numpy.std(self.data)
     blank = scipy.where(self.data < self.median+0.25*self.std)
     signal = scipy.where(self.data > self.median+0.25*self.std)
     self.data[blank] = 0.0
     self.data[signal] = 1.0
 def maskLowStddVoxels(self, dds, nMeanDds, nStddDds):
     unique = np.unique(sp.where(nStddDds.subd.asarray() <= 1.0/3, dds.subd.asarray(), dds.mtype.maskValue()))
     unique = unique[sp.where(unique != dds.mtype.maskValue())]
     if (dds.mpi.comm != None):
         unique = dds.mpi.comm.allreduce(unique.tolist(), op=mpi.SUM)
         unique = np.unique(unique)
     rootLogger.info("Unique constant stdd values = %s" % (unique,))
     rootLogger.info("Creating mask from unique constant values...")
     mskDds = mango.zeros_like(dds, mtype="segmented")
     for uVal in unique:
         mskDds.asarray()[...] = sp.where(dds.asarray() == uVal, 1, mskDds.asarray())
     rootLogger.info("Done creating mask from unique constant values.")
 
     rootLogger.info("Labeling connected constant zero-stdd regions...")
     mskDds.updateHaloRegions()
     mskDds.mirrorOuterLayersToBorder(False)
     self.writeIntermediateDds("_000ZeroStddForLabeling", mskDds)
     lblDds = mango.image.label(mskDds, 1)
     rootLogger.info("Done labeling connected constant stdd regions.")
     self.writeIntermediateDds("_000ZeroStdd", lblDds)
     
     countThresh = 0.01 * sp.product(lblDds.shape)
     rootLogger.info("Eliminating large clusters...")
     lblDds = mango.image.eliminate_labels_by_size(lblDds, minsz=int(countThresh), val=lblDds.mtype.maskValue())
     self.writeIntermediateDds("_000ZeroStddLargeEliminated", lblDds)
 
     rootLogger.info("Assigning mask values...")
     mskDds.subd.asarray()[...] = \
         sp.where(lblDds.subd.asarray() == lblDds.mtype.maskValue(), True, False)
     self.writeIntermediateDds("_000ZeroStddMskd", mskDds)
     del lblDds
     for tmpDds in [dds, nMeanDds, nStddDds]:
         tmpDds.subd.asarray()[...] = \
             sp.where(mskDds.subd.asarray(), tmpDds.mtype.maskValue(), tmpDds.subd.asarray())
 def eliminatePercentileTails(self, mskDds, loPercentile=10.0, hiPercentile=90.0):
     """
     Trims lower and/or upper image histogram tails by replacing :samp:`mskDds`
     voxel values with :samp:`mskDds.mtype.maskValue()`. 
     """
     rootLogger.info("Eliminating percentile tails...")
     rootLogger.info("Calculating element frequencies...")
     elems, counts = elemfreq(mskDds)
     rootLogger.info("elems:\n%s" % (elems,))
     rootLogger.info("counts:\n%s" % (counts,))
     cumSumCounts = sp.cumsum(counts, dtype="float64")
     percentiles = 100.0*(cumSumCounts/float(cumSumCounts[-1]))
     percentileElems = elems[sp.where(sp.logical_and(percentiles > loPercentile, percentiles < hiPercentile))]
     loThresh = percentileElems[0]
     hiThresh = percentileElems[-1]
     rootLogger.info("Masking percentiles range (%s,%s) = (%s,%s)" % (loPercentile, hiPercentile, loThresh, hiThresh))
     mskDds.asarray()[...] = \
         sp.where(
             sp.logical_and(
                 sp.logical_and(mskDds.asarray() >= loThresh, mskDds.asarray() <= hiThresh),
                 mskDds.asarray() != mskDds.mtype.maskValue()
             ),
             mskDds.asarray(),
             mskDds.mtype.maskValue()
         )
     rootLogger.info("Done eliminating percentile tails.")
Example #13
 def step(self, *args):
     """First update the step size, then actually take a step along the gradient."""
     g = self.model.grad(*args);
     
     # Update the weighted Exponential sq avg.
     self.sqExpAvgGrad *= self.exponentAvgM;
     self.sqExpAvgGrad += (1-self.exponentAvgM) * g**2;
     self.sqExpAvgGrad[:] = where(self.sqExpAvgGrad < EPSILON, EPSILON, self.sqExpAvgGrad);
     
     # Update the muVect
     possUpdate = 1 + self.qLearningRate * g * self.expAvgGrad / self.sqExpAvgGrad
     #log.debug('max(possUpdate): %.4f,  min(possUpdate): %.4f' % (max(possUpdate), min(possUpdate)))
     ## Keep this from going negative.
     possUpdate = where(possUpdate < 0.001, 0.001, possUpdate);
     self.muVect *= possUpdate
     
     # Do something to cap the update rate.  This is allowing the step rate to overpower the decay completely
     self.muVect = where(self.muVect > self.maxMuVect, self.maxMuVect, self.muVect);
     
     # Then update the exponential average
     self.expAvgGrad *= self.exponentAvgM;
     self.expAvgGrad += (1-self.exponentAvgM) * g;
     
     self.model.params -= self.muVect * g
     Trainer.step(self,*args)
Example #14
    def from_gene(self, gene): 

        sg = gene.splicegraph.vertices
        breakpoints = sp.unique(sg.ravel())
        self.segments = sp.zeros((2, 0), dtype='int')
        for j in range(1, breakpoints.shape[0]):
            s = sp.sum(sg[0, :] < breakpoints[j])
            e = sp.sum(sg[1, :] < breakpoints[j])
            if s > e:
                self.segments = sp.c_[self.segments, [breakpoints[j-1], breakpoints[j]]]

        ### match nodes to segments
        self.seg_match = sp.zeros((0, sg.shape[1]), dtype='bool')
        for j in range(sg.shape[1]):
            tmp = ((sg[0, j] <= self.segments[0, :]) & (sg[1, j] >= self.segments[1, :]))
            if self.seg_match.shape[0] == 0:
                self.seg_match = tmp.copy().reshape((1, tmp.shape[0]))
            else:
                self.seg_match = sp.r_[self.seg_match, tmp.reshape((1, tmp.shape[0]))]

        ### create edge graph between segments
        self.seg_edges = sp.zeros((self.segments.shape[1], self.segments.shape[1]), dtype='bool')
        k, l = sp.where(sp.triu(gene.splicegraph.edges))

        for m in range(k.shape[0]):
            ### donor segment
            d = sp.where(self.seg_match[k[m], :])[0][-1]
            ### acceptor segment
            a = sp.where(self.seg_match[l[m], :])[0][0]
            self.seg_edges[d, a] = True
Example #15
    def newEpisode(self):
        if self.learning:
            params = ravel(self.explorationlayer.module.params)
            target = ravel(sum(self.history.getSequence(self.history.getNumSequences()-1)[2]) / 500)
        
            if target != 0.0:
                self.gp.addSample(params, target)
                if len(self.gp.trainx) > 20:
                    self.gp.trainx = self.gp.trainx[-20:, :]
                    self.gp.trainy = self.gp.trainy[-20:]
                    self.gp.noise = self.gp.noise[-20:]
                    
                self.gp._calculate()
                        
                # get new parameters where mean was highest
                max_cov = diag(self.gp.pred_cov).max()
                indices = where(diag(self.gp.pred_cov) == max_cov)[0]
                pick = indices[random.randint(len(indices))]
                new_param = self.gp.testx[pick]
            
                # check if that one exists already in gp training set
                if len(where(self.gp.trainx == new_param)[0]) > 0:
                    # add some normal noise to it
                    new_param += random.normal(0, 1, len(new_param))

                self.explorationlayer.module._setParameters(new_param)

            else:
                self.explorationlayer.drawRandomWeights()
        
        # don't call StateDependentAgent.newEpisode() because it randomizes the params
        LearningAgent.newEpisode(self)
 def _do_outer_iteration_stage(self):
     #Generate curve from points
     for inv_val in self._inv_points:
         #Apply one applied pressure and determine invaded pores
         logger.info('Applying capillary pressure: '+str(inv_val))
         self._do_one_inner_iteration(inv_val)
     #Store results using networks' get/set method
     self['pore.inv_Pc'] = self._p_inv
     self['throat.inv_Pc'] = self._t_inv
     #Find invasion sequence values (to correspond with IP algorithm)
     self._p_seq = sp.searchsorted(sp.unique(self._p_inv),self._p_inv)
     self._t_seq = sp.searchsorted(sp.unique(self._t_inv),self._t_inv)
     self['pore.inv_seq'] = self._p_seq
     self['throat.inv_seq'] = self._t_seq
     #Calculate Saturations
     v_total = sp.sum(self._net['pore.volume'])+sp.sum(self._net['throat.volume'])
     sat = 0.
     self['pore.inv_sat'] = 1.
     self['throat.inv_sat'] = 1.
     for i in range(self._npts):
         inv_pores = sp.where(self._p_seq==i)[0]
         inv_throats = sp.where(self._t_seq==i)[0]
         new_sat = (sum(self._net['pore.volume'][inv_pores])+sum(self._net['throat.volume'][inv_throats]))/v_total
         sat += new_sat
         self['pore.inv_sat'][inv_pores] = sat
         self['throat.inv_sat'][inv_throats] = sat
Example #17
def spectralSlope(wl, flux, dFlux, wlStart, wlStop, beta_guess, **kwargs):
    bm = scipy.where( (wl > wlStart) & (wl < wlStop) & numpy.isfinite(flux) )[0]

    if ( 'strongLines' in kwargs ):
       for line, width in zip(kwargs['strongLines'], kwargs['lineWidths']):
           new_bm = scipy.where( abs(wl[bm]-line) > width)
           bm = bm[new_bm[0]]

    x = wl[bm]
    y = flux[bm]
    dy = dFlux[bm]

    normalization = y[0]
    z = normalization*(x/wlStart)**beta_guess

    coeffs = [normalization, beta_guess]
    
    fitfunc = lambda p, x : p[0]*(x/wlStart)**(p[1])
    errfunc = lambda p, x, z, dz: numpy.abs((fitfunc(p, x) - z)/dz)
    pfit = scipy.optimize.leastsq(errfunc, coeffs, args=(numpy.asarray(x, dtype=numpy.float64),
    numpy.asarray(y,dtype=numpy.float64), numpy.asarray(dy,dtype=numpy.float64)), full_output = 1)

    if ( 'plt' in kwargs ):
        original = Gnuplot.Data(x, y, with_='lines')
        guess = Gnuplot.Data(x, z, with_='lines')
        new = Gnuplot.Data(x, pfit[0][0]*(x/wlStart)**(pfit[0][1]), with_='lines')
        kwargs['plt'].plot(original, guess, new)
        #raw_input()

    return pfit[0]
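
A usage sketch fitting a synthetic power-law spectrum with spectralSlope; the wavelength grid, uncertainties and the masked line are invented, and the numpy/scipy.optimize imports the function needs are assumed to be available in its module:

import numpy

wl = numpy.linspace(4000.0, 9000.0, 500)     # illustrative wavelength grid
true_beta = -1.5
flux = 1.0e-12 * (wl / 4500.0) ** true_beta
dFlux = 0.01 * flux                          # 1% uncertainties

# fit between 4500 and 8500, masking one hypothetical strong line at 6563
normalization, beta = spectralSlope(wl, flux, dFlux, 4500.0, 8500.0, -1.0,
                                    strongLines=[6563.0], lineWidths=[20.0])
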
Example #18
    def populate_out_of_dip_theta(self, n, dip):

        out_of_dip = asarray(self.populate_distribution(
            self.out_of_dip_theta_dist, n))

        (errorIndexes,) = where((out_of_dip > (175 - dip)) &
                                (out_of_dip < (185 - dip)))

        if len(errorIndexes) > 0:
            for i in errorIndexes:
                blnBadNum = True
                count = 0
                while blnBadNum:
                    newNum = self.populate_distribution(
                        self.out_of_dip_theta_dist, 1)
                    if ((newNum[0] <= (175 - dip)) | (newNum[0] >= (185 - dip))):
                        blnBadNum = False
                    count = count + 1
                    if count > 1000:
                        msg = "Bad out of dip theta range in fault \
                                     source file"
                        raise IOError(msg)
                out_of_dip[i] = newNum[0]
        (errorIndexes,) = where((out_of_dip > (175 - dip)) &
                                (out_of_dip < (185 - dip)))

        if len(errorIndexes) > 0:
            msg = "Bad out of dip theta range in fault \
                                     source file"
            raise IOError(msg)

        return out_of_dip
Example #19
def binSyntheticSpectrum(spectrum, native_wl, new_wl):
    """
        This routine pixelates a synthetic spectrum, in effect simulating the 
        discrete nature of detector pixels.
    """
    retval = numpy.zeros(len(new_wl))
    for i in range(len(new_wl)-1):
        bm = scipy.where( (native_wl > new_wl[i]) & (
            native_wl <= new_wl[i+1]))[0]
        if (len(bm) > 1):
            num=scipy.integrate.simps(spectrum[bm], x=native_wl[bm])
            denom = max(native_wl[bm]) - min(native_wl[bm])
            retval[i] = num/denom
        elif (len(bm) == 1):
            retval[i] = 0.0#native_wl[bm]
        else:
            retval[i] = 0.0#retval[-1]

    bm = scipy.where(native_wl > new_wl[-1])[0]
    if len(bm) > 1:
        num = scipy.integrate.simps(spectrum[bm], x=native_wl[bm])
        denom = max(native_wl[bm]) - min(native_wl[bm])
        retval[-1] = num/denom
    else:
        if len(bm) == 1:
            retval[-1] = spectrum[bm]
        else:
            retval[-1] = spectrum[-1]

    return retval
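
A usage sketch: rebinning a finely sampled synthetic spectrum onto a coarser pixel grid (grids and spectrum are invented; the numpy, scipy and scipy.integrate imports the function relies on are assumed present in its module):

import numpy

native_wl = numpy.linspace(5000.0, 6000.0, 10001)     # fine native sampling
spectrum = 1.0 + 0.05 * numpy.sin(native_wl / 50.0)   # made-up synthetic spectrum
new_wl = numpy.linspace(5000.0, 6000.0, 201)          # coarser detector pixels

binned = binSyntheticSpectrum(spectrum, native_wl, new_wl)
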
Example #20
def fit_dispersion(counts, disp_raw, disp_conv, sf, CFG, dmatrix1):

    mean_count = sp.mean(counts / sf, axis=1)[:, sp.newaxis]
    index = sp.where(disp_conv)[0]

    lowerBound = sp.percentile(sp.unique(disp_raw[index]), 1)
    upperBound = sp.percentile(sp.unique(disp_raw[index]), 99)

    idx = sp.where((disp_raw > lowerBound) & (disp_raw < upperBound))[0]

    matrix = sp.ones((idx.shape[0], 2), dtype='float')
    matrix[:, 0] /= mean_count[idx].ravel()

    modGamma = sm.GLM(disp_raw[idx], matrix, family=sm.families.Gamma(sm.families.links.identity))
    res = modGamma.fit()
    Lambda = res.params

    disp_fitted = disp_raw.copy()
    ok_idx = sp.where(~sp.isnan(disp_fitted))[0]
    disp_fitted[ok_idx] = Lambda[0] / mean_count[ok_idx] + Lambda[1]

    if sp.sum(disp_fitted > 0) > 0:
        print "Found dispersion fit"

    if CFG['diagnose_plots']:
        plot.mean_variance_plot(counts=counts,
                                disp=disp_fitted,
                                matrix=dmatrix1,
                                figtitle='Fitted Dispersion Estimate',
                                filename=os.path.join(CFG['plot_dir'], 'dispersion_fitted.pdf'),
                                CFG=CFG)

    return (disp_fitted, Lambda, idx)
Example #21
 def filterNonInformativeSNPs(self):
     # drop SNP columns that carry no information, i.e. columns in which
     # every sample has the same genotype code (2, 1 or 0)
     for code in [2, 1, 0]:
         tmp = sp.where((self.__x==code).sum(axis=0)!=self.__x.shape[0])[0]
         if not tmp.shape[0]==self.__x.shape[0]:
             self.__x = self.__x[:,tmp]
             self.__chr_index = self.__chr_index[tmp]
             self.__pos_index = self.__pos_index[tmp]
             self.__maf_data = self.__maf_data[tmp]
             self.__raw = self.__raw[:,tmp]
             if not self.__x_additive is None:
                 self.__x_additive = self.__x_additive[:,tmp]
Example #22
def quantify_intron_retention(event, gene, counts_segments, counts_edges, counts_seg_pos):

    cov = sp.zeros((2, ), dtype='float')
    sg = gene.splicegraph
    segs = gene.segmentgraph

    seg_lens = segs.segments[1, :] - segs.segments[0, :]
    seg_shape = segs.seg_edges.shape
    order = 'C'
    offset = 0

    ### find exons corresponding to event
    idx_exon1  = sp.where((sg.vertices[0, :] == event.exons1[0, 0]) & (sg.vertices[1, :] == event.exons1[0, 1]))[0]
    idx_exon2  = sp.where((sg.vertices[0, :] == event.exons1[1, 0]) & (sg.vertices[1, :] == event.exons1[1, 1]))[0]

    ### find segments corresponding to exons
    seg_exon1 = sp.sort(sp.where(segs.seg_match[idx_exon1, :])[1])
    seg_exon2 = sp.sort(sp.where(segs.seg_match[idx_exon2, :])[1])
    seg_all = sp.arange(seg_exon1[0], seg_exon2[-1])

    seg_intron = sp.setdiff1d(seg_all, seg_exon1)
    seg_intron = sp.setdiff1d(seg_intron, seg_exon2)
    assert(seg_intron.shape[0] > 0)

    ### compute exon coverages as mean of position wise coverage
    # intron_cov
    cov[0] = sp.sum(counts_segments[seg_intron] * seg_lens[seg_intron]) / sp.sum(seg_lens[seg_intron])

    ### check intron confirmation as sum of valid intron scores
    ### intron score is the number of reads confirming this intron
    # intron conf
    idx = sp.where(counts_edges[:, 0] == sp.ravel_multi_index([seg_exon1[-1], seg_exon2[0]], seg_shape, order=order) + offset)[0]
    cov[1] = counts_edges[idx, 1]

    return cov
def calculate_kappa(magnitude, damping_s, damping_m, damping_l):
    """
    kappa=where(magnitude>5.5,self.damping_m,damping_s)
    kappa[where(magnitude>7.5)]=damping_l
    # where may cause issues if both mag and sites have
    # non-trivial dimension.
    # in that case we may have to try:
    """
    try:
        damping_s = damping_s.swapaxes(0, 1)
        damping_m = damping_m.swapaxes(0, 1)
        damping_l = damping_l.swapaxes(0, 1)
        magnitude = magnitude.swapaxes(0, 1)
    except ValueError:  # to avoid error with numpy version > 1.10.1
        pass

    kappa = damping_s * (magnitude <= 5.5)
    kappa[where(magnitude > 5.5)[0]] = damping_m
    kappa[where(magnitude > 7.5)[0]] = damping_l

    try:
        kappa = kappa.swapaxes(0, 1)
    except ValueError:  # to avoid error with numpy version > 1.10.1
        pass

    return kappa
Example #24
    def sort_traces(self):
        """ creates a (t,ID,stim,rep) np.array of the Traces """
        
        labels = sp.array(self.Main.Data.Metadata.trial_labels)

        # inference
        stim_unique = sp.unique(labels)
        nStims = stim_unique.shape[0]
        nReps = len(labels) / nStims
        
        nFrames = self.Main.Data.nFrames
        nROIs = len(self.Main.ROIs.ROI_list)
        
        # dims are t, cell, odor, rep
        self.Main.Data.Traces_sorted = sp.zeros((nFrames,nROIs,nStims,nReps))
        
        for n in range(self.Main.Data.nTrials):
            # get the correct indices
            stim_index = sp.where(stim_unique == labels[n])[0][0] # this finds the index in stim_unique of the corresponding stim of the trial
            rep_index = sp.where(sp.where(labels == labels[n])[0] == n)[0][0] # which occurrence of this stimulus is trial n? -> rep index
            
            # get the traces and put it in the data structure at the correct place
            try:
                self.Main.Data.Traces_sorted[:,:,stim_index,rep_index] = self.Main.Data.Traces[:,:,n]
            except IndexError:
                sys.exit()
                pass
        pass
Example #25
    def cross_validation(self, x, y, v=5, sig_r=2.0 ** sp.arange(-8, 0), mu_r=10.0 ** sp.arange(-15, 0)):
        # Get parameters
        n = x.shape[0]
        ns = sig_r.size
        nm = mu_r.size
        err = sp.zeros((ns, nm))

        # Initialization of the indices for the cross validation
        cv = CV()
        cv.split_data_class(y, v=v)

        for i in range(ns):
            for j in range(nm):
                for k in range(v):
                    model_temp = KDA()
                    model_temp.train(x[cv.it[k], :], y[cv.it[k]], sig=sig_r[i], mu=mu_r[j])
                    yp = model_temp.predict(x[cv.iT[k], :], x[cv.it[k], :], y[cv.it[k]])
                    yp.shape = y[cv.iT[k]].shape
                    t = sp.where(yp != y[cv.iT[k]])[0]
                    err[i, j] += float(t.size) / yp.size
                    del model_temp
        err /= v
        t = sp.where(err == err.min())
        self.sig = sig_r[t[0][0]]
        self.mu = mu_r[t[1][0]]
        return sig_r[t[0][0]], mu_r[t[1][0]], err
Example #26
def reScale(Array, MaxMin=None, level=64, NoData=-9999):
    '''Rescale pixel values

    MaxMin should be a list containing max and min (max,min)
    it will be calculated from the input array if it is not provided'''


    if isinstance(Array, sp.ma.MaskedArray):
        Array = Array.astype(float)
    else:
        Array = sp.ma.masked_values(Array, NoData).astype(float)

    if MaxMin == None:
        Max = Array.max()
        Min = Array.min()
        Range = Max-Min
    else:
        Max = MaxMin[0]
        Min = MaxMin[1]
        Range = Max-Min

    Array = sp.where(Array<Min, Min, Array)
    Array = sp.where(Array>Max, Max, Array)
    newArray = ((Array - Min)/Range*(level-1)).round()

    return newArray
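
A usage sketch for reScale with a small made-up raster containing one NoData pixel; `import scipy as sp` is assumed, as in the function itself:

import scipy as sp

band = sp.array([[10.,    20., 30.],
                 [40., -9999., 60.],
                 [70.,    80., 90.]])

scaled = reScale(band, level=64)                        # stretch using the band's own max/min
clipped = reScale(band, MaxMin=(80., 20.), level=256)   # clip to an explicit (max, min)
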
 def _generate_masked_mesh(self, cell_mask=None):
     r"""
     Generates the mesh based on the cell mask provided
     """
     #
     if cell_mask is None:
         cell_mask = sp.ones(self.data_map.shape, dtype=bool)
     #
     # initializing arrays
     self._edges = sp.ones(0, dtype=str)
     self._merge_patch_pairs = sp.ones(0, dtype=str)
     self._create_blocks(cell_mask)
     #
     # building face arrays
     mapper = sp.ravel(sp.array(cell_mask, dtype=int))
     mapper[mapper == 1] = sp.arange(sp.count_nonzero(mapper))
     mapper = sp.reshape(mapper, (self.nz, self.nx))
     mapper[~cell_mask] = -sp.iinfo(int).max
     #
     boundary_dict = {
         'bottom':
             {'bottom': mapper[0, :][cell_mask[0, :]]},
         'top':
             {'top': mapper[-1, :][cell_mask[-1, :]]},
         'left':
             {'left': mapper[:, 0][cell_mask[:, 0]]},
         'right':
             {'right': mapper[:, -1][cell_mask[:, -1]]},
         'front':
             {'front': mapper[cell_mask]},
         'back':
             {'back': mapper[cell_mask]},
         'internal':
             {'bottom': [], 'top': [], 'left': [], 'right': []}
     }
     #
     # determining cells linked to a masked cell
     cell_mask = sp.where(~sp.ravel(cell_mask))[0]
     inds = sp.in1d(self._field._cell_interfaces, cell_mask)
     inds = sp.reshape(inds, (len(self._field._cell_interfaces), 2))
     inds = inds[:, 0].astype(int) + inds[:, 1].astype(int)
     inds = (inds == 1)
     links = self._field._cell_interfaces[inds]
     #
     # adjusting order so masked cells are all on links[:, 1]
     swap = sp.in1d(links[:, 0], cell_mask)
     links[swap] = links[swap, ::-1]
     #
     # setting side based on index difference
     sides = sp.ndarray(len(links), dtype='<U6')
     sides[sp.where(links[:, 1] == links[:, 0]-self.nx)[0]] = 'bottom'
     sides[sp.where(links[:, 1] == links[:, 0]+self.nx)[0]] = 'top'
     sides[sp.where(links[:, 1] == links[:, 0]-1)[0]] = 'left'
     sides[sp.where(links[:, 1] == links[:, 0]+1)[0]] = 'right'
     #
     # adding each block to the internal face dictionary
     inds = sp.ravel(mapper)[links[:, 0]]
     for side, block_id in zip(sides, inds):
         boundary_dict['internal'][side].append(block_id)
     self.set_boundary_patches(boundary_dict, reset=True)
Example #28
 def simulate(self,x0,lambd):
     Dt = self.param['Dt']
     # Dt needs to be a multiple of param['Dt']
     dt = self.param['dt']
     D = lambd[1]
     a = lambd[0]
     N = self.param['N']
     drift = self.param['drift']
     x = scipy.array(x0)
     
     tstart = 0
     tcur = tstart
     while (tcur < tstart + Dt + dt/2 ):
         tcur += dt
         # the random number
         dW=self.rand.normal(loc=0.,scale=scipy.sqrt(2*D*dt),size=N)
       #  if tcur == dt:    #only print random number for first time step
       #      print 'dW =',  dW            
         
         # the process
         drift_term = a * drift(x)
         x=x+drift_term*dt+dW
         # and reflecting boundary conditions
         x = scipy.where(x>self.domain[1],2*self.domain[1]-x,x)
         x = scipy.where(x<self.domain[0],2*self.domain[0]-x,x)
     return x
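
The reflecting boundary can be checked in isolation; a quick sketch with an invented domain (0, 1). Note that, as in the corrected lines above, the result of scipy.where has to be assigned back to x:

import scipy

domain = (0.0, 1.0)
x = scipy.array([-0.2, 0.1, 0.5, 1.3])    # two walkers have stepped outside

x = scipy.where(x > domain[1], 2 * domain[1] - x, x)
x = scipy.where(x < domain[0], 2 * domain[0] - x, x)
# x is now [0.2, 0.1, 0.5, 0.7]: out-of-domain positions are mirrored back inside
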
Example #29
def compute_ndvi(im, r=0, ir=1, NODATA=-10000):
    """The function computes the NDVI of a multivalued image. It checks if
    there is NODATA value or division per zeros.
    
    Args:
    im: the image to process
    r: the number of the band that corresponds to the red band.
    ir: the number of the band that corresponds to the infra-red band.
    NODATA: the value of the NODATA
    
    Returns:
    ndvi =  the ndvi of the image
    """

    ## Get the size of the image
    [nl, nc, nb] = im.shape

    ## Be sure that we can do floating point operations
    imf = im.astype(sp.float64)
    ndvi = sp.empty((nl, nc))

    if nb < 2:
        print "Two bands are needed to compute the NDVI"
        return None
    else:
        den = imf[:, :, ir - 1] + imf[:, :, r - 1]  # Pre compute the denominator
        t = sp.where((den > 0) & (imf[:, :, 1] != NODATA))
        ndvi[t] = (imf[t[0], t[1], ir - 1] - imf[t[0], t[1], r - 1]) / den[t]  # compute the ndvi on the safe samples

        if len(t[0]) < nl * nc:
            t = sp.where((den == 0) | (imf[:, :, 1] == NODATA))  # check for problematic pixels
            ndvi[t] = NODATA

        imf = []
        return ndvi
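
A usage sketch with a tiny two-band image (values invented). The body indexes bands as r-1 and ir-1, so 1-based band numbers are passed here; `import scipy as sp` is assumed, as in the function:

import scipy as sp

# 2x2 image, band 1 = red, band 2 = infra-red, one NODATA pixel
im = sp.array([[[30.,  90.], [40., 80.]],
               [[50., 100.], [-10000., -10000.]]])

ndvi = compute_ndvi(im, r=1, ir=2, NODATA=-10000)
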
Example #30
def est_condprob2(data, val, given):
    """Calculate the probability of P(X|Y,Z)

    est_condprob2(data, 'A', ['M', 'LC'])"""

    if not isinstance(given, list):
        raise IndexError("Given must be a list or tuple of givens")
    elif len(given) != 2:
        raise IndexError("I need multiple givens!  Give me more...give me more!")

    gcols = []
    for g in given:
        if g in ['M', 'F']:
            gcols.append(1)
        elif g in ['LC', 'SC', 'T']:
            gcols.append(2)
        elif g in ['A', 'B', 'C']:
            gcols.append(0)

    if val in ['M', 'F']:
        vcol = 1
    elif val in ['LC', 'SC', 'T']:
        vcol = 2
    elif val in ['A', 'B', 'C']:
        vcol = 0

    datsize = data.shape[0]
    needed = [val, given[0], given[1]]
    t = sp.where([sp.all(data[i]==needed) for i in range(datsize)])[0]

    t2 = sp.where([sp.all(data[i,1:]==given) for i in range(datsize)])[0]
    
    return float(t.size)/t2.size
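
The docstring's own call, made runnable with a small invented dataset; `import scipy as sp` is assumed, as in the function:

import scipy as sp

data = sp.array([['A', 'M', 'LC'],
                 ['B', 'M', 'LC'],
                 ['A', 'M', 'LC'],
                 ['A', 'F', 'SC'],
                 ['C', 'M', 'T']])

# P(X='A' | Y='M', Z='LC'): two of the three ('M', 'LC') rows are 'A'
p = est_condprob2(data, 'A', ['M', 'LC'])   # -> 0.666...
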
Example #31
def percentError(out, true):
    """ return percentage of mismatch between out and target values (lists and arrays accepted) """
    arrout = array(out).flatten()
    wrong = where(arrout != array(true).flatten())[0].size
    return 100. * float(wrong) / float(arrout.size)
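
A quick check of percentError; array and where are assumed to be numpy names available in the function's namespace (e.g. via a pylab star import):

predicted = [0, 1, 2, 1]
target = [0, 1, 1, 1]
err = percentError(predicted, target)   # -> 25.0, one mismatch out of four
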
Example #32
    def plot_spectrum(self, x, rdn_meas, geom, fname=None):

        if fname is None and hasattr(self.output, 'plot_directory') and\
                self.output.plot_directory is not None:
            fname = self.output.plot_directory + '/frame_%i.png' % self.iv.counts
        else:
            return

        plt.cla()
        xmin, xmax = min(self.wl), max(self.wl)
        fig = plt.subplots(1, 2, figsize=(10, 5))
        plt.subplot(1, 2, 1)
        rdn_est = self.iv.fm.calc_rdn(x, geom)
        for lo, hi in self.windows:
            idx = s.where(s.logical_and(self.wl > lo, self.wl < hi))[0]
            p1 = plt.plot(self.iv.fm.wl[idx],
                          rdn_meas[idx],
                          color=[0.7, 0.2, 0.2],
                          linewidth=2)
            plt.hold(True)
            p2 = plt.plot(self.iv.fm.wl, rdn_est, color='k', linewidth=2)
        plt.title("Radiance")
        ymax = max(rdn_meas) * 1.25
        plt.text(500, ymax * 0.92, "Measured", color=[0.7, 0.2, 0.2])
        plt.text(500, ymax * 0.86, "Model", color='k')
        plt.ylabel("$\mu$W nm$^{-1}$ sr$^{-1}$ cm$^{-2}$")
        plt.xlabel("Wavelength (nm)")
        plt.ylim([-0.001, ymax])
        plt.xlim([xmin, xmax])

        plt.subplot(1, 2, 2)
        lrfl_est = self.iv.fm.calc_lrfl(x, geom)
        ymax = min(max(lrfl_est) * 1.25, 0.7)
        for lo, hi in self.windows:
            if self.ref_wl is not None and self.ref_rfl is not None:
                # red line
                idx = s.where(s.logical_and(self.ref_wl > lo,
                                            self.ref_wl < hi))[0]
                p1 = plt.plot(self.ref_wl[idx],
                              self.ref_rfl[idx],
                              color=[0.7, 0.2, 0.2],
                              linewidth=2)
                ymax = max(max(self.ref_rfl[idx] * 1.2), ymax)
                plt.hold(True)
            # black line
            idx = s.where(s.logical_and(self.wl > lo, self.wl < hi))[0]
            p2 = plt.plot(self.iv.fm.wl[idx], lrfl_est[idx], 'k', linewidth=2)
            ymax = max(max(lrfl_est[idx] * 1.2), ymax)
            # green and blue lines - surface components
            if hasattr(self.iv.fm.surface, 'components'):
                p3 = plt.plot(self.iv.fm.wl[idx],
                              self.iv.fm.xa(x, geom)[idx],
                              'b',
                              linewidth=2)
                for j in range(len(self.iv.fm.surface.components)):
                    z = self.iv.fm.surface.norm(
                        lrfl_est[self.iv.fm.surface.refidx])
                    mu = self.iv.fm.surface.components[j][0] * z
                    plt.plot(self.iv.fm.wl[idx], mu[idx], 'g:', linewidth=1)
        plt.ylim([-0.0010, ymax])
        plt.xlim([xmin, xmax])
        plt.title("Reflectance")
        plt.xlabel("Wavelength (nm)")
        if self.ref_rfl is not None:
            plt.text(500,
                     ymax * 0.92,
                     "In situ reference",
                     color=[0.7, 0.2, 0.2])
            plt.text(500, ymax * 0.86, "Remote estimate", color='k')
            plt.text(500, ymax * 0.80, "Prior mean state ", color='b')
            plt.text(500, ymax * 0.74, "Surface components ", color='g')

        plt.savefig(fname)
        plt.close()
Example #33
from mpl_toolkits.basemap.cm import sstanom
from matplotlib.cm import jet

# Read Reynolds SST climatology
path = os.environ['NOBACKUP'] + '/verification/reynolds'
execfile(path + '/ctl.py')

obs = {}
obs['name'] = 'Reynolds SST'
obs['ctl'] = ctl

# Calculate climatology
obs['clim'] = obs['ctl'].fromfile('sst', tind=slice(1, None)).clim(12)
obs['clim'].shiftgrid(30.)
obs['clim'].grid['lon'] = sp.where(obs['clim'].grid['lon'] < 29.,
                                   obs['clim'].grid['lon'] + 360.,
                                   obs['clim'].grid['lon'])

# Calculate DJF, JJA and annual mean
obs['djf'] = obs['clim'].subset(tind=[0, 1, 11]).ave(0)
obs['djf'].name += ', DJF'
obs['jja'] = obs['clim'].subset(tind=[5, 6, 7]).ave(0)
obs['jja'].name += ', JJA'
obs['am'] = obs['clim'].ave(0)
obs['am'].name += ', Annual Mean'

# Equatorial annual cycle

lonind = sp.logical_and(obs['clim'].grid['lon'][0] >= 130.0,
                        obs['clim'].grid['lon'][0] <= 280.0)
latind = sp.logical_and(obs['clim'].grid['lat'][:, 0] >= -2.1,
Example #34
import muesli_functions as mf
import scipy as sp

# Load samples
X, Y = mf.read2bands("../Data/grassland_id_2m.sqlite", 70, 106)

ID = []

# Compute NDVI
NDVI = []
for i in xrange(len(X)):
    X_ = X[i]
    # Compute safe version of NDVI
    DENOM = (X_[:, 1] + X_[:, 0])
    t = sp.where(DENOM > 0)[0]
    NDVI_ = (X_[t, 1] - X_[t, 0]) / DENOM[t]
    if len(NDVI_) > 0:
        NDVI.append(NDVI_)

# Scan Grasslands
for i in xrange(len(NDVI)):
    m = sp.mean(NDVI[i][:, sp.newaxis])
    if m > 0.6:
        ID.append(Y[i])
    print("ID {} and mean NDVI {}".format(Y[i], m))
print("Number of selected grasslands: {}".format(len(ID)))
sp.savetxt("id_grasslands.csv", ID, delimiter=',')
Example #35
def estimate_dispersion(gene_counts, matrix, sf, CFG):

    if CFG['verbose']:
        print 'Estimating raw dispersions'

    if CFG['parallel'] > 1:
        disp_raw = sp.empty((gene_counts.shape[0], 1), dtype='float')
        disp_raw.fill(sp.nan)
        disp_raw_conv = sp.zeros((gene_counts.shape[0], 1), dtype='bool')

        pool = mp.Pool(processes=CFG['parallel'],
                       initializer=lambda: sig.signal(sig.SIGINT, sig.SIG_IGN))
        binsize = 30
        idx_chunks = [
            sp.arange(x, min(x + binsize, gene_counts.shape[0]))
            for x in range(0, gene_counts.shape[0], binsize)
        ]

        try:
            result = [
                pool.apply_async(estimate_dispersion_chunk,
                                 args=(
                                     gene_counts[idx, :],
                                     matrix,
                                     sf,
                                     CFG,
                                     idx,
                                 )) for idx in idx_chunks
            ]
            res_cnt = 0
            while result:
                tmp = result.pop(0).get()
                for i, j in enumerate(tmp[2]):
                    if CFG['verbose']:
                        log_progress(res_cnt, gene_counts.shape[0])
                        res_cnt += 1
                    disp_raw[j] = tmp[0][i]
                    disp_raw_conv[j] = tmp[1][i]
            if CFG['verbose']:
                log_progress(gene_counts.shape[0], gene_counts.shape[0])
                print ''
            pool.terminate()
            pool.join()
        except KeyboardInterrupt:
            print >> sys.stderr, 'Keyboard Interrupt - exiting'
            pool.terminate()
            pool.join()
            sys.exit(1)
    else:
        (disp_raw, disp_raw_conv,
         _) = estimate_dispersion_chunk(gene_counts,
                                        matrix,
                                        sf,
                                        CFG,
                                        sp.arange(gene_counts.shape[0]),
                                        log=CFG['verbose'])

    if CFG['debug']:
        fig = plt.figure(figsize=(8, 6), dpi=100)
        ax = fig.add_subplot(111)
        idx = sp.where(~sp.isnan(disp_raw))[0]
        ax.plot(
            sp.mean(sp.log10(gene_counts + 1), axis=1)[idx], disp_raw[idx],
            'bo')
        ax.set_title('Raw Dispersion Estimate')
        ax.set_xlabel('Mean expression count')
        ax.set_ylabel('Dispersion')
        plt.savefig('dispersion_raw.pdf', format='pdf', bbox_inches='tight')
        plt.close(fig)

    return (disp_raw, disp_raw_conv)
Example #36
def projection(dt, people, contacts, Vd, dmin = 0.0, \
               nb_iter_max = 100000, rho=0.1, tol = 0.01, log=False, method="cvxopt"):
    """
    From the desired velocities Vd, this projection step consists of computing \
    the global velocity field defined as the closest velocity to the \
    desired one among all the feasible fields (i.e. fields which do not lead \
    to disks overlapping).

    Parameters
    ----------
    dt: float
        time step
    people: numpy array
        people coordinates and radius : x,y,r
    contacts: numpy array
        all the contacts : i,j,dij,eij_x,eij_y
    Vd: numpy array
        people desired velocities
    dmin: float
        minimum distance guaranteed between individuals
    nb_iter_max: integer
        maximum number of iterations allowed
    rho: float
        parameter of the Uzawa method
    tol: float
        desired tolerance
    log: boolean
        to print the final accuracy, number of iterations,...
    method: string
        optimization algorithm : 'cvxopt' (default) or 'uzawa' (or 'mosek' if installed \
        with a license file).

    Returns
    -------
    B: numpy array
        constraint matrix
    U: numpy array
        new people velocities ensuring that there is no overlap \
        between individuals
    L: numpy array
        Lagrange multipliers (only when method='uzawa', None otherwise)
    P: numpy array
        pressure on each individual (only when method='uzawa', None otherwise)
    info: integer
        number of iterations needed
    """
    Np = people.shape[0]
    Nc = contacts.shape[0]
    info = 0
    if (Nc == 0):
        info = 1
        return info, None, Vd, None, None
    else:

        if (method == "cvxopt") or (method == "mosek"):

            import cvxopt
            cvxopt.solvers.options['show_progress'] = False
            cvxopt.solvers.maxiters = 1000
            cvxopt.solvers.abstol = 1e-8
            cvxopt.solvers.reltol = 1e-7
            L = None
            P = None
            U = sp.zeros((2 * Np, ))
            V = sp.zeros((2 * Np, ))
            Z = (contacts[:, 2] - dmin) / dt  ## ie Dij/dt
            V[::2] = Vd[:, 0]
            V[1::2] = Vd[:, 1]  ## A priori velocity
            V = cvxopt.matrix(V)
            Z = cvxopt.matrix(Z, (Nc, 1))
            Id = cvxopt.spdiag([1] * (U.shape[0]))
            if (Nc > 0):
                II = contacts[:, 0].astype(int)
                JJ = contacts[:, 1].astype(int)
                Jpos = sp.where(JJ >= 0)[0]
                Jneg = sp.where(JJ < 0)[0]
                row = sp.concatenate([Jpos, Jpos, Jpos, Jpos, Jneg, Jneg])
                col = sp.concatenate([
                    2 * II[Jpos], 2 * II[Jpos] + 1, 2 * JJ[Jpos],
                    2 * JJ[Jpos] + 1, 2 * II[Jneg], 2 * II[Jneg] + 1
                ])
                data = sp.concatenate([
                    contacts[Jpos, 3], contacts[Jpos, 4], -contacts[Jpos, 3],
                    -contacts[Jpos, 4], -contacts[Jneg, 3], -contacts[Jneg, 4]
                ])
                B = csr_matrix((data, (row, col)),
                               shape=(Nc, 2 * Np))  #.toarray()
                cvxoptB = cvxopt.spmatrix(sp.array(data),
                                          sp.array(row),
                                          sp.array(col),
                                          size=(Nc, 2 * Np))
                if (method == "mosek"):
                    from mosek import iparam
                    cvxopt.solvers.options['mosek'] = {iparam.log: 0}
                    solution = cvxopt.solvers.qp(Id,
                                                 -V,
                                                 cvxoptB,
                                                 Z,
                                                 solver='mosek')
                else:
                    solution = cvxopt.solvers.qp(Id, -V, cvxoptB, Z)
                    info = solution["iterations"]
            U = solution['x']
            if log:
                C = Z - B @ U
                if (method == "mosek"):
                    print("    projection (mosek) : nb of contacts = ", Nc,
                          ", contrainte (Z-B@U).min() = ", C.min())
                else:
                    print("    projection (cvxopt) : nb of contacts = ", Nc,
                          ", nb of iterations = ", solution["iterations"],
                          ", status = ", solution["status"],
                          ", contrainte (Z-B@U).min() = ", C.min())
            U = sp.array(U).reshape((Np, 2))

        elif (method == "uzawa"):

            info = 0
            II = contacts[:, 0].astype(int)
            JJ = contacts[:, 1].astype(int)
            Jpos = sp.where(JJ >= 0)[0]
            Jneg = sp.where(JJ < 0)[0]
            row = sp.concatenate([Jpos, Jpos, Jpos, Jpos, Jneg, Jneg])
            col = sp.concatenate([
                2 * II[Jpos], 2 * II[Jpos] + 1, 2 * JJ[Jpos], 2 * JJ[Jpos] + 1,
                2 * II[Jneg], 2 * II[Jneg] + 1
            ])
            data = sp.concatenate([
                contacts[Jpos, 3], contacts[Jpos, 4], -contacts[Jpos, 3],
                -contacts[Jpos, 4], -contacts[Jneg, 3], -contacts[Jneg, 4]
            ])
            B = csr_matrix((data, (row, col)), shape=(Nc, 2 * Np))  #.toarray()
            L = sp.zeros((Nc, ))
            R = 99 * sp.ones((Nc, ))
            U = sp.zeros((2 * Np, ))
            V = sp.zeros((2 * Np, ))
            D = contacts[:, 2]
            V[::2] = Vd[:, 0]
            V[1::2] = Vd[:, 1]
            k = 0
            while ((dt * R.max() > tol * 2 * people[:, 2].min())
                   and (k < nb_iter_max)):
                U[:] = V[:] - B.transpose() @ L[:]
                R[:] = B @ U[:] - (D[:] - dmin) / dt
                L[:] = sp.maximum(L[:] + rho * R[:], 0)
                k += 1
            P = sp.zeros(Np)  ## Pressure
            P[II[Jpos]] += 3 / (4 * sp.pi * people[II[Jpos], 2]**2) * L[Jpos]
            P[JJ[Jpos]] += 3 / (4 * sp.pi * people[JJ[Jpos], 2]**2) * L[Jpos]
            P[II[Jneg]] += 3 / (4 * sp.pi * people[II[Jneg], 2]**2) * L[Jneg]
            if log:
                print("    projection (uzawa) : nb of contacts = ", Nc,
                      ", nb of iterations = ", k, ", min = ", R.min(),
                      ", max = ", R.max(), ", tol = ", tol)
            if (k == nb_iter_max):
                print("** WARNING : Method projection **")
                print(
                    "** WARNING : you have reached the maximum number of iterations,"
                )
                print("** WARNING : it remains unsatisfied constraints !! ")
                info = -1
            else:
                info = k

        return info, B, U.reshape((Np, 2)), L, P
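
A minimal run of projection through its 'uzawa' branch (which needs only scipy and csr_matrix, not cvxopt); the two-person configuration is invented for illustration, and `import scipy as sp` plus `from scipy.sparse import csr_matrix` are assumed to be available as in the module this function comes from:

import scipy as sp
from scipy.sparse import csr_matrix

# two people of radius 0.5 walking head-on into each other
people = sp.array([[0.0, 0.0, 0.5],
                   [0.9, 0.0, 0.5]])
Vd = sp.array([[ 1.0, 0.0],
               [-1.0, 0.0]])
# single contact i=0, j=1 : dij = 0.9 - 1.0 = -0.1, eij = (1, 0)
contacts = sp.array([[0, 1, -0.1, 1.0, 0.0]])

info, B, U, L, P = projection(0.1, people, contacts, Vd, method="uzawa", log=True)
# U holds the corrected (non-overlapping) velocities, P the contact "pressures"
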
griddata = ClassificationDataSet(2, 1, nb_classes=3)
for i in xrange(X.size):
    griddata.addSample([X.ravel()[i], Y.ravel()[i]], [0])
griddata._convertToOneOfMany()  # makes the network reliable
# start the training iterations
for i in range(20):
    trainer.trainEpochs(1)
    trnresult = percentError(trainer.testOnClassData(), trndata['class'])
    tstresult = percentError(trainer.testOnClassData(dataset=tstdata),
                             tstdata['class'])

    print "epoch: %4d" % trainer.totalepochs, \
        "  train error: %5.2f%%" % trnresult, \
        "  test error: %5.2f%%" % tstresult
    out = fnn.activateOnDataset(griddata)
    out = out.argmax(axis=1)  # the highest output activation gives the class
    out = out.reshape(X.shape)
    figure(1)
    ioff()  # interactive graphics off
    clf()  # clear the plot
    hold(True)  # overplot on
    for c in [0, 1, 2]:
        here, _ = where(tstdata['class'] == c)
        plot(tstdata['input'][here, 0], tstdata['input'][here, 1], 'o')
    if out.max() != out.min():  # safety check against flat field
        contourf(X, Y, out)  # plot the contour
    ion()  # interactive graphics on
    draw()  # update the plot
    ioff()
    show()
Example #38
def plot_sensor_data(ifig, sensor_data, time, initial_door_dist=None, axis = None, \
                     flux_timestep=1, \
                     savefig=False, filename='fig.png', cmap='winter'):
    """
    When a sensor line is defined this function allows one to draw the \
    distribution of the people exit times.

    Parameters
    ----------

    ifig: int
        figure number
    sensor_data : numpy array
        [time, direction, intersection_point[2]] for each individual
    time: float
        time in seconds
    initial_door_dist: numpy array
        people initial distance to the door
    axis: numpy array
        matplotlib axis : [xmin, xmax, ymin, ymax]
    flux_timestep: float
        timestep for the fluxes : number of persons per flux_timestep seconds
    savefig: boolean
        writes the figure as a png file if true
    filename: string
        png filename used to write the figure
    cmap: string
        matplotlib colormap name
    """
    Np = sensor_data.shape[0]
    tmin = 0
    tmax = time

    fig = plt.figure(ifig)
    plt.clf()

    ax1 = fig.add_subplot(211)
    if (initial_door_dist is None):
        ax1.plot(sp.arange(Np), sensor_data[:, 0], 'b+')
        ax1.set_title('Crossing time (s) vs people id')
    else:
        ax1.plot(initial_door_dist, sensor_data[:, 0], 'b+')
        ax1.set_title('Crossing time (s) vs initial door distance (m)')
    if (axis):
        ax1.set_xlim(axis[0], axis[1])
        ax1.set_ylim(axis[2], axis[3])
    #ax1.set_xticks([])
    #ax1.set_yticks([])
    #ax1.axis('off')

    tgrid = sp.arange(tmin, tmax, step=flux_timestep)
    tgrid = sp.append(tgrid, tgrid[-1] + flux_timestep)
    flux_exits = sp.zeros(tgrid.shape)
    flux_entries = sp.zeros(tgrid.shape)
    exits = sp.where(sensor_data[:, 1] == 1)[0]
    entries = sp.where(sensor_data[:, 1] == -1)[0]
    t_exits = sp.ceil((sensor_data[exits, 0] - tmin) / flux_timestep)
    t_entries = sp.ceil((sensor_data[entries, 0] - tmin) / flux_timestep)
    #t_exits = sp.floor((sensor_data[exits,0]-tmin)/flux_timestep)
    #t_entries = sp.floor((sensor_data[entries,0]-tmin)/flux_timestep)
    unique_exits, counts_exits = sp.unique(t_exits, return_counts=True)
    unique_entries, counts_entries = sp.unique(t_entries, return_counts=True)
    flux_exits[unique_exits.astype(int)] = counts_exits
    flux_entries[unique_entries.astype(int)] = counts_entries

    ax2 = fig.add_subplot(212)
    ax2.plot(tgrid, flux_entries, ':og', tgrid, flux_exits, ':or')
    ax2.set_title("Entries (green) and exits (red) per " + str(flux_timestep) +
                  " s")
    if (axis):
        ax2.set_xlim(axis[0], axis[1])
        ax2.set_ylim(axis[2], axis[3])
    #ax2.set_xticks([])
    #ax2.set_yticks([])
    #ax2.axis('off')
    # Optionally : adds some histograms
    # if (exits.shape[0]>0):
    #     ax3 = fig.add_subplot(413)
    #     t_exits_sorted = sp.sort(sensor_data[exits,0])
    #     #print("t_exits_sorted = ",t_exits_sorted)
    #     tmp = sp.concatenate(([0],t_exits_sorted))
    #     bins = 0.5*(tmp[:-1]+tmp[1:])
    #     widths = tmp[1:]-tmp[:-1]
    #     heights = 1/widths
    #     ax3.bar(bins, heights, width=widths,color='r',align='center')
    #
    # if (entries.shape[0]>0):
    #     ax4 = fig.add_subplot(414)
    #     t_entries_sorted = sp.sort(sensor_data[entries,0])
    #     tmp = sp.concatenate(([0],t_entries_sorted))
    #     bins = 0.5*(tmp[:-1]+tmp[1:])
    #     widths = tmp[1:]-tmp[:-1]
    #     heights = 1/widths
    #     ax4.bar(bins, heights, width=widths,color='r',align='center')
    fig.set_tight_layout(True)
    fig.canvas.draw()
    if (savefig):
        fig.savefig(filename, dpi=300)
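
A usage sketch for plot_sensor_data with fabricated crossing data (one row per person: time, direction, intersection point); scipy as sp and matplotlib.pyplot as plt are assumed to be imported in the function's module:

import scipy as sp

# ten people crossing the sensor line, two of them in the 'entry' direction
sensor_data = sp.array([[ 1.2,  1, 0.0, 2.0],
                        [ 2.5,  1, 0.1, 2.0],
                        [ 3.1, -1, 0.2, 2.0],
                        [ 4.8,  1, 0.0, 2.0],
                        [ 6.0,  1, 0.3, 2.0],
                        [ 7.7, -1, 0.1, 2.0],
                        [ 8.4,  1, 0.2, 2.0],
                        [ 9.9,  1, 0.0, 2.0],
                        [11.3,  1, 0.1, 2.0],
                        [12.6,  1, 0.3, 2.0]])

plot_sensor_data(1, sensor_data, time=14.0, flux_timestep=2,
                 savefig=True, filename='sensor.png')
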
Example #39
def compute_contacts(dom, people, dmax):
    """
    This function uses a KDTree method to find the contacts \
    between individuals. Moreover the contacts with the walls \
    are also determined from the wall distance (obtained by the \
    fast-marching method).

    Parameters
    ----------
    dom: Domain
        contains everything for managing the domain
    people: numpy array
        people coordinates and radius : x,y,r
    dmax: float
        threshold value used to consider a contact as \
        active (dij<dmax)

    Returns
    -------
    contacts: numpy array
        all the contacts i,j,dij,eij_x,eij_y such that dij<dmax \
        and i<j (no duplication)
    """
    # lf : the number of points at which the algorithm
    # switches over to brute-force. Has to be positive.
    lf = 100
    if (lf > sys.getrecursionlimit()):
        sys.setrecursionlimit(lf)
    kd = cKDTree(people[:, :2], leafsize=lf)
    ## Find all pairs of points whose distance is at most dmax+2*rmax
    rmax = people[:, 2].max()
    neighbors = kd.query_ball_tree(kd, dmax + 2 * rmax)
    ## Create the contact array : i,j,dij,eij_x,eij_y
    first_elements = sp.arange(people.shape[0])  ## i.e. i
    other_elements = list(map(lambda x: x[1:],
                              neighbors))  ## i.e. all the j values for each i
    lengths = list(map(len, other_elements))
    tt = sp.stack([first_elements, lengths], axis=1)
    I = sp.concatenate(list(map(lambda x: sp.full((x[1], ), x[0]),
                                tt))).astype(int)
    J = sp.concatenate(other_elements).astype(int)
    ind = sp.where(I < J)[0]
    I = I[ind]
    J = J[ind]
    DP = people[J, :2] - people[I, :2]
    Norm = sp.linalg.norm(DP, axis=1, ord=2)
    Dij = Norm - people[I, 2] - people[J, 2]
    ind = sp.where(Dij < dmax)[0]
    Dij = Dij[ind]
    I = I[ind]
    J = J[ind]
    Norm = Norm[ind]
    DP = DP[ind]
    contacts = sp.stack([I, J, Dij, DP[:, 0] / Norm, DP[:, 1] / Norm], axis=1)
    # Add contacts with the walls
    II = sp.floor((people[:, 1] - dom.ymin - 0.5 * dom.pixel_size) /
                  dom.pixel_size).astype(int)
    JJ = sp.floor((people[:, 0] - dom.xmin - 0.5 * dom.pixel_size) /
                  dom.pixel_size).astype(int)
    DD = dom.wall_distance[II, JJ] - people[:, 2]
    ind = sp.where(DD < dmax)[0]
    wall_contacts = sp.stack([
        ind, -1 * sp.ones(ind.shape), DD[ind],
        dom.wall_grad_X[II[ind], JJ[ind]], dom.wall_grad_Y[II[ind], JJ[ind]]
    ],
                             axis=1)
    contacts = sp.vstack([contacts, wall_contacts])
    return sp.array(contacts)
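A minimal sketch of the pair-search core of compute_contacts on a few synthetic disks, using cKDTree.query_pairs, which directly returns the i < j pairs that the function above reconstructs from query_ball_tree (the wall contacts need a Domain object and are omitted; the small people array is made up):

import numpy as np
from scipy.spatial import cKDTree

# x, y, r for four disks
people = np.array([[0.0, 0.0, 0.2],
                   [0.5, 0.0, 0.2],
                   [3.0, 0.0, 0.2],
                   [0.0, 0.4, 0.2]])
dmax = 0.3
rmax = people[:, 2].max()

# candidate pairs whose centers are closer than dmax + 2*rmax
kd = cKDTree(people[:, :2], leafsize=100)
for i, j in sorted(kd.query_pairs(dmax + 2 * rmax)):
    dp = people[j, :2] - people[i, :2]
    norm = np.linalg.norm(dp)
    dij = norm - people[i, 2] - people[j, 2]   # gap between the two disks
    if dij < dmax:                             # keep only active contacts
        print(i, j, dij, dp / norm)            # i, j, dij, eij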
Example #40
0
def count_graph_coverage(genes, fn_bam=None, CFG=None, fn_out=None):
    # [counts] = count_graph_coverage(genes, fn_bam, CFG, fn_out)

    if fn_bam is None and isinstance(genes, dict):
        PAR = genes
        genes = PAR['genes']
        fn_bam = PAR['fn_bam']
        if 'fn_out' in PAR:
            fn_out = PAR['fn_out']
        CFG = PAR['CFG']

    if not isinstance(fn_bam, list):
        fn_bam = [fn_bam]
    counts = sp.zeros((len(fn_bam), genes.shape[0]), dtype='object')

    intron_tol = 0

    sys.stdout.write('genes: %i\n' % genes.shape[0])
    for f in range(counts.shape[0]):
        sys.stdout.write('\nsample %i/%i\n' % (f + 1, counts.shape[0]))

        ### iterate over all genes and generate counts for
        ### the segments in the segment graph
        ### and the splice junctions in the splice graph
        ### iterate per contig, so the bam caching works better
        contigs = sp.array([x.chr for x in genes])
        for contig in sp.unique(contigs):
            contig_idx = sp.where(contigs == contig)[0]
            bam_cache = dict()
            print '\ncounting %i genes on contig %s' % (contig_idx.shape[0],
                                                        contig)
            for ii, i in enumerate(contig_idx):
                sys.stdout.write('.')
                if ii > 0 and ii % 50 == 0:
                    sys.stdout.write('%i/%i\n' % (ii, contig_idx.shape[0]))
                sys.stdout.flush()
                gg = genes[i]
                if gg.segmentgraph.is_empty():
                    gg.segmentgraph = Segmentgraph(gg)
                gg.start = gg.segmentgraph.segments.ravel().min()
                gg.stop = gg.segmentgraph.segments.ravel().max()

                counts[f, i] = Counts(gg.segmentgraph.segments.shape[1])

                if CFG['bam_to_sparse'] and (
                        fn_bam[f].endswith('npz') or
                        os.path.exists(re.sub(r'bam$', '', fn_bam[f]) +
                                       'npz')):
                    ### make sure that we query the right contig from cache
                    assert (gg.chr == contig)
                    (tracks, intron_list) = add_reads_from_sparse_bam(
                        gg,
                        fn_bam[f],
                        contig,
                        types=['exon_track', 'intron_list'],
                        filter=None,
                        cache=bam_cache)
                else:
                    ### add RNA-seq evidence to the gene structure
                    (tracks, intron_list) = add_reads_from_bam(
                        gg, fn_bam[f], ['exon_track', 'intron_list'], None,
                        CFG['var_aware'], CFG['primary_only'])
                    intron_list = intron_list[0]  ### TODO

                ### extract mean exon coverage for all segments
                for j in range(gg.segmentgraph.segments.shape[1]):
                    idx = sp.arange(gg.segmentgraph.segments[0, j],
                                    gg.segmentgraph.segments[1, j]) - gg.start
                    counts[f, i].segments[j] = sp.mean(
                        sp.sum(tracks[:, idx], axis=0))
                    counts[f, i].seg_pos[j] = sp.sum(
                        sp.sum(tracks[:, idx], axis=0) > 0)

                k, l = sp.where(gg.segmentgraph.seg_edges == 1)

                ### there are no introns to count
                if intron_list.shape[0] == 0:
                    for m in range(k.shape[0]):
                        if counts[f, i].edges.shape[0] == 0:
                            counts[f, i].edges = sp.atleast_2d(
                                sp.array([
                                    sp.ravel_multi_index(
                                        [k[m], l[m]],
                                        gg.segmentgraph.seg_edges.shape), 0
                                ]))
                        else:
                            counts[f, i].edges = sp.r_[
                                counts[f, i].edges,
                                sp.atleast_2d(
                                    sp.array([
                                        sp.ravel_multi_index(
                                            [k[m], l[m]], gg.segmentgraph.
                                            seg_edges.shape), 0
                                    ]))]
                    continue

                ### extract intron counts
                for m in range(k.shape[0]):
                    idx = sp.where(
                        (sp.absolute(intron_list[:, 0] - gg.segmentgraph.
                                     segments[1, k[m]]) <= intron_tol)
                        & (sp.absolute(intron_list[:, 1] - gg.segmentgraph.
                                       segments[0, l[m]]) <= intron_tol))[0]
                    if counts[f, i].edges.shape[0] == 0:
                        if idx.shape[0] > 0:
                            counts[f, i].edges = sp.atleast_2d(
                                sp.array([
                                    sp.ravel_multi_index(
                                        [k[m], l[m]],
                                        gg.segmentgraph.seg_edges.shape),
                                    sp.sum(intron_list[idx, 2])
                                ]))
                        else:
                            counts[f, i].edges = sp.atleast_2d(
                                sp.array([
                                    sp.ravel_multi_index(
                                        [k[m], l[m]],
                                        gg.segmentgraph.seg_edges.shape), 0
                                ]))
                    else:
                        if idx.shape[0] > 0:
                            counts[f, i].edges = sp.r_[
                                counts[f, i].edges,
                                sp.atleast_2d(
                                    sp.array([
                                        sp.ravel_multi_index([k[m], l[m]], gg.
                                                             segmentgraph.
                                                             seg_edges.shape),
                                        sp.sum(intron_list[idx, 2])
                                    ]))]
                        else:
                            counts[f, i].edges = sp.r_[
                                counts[f, i].edges,
                                sp.atleast_2d(
                                    sp.array([
                                        sp.ravel_multi_index(
                                            [k[m], l[m]], gg.segmentgraph.
                                            seg_edges.shape), 0
                                    ]))]

    if fn_out is not None:
        cPickle.dump(counts, open(fn_out, 'w'), -1)
    else:
        return counts
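The per-edge counts above are keyed by a single integer obtained by flattening the (k, l) position of each edge in the seg_edges matrix with sp.ravel_multi_index. A tiny illustration of that indexing trick (the shape below is made up):

import numpy as np

seg_edges_shape = (4, 4)   # hypothetical segment-graph adjacency shape
k, l = 1, 3                # an edge between segments 1 and 3

edge_id = np.ravel_multi_index([k, l], seg_edges_shape)
print(edge_id)                                      # 1 * 4 + 3 = 7
print(np.unravel_index(edge_id, seg_edges_shape))   # back to (1, 3)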
Example #41
0
    def preprocess_data_stack(self, stack_num, n_jobs, file_list, pattern,
                              white, dark):
        # Average, merge and preprocess a stack of images
        # Typically a stack corresponds to one ptychographic position
        l = []
        tmp = None
        # First - average according to the pattern
        if pattern in [1, 2]:
            # Averaging only
            for filename in file_list:
                if tmp is None:
                    tmp = self.openup(filename)
                else:
                    tmp += self.openup(filename)
            l.append(tmp / len(file_list))
        elif pattern == 3:
            # Average then merge
            d = {}
            unique_times = list(set([t.split('_')[3] for t in file_list]))
            for filename in file_list:
                t = filename.split('.')[0].split('_')[-1]
                if t not in d:
                    # use a list (not a tuple) so the count and the
                    # accumulated frame can be updated in place below
                    d[t] = [1, self.openup(filename)]
                else:
                    d[t][0] += 1
                    d[t][1] += self.openup(filename)

            # replace each [count, accumulated frame] entry by the averaged frame
            for key, (i, val) in d.iteritems():
                d[key] = val / float(i)

            # Check for saturated values and merge variable exposure times
            max_time = max(unique_times)
            if CXP.preprocessing.saturation_level > 0:
                for key in d.keys():
                    wh = sp.where(d[key] >= CXP.preprocessing.saturation_level)
                    d[key][wh] = 0
                    if tmp is None:
                        tmp = d[key] * max_time / float(key)
                    else:
                        tmp += d[key] * max_time / float(key)

            l.append(tmp)

        else:
            raise Exception('NamingConventionError')

        # Do preprocessing

        data = CXData()
        data.data = l

        if CXP.measurement.beam_stop:
            data.treat_beamstop()

        data.symmetrize_array_shape()

        # CCD Specific Preprocessing
        if CXP.preprocessing.detector_type == 'ccd':

            try:
                # Dark field correction
                if dark is not None:
                    print('Dark field correcting data')
                    data -= dark

                # Dark correct white field
                if white is not None:
                    print('Dark field correcting whitefield')
                    white -= dark

            except UnboundLocalError:
                print('No darkfield subtraction performed.')

        # PAD Specific Preprocessing
        elif CXP.preprocessing.detector_type == 'pad':
            pass

        # Threshhold data
        if CXP.preprocessing.threshhold_raw_data > 0:
            data.threshhold()
            if white is not None:
                white.threshhold()

        # Bin data
        if CXP.preprocessing.bin > 1:
            data.bin()
            if white is not None:
                white.bin()

        if CXP.preprocessing.rot90 != 0:
            data.rot90(CXP.preprocessing.rot90)
            if white is not None:
                white.rot90(CXP.preprocessing.rot90)

        # Take square root
        data.square_root()
        if white is not None:
            white.square_root()

        # Put in FFT shifted
        data.fft_shift()
        if white is not None:
            white.fft_shift()

        return (stack_num, data.data)
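In the pattern-3 branch above, frames taken with different exposure times are merged by zeroing saturated pixels and rescaling each averaged frame by max_time / exposure_time before summing, so that all frames contribute on the same intensity scale. A minimal sketch of that merge on hypothetical exposure times:

import numpy as np

# hypothetical averaged frames keyed by exposure time (seconds)
frames = {0.1: np.array([[10., 20.], [30., 40.]]),
          0.5: np.array([[45., 90.], [135., 180.]])}
saturation_level = 100.0
max_time = max(frames.keys())

merged = None
for exposure, frame in frames.items():
    frame = frame.copy()
    frame[frame >= saturation_level] = 0.0   # zero out saturated pixels
    scaled = frame * max_time / exposure     # rescale to the longest exposure
    merged = scaled if merged is None else merged + scaled
print(merged)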
Example #42
0
    if len(l) == 3:
        x.append(float(l[0]))
        t.append(float(l[1]))
        y.append(float(l[2]))

x = numpy.array(x)
t = numpy.array(t)
y = numpy.array(y)
dosage = []
sig = []
means = []
xpts = numpy.unique(x)

plots = []
for i in xpts:
    bm = scipy.where(x == i)
    a = numpy.array(t[bm]).view(numpy.recarray)
    duplicates = numpy.core.records.find_duplicate(a)
    for dup in duplicates:
        dup_bm = scipy.where((x == i) & (t == dup))[0]
        x = numpy.delete(x, dup_bm[-1])
        t = numpy.delete(t, dup_bm[-1])
        y = numpy.delete(y, dup_bm[-1])
        bm = scipy.where(x == i)

    #plots.append(Gnuplot.Data(y[bm], with_='lines'))
    #dosage.append(scipy.integrate.simps(y[bm]))
    plots.append(Gnuplot.Data(t[bm], y[bm], with_='lines'))
    dosage.append(scipy.integrate.simps(y[bm], x=t[bm]))
    sig.append(numpy.std(y[bm]))
    means.append(numpy.mean(y[bm]))
Example #43
0
def n_to_one(arr):
    """ Returns the reverse of a 1-in-n binary encoding. """
    return where(arr == 1)[0][0]
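A one-line usage sketch, assuming n_to_one and the where import from the snippet are in scope:

from numpy import array

one_hot = array([0, 0, 1, 0])
print(n_to_one(one_hot))  # 2, the index of the active class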
Example #44
0
def check_people_in_box(dom, box, p, rng):
    """
    Check that people coordinates are in the given box (test 1) and in a \
    usable space, i.e. in an area which is accessible and not blocked by \
    obstacles (test 2). Individuals who fail either of these two tests are \
    moved to a new random position.

    Parameters
    ----------
    dom: Domain
        contains everything for managing the domain
    box: list
        coordinates of the box [xmin, xmax, ymin, ymax]
    p: numpy array
        people coordinates x y r
    rng: RandomState
        scipy random state object (see scipy.random.RandomState)

    Returns
    -------
    info: bool
        True if at least one individual had to be moved
    p: numpy array
        new people coordinates x y r

    """
    print("------ check_people_in_box --> To verify that "+str(p.shape[0])+ \
          " individuals are in the domain, in the box and with a defined"+ \
          " desired velocity")
    p_rmax = p[:, 2].max()
    xmin, xmax, ymin, ymax = box
    info = False
    while True:
        ## test 1
        I = sp.floor((p[:, 1] - dom.ymin - 0.5 * dom.pixel_size) /
                     dom.pixel_size).astype(int)
        J = sp.floor((p[:, 0] - dom.xmin - 0.5 * dom.pixel_size) /
                     dom.pixel_size).astype(int)
        test1 = (I >= 0) * (I < dom.height) * (J >= 0) * (J < dom.width)
        ind1 = sp.where(test1 == 0)[0]
        if (ind1.shape[0] > 0):
            print("------ check_people_in_box --> "+str(ind1.shape[0])+ \
                  " individuals outside the domain")
            info = True
            p[ind1, 0] = rng.uniform(xmin + p_rmax, xmax - p_rmax,
                                     ind1.shape[0])
            p[ind1, 1] = rng.uniform(ymin + p_rmax, ymax - p_rmax,
                                     ind1.shape[0])
        else:
            ## test 2
            I, J, Vd = compute_desired_velocity(dom, p)
            normVd = Vd[:, 0]**2 + Vd[:, 1]**2
            test2 = (p[:,0]>xmin+p_rmax)*(p[:,0]<xmax-p_rmax) \
                  *(p[:,1]>ymin+p_rmax)*(p[:,1]<ymax-p_rmax) \
                  *(normVd>0)
            ind2 = sp.where(test2 == 0)[0]
            if (ind2.shape[0] > 0):
                print("------ check_people_in_box --> "+str(ind2.shape[0])+ \
                      " individuals with an undefined desired velocity ")
                info = True
                p[ind2, 0] = rng.uniform(xmin + p_rmax, xmax - p_rmax,
                                         ind2.shape[0])
                p[ind2, 1] = rng.uniform(ymin + p_rmax, ymax - p_rmax,
                                         ind2.shape[0])
            else:
                print("------ check_people_in_box --> OK !")
                break
    return info, p
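Individuals failing a test are redrawn uniformly inside the box, shrunk by the largest radius so that no disk sticks out of it. A minimal sketch of that redraw step on synthetic data (the domain and desired-velocity tests need a Domain object and are omitted):

import numpy as np

rng = np.random.RandomState(1234)
p = np.array([[0.1, 0.1, 0.3],    # x, y, r : too close to the box boundary below
              [2.0, 2.0, 0.3]])
xmin, xmax, ymin, ymax = 0.0, 4.0, 0.0, 4.0
p_rmax = p[:, 2].max()

# keep only individuals strictly inside the box with a margin of the largest radius
inside = (p[:, 0] > xmin + p_rmax) * (p[:, 0] < xmax - p_rmax) \
       * (p[:, 1] > ymin + p_rmax) * (p[:, 1] < ymax - p_rmax)
ind = np.where(inside == 0)[0]

# redraw the failing individuals uniformly in the shrunk box
p[ind, 0] = rng.uniform(xmin + p_rmax, xmax - p_rmax, ind.shape[0])
p[ind, 1] = rng.uniform(ymin + p_rmax, ymax - p_rmax, ind.shape[0])
print(p)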
Example #45
0
def _get_counts(chr_name,
                start,
                stop,
                files,
                intron_cov,
                intron_cnt=False,
                verbose=False,
                collapsed=True,
                bins=0):
    """Internal function that queries the bam files and produces the counts"""

    ### PYSAM CIGAR ENCODING
    # M   BAM_CMATCH  0
    # I   BAM_CINS    1
    # D   BAM_CDEL    2
    # N   BAM_CREF_SKIP   3
    # S   BAM_CSOFT_CLIP  4
    # H   BAM_CHARD_CLIP  5
    # P   BAM_CPAD    6
    # =   BAM_CEQUAL  7
    # X   BAM_CDIFF   8

    ### init counts
    counts = sp.zeros((len(files), stop - start + 1))
    intron_counts = sp.zeros((len(files), stop - start + 1))
    intron_list = [dict() for i in range(len(files))]

    for f_i, fn in enumerate(files):
        if fn.lower().endswith('bam'):
            if verbose:
                print >> sys.stdout, "reading bam %i of %i" % (f_i + 1,
                                                               len(files))
            try:
                infile = pysam.Samfile(str(fn), "rb")
            except ValueError:
                print >> sys.stderr, 'Could not load file %s - skipping' % fn
                continue
            c_len = stop - start + 1

            for line in infile.fetch(chr_name, start, stop):
                if line.is_secondary:
                    continue
                pos = line.pos
                for o in line.cigar:
                    if o[0] in [0, 2, 3]:
                        ### get segment overlap to current region
                        seg_offset = max(0, start - pos)
                        seg_len = o[1] - seg_offset
                        if seg_len > 0:
                            seg_start = max(pos - start, 0)
                            if o[0] in [0, 2]:
                                counts[f_i,
                                       seg_start:min(seg_start +
                                                     seg_len, c_len)] += 1
                            elif (intron_cov or intron_cnt) and o[0] == 3:
                                if pos >= start and (pos + o[1]) <= stop:
                                    if intron_cov:
                                        intron_counts[f_i, seg_start:min(
                                            seg_start + seg_len, c_len)] += 1
                                    if intron_cnt and (seg_start + seg_len <
                                                       c_len):
                                        try:
                                            intron_list[f_i][(seg_start,
                                                              seg_len)] += 1
                                        except KeyError:
                                            intron_list[f_i][(seg_start,
                                                              seg_len)] = 1

                    if not o[0] in [1, 4, 5]:
                        pos += o[1]

        elif fn.lower().endswith('npz'):

            try:
                infile = sp.load(str(fn))
            except:
                print >> sys.stderr, 'Could not load file %s - skipping' % fn
                continue
            c_len = stop - start + 1
            bam_reads = spsp.coo_matrix((infile[chr_name + '_reads_dat'],
                                         (infile[chr_name + '_reads_row'],
                                          infile[chr_name + '_reads_col'])),
                                        shape=infile[chr_name + '_reads_shp'],
                                        dtype='uint32').tocsc()
            bam_introns_m = infile[chr_name + '_introns_m']
            bam_introns_p = infile[chr_name + '_introns_p']
            counts[f_i, :] = sp.sum(bam_reads[:, start:stop + 1].todense(),
                                    axis=0)
            if intron_cnt:
                idx = sp.where((bam_introns_m[:, 0] > start)
                               & (bam_introns_m[:, 1] < stop))[0]
                for _i in idx:
                    try:
                        intron_list[f_i][(
                            bam_introns_m[_i, 0] - start,
                            bam_introns_m[_i, 1] -
                            bam_introns_m[_i, 0])] += bam_introns_m[_i, 2]
                    except KeyError:
                        intron_list[f_i][(
                            bam_introns_m[_i, 0] - start,
                            bam_introns_m[_i, 1] -
                            bam_introns_m[_i, 0])] = bam_introns_m[_i, 2]
                    if intron_cov:
                        intron_counts[f_i, bam_introns_m[_i, 0]:bam_introns_m[
                            _i, 1]] += bam_introns_m[_i, 2]
                idx = sp.where((bam_introns_p[:, 0] > start)
                               & (bam_introns_p[:, 1] < stop))[0]
                for _i in idx:
                    try:
                        intron_list[f_i][(
                            bam_introns_p[_i, 0] - start,
                            bam_introns_p[_i, 1] -
                            bam_introns_p[_i, 0])] += bam_introns_p[_i, 2]
                    except KeyError:
                        intron_list[f_i][(
                            bam_introns_p[_i, 0] - start,
                            bam_introns_p[_i, 1] -
                            bam_introns_p[_i, 0])] = bam_introns_p[_i, 2]
                    if intron_cov:
                        intron_counts[f_i, bam_introns_p[_i, 0]:bam_introns_p[
                            _i, 1]] += bam_introns_p[_i, 2]

    if collapsed:
        counts = sp.sum(counts, axis=0)
        intron_counts = sp.sum(intron_counts, axis=0)
        if intron_cnt:
            for f in range(1, len(files)):
                for intron in intron_list[f]:
                    try:
                        intron_list[0][intron] += intron_list[f][intron]
                    except KeyError:
                        intron_list[0][intron] = intron_list[f][intron]
            intron_list = intron_list[0]

    return (counts, intron_counts, intron_list)
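In the BAM branch above, coverage is accumulated by walking each read's CIGAR operations: match (0) and deletion (2) blocks increment the per-base counts, skipped regions (3) are recorded as introns, and insertions/clips (1, 4, 5) do not advance the reference position. A minimal sketch of that CIGAR walk for a single made-up read against a small region:

import numpy as np

start, stop = 100, 119                # region of interest (inclusive)
counts = np.zeros(stop - start + 1)
introns = {}

pos = 102                             # alignment start of the read on the reference
cigar = [(0, 5), (3, 6), (0, 4)]      # 5M 6N 4M : two exonic blocks around an intron

for op, length in cigar:
    if op in [0, 2, 3]:               # M, D, N consume the reference
        seg_offset = max(0, start - pos)
        seg_len = length - seg_offset
        if seg_len > 0:
            seg_start = max(pos - start, 0)
            if op in [0, 2]:          # match / deletion : add coverage
                counts[seg_start:min(seg_start + seg_len, len(counts))] += 1
            else:                     # skipped region : record the intron
                key = (seg_start, seg_len)
                introns[key] = introns.get(key, 0) + 1
    if op not in [1, 4, 5]:           # everything except I, S, H advances the reference
        pos += length

print(counts)
print(introns)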
Example #46
0
def sensor(door, xy0, xy1, t0, t1):
    """
    Compute the number of entries and exits through a door, as a pedestrian
    counting sensor would do

    Parameters
    ----------
    door: numpy array
        door coordinates [x0,y0,x1,y1]
    xy0: numpy array
        people coordinates at time t0
    xy1: numpy array
        people coordinates at time t1
    t0: float
        time at the beginning of the interval
    t1: float
        time at the end of the interval

    Returns
    -------
    id: numpy array
        index of persons who go through the door
    p: numpy array
        coordinates of intersection points between the door and people trajectories
    io: numpy array
        crossing direction relative to the door normal: 1 = exit, -1 = entry
    times: numpy array
        exit or entry times
    entries: int
        number of entries
    exits: int
        number of exits
    """
    #
    #   trajectories :
    #               xy0
    #                |
    #                 |
    #   door :     d0--p--d1
    #                   |
    #                   xy1
    d0 = sp.empty(xy0.shape)
    d0[:, 0] = door[0]
    d0[:, 1] = door[1]
    d1 = sp.empty(xy1.shape)
    d1[:, 0] = door[2]
    d1[:, 1] = door[3]
    T = sp.array([[0, -1], [1, 0]])
    vdoor = sp.atleast_2d(d1 - d0)
    vtraj = sp.atleast_2d(xy1 - xy0)
    v0 = sp.atleast_2d(d0 - xy0)
    dot_vdoor_T = sp.dot(vdoor, T)
    denom = sp.sum(dot_vdoor_T * vtraj, axis=1)
    num = sp.sum(dot_vdoor_T * v0, axis=1)
    # Intersection points
    # can be inf or nan if parallel lines...
    p = sp.atleast_2d(num / denom).T * vtraj + xy0
    # Test if the intersection point is on the door segment
    vp0 = sp.atleast_2d(p - d0)
    norm_vdoor_2 = sp.sum(vdoor * vdoor, axis=1)
    dot_vdoor_vp0 = sp.sum(vdoor * vp0, axis=1)
    is_p_in_door = (dot_vdoor_vp0 >= 0) * (dot_vdoor_vp0 <= norm_vdoor_2)
    # Test if the intersection point is on the person trajectory
    vpxy0 = sp.atleast_2d(p - xy0)
    norm_vtraj_2 = sp.sum(vtraj * vtraj, axis=1)
    dot_vtraj_vpxy0 = sp.sum(vtraj * vpxy0, axis=1)
    is_p_in_traj = (dot_vtraj_vpxy0 >= 0) * (dot_vtraj_vpxy0 <= norm_vtraj_2)
    # Keep only points on the door and on the trajectory
    is_p_intersect = is_p_in_door * is_p_in_traj
    id = sp.where(is_p_intersect == True)[0]
    # Test if the direction is the output normal : (d1-d0)_y , (d1-d0)_x
    vn = sp.empty(vdoor.shape)
    vn[:, 0] = vdoor[:, 1]
    vn[:, 1] = -vdoor[:, 0]
    dot_vn_vtraj = sp.sum(vn * vtraj, axis=1)
    is_normal_dir = (dot_vn_vtraj > 0)
    io = (is_normal_dir[id] == True) * 1 + (is_normal_dir[id] == False) * (-1)
    exits = sp.sum(io == 1)
    entries = sp.sum(io == -1)
    # Compute the distance from the intersection point p to xy0
    norm_vpxy0_2 = sp.sqrt(sp.sum(vpxy0 * vpxy0, axis=1))
    # Compute the distance from the intersection point p to xy1
    vpxy1 = sp.atleast_2d(p - xy1)
    norm_vpxy1_2 = sp.sqrt(sp.sum(vpxy1 * vpxy1, axis=1))
    # Compute the intersection time
    norm_vtraj = sp.sqrt(norm_vtraj_2)
    dt = t1 - t0
    times = t0 + (is_normal_dir==True)*(norm_vpxy0_2*dt/norm_vtraj) + \
            (is_normal_dir==False)*(norm_vpxy1_2*dt/norm_vtraj)
    return id, p[id, :], io, times[id], entries, exits
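A minimal usage sketch of sensor with a horizontal door and two trajectories, one crossing the door segment and one passing beside it (coordinates are made up; sp is assumed to be the scipy/NumPy namespace used throughout the snippet and sensor to be in scope):

import numpy as sp  # stands in for the scipy namespace the snippet relies on

door = sp.array([0.0, 0.0, 1.0, 0.0])         # door segment from (0, 0) to (1, 0)
xy0 = sp.array([[0.5, -0.2], [2.0, -0.2]])    # positions at t0
xy1 = sp.array([[0.5,  0.3], [2.0,  0.3]])    # positions at t1
ids, pts, io, times, entries, exits = sensor(door, xy0, xy1, t0=0.0, t1=1.0)
print(ids)      # only person 0 crosses the door segment
print(io)       # +1 or -1 depending on the crossing direction w.r.t. the door normal
print(times)    # interpolated crossing time within [t0, t1]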
Example #47
0
def collect_events(CFG):

    ### which events do we call
    do_exon_skip = ('exon_skip' in CFG['event_types'])
    do_intron_retention = ('intron_retention' in CFG['event_types'])
    do_mult_exon_skip = ('mult_exon_skip' in CFG['event_types'])
    do_alt_3prime = ('alt_3prime' in CFG['event_types'])
    do_alt_5prime = ('alt_5prime' in CFG['event_types'])
    do_mutex_exons = ('mutex_exons' in CFG['event_types'])

    ### init empty event fields
    if do_intron_retention:
        intron_reten_pos = sp.zeros((len(CFG['replicate_idxs']), 1),
                                    dtype='object')
    if do_exon_skip:
        exon_skip_pos = sp.zeros((len(CFG['replicate_idxs']), 1),
                                 dtype='object')
    if do_alt_3prime or do_alt_5prime:
        alt_end_5prime_pos = sp.zeros((len(CFG['replicate_idxs']), 1),
                                      dtype='object')
        alt_end_3prime_pos = sp.zeros((len(CFG['replicate_idxs']), 1),
                                      dtype='object')
    if do_mult_exon_skip:
        mult_exon_skip_pos = sp.zeros((len(CFG['replicate_idxs']), 1),
                                      dtype='object')
    if do_mutex_exons:
        mutex_exons_pos = sp.zeros((len(CFG['replicate_idxs']), 1),
                                   dtype='object')

    validate_tag = ''
    if 'validate_splicegraphs' in CFG and CFG['validate_splicegraphs']:
        validate_tag = '.validated'

    for i in range(len(CFG['samples'])):
        if CFG['same_genestruct_for_all_samples'] == 1 and i == 1:
            break

        if i > 0:
            if do_intron_retention:
                intron_reten_pos = sp.c_[
                    intron_reten_pos,
                    sp.zeros((len(CFG['replicate_idxs']), 1), dtype='object')]
            if do_exon_skip:
                exon_skip_pos = sp.c_[
                    exon_skip_pos,
                    sp.zeros((len(CFG['replicate_idxs']), 1), dtype='object')]
            if do_alt_3prime:
                alt_end_3prime_pos = sp.c_[
                    alt_end_3prime_pos,
                    sp.zeros((len(CFG['replicate_idxs']), 1), dtype='object')]
            if do_alt_5prime:
                alt_end_5prime_pos = sp.c_[
                    alt_end_5prime_pos,
                    sp.zeros((len(CFG['replicate_idxs']), 1), dtype='object')]
            if do_mult_exon_skip:
                mult_exon_skip_pos = sp.c_[
                    mult_exon_skip_pos,
                    sp.zeros((len(CFG['replicate_idxs']), 1), dtype='object')]
            if do_mutex_exons:
                mutex_exons_pos = sp.c_[
                    mutex_exons_pos,
                    sp.zeros((len(CFG['replicate_idxs']), 1), dtype='object')]

        strain = CFG['strains'][i]

        for ridx in CFG['replicate_idxs']:
            if len(CFG['replicate_idxs']) > 1:
                rep_tag = '_R%i' % ridx
            else:
                rep_tag = ''

            if 'spladder_infile' in CFG:
                genes_fnames = CFG['spladder_infile']
            elif CFG['merge_strategy'] == 'single':
                genes_fnames = '%s/spladder/genes_graph_conf%i%s.%s.pickle' % (
                    CFG['out_dirname'], CFG['confidence_level'], rep_tag,
                    CFG['samples'][i])
            else:
                genes_fnames = '%s/spladder/genes_graph_conf%i%s.%s%s.pickle' % (
                    CFG['out_dirname'], CFG['confidence_level'], rep_tag,
                    CFG['merge_strategy'], validate_tag)

            ### define outfile names
            if CFG['merge_strategy'] == 'single':
                fn_out_ir = '%s/%s_intron_retention%s_C%i.pickle' % (
                    CFG['out_dirname'], CFG['samples'][i], rep_tag,
                    CFG['confidence_level'])
                fn_out_es = '%s/%s_exon_skip%s_C%i.pickle' % (
                    CFG['out_dirname'], CFG['samples'][i], rep_tag,
                    CFG['confidence_level'])
                fn_out_mes = '%s/%s_mult_exon_skip%s_C%i.pickle' % (
                    CFG['out_dirname'], CFG['samples'][i], rep_tag,
                    CFG['confidence_level'])
                fn_out_a5 = '%s/%s_alt_5prime%s_C%i.pickle' % (
                    CFG['out_dirname'], CFG['samples'][i], rep_tag,
                    CFG['confidence_level'])
                fn_out_a3 = '%s/%s_alt_3prime%s_C%i.pickle' % (
                    CFG['out_dirname'], CFG['samples'][i], rep_tag,
                    CFG['confidence_level'])
                fn_out_mex = '%s/%s_mutex_exons%s_C%i.pickle' % (
                    CFG['out_dirname'], CFG['samples'][i], rep_tag,
                    CFG['confidence_level'])
            else:
                fn_out_ir = '%s/%s_intron_retention%s_C%i.pickle' % (
                    CFG['out_dirname'], CFG['merge_strategy'], rep_tag,
                    CFG['confidence_level'])
                fn_out_es = '%s/%s_exon_skip%s_C%i.pickle' % (
                    CFG['out_dirname'], CFG['merge_strategy'], rep_tag,
                    CFG['confidence_level'])
                fn_out_mes = '%s/%s_mult_exon_skip%s_C%i.pickle' % (
                    CFG['out_dirname'], CFG['merge_strategy'], rep_tag,
                    CFG['confidence_level'])
                fn_out_a5 = '%s/%s_alt_5prime%s_C%i.pickle' % (
                    CFG['out_dirname'], CFG['merge_strategy'], rep_tag,
                    CFG['confidence_level'])
                fn_out_a3 = '%s/%s_alt_3prime%s_C%i.pickle' % (
                    CFG['out_dirname'], CFG['merge_strategy'], rep_tag,
                    CFG['confidence_level'])
                fn_out_mex = '%s/%s_mutex_exons%s_C%i.pickle' % (
                    CFG['out_dirname'], CFG['merge_strategy'], rep_tag,
                    CFG['confidence_level'])

            if do_intron_retention:
                intron_reten_pos[ridx, i] = []
            if do_exon_skip:
                exon_skip_pos[ridx, i] = []
            if do_mult_exon_skip:
                mult_exon_skip_pos[ridx, i] = []
            if do_alt_5prime:
                alt_end_5prime_pos[ridx, i] = []
            if do_alt_3prime:
                alt_end_3prime_pos[ridx, i] = []
            if do_mutex_exons:
                mutex_exons_pos[ridx, i] = []

            print '\nconfidence %i / sample %i / replicate %i' % (
                CFG['confidence_level'], i, ridx)

            if os.path.exists(genes_fnames):
                print 'Loading gene structure from %s ...' % genes_fnames
                (genes, inserted) = cPickle.load(open(genes_fnames, 'r'))
                print '... done.'

                if not 'chrm_lookup' in CFG:
                    CFG = append_chrms(
                        sp.unique(sp.array([x.chr for x in genes],
                                           dtype='str')), CFG)

                ### detect intron retentions from splicegraph
                if do_intron_retention:
                    if not os.path.exists(fn_out_ir):
                        idx_intron_reten, intron_intron_reten = detect_events(
                            genes, 'intron_retention',
                            sp.where([x.is_alt for x in genes])[0], CFG)
                        for k in range(len(idx_intron_reten)):
                            gene = genes[idx_intron_reten[k]]

                            ### perform liftover between strains if necessary
                            exons = gene.splicegraph.vertices
                            if not 'reference_strain' in CFG:
                                exons_col = exons
                                exons_col_pos = exons
                            else:
                                exons_col = convert_strain_pos_intervals(
                                    gene.chr, gene.splicegraph.vertices.T,
                                    strain, CFG['reference_strain']).T
                                exons_col_pos = convert_strain_pos(
                                    gene.chr, gene.splicegraph.vertices.T,
                                    strain, CFG['reference_strain']).T
                            if exons_col.shape != exons_col_pos.shape:
                                print 'skipping non-mappable intron retention event'
                                continue

                            ### build intron retention data structure
                            event = Event('intron_retention', gene.chr,
                                          gene.strand)
                            event.strain = sp.array([strain])
                            event.exons1 = sp.c_[
                                exons[:, intron_intron_reten[k][0]],
                                exons[:, intron_intron_reten[k][1]]].T
                            event.exons2 = sp.array([
                                exons[:, intron_intron_reten[k][0]][0],
                                exons[:, intron_intron_reten[k][1]][1]
                            ])
                            #event.exons2 = exons[:, intron_intron_reten[k][2]]
                            event.exons1_col = sp.c_[
                                exons_col[:, intron_intron_reten[k][0]],
                                exons_col[:, intron_intron_reten[k][1]]]
                            event.exons2_col = sp.array([
                                exons_col[:, intron_intron_reten[k][0]][0],
                                exons_col[:, intron_intron_reten[k][1]][1]
                            ])
                            #event.exons2_col = exons_col[:, intron_intron_reten[k][2]]
                            event.gene_name = sp.array([gene.name])
                            event.gene_idx = idx_intron_reten[k]
                            #event.transcript_type = sp.array([gene.transcript_type])
                            intron_reten_pos[ridx, i].append(event)
                    else:
                        print '%s already exists' % fn_out_ir

                ### detect exon_skips from splicegraph
                if do_exon_skip:
                    if not os.path.exists(fn_out_es):
                        idx_exon_skip, exon_exon_skip = detect_events(
                            genes, 'exon_skip',
                            sp.where([x.is_alt for x in genes])[0], CFG)
                        for k in range(len(idx_exon_skip)):
                            gene = genes[idx_exon_skip[k]]

                            ### perform liftover between strains if necessary
                            exons = gene.splicegraph.vertices
                            if not 'reference_strain' in CFG:
                                exons_col = exons
                                exons_col_pos = exons
                            else:
                                exons_col = convert_strain_pos_intervals(
                                    gene.chr, gene.splicegraph.vertices.T,
                                    strain, CFG['reference_strain']).T
                                exons_col_pos = convert_strain_pos(
                                    gene.chr, gene.splicegraph.vertices.T,
                                    strain, CFG['reference_strain']).T
                            if exons_col.shape != exons_col_pos.shape:
                                print 'skipping non-mappable exon_skip event'
                                continue

                            ### build exon skip data structure
                            event = Event('exon_skip', gene.chr, gene.strand)
                            event.strain = sp.array([strain])
                            event.exons1 = sp.c_[exons[:,
                                                       exon_exon_skip[k][0]],
                                                 exons[:,
                                                       exon_exon_skip[k][2]]].T
                            event.exons2 = sp.c_[exons[:,
                                                       exon_exon_skip[k][0]],
                                                 exons[:,
                                                       exon_exon_skip[k][1]],
                                                 exons[:,
                                                       exon_exon_skip[k][2]]].T
                            event.exons1_col = sp.c_[
                                exons_col[:, exon_exon_skip[k][0]],
                                exons_col[:, exon_exon_skip[k][2]]].T
                            event.exons2_col = sp.c_[
                                exons_col[:, exon_exon_skip[k][0]],
                                exons_col[:, exon_exon_skip[k][1]],
                                exons_col[:, exon_exon_skip[k][2]]].T
                            event.gene_name = sp.array([gene.name])
                            event.gene_idx = idx_exon_skip[k]
                            #event.transcript_type = sp.array([gene.transcript_type])
                            exon_skip_pos[ridx, i].append(event)
                    else:
                        print '%s already exists' % fn_out_es

                ### detect alternative intron_ends from splicegraph
                if do_alt_3prime or do_alt_5prime:
                    if not os.path.exists(fn_out_a5) or not os.path.exists(
                            fn_out_a3):
                        idx_alt_end_5prime, exon_alt_end_5prime, idx_alt_end_3prime, exon_alt_end_3prime = detect_events(
                            genes, 'alt_prime',
                            sp.where([x.is_alt for x in genes])[0], CFG)
                        ### handle 5 prime events
                        for k in range(len(idx_alt_end_5prime)):
                            gene = genes[idx_alt_end_5prime[k]]

                            ### perform liftover between strains if necessary
                            exons = gene.splicegraph.vertices
                            if not 'reference_strain' in CFG:
                                exons_col = exons
                                exons_col_pos = exons
                            else:
                                exons_col = convert_strain_pos_intervals(
                                    gene.chr, gene.splicegraph.vertices.T,
                                    strain, CFG['reference_strain']).T
                                exons_col_pos = convert_strain_pos(
                                    gene.chr, gene.splicegraph.vertices.T,
                                    strain, CFG['reference_strain']).T
                            if exons_col.shape != exons_col_pos.shape:
                                print 'skipping non-mappable alt 5-prime event'
                                continue

                            for k1 in range(
                                    len(exon_alt_end_5prime[k]
                                        ['fiveprimesites']) - 1):
                                for k2 in range(
                                        k1 + 1,
                                        len(exon_alt_end_5prime[k]
                                            ['fiveprimesites'])):

                                    exon_alt1_col = exons_col[:,
                                                              exon_alt_end_5prime[
                                                                  k]
                                                              ['fiveprimesites']
                                                              [k1]].T
                                    exon_alt2_col = exons_col[:,
                                                              exon_alt_end_5prime[
                                                                  k]
                                                              ['fiveprimesites']
                                                              [k2]].T

                                    ### check if exons overlap
                                    if (exon_alt1_col[0] >= exon_alt2_col[1]
                                        ) or (exon_alt1_col[1] <=
                                              exon_alt2_col[0]):
                                        continue

                                    event = Event('alt_5prime', gene.chr,
                                                  gene.strand)
                                    event.strain = sp.array([strain])
                                    if gene.strand == '+':
                                        event.exons1 = sp.c_[
                                            exons[:, exon_alt_end_5prime[k]
                                                  ['fiveprimesites'][k1]],
                                            exons[:, exon_alt_end_5prime[k]
                                                  ['threeprimesite']]].T
                                        event.exons2 = sp.c_[
                                            exons[:, exon_alt_end_5prime[k]
                                                  ['fiveprimesites'][k2]],
                                            exons[:, exon_alt_end_5prime[k]
                                                  ['threeprimesite']]].T
                                        event.exons1_col = sp.c_[
                                            exons_col[:, exon_alt_end_5prime[k]
                                                      ['fiveprimesites'][k1]],
                                            exons_col[:, exon_alt_end_5prime[k]
                                                      ['threeprimesite']]].T
                                        event.exons2_col = sp.c_[
                                            exons_col[:, exon_alt_end_5prime[k]
                                                      ['fiveprimesites'][k2]],
                                            exons_col[:, exon_alt_end_5prime[k]
                                                      ['threeprimesite']]].T
                                    else:
                                        event.exons1 = sp.c_[
                                            exons[:, exon_alt_end_5prime[k]
                                                  ['threeprimesite']],
                                            exons[:, exon_alt_end_5prime[k]
                                                  ['fiveprimesites'][k1]]].T
                                        event.exons2 = sp.c_[
                                            exons[:, exon_alt_end_5prime[k]
                                                  ['threeprimesite']],
                                            exons[:, exon_alt_end_5prime[k]
                                                  ['fiveprimesites'][k2]]].T
                                        event.exons1_col = sp.c_[
                                            exons_col[:, exon_alt_end_5prime[k]
                                                      ['threeprimesite']],
                                            exons_col[:, exon_alt_end_5prime[
                                                k]['fiveprimesites'][k1]]].T
                                        event.exons2_col = sp.c_[
                                            exons_col[:, exon_alt_end_5prime[k]
                                                      ['threeprimesite']],
                                            exons_col[:, exon_alt_end_5prime[
                                                k]['fiveprimesites'][k2]]].T
                                    event.gene_name = sp.array([gene.name])
                                    event.gene_idx = idx_alt_end_5prime[k]

                                    ### assert that first isoform is always the shorter one
                                    if sp.sum(event.exons1[:, 1] -
                                              event.exons1[:, 0]) > sp.sum(
                                                  event.exons2[:, 1] -
                                                  event.exons2[:, 0]):
                                        _tmp = event.exons1.copy()
                                        event.exons1 = event.exons2.copy()
                                        event.exons2 = _tmp
                                    #event.transcript_type = sp.array([gene.transcript_type])
                                    if do_alt_5prime:
                                        alt_end_5prime_pos[ridx,
                                                           i].append(event)

                        ### handle 3 prime events
                        for k in range(len(idx_alt_end_3prime)):
                            gene = genes[idx_alt_end_3prime[k]]

                            ### perform liftover between strains if necessary
                            exons = gene.splicegraph.vertices
                            if not 'reference_strain' in CFG:
                                exons_col = exons
                                exons_col_pos = exons
                            else:
                                exons_col = convert_strain_pos_intervals(
                                    gene.chr, gene.splicegraph.vertices.T,
                                    strain, CFG['reference_strain']).T
                                exons_col_pos = convert_strain_pos(
                                    gene.chr, gene.splicegraph.vertices.T,
                                    strain, CFG['reference_strain']).T
                            if exons_col.shape != exons_col_pos.shape:
                                print 'skipping non-mappable alt 3-prime event'
                                continue

                            for k1 in range(
                                    len(exon_alt_end_3prime[k]
                                        ['threeprimesites']) - 1):
                                for k2 in range(
                                        k1 + 1,
                                        len(exon_alt_end_3prime[k]
                                            ['threeprimesites'])):

                                    exon_alt1_col = exons_col[:,
                                                              exon_alt_end_3prime[
                                                                  k]
                                                              ['threeprimesites']
                                                              [k1]].T
                                    exon_alt2_col = exons_col[:,
                                                              exon_alt_end_3prime[
                                                                  k]
                                                              ['threeprimesites']
                                                              [k2]].T

                                    ### check if exons overlap
                                    if (exon_alt1_col[0] >= exon_alt2_col[1]
                                        ) or (exon_alt1_col[1] <=
                                              exon_alt2_col[0]):
                                        continue

                                    event = Event('alt_3prime', gene.chr,
                                                  gene.strand)
                                    event.strain = sp.array([strain])
                                    if gene.strand == '+':
                                        event.exons1 = sp.c_[
                                            exons[:, exon_alt_end_3prime[k]
                                                  ['threeprimesites'][k1]],
                                            exons[:, exon_alt_end_3prime[k]
                                                  ['fiveprimesite']]].T
                                        event.exons2 = sp.c_[
                                            exons[:, exon_alt_end_3prime[k]
                                                  ['threeprimesites'][k2]],
                                            exons[:, exon_alt_end_3prime[k]
                                                  ['fiveprimesite']]].T
                                        event.exons1_col = sp.c_[
                                            exons_col[:, exon_alt_end_3prime[k]
                                                      ['threeprimesites'][k1]],
                                            exons_col[:, exon_alt_end_3prime[k]
                                                      ['fiveprimesite']]].T
                                        event.exons2_col = sp.c_[
                                            exons_col[:, exon_alt_end_3prime[k]
                                                      ['threeprimesites'][k2]],
                                            exons_col[:, exon_alt_end_3prime[k]
                                                      ['fiveprimesite']]].T
                                    else:
                                        event.exons1 = sp.c_[
                                            exons[:, exon_alt_end_3prime[k]
                                                  ['fiveprimesite']],
                                            exons[:, exon_alt_end_3prime[k]
                                                  ['threeprimesites'][k1]]].T
                                        event.exons2 = sp.c_[
                                            exons[:, exon_alt_end_3prime[k]
                                                  ['fiveprimesite']],
                                            exons[:, exon_alt_end_3prime[k]
                                                  ['threeprimesites'][k2]]].T
                                        event.exons1_col = sp.c_[
                                            exons_col[:, exon_alt_end_3prime[k]
                                                      ['fiveprimesite']],
                                            exons_col[:, exon_alt_end_3prime[
                                                k]['threeprimesites'][k1]]].T
                                        event.exons2_col = sp.c_[
                                            exons_col[:, exon_alt_end_3prime[k]
                                                      ['fiveprimesite']],
                                            exons_col[:, exon_alt_end_3prime[
                                                k]['threeprimesites'][k2]]].T
                                    event.gene_name = sp.array([gene.name])
                                    event.gene_idx = idx_alt_end_3prime[k]

                                    ### assert that first isoform is always the shorter one
                                    if sp.sum(event.exons1[:, 1] -
                                              event.exons1[:, 0]) > sp.sum(
                                                  event.exons2[:, 1] -
                                                  event.exons2[:, 0]):
                                        _tmp = event.exons1.copy()
                                        event.exons1 = event.exons2.copy()
                                        event.exons2 = _tmp

                                    #event.transcript_type = sp.array([gene.transcript_type])
                                    if do_alt_3prime:
                                        alt_end_3prime_pos[ridx,
                                                           i].append(event)
                    else:
                        print '%s and %s already exist' % (fn_out_a5,
                                                           fn_out_a3)

                ### detect multiple_exon_skips from splicegraph
                if do_mult_exon_skip:
                    if not os.path.exists(fn_out_mes):
                        idx_mult_exon_skip, exon_mult_exon_skip = detect_events(
                            genes, 'mult_exon_skip',
                            sp.where([x.is_alt for x in genes])[0], CFG)
                        for k, gidx in enumerate(idx_mult_exon_skip):
                            gene = genes[gidx]

                            ### perform liftover between strains if necessary
                            exons = gene.splicegraph.vertices
                            if not 'reference_strain' in CFG:
                                exons_col = exons
                                exons_col_pos = exons
                            else:
                                exons_col = convert_strain_pos_intervals(
                                    gene.chr, gene.splicegraph.vertices.T,
                                    strain, CFG['reference_strain']).T
                                exons_col_pos = convert_strain_pos(
                                    gene.chr, gene.splicegraph.vertices.T,
                                    strain, CFG['reference_strain']).T
                            if exons_col.shape != exons_col_pos.shape:
                                print 'skipping non-mappable multiple exon skip event'
                                continue

                            ### build multiple exon skip data structure
                            event = Event('mult_exon_skip', gene.chr,
                                          gene.strand)
                            event.strain = sp.array([strain])
                            event.exons1 = sp.c_[
                                exons[:, exon_mult_exon_skip[k][0]],
                                exons[:, exon_mult_exon_skip[k][2]]].T
                            event.exons2 = sp.c_[
                                exons[:, exon_mult_exon_skip[k][0]],
                                exons[:, exon_mult_exon_skip[k][1]],
                                exons[:, exon_mult_exon_skip[k][2]]].T
                            event.exons1_col = sp.c_[
                                exons_col[:, exon_mult_exon_skip[k][0]],
                                exons_col[:, exon_mult_exon_skip[k][2]]].T
                            event.exons2_col = sp.c_[
                                exons_col[:, exon_mult_exon_skip[k][0]],
                                exons_col[:, exon_mult_exon_skip[k][1]],
                                exons_col[:, exon_mult_exon_skip[k][2]]].T
                            event.gene_name = sp.array([gene.name])
                            event.gene_idx = gidx
                            #event.transcript_type = sp.array([gene.transcript_type])
                            mult_exon_skip_pos[ridx, i].append(event)
                    else:
                        print '%s already exists' % fn_out_mes

                ### detect mutually exclusive exons from splicegraph
                if do_mutex_exons:
                    if not os.path.exists(fn_out_mex):
                        idx_mutex_exons, exon_mutex_exons = detect_events(
                            genes, 'mutex_exons',
                            sp.where([x.is_alt for x in genes])[0], CFG)
                        if len(idx_mutex_exons) > 0:
                            for k in range(len(exon_mutex_exons)):
                                gene = genes[idx_mutex_exons[k]]

                                ### perform liftover between strains if necessary
                                exons = gene.splicegraph.vertices
                                if not 'reference_strain' in CFG:
                                    exons_col = exons
                                    exons_col_pos = exons
                                else:
                                    exons_col = convert_strain_pos_intervals(
                                        gene.chr, gene.splicegraph.vertices.T,
                                        strain, CFG['reference_strain']).T
                                    exons_col_pos = convert_strain_pos(
                                        gene.chr, gene.splicegraph.vertices.T,
                                        strain, CFG['reference_strain']).T

                                if exons_col.shape != exons_col_pos.shape:
                                    print 'skipping non-mappable mutex exons event'
                                    continue

                                ### build data structure for mutually exclusive exons
                                event = Event('mutex_exons', gene.chr,
                                              gene.strand)
                                event.strain = sp.array([strain])
                                event.exons1 = sp.c_[
                                    exons[:, exon_mutex_exons[k][0]],
                                    exons[:, exon_mutex_exons[k][1]],
                                    exons[:, exon_mutex_exons[k][3]]].T
                                event.exons2 = sp.c_[
                                    exons[:, exon_mutex_exons[k][0]],
                                    exons[:, exon_mutex_exons[k][2]],
                                    exons[:, exon_mutex_exons[k][3]]].T
                                event.exons1_col = sp.c_[
                                    exons_col[:, exon_mutex_exons[k][0]],
                                    exons_col[:, exon_mutex_exons[k][1]],
                                    exons_col[:, exon_mutex_exons[k][3]]].T
                                event.exons2_col = sp.c_[
                                    exons_col[:, exon_mutex_exons[k][0]],
                                    exons_col[:, exon_mutex_exons[k][2]],
                                    exons_col[:, exon_mutex_exons[k][3]]].T
                                event.gene_name = sp.array([gene.name])
                                event.gene_idx = idx_mutex_exons[k]
                                #event.transcript_type = sp.array([gene.transcript_type])
                                mutex_exons_pos[ridx, i].append(event)
                    else:
                        print '%s already exists' % fn_out_mex

            ### genes file does not exist
            else:
                print 'result file not found: %s' % genes_fnames

    ### combine events for all samples
    for ridx in CFG['replicate_idxs']:

        ################################################%
        ### COMBINE INTRON RETENTIONS
        ################################################%
        if do_intron_retention:
            if not os.path.exists(fn_out_ir):
                intron_reten_pos_all = sp.array([
                    item for sublist in intron_reten_pos[ridx, :]
                    for item in sublist
                ])

                ### post process event structure by sorting and making events unique
                events_all = post_process_event_struct(intron_reten_pos_all,
                                                       CFG)

                ### store intron retentions
                print 'saving intron retentions to %s' % fn_out_ir
                cPickle.dump(events_all, open(fn_out_ir, 'w'), -1)
            else:
                print '%s already exists' % fn_out_ir

        ################################################%
        ### COMBINE EXON SKIPS
        ################################################%
        if do_exon_skip:
            if not os.path.exists(fn_out_es):
                exon_skip_pos_all = sp.array([
                    item for sublist in exon_skip_pos[ridx, :]
                    for item in sublist
                ])

                ### post process event structure by sorting and making events unique
                events_all = post_process_event_struct(exon_skip_pos_all, CFG)

                ### store exon skip events
                print 'saving exon skips to %s' % fn_out_es
                cPickle.dump(events_all, open(fn_out_es, 'w'), -1)
            else:
                print '%s already exists' % fn_out_es

        ################################################%
        ### COMBINE MULTIPLE EXON SKIPS
        ################################################%
        if do_mult_exon_skip:
            if not os.path.exists(fn_out_mes):
                mult_exon_skip_pos_all = sp.array([
                    item for sublist in mult_exon_skip_pos[ridx, :]
                    for item in sublist
                ])

                ### post process event structure by sorting and making events unique
                events_all = post_process_event_struct(mult_exon_skip_pos_all,
                                                       CFG)

                ### store multiple exon skip events
                print 'saving multiple exon skips to %s' % fn_out_mes
                cPickle.dump(events_all, open(fn_out_mes, 'w'), -1)
            else:
                print '%s already exists' % fn_out_mes

        ################################################%
        ### COMBINE ALT FIVE PRIME EVENTS
        ################################################%
        if do_alt_5prime:
            if not os.path.exists(fn_out_a5):
                alt_end_5prime_pos_all = sp.array([
                    item for sublist in alt_end_5prime_pos[ridx, :]
                    for item in sublist
                ])

                ### post process event structure by sorting and making events unique
                events_all = post_process_event_struct(alt_end_5prime_pos_all,
                                                       CFG)

                ### curate alt prime events
                ### cut to min len, if alt exon lengths differ
                ### remove, if alt exons do not overlap
                if CFG['curate_alt_prime_events']:
                    events_all = curate_alt_prime(events_all, CFG)

                ### store alt 5 prime events
                print 'saving alt 5 prime events to %s' % fn_out_a5
                cPickle.dump(events_all, open(fn_out_a5, 'w'), -1)
            else:
                print '%s already exists' % fn_out_a5

        ################################################%
        ### COMBINE ALT THREE PRIME EVENTS
        ################################################%
        if do_alt_3prime:
            if not os.path.exists(fn_out_a3):
                alt_end_3prime_pos_all = sp.array([
                    item for sublist in alt_end_3prime_pos[ridx, :]
                    for item in sublist
                ])
                ### post process event structure by sorting and making events unique
                events_all = post_process_event_struct(alt_end_3prime_pos_all,
                                                       CFG)

                ### curate alt prime events
                ### cut to min len, if alt exon lengths differ
                ### remove, if alt exons do not overlap
                if CFG['curate_alt_prime_events']:
                    events_all = curate_alt_prime(events_all, CFG)

                ### store alt 3 prime events
                print 'saving alt 3 prime events to %s' % fn_out_a3
                cPickle.dump(events_all, open(fn_out_a3, 'w'), -1)
            else:
                print '%s already exists' % fn_out_a3

        ################################################%
        ### COMBINE MUTUALLY EXCLUSIVE EXONS
        ################################################%
        if do_mutex_exons:
            if not os.path.exists(fn_out_mex):
                mutex_exons_pos_all = sp.array([
                    item for sublist in mutex_exons_pos[ridx, :]
                    for item in sublist
                ])

                ### post process event structure by sorting and making events unique
                events_all = post_process_event_struct(mutex_exons_pos_all,
                                                       CFG)

                ### store multiple exon skip events
                print 'saving mutually exclusive exons to %s' % fn_out_mex
                cPickle.dump(events_all, open(fn_out_mex, 'w'), -1)
            else:
                print '%s already exists' % fn_out_mex
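The six blocks above repeat the same pattern for each event type: flatten the
per-sample event lists of one replicate, sort them and make them unique with
post_process_event_struct, optionally curate alt prime events, and pickle the
result. A minimal refactoring sketch of that shared step (the helper name
combine_and_store is illustrative and not part of the original module):

def combine_and_store(event_pos, ridx, fn_out, label, CFG, curate=False):
    ### flatten the per-sample event lists into one array
    events_all = sp.array([e for sublist in event_pos[ridx, :] for e in sublist])
    ### post process event structure by sorting and making events unique
    events_all = post_process_event_struct(events_all, CFG)
    ### optionally cut/remove inconsistent alt prime events
    if curate and CFG['curate_alt_prime_events']:
        events_all = curate_alt_prime(events_all, CFG)
    print 'saving %s events to %s' % (label, fn_out)
    cPickle.dump(events_all, open(fn_out, 'w'), -1)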
Example #48
0
def main():

    ### get command line options
    options = parse_options(sys.argv)

    ### parse parameters from options object
    CFG = settings.parse_args(options, identity='test')

    ### generate output directory
    outdir = os.path.join(options.outdir, 'testing')
    if options.timestamp == 'y':
        outdir = '%s_%s' % (outdir, str(datetime.datetime.now()).replace(
            ' ', '_'))

    if options.labelA != 'condA' and options.labelB != 'condB':
        outdir = '%s_%s_vs_%s' % (outdir, options.labelA, options.labelB)
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    if CFG['debug']:

        print "Generating simulated dataset"

        npr.seed(23)
        CFG['is_matlab'] = False
        #cov = npr.permutation(20000-20).astype('float').reshape(999, 20)
        #cov = sp.r_[cov, sp.c_[sp.ones((1, 10)) *10, sp.ones((1, 10)) * 500000] + npr.normal(10, 1, 20)]
        #sf = sp.ones((cov.shape[1], ), dtype='float')

        setsize = 50
        ### diff event counts
        cov = sp.zeros((500, 2 * setsize), dtype='int')
        for i in range(10):
            cov[i, :setsize] = nbinom.rvs(30, 0.8, size=setsize)
            cov[i, setsize:] = nbinom.rvs(10, 0.8, size=setsize)
        for i in range(10, cov.shape[0]):
            cov[i, :] = nbinom.rvs(30, 0.8, size=2 * setsize)

        ### diff gene expression
        cov2 = sp.zeros((500, 2 * setsize), dtype='int')
        for i in range(20):
            cov2[i, :setsize] = nbinom.rvs(2000, 0.2, size=setsize)
            cov2[i, setsize:] = nbinom.rvs(2000, 0.3, size=setsize)
        for i in range(20, cov2.shape[0]):
            cov2[i, :] = nbinom.rvs(2000, 0.3, size=2 * setsize)

        cov = sp.c_[cov, cov2] * 10000

        tidx = sp.arange(setsize)

        sf = npr.uniform(0, 5, 2 * setsize)
        sf = sp.r_[sf, sf]

        #dmatrix0 = sp.ones((cov.shape[1], 3), dtype='bool')
        dmatrix1 = sp.zeros((cov.shape[1], 4), dtype='float')
        dmatrix1[:, 0] = 1
        dmatrix1[tidx, 1] = 1
        #dmatrix1[tidx, 2] = 1
        dmatrix1[tidx + (2 * setsize), 2] = 1
        dmatrix1[(2 * setsize):, 3] = 1
        #dmatrix1[:, 4] = sp.log(sf)
        dmatrix0 = dmatrix1[:, [0, 2, 3]]

        cov = cov * sf
        #sf = sp.ones((cov.shape[1], ), dtype='float')

        pvals = run_testing(cov, dmatrix0, dmatrix1, sf, CFG)
        pvals_adj = adj_pval(pvals, CFG)
        pdb.set_trace()
    else:
        val_tag = ''
        if CFG['validate_splicegraphs']:
            val_tag = '.validated'

        if CFG['is_matlab']:
            CFG['fname_genes'] = os.path.join(
                CFG['out_dirname'], 'spladder', 'genes_graph_conf%i.%s%s.mat' %
                (CFG['confidence_level'], CFG['merge_strategy'], val_tag))
            CFG['fname_count_in'] = os.path.join(
                CFG['out_dirname'], 'spladder',
                'genes_graph_conf%i.%s%s.count.mat' %
                (CFG['confidence_level'], CFG['merge_strategy'], val_tag))
        else:
            CFG['fname_genes'] = os.path.join(
                CFG['out_dirname'], 'spladder',
                'genes_graph_conf%i.%s%s.pickle' %
                (CFG['confidence_level'], CFG['merge_strategy'], val_tag))
            CFG['fname_count_in'] = os.path.join(
                CFG['out_dirname'], 'spladder',
                'genes_graph_conf%i.%s%s.count.pickle' %
                (CFG['confidence_level'], CFG['merge_strategy'], val_tag))

        condition_strains = None
        CFG['fname_exp_hdf5'] = os.path.join(
            CFG['out_dirname'], 'spladder',
            'genes_graph_conf%i.%s%s.gene_exp.hdf5' %
            (CFG['confidence_level'], CFG['merge_strategy'], val_tag))
        if os.path.exists(CFG['fname_exp_hdf5']):
            if CFG['verbose']:
                print 'Loading expression counts from %s' % CFG[
                    'fname_exp_hdf5']
            IN = h5py.File(CFG['fname_exp_hdf5'], 'r')
            gene_counts = IN['raw_count'][:]
            gene_strains = IN['strains'][:]
            gene_ids = IN['genes'][:]
            IN.close()
        else:
            if options.subset_samples == 'y':
                condition_strains = sp.unique(
                    sp.r_[sp.array(CFG['conditionA']),
                          sp.array(CFG['conditionB'])])
                CFG['fname_exp_hdf5'] = os.path.join(
                    CFG['out_dirname'], 'spladder',
                    'genes_graph_conf%i.%s%s.gene_exp.%i.hdf5' %
                    (CFG['confidence_level'], CFG['merge_strategy'], val_tag,
                     hash(tuple(sp.unique(condition_strains))) * -1))
            if os.path.exists(CFG['fname_exp_hdf5']):
                if CFG['verbose']:
                    print 'Loading expression counts from %s' % CFG[
                        'fname_exp_hdf5']
                IN = h5py.File(CFG['fname_exp_hdf5'], 'r')
                gene_counts = IN['raw_count'][:]
                gene_strains = IN['strains'][:]
                gene_ids = IN['genes'][:]
                IN.close()
            else:
                gene_counts, gene_strains, gene_ids = get_gene_expression(
                    CFG,
                    fn_out=CFG['fname_exp_hdf5'],
                    strain_subset=condition_strains)

        gene_strains = sp.array(
            [x.split(':')[1] if ':' in x else x for x in gene_strains])

        ### estimate size factors for library size normalization
        sf = get_size_factors(gene_counts, CFG)

        ### get index of samples for difftest
        idx1 = sp.where(sp.in1d(gene_strains, CFG['conditionA']))[0]
        idx2 = sp.where(sp.in1d(gene_strains, CFG['conditionB']))[0]

        ### for TESTING
        #setsize = 100
        #idx1 = sp.arange(0, setsize / 2)
        #idx2 = sp.arange(setsize / 2, setsize)

        ### subset expression counts to tested samples
        gene_counts = gene_counts[:, sp.r_[idx1, idx2]]
        sf = sf[sp.r_[idx1, idx2]]
        sf = sp.r_[sf, sf]

        ### test each event type individually
        for event_type in CFG['event_types']:

            if CFG['verbose']:
                print 'Testing %s events' % event_type

            CFG['fname_events'] = os.path.join(
                CFG['out_dirname'], 'merge_graphs_%s_C%i.counts.hdf5' %
                (event_type, CFG['confidence_level']))

            ### quantify events
            (cov, gene_idx, event_idx,
             event_strains) = quantify.quantify_from_counted_events(
                 CFG['fname_events'], sp.r_[idx1, idx2], event_type, CFG)

            assert (sp.all(gene_strains == event_strains))

            ### map gene expression to event order
            curr_gene_counts = gene_counts[gene_idx, :]

            ### filter for min expression
            if event_type == 'intron_retention':
                k_idx = sp.where(
                    (sp.mean(cov[0] == 0, axis=1) < CFG['max_0_frac'])
                    | (sp.mean(cov[1] == 0, axis=1) < CFG['max_0_frac']))[0]
            else:
                k_idx = sp.where(
                    ((sp.mean(cov[0] == 0, axis=1) < CFG['max_0_frac'])
                     | (sp.mean(cov[1] == 0, axis=1) < CFG['max_0_frac']))
                    & (sp.mean(sp.c_[cov[0][:, :idx1.shape[0]],
                                     cov[1][:, :idx1.shape[0]]] == 0,
                               axis=1) < CFG['max_0_frac'])
                    & (sp.mean(sp.c_[cov[0][:, idx2.shape[0]:],
                                     cov[1][:, idx2.shape[0]:]] == 0,
                               axis=1) < CFG['max_0_frac']))[0]
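            # k_idx keeps events that are not dominated by zero counts: at
            # least one isoform must have a zero-count fraction below
            # max_0_frac across all samples and, except for intron retention,
            # the combined isoform counts must also pass that threshold within
            # each condition block of the count matrix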
            if CFG['verbose']:
                print 'Exclude %i of %i %s events (%.2f percent) from testing due to low coverage' % (
                    cov[0].shape[0] - k_idx.shape[0], cov[0].shape[0],
                    event_type,
                    (1 - float(k_idx.shape[0]) / cov[0].shape[0]) * 100)
            if k_idx.shape[0] == 0:
                print 'All events of type %s were filtered out due to low coverage. Please try re-running with less stringent filter criteria' % event_type
                continue
            # k_idx = sp.where((sp.mean(sp.c_[cov[0], cov[1]], axis=1) > 2))[0]
            # k_idx = sp.where((sp.mean(cov[0], axis=1) > 2) & (sp.mean(cov[1], axis=1) > 2))[0]
            cov[0] = cov[0][k_idx, :]
            cov[1] = cov[1][k_idx, :]
            curr_gene_counts = curr_gene_counts[k_idx, :]
            event_idx = event_idx[k_idx]
            gene_idx = gene_idx[k_idx]

            cov[0] = sp.around(sp.hstack([cov[0], curr_gene_counts]))
            cov[1] = sp.around(sp.hstack([cov[1], curr_gene_counts]))
            cov = sp.vstack(cov)

            tidx = sp.arange(idx1.shape[0])

            #if CFG['debug']:
            #    for i in range(cov.shape[0]):
            #        fig = plt.figure(figsize=(8, 6), dpi=100)
            #        ax = fig.add_subplot(111)
            #        ax.hist(cov[i, :] * sf, 50, histtype='bar', rwidth=0.8)
            #        #ax.plot(sp.arange(cov.shape[1]), sorted(cov[i, :]), 'bo')
            #        ax.set_title('Count Distribution - Sample %i' % i )
            #        plt.savefig('count_dist.%i.pdf' % i, format='pdf', bbox_inches='tight')
            #        plt.close(fig)

            ### build design matrix for testing
            dmatrix1 = sp.zeros((cov.shape[1], 4), dtype='bool')
            dmatrix1[:, 0] = 1  # intercept
            dmatrix1[tidx, 1] = 1  # delta a
            dmatrix1[tidx, 2] = 1  # delta g
            dmatrix1[tidx + (idx1.shape[0] + idx2.shape[0]), 2] = 1  # delta g
            dmatrix1[(idx1.shape[0] + idx2.shape[0]):, 3] = 1  # is g
            dmatrix0 = dmatrix1[:, [0, 2, 3]]
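            # dmatrix1 is the full (alternative) design: an intercept, an
            # event-specific condition effect (delta a, column 1), a gene-level
            # condition effect shared by event and gene counts (delta g,
            # column 2) and an indicator for the gene-expression columns
            # (is g, column 3). dmatrix0 drops column 1, so comparing the two
            # models tests whether the condition changes the event counts
            # beyond what gene expression alone explains.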

            pvals = run_testing(cov, dmatrix0, dmatrix1, sf, CFG)
            pvals_adj = adj_pval(pvals, CFG)

            ### write output
            out_fname = os.path.join(
                outdir,
                'test_results_C%i_%s.tsv' % (options.confidence, event_type))
            if CFG['verbose']:
                print 'Writing test results to %s' % out_fname
            s_idx = sp.argsort(pvals_adj)
            header = sp.array(['event_id', 'gene', 'p_val', 'p_val_adj'])
            event_ids = sp.array(
                ['%s_%i' % (event_type, i + 1) for i in event_idx],
                dtype='str')
            if CFG['is_matlab']:
                data_out = sp.c_[event_ids[s_idx], gene_ids[gene_idx[s_idx],
                                                            0],
                                 pvals[s_idx].astype('str'),
                                 pvals_adj[s_idx].astype('str')]
            else:
                data_out = sp.c_[event_ids[s_idx], gene_ids[gene_idx[s_idx]],
                                 pvals[s_idx].astype('str'),
                                 pvals_adj[s_idx].astype('str')]
            data_out = sp.r_[header[sp.newaxis, :], data_out]
            sp.savetxt(out_fname, data_out, delimiter='\t', fmt='%s')
Example #49
0
def get_gene_expression(CFG, fn_out=None, strain_subset=None):

    if CFG['verbose']:
        sys.stdout.write('Quantifying gene expression ...\n')

    ### load gene information
    if CFG['is_matlab']:
        genes = scio.loadmat(CFG['fname_genes'],
                             struct_as_record=False)['genes'][0, :]
        numgenes = len(genes)
    else:
        genes = cPickle.load(open(CFG['fname_genes'], 'r'))[0]
        numgenes = genes.shape[0]

    ### open hdf5 file containing graph count information
    IN = h5py.File(CFG['fname_count_in'], 'r')
    strains = IN['strains'][:].astype('str')
    if strain_subset is None:
        strain_idx = sp.arange(strains.shape[0])
    else:
        strain_idx = sp.where(sp.in1d(strains, strain_subset))[0]
    gene_counts = sp.zeros((numgenes, strain_idx.shape[0]), dtype='float')
    gene_names = sp.array([x.name for x in genes], dtype='str')

    if CFG['is_matlab']:
        seg_lens = IN['seg_len'][:, 0]
        gene_ids_segs = IN['gene_ids_segs'][0, :].astype('int') - 1
    else:
        seg_lens = IN['seg_len'][:]
        gene_ids_segs = IN['gene_ids_segs'][:].astype('int')

    ### no longer assume that the gene_ids_segs are sorted by gene ID
    s_idx = sp.argsort(gene_ids_segs[:, 0], kind='mergesort')
    _, u_idx = sp.unique(gene_ids_segs[s_idx, 0], return_index=True)
    s_idx = s_idx[u_idx]
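    # s_idx now holds, for each gene ID in sorted order, the row index of its
    # first segment: the stable mergesort preserves the original segment order
    # within a gene and sp.unique(..., return_index=True) picks the first
    # occurrence of every gene ID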

    ### iterate over genes
    #seg_offset = 0
    #tut = sp.where(gene_names == 'ENSG00000163812.9')[0]
    #for gidx in tut:
    for gidx, iidx in enumerate(s_idx):

        if CFG['verbose']:
            log_progress(gidx, numgenes, 100)
        ### get idx of non alternative segments
        if CFG['is_matlab']:
            #non_alt_idx = get_non_alt_seg_ids_matlab(genes[gidx])
            #seg_idx = sp.arange(seg_offset, seg_offset + genes[gidx].segmentgraph[0, 2].shape[0])
            seg_idx = sp.arange(iidx,
                                iidx + genes[gidx].segmentgraph[0, 2].shape[0])
            if len(seg_idx) == 0:
                continue
        else:
            #non_alt_idx = genes[gidx].get_non_alt_seg_ids()
            #seg_idx = sp.arange(seg_offset, seg_offset + genes[gidx].segmentgraph.seg_edges.shape[0])
            seg_idx = sp.arange(
                iidx, iidx + genes[gidx].segmentgraph.seg_edges.shape[0])

        gene_idx = gene_ids_segs[seg_idx]
        if len(gene_idx.shape) > 0:
            gene_idx = gene_idx[0]

        if CFG['is_matlab']:
            assert (IN['gene_names'][gene_idx] == genes[gidx].name)
        else:
            assert (IN['gene_names'][:][gene_idx] == genes[gidx].name)
        assert (genes[gidx].name == gene_names[gidx])
        #seg_idx = seg_idx[non_alt_idx]

        ### compute gene expression as the read count over all non alternative segments
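        ### (the dot product of per-segment coverage and segment length gives
        ### the number of aligned bases over the gene; dividing by the read
        ### length converts this into an approximate read count)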
        if CFG['is_matlab']:
            #gene_counts[gidx, :] = sp.dot(IN['segments'][:, seg_idx], IN['seg_len'][seg_idx, 0]) / sp.sum(IN['seg_len'][seg_idx, 0])
            gene_counts[gidx, :] = sp.dot(
                IN['segments'][:, seg_idx][strain_idx],
                seg_lens[seg_idx]) / CFG['read_length']
            #seg_offset += genes[gidx].segmentgraph[0, 2].shape[0]
        else:
            #gene_counts[gidx, :] = sp.dot(IN['segments'][seg_idx, :].T, IN['seg_len'][:][seg_idx]) / sp.sum(IN['seg_len'][:][seg_idx])
            if seg_idx.shape[0] > 1:
                gene_counts[gidx, :] = sp.dot(
                    IN['segments'][seg_idx, :][:, strain_idx].T,
                    seg_lens[seg_idx, 0]) / CFG['read_length']
            else:
                gene_counts[gidx, :] = IN['segments'][
                    seg_idx, :][strain_idx] * seg_lens[seg_idx,
                                                       0] / CFG['read_length']
            #seg_offset += genes[gidx].segmentgraph.seg_edges.shape[0]

    IN.close()

    if CFG['verbose']:
        sys.stdout.write('\n... done.\n')

    ### write results to hdf5
    if fn_out is not None:
        OUT = h5py.File(fn_out, 'w')
        OUT.create_dataset(name='strains', data=strains[strain_idx])
        OUT.create_dataset(name='genes', data=gene_names)
        OUT.create_dataset(name='raw_count',
                           data=gene_counts,
                           compression="gzip")
        OUT.close()

    return (gene_counts, strains, gene_names)
### event id
fin_data.append(full_data[:, 0])
### event pos
fin_data.append(sp.array([x[1] + '-' + ':'.join(x[2:8])  for x in full_data]))
### strand
fin_data.append(full_data[:, 11])
### ensemble id
fin_data.append(full_data[:, 8])
### gene name
fin_data.append(full_data[:, 9])
### max dPSI
fin_data.append(full_data[:, 10])
### coding status
fin_data.append(full_data[:, 12])
### overlapping SNVs
snvs = []
for i in range(full_data.shape[0]):
    tmp = []
    for j in sp.where(snv_data[:, 1] == full_data[i, 0])[0]:
        tmp.append(snv_data[j, 0])
    if len(tmp) > 0:
        snvs.append(','.join(tmp))
    else:
        snvs.append('NA')
fin_data.append(sp.array(snvs))

### gen header
header = sp.array(['event_id', 'event_pos', 'strand', 'ensemble_id', 'gene_name', 'max_dpsi', 'coding_status', 'overlap_snv'])
fin_data = sp.r_[header[sp.newaxis, :], sp.array(fin_data).T]
sp.savetxt(os.path.join(basedir, 'supplemental_table_exonization_candidates.tsv'), fin_data, fmt='%s', delimiter='\t') 
Example #51
0
def adjust_dispersion(counts, dmatrix1, disp_raw, disp_fitted, idx, sf, CFG):

    if CFG['verbose']:
        print 'Start to estimate adjusted dispersions.'

    varLogDispSamp = polygamma(
        1, (dmatrix1.shape[0] - dmatrix1.shape[1]) /
        2)  ## number of samples - number of coefficients
    varPrior = calculate_varPrior(disp_raw, disp_fitted, idx, varLogDispSamp)
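    # varLogDispSamp approximates the sampling variance of a log dispersion
    # estimate via the trigamma function at half the residual degrees of
    # freedom; varPrior is the prior variance then used to shrink raw
    # dispersions towards the fitted trend (a DESeq2-style moderation)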

    if CFG['parallel'] > 1:
        disp_adj = sp.empty((counts.shape[0], 1))
        disp_adj.fill(sp.nan)
        disp_adj_conv = sp.zeros_like(disp_adj, dtype='bool')

        pool = mp.Pool(processes=CFG['parallel'],
                       initializer=lambda: sig.signal(sig.SIGINT, sig.SIG_IGN))
        binsize = 30
        idx_chunks = [
            sp.arange(x, min(x + binsize, counts.shape[0]))
            for x in range(0, counts.shape[0], binsize)
        ]

        try:
            result = [
                pool.apply_async(adjust_dispersion_chunk,
                                 args=(
                                     counts[cidx, :],
                                     dmatrix1,
                                     disp_raw[cidx],
                                     disp_fitted[cidx],
                                     varPrior,
                                     sf,
                                     CFG,
                                     cidx,
                                 )) for cidx in idx_chunks
            ]
            res_cnt = 0
            while result:
                tmp = result.pop(0).get()
                for i, j in enumerate(tmp[2]):
                    if CFG['verbose']:
                        log_progress(res_cnt, counts.shape[0])
                        res_cnt += 1
                    disp_adj[j] = tmp[0][i]
                    disp_adj_conv[j] = tmp[1][i]
            if CFG['verbose']:
                log_progress(counts.shape[0], counts.shape[0])
                print ''
            pool.terminate()
            pool.join()
        except KeyboardInterrupt:
            print >> sys.stderr, 'Keyboard Interrupt - exiting'
            pool.terminate()
            pool.join()
            sys.exit(1)
    else:
        (disp_adj, disp_adj_conv,
         _) = adjust_dispersion_chunk(counts,
                                      dmatrix1,
                                      disp_raw,
                                      disp_fitted,
                                      varPrior,
                                      sf,
                                      CFG,
                                      sp.arange(counts.shape[0]),
                                      log=CFG['verbose'])

    if CFG['debug']:
        fig = plt.figure(figsize=(8, 6), dpi=100)
        ax = fig.add_subplot(111)
        idx = sp.where(~sp.isnan(disp_adj))[0]
        ax.plot(
            sp.mean(sp.log10(counts + 1), axis=1)[idx], disp_adj[idx], 'bo')
        ax.set_title('Adjusted Dispersion Estimate')
        ax.set_xlabel('Mean expression count')
        ax.set_ylabel('Dispersion')
        plt.savefig('dispersion_adjusted.pdf',
                    format='pdf',
                    bbox_inches='tight')
        plt.close(fig)

    return (disp_adj, disp_adj_conv)
Example #52
0
def doskysub(straight, ylen, xlen, sci, yback, sky2x, sky2y, ccd2wave, disp,
             mswave, offsets, cutoff, airmass):
    sci = sci.copy()

    # If cutoff is not a float, we are using the blueside
    locutoff = cutoff
    hicutoff = 10400.

    nsci = sci.shape[0]
    width = sci.shape[2]

    # Perform telluric correction
    coords = spectools.array_coords(sci[0].shape)
    x = coords[1].flatten()
    y = coords[0].flatten()

    for k in range(nsci):
        w = genfunc(x, y, ccd2wave[k])
        telluric = correct_telluric.correct(w, airmass[k], disp)
        sci[k] *= telluric.reshape(sci[k].shape)
    del coords, x, y, telluric

    # Create arrays for output images
    outcoords = spectools.array_coords((ylen, xlen))
    outcoords[1] *= disp
    outcoords[1] += mswave - disp * xlen / 2.
    xout = outcoords[1].flatten()
    yout = outcoords[0].flatten()

    out = scipy.zeros((nsci, ylen, xlen))

    fudge = scipy.ceil(abs(offsets).max())
    bgimage = scipy.zeros((nsci, ylen + fudge, xlen))
    varimage = bgimage.copy()

    bgcoords = spectools.array_coords((ylen + fudge, xlen))
    bgcoords[1] *= disp
    bgcoords[1] += mswave - disp * xlen / 2.

    #
    # Cosmic Ray Rejection and Background Subtraction
    #
    yfit = yback.flatten()
    ycond = (yfit > straight - 0.4) & (yfit < straight + ylen - 0.6)

    coords = spectools.array_coords(yback.shape)
    xvals = coords[1].flatten()
    yvals = coords[0].flatten()

    ap_y = scipy.zeros(0)
    aper = scipy.zeros(0)
    for k in range(nsci):
        xfit = genfunc(xvals, yfit - straight, ccd2wave[k])
        zfit = sci[k].flatten()

        x = xfit[ycond]
        y = yfit[ycond]
        z = zfit[ycond]

        # The plus/minus 20 provides a better solution for the edges
        wavecond = (x > locutoff - 20.) & (x < hicutoff + 20.)
        x = x[wavecond]
        y = y[wavecond]
        z = z[wavecond]

        # If only resampling...
        if RESAMPLE == 1:
            coords = outcoords.copy()
            samp_x = genfunc(xout, yout, sky2x[k])
            samp_y = genfunc(xout, yout, sky2y[k])
            coords[0] = samp_y.reshape(coords[0].shape)
            coords[1] = samp_x.reshape(coords[1].shape)
            out[k] = scipy.ndimage.map_coordinates(sci[k],
                                                   coords,
                                                   output=scipy.float64,
                                                   order=5,
                                                   cval=-32768,
                                                   prefilter=False)
            out[k][xout.reshape(coords[1].shape) < locutoff] = scipy.nan
            out[k][xout.reshape(coords[1].shape) > hicutoff] = scipy.nan
            out[k][out[k] == -32768] = scipy.nan
            continue

        bgfit = skysub.skysub(x, y, z, disp)

        background = zfit.copy()
        for indx in range(background.size):
            x0 = xfit[indx]
            y0 = yfit[indx]
            if x0 < locutoff - 10 or x0 > hicutoff + 10:
                background[indx] = scipy.nan
            else:
                background[indx] = interpolate.bisplev(x0, y0, bgfit)
        sub = zfit - background
        sub[scipy.isnan(sub)] = 0.
        sky = sub * 0.
        sky[ycond] = sub[ycond]
        sky = sky.reshape(sci[k].shape)
        sub = sky.copy()

        background[scipy.isnan(background)] = 0.

        # Note that 2d filtering may flag very sharp source traces!
        sub = sub.reshape(sci[k].shape)
        sky = ndimage.median_filter(sky, 5)
        diff = sub - sky
        model = scipy.sqrt(background.reshape(sci[k].shape) + sky)
        crmask = scipy.where(diff > 4. * model, diff, 0.)
        sub -= crmask
        sci[k] -= crmask
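        # Pixels exceeding the median-filtered sky by more than four times the
        # Poisson noise estimate (sqrt of background + sky) are flagged as
        # cosmic-ray hits; their excess flux is subtracted from both the
        # sky-subtracted frame and the science frame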

        # Create straightened slit
        coords = outcoords.copy()
        samp_x = genfunc(xout, yout, sky2x[k])
        samp_y = genfunc(xout, yout, sky2y[k])
        coords[0] = samp_y.reshape(coords[0].shape)
        coords[1] = samp_x.reshape(coords[1].shape)
        out[k] = scipy.ndimage.map_coordinates(sci[k],
                                               coords,
                                               output=scipy.float64,
                                               order=5,
                                               cval=magicnum,
                                               prefilter=False)
        out[k][xout.reshape(coords[1].shape) < locutoff] = scipy.nan
        out[k][xout.reshape(coords[1].shape) > hicutoff] = scipy.nan
        out[k][out[k] == magicnum] = scipy.nan

        # Output bgsub image
        coords = bgcoords.copy()
        bgy = bgcoords[0].flatten() + offsets[k]
        bgx = bgcoords[1].flatten()
        samp_x = genfunc(bgx, bgy, sky2x[k])
        samp_y = genfunc(bgx, bgy, sky2y[k])
        coords[0] = samp_y.reshape(coords[0].shape)
        coords[1] = samp_x.reshape(coords[1].shape)

        varimage[k] = scipy.ndimage.map_coordinates(sci[k],
                                                    coords,
                                                    output=scipy.float64,
                                                    order=5,
                                                    cval=magicnum,
                                                    prefilter=False)

        # Only include good data (ie positive variance, wavelength
        #   greater than dichroic cutoff)
        cond = (bgcoords[0] + offsets[k] < 0.) | (bgcoords[0] + offsets[k] >
                                                  ylen)
        cond = (varimage[k] <= 0) | cond
        cond = (bgcoords[1] < locutoff) | (bgcoords[1] > hicutoff) | cond
        varimage[k][cond] = scipy.nan

        bgimage[k] = scipy.ndimage.map_coordinates(sub,
                                                   coords,
                                                   output=scipy.float64,
                                                   order=5,
                                                   cval=magicnum,
                                                   prefilter=False)
        bgimage[k][cond] = scipy.nan
        bgimage[k][bgimage[k] == magicnum] = scipy.nan  # Shouldn't be
        #   necessary...

    if RESAMPLE == 1:
        return out, bgimage, varimage

    bgimage = fastmed(bgimage)
    varimage = fastmed(varimage) / nsci

    return out, bgimage, varimage
Example #53
0
    def __init__(self,
                 inRaster,
                 inVector,
                 inField='Class',
                 outModel=None,
                 inSplit=1,
                 inSeed=0,
                 outMatrix=None,
                 inClassifier='GMM'):

        learningProgress = progressBar('Learning model...', 6)

        # Convert vector to raster
        try:
            try:
                temp_folder = tempfile.mkdtemp()
                filename = os.path.join(temp_folder, 'temp.tif')

                data = gdal.Open(inRaster, gdal.GA_ReadOnly)
                shp = ogr.Open(inVector)

                lyr = shp.GetLayer()
            except:
                QgsMessageLog.logMessage(
                    "Problem with making tempfile or opening raster or vector")

            # Create temporary data set
            try:
                driver = gdal.GetDriverByName('GTiff')
                dst_ds = driver.Create(filename, data.RasterXSize,
                                       data.RasterYSize, 1, gdal.GDT_Byte)
                dst_ds.SetGeoTransform(data.GetGeoTransform())
                dst_ds.SetProjection(data.GetProjection())
                OPTIONS = 'ATTRIBUTE=' + inField
                gdal.RasterizeLayer(dst_ds, [1], lyr, None, options=[OPTIONS])
                data, dst_ds, shp, lyr = None, None, None, None
            except:
                QgsMessageLog.logMessage("Cannot create temporary data set")

            # Load Training set
            try:
                X, Y = dataraster.get_samples_from_roi(inRaster, filename)
            except:
                QgsMessageLog.logMessage(
                    "Problem while getting samples from ROI with" + inRaster)
                QgsMessageLog.logMessage(
                    "Are you sure to have only integer values in your " +
                    str(inField) + " column ?")

            [n, d] = X.shape
            C = int(Y.max())
            SPLIT = inSplit
            os.remove(filename)
            os.rmdir(temp_folder)

            # Scale the data
            X, M, m = self.scale(X)

            learningProgress.addStep()  # Add Step to ProgressBar

            # The learning process takes a split of the ground-truth pixels for training and keeps the remaining pixels for testing

            try:
                if SPLIT < 1:

                    # Random selection of the sample
                    x = sp.array([]).reshape(0, d)
                    y = sp.array([]).reshape(0, 1)
                    xt = sp.array([]).reshape(0, d)
                    yt = sp.array([]).reshape(0, 1)

                    sp.random.seed(inSeed)  # Set the random generator state
                    for i in range(C):
                        t = sp.where((i + 1) == Y)[0]
                        nc = t.size
                        ns = int(nc * SPLIT)
                        rp = sp.random.permutation(nc)
                        x = sp.concatenate((X[t[rp[0:ns]], :], x))
                        xt = sp.concatenate((X[t[rp[ns:]], :], xt))
                        y = sp.concatenate((Y[t[rp[0:ns]]], y))
                        yt = sp.concatenate((Y[t[rp[ns:]]], yt))

                else:
                    x, y = X, Y
            except:
                QgsMessageLog.logMessage("Problem while learning if SPLIT <1")

            learningProgress.addStep()  # Add Step to ProgressBar
            # Train Classifier
            if inClassifier == 'GMM':
                try:
                    # tau=10.0**sp.arange(-8,8,0.5)
                    model = gmmr.GMMR()
                    model.learn(x, y)
                    # htau,err = model.cross_validation(x,y,tau)
                    # model.tau = htau
                except:
                    QgsMessageLog.logMessage("Cannot train with GMMM")
            else:
                try:
                    from sklearn import neighbors
                    from sklearn.svm import SVC
                    from sklearn.ensemble import RandomForestClassifier

                    try:
                        model_selection = True
                        from sklearn.model_selection import StratifiedKFold
                        from sklearn.model_selection import GridSearchCV

                    except:
                        model_selection = False
                        from sklearn.cross_validation import StratifiedKFold
                        from sklearn.grid_search import GridSearchCV
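                        # scikit-learn >= 0.18 moved StratifiedKFold and
                        # GridSearchCV into sklearn.model_selection; older
                        # releases only have the deprecated cross_validation
                        # and grid_search modules, hence the model_selection
                        # flag and the two StratifiedKFold call styles below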

                    try:

                        # As QGIS on Windows does not handle multiprocessing, force a single thread on non-Linux systems
                        if os.name == 'posix':
                            n_jobs = -1
                        else:
                            n_jobs = 1

                        #
                        if inClassifier == 'RF':
                            param_grid_rf = dict(n_estimators=3**sp.arange(
                                1, 5),
                                                 max_features=sp.arange(1, 4))
                            y.shape = (y.size, )
                            if model_selection:
                                cv = StratifiedKFold(n_splits=3).split(x, y)
                                #cv = cv.get_n_splits(y)
                            else:
                                cv = StratifiedKFold(y, n_folds=3)

                            grid = GridSearchCV(RandomForestClassifier(),
                                                param_grid=param_grid_rf,
                                                cv=cv,
                                                n_jobs=n_jobs)
                            grid.fit(x, y)
                            model = grid.best_estimator_
                            model.fit(x, y)
                        elif inClassifier == 'SVM':
                            param_grid_svm = dict(gamma=2.0**sp.arange(-4, 4),
                                                  C=10.0**sp.arange(-2, 5))
                            y.shape = (y.size, )
                            if model_selection:
                                cv = StratifiedKFold(n_splits=5).split(x, y)
                            else:
                                cv = StratifiedKFold(y, n_folds=5)
                            grid = GridSearchCV(SVC(),
                                                param_grid=param_grid_svm,
                                                cv=cv,
                                                n_jobs=n_jobs)
                            grid.fit(x, y)
                            model = grid.best_estimator_
                            model.fit(x, y)
                        elif inClassifier == 'KNN':
                            param_grid_knn = dict(
                                n_neighbors=sp.arange(1, 20, 4))
                            y.shape = (y.size, )
                            if model_selection:
                                cv = StratifiedKFold(n_splits=3).split(x, y)
                            else:
                                cv = StratifiedKFold(y, n_folds=3)
                            grid = GridSearchCV(
                                neighbors.KNeighborsClassifier(),
                                param_grid=param_grid_knn,
                                cv=cv,
                                n_jobs=n_jobs)
                            grid.fit(x, y)
                            model = grid.best_estimator_
                            model.fit(x, y)
                    except:
                        QgsMessageLog.logMessage(
                            "Cannot train with classifier " + inClassifier)

                except:
                    QgsMessageLog.logMessage(
                        "You must have sklearn dependencies on your computer. Please consult the documentation for installation."
                    )

            learningProgress.prgBar.setValue(5)  # Add Step to ProgressBar
            # Assess the quality of the model
            if SPLIT < 1:
                # if  inClassifier == 'GMM':
                #          = model.predict(xt)[0]
                # else:
                yp = model.predict(xt)
                CONF = ai.CONFUSION_MATRIX()
                CONF.compute_confusion_matrix(yp, yt)
                sp.savetxt(outMatrix,
                           CONF.confusion_matrix,
                           delimiter=',',
                           fmt='%1.4d')

            # Save Tree model
            if outModel is not None:
                output = open(outModel, 'wb')
                pickle.dump([model, M, m], output)
                output.close()

            learningProgress.addStep()  # Add Step to ProgressBar

            # Close progressBar
            learningProgress.reset()
            learningProgress = None
        except:
            learningProgress.reset()
Example #54
0
def skysub(x, y, z, scale):
    """
    skysub(x,y,z,scale)

    Routine to determine the 2d background from data. (x,y) are the
      coordinates of the data, usually in the *corrected* frame.

    Inputs:
      x     - 1d array describing x-coordinate, usually wavelength
      y     - 1d array describing y-coordinate, usually corrected spatial
            position
      z     - data each position (x,y)
      scale - approximate output scale (for knot placement). It is not, in
            general, possible to calculate this from x because the
            input coordinates are not on a regular grid.

    Outputs:
      2d spline model of the background
    """
    cond = (scipy.isfinite(z)) & (z > 0.)
    x = x[cond]
    y = y[cond]
    z = z[cond]

    x0 = x.copy()
    y0 = y.copy()
    z0 = z.copy()

    height = int(y.max() - y.min())
    width = int(x.max() - x.min())
    npoints = x.size

    midpt = y.mean()
    """
    Very wide slits need special attention. Here we fit a first order
      correction to the slit and subtract it away before doing the high
      pixel rejection (the problem is if there is a small gradient across
      a wide slit, the top and bottom pixels may differ significantly,
      but these pixels may be close in *wavelength* and so locally (on
      the CCD) low pixels will be rejected in the smoothing
    """
    if height > WIDE:
        zbak = z.copy()
        args = y.argsort()
        revargs = args.argsort()
        ymodel = ndimage.percentile_filter(z[args], 30., size=height)[revargs]
        fit = special_functions.lsqfit(ymodel, 'polynomial', 1)

        if fit['coeff'][1] * float(ymodel.size) / fit['coeff'][0] < 0.05:
            pass
        else:
            ymodel = special_functions.genfunc(scipy.arange(ymodel.size), 0,
                                               fit)
            ymodel -= ymodel.mean()
            z -= ymodel

    # Filter locally (in wavelength space) high points
    args = x.argsort()
    revargs = args.argsort()

    smooth = ndimage.percentile_filter(z[args], 35., size=height)[revargs]

    diff = z - smooth
    # We assume poisson statistics....
    var = scipy.sqrt(scipy.fabs(z))
    sigma = diff / var

    args = y.argsort()
    revargs = args.argsort()

    t = ndimage.median_filter(sigma[args], 9)
    t = ndimage.gaussian_filter(t, width)  #[revargs]
    # Source detection/rejection
    # Reject yvalues > 1. sigma, and weight remaining pixels
    w = (1.0 - t) / abs(z[args])

    if AGGRESSIVE:
        g = scipy.where(w <= 0, 0, 1)
        g = ndimage.maximum_filter(g, width * 3)
        g = ndimage.minimum_filter(g, width * 7)

        s = sigma[args].copy()
        b = ndimage.minimum_filter(g, width * 5)
        xi = scipy.arange(t.size)
        fitdata = scipy.empty((xi[g == 1].size, 2))
        fitdata[:, 0] = xi[g == 1].copy()
        fitdata[:, 1] = t[g == 1].copy()
        fit = special_functions.lsqfit(fitdata, 'polynomial', 3)
        fit = special_functions.genfunc(xi, 0., fit)

        diff = (t - fit)[b == 1]
        s = diff.std()
        while (abs(t - fit)[(g == 1) & (b == 0)] > 2.5 * s).any():
            g = b.copy()
            b = ndimage.minimum_filter(g, width * 5)
            fitdata = scipy.empty((xi[g == 1].size, 2))
            fitdata[:, 0] = xi[g == 1].copy()
            fitdata[:, 1] = t[g == 1].copy()
            fit = special_functions.lsqfit(fitdata, 'polynomial', 3)
            fit = special_functions.genfunc(xi, 0., fit)

            diff = (t - fit)[b == 1]
            s = diff.std()

        w *= g

    skycond = ((w > 0.) & (z > 0))
    x = x[skycond]
    y = y[skycond]
    z = z[skycond]

    # Reject residual high pixels (and very low pixels too!)
    args = x.argsort()
    revargs = args.argsort()
    smooth = ndimage.median_filter(z[args], height / 4.)[revargs]

    diff = z - smooth
    var = scipy.sqrt(smooth)

    cond = abs(diff) < 4. * var
    x = x[cond]
    y = y[cond]
    z = z[cond]

    kx = 3
    ky = 1

    # If the slit is long, return to original data and increase the order
    #   of the y-fit.
    if height > WIDE:
        z = zbak[skycond]
        z = z[cond].astype(scipy.float64)

        #    if height>WIDE*1.5:
        #        ky = 3

        cond = z > 0.
        x = x[cond]
        y = y[cond]
        z = z[cond]

    w = 1. / z

    if x.size < 5. * width:
        kx = 1
        ky = 1

    # Create knots...

    innertx = scipy.arange(x.min() + scale / 2.,
                           x.max() - scale / 2., 3. * scale / 4.)
    """
    tx = scipy.zeros(innertx.size+kx*2+2)
    tx[0:kx+1] = x.min()
    tx[kx+1:innertx.size+kx+1] = innertx.copy()
    tx[innertx.size+kx+1:] = x.max()
    """
    tx = scipy.linspace(x.min(), x.max(), innertx.size)
    xsort = scipy.sort(x)
    tmp = [x.min()]
    num = []
    cnt = 0
    j = 1
    for i in range(xsort.size):
        while xsort[i] > tx[j]:
            if cnt > 0:
                if len(num) == 0 or cnt > 1 or num[-1] > 1:
                    tmp.append(tx[j])
                    num.append(cnt)
                    cnt = 0
            j += 1
        cnt += 1
    tmp.append(x.max())
    tx = scipy.asarray(tmp)
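    # The loop above thins the uniform knot grid: a candidate knot is only
    # kept if data points fall between it and the previously accepted knot,
    # which keeps the spline system well conditioned in sparsely sampled
    # regions of the slit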
    ty = scipy.zeros(ky * 2 + 2)
    ty[0:ky + 1] = y.min()
    ty[ky + 1:] = y.max()

    #del innertx
    # ...and fit.
    bgfit = interpolate.bisplrep(x,
                                 y,
                                 z,
                                 w,
                                 tx=tx,
                                 ty=ty,
                                 kx=kx,
                                 ky=ky,
                                 task=-1,
                                 nxest=tx.size,
                                 nyest=ty.size)
    del x, y, z, w, tx, ty
    return bgfit
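A minimal usage sketch for the routine above (illustrative values; it assumes
skysub and its module-level imports and constants such as WIDE and AGGRESSIVE
are in scope, and mirrors Example #52, where the returned spline is evaluated
with interpolate.bisplev):

import scipy
from scipy import interpolate

# toy inputs: scattered samples of a smooth, strictly positive sky background
x = scipy.random.uniform(6000., 6500., 4000)    # wavelength coordinate
y = scipy.random.uniform(0., 20., 4000)         # corrected spatial position
z = 200. + 0.05 * (x - 6000.) + scipy.random.normal(0., 3., 4000)

bgfit = skysub(x, y, z, scale=1.5)              # fit the 2d background spline
print interpolate.bisplev(6250., 10., bgfit)    # evaluate the model at one point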
def biased_pagerank(G,
                    num,
                    alpha=0.05,
                    personalization=None,
                    max_iter=100,
                    tol=1.0e-6,
                    weight='weight',
                    dangling=None):
    '''
    Parameters
    ----------
    G : graph
      A NetworkX graph.  Undirected graphs will be converted to a directed
      graph with two directed edges for each undirected edge.

    num : integer
      Label number of a seed-node.

    alpha : float, optional
      Damping parameter for biased PageRank, default=0.05.

    personalization: dict, optional
      The "personalization vector" consisting of a dictionary with a
      key for every graph node and nonzero personalization value for each node.
      By default, a uniform distribution is used.

    max_iter : integer, optional
      Maximum number of iterations in power method eigenvalue solver.

    tol : float, optional
      Error tolerance used to check convergence in power method solver.

    weight : key, optional
      Edge data key to use as weight.  If None weights are set to 1.

    dangling: dict, optional
      The outedges to be assigned to any "dangling" nodes, i.e., nodes without
      any outedges. The dict key is the node the outedge points to and the dict
      value is the weight of that outedge. By default, dangling nodes are given
      outedges according to the personalization vector (uniform if not
      specified). This must be selected to result in an irreducible transition
      matrix (see notes under google_matrix). It may be common to have the
      dangling dict to be the same as the personalization dict.

    Returns
    -------
    pagerank : dictionary
       Dictionary of nodes with PageRank as value.
    '''

    # Number of nodes
    N = len(G)
    if N == 0:
        return {}

    nodelist = G.nodes()

    # Adjacency matrix
    M = nx.to_scipy_sparse_matrix(G,
                                  nodelist=nodelist,
                                  weight=weight,
                                  dtype=float)

    S = scipy.array(M.sum(axis=1)).flatten()

    S[S != 0] = 1.0 / S[S != 0]
    Q = scipy.sparse.spdiags(S.T, 0, *M.shape, format='csr')

    M = Q * M

    # Initialize vector
    x = scipy.repeat(1.0 / N, N)

    # Personalization vector
    if personalization is None:
        p = scipy.repeat(1.0 / N, N)
    else:
        missing = set(nodelist) - set(personalization)
        if missing:
            raise NetworkXError('Personalization vector dictionary '
                                'must have a value for every node. '
                                'Missing nodes %s' % missing)
        p = scipy.array([personalization[n] for n in nodelist], dtype=float)
        p = p / p.sum()

    # Dangling nodes
    if dangling is None:
        dangling_weights = p
    else:
        missing = set(nodelist) - set(dangling)
        if missing:
            raise NetworkXError('Dangling node dictionary '
                                'must have a value for every node. '
                                'Missing nodes %s' % missing)
        # Convert the dangling dictionary into an array in nodelist order
        dangling_weights = scipy.array([dangling[n] for n in nodelist],
                                       dtype=float)
        dangling_weights /= dangling_weights.sum()
    is_dangling = scipy.where(S == 0)[0]

    # power iteration: make up to max_iter iterations
    for _ in range(max_iter):
        xlast = x
        x = alpha * (x * M + sum(x[is_dangling]) * dangling_weights)

        x[num] += np.float64((1 - alpha) * p[num])

        # check convergence, l1 norm
        err = scipy.absolute(x - xlast).sum()
        if err < N * tol:
            print 'loop:', _ + 1
            return dict(zip(nodelist, map(float, x)))
    raise NetworkXError('pagerank_scipy: power iteration failed to converge '
                        'in %d iterations.' % max_iter)
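A usage sketch for biased_pagerank (illustrative; it assumes the enclosing
module imports networkx as nx, scipy and numpy as np and defines NetworkXError,
all of which are referenced in the function body above). Note that num is the
positional index of the seed node in G.nodes():

import networkx as nx

G = nx.karate_club_graph()                 # any NetworkX graph
scores = biased_pagerank(G, num=0)         # bias the random walk towards node 0
top5 = sorted(scores, key=scores.get, reverse=True)[:5]
print 'top nodes:', top5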
Example #56
0
    def predict_image(self,
                      inRaster,
                      outRaster,
                      model,
                      inMask=None,
                      confidenceMap=None,
                      NODATA=-10000,
                      SCALE=None,
                      classifier='GMM'):
        """!@brief The function classify the whole raster image, using per block image analysis.
        
        The classifier is given in classifier and options in kwargs
        
            Input :
                inRaster : Filtered image name ('sample_filtered.tif',str)
                outRaster :Raster image name ('outputraster.tif',str)
                model : model file got from precedent step ('model', str)
                inMask : optional raster mask; only pixels with a non-zero mask value are classified (str)
                confidenceMap :  map of confidence per pixel
                NODATA : Default set to -10000 (int)
                SCALE : Default set to None
                classifier = Default 'GMM'
                
            Output :
                nothing but save a raster image and a confidence map if asked
        """
        # Open Raster and get additional information

        raster = gdal.Open(inRaster, gdal.GA_ReadOnly)
        if raster is None:
            print 'Impossible to open ' + inRaster
            exit()

        if inMask is None:
            mask = None
        else:
            mask = gdal.Open(inMask, gdal.GA_ReadOnly)
            if mask is None:
                print 'Impossible to open ' + inMask
                exit()
            # Check size
            if (raster.RasterXSize != mask.RasterXSize) or (
                    raster.RasterYSize != mask.RasterYSize):
                print 'Image and mask should be of the same size'
                exit()
        if SCALE is not None:
            M, m = sp.asarray(SCALE[0]), sp.asarray(SCALE[1])

        # Get the size of the image
        d = raster.RasterCount
        nc = raster.RasterXSize
        nl = raster.RasterYSize

        # Get the geoinformation
        GeoTransform = raster.GetGeoTransform()
        Projection = raster.GetProjection()

        # Get block size
        band = raster.GetRasterBand(1)
        block_sizes = band.GetBlockSize()
        x_block_size = block_sizes[0]
        y_block_size = block_sizes[1]
        del band

        ## Initialize the output
        driver = gdal.GetDriverByName('GTiff')
        dst_ds = driver.Create(outRaster, nc, nl, 1, gdal.GDT_Byte)
        dst_ds.SetGeoTransform(GeoTransform)
        dst_ds.SetProjection(Projection)
        out = dst_ds.GetRasterBand(1)

        if confidenceMap:
            dst_confidenceMap = driver.Create(confidenceMap, nc, nl, 1,
                                              gdal.GDT_Float32)
            dst_confidenceMap.SetGeoTransform(GeoTransform)
            dst_confidenceMap.SetProjection(Projection)
            out_confidenceMap = dst_confidenceMap.GetRasterBand(1)

        ## Perform the classification
        predictProgress = progressBar('Classifying image...',
                                      nl * y_block_size)
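        # Iterate over the raster in its native GDAL block size so that only
        # one block of pixels is held in memory at a time; the last block in
        # each direction is truncated to the remaining lines/columns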

        for i in range(0, nl, y_block_size):
            predictProgress.addStep()
            if i + y_block_size < nl:  # Check for size consistency in Y
                lines = y_block_size
            else:
                lines = nl - i
            for j in range(0, nc,
                           x_block_size):  # Check for size consistency in X
                if j + x_block_size < nc:
                    cols = x_block_size
                else:
                    cols = nc - j

                # Load the data and Do the prediction
                X = sp.empty((cols * lines, d))
                for ind in xrange(d):
                    X[:, ind] = raster.GetRasterBand(int(ind + 1)).ReadAsArray(
                        j, i, cols, lines).reshape(cols * lines)

                # Do the prediction
                if mask is None:
                    mask_temp = raster.GetRasterBand(1).ReadAsArray(
                        j, i, cols, lines).reshape(cols * lines)
                    t = sp.where((mask_temp != 0) & (X[:, 0] != NODATA))[0]
                    yp = sp.zeros((cols * lines, ))
                    K = sp.zeros((cols * lines, ))

                else:
                    mask_temp = mask.GetRasterBand(1).ReadAsArray(
                        j, i, cols, lines).reshape(cols * lines)
                    t = sp.where((mask_temp != 0) & (X[:, 0] != NODATA))[0]
                    yp = sp.zeros((cols * lines, ))
                    K = sp.zeros((cols * lines, ))

                # TODO: Change this part accordingly ...
                if t.size > 0:
                    if confidenceMap and classifier == 'GMM':
                        yp[t], K[t] = model.predict(
                            self.scale(X[t, :], M=M, m=m), None, confidenceMap)

                    elif confidenceMap:
                        yp[t] = model.predict(self.scale(X[t, :], M=M, m=m))
                        K[t] = sp.amax(model.predict_proba(
                            self.scale(X[t, :], M=M, m=m)),
                                       axis=1)

                    else:
                        yp[t] = model.predict(self.scale(X[t, :], M=M, m=m))

                        #QgsMessageLog.logMessage('amax from predict proba is : '+str(sp.amax(model.predict.proba(self.scale(X[t,:],M=M,m=m)),axis=1)))

                # Write the data
                out.WriteArray(yp.reshape(lines, cols), j, i)
                out.FlushCache()

                if confidenceMap:
                    out_confidenceMap.WriteArray(K.reshape(lines, cols), j, i)
                    out_confidenceMap.FlushCache()

                del X, yp

        # Clean/Close variables
        predictProgress.reset()
        raster = None
        dst_ds = None
        return outRaster
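A minimal call sketch for the method above; the owning class name (here rasterClassifier) and the model object are assumptions for illustration, with model coming from the preceding training step:

tool = rasterClassifier()  # hypothetical class that defines predict_image
tool.predict_image('sample_filtered.tif',         # filtered input image
                   'outputraster.tif',            # classified output raster
                   model,                         # model saved by the training step
                   confidenceMap='confidence.tif',
                   classifier='GMM')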
Example #57
0
    def __call__(self, Xi, Xj, ni, nj, hyper_deriv=None, symmetric=False):
        """Evaluate the covariance between points `Xi` and `Xj` with derivative order `ni`, `nj`.
        
        Parameters
        ----------
        Xi : :py:class:`Matrix` or other Array-like, (`M`, `D`)
            `M` inputs with dimension `D`.
        Xj : :py:class:`Matrix` or other Array-like, (`M`, `D`)
            `M` inputs with dimension `D`.
        ni : :py:class:`Matrix` or other Array-like, (`M`, `D`)
            `M` derivative orders for set `i`.
        nj : :py:class:`Matrix` or other Array-like, (`M`, `D`)
            `M` derivative orders for set `j`.
        hyper_deriv : Non-negative int or None, optional
            The index of the hyperparameter to compute the first derivative
            with respect to. If None, no derivatives are taken. Hyperparameter
            derivatives are not supported at this point. Default is None.
        symmetric : bool, optional
            Whether or not the input `Xi`, `Xj` are from a symmetric matrix.
            Default is False.
        
        Returns
        -------
        Kij : :py:class:`Array`, (`M`,)
            Covariances for each of the `M` `Xi`, `Xj` pairs.
        
        Raises
        ------
        NotImplementedError
            If the `hyper_deriv` keyword is not None.
        """
        if hyper_deriv is not None:
            raise NotImplementedError(
                "Hyperparameter derivatives have not been implemented!")
        n_cat = scipy.asarray(scipy.concatenate((ni, nj), axis=1), dtype=int)
        X_cat = scipy.asarray(scipy.concatenate((Xi, Xj), axis=1), dtype=float)
        n_cat_unique = unique_rows(n_cat)
        k = scipy.zeros(Xi.shape[0], dtype=float)
        # Loop over unique derivative patterns:
        if self.num_proc > 1:
            pool = multiprocessing.Pool(processes=self.num_proc)
        for n_cat_state in n_cat_unique:
            idxs = scipy.where(
                scipy.asarray((n_cat == n_cat_state).all(axis=1)).squeeze())[0]
            if (n_cat_state == 0).all():
                k[idxs] = self.cov_func(Xi[idxs, :], Xj[idxs, :], *self.params)
            else:
                if self.num_proc > 1 and len(idxs) > 1:
                    k[idxs] = scipy.asarray(pool.map(
                        _ArbitraryKernelEval(self, n_cat_state),
                        X_cat[idxs, :]),
                                            dtype=float)
                else:
                    for idx in idxs:
                        k[idx] = mpmath.chop(
                            mpmath.diff(self._mask_cov_func,
                                        X_cat[idx, :],
                                        n=n_cat_state,
                                        singular=True))

        if self.num_proc > 1:
            pool.close()
        return k
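A minimal call sketch, assuming k is an instance of the kernel class this __call__ belongs to and that all inputs are (M, D) array-likes as documented above:

import scipy

Xi = scipy.array([[0.0], [0.5]])  # M=2 points, D=1
Xj = scipy.array([[1.0], [1.5]])
ni = scipy.zeros_like(Xi)         # zeroth derivative order on set i
nj = scipy.zeros_like(Xj)         # zeroth derivative order on set j
Kij = k(Xi, Xj, ni, nj)           # covariances for the M pairs, shape (2,)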
Example #58
0
def TVDI_function(inNDVI,inLST,pas=0.02,t=1,s1Min=0.3,s2Max=0.8,ss1Min=0.2,ss2Max=0.8):
    """
    Allows to calculates the TVDI.

    this function is a modified version of the IDL script published by Monica Garcia:
    (Garcia,M., Fernández, N., Villagarcía, L., Domingo, F.,  Puigdefábregas,J. & I. Sandholt. 2014. 2014. 
    Accuracy of the Temperature–Vegetation Dryness Index using MODIS under water-limited vs. 
    energy-limited evapotranspiration conditions  Remote Sensing of Environment 149, 100-117.) 

    Input:
        inNDVI: NDVI 
        inLST: land surface temperature
        pas: intervall of the NDVI


        S1min: lower threshold to determine the interval which will be used to determine the design parameters of LSTmax
        S2max: upper threshold to determine the interval which will be used to determine the design parameters of LSTmax
        ss1Min: lower threshold to determine the interval which will be used to determine the calculation of parmaètres LSTmin
        ss2Max: upper threshold to determine the interval which will be used to determine the design parameters of LSTmin

        
        t : t=0 to use Garcia M method  and t=1 to calculate the TVDI without using the threshold .
    Output: 
    
        TVDI
    """
    TVDI=sp.zeros(inLST.shape)
    if inNDVI.shape == inLST.shape :
        
        inNdvi=sp.reshape(inNDVI,(inNDVI.size))  
        inLst=sp.reshape(inLST,(inLST.size))  
        
        mini=sp.nanmin(inNdvi) # minimum NDVI value
        maxi=sp.nanmax(inNdvi) # maximum NDVI value

        arg=sp.argsort(inNdvi) # sort and return the indices of the ordered values

        inV=inNdvi[arg] # NDVI values in sorted order
        inT=inLst[arg] # temperature values in the same order
        # "pas" is the step used to split the NDVI range into intervals
        
        
        percentileMax=99.0
        percentileMin=1.0

        nObsMin=5 # minimum number of samples an interval must contain to be considered

        ni= int(round((maxi-mini)/pas ) + 1) # total number of intervals
        iValMax=0
        iValMin=ni
        # create the storage vectors
        vx= sp.zeros((ni),dtype="float")
        vMaxi=sp.zeros((ni),dtype="float")
        vMini=sp.zeros((ni),dtype="float")
        
        vMaxi[0:]=None
        vMini[0:]=None
    
        vNpi=sp.zeros((ni),dtype="float")
        
        
        for k in range (ni):

            hi=k*pas + mini # start value of the interval
            hs=k*pas + hi # end value of the interval

            a=sp.where(inV <= hi)
            ii=a[0].max()

            b=sp.where(inV <= hs)
            iis=b[0].max()

            vNpi[k]=  iis - ii
            inTp=inT[ii:iis+1] # temperature values contained in this interval
            vx[k]=(hs - hi )/2 +hi # NDVI value at the middle of the interval
            if vNpi[k] > nObsMin : # check that the interval holds enough values

                inTp=inTp[sp.argsort(inTp)] # sort the temperature values of this interval
                vMaxi[k]=inTp[ int( ( vNpi[k] *percentileMax/100 )) ] # temperature at the 99th percentile of the interval
                vMini[k]=inTp[ int( ( vNpi[k] *percentileMin/100 ))] # temperature at the 1st percentile of the interval
                if k >iValMax:
                    iValMax=k
                if k < iValMin:
                    iValMin=k
        
        # compute LSTmax and LSTmin
        if (t==0):
            # Dry Edge
            # a lower threshold is used to find the end of the interval used for the linear regression
            # iValMin and iValMax are used to avoid NaNs, i.e. we stay within the intervals that satisfy nObsMin
            try:
                b=sp.where(vx < s1Min) # lower threshold, adjust if needed
                ii=sp.nanmax([sp.nanmax(b[0]),iValMin])

                b=sp.where(vx < s2Max) # upper threshold, adjust if needed
                iis=sp.nanmin([sp.nanmax(b[0]),iValMax])

                # Wet Edge
                c=sp.where(vx < ss1Min) # lower threshold, adjust if needed
                ii2=sp.nanmax([sp.nanmax(c[0]),iValMin])

                c=sp.where(vx < ss2Max) # upper threshold, adjust if needed
                iis2=sp.nanmin([sp.nanmax(c[0]),iValMax])
            except:
                print "problem with the lower and upper threshold values used"
        else:
            ii=iValMin
            iis=iValMax
            ii2=iValMin
            iis2=iValMax
            
        # fit the linear regression of the dry edge
        estimation1=sp.stats.linregress(vx[ii:iis+1],vMaxi[ii:iis+1])

        # LSTmax = a * NDVI + b
        lstmax_a=estimation1[0] # slope parameter
        lstmax_b=estimation1[1] # intercept parameter

        estimation1=sp.stats.linregress(vx[ii2:iis2+1],vMini[ii2:iis2+1])
        # LSTmin: minimum of the wet-edge temperatures
        lstmin=sp.nanmin(vMini[ii2:iis2+1])
        # compute the TVDI
        TVDI=( inLST - lstmin) / ( lstmax_b + (lstmax_a * inNDVI )- lstmin+0.00000001 )
#        TVDI=( inLST - lstmin) / ( ( lstmax_b + (lstmax_a * inNDVI ))- lstmin +0.00001 )
    else:
        print "the two arrays do not have the same size"
        exit()
    
    return TVDI
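The code implements TVDI = (inLST - LSTmin) / (LSTmax - LSTmin), where LSTmax = a * NDVI + b is fitted on the dry edge and LSTmin is taken from the wet edge. A minimal call sketch; the ndvi and lst arrays are illustrative assumptions (any two same-shape 2-D arrays):

# `ndvi` and `lst` are assumed to be two same-shape 2-D arrays, e.g. read from rasters
tvdi = TVDI_function(ndvi, lst, pas=0.02, t=0,
                     s1Min=0.3, s2Max=0.8, ss1Min=0.2, ss2Max=0.8)
print tvdi.shape  # same shape as the input arrays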
Example #59
0
def unred(wave, ebv, R_V=3.1, LMC2=False, AVGLMC=False):
    '''
    https://github.com/sczesla/PyAstronomy
    in /src/pyasl/asl/unred
    '''

    x = 10000. / wave  # Convert to inverse microns
    curve = x * 0.

    # Set some standard values:
    x0 = 4.596
    gamma = 0.99
    c3 = 3.23
    c4 = 0.41
    c2 = -0.824 + 4.717 / R_V
    c1 = 2.030 - 3.007 * c2

    if LMC2:
        x0 = 4.626
        gamma = 1.05
        c4 = 0.42
        c3 = 1.92
        c2 = 1.31
        c1 = -2.16
    elif AVGLMC:
        x0 = 4.596
        gamma = 0.91
        c4 = 0.64
        c3 = 2.73
        c2 = 1.11
        c1 = -1.28

    # Compute UV portion of A(lambda)/E(B-V) curve using FM fitting function and
    # R-dependent coefficients
    xcutuv = np.array([10000.0 / 2700.0])
    xspluv = 10000.0 / np.array([2700.0, 2600.0])

    iuv = sp.where(x >= xcutuv)[0]
    N_UV = iuv.size
    iopir = sp.where(x < xcutuv)[0]
    Nopir = iopir.size
    if N_UV > 0:
        xuv = sp.concatenate((xspluv, x[iuv]))
    else:
        xuv = xspluv

    yuv = c1 + c2 * xuv
    yuv = yuv + c3 * xuv**2 / ((xuv**2 - x0**2)**2 + (xuv * gamma)**2)
    yuv = yuv + c4 * (0.5392 * (sp.maximum(xuv, 5.9) - 5.9)**2 + 0.05644 *
                      (sp.maximum(xuv, 5.9) - 5.9)**3)
    yuv = yuv + R_V
    yspluv = yuv[0:2]  # save spline points

    if N_UV > 0:
        curve[iuv] = yuv[2::]  # remove spline points

    # Compute optical portion of A(lambda)/E(B-V) curve
    # using cubic spline anchored in UV, optical, and IR
    xsplopir = sp.concatenate(
        ([0], 10000.0 /
         np.array([26500.0, 12200.0, 6000.0, 5470.0, 4670.0, 4110.0])))
    ysplir = np.array([0.0, 0.26469, 0.82925]) * R_V / 3.1
    ysplop = np.array(
        (sp.polyval([-4.22809e-01, 1.00270, 2.13572e-04][::-1], R_V),
         sp.polyval([-5.13540e-02, 1.00216, -7.35778e-05][::-1], R_V),
         sp.polyval([7.00127e-01, 1.00184, -3.32598e-05][::-1], R_V),
         sp.polyval(
             [1.19456, 1.01707, -5.46959e-03, 7.97809e-04,
              -4.45636e-05][::-1], R_V)))
    ysplopir = sp.concatenate((ysplir, ysplop))

    if Nopir > 0:
        tck = interpolate.splrep(sp.concatenate((xsplopir, xspluv)),
                                 sp.concatenate((ysplopir, yspluv)),
                                 s=0)
        curve[iopir] = interpolate.splev(x[iopir], tck)

    # Now compute the extinction correction factor (divide the observed flux by `corr` to deredden)
    curve *= ebv
    corr = 1. / (10.**(0.4 * curve))

    return corr
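A minimal call sketch; the wavelength grid and E(B-V) value below are illustrative assumptions. Since the A(lambda)/E(B-V) curve is scaled by ebv and the function returns corr = 10**(-0.4 * A(lambda)), an observed flux can be dereddened by dividing by corr:

import numpy as np

wave = np.array([3000., 4400., 5500., 7000.])  # wavelengths in Angstroms (optical range)
flux_obs = np.array([1.0, 1.2, 1.1, 0.9])      # observed (reddened) flux, illustrative
corr = unred(wave, 0.1, R_V=3.1)               # attenuation factor for E(B-V) = 0.1
flux_dered = flux_obs / corr                   # dereddened flux estimate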
Example #60
0
def makehistdata(params,maindir):
    """ This will make the histogram data for the statistics.
        Inputs
            params -  A list of parameters that will have statistics created
            maindir - The directory that the simulation data is held.
        Outputs
            datadict - A dictionary with the data values in numpy arrays. The keys are param names.
            errordict - A dictionary with the data values in numpy arrays. The keys are param names.
            errdictrel -  A dictionary with the error values in numpy arrays, normalized by the correct value. The keys are param names.
    """
    maindir = Path(maindir)
    ffit = maindir.joinpath('Fitted', 'fitteddata.h5')
    inputfiledir = maindir.joinpath('Origparams')

    paramslower = [ip.lower() for ip in params]
    eparamslower = ['n'+ip.lower() for ip in params]

    # set up data dictionary

    errordict = {ip:[] for ip in params}
    errordictrel = {ip:[] for ip in params}
    #Read in fitted data

    Ionofit = IonoContainer.readh5(str(ffit))
    times = Ionofit.Time_Vector

    dataloc = Ionofit.Sphere_Coords
    rng = dataloc[:, 0]
    rng_log = sp.logical_and(rng > 200., rng < 400)
    dataloc_out = dataloc[rng_log]
    pnames = Ionofit.Param_Names
    pnameslower = sp.array([ip.lower() for ip in pnames.flatten()])
    p2fit = [sp.argwhere(ip == pnameslower)[0][0] if ip in pnameslower else None for ip in paramslower]

    datadict = {ip:Ionofit.Param_List[rng_log, :, p2fit[ipn]].flatten() for ipn, ip in enumerate(params)}

    ep2fit = [sp.argwhere(ip==pnameslower)[0][0] if ip in pnameslower else None for ip in eparamslower]

    edatadict = {ip:Ionofit.Param_List[rng_log, :, ep2fit[ipn]].flatten() for ipn, ip in enumerate(params)}
    # Determine which input files are to be used.

    dirlist = [str(i) for i in inputfiledir.glob('*.h5')]
    _, outime, filelisting, _, _ = IonoContainer.gettimes(dirlist)
    time2files = []
    for itn, itime in enumerate(times):
        log1 = (outime[:, 0] >= itime[0]) & (outime[:, 0] < itime[1])
        log2 = (outime[:, 1] > itime[0]) & (outime[:, 1] <= itime[1])
        log3 = (outime[:, 0] <= itime[0]) & (outime[:, 1] > itime[1])
        tempindx = sp.where(log1|log2|log3)[0]
        time2files.append(filelisting[tempindx])


    curfilenum = -1
    for iparam, pname in enumerate(params):
        curparm = paramslower[iparam]
        # Use Ne from the input to compare with the Ne derived from the power.
        if curparm == 'nepow':
            curparm = 'ne'
        datalist = []
        for itn, itime in enumerate(times):
            for  filenum in time2files[itn]:
                filenum = int(filenum)
                if curfilenum != filenum:
                    curfilenum = filenum
                    datafilename = dirlist[filenum]
                    Ionoin = IonoContainer.readh5(datafilename)
                    if ('ti' in paramslower) or ('vi' in paramslower):
                        Ionoin = maketi(Ionoin)
                    pnames = Ionoin.Param_Names
                    pnameslowerin = sp.array([ip.lower() for ip in pnames.flatten()])
                prmloc = sp.argwhere(curparm == pnameslowerin)
                if prmloc.size != 0:
                    curprm = prmloc[0][0]
                # build up the parameter vector over the range values by finding the closest point in space in the input
                curdata = sp.zeros(len(dataloc_out))

                for irngn, curcoord in enumerate(dataloc_out):

                    tempin = Ionoin.getclosestsphere(curcoord, [itime])[0]
                    Ntloc = tempin.shape[0]
                    tempin = sp.reshape(tempin, (Ntloc, len(pnameslowerin)))
                    curdata[irngn] = tempin[0, curprm]
                datalist.append(curdata)
        errordict[pname] = datadict[pname]-sp.hstack(datalist)
        errordictrel[pname] = 100.*errordict[pname]/sp.absolute(sp.hstack(datalist))
    return datadict, errordict, errordictrel, edatadict
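A minimal call sketch for the function above; the parameter list and directory are illustrative assumptions (the directory is expected to contain the 'Fitted/fitteddata.h5' file and the 'Origparams' folder used above):

params = ['Ne', 'Te', 'Ti', 'Vi']
datadict, errordict, errordictrel, edatadict = makehistdata(params, '/path/to/simdir')
print datadict['Ne'].shape  # flattened fitted values for each requested parameter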