Example #1
    def toleranceLimitProcessing(self,data):
        # Tolerance limit processing. In the EIA, tolerance limits are first
        # applied to the upper and lower bounds, then afterward to the
        # interval lengths (as opposed to all at once).

        random.seed(1)
        resampledData = [random.choice(data) for x in xrange(2000)]
        (resampLower,resampUpper) = zip(*resampledData)
        meanLower = nanmean(resampLower)
        stdLower = nanstd(resampLower) * sqrt(len(data)) # *sqrt is to get population std from sample 
        meanUpper = nanmean(resampUpper)
        stdUpper = nanstd(resampUpper) * sqrt(len(data)) # ditto
        K=[32.019, 32.019, 8.380, 5.369, 4.275, 3.712, 3.369, 3.136, 2.967, 2.839,
           2.737, 2.655, 2.587, 2.529, 2.48, 2.437, 2.4, 2.366, 2.337, 2.31,
           2.31, 2.31, 2.31, 2.31, 2.208] # taken from Liu/Mendel matlab code, in turn from Walpole,Myers,Myers,Ye2008
        k = K[min(len(data),24)]
        acceptableLower = (meanLower-k*stdLower, meanLower+k*stdLower)
        acceptableUpper = (meanUpper-k*stdUpper, meanUpper+k*stdUpper)
        for (l,u) in data[:]:
            try:
                if not acceptableLower[0] <= l <= acceptableLower[1]:
                    raise ValueError("Intolerable: lower bound %s not in  %s" % (str(l), str(acceptableLower)),(l,u))
                if not acceptableUpper[0] <= u <= acceptableUpper[1]:
                    raise ValueError("Intolerable: upper bound %s not in %s" % (str(u), str(acceptableUpper)),(l,u))
            except ValueError as (e,d):
                #print e
                #print "Intolerable: removing data point %s" % str(d)
                data.remove(d)
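Note: scipy.stats.nanmean and scipy.stats.nanstd, which these snippets call, were deprecated in SciPy 0.15 and removed in 0.18. A minimal Python 3 port of the tolerance-limit filter above, using numpy.nanmean/numpy.nanstd and returning a filtered copy instead of mutating data in place (the function name and the fixed k are illustrative; the original looks k up in its K table as K[min(len(data), 24)]):

import random
import numpy as np

def tolerance_filter(data, k=2.839, n_resamples=2000, seed=1):
    """Keep only (lower, upper) intervals whose bounds fall inside
    bootstrap tolerance limits of mean +/- k * std."""
    rng = random.Random(seed)
    resampled = [rng.choice(data) for _ in range(n_resamples)]
    lowers, uppers = zip(*resampled)
    scale = np.sqrt(len(data))  # rescale the bootstrap std, as in the original
    lo_m, lo_s = np.nanmean(lowers), np.nanstd(lowers) * scale
    up_m, up_s = np.nanmean(uppers), np.nanstd(uppers) * scale
    return [(l, u) for (l, u) in data
            if lo_m - k * lo_s <= l <= lo_m + k * lo_s
            and up_m - k * up_s <= u <= up_m + k * up_s]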
Example #2
    def toleranceLimitProcessing(self, data):
        # Tolerance limit processing. In the EIA, tolerance limits are first
        # applied to the upper and lower bounds, then afterward to the
        # interval lengths (as opposed to all at once).

        random.seed(1)
        resampledData = [random.choice(data) for x in xrange(2000)]
        (resampLower, resampUpper) = zip(*resampledData)
        meanLower = nanmean(resampLower)
        stdLower = nanstd(resampLower) * sqrt(
            len(data))  # *sqrt is to get population std from sample
        meanUpper = nanmean(resampUpper)
        stdUpper = nanstd(resampUpper) * sqrt(len(data))  # ditto
        K = [
            32.019, 32.019, 8.380, 5.369, 4.275, 3.712, 3.369, 3.136, 2.967,
            2.839, 2.737, 2.655, 2.587, 2.529, 2.48, 2.437, 2.4, 2.366, 2.337,
            2.31, 2.31, 2.31, 2.31, 2.31, 2.208
        ]  # taken from Liu/Mendel matlab code, in turn from Walpole,Myers,Myers,Ye2008
        k = K[min(len(data), 24)]
        acceptableLower = (meanLower - k * stdLower, meanLower + k * stdLower)
        acceptableUpper = (meanUpper - k * stdUpper, meanUpper + k * stdUpper)
        for (l, u) in data[:]:
            try:
                if not acceptableLower[0] <= l <= acceptableLower[1]:
                    raise ValueError(
                        "Intolerable: lower bound %s not in  %s" %
                        (str(l), str(acceptableLower)), (l, u))
                if not acceptableUpper[0] <= u <= acceptableUpper[1]:
                    raise ValueError(
                        "Intolerable: upper bound %s not in %s" %
                        (str(u), str(acceptableUpper)), (l, u))
            except ValueError as (e, d):
                #print e
                #print "Intolerable: removing data point %s" % str(d)
                data.remove(d)
Example #3
def timeseries(iData, zoneMap, std=None):
    '''
    Make zone-wise averaging of input data
    input: 3D matrix(Layers x Width x Height) and map of zones (W x H)
    output: 2D matrices (Layers x Zones) with mean and std
    '''
    #reshape input cube into 2D matrix
    r, h, w = iData.shape
    iData, notNanDataI = cube2flat(iData)
    #get unique values of not-nan labels
    uniqZones = np.unique(zoneMap[np.isfinite(zoneMap)])
    zoneNum = np.zeros((r, uniqZones.size))
    zoneMean = np.zeros((r, uniqZones.size))
    zoneStd = np.zeros((r, uniqZones.size))
    #in each zone: get all values from input data get not nan data average
    for i in range(uniqZones.size):
        zi = uniqZones[i]
        if not np.isnan(zi):
            zoneData = iData[:, zoneMap.flat == zi]
            zoneNum[:, i] = zi
            zoneMean[:, i] = st.nanmean(zoneData, axis=1)
            zoneStd[:, i] = st.nanstd(zoneData, axis=1)
            if std is not None:
                # filter out values more than `std` standard deviations from the mean
                outliers = (np.abs(zoneData.T - zoneMean[:, i]) > zoneStd[:, i] * std).T
                zoneData[outliers] = np.nan
                zoneMean[:, i] = st.nanmean(zoneData, axis=1)
                zoneStd[:, i] = st.nanstd(zoneData, axis=1)

    return zoneMean, zoneStd, zoneNum
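Stripped of the cube2flat helper, the zone-wise reduction above is mask-and-reduce along the layer axis. A compact NumPy sketch of that core, without the optional outlier re-filtering (names are hypothetical):

import numpy as np

def zone_stats(cube, zone_map):
    """NaN-aware mean/std per zone label for a (layers, h, w) cube."""
    layers = cube.shape[0]
    flat = cube.reshape(layers, -1)              # (layers, h*w)
    zones = np.unique(zone_map[np.isfinite(zone_map)])
    mean = np.empty((layers, zones.size))
    std = np.empty((layers, zones.size))
    for i, z in enumerate(zones):
        cols = flat[:, zone_map.ravel() == z]    # pixels belonging to zone z
        mean[:, i] = np.nanmean(cols, axis=1)
        std[:, i] = np.nanstd(cols, axis=1)
    return zones, mean, std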
Example #4
def get_randomized_group_average_speed_profiles(profiles):
    stAvg = []
    unAvg = []
    stErr = []
    unErr = []

    for avgSpeeds, trialTypes in profiles:
        # Plot Average Speeds in bins
        stableTrials = np.where(trialTypes == 0)
        unstableTrials = np.where(trialTypes == 1)

        mSt = stats.nanmean(avgSpeeds[stableTrials, :], 1)
        mUn = stats.nanmean(avgSpeeds[unstableTrials, :], 1)
        eSt = stats.nanstd(avgSpeeds[stableTrials, :],
                           1) / np.sqrt(np.size(stableTrials) - 1)
        eUn = stats.nanstd(avgSpeeds[unstableTrials, :],
                           1) / np.sqrt(np.size(unstableTrials) - 1)

        mSt = mSt[0]
        mUn = mUn[0]
        eSt = eSt[0]
        eUn = eUn[0]

        stAvg.append(mSt)
        unAvg.append(mUn)
        stErr.append(eSt)
        unErr.append(eUn)
    return (stAvg, stErr), (unAvg, unErr)
Example #5
def get3NetworkAvg(data_t, titleName, roiNames, numRuns):
    #Define the streams
    #Ventral=[1, 3, 11, 12, 13, 14]
    #Dorsal=[2, 4, 5, 6, 7, 8, 9, 10]
    #Lateral=[0, 1, 2, 3, 4]
    Lateral=[0,1,2,8,9]
    Dorsal=[8,9,10, 11, 12, 13, 14, 15]
    Ventral=[1,2, 3, 4, 5, 6]

    print 'Ventral rois: '+ str(roiNames[Ventral])
    print 'Dorsal rois: ' + str(roiNames[Dorsal])
    print 'Early Visual rois: '+ str(roiNames[Lateral])

    # Get network averages
    lateralCoher=getNetworkWithin(data_t, Lateral)
    dorsalCoher=getNetworkWithin(data_t, Dorsal)
    ventralCoher=getNetworkWithin(data_t, Ventral)
    #allMeansWithin=(stats.nanmean(lateralCoher.flat), stats.nanmean(dorsalCoher.flat), stats.nanmean(ventralCoher.flat))
    #allSTDWithin=(stats.nanstd(lateralCoher.flat), stats.nanstd(dorsalCoher.flat), stats.nanstd(ventralCoher.flat))
    allMeansWithin= (stats.nanmean(dorsalCoher.flat), stats.nanmean(ventralCoher.flat))
    allSTDWithin=( stats.nanstd(dorsalCoher.flat), stats.nanstd(ventralCoher.flat))

    latBtwCoher=getNetworkBtw(data_t, Lateral, Ventral+Dorsal)
    dorsBtwCoher=getNetworkBtw(data_t, Dorsal, Ventral)
    ventBtwCoher=getNetworkBtw(data_t, Ventral, Dorsal)

    #allMeansBtw=(stats.nanmean(latBtwCoher), stats.nanmean(dorsBtwCoher), stats.nanmean(ventBtwCoher))
    #allSTDBtw=(stats.nanstd(latBtwCoher), stats.nanstd(dorsBtwCoher), stats.nanstd(ventBtwCoher))
    # Just dorsal versus ventral
    allMeansBtw=( stats.nanmean(dorsBtwCoher), stats.nanmean(ventBtwCoher))
    allSTDBtw=( stats.nanstd(dorsBtwCoher), stats.nanstd(ventBtwCoher))

    # Make bar graph
    title = titleName + ' by Network for ' + sub + ' for ' + str(numRuns) + ' runs'
    labels = ('Dorsal', 'Ventral')
    makeBarPlots(allMeansWithin, allSTDWithin, allMeansBtw, allSTDBtw, title, labels)
Example #6
def despike(self, n=3, recursive=False, verbose=False):
    """Replace spikes with np.NaN.
    A spike is a point that deviates from the mean by >= n * std;
    default n = 3."""

    result = self.values.copy()
    outliers = (np.abs(self.values - nanmean(self.values)) >=
                n * nanstd(self.values))

    removed = np.count_nonzero(outliers)
    result[outliers] = np.NaN

    if verbose and not recursive:
        print("Removing from %s\n # removed: %s" % (self.name, removed))

    counter = 0
    if recursive:
        while outliers.any():
            result[outliers] = np.NaN
            outliers = np.abs(result - nanmean(result)) >= n * nanstd(result)
            counter += 1
            removed += np.count_nonzero(outliers)
        if verbose:
            print("Removing from %s\nNumber of iterations: %s # removed: %s" %
                  (self.name, counter, removed))
    return Series(result, index=self.index, name=self.name)
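A self-contained sketch of the same despike logic against modern NumPy, with np.nanmean/np.nanstd standing in for the removed scipy.stats functions (this is an illustration, not the pandas Series method above):

import numpy as np

def despike(values, n=3, recursive=False):
    """Replace points more than n standard deviations from the mean with NaN."""
    result = np.asarray(values, dtype=float).copy()

    def find_outliers(a):
        with np.errstate(invalid='ignore'):  # NaNs compare False, as intended
            return np.abs(a - np.nanmean(a)) >= n * np.nanstd(a)

    outliers = find_outliers(result)
    result[outliers] = np.nan
    while recursive and outliers.any():
        outliers = find_outliers(result)     # stats tighten as spikes drop out
        result[outliers] = np.nan
    return result

print(despike(np.r_[np.ones(20), 50.0], n=3))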
Example #7
def despike(self, n=3, recursive=False, verbose=False):
    """
    Replace spikes with np.NaN.
    A spike is a point that deviates from the mean by >= n * std;
    default n = 3.

    """

    result = self.values.copy()
    outliers = (np.abs(self.values - nanmean(self.values)) >= n *
                nanstd(self.values))

    removed = np.count_nonzero(outliers)
    result[outliers] = np.NaN

    if verbose and not recursive:
        print("Removing from %s\n # removed: %s" % (self.name, removed))

    counter = 0
    if recursive:
        while outliers.any():
            result[outliers] = np.NaN
            outliers = np.abs(result - nanmean(result)) >= n * nanstd(result)
            counter += 1
            removed += np.count_nonzero(outliers)
        if verbose:
            print("Removing from %s\nNumber of iterations: %s # removed: %s" %
                  (self.name, counter, removed))
    return Series(result, index=self.index, name=self.name)
Example #8
    def reasonableIntervalProcessing(self, data):
        databackup = data[:]  #keep backup in case all intervals are deleted
        random.seed(1)
        resampledData = [random.choice(data) for x in xrange(2000)]
        (resampLower, resampUpper) = zip(*resampledData)
        resampInterval = map(lambda x: x[1] - x[0], resampledData)
        meanLower = nanmean(resampLower)
        stdLower = nanstd(resampLower) * sqrt(
            len(data)
        )  # it appears *sqrt is done to estimate population std from sample
        meanUpper = nanmean(resampUpper)
        stdUpper = nanstd(resampUpper) * sqrt(len(data))  # ditto
        meanInterval = nanmean(resampInterval)
        stdInterval = nanstd(resampInterval) * sqrt(len(data))  # ditto
        if stdLower + stdUpper == 0:
            barrier = (meanLower + meanUpper) / 2
            print "barrierAvg", barrier
        elif stdLower == 0:
            barrier = meanLower + .5
            print "barrierlower", barrier
        elif stdUpper == 0:
            barrier = meanUpper - .5
            print "barrierupper", barrier

        else:
            barrier1 = (
                -(meanLower * stdUpper**2 - meanUpper * stdLower**2) +
                stdLower * stdUpper * sqrt(
                    (meanLower - meanUpper)**2 + 2 *
                    (stdLower**2 - stdUpper**2) * log(stdLower / stdUpper))
            ) / (stdLower**2 - stdUpper**2)
            barrier2 = (
                -(meanLower * stdUpper**2 - meanUpper * stdLower**2) -
                stdLower * stdUpper * sqrt(
                    (meanLower - meanUpper)**2 + 2 *
                    (stdLower**2 - stdUpper**2) * log(stdLower / stdUpper))
            ) / (stdLower**2 - stdUpper**2)

            if barrier1 >= meanLower and barrier1 <= meanUpper:
                barrier = barrier1
                print "barrier1", barrier
            else:
                barrier = barrier2
                print "barrier2", barrier
        for (l, u) in data[:]:
            try:

                #if l > barrier+(.1*stdLower) or u < barrier-(.1*stdUpper):
                #if l > barrier+stdLower or u < barrier-stdUpper:
                #if l > barrier and u < barrier:
                if l > barrier + (.001 * stdLower) or u < barrier - (.001 *
                                                                     stdUpper):
                    raise ValueError(
                        "Unreasonable: interval %s does not cross reasonable barrier  %s"
                        % (str((l, u)), str(barrier)), (l, u))
            except ValueError as (e, d):
                #print e
                #print "Unreasonable: removing data point %s" % str(d)
                data.remove(d)
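The barrier1/barrier2 expressions are the two roots obtained by equating the densities of N(meanLower, stdLower) and N(meanUpper, stdUpper). Isolated, that root computation looks like this (a sketch; it assumes the two standard deviations differ, matching the else branch above):

import numpy as np

def gaussian_equal_density_points(m1, s1, m2, s2):
    """Roots x where the N(m1, s1) and N(m2, s2) pdfs are equal (s1 != s2)."""
    disc = (m1 - m2) ** 2 + 2 * (s1 ** 2 - s2 ** 2) * np.log(s1 / s2)
    root = s1 * s2 * np.sqrt(disc)
    base = -(m1 * s2 ** 2 - m2 * s1 ** 2)
    denom = s1 ** 2 - s2 ** 2
    return (base + root) / denom, (base - root) / denom

print(gaussian_equal_density_points(0.0, 1.0, 0.0, 2.0))  # ~(-1.359, 1.359)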
Example #9
def calc_clipped_stats_old(data, clip=3.0, nIter=10):
    """Calculate the mean and stdev of an array given a sigma clip"""
    
    data = np.array(data).flatten()
    
    mean = float(stats.nanmean(data))
    std = float(stats.nanstd(data))
    mad = float(MAD(data))

    if clip > 0.0:
        convergeFlg = 0
        itCnt = 0
        while convergeFlg==0 and itCnt<nIter:
            meanOld, stdOld, madOld = mean, std, mad
            minVal = mean - (clip * mad)
            maxVal = mean + (clip * mad)

            # Blank values outside the 3-sigma range
            dataMsk = np.where(np.greater(data, maxVal), np.nan, data)
            dataMsk = np.where(np.less(data, minVal), np.nan, dataMsk)

            # Measure the statistics
            mean = stats.nanmean(dataMsk)
            median = stats.nanmedian(dataMsk)
            std = stats.nanstd(dataMsk)
            mad = MAD(dataMsk)
            npix = np.sum(np.where(np.isnan(dataMsk),0.0,1.0))
            dataMsk = []
            
            if mean == meanOld and mad == madOld:
                convergeFlg = 1
            itCnt += 1
            

    # Assemble the measurements into a dictionary
    m = {}
    m['mean'] = float(mean)
    m['median'] = float(median)
    m['stdev'] = float(std)
    m['madfm'] = float(mad)
    m['npix'] =int(npix)
    m['max'] = float(np.nanmax(data))
    m['min'] = float(np.nanmin(data))
    del data
    
    # If all nans
    if m['npix'] == 0:
        m['stdev'] = 0.0
        m['mean'] = 0.0
        m['median'] = 0.0
        m['max'] = 0.0
        m['min'] = 0.0
        m['centmax'] = (0.0,0.0)
        m['madfm'] = 0.0
        m['success'] = False
    else:
        m['success'] = True

    return m
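The clipping loop reduces to mask, re-measure, repeat until stable. A minimal NaN-based sketch of that pattern (note the original clips on the MAD rather than the std, and also tracks the median):

import numpy as np

def clipped_mean_std(data, clip=3.0, n_iter=10):
    """Iteratively blank values beyond clip * std, then re-measure."""
    a = np.asarray(data, dtype=float).ravel().copy()
    for _ in range(n_iter):
        mean, std = np.nanmean(a), np.nanstd(a)
        with np.errstate(invalid='ignore'):   # NaNs never count as outliers
            mask = np.abs(a - mean) > clip * std
        if not mask.any():                    # converged: nothing new to clip
            break
        a[mask] = np.nan
    return np.nanmean(a), np.nanstd(a)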
Example #10
    def toleranceLimitProcessing(self, data):
        # Tolerance limit processing
        random.seed(1)
        resampledData = [random.choice(data) for x in xrange(2000)]

        #address default values:
        # middle = nanmean([(d[1]+d[0])/2 for d in data])/(self.r[1]-self.r[0])
        # print "middle", middle
        # if(middle < .35):
        #     print "filtering higher range"
        #     f = lambda x: x[1] != 100 or  random.random() > .3
        #     resampledData = filter(f, resampledData)
        # if(middle > .65*(self.r[1]-self.r[0])):
        #     print "filtering lower range"
        #     f = lambda x: x[0] != 0 or random.random() > .3
        #     resampledData = filter(f, resampledData)
        # f = lambda x: (x[0] != 0 and x[1]!=100) or random.random() > .1
        # resampledData = filter(f, resampledData)
        # print "resampled data length", len(resampledData)

        (resampLower, resampUpper) = zip(*resampledData)
        resampInterval = map(lambda x: x[1] - x[0], resampledData)
        meanLower = nanmean(resampLower)
        stdLower = nanstd(resampLower) * sqrt(
            len(data)
        )  # it appears *sqrt is done to estimate population std from sample
        meanUpper = nanmean(resampUpper)
        stdUpper = nanstd(resampUpper) * sqrt(len(data))  # ditto
        meanInterval = nanmean(resampInterval)
        stdInterval = nanstd(resampInterval) * sqrt(len(data))  # ditto
        K = [
            32.019, 32.019, 8.380, 5.369, 4.275, 3.712, 3.369, 3.136, 2.967,
            2.839, 2.737, 2.655, 2.587, 2.529, 2.48, 2.437, 2.4, 2.366, 2.337,
            2.31, 2.31, 2.31, 2.31, 2.31, 2.208
        ]  # taken from Liu/Mendel matlab code, in turn from Walpole,Myers,Myers,Ye2008
        k = K[min(len(data), 24)]
        acceptableLower = (meanLower - k * stdLower, meanLower + k * stdLower)
        acceptableUpper = (meanUpper - k * stdUpper, meanUpper + k * stdUpper)
        acceptableInterval = (meanInterval - k * stdInterval,
                              meanInterval + k * stdInterval)
        for (l, u) in data[:]:
            try:
                if not acceptableLower[0] <= l <= acceptableLower[1]:
                    raise ValueError(
                        "Intolerable: lower bound %s not in  %s" %
                        (str(l), str(acceptableLower)), (l, u))
                if not acceptableUpper[0] <= u <= acceptableUpper[1]:
                    raise ValueError(
                        "Intolerable: upper bound %s not in %s" %
                        (str(u), str(acceptableUpper)), (l, u))
                if not acceptableInterval[0] <= u - l <= acceptableInterval[1]:
                    raise ValueError(
                        "Intolerable: interval %s greater than %s" %
                        (str(u - l), str(acceptableInterval)), (l, u))
            except ValueError as (e, d):
                #print e
                #print "Intolerable: removing data point %s" % str(d)
                data.remove(d)
Example #11
def calc_clipped_stats_old(data, clip=3.0, nIter=10):

    data = np.array(data).flatten()

    mean = float(stats.nanmean(data))
    std = float(stats.nanstd(data))
    mad = float(MAD(data))

    if clip > 0.0:
        convergeFlg = 0
        itCnt = 0
        while convergeFlg == 0 and itCnt < nIter:
            meanOld, stdOld, madOld = mean, std, mad
            minVal = mean - (clip * mad)
            maxVal = mean + (clip * mad)

            # Blank values outside the 3-sigma range
            dataMsk = np.where(np.greater(data, maxVal), np.nan, data)
            dataMsk = np.where(np.less(data, minVal), np.nan, dataMsk)

            # Measure the statistics
            mean = stats.nanmean(dataMsk)
            median = stats.nanmedian(dataMsk)
            std = stats.nanstd(dataMsk)
            mad = MAD(dataMsk)
            npix = np.sum(np.where(np.isnan(dataMsk), 0.0, 1.0))
            dataMsk = []

            if mean == meanOld and mad == madOld:
                convergeFlg = 1
            itCnt += 1

    # Assemble the measurements into a dictionary
    m = {}
    m['mean'] = float(mean)
    m['median'] = float(median)
    m['stdev'] = float(std)
    m['madfm'] = float(mad)
    m['npix'] = int(npix)
    m['max'] = float(np.nanmax(data))
    m['min'] = float(np.nanmin(data))
    del data

    # If all nans
    if m['npix'] == 0:
        m['stdev'] = 0.0
        m['mean'] = 0.0
        m['median'] = 0.0
        m['max'] = 0.0
        m['min'] = 0.0
        m['centmax'] = (0.0, 0.0)
        m['madfm'] = 0.0
        m['success'] = False
    else:
        m['success'] = True

    return m
Example #12
    def reasonableIntervalProcessing(self,data):
        databackup = data[:]   #keep backup in case all intervals are deleted
        random.seed(1)
        resampledData = [random.choice(data) for x in xrange(2000)]
        (resampLower,resampUpper) = zip(*resampledData)
        resampInterval = map(lambda x: x[1]-x[0], resampledData)
        meanLower = nanmean(resampLower)
        stdLower = nanstd(resampLower) * sqrt(len(data)) # it appears *sqrt is done to estimate population std from sample 
        meanUpper = nanmean(resampUpper)
        stdUpper = nanstd(resampUpper) * sqrt(len(data)) # ditto
        meanInterval = nanmean(resampInterval)
        stdInterval = nanstd(resampInterval) * sqrt(len(data)) # ditto
        if stdLower+stdUpper==0:
            barrier = (meanLower+meanUpper)/2
            print "barrierAvg", barrier
        elif stdLower == 0:
            barrier = meanLower+.5
            print "barrierlower", barrier
        elif stdUpper == 0:
            barrier = meanUpper-.5
            print "barrierupper", barrier

        else:
            barrier1 = ( 
                (meanUpper*stdLower**2-meanLower*stdUpper**2) 
                 + stdLower*stdUpper*sqrt((meanLower-meanUpper)**2 + 2*(stdLower**2-stdUpper**2)*log(stdLower/stdUpper))
                 ) /(stdLower**2-stdUpper**2)
                                                                                        
            barrier2 = ( 
                (meanUpper*stdLower**2-meanLower*stdUpper**2) 
                 - stdLower*stdUpper*sqrt((meanLower-meanUpper)**2 + 2*(stdLower**2-stdUpper**2)*log(stdLower/stdUpper))
                 )/(stdLower**2-stdUpper**2)
                                                 
            
            print "barrier1", barrier1
            print "barrier2", barrier2
            if barrier1 >= meanLower and barrier1 <= meanUpper:
                barrier = barrier1
                print "barrier1", barrier
            #elif barrier2 >= meanLower and barrier1 <= meanUpper:
            else:
                barrier = barrier2
                print "barrier2", barrier
        for (l,u) in data[:]:
            try:
                #if l > barrier+(.1*stdLower) or u < barrier-(.1*stdUpper):
                #if l > barrier+stdLower or u < barrier-stdUpper:
                #if l > barrier and u < barrier:
                #if l > barrier+(.001*stdLower) or u < barrier-(.001*stdUpper):
                if not (2*meanLower - barrier) <= l <= barrier <= u <= (2*meanUpper- barrier):
                    
                    raise ValueError("Unreasonable: interval %s does not cross reasonable barrier  %s" % (str((l,u)), str(barrier)),(l,u))
            except ValueError as (e,d):
                #print e
                #print "Unreasonable: removing data point %s" % str(d)
                data.remove(d)
Example #13
def return_speedup_stats(x, y):

    speedup_stats = {
        'ratio_of_the_means': stats.nanmean(x) / stats.nanmean(y),
        'ratio_of_the_medians': stats.nanmedian(x) / stats.nanmedian(y),
        'ratio_of_the_stddevs': stats.nanstd(x) / stats.nanstd(y),
        'ratio_max_to_min': np.amax(x) / np.amin(y),
        'ratio_min_to_max': np.amin(x) / np.amax(y)
    }
    return speedup_stats
Example #14
 def calc_perf_stats(self):
     """Calculates mean performance based on trimmed time series."""
     self.mean_tsr, self.std_tsr = nanmean(self.tsr), nanstd(self.tsr)
     self.mean_cp, self.std_cp = nanmean(self.cp), nanstd(self.cp)
     self.mean_cd, self.std_cd = nanmean(self.cd), nanstd(self.cd)
     self.mean_ct, self.std_ct = nanmean(self.ct), nanstd(self.ct)
     self.mean_u_enc = nanmean(self.tow_speed)
     self.mean_tow_speed = self.mean_u_enc
     self.std_u_enc = nanstd(self.tow_speed)
     self.std_tow_speed = self.std_u_enc
Example #15
def removeoutliers(inarray,stdcut=3.0):
    #bonehead outlier cut, stdcut is how many sigma, replace with nearest neighbor
    #first mark the bad numbers
    inarray[np.logical_not(np.isfinite(inarray))]=0.
    indexarray=np.arange(len(inarray))
    badi=indexarray[np.abs(inarray-nanmedian(inarray)) > stdcut*nanstd(inarray) ]
    goodi=indexarray[np.abs(inarray-nanmedian(inarray)) <= stdcut*nanstd(inarray) ]
    outarray=inarray
    for i in badi:
        outarray[i]=inarray[goodi[np.abs(goodi-i).argmin()]]
    return outarray
Example #16
def extract_pigments(f):
    '''
    extract pigment data from *.csv files (or a pandas object), and
    return a list of dictionaries.

    INPUT
    -----
    f : str or pandas object
        path of a specific .csv file, or the data itself

    OUTPUT
    ------
    var: list of dictionaries, containing pigments and informations.

    '''
    stringy = str(f)
    term = stringy[stringy.rfind('.'):stringy.rfind('.')+4]
    if term == '.csv':
        dat = np.genfromtxt(stringy, names=True, dtype=None, delimiter=',')
        print(dat)
        lista = dat.dtype.names
    else:
        dat = f
        lista = dat.keys()
    
    dicts = []
    nd = {}
    parse = ['station','treatment','time']
    
    for k in lista:
        if k in parse:
            continue
            
        nd[k] = []
        nd['name'] = k
        nd['local'] = dat['station'][0]
        nd['ct0'] = dat[k][(dat['treatment']=='Initial')]
        nd['ct1'] = dat[k][(dat['treatment']=='Control') & (dat['time']=='T1')]
        nd['ct2'] = dat[k][(dat['treatment']=='Control') & (dat['time']=='T2')]
        nd['ft0'] = dat[k][(dat['treatment']=='Initial')]
        nd['ft1'] = dat[k][(dat['treatment']=='Fe') & (dat['time']=='T1')]
        nd['ft2'] = dat[k][(dat['treatment']=='Fe') & (dat['time']=='T2')]
        nd['dt0'] = dat[k][(dat['treatment']=='Initial')]
        nd['dt1'] = dat[k][(dat['treatment']=='DFA') & (dat['time']=='T1')]
        nd['dt2'] = dat[k][(dat['treatment']=='DFA') & (dat['time']=='T2')]
        nd['xcontrol'] = np.append(nanmean(nd['ct0']),(nanmean(nd['ct1']), nanmean(nd['ct2'])))
        nd['xferro'] = np.append(nanmean(nd['ft0']),(nanmean(nd['ft1']), nanmean(nd['ft2'])))
        nd['xdfa'] = np.append(nanmean(nd['dt0']),(nanmean(nd['dt1']), nanmean(nd['dt2'])))
        nd['econtrol'] = np.append(nanstd(nd['ct0']),(nanstd(nd['ct1']), nanstd(nd['ct2'])))
        nd['eferro'] = np.append(nanstd(nd['ft0']),(nanstd(nd['ft1']), nanstd(nd['ft2'])))
        nd['edfa'] = np.append(nanstd(nd['dt0']),(nanstd(nd['dt1']), nanstd(nd['dt2'])))
        if nd:
            dicts.append(nd)
        nd = {}

    return dicts
Example #17
    def make_plots(self, num_bins=50):
        import matplotlib.pyplot as p

        ## Histogram of Widths
        widths = [float(x) for x in self.dataframe["Widths"] if is_float_try(x)]
        widths_stats = [nanmean(widths), nanstd(widths), nanmedian(widths)]

        ## Histogram of Lengths
        lengths = self.dataframe["Lengths"]
        lengths_stats = [nanmean(lengths), nanstd(lengths), nanmedian(lengths)]

        ## Histogram of Curvature
        rht_curvature = self.dataframe["RHT Curvature"]
        rht_curvature_stats = [nanmean(rht_curvature), nanstd(rht_curvature), nanmedian(rht_curvature)]



        if self.verbose:
            print "Widths Stats: %s" % (widths_stats)
            print "Lengths Stats: %s" % (lengths_stats)
            print "Curvature Stats: %s" % (rht_curvature_stats)

            p.subplot(131)
            p.hist(widths, num_bins)
            p.xlabel("Widths (pc)")
            p.subplot(132)
            p.hist(lengths, num_bins)
            p.xlabel("Lengths (pc)")
            p.subplot(133)
            p.hist(rht_curvature, num_bins)
            p.xlabel("Curvature")
            p.show()
        if self.save:
            p.hist(widths, num_bins)
            p.xlabel("Widths (pc)")
            p.savefig("".join([self.save_name,"_widths.pdf"]))
            p.close()

            p.hist(lengths, num_bins)
            p.xlabel("Lengths (pc)")
            p.savefig("".join([self.save_name,"_lengths.pdf"]))
            p.close()

            p.hist(rht_curvature, num_bins)
            p.xlabel("RHT Curvature")
            p.savefig("".join([self.save_name,"_rht_curvature.pdf"]))
            p.close()

        return self
Example #18
def plot_randomized_speed_profiles(avgSpeeds, trialTypes):
    # Set Plotting Attributes
    color1 = (0.0, 0.0, 0.0, 0.1)
    color2 = (1.0, 0.6, 0.0, 0.1)
    color1b = (0.0, 0.0, 0.0, 1.0)
    color2b = (1.0, 0.6, 0.0, 1.0)

    traceColors = [color1, color2]
    boldColors = [color1b, color2b]

    # Plot Average Speeds in bins
    plt.figure()
    numTrials = np.size(trialTypes)
    for t in range(0, numTrials):
        if trialTypes[t] == 0:
            plt.plot(avgSpeeds[t, :], color=color1)
        else:
            plt.plot(avgSpeeds[t, :], color=color2)

    stableTrials = np.where(trialTypes == 0)
    unstableTrials = np.where(trialTypes == 1)
    mSt = stats.nanmean(avgSpeeds[stableTrials, :], 1)
    mUn = stats.nanmean(avgSpeeds[unstableTrials, :], 1)
    eSt = stats.nanstd(avgSpeeds[stableTrials, :],
                       1) / np.sqrt(np.size(stableTrials) - 1)
    eUn = stats.nanstd(avgSpeeds[unstableTrials, :],
                       1) / np.sqrt(np.size(unstableTrials) - 1)

    #    eSt = stats.nanstd(avgSpeeds[stableTrials, :], 1)
    #    eUn = stats.nanstd(avgSpeeds[unstableTrials, :], 1)

    mSt = mSt[0]
    mUn = mUn[0]
    eSt = eSt[0]
    eUn = eUn[0]

    plt.plot(mUn, color=color2b, linewidth=7)
    plt.plot(mSt, color=color1b, linewidth=7)

    #    plt.plot(mSt + eSt, color=color1b, linewidth = 0.5)
    #    plt.plot(mSt - eSt, color=color1b, linewidth = 0.5)
    #    plt.plot(mUn + eUn, color=color2b, linewidth = 0.5)
    #    plt.plot(mUn - eUn, color=color2b, linewidth = 0.5)
    #pltutils.fix_font_size()
    plt.xlabel('crossing extent (cm)')
    plt.ylabel('normalized horizontal speed')
    pltutils.fix_font_size()
    plt.axis([0, 39, 0, 3])
Example #19
File: RR_rf.py Project: r-b-g-b/Lab
def gen_exp_analysis_2d(f, key = 'rrtf_noise', key_dtype = '7f4'):
	
	db = h5_to_numpy(f, [key, 'rrs'], [key_dtype, '7f4'])

	fig = plt.figure()
	ax = fig.add_subplot(111)	
	gens = ['ko', 'wt']
	exps = ['nai', 'exp']
	pltopts = {'ko' : {'nai' : {'color' : 'r', 'ls' : '-'}, 'exp' : {'color' : 'r', 'ls' : '--'}}, 'wt' : {'nai' : {'color' : 'b', 'ls' : '-'}, 'exp' : {'color' : 'b', 'ls' : '--'}}}
	# t = np.arange(7)
	rrs = db['rrs'][0]
	leg = []
	for i, gen in enumerate(gens):
		for j, exp in enumerate(exps):
			ix = np.vstack((db['gen']==gen, db['exp']==exp)).all(0)
			db_ = db[ix]
			nunits = db_.size
			y = st.nanmean(db_[key], 0)
			yerr = st.nanstd(db_[key], 0) / np.sqrt(nunits)
			ax.errorbar(rrs, y, yerr = yerr, color = pltopts[gen][exp]['color'], ls = pltopts[gen][exp]['ls'])
			
			leg.append('-'.join((gen, exp)))
			
	ax.legend(leg)
	# ax.set_title('Evoked PSTHs')
	# ax.set_xlabel('Time (ms)')
	# ax.set_ylabel('Firing rate (spks/s)')
	plt.show()
Example #20
def rate_startle_ratio(data, title = None, ax = None, show_all = True):

	if ax is None:
		fig = plt.figure();
		ax = fig.add_subplot(111);
	
	freqs = np.unique(data['freq'])
	nfreqs = freqs.size
	rates = np.unique(data['rate'])
	nrates = rates.size
	animals = np.unique(data['animal'])
	nanimals = animals.size
	x = np.arange(nrates)
	ppi = np.empty((nfreqs, nrates, nanimals))
	for f, freq in enumerate(freqs):
		for r, rate in enumerate(rates):
			for a, animal in enumerate(animals):
				dat_ = data[np.c_[data['freq']==freq, data['rate']==rate, data['animal']==animal].all(1)]
				ppi[f, r, a] = calc_rate_startle_ratio(dat_)
		
		ax.errorbar(x+nrates*f, st.nanmean(ppi[f, ...], 1), yerr = st.nanstd(ppi[f, ...], 1), lw = 3)

		if show_all:
			ax.plot(x+nrates*f, ppi[f, ...], color = '0.7')
			
	ax.set_xticks(np.arange(nfreqs*nrates))
	ax.set_xticklabels(np.tile(rates, nfreqs))
	ax.axhline(1, color = 'r', ls = '--')
	ax.set_ylabel('PPI')
	ax.set_xlabel('Rate (pps)')
	ax.set_title(title)
Example #21
def get_total_task_counts(fluc_levels,fluc_type):
    for fluc_level in fluc_levels:
        task_counts_mean=[]
        task_counts_se=[]
        task_counts_sd=[]
        task_counts=[]
        for replicate in range(1,31):
            replicate_counts=[]
            tasks_for_replicate=get_file_lines("../data_"+str(fluc_type)+"_"+str(fluc_level)+"/replicate_"+str(replicate)+"/tasks.dat")
            for i in range(len(tasks_for_replicate)):
                if len(tasks_for_replicate[i])!=0 and tasks_for_replicate[i][0]!="#":
                    temp=str(tasks_for_replicate[i]).split(" ")
                    update_task_count=0
                    for j in range(1,10):
                        update_task_count+=float(temp[j])
                    replicate_counts+=[update_task_count]
            assert len(replicate_counts)==500000/50+1,""+str(len(replicate_counts))
            task_counts+=[copy.deepcopy(replicate_counts)]
        assert len(task_counts)==30,""+str(len(task_counts))
        for update in range(0,400001,50):
            update_data=[float(task_counts[i][update/50]) for i in range(30)]
            task_counts_mean+=[stats.nanmean(update_data)]
            task_counts_se+=[stats.sem(update_data)]
            task_counts_sd+=[stats.nanstd(update_data)]
        pickle.dump(task_counts_mean,open("../plot_data/total_task_counts_mean_"+str(fluc_type)+"_"+str(fluc_level)+".data","wb"))
        pickle.dump(task_counts_se,open("../plot_data/total_task_counts_se_"+str(fluc_type)+"_"+str(fluc_level)+".data","wb"))
        pickle.dump(task_counts_sd,open("../plot_data/total_task_counts_sd_"+str(fluc_type)+"_"+str(fluc_level)+".data","wb"))
    return "success"
Example #22
def bar_by_indep_2d(dep_key, indep_key, data, ax = None, bins = None, color = 'b', show_all = False):

	x = np.asarray(data[indep_key])
	y = np.asarray(data[dep_key])

	if bins is None:
		x_bin = x
	else:
		x_bin = misc.bin(x, bins)

	bins = np.unique(x_bin)
	nbins = bins.size

	y_mean = np.empty(nbins)
	y_sem = np.empty(nbins)
	for i in range(nbins):
		y_ = y[x_bin == bins[i]]
		y_mean[i] = st.nanmean(y_)
		y_sem[i] = st.nanstd(y_) / np.sqrt(y_.size)

	if ax is None:
		fig = plt.figure();
		ax = fig.add_subplot(111);

	if show_all:
		ax.scatter(x, y, color = color, alpha = 0.25)
		lw = 2
	else:
		lw = 1
	ax.errorbar(bins, y_mean, yerr = y_sem, color = color, lw = lw)

	ax.set_xlim([bins[0]-1, bins[-1]+1])
	plt.show()
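The bin-then-errorbar pattern above is worth isolating. A sketch that, unlike the original's sqrt(y_.size), divides by the count of finite values only (function and variable names are hypothetical):

import numpy as np

def binned_mean_sem(x, y, edges):
    """NaN-aware mean and standard error of y within bins of x."""
    idx = np.digitize(x, edges)
    means, sems = [], []
    for b in np.unique(idx):
        y_b = y[idx == b]
        n = np.sum(~np.isnan(y_b))            # count finite values only
        means.append(np.nanmean(y_b))
        sems.append(np.nanstd(y_b) / np.sqrt(n))
    return np.array(means), np.array(sems)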
Example #23
def gap_startle_ratio(data, title = None, ax = None, show_all = True, animals = None):
	if ax is None:
		fig = plt.figure();
		ax = fig.add_subplot(111);
	
	freqs = np.unique(data['freq'])
	nfreqs = freqs.size
	gaps = np.unique(data['gap'])
	ngaps = gaps.size
	if animals is None:
		animals = np.unique(data['animal'])
	nanimals = animals.size
	x = np.arange(ngaps)
	ppi = np.empty((nfreqs, ngaps, nanimals))
	for f, freq in enumerate(freqs):
		for a, animal in enumerate(animals):
			dat = data[np.c_[data['animal']==animal, data['freq']==freq].all(1)]
			basestartle = dat[dat['gap']==0]['maxstartle'].mean()
			for r, gap in enumerate(gaps):
				dat_ = dat[dat['gap']==gap]['maxstartle']
				ppi[f, r, a] = dat_.mean() / basestartle
		
		ax.errorbar(x+ngaps*f, st.nanmean(ppi[f, ...], 1), yerr = st.nanstd(ppi[f, ...], 1), lw = 3)

		if show_all:
			ax.plot(x+ngaps*f, ppi[f, ...], color = '0.7')
			
	ax.set_xticks(np.arange(nfreqs*ngaps))
	ax.set_xticklabels(np.tile(gaps, nfreqs))
	ax.axhline(1, color = 'r', ls = '--')
	ax.set_ylabel('PPI')
	ax.set_xlabel('Gap duration (s)')
	ax.set_title(title)
Example #24
def get_count(data_category,specific_data,fluc_levels,fluc_type):
    assert data_category in ["resource","tasks"]
    assert type(fluc_levels)==list
    assert type(fluc_type)==str
    assert fluc_type in ["sync","stag","lowhigh"]
    assert specific_data>=0
    assert specific_data<=8
    
    for fluc_level in fluc_levels:
        treatment_counts_mean=[]
        treatment_counts_se=[]
        treatment_counts_sd=[]
        treatment_counts=[]
        for replicate in range(1,31):
            replicate_counts=[]
            data_for_replicate=get_file_lines("../data_"+str(fluc_type)+"_"+str(fluc_level)+"/replicate_"+str(replicate)+"/"+str(data_category)+".dat")
            for i in range(len(data_for_replicate)):
                if len(data_for_replicate[i])!=0 and data_for_replicate[i][0]!="#":
                    temp=str(data_for_replicate[i]).split(" ")
                    update_count=float(temp[specific_data+1])
                    replicate_counts+=[update_count]
            assert len(replicate_counts)==500000/50+1,""+str(len(replicate_counts))
            treatment_counts+=[copy.deepcopy(replicate_counts)]
        assert len(treatment_counts)==30,""+str(len(treatment_counts))
        for update in range(0,400001,50):
            update_data=[float(treatment_counts[i][update/50]) for i in range(30)]
            treatment_counts_mean+=[stats.nanmean(update_data)]
            treatment_counts_se+=[stats.sem(update_data)]
            treatment_counts_sd+=[stats.nanstd(update_data)]
        pickle.dump(treatment_counts_mean,open("../plot_data/"+str(data_category)+"_"+str(specific_data)+"_counts_mean_"+str(fluc_type)+"_"+str(fluc_level)+".data","wb"))
        pickle.dump(treatment_counts_se,open("../plot_data/"+str(data_category)+"_"+str(specific_data)+"_counts_se_"+str(fluc_type)+"_"+str(fluc_level)+".data","wb"))
        pickle.dump(treatment_counts_sd,open("../plot_data/"+str(data_category)+"_"+str(specific_data)+"_counts_sd_"+str(fluc_type)+"_"+str(fluc_level)+".data","wb"))
    return "success"
Example #25
def nanste(array, axis):
    """ Function that computes standard error accounting for NaN's
    """

    err = stats.nanstd(array, axis=axis) / np.sqrt(nanlen(array, axis))

    return err
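nanlen is not shown in this snippet; it presumably counts non-NaN entries along the axis. A plausible definition (an assumption, not from the source):

import numpy as np

def nanlen(array, axis):
    """Number of non-NaN entries along the given axis (assumed helper)."""
    return np.sum(~np.isnan(array), axis=axis)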
Example #26
File: cf.py Project: epifanio/ecoop
 def nin_get(self, url='http://www.cpc.ncep.noaa.gov/data/indices/sstoi.indices', save=None, csvout='nin.csv'):
     """
     read NIN data from url and return a pandas dataframe
     @param url: url to data online default is set to : http://www.cpc.ncep.noaa.gov/data/indices/sstoi.indices
     @param save: directory where to save raw data as csv
     @return: nindata as pandas dataframe
     """
     try:
         ts_raw = pd.read_table(url, sep=' ', header=0, skiprows=0, parse_dates=[['YR', 'MON']], skipinitialspace=True,
                                index_col=0, date_parser=parse)
         print 'dataset used: %s' % url
         ts_year_group = ts_raw.groupby(lambda x: x.year).apply(lambda sdf: sdf if len(sdf) > 11 else None)
         ts_range = pd.date_range(ts_year_group.index[0][1], ts_year_group.index[-1][1] + pd.DateOffset(months=1),
                                  freq="M")
         ts = pd.DataFrame(ts_year_group.values, index=ts_range, columns=ts_year_group.keys())
         ts_fullyears_group = ts.groupby(lambda x: x.year)
         nin_anomalies = (ts_fullyears_group.mean()['ANOM.3'] - sts.nanmean(
             ts_fullyears_group.mean()['ANOM.3'])) / sts.nanstd(ts_fullyears_group.mean()['ANOM.3'])
         nin_anomalies = pd.DataFrame(nin_anomalies.values, index=pd.to_datetime([str(x) for x in nin_anomalies.index]))
         nin_anomalies = nin_anomalies.rename(columns={'0': 'nin'})
         nin_anomalies.columns = ['nin']
         if save:
             eu.ensure_dir(save)
             output = os.path.join(save, csvout)
             nin_anomalies.to_csv(output, sep=',', header=True, index=True, index_label='Date')
             print 'data saved as', output
         return nin_anomalies
     except IOError:
         print 'unable to fetch the data; check if %s is a valid address and the data conforms to AMO spec (for info about the data spec, see [1])' % url
Example #27
def nanste(array,axis):
    """ Function that computes standard error accounting for NaN's
    """

    err = stats.nanstd(array,axis=axis)/np.sqrt(nanlen(array,axis))

    return err
Example #28
def calc_stats_old(a, maskzero=False):
    """Calculate the statistics of an array"""
    
    statsDict = {}
    a = np.array(a)
    if maskzero:
        a = np.where( np.equal(a, 0.0), np.nan, a)

    # Check that array is not all NaNs
    statsDict['npix'] = int(np.sum(np.where(np.isnan(a),0.0,1.0)))
    if statsDict['npix']>=2:
        statsDict['stdev'] = float(stats.nanstd(a.flatten()))
        statsDict['mean'] = float(stats.nanmean(a.flatten()))
        statsDict['median'] = float(stats.nanmedian(a.flatten()))
        statsDict['max'] = float(np.nanmax(a))
        statsDict['min'] = float(np.nanmin(a))
        statsDict['centmax'] = list(np.unravel_index(np.nanargmax(a),
                                                     a.shape))
        statsDict['madfm'] = float(MAD(a.flatten()))
        statsDict['npix'] = int(np.sum(np.where(np.isnan(a),0.0,1.0)))
        statsDict['success'] = True
        
    else:
        statsDict['npix'] = 0
        statsDict['stdev']   = 0.0
        statsDict['mean']    = 0.0
        statsDict['median']  = 0.0
        statsDict['max']     = 0.0
        statsDict['min']     = 0.0
        statsDict['centmax'] = (0.0, 0.0)
        statsDict['madfm']   = 0.0
        statsDict['success'] = False
        
    return statsDict
Example #29
def calc_stats(a, maskzero=False):

    statsDict = {}
    a = np.array(a)
    if maskzero:
        a = np.where(np.equal(a, 0.0), np.nan, a)

    # Check that array is not all NaNs
    statsDict['npix'] = int(np.sum(np.where(np.isnan(a), 0.0, 1.0)))
    if statsDict['npix'] >= 2:
        statsDict['stdev'] = float(stats.nanstd(a.flatten()))
        statsDict['mean'] = float(stats.nanmean(a.flatten()))
        statsDict['median'] = float(stats.nanmedian(a.flatten()))
        statsDict['max'] = float(np.nanmax(a))
        statsDict['min'] = float(np.nanmin(a))
        statsDict['centmax'] = list(np.unravel_index(np.nanargmax(a), a.shape))
        statsDict['madfm'] = float(MAD(a.flatten()))
        statsDict['npix'] = int(np.sum(np.where(np.isnan(a), 0.0, 1.0)))
        statsDict['success'] = True

    else:
        statsDict['npix'] = 0
        statsDict['stdev'] = 0.0
        statsDict['mean'] = 0.0
        statsDict['median'] = 0.0
        statsDict['max'] = 0.0
        statsDict['min'] = 0.0
        statsDict['centmax'] = (0.0, 0.0)
        statsDict['madfm'] = 0.0
        statsDict['success'] = False

    return statsDict
Example #30
def plot_wav(wav_file, fignum):
    f, axarr = plt.subplots(2, 1, False, False, False, num=fignum)
    (signal, rate) = load_wav_as_mono(wav_file)
    energy = gen_log_energy_array(signal, rate)

    filename = os.path.basename(wav_file)

    (energy_silent_points, energy_inv_silent_points) = find_silent_moments(energy)

    log_signal = signal * 10
    log_signal = np.log10(signal)


    chunk_size = 50
    chunks = chunkyfy(energy, chunk_size)

    mean_filtered = stats.nanmean(chunks, axis=1)
    min_filtered = np.nanmin(chunks, axis=1)
    max_filtered = np.nanmax(chunks, axis=1)
    std_filtered = stats.nanstd(chunks, axis=1)
    x = np.linspace(chunk_size / 2, energy.size - chunk_size / 2, min_filtered.size)

    ax = axarr[0, 0]
    ax.plot(energy, linewidth=0.4, color='gray')

    ax.plot(x, mean_filtered, color='b', linewidth=0.4)

    ax.set_title(filename)
    plot_split_points(ax, energy_silent_points, energy_inv_silent_points)

    ax = axarr[1, 0]
    plot_split_points(ax, energy_silent_points, energy_inv_silent_points)
    ax.plot(x, std_filtered, color='g', linewidth=0.4)

    return f
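chunkyfy is likewise not shown; it presumably reshapes the 1-D energy signal into rows of chunk_size samples so the axis=1 statistics act per chunk. A plausible version that NaN-pads the tail so the NaN-aware reducers ignore it (an assumption, not from the source):

import numpy as np

def chunkyfy(signal, chunk_size):
    """Reshape a 1-D array to (n_chunks, chunk_size), NaN-padding the tail."""
    signal = np.asarray(signal, dtype=float)
    n_chunks = int(np.ceil(signal.size / chunk_size))
    padded = np.full(n_chunks * chunk_size, np.nan)
    padded[:signal.size] = signal
    return padded.reshape(n_chunks, chunk_size)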
Example #31
 def rms_f(self, x):
     """Compute standard deviation over time varying axis of a
     front relative quantity, x.
     """
     # TODO: the axis used in nanmean is different for U and Uf
     # calcs - change Uf dims to make consistent?
     return stats.nanstd(x, axis=1)
Example #32
def anlstd(data):
    import numpy as np
    from scipy import stats
    year = []
    for month in np.arange(0,12):
        year.append(stats.nanstd(data[month::12]))
    return np.asarray(year)
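The same per-calendar-month climatology can be computed without the loop by viewing whole years as rows (a sketch; it assumes the series length is a multiple of 12):

import numpy as np

def anlstd_vectorized(data):
    """Std of each calendar month across years; len(data) must be 12 * n."""
    return np.nanstd(np.asarray(data, dtype=float).reshape(-1, 12), axis=0)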
Example #33
def calc_sample_loglik(gam_unit, family="poisson"):
    """
    Calculate log likelihood loss function for Poisson or Gaussian
    distributed data.

    Log likelihood is defined as:

    .. math:: L(Y, \theta(X)) = -2 \cdot \log \mathrm{Pr}_{\theta(X)}(Y)
    
    where :math:`\theta(X)` is the prediction and :math:`Y` is the actual data.
    From Friedman, Tibshirani, and Hastie, 2nd ed, 5th print, eq. 7.8.
    This is the probability of seeing the data, given the prediction.
    
    For Poisson, :math:`\mathrm{Pr}_{\theta(X)}(Y)` is given by
    :math:`pmf(Y, f(X))`, and for Gaussian, by :math:`pdf(Y, f(X))`.
    """
    assert family in ["poisson", "gaussian"]

    if family == "poisson":
        # only needs mean shape parameter
        Pr = -2 * stats.poisson.logpmf(gam_unit.actual, gam_unit.pred)
    else:
        # normal needs means and variance
        # shape handling here calculates average across repeats,
        # but keeps that dimension for broadcasting purposes
        pred_std = stats.nanstd(gam_unit.pred, axis=2)[:, :, None]
        Pr = -2 * stats.norm.logpdf(gam_unit.actual, gam_unit.pred, pred_std)

    # Pr now has shape (nmod, ntask, nrep, nunit, nbin)
    # get one number per model...
    return get_mean_sem_of_samples(Pr)
Example #34
    def compFanoFactor(self,time_range=[],pop_id= 'all',nmax=100):
	''' Compute the fano factor for the spike trains given in sp'''
	self.load_spikes()	
	spikes = self.events['spikes']
	if len(spikes)==0:
	    print 'Comp mean rate: spike array is empty !'
	    return np.nan
	pop_id,spikes,neuron_nr = self.get_pop_spikes(spikes,nmax,pop_id)
	if len(spikes) == 0:
	    return np.nan
	    
	if time_range!=[]:
	    idx = (spikes[:,1]>time_range[0]) & (spikes[:,1]<=time_range[1])
	    spikes = spikes[idx]
	if time_range==[]:
	  total_time = self.pars['T_total']
	else:
	  total_time = time_range[1] - time_range[0]
	ids = np.unique(spikes[:,0])[:nmax]

	counts = np.zeros((len(ids),))
	for i in np.arange(len(ids)):
		counts[i] = len(spikes[spikes[:,0]==ids[i],:])

	FF = (st.nanstd(counts))**2/st.nanmean(counts)
	return FF
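Reduced to its core, the Fano factor here is the variance-to-mean ratio of per-unit spike counts. A sketch of just that computation (scipy.stats.nanstd normalized by N-1 by default, hence ddof=1 below):

import numpy as np

def fano_factor(unit_ids):
    """Variance-to-mean ratio of spike counts, one count per unit id."""
    _, counts = np.unique(unit_ids, return_counts=True)
    return np.nanstd(counts, ddof=1) ** 2 / np.nanmean(counts)

print(fano_factor([0, 0, 0, 1, 1, 2, 2, 2, 2]))  # counts (3, 2, 4)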
Example #35
def bar_by_indep_2d(dep_key, indep_key, data, visible = True, ax = None, color = 'b', show_all = False, use_bar = False, **kwargs):
	
	'''
	the 2D case (i.e. independent is RR, dependent is RRTF)
	'''
	if type(indep_key) is str:
		x = data[0][indep_key]
	else:
		x = indep_key
	y = data[dep_key]
	nbins = x.size
	

	y_means = st.nanmean(y, 0)
	y_sems = st.nanstd(y, 0) / np.sqrt(y.shape[0])
	
	if visible:
		if ax is None:
			fig = plt.figure();
			ax = fig.add_subplot(111);

		if show_all:
			ax.plot(x, y.T, 'gray')
		
		if use_bar:
			line, = ax.bar(x, y_means, yerr = y_sems, color = color, **kwargs)
		else:
			line, _, _ = ax.errorbar(x, y_means, yerr = y_sems, lw = 2, color = color, **kwargs)
	
	plt.show()
	return line, ax
Example #36
def get_final_ecotype_num_data(resource_levels,type_data):
    for resource_level in resource_levels:
        ecotype_mean=0
        ecotype_se=0
        ecotype_sd=0
        ecotype_counts=[]
        for replicate in range(1,31):
            if type_data=="phenotype":
                file_str="../data_"+str(resource_level)+"/phenotypes/phenotype_"+str(replicate)+".dat"
            elif type_data=="genotype":
                file_str="../data_"+str(resource_level)+"/genotypes_time/genotype_"+str(replicate)+".dat"
            else:
                raise StandardError("Incorrect Type Data Entry: "+str(type_data))
            ecos_for_update=copy.deepcopy(get_ecotypes_for_update(file_str,type_data))
            present_ecotypes=[]
            if type_data=="genotype":
                ecotype_counts+=[len(ecos_for_update)]
            elif type_data=="phenotype":
                for i in range(len(ecos_for_update)):
                    if str(ecos_for_update[i][0]) not in present_ecotypes:
                        present_ecotypes+=[str(ecos_for_update[i][0])]
                ecotype_counts+=[len(present_ecotypes)]
        ecotype_mean=stats.nanmean(ecotype_counts)
        ecotype_se=stats.sem(ecotype_counts)
        ecotype_sd=stats.nanstd(ecotype_counts)
        pickle.dump(ecotype_mean,open("../replication_plot_data/"+str(type_data)+"_nums_mean_"+str(resource_level)+".data","wb"))
        pickle.dump(ecotype_se,open("../replication_plot_data/"+str(type_data)+"_nums_se_"+str(resource_level)+".data","wb"))
        pickle.dump(ecotype_sd,open("../replication_plot_data/"+str(type_data)+"_nums_sd_"+str(resource_level)+".data","wb"))
    return "success"
Example #37
def get_resource_counts(fluc_levels,fluc_type):
    for fluc_level in fluc_levels:
        resource_counts_mean=[[] for i in range(9)]
        resource_counts_se=[[] for i in range(9)]
        resource_counts_sd=[[] for i in range(9)]
        resource_counts=[]
        for replicate in range(1,31):
            replicate_counts=[[] for i in range(9)]
            resources_for_replicate=get_file_lines("../data_"+str(fluc_type)+"_"+str(fluc_level)+"/replicate_"+str(replicate)+"/resource.dat")
            for i in range(len(resources_for_replicate)):
                if len(resources_for_replicate[i])!=0 and resources_for_replicate[i][0]!="#":
                    temp=str(resources_for_replicate[i]).split(" ")
                    for j in range(1,10):
                        replicate_counts[j-1]+=[temp[j]]
            assert len(replicate_counts[0])==500000/50+1,""+str(len(replicate_counts[0]))
            resource_counts+=[copy.deepcopy(replicate_counts)]
        assert len(resource_counts)==30,""+str(len(resource_counts))
        for resource in range(9):
            for update in range(0,500001,50):
                update_data=[float(resource_counts[i][resource][update/50]) for i in range(30)]
                resource_counts_mean[resource]+=[stats.nanmean(update_data)]
                resource_counts_se[resource]+=[stats.sem(update_data)]
                resource_counts_sd[resource]+=[stats.nanstd(update_data)]
        pickle.dump(resource_counts_mean,open("../plot_data/resource_counts_mean_"+str(fluc_type)+"_"+str(fluc_level)+".data","wb"))
        pickle.dump(resource_counts_se,open("../plot_data/resource_counts_se_"+str(fluc_type)+"_"+str(fluc_level)+".data","wb"))
        pickle.dump(resource_counts_sd,open("../plot_data/resource_counts_sd_"+str(fluc_type)+"_"+str(fluc_level)+".data","wb"))
    return "success"
Example #38
 def rms_f(self, x):
     """Compute standard deviation over time varying axis of a
     front relative quantity, x.
     """
     # TODO: the axis used in nanmean is different for U and Uf
     # calcs - change Uf dims to make consistent?
     return stats.nanstd(x, axis=1)
Example #39
def getTraceAvg(dm, avgFunc=nanmean, **traceParams):

	"""
	Gets a single average trace

	Arguments:
	dm				--	a DataMatrix

	Keyword arguments:
	avgFunc			--	the function to use to determine the average trace. This
						function must be robust to nan values. (default=nanmean)
	*traceParams	--	see getTrace()

	Returns:
	An (xData, yData, errData) tuple, where errData contains the standard
	error.
	"""

	traceLen = traceParams['traceLen']
	mTrace = np.empty( (len(dm), traceLen) )
	mTrace[:] = np.nan
	i = 0
	for trialDm in dm:
		aTrace = getTrace(trialDm, **traceParams)
		mTrace[i, 0:len(aTrace)] = aTrace
		i += 1
	xData = np.linspace(0, traceLen, traceLen)
	yData = nanmean(mTrace, axis=0)
	errData = nanstd(mTrace, axis=0) / np.sqrt(mTrace.shape[0])
	errData = np.array( [errData, errData] )
	return xData, yData, errData
Example #40
def timeseries(iData, zoneMap):
    '''
    Make zone-wise averaging of input data
    input: 3D matrix(Layers x Width x Height) and map of zones (W x H)
    output: 2D matrices (Layers x Zones) with mean and std
    '''
    #reshape input cube into 2D matrix
    r, h, w = iData.shape
    iData, notNanDataI = cube2flat(iData)
    #get unique values of labels
    uniqZones = np.unique(zoneMap)
    # leave only not-nan
    uniqZones = uniqZones[~np.isnan(uniqZones)]
    zoneNum = np.zeros((r, uniqZones.size))
    zoneMean = np.zeros((r, uniqZones.size))
    zoneStd = np.zeros((r, uniqZones.size))
    #in each zone: get all values from input data get not nan data average
    for i in range(uniqZones.size):
        zi = uniqZones[i]
        if not np.isnan(zi):
            zoneData = iData[:, zoneMap.flat == zi]
            zoneNum[:, i] = zi
            zoneMean[:, i] = st.nanmean(zoneData, axis=1)
            zoneStd[:, i] = st.nanstd(zoneData, axis=1)
        
    return zoneMean, zoneStd, zoneNum
Example #41
def compute_moments(img):
    '''
    Compute the moments of the given image.

    Parameters
    ----------
    img : numpy.ndarray
        2D image.

    Returns
    -------
    mean : float
        The 1st moment.
    variance : float
        The 2nd moment.
    skewness : float
        The 3rd moment.
    kurtosis : float
        The 4th moment.

    '''

    mean = nanmean(img, axis=None)
    variance = nanstd(img, axis=None) ** 2.
    skewness = np.nansum(
        ((img - mean) / np.sqrt(variance)) ** 3.) / np.sum(~np.isnan(img))
    kurtosis = np.nansum(
        ((img - mean) / np.sqrt(variance)) ** 4.) / np.sum(~np.isnan(img)) - 3

    return mean, variance, skewness, kurtosis
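Modern SciPy can produce comparable NaN-aware moments directly. A rough equivalent (results can differ slightly, since scipy.stats.skew and kurtosis normalize by the population std while the removed scipy.stats.nanstd defaulted to the N-1 sample std):

import numpy as np
from scipy import stats

def compute_moments_flat(img):
    """Mean, variance, skewness, excess kurtosis over finite pixels."""
    flat = np.asarray(img, dtype=float).ravel()
    flat = flat[np.isfinite(flat)]   # drop NaNs up front
    return flat.mean(), flat.var(ddof=1), stats.skew(flat), stats.kurtosis(flat)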
Example #42
    def phot(self):
        """
        builds the table of stars
        """
        import numpy as np
        from scipy import stats

        epochs = len(self.objids)
        stars = len(self.stars)
        from datasource import DataSource

        m = np.zeros([epochs, stars])
        # objid is specific to a filter so we only need to query the objid
        wifsip = DataSource(host='pina', database='wifsip', user='******')
        for star in self.stars:
            print star,
            query = """SELECT mag_auto, magerr_auto 
                    FROM frames, phot, matched
                    WHERE matched.id like '%s'
                    AND frames.filter like '%s'
                    AND frames.objid = phot.objid
                    AND (matched.objid,matched.star) = (phot.objid,phot.star)
                    AND phot.flags = 0
                    AND magerr_auto > 0.0;""" % (star, self.filter)
            result = wifsip.query(query)
            mags = np.array([s[0] for s in result])
            err = np.array([s[1] for s in result])
            m = stats.nanmean(mags)
            s = stats.nanstd(mags)
            merr = stats.nanmean(err)
            stderr = stats.nanstd(err)
            #print mags
            #print err
            if len(mags) > 1:
                print '%4d %.3f %.3f %.3f %.3f' % (len(mags), m, s, merr,
                                                   stderr),
                mags = mags[err <= merr + stderr]
                err = err[err <= merr + stderr]
                avg = np.average(mags, weights=1. / err)
                std = np.sqrt(np.average(abs(mags - avg)**2, weights=1. / err))
                #std = np.std(mags)
                print '%4d %.3f %.3f' % (len(mags), avg, std)
                self.update_star(wifsip, star, avg, std, len(mags))
            else:
                print 'none (%.3f, %.3f)' % (m, s)

        wifsip.close()
Example #43
    def toleranceLimitProcessing(self,data):
        # Tolerance limit processing
        random.seed(1)
        resampledData = [random.choice(data) for x in xrange(2000)]

        #address default values:
        # middle = nanmean([(d[1]+d[0])/2 for d in data])/(self.r[1]-self.r[0])
        # print "middle", middle
        # if(middle < .35):
        #     print "filtering higher range"
        #     f = lambda x: x[1] != 100 or  random.random() > .3
        #     resampledData = filter(f, resampledData)
        # if(middle > .65*(self.r[1]-self.r[0])):
        #     print "filtering lower range"
        #     f = lambda x: x[0] != 0 or random.random() > .3
        #     resampledData = filter(f, resampledData)
        # f = lambda x: (x[0] != 0 and x[1]!=100) or random.random() > .1
        # resampledData = filter(f, resampledData)
        # print "resampled data length", len(resampledData)

        (resampLower,resampUpper) = zip(*resampledData)
        resampInterval = map(lambda x: x[1]-x[0], resampledData)
        meanLower = nanmean(resampLower)
        stdLower = nanstd(resampLower) * sqrt(len(data)) # it appears *sqrt is done to estimate population std from sample
        meanUpper = nanmean(resampUpper)
        stdUpper = nanstd(resampUpper) * sqrt(len(data)) # ditto
        meanInterval = nanmean(resampInterval)
        stdInterval = nanstd(resampInterval) * sqrt(len(data)) # ditto
        K=[32.019, 32.019, 8.380, 5.369, 4.275, 3.712, 3.369, 3.136, 2.967, 2.839,
           2.737, 2.655, 2.587, 2.529, 2.48, 2.437, 2.4, 2.366, 2.337, 2.31,
           2.31, 2.31, 2.31, 2.31, 2.208] # taken from Liu/Mendel matlab code, in turn from Walpole,Myers,Myers,Ye2008
        k = K[min(len(data),24)]
        acceptableLower = (meanLower-k*stdLower, meanLower+k*stdLower)
        acceptableUpper = (meanUpper-k*stdUpper, meanUpper+k*stdUpper)
        acceptableInterval = (meanInterval-k*stdInterval, meanInterval+k*stdInterval)
        for (l,u) in data[:]:
            try:
                if not acceptableLower[0] <= l <= acceptableLower[1]:
                    raise ValueError("Intolerable: lower bound %s not in  %s" % (str(l), str(acceptableLower)),(l,u))
                if not acceptableUpper[0] <= u <= acceptableUpper[1]:
                    raise ValueError("Intolerable: upper bound %s not in %s" % (str(u), str(acceptableUpper)),(l,u))
                if not acceptableInterval[0] <= u-l <= acceptableInterval[1]:
                    raise ValueError("Intolerable: interval %s greater than %s" % (str(u-l), str(acceptableInterval)),(l,u))
            except ValueError as (e,d):
                #print e
                #print "Intolerable: removing data point %s" % str(d)
                data.remove(d)
Example #44
    def _normalize(self, arr):
        ''' perform normalization routine on attributes '''
        with warn.catch_warnings():
            warn.simplefilter("ignore")
            for i in xrange(arr.shape[1]):
                arr[:,i] = (arr[:,i] - nanmean(arr[:,i])) / nanstd(arr[:,i])
            arr = np.nan_to_num(arr)
        return arr
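Worth noting: scipy.stats.nanmean and scipy.stats.nanstd were deprecated and eventually removed (around SciPy 0.18), so on a current stack the routine above can be rewritten with NumPy's NaN-aware reductions. A rough modern equivalent:

    import numpy as np

    def normalize_columns(arr):
        # Column-wise z-score, ignoring NaNs; remaining NaNs become 0.
        # (A zero-variance column would still divide by zero here.)
        mu = np.nanmean(arr, axis=0)
        sigma = np.nanstd(arr, axis=0)
        return np.nan_to_num((arr - mu) / sigma)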
Example #45
    def buildtable(self):
        """
        builds the table of stars
        """
        import numpy as np

        epochs = len(self.objids)
        stars = len(self.stars)
        if fileexists('/work2/jwe/NGC2281/' + self.filter + 'array.npy'):
            m = np.load('/work2/jwe/NGC2281/' + self.filter + 'array.npy')
        else:
            from datasource import DataSource
            from framecal import FrameCal

            fc = FrameCal(self.filter)

            m = np.zeros([epochs, stars])
            # objid is specific to a filter so we only need to query the objid
            wifsip = DataSource(host='pina', database='wifsip', user='******')
            for objid in self.objids:
                k = self.objids.index(objid)
                print k, epochs, objid,
                query = """SELECT matched.id, phot.mag_auto, phot.mag_errauto 
                        FROM phot, matched
                        WHERE phot.objid like '%s'
                        AND (matched.objid,matched.star) = (phot.objid,phot.star)
                        AND phot.flags = 0;""" % objid
                result = wifsip.query(query)
                starids = [s[0] for s in result]
                mags = [s[1] for s in result]
                err = [s[2] for s in result]

                slope, intercept, _, _, _ = fc.calframe(objid)
                print len(mags)
                for starid in starids:
                    i = self.stars.index(starid)
                    m[k, i] = mags[starids.index(starid)] * slope + intercept
            np.save('/work2/jwe/NGC2281/' + self.filter + 'array.npy', m)
            wifsip.close()

        i = np.where(m == 0.0)
        m[i] = np.nan
        from scipy import stats
        # calculate the observed average for the stars
        avg = stats.nanmean(m, axis=0)
        for k in range(epochs):
            print k, epochs, self.objids[k]

            # calculate the mean of offsets
            off = stats.nanmedian(m[k, :] - avg)
            # correct epoch for mean of offsets
            m[k, :] += off

        # calculate new corrected means
        avg = stats.nanmean(m, axis=0)
        std = stats.nanstd(m, axis=0)
        for i in range(len(self.stars)):
            print self.stars[i], avg[i], std[i]
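The epoch loop is a median zero-point alignment: each epoch (row) is shifted by the median of its deviations from the per-star averages. A toy sketch with modern NumPy; note it subtracts the offset, the usual convention, while the snippet above adds it, so treat the sign as dataset-specific:

    import numpy as np

    def align_epochs(m):
        # m: epochs x stars matrix of magnitudes, NaN for missing values.
        avg = np.nanmean(m, axis=0)            # per-star mean over all epochs
        for k in range(m.shape[0]):
            off = np.nanmedian(m[k, :] - avg)  # robust per-epoch zero point
            m[k, :] -= off                     # remove the epoch offset
        return m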
Example #46
def lineprops(filename,z,abf,ebf,D=32,H0=[70,2],wm=[.27,.01],wv=[.73,.01]):
    '''
    Determine the best-fit physical and intrinsic conditions inside
    the observed galaxy. These are determined from standard physical
    relations and by using other radiative transfer codes (not
    developed by me).
    '''
    c = 3.e5       # speed of light [km/s]
    k = 1.38e-16   # Boltzmann constant [erg/K] (defined but unused below)
    c1 = c*10**5   # speed of light [cm/s] (defined but unused below)
    freqwe = np.genfromtxt('lines.catalog')
    lfreq1 = freqwe[:,0]
    wt1 = freqwe[:,3]
    linename = np.loadtxt('lines.catalog',dtype=str)
    fs = np.loadtxt(filename)
    f = fs[:,0]
    sp = fs[:,1]*4654*1./D**2*1000
    snu = stats.nanstd(sp)
    nc = int(np.ceil(2*np.sqrt(2*np.log(1000))*abf[2]/.031))
    fm = np.argsort((f-abf[0])**2)[:nc]
    fm = fm[np.argsort(fm)]
    name11 = linename[:,1]
    name21 = linename[:,2]
    wt = []
    name1 = []
    name2 = []
    lfreq = []
    for j in range(np.size(wt1)):
        if wt1[j] == 1:
            wt = np.append(wt,wt1[j])
            name1 = np.append(name1,name11[j])
            name2 = np.append(name2,name21[j])
            lfreq = np.append(lfreq,lfreq1[j])

    ebf[0] = (ebf[0]**2+(.031/np.sqrt(8*np.log(2)))**2)**.5
    ebf[2] = (ebf[2]**2+(.031/np.sqrt(8*np.log(2)))**2)**.5
    rfreq = lfreq/(1+z)
    nlines = np.size(abf)/3.
    bfline = lfreq[np.argsort((abf[0]-rfreq)**2)[0]]
    zbf = [(bfline/abf[0]-1),(bfline/abf[0]**2*ebf[0])]
    #print bfline
    #v = (c*(1+z)**2-1)/(1+(1+z)**2)
    zs = np.linspace(0,zbf[0],1000)
    d = c/H0[0]*np.trapz((wm[0]*(1+zs)**3+wv[0])**-.5,zs)
    D = [d*(1+zbf[0]),0]
#    eDwm = c/(H0[0])*(1+zbf[0])*np.trapz(((wm[0]+wm[1])*(1+zs)**3+wv[0]-wm[1])**-.5,zs)-D[0]
    D[1] = D[0]*H0[1]/H0[0]
    fwhm1 = [2*np.sqrt(2*np.log(2))*abf[2]/abf[0]*c,0]
    fwhm = [fwhm1[0]-2*np.sqrt(2*np.log(2))*(fwhm1[0]-(fwhm1[0]**2-(.031*c/abf[0])**2)**.5),0]
    R = c*.031/abf[0]
    fwhm[0] += -70.*np.sqrt(8.*np.log(2.))*(np.sqrt(1+(R/70.)**2/(8.*np.log(2.)))-1.)
    fwhm[1] = ((ebf[0]*c/abf[0])**2+(ebf[2]*c/abf[0])**2)**.5
    Sco = [integrate.simps(sp[fm],f[fm])*c/abf[0],0]
    Sco[1] = np.sqrt(3*fwhm[0]*c*.031/abf[0])*snu
    Lco = [2.350*Sco[0]*(115/abf[0])**2*D[0]**2*(1+zbf[0])**-3,0] #Sco in mJy
    Lco[1] = Lco[0]*((Sco[1]/Sco[0])**2+(2*D[1]/D[0])**2+(2*ebf[0]/abf[0])**2)**.5
    return zbf,fwhm,Sco,D,Lco
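The D computed above is the standard flat-Lambda-CDM luminosity distance, D_L = (1 + z) * (c/H0) * integral from 0 to z of dz' / sqrt(wm*(1+z')^3 + wv), evaluated by trapezoidal quadrature. Isolated as a sketch, with the same illustrative cosmology as the defaults:

    import numpy as np

    def lum_dist(z, H0=70.0, wm=0.27, wv=0.73):
        # Flat-LCDM luminosity distance in Mpc (c in km/s, H0 in km/s/Mpc).
        c = 3.0e5
        zs = np.linspace(0.0, z, 1000)
        comoving = c / H0 * np.trapz((wm * (1.0 + zs)**3 + wv)**-0.5, zs)
        return comoving * (1.0 + z)

    print(lum_dist(1.0))  # roughly 6600 Mpc for these parameters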
Example #47
def aggregate_ftr_matrix(ftr_matrix):
    sig = []
    for ftr in ftr_matrix:
        median = stats.nanmedian(ftr)
        mean = stats.nanmean(ftr)
        std = stats.nanstd(ftr)
        # Invalid double scalars warning appears here
        skew = stats.skew(ftr) if any(ftr) else 0.0
        kurtosis = stats.kurtosis(ftr)
        sig.extend([median, mean, std, skew, kurtosis])
    return sig
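Each feature row contributes five summary statistics, so the signature has length 5 * n_features. A constant row is exactly what triggers the "invalid double scalars" warning noted in the comment: skew and kurtosis divide by a zero variance. A hypothetical two-feature input, assuming the snippet's aggregate_ftr_matrix and its old-SciPy stats import are in scope:

    import numpy as np

    ftr_matrix = [np.array([1.0, 2.0, 3.0, 4.0]),
                  np.array([0.5, 0.5, 0.5, 0.5])]  # constant row -> 0/0 in skew/kurtosis
    sig = aggregate_ftr_matrix(ftr_matrix)
    print(len(sig))  # 10: [median, mean, std, skew, kurtosis] per feature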
Example #48
def standardize(X, axis=None):
    r"""Subtracts the data mean and divide by its standard deviation
    at the specified axis.  Accepts NaNs."""

    # NOTE: There is an alternative in scipy.stats.mstats.zscore
    mu = nanmean(X, axis=axis)
    sigma = nanstd(X, axis=axis)

    Xr = (X - mu) / sigma

    return Xr, mu, sigma
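One caveat with the snippet above: with axis=1 on a 2-D array, mu and sigma come back 1-D, and (X - mu) then broadcasts against the wrong axis (or fails outright for non-square X). NumPy's keepdims handles the row-wise case; a sketch of that variant:

    import numpy as np

    def standardize_rows(X):
        # keepdims keeps the reduced axis so (X - mu) broadcasts row-wise.
        mu = np.nanmean(X, axis=1, keepdims=True)
        sigma = np.nanstd(X, axis=1, keepdims=True)
        return (X - mu) / sigma, mu, sigma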
Example #49
    def fromData(self, y):
        """
        Compute the scoring matrix from the estimated covariance matrix of y.
        The estimated covariance matrix is given by 1/2 the variance of the
        second-order differences of y.
        INPUT:
          y -- DxN -- N measurements of a time series in D dimensions
        """
        self.y0 = nanmean(y, 1)
        self.M = diag(nanstd(diff(y, n=2, axis=1), axis=1))
        self.S = diag(1 / sqrt(diag(self.M)))
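The differencing trick works because it cancels slowly varying signal while scaling white noise by a known factor: for i.i.d. noise of variance sigma^2, Var(y[t] - 2*y[t+1] + y[t+2]) = (1 + 4 + 1) * sigma^2 = 6 sigma^2 (the docstring's factor of 1/2 is the first-difference version, where Var = 2 sigma^2). A quick numerical check of the factor:

    import numpy as np

    rng = np.random.RandomState(0)
    y = rng.normal(0.0, 1.0, 100000)  # white noise, sigma = 1
    d2 = np.diff(y, n=2)
    print(np.var(d2) / 6.0)           # ~1.0, recovering sigma^2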
Example #50
def get3NetworkAvg(data_t, titleName, roiNames, numRuns):
    #Define the streams
    #Ventral=[1, 3, 11, 12, 13, 14]
    #Dorsal=[2, 4, 5, 6, 7, 8, 9, 10]
    #Lateral=[0, 1, 2, 3, 4]

    Lateral = [0, 1, 2, 8, 9]
    Dorsal = [8, 9, 10, 11, 12, 13, 14, 15]
    Ventral = [1, 2, 3, 4, 5, 6]

    print 'Ventral rois: ' + str(roiNames[Ventral])
    print 'Dorsal rois: ' + str(roiNames[Dorsal])
    print 'Early Visual rois: ' + str(roiNames[Lateral])

    # Get network averages
    lateralCoher = getNetworkWithin(data_t, Lateral)
    dorsalCoher = getNetworkWithin(data_t, Dorsal)
    ventralCoher = getNetworkWithin(data_t, Ventral)
    #allMeansWithin=(stats.nanmean(lateralCoher.flat), stats.nanmean(dorsalCoher.flat), stats.nanmean(ventralCoher.flat))
    #allSTDWithin=(stats.nanstd(lateralCoher.flat), stats.nanstd(dorsalCoher.flat), stats.nanstd(ventralCoher.flat))
    allMeansWithin = (stats.nanmean(dorsalCoher.flat),
                      stats.nanmean(ventralCoher.flat))
    allSTDWithin = (stats.nanstd(dorsalCoher.flat),
                    stats.nanstd(ventralCoher.flat))

    latBtwCoher = getNetworkBtw(data_t, Lateral, Ventral + Dorsal)
    dorsBtwCoher = getNetworkBtw(data_t, Dorsal, Ventral)
    ventBtwCoher = getNetworkBtw(data_t, Ventral, Dorsal)

    #allMeansBtw=(stats.nanmean(latBtwCoher), stats.nanmean(dorsBtwCoher), stats.nanmean(ventBtwCoher))
    #allSTDBtw=(stats.nanstd(latBtwCoher), stats.nanstd(dorsBtwCoher), stats.nanstd(ventBtwCoher))
    # Just dorsal versus ventral
    allMeansBtw = (stats.nanmean(dorsBtwCoher), stats.nanmean(ventBtwCoher))
    allSTDBtw = (stats.nanstd(dorsBtwCoher), stats.nanstd(ventBtwCoher))

    # Make bar graph
    title = titleName + ' by Network for ' + sub + ' for ' + str(
        numRuns) + ' runs'
    labels = ('Dorsal', 'Ventral')
    makeBarPlots(allMeansWithin, allSTDWithin, allMeansBtw, allSTDBtw, title,
                 labels)
Example #51
def plot_dist_to_targ(task_entry,
                      reach_trajectories=None,
                      targ_dist=10.,
                      plot_all=False,
                      ax=None,
                      target=None,
                      update_rate=60.,
                      decoder_rate=10.,
                      **kwargs):
    task_entry = dbfn.lookup_task_entries(task_entry)
    if reach_trajectories is None:
        reach_trajectories = task_entry.get_reach_trajectories()
    if target is None:
        target = np.array([targ_dist, 0])
    trajectories_dist_to_targ = [
        map(np.linalg.norm, traj.T - target) for traj in reach_trajectories
    ]

    step = int(update_rate // decoder_rate)  # slice steps must be integers
    trajectories_dist_to_targ = map(lambda x: x[::step],
                                    trajectories_dist_to_targ)
    max_len = np.max([len(traj) for traj in trajectories_dist_to_targ])
    n_trials = len(trajectories_dist_to_targ)

    # TODO use masked arrays
    data = np.ones([n_trials, max_len]) * np.nan
    for k, traj in enumerate(trajectories_dist_to_targ):
        data[k, :len(traj)] = traj

    from scipy.stats import nanmean, nanstd
    mean_dist_to_targ = np.array([nanmean(data[:, k]) for k in range(max_len)])
    std_dist_to_targ = np.array([nanstd(data[:, k]) for k in range(max_len)])

    if ax is None:
        plt.figure()
        ax = plt.subplot(111)

    # time vector at the decoder rate: after downsampling the 60 Hz screen
    # updates to 10 Hz, each sample spans 0.1 s
    time = np.arange(max_len) * 0.1
    if plot_all:
        for dist_to_targ in trajectories_dist_to_targ:
            ax.plot(dist_to_targ, **kwargs)
    else:
        ax.plot(time, mean_dist_to_targ, **kwargs)

    import plotutil
    #plotutil.set_ylim(ax, [0, targ_dist])
    plotutil.ylabel(ax, 'Distance to target')
    plotutil.xlabel(ax, 'Time (s)')
    plt.draw()
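The NaN-padding of ragged trials is the load-bearing idiom here: trials of different lengths go into one rectangular array, and the NaN-aware reductions simply ignore the padding (masked arrays, per the TODO, are the main alternative). A compact sketch:

    import numpy as np

    trials = [np.array([10.0, 7.0, 3.0]), np.array([10.0, 6.0])]
    max_len = max(len(t) for t in trials)
    data = np.full((len(trials), max_len), np.nan)
    for k, t in enumerate(trials):
        data[k, :len(t)] = t

    print(np.nanmean(data, axis=0))  # [10.   6.5  3. ] - short trials drop out of the tail
    print(np.nanstd(data, axis=0))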
Example #52
def plot_means(dataset):
    min_age = min(dataset.ages)
    max_age = max(dataset.ages)
    min_expression = np.nanmin(dataset.expression.flat)
    max_expression = np.nanmax(dataset.expression.flat)

    center = np.empty(dataset.ages.shape)
    std_plus = np.empty(dataset.ages.shape)
    std_minus = np.empty(dataset.ages.shape)
    for i, age in enumerate(dataset.ages):
        a = dataset.expression[i, :, :].flat
        c = nanmean(a)
        s = nanstd(a)
        center[i] = c
        std_plus[i] = c + s
        std_minus[i] = c - s

    fig = plt.figure()
    ax = fig.add_axes([0.08, 0.15, 0.85, 0.8])

    ax.set_ylabel('expression level', fontsize=cfg.fontsize)
    ax.set_xlabel('age', fontsize=cfg.fontsize)
    ax.set_title('Mean expression across all genes - {}'.format(dataset.name),
                 fontsize=cfg.fontsize)

    # set the development stages as x labels
    stages = [stage.scaled(scaler) for stage in dev_stages]
    ax.set_xticks([stage.central_age for stage in stages])
    ax.set_xticklabels([stage.short_name for stage in stages],
                       fontsize=cfg.xtick_fontsize,
                       fontstretch='condensed',
                       rotation=90)
    ax.set_xlim([min_age, max_age])

    # mark birth time with a vertical line
    ymin, ymax = ax.get_ylim()
    birth_age = scaler.scale(0)
    ax.plot([birth_age, birth_age], [ymin, ymax], '--', color='0.85')

    ax.plot([min_age, max_age], [min_expression, min_expression], '--g')
    ax.plot([min_age, max_age], [max_expression, max_expression], '--g')
    ax.plot(dataset.ages, center, 'bx')
    ax.plot(dataset.ages, std_plus, 'g-')
    ax.plot(dataset.ages, std_minus, 'g-')

    save_figure(fig,
                'mean-expression-{}.png'.format(dataset.name),
                under_results=True)
Example #53
def MeanWithConfidenceInterval(Y, confidence=0.95):
    """
    Use the fact that (mean(Y) - mu) / (std(Y)/sqrt(n))
    follows a Student t distribution with n-1 degrees of freedom.

    Returns:
        2 tuple (mean, symmetric confidence interval size).
    """
    n = len(Y)
    Y_bar = st.nanmean(Y)

    # According to the Student T-test distribution for n-1 degrees of freedom
    # find the position where the CDF is 0.975 (assuming we want a confidence
    # of 0.95). The lower part of the tail will account for the other 0.025
    # chance.
    t = st.t.ppf((confidence + 1.0) / 2.0, n - 1)
    SD = st.nanstd(Y,
                   bias=False)  # use the unbiased estimator: sqrt(y^2 / (n-1))
    SE = SD / np.sqrt(len(Y))
    return Y_bar, t * SE
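A usage sketch with illustrative data; on the old SciPy where st.nanmean/st.nanstd still exist, st.t.interval should give the same bounds as mean -/+ the returned half-width:

    import numpy as np
    import scipy.stats as st

    Y = np.array([4.1, 3.9, 4.3, 4.0, 4.2])
    mean, half_width = MeanWithConfidenceInterval(Y)
    print('%.3f +/- %.3f' % (mean, half_width))

    # Cross-check against scipy's built-in interval helper:
    print(st.t.interval(0.95, len(Y) - 1, loc=np.mean(Y), scale=st.sem(Y)))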
Example #54
    def updateLabelsAndFit(self, bufferA, bufferB):
        self.plotAttributes["curve"].setData(bufferA, bufferB)

        try:
            if self.ui.checkBoxAutoscale.isChecked():
                self.setPlotRanges(bufferA, bufferB)

            minBufferA = nanmin(bufferA)
            minBufferB = nanmin(bufferB)
            maxBufferA = nanmax(bufferA)
            maxBufferB = nanmax(bufferB)

            if self.ui.checkBoxShowAve.isChecked():
                rtbsaUtils.setPosAndText(self.text["avg"], nanmean(bufferB),
                                         minBufferA, minBufferB, 'AVG: ')

            if self.ui.checkBoxShowStdDev.isChecked():
                xPos = (minBufferA + (minBufferA + maxBufferA) / 2) / 2

                rtbsaUtils.setPosAndText(self.text["std"], nanstd(bufferB),
                                         xPos, minBufferB, 'STD: ')

            if self.ui.checkBoxCorrCoeff.isChecked():
                correlation = corrcoef(bufferA, bufferB)
                rtbsaUtils.setPosAndText(self.text["corr"],
                                         correlation.item(1), minBufferA,
                                         maxBufferB, "Corr. Coefficient: ")

            if self.ui.checkBoxLinFit.isChecked():
                self.text["slope"].setPos((minBufferA + maxBufferA) / 2,
                                          minBufferB)
                self.getLinearFit(bufferA, bufferB, True)

            elif self.ui.checkBoxPolyFit.isChecked():
                self.text["slope"].setPos((minBufferA + maxBufferA) / 2,
                                          minBufferB)
                self.getPolynomialFit(bufferA, bufferB, True)

        except ValueError:
            print "Error updating plot range"
Example #55
def moving_average(feedbacks, slot_n, prediction_length, mmc):

    past_delays_fitted = numpy.asarray(feedbacks)
    col_mean = stats.nanmean(past_delays_fitted, axis=0)
    col_std = stats.nanstd(past_delays_fitted, axis=0)
    inds = numpy.where(numpy.isnan(past_delays_fitted))
    past_delays_fitted[inds] = numpy.take(col_mean, inds[1])
    wifi_delays = past_delays_fitted[:, 0]
    lte_delays = past_delays_fitted[:, 1]

    if mmc:
        forward_predicted_delays_mmc = numpy.c_[
            numpy.random.normal(col_mean[0], col_std[0], prediction_length),
            numpy.random.normal(col_mean[1], col_std[1], prediction_length)]
        forward_predicted_delays_mmc[forward_predicted_delays_mmc < 0] = 0  # truncate negative samples
        predicted_ma = numpy.r_[past_delays_fitted, forward_predicted_delays_mmc]
        return col_mean, col_std, predicted_ma
    else:
        predicted_ma = numpy.r_[past_delays_fitted, numpy.zeros((prediction_length, 2))]
        for pl in range(past_delays_fitted.shape[0], past_delays_fitted.shape[0] + prediction_length):
            predicted_ma[pl, 0] = numpy.divide(
                numpy.sum(numpy.divide(wifi_delays, range(wifi_delays.shape[0] + 1, 1, -1), dtype='float_')),
                numpy.sum(numpy.divide(1, range(wifi_delays.shape[0] + 1, 1, -1), dtype='float_')),
                dtype='float_')
            predicted_ma[pl, 1] = numpy.divide(
                numpy.sum(numpy.divide(lte_delays, range(lte_delays.shape[0] + 1, 1, -1), dtype='float_')),
                numpy.sum(numpy.divide(1, range(lte_delays.shape[0] + 1, 1, -1), dtype='float_')),
                dtype='float_')
        return predicted_ma
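The opening lines are a standard column-mean imputation idiom: numpy.where on the NaN mask returns parallel (row, col) index arrays, and numpy.take maps each column index to its column mean. Isolated below, again using the old scipy.stats.nanmean:

    import numpy
    from scipy import stats

    past = numpy.array([[1.0, 10.0],
                        [numpy.nan, 12.0],
                        [3.0, numpy.nan]])
    col_mean = stats.nanmean(past, axis=0)      # [2.0, 11.0]
    inds = numpy.where(numpy.isnan(past))
    past[inds] = numpy.take(col_mean, inds[1])  # each NaN -> its column's mean
    print(past)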
Example #56
def standardize_col(dat, meanonly=False):
    '''
    Mean-impute each column of an array, then center and (optionally) scale it.
    '''
    colmean = st.nanmean(dat)
    if not meanonly:
        colstd = st.nanstd(dat)
    else:
        colstd = None
    ncol = dat.shape[1]
    nmissing = sp.zeros((ncol))
    datimp = sp.empty_like(dat)
    datimp[:] = dat
    for c in sp.arange(0, ncol):
        datimp[sp.isnan(datimp[:, c]), c] = colmean[c]
        datimp[:, c] = datimp[:, c] - colmean[c]
        if not meanonly:
            if colstd[c] > 1e-6:
                datimp[:, c] = datimp[:, c] / colstd[c]
            else:
                print "warning: colstd=" + colstd[c] + " during normalization"
        nmissing[c] = float(sp.isnan(dat[:, c]).sum())
    fracmissing = nmissing / dat.shape[0]
    return datimp, fracmissing
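A short usage sketch (hypothetical matrix, assuming the snippet's standardize_col and old-SciPy imports are in scope): each column is mean-imputed, centered, and, unless meanonly is set, scaled by its standard deviation:

    import scipy as sp

    dat = sp.array([[1.0, 2.0],
                    [sp.nan, 4.0],
                    [3.0, 6.0]])
    datimp, fracmissing = standardize_col(dat)
    print(datimp)        # NaNs replaced by the column mean, then z-scored
    print(fracmissing)   # [ 0.333...  0. ]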
Example #57
def meanstd(x,axis=None):
    return stats.nanmean(x,axis),stats.nanstd(x,axis)
Example #58
def main(argv):

  #default settings
  markerSize=16
  markerSize2=16
  markerColor='g'
  markerColor2='red'
  lineWidth=2
  fontSize=16
  unit='cm'
  Save_timeseries='no'
  dispTsFig='yes'
  dispVelFig='yes'
  dispContour='only'
  contour_step=200
  smoothContour='no'
  radius=0;
  edgeWidth=1.5
  fig_dpi=300

  if len(sys.argv)>2:
    try:
      opts, args = getopt.getopt(argv,"f:F:v:a:b:s:m:c:w:u:l:h:S:D:C:V:t:T:d:r:x:y:P:p:")
    except getopt.GetoptError:
      Usage() ; sys.exit(1)
 
    for opt,arg in opts:
      if   opt == '-f':     timeSeriesFile = arg
      elif opt == '-F':     timeSeriesFile_2 = arg
      elif opt == '-v':     velocityFile = arg
      elif opt == '-a':     vmin = float(arg)
      elif opt == '-b':     vmax = float(arg)
      elif opt == '-s':     fontSize = int(arg)
      elif opt == '-m':     markerSize=int(arg);       markerSize2=int(arg)
      elif opt == '-S':     Save_timeseries=arg
      elif opt == '-c':     markerColor=arg
      elif opt == '-w':     lineWidth=int(arg)
      elif opt == '-u':     unit=arg
      elif opt == '-l':     lbound=float(arg)
      elif opt == '-h':     hbound=float(arg)
      elif opt == '-D':     demFile=arg
      elif opt == '-C':     dispContour=arg
      elif opt == '-V':     contour_step=float(arg)
      elif opt == '-t':     minDate=arg
      elif opt == '-T':     maxDate=arg
      elif opt == '-d':     datesNot2show = arg.split()
      elif opt == '-r':     radius=abs(int(arg))
      elif opt == '-x':     xsub = [int(i) for i in arg.split(':')];   xsub.sort();   dispVelFig='no'
      elif opt == '-y':     ysub = [int(i) for i in arg.split(':')];   ysub.sort();   dispVelFig='no'
      elif opt == '-P':     dispTsFig=arg
      elif opt == '-p':     dispVelFig=arg


  elif len(sys.argv)==2:
    if argv[0]=='-h':
       Usage(); sys.exit(1)
    elif os.path.isfile(argv[0]):
       timeSeriesFile = argv[0]
       h5timeseries = h5py.File(timeSeriesFile)
       if not 'timeseries' in h5timeseries.keys():
          print 'ERROR'
          Usage(); sys.exit(1)
    else:  Usage(); sys.exit(1)
  elif len(sys.argv)<2:
    Usage(); sys.exit(1)

  if   unit in ('m','M'):              unitFac=1
  elif unit in ('cm','Cm','CM'):       unitFac=100
  elif unit in ('mm','Mm','MM','mM'):  unitFac=1000
  else:
     unitFac=100
     print 'Warning: unrecognized unit input!'
     print 'cm will be used to display the displacement'

##############################################################
# Read time series file info

  if not os.path.isfile(timeSeriesFile):
     Usage();sys.exit(1)

  h5timeseries = h5py.File(timeSeriesFile)
  if not 'timeseries' in h5timeseries.keys():
     Usage(); sys.exit(1)
 
  dateList1 = h5timeseries['timeseries'].keys()

##############################################################
# Dates to show time series plot

  import matplotlib.dates as mdates
  years    = mdates.YearLocator()   # every year
  months   = mdates.MonthLocator()  # every month
  yearsFmt = mdates.DateFormatter('%Y')

  print '*******************'
  print 'All existing dates:'
  print dateList1
  print '*******************'

  try:
     datesNot2show
     print 'dates not to show: '+str(datesNot2show)
  except:  datesNot2show=[]

  try:
    minDate
    minDateyy=yyyymmdd2years(minDate)
    print 'minimum date: '+minDate
    for date in dateList1:
       yy=yyyymmdd2years(date)
       if yy < minDateyy:
           datesNot2show.append(date)
  except:  pass
  try:
    maxDate
    maxDateyy=yyyymmdd2years(maxDate)
    print 'maximum date: '+maxDate
    for date in dateList1:
       yy=yyyymmdd2years(date)
       if yy > maxDateyy:
           datesNot2show.append(date)
  except:  pass

  try:
     dateList=[]
     for date in dateList1:
        if date not in datesNot2show:
           dateList.append(date)
     print '--------------------------------------------'
     print 'dates used to show time series displacements:'
     print dateList
     print '--------------------------------------------'
  except:
     dateList=dateList1
     print 'using all dates to show time series displacement'

###################################################################
# Date info

  dateIndex={}
  for ni in range(len(dateList)):
     dateIndex[dateList[ni]]=ni
  tbase=[]
  d1 = datetime.datetime(*time.strptime(dateList[0],"%Y%m%d")[0:5])

  for ni in range(len(dateList)):
     d2 = datetime.datetime(*time.strptime(dateList[ni],"%Y%m%d")[0:5])
     diff = d2-d1
     tbase.append(diff.days)

  dates=[]
  for ni in range(len(dateList)):
     d = datetime.datetime(*time.strptime(dateList[ni],"%Y%m%d")[0:5])
     dates.append(d)
  
  datevector=[]
  for i in range(len(dates)):
     datevector.append(np.float(dates[i].year) + np.float(dates[i].month-1)/12 + np.float(dates[i].day-1)/365)
  datevector2=[round(i,2) for i in datevector]
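  # note: this decimal-year conversion treats every month as exactly 1/12 of
  # a year and ignores leap days; adequate for axis placement in the plots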


###########################################
# Plot Fig 1 - Velocity / last epoch of time series / DEM

  import matplotlib.pyplot as plt
  if dispVelFig in ('yes','Yes','y','Y','YES'):
     fig = plt.figure()
     ax=fig.add_subplot(111)

     try:
        velocityFile
        h5file=h5py.File(velocityFile,'r')
        k=h5file.keys()
        dset= h5file[k[0]].get(k[0])
        print 'display: ' + k[0]
     except:
        dset = h5timeseries['timeseries'].get(h5timeseries['timeseries'].keys()[-1])
        print 'display: last epoch of timeseries'

     #DEM/contour option
     try:
        demFile
        import _readfile as readfile
        if   os.path.basename(demFile).split('.')[1]=='hgt':  amp,dem,demRsc = readfile.read_float32(demFile)
        elif os.path.basename(demFile).split('.')[1]=='dem':  dem,demRsc = readfile.read_dem(demFile)

        if dispContour in ('no','No','n','N','NO','yes','Yes','y','Y','YES'):
           print 'show DEM as basemap'
           cmap_dem=plt.get_cmap('gray')
           import _pysar_utilities as ut
           plt.imshow(ut.hillshade(dem,50.0),cmap=cmap_dem)
        if dispContour in ('only','Only','o','O','ONLY','yes','Yes','y','Y','YES'):
           print 'show contour'
           if smoothContour in ('yes','Yes','y','Y','YES'):
              import scipy.ndimage as ndimage
              dem=ndimage.gaussian_filter(dem,sigma=10.0,order=0)
           contour_sequence=np.arange(-6000,9000,contour_step)
           plt.contour(dem,contour_sequence,origin='lower',colors='black',alpha=0.5)
     except: print 'No DEM file' 

     try:     img=ax.imshow(dset,vmin=vmin,vmax=vmax)
     except:  img=ax.imshow(dset)

     import matplotlib.patches as patches      # need for draw rectangle of points selected on VelFig

########################################## 
# Plot Fig 2 - Time series plot
  import scipy.stats as stats
  fig2 = plt.figure(2)
  ax2=fig2.add_subplot(111) 

  try:
     timeSeriesFile_2
     h5timeseries_2=h5py.File(timeSeriesFile_2)
     print 'plot 2nd time series'
  except:  pass   

  ########### Plot Time Series with x/y ##########
  try:
     xsub
     ysub
     try:     xmin=xsub[0];         xmax=xsub[1]+1;         print 'x='+str(xsub[0])+':'+str(xsub[1])
     except:  xmin=xsub[0]-radius;  xmax=xsub[0]+radius+1;  print 'x='+str(xsub[0])+'+/-'+str(radius)
     try:     ymin=ysub[0];         ymax=ysub[1]+1;         print 'y='+str(ysub[0])+':'+str(ysub[1])
     except:  ymin=ysub[0]-radius;  ymax=ysub[0]+radius+1;  print 'y='+str(ysub[0])+'+/-'+str(radius)
     try:
        fig
        rectSelect=patches.Rectangle((xmin,ymin),radius*2+1,radius*2+1,fill=False,lw=edgeWidth)
        ax.add_patch(rectSelect)
     except: pass

     Dis=[]
     for date in dateList:  Dis.append(h5timeseries['timeseries'].get(date)[ymin:ymax,xmin:xmax])
     Dis0=array(Dis)
     dis=Dis0*unitFac
     dis=reshape(dis,(len(dateList),-1))
     dis_mean=stats.nanmean(dis,1)
     if (xmax-xmin)*(ymax-ymin)==1:  dis_std=[0]*len(dateList)
     else:                           dis_std=stats.nanstd(dis,1)
     (_, caps, _)=ax2.errorbar(dates,dis_mean,yerr=dis_std,fmt='-ko',\
                               ms=markerSize, lw=lineWidth, alpha=1, mfc=markerColor,\
                               elinewidth=edgeWidth,ecolor='black',capsize=markerSize*0.5)
     for cap in caps:  cap.set_markeredgewidth(edgeWidth)
     print dis_mean

     # x axis format
     ax2.fmt_xdata = DateFormatter('%Y-%m-%d %H:%M:%S')
     if unitFac==100:     ax2.set_ylabel('Displacement [cm]',fontsize=fontSize)
     elif unitFac==1000:  ax2.set_ylabel('Displacement [mm]',fontsize=fontSize)
     else:                ax2.set_ylabel('Displacement [m]' ,fontsize=fontSize)
     ax2.set_xlabel('Time [years]',fontsize=fontSize)
     ax2.set_title('x='+str(xmin)+':'+str(xmax-1)+', y='+str(ymin)+':'+str(ymax-1))
     ax2.xaxis.set_major_locator(years)
     ax2.xaxis.set_major_formatter(yearsFmt)
     ax2.xaxis.set_minor_locator(months)
     datemin = datetime.date(int(datevector[0]),1,1)
     datemax = datetime.date(int(datevector[-1])+1,1,1)
     ax2.set_xlim(datemin, datemax)

     # y axis format
     try:
        lbound
        hbound
        ax2.set_ylim(lbound,hbound)
     except:
        ax2.set_ylim(nanmin(dis_mean-dis_std)-0.4*abs(nanmin(dis_mean)),\
                     nanmax(dis_mean+dis_std)+0.4*abs(nanmax(dis_mean)))

     for tick in ax2.xaxis.get_major_ticks():  tick.label.set_fontsize(fontSize)
     for tick in ax2.yaxis.get_major_ticks():  tick.label.set_fontsize(fontSize)
     #fig2.autofmt_xdate()     # adjust x-label overlap by rotating; may enable again

     if Save_timeseries in ('yes','Yes','Y','y','YES'):
        import scipy.io as sio
        Delay={}
        Delay['displacement']=Dis0
        Delay['unit']='m'
        Delay['time']=datevector
        tsNameBase='ts_x'+str(xmin)+'_'+str(xmax-1)+'y'+str(ymin)+'_'+str(ymax-1)
        sio.savemat(tsNameBase+'.mat', {'displacement': Delay})
        print 'saved data to '+tsNameBase+'.mat'
        plt.savefig(tsNameBase+'.pdf',dpi=fig_dpi)
        print 'saved plot to '+tsNameBase+'.pdf'