def method_errorbar(data,
                    xlabels,
                    line_color=default_color,
                    med_color=None,
                    legend=None,
                    y_offset=0.0,
                    alpha=0.05):
    if not med_color:
        med_color = line_color
    ax.grid(axis='x', color='0.9', linestyle='-', linewidth=0.2)
    ax.set_axisbelow(True)
    n, m = data.shape
    medians = [percentile(data[:, i], 50) for i in range(m)]
    xerr = [[medians[i] - percentile(data[:, i], 100 * (alpha / 2.)),
             percentile(data[:, i], 100 * (1 - alpha / 2.)) - medians[i]]
            for i in range(m)]
    xerr = np.array(xerr).transpose()
    y_marks = np.array(range(len(xlabels))) - y_offset
    plt.errorbar(y=y_marks,
                 x=medians,
                 xerr=xerr,
                 fmt='|',
                 capsize=0,
                 color=line_color,
                 ecolor=line_color,
                 elinewidth=0.3,
                 markersize=2)
    plt.xlabel('% cases used', fontsize=8)
    ax.tick_params(axis='x', which='both', labelsize=8)
    ax.set_yticks(np.array(range(len(xlabels))))
    ax.set_yticklabels(xlabels, fontsize=6)
    plt.ylim((min(y_marks) - 0.5, max(y_marks) + 0.5))
    spines_to_remove = ['top', 'right', 'left']
    for spine in spines_to_remove:
        ax.spines[spine].set_visible(False)
    ppl.utils.remove_chartjunk(ax, ['top', 'right', 'bottom'],
                               show_ticks=False)
    if legend:
        rect = legend.get_frame()
        rect.set_facecolor(light_grey)
        rect.set_linewidth(0.0)
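The error-bar extents above are simply the distances from each column's median to its alpha/2 and 1 - alpha/2 sample percentiles. A minimal self-contained sketch of that computation, with made-up data and `scipy.stats.scoreatpercentile` assumed as the `percentile` in scope:

import numpy as np
from scipy.stats import scoreatpercentile as percentile

alpha = 0.05
data = np.random.randn(1000, 3)  # rows: bootstrap samples, columns: methods
medians = [percentile(data[:, i], 50) for i in range(data.shape[1])]
# Asymmetric (lower, upper) interval half-widths around each median,
# transposed to the (2, m) shape that plt.errorbar expects.
xerr = np.array([[m - percentile(data[:, i], 100 * alpha / 2.),
                  percentile(data[:, i], 100 * (1 - alpha / 2.)) - m]
                 for i, m in enumerate(medians)]).T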
Example #2
    def calculateMeans(self):
        self.synHist = ma.masked_values(self.synHist, -9999.0)
        self.synHistMean = ma.mean(self.synHist, axis=0)
        self.medSynHist = ma.median(self.synHist, axis=0)

        self.synHistUpper = percentile(self.synHist, per=95, axis=0)
        self.synHistLower = percentile(self.synHist, per=5, axis=0)
Example #3
    def getMedian(self, inArticleDict):
        """Return a dict mapping each subject to (median, 25th, 75th percentile)."""
        medDict = {}
        for subject in inArticleDict:
            # Index the passed-in dict; the original read self.inArticleDict,
            # which ignores the argument.
            medDict[subject] = (round(numpy.median(inArticleDict[subject])),
                                round(percentile(inArticleDict[subject], 25)),
                                round(percentile(inArticleDict[subject], 75)))
        return medDict
Example #4
def calculateCI(Vr, years, nodata, minRecords, yrsPerSim=1,
                sample_size=50, prange=90):
    """
    Fit a GEV to the wind speed records for a 2-D extent of
    wind speed values, providing a confidence range by resampling at
    random from the input values.

    :param Vr: `numpy.ndarray` of wind speeds (3-D - event, lat, lon)
    :param years: `numpy.ndarray` of years for which to evaluate
                  return period values.
    :param float nodata: missing data value.
    :param int minRecords: minimum number of valid wind speed values required
                           to fit distribution.
    :param int yrsPerSim: values represent block maxima - this value indicates
                          the time span of the block (default 1).
    :param int sample_size: number of records to randomly sample for
                            calculating the confidence interval of the fit.
    :param float prange: percentile range.

    :return: tuple of `numpy.ndarray` upper and lower confidence bounds on
             the return period wind speed values.

    """

    lower = (100 - prange) / 2.
    upper = 100. - lower

    nrecords = Vr.shape[0]
    nsamples = nrecords // sample_size
    RpUpper = nodata * np.ones((len(years), Vr.shape[1], Vr.shape[2]), dtype='f')
    RpLower = nodata * np.ones((len(years), Vr.shape[1], Vr.shape[2]), dtype='f')

    w = np.zeros((len(years), nsamples), dtype='f')
    wUpper = np.zeros((len(years)), dtype='f')
    wLower = np.zeros((len(years)), dtype='f')

    for i in range(Vr.shape[1]):
        for j in range(Vr.shape[2]):
            if Vr[:, i, j].max() > 0.0:
                random.shuffle(Vr[:, i, j])
                for n in range(nsamples):
                    nstart = n * sample_size
                    # Exclusive slice end; the original's extra "- 1" silently
                    # dropped one record from every subsample.
                    nend = (n + 1) * sample_size
                    vsub = Vr[nstart:nend, i, j]

                    vsub.sort()
                    if vsub.max() > 0.:
                        w[:, n], loc, scale, shp = evd.estimateEVD(
                            vsub, years, nodata, minRecords // 10, yrsPerSim)

                for n in range(len(years)):
                    wUpper[n] = percentile(w[n, :], upper)
                    wLower[n] = percentile(w[n, :], lower)

                RpUpper[:, i, j] = wUpper
                RpLower[:, i, j] = wLower

    return RpUpper, RpLower
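The confidence bracket itself is generic percentile bootstrapping: shuffle, split into subsamples, evaluate each, then read off the (100 - prange)/2 and complementary percentiles across the results. A stripped-down sketch of that pattern, with a simple block-maximum statistic standing in for the `evd.estimateEVD` fit:

import numpy as np
from scipy.stats import scoreatpercentile as percentile

prange = 90
lower = (100 - prange) / 2.
upper = 100. - lower

rng = np.random.default_rng(0)
records = rng.gumbel(30., 5., size=1000)  # stand-in for one cell's wind speeds
sample_size = 50

rng.shuffle(records)
fits = [records[n * sample_size:(n + 1) * sample_size].max()  # stand-in "fit"
        for n in range(records.size // sample_size)]
ci = (percentile(fits, lower), percentile(fits, upper))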
Example #5
    def calculateStats(self):

        self.synMeanLandfall = np.mean(self.synLandfall, axis=0)
        self.synMeanOffshore = np.mean(self.synOffshore, axis=0)

        self.synUpperLF = percentile(self.synLandfall, per=95, axis=0)
        self.synLowerLF = percentile(self.synLandfall, per=5, axis=0)
        self.synUpperOF = percentile(self.synOffshore, per=95, axis=0)
        self.synLowerOF = percentile(self.synOffshore, per=5, axis=0)
Example #6
    def calculateMeans(self):
        """
        Calculate the mean, median and percentiles of the synthetic values
        """
        self.synHist = ma.masked_values(self.synHist, -9999.)
        self.synHistMean = ma.mean(self.synHist, axis=0)
        self.medSynHist = ma.median(self.synHist, axis=0)

        self.synHistUpper = percentile(self.synHist, per=95, axis=0)
        self.synHistLower = percentile(self.synHist, per=5, axis=0)
Example #7
    def calculateMeans(self):
        """
        Calculate mean, median and percentiles of the :attr:`self.synHist`
        attribute.

        """

        self.synHist = ma.masked_values(self.synHist, -9999.)
        self.synHistMean = ma.mean(self.synHist, axis=0)
        self.medSynHist = ma.median(self.synHist, axis=0)

        self.synHistUpper = percentile(self.synHist, per=95, axis=0)
        self.synHistLower = percentile(self.synHist, per=5, axis=0)
Example #8
    def calculateStats(self):
        """
        Calculate mean and percentiles of landfall/offshore transition
        rates. Operates on the :attr:`self.synLandfall` and
        :attr:`self.synOffshore` attributes.

        """

        self.synMeanLandfall = np.mean(self.synLandfall, axis=0)
        self.synMeanOffshore = np.mean(self.synOffshore, axis=0)

        self.synUpperLF = percentile(self.synLandfall, per=95, axis=0)
        self.synLowerLF = percentile(self.synLandfall, per=5, axis=0)
        self.synUpperOF = percentile(self.synOffshore, per=95, axis=0)
        self.synLowerOF = percentile(self.synOffshore, per=5, axis=0)
Example #9
def get_player_lists():
    position_attribute_scores = {position: {} for position in POSITIONS}
    player_lists = {position: [] for position in POSITIONS}

    for position in position_attribute_scores:
        for attribute in ATTRIBUTES:
            position_attribute_scores[position][attribute] = []

    # First pass: collect every score, grouped by position and attribute.
    with open('players.csv', mode='r') as csvfile:
        reader = csv.DictReader(csvfile)
        for player_row in reader:
            for attribute in ATTRIBUTES:
                score = int(player_row[attribute])
                position = player_row['position']
                position_attribute_scores[position][attribute].append(score)

    # Second pass: convert each player's raw score to a percentile rank within
    # their position (percentile here is presumably scipy's percentileofscore).
    with open('players.csv', mode='r') as csvfile:
        reader = csv.DictReader(csvfile)
        for player_row in reader:
            player = {}
            position = player_row['position']
            player['name'] = player_row['name']
            player['position'] = position
            for attribute in ATTRIBUTES:
                player_score = int(player_row[attribute])
                scores = position_attribute_scores[player_row['position']][attribute]
                player[attribute] = percentile(scores, player_score, 'weak')

            player_lists[position].append(player)

    return player_lists
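With `scipy.stats.percentileofscore` and kind='weak', the rank is the share of scores less than or equal to the player's score. A quick check of that semantics:

from scipy.stats import percentileofscore

scores = [10, 20, 30, 40, 50]
percentileofscore(scores, 30, kind='weak')  # 60.0: three of five scores are <= 30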
Example #10
def get_method_bounds(X):
    sample_counts = array_sample(X)
    # NB: relies on the module-level `xlabels` for the number of columns.
    percentsat = lambda x, k: np.array(
        [percentile(x[:, i], k) for i, m in enumerate(xlabels)])
    median = percentsat(sample_counts, 50)
    # The original wrote "05", an octal literal (a syntax error in Python 3);
    # the 5th percentile is meant.
    yerr = np.vstack([median - percentsat(sample_counts, 5),
                      percentsat(sample_counts, 95) - median]).transpose()
    return median, yerr
Example #11
def print_percentiles(label, values):
    try:
        from scipy.stats import scoreatpercentile as percentile
    except ImportError:
        print('WARN: no scipy means no percentile stats printed')
        return
    d = {
        0: min(values),
        5: percentile(values, 5),
        25: percentile(values, 25),
        50: percentile(values, 50),
        75: percentile(values, 75),
        95: percentile(values, 95),
        100: max(values),
    }
    for k, v in d.items():
        print('%s: %d percentile: %f' % (label, k, v))
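A usage sketch:

import random

values = [random.gauss(0., 1.) for _ in range(10000)]
print_percentiles('normal', values)
# one line per cut point, e.g. "normal: 50 percentile: ..."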
Example #13
def plot_boolean_frequency(data, labels, **kwargs):
    alpha = 0.05
    boolean_percent = lambda X: np.count_nonzero(X) / float(len(X))
    boolean_sample = lambda x: np.array(
        [boolean_percent(resample(x)) for i in range(bootstrap_num)])
    medians = []
    yerr = []
    for d, l in zip(data, labels):
        d_samples = boolean_sample(d)
        low = percentile(d_samples, 100 * alpha / 2.)
        med = percentile(d_samples, 50)
        high = percentile(d_samples, 100 * (1 - alpha / 2.))
        print('[%.2f,%.2f,%.2f]:%s' % (low, med, high, l))
        medians.append(med)
        yerr.append([med - low, high - med])
    yerr = np.array(yerr)
    kwargs['width'] = 0.2
    kwargs['xfontsize'] = 10
    method_bar(medians, yerr, labels, **kwargs)
    plt.ylim(0, 1)
Example #16
    def plotStatistics(self, output_file):

        p = stats.statRemoveNum(np.array(self.param), self.missingValue)
        a = p - np.mean(p)
        pmin = p.min()
        pmax = p.max()
        amin = a.min()
        amax = a.max()
        abins = np.linspace(amin, amax, 50)
        bins = np.linspace(pmin, pmax, 50)
        hist = np.empty((len(bins) - 1, self.maxCell))
        ahist = np.empty((len(abins) - 1, self.maxCell))
        x = np.arange(11)
        alpha = np.empty((11, self.maxCell))
        aalpha = np.empty((11, self.maxCell))

        # Cells are presumably 1-indexed; the original iterated from 0 over
        # maxCell + 1 values, writing the first histogram into column -1.
        for i in range(1, self.maxCell + 1):
            p = self.extractParameter(i, 0)
            a = p - np.mean(p)
            hist[:, i - 1], b = np.histogram(p, bins, density=True)
            ahist[:, i - 1], b = np.histogram(a, abins, density=True)
            alpha[:, i - 1] = acf(p, 10)
            aalpha[:, i - 1] = acf(a, 10)

        mhist = np.mean(hist, axis=1)
        uhist = percentile(hist, per=95, axis=1)
        lhist = percentile(hist, per=5, axis=1)

        mahist = np.mean(ahist, axis=1)
        uahist = percentile(ahist, per=95, axis=1)
        lahist = percentile(ahist, per=5, axis=1)

        malpha = np.mean(alpha, axis=1)
        ualpha = percentile(alpha, per=95, axis=1)
        lalpha = percentile(alpha, per=5, axis=1)

        maalpha = np.mean(aalpha, axis=1)
        uaalpha = percentile(aalpha, per=95, axis=1)
        laalpha = percentile(aalpha, per=5, axis=1)

        fig = RangeCurve()
        fig.add(bins[:-1], mhist, uhist, lhist, "Values", "Probability", "")
        fig.add(abins[:-1], mahist, uahist, lahist, "Anomalies", "Probability",
                "")
        fig.add(x, malpha, ualpha, lalpha, "Lag", "Autocorrelation",
                "ACF of values")
        fig.add(x, maalpha, uaalpha, laalpha, "Lag", "Autocorrelation",
                "ACF of anomalies")
        fig.plot()

        saveFigure(fig, output_file + '.png')
Example #17
    def calculateMeans(self, synMean, synMin, synMed, synMax, synMinCP):
        """
        Calculate mean, median, minimum, maximum and percentiles of pressure
        values from synthetic events.

        :param synMean: `numpy.ndarray`
        :param synMin: `numpy.ndarray`
        :param synMed: `numpy.ndarray`
        :param synMax: `numpy.ndarray`
        :param synMinCP: `numpy.ndarray`

        """
        synMean = ma.masked_values(synMean, -9999.)
        synMin = ma.masked_values(synMin, -9999.)
        synMed = ma.masked_values(synMed, -9999.)
        synMax = ma.masked_values(synMax, -9999.)

        self.synMean = ma.mean(synMean, axis=0)
        self.synMed = ma.mean(synMed, axis=0)
        self.synMin = ma.mean(synMin, axis=0)
        self.synMax = ma.mean(synMax, axis=0)

        self.synMeanUpper = percentile(ma.compressed(synMean), per=95, axis=0)
        self.synMeanLower = percentile(ma.compressed(synMean), per=5, axis=0)
        self.synMinUpper = percentile(ma.compressed(synMin), per=95, axis=0)
        self.synMinLower = percentile(ma.compressed(synMin), per=5, axis=0)

        self.synMinCPDist = np.mean(synMinCP, axis=0)
        self.synMinCPLower = percentile(synMinCP, per=5, axis=0)
        self.synMinCPUpper = percentile(synMinCP, per=95, axis=0)
        r = list(np.random.uniform(high=synMean.shape[0], size=3).astype(int))
        self.synRandomMinima = synMean[r, :, :]
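Note the `ma.compressed` wrappers: `scoreatpercentile` does not honor a masked array's mask, so compressing first drops the -9999. sentinels before the percentile is taken (and flattens to 1-D, which is presumably why the `axis=0` arguments are inert here). A small demonstration:

import numpy.ma as ma
from scipy.stats import scoreatpercentile

x = ma.masked_values([980., 990., 1000., -9999.], -9999.)
scoreatpercentile(x, 50)                 # the sentinel leaks into the ranking
scoreatpercentile(ma.compressed(x), 50)  # 990.0, valid values only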
Example #18
def median(map):
    """Calculate the median of a map.

    map: input PCRaster map"""

    return percentile(map, 50)

    # Unreachable legacy PCRaster implementation (everything after the
    # return above):
    # OrderMap = order(map)
    # Mid = roundoff(mean(OrderMap))
    # MidMap = ifthenelse(OrderMap == Mid, map, 0)
    # Median = cellvalue(mapmaximum(MidMap), 0, 0)
    # assert Median[0] > 0.0
    # return Median[0]
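Since the live body is just the 50th percentile, plain NumPy gives the same result:

import numpy as np
np.percentile([1., 3., 2., 8.], 50)  # 2.5, identical to percentile(map, 50)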
Example #20
    def calcStats(self, lonCrossHist, lonCrossEW, lonCrossWE):
        """Calculate means and percentiles of synthetic event sets"""

        self.synCrossMean = np.mean(lonCrossHist, axis=0)
        self.synCrossEW = np.mean(lonCrossEW, axis=0)
        self.synCrossWE = np.mean(lonCrossWE, axis=0)

        self.synCrossUpper = percentile(lonCrossHist, per=95, axis=0)
        self.synCrossEWUpper = percentile(lonCrossEW, per=95, axis=0)
        self.synCrossWEUpper = percentile(lonCrossWE, per=95, axis=0)

        self.synCrossLower = percentile(lonCrossHist, per=5, axis=0)
        self.synCrossEWLower = percentile(lonCrossEW, per=5, axis=0)
        self.synCrossWELower = percentile(lonCrossWE, per=5, axis=0)
Example #22
def getTeamPercentile(season):
    team_percentile = defaultdict(list)
    stat_keys = ("goals_for", "goals_against", "shots_for", "shots_against",
                 "hits_for", "hits_against", "giveaways", "takeaways",
                 "pim_for", "pim_against", "power_plays", "power_play_goals",
                 "penalty_kills", "penalty_kill_goals",
                 "power_play_percentage", "penalty_kill_percentage",
                 "shooting_percentage", "save_percentage", "PDO")
    for idx, game in enumerate(season):
        game_stats = game["stats"]["game"]

        # Accumulate each stat's running history...
        for key in stat_keys:
            team_percentile[key].append(game_stats[key])

        # ...then rank this game's value within that history.
        game["stats"]["team_percentile"] = {
            i: float(
                percentile(team_percentile[i], game_stats[i], kind='mean') /
                100)
            for i in dict(team_percentile)
        }
    return season
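With kind='mean', `percentileofscore` averages the strict and weak definitions, so a game that ties the running maximum ranks at the midpoint rather than at 100. For example:

from scipy.stats import percentileofscore

goals = [1, 2, 3]
percentileofscore(goals, 3, kind='mean')  # (66.67 strict + 100.0 weak) / 2 = 83.33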
Example #23
    def calculateMeans(self, synMean, synMin, synMed, synMax, synMinCP):
        synMean = ma.masked_values(synMean, -9999.)
        synMin = ma.masked_values(synMin, -9999.)
        synMed = ma.masked_values(synMed, -9999.)
        synMax = ma.masked_values(synMax, -9999.)

        self.synMean = ma.mean(synMean, axis=0)
        self.synMed = ma.mean(synMed, axis=0)
        self.synMin = ma.mean(synMin, axis=0)
        self.synMax = ma.mean(synMax, axis=0)

        self.synMeanUpper = percentile(synMean, per=95, axis=0)
        self.synMeanLower = percentile(synMean, per=5, axis=0)
        self.synMinUpper = percentile(synMin, per=95, axis=0)
        self.synMinLower = percentile(synMin, per=5, axis=0)

        self.synMinCPDist = np.mean(synMinCP, axis=0)
        self.synMinCPLower = percentile(synMinCP, per=5, axis=0)
        self.synMinCPUpper = percentile(synMinCP, per=95, axis=0)
        
        r = list(np.random.uniform(high=synMean.shape[0], size=3).astype(int))
        self.synRandomMinima = synMean[r, :, :]
Example #24
def get_rank(x):
    return percentile(bg, x) / 100.0
Example #25
def main(args):
    # Parse the command line
    ## Baseline list
    if args.baseline is not None:
        ## Fill the baseline list with the conjugates, if needed
        newBaselines = []
        for pair in args.baseline:
            newBaselines.append((pair[1], pair[0]))
        args.baseline.extend(newBaselines)
    ## Polarization
    if args.xx:
        args.polToPlot = 'XX'
    elif args.xy:
        args.polToPlot = 'XY'
    elif args.yx:
        args.polToPlot = 'YX'
    elif args.yy:
        args.polToPlot = 'YY'
    elif args.stokes_i:
        args.polToPlot = 'I'
    elif args.stokes_v:
        args.polToPlot = 'V'

    filenames = args.filename
    filenames.sort()
    if args.limit != -1:
        filenames = filenames[:args.limit]

    nInt = len(filenames)

    dataDict = numpy.load(filenames[0])
    tInt = dataDict['tInt']
    nBL, nchan = dataDict['vis1XX'].shape
    freq = dataDict['freq1']
    junk0, refSrc, junk1, junk2, junk3, junk4, antennas = read_correlator_configuration(
        dataDict)
    dataDict.close()

    # Make sure the reference antenna is in there
    if args.ref_ant is not None:
        found = False
        for ant in antennas:
            if ant.stand.id == args.ref_ant:
                found = True
                break
        if not found:
            raise RuntimeError("Cannot file reference antenna %i in the data" %
                               args.ref_ant)

    bls = []
    l = 0
    cross = []
    for i in range(0, len(antennas), 2):
        ant1 = antennas[i].stand.id
        for j in range(i, len(antennas), 2):
            ant2 = antennas[j].stand.id
            if args.include_auto or ant1 != ant2:
                if args.baseline is not None:
                    if (ant1, ant2) in args.baseline:
                        bls.append((ant1, ant2))
                        cross.append(l)
                elif args.ref_ant is not None:
                    if ant1 == args.ref_ant or ant2 == args.ref_ant:
                        bls.append((ant1, ant2))
                        cross.append(l)
                else:
                    bls.append((ant1, ant2))
                    cross.append(l)

            l += 1
    nBL = len(cross)

    if args.decimate > 1:
        if nchan % args.decimate != 0:
            raise RuntimeError(
                "Invalid freqeunce decimation factor:  %i %% %i = %i" %
                (nchan, args.decimate, nchan % args.decimate))

        nchan //= args.decimate
        freq.shape = (freq.size // args.decimate, args.decimate)
        freq = freq.mean(axis=1)

    times = numpy.zeros(nInt, dtype=numpy.float64)
    visToPlot = numpy.zeros((nInt, nBL, nchan), dtype=numpy.complex64)
    visToMask = numpy.zeros((nInt, nBL, nchan), dtype=bool)

    for i, filename in enumerate(filenames):
        dataDict = numpy.load(filename)

        tStart = dataDict['tStart']

        if args.polToPlot == 'I':
            cvis = dataDict['vis1XX'][cross, :] + dataDict['vis1YY'][cross, :]
        elif args.polToPlot == 'V':
            cvis = dataDict['vis1XY'][cross, :] - dataDict['vis1YX'][cross, :]
            cvis /= 1j
        else:
            cvis = dataDict['vis1%s' % args.polToPlot][cross, :]

        if args.decimate > 1:
            cvis.shape = (cvis.shape[0], cvis.shape[1] // args.decimate,
                          args.decimate)
            cvis = cvis.mean(axis=2)

        visToPlot[i, :, :] = cvis

        if not args.drop:
            try:
                delayStepApplied = dataDict['delayStepApplied']
                try:
                    len(delayStepApplied)
                except TypeError:
                    delayStepApplied = [
                        delayStepApplied if ant.stand.id > 50 else False
                        for ant in antennas if ant.pol == 0
                    ]
            except KeyError:
                delayStepApplied = [False for ant in antennas if ant.pol == 0]
            delayStepAppliedBL = []
            for j in range(len(delayStepApplied)):
                for k in range(j, len(delayStepApplied)):
                    delayStepAppliedBL.append(delayStepApplied[j]
                                              or delayStepApplied[k])

            visToMask[i, :, :] = [[
                delayStepAppliedBL[c],
            ] for c in cross]

        times[i] = tStart

        dataDict.close()

    print("Got %i files from %s to %s (%.1f s)" %
          (len(filenames), datetime.utcfromtimestamp(
              times[0]).strftime("%Y/%m/%d %H:%M:%S"),
           datetime.utcfromtimestamp(times[-1]).strftime("%Y/%m/%d %H:%M:%S"),
           (times[-1] - times[0])))

    iTimes = numpy.zeros(nInt - 1, dtype=times.dtype)
    for i in range(1, len(times)):
        iTimes[i - 1] = times[i] - times[i - 1]
    print(" -> Interval: %.3f +/- %.3f seconds (%.3f to %.3f seconds)" %
          (iTimes.mean(), iTimes.std(), iTimes.min(), iTimes.max()))

    print("Number of frequency channels: %i (~%.1f Hz/channel)" %
          (len(freq), freq[1] - freq[0]))

    dTimes = times - times[0]

    delay = numpy.linspace(-350e-6, 350e-6, 301)  # s
    drate = numpy.linspace(-150e-3, 150e-3, 301)  # Hz

    good = numpy.arange(freq.size // 8,
                        freq.size * 7 // 8)  # Inner 75% of the band

    fig1 = plt.figure()
    fig2 = plt.figure()
    fig3 = plt.figure()
    fig4 = plt.figure()
    fig5 = plt.figure()

    k = 0
    nRow = int(numpy.sqrt(len(bls)))
    nCol = int(numpy.ceil(len(bls) * 1.0 / nRow))
    for b in range(len(bls)):
        i, j = bls[b]
        vis = numpy.ma.array(visToPlot[:, b, :], mask=visToMask[:, b, :])

        ax = fig1.add_subplot(nRow, nCol, k + 1)
        ax.imshow(numpy.ma.angle(vis),
                  extent=(freq[0] / 1e6, freq[-1] / 1e6, dTimes[0],
                          dTimes[-1]),
                  origin='lower',
                  vmin=-numpy.pi,
                  vmax=numpy.pi,
                  interpolation='nearest')
        ax.axis('auto')
        ax.set_xlabel('Frequency [MHz]')
        ax.set_ylabel('Elapsed Time [s]')
        ax.set_title("%i,%i - %s" % (i, j, args.polToPlot))
        ax.set_xlim((freq[0] / 1e6, freq[-1] / 1e6))
        ax.set_ylim((dTimes[0], dTimes[-1]))

        ax = fig2.add_subplot(nRow, nCol, k + 1)
        amp = numpy.ma.abs(vis)
        vmin, vmax = percentile(amp, 1), percentile(amp, 99)
        ax.imshow(amp,
                  extent=(freq[0] / 1e6, freq[-1] / 1e6, dTimes[0],
                          dTimes[-1]),
                  origin='lower',
                  interpolation='nearest',
                  vmin=vmin,
                  vmax=vmax)
        ax.axis('auto')
        ax.set_xlabel('Frequency [MHz]')
        ax.set_ylabel('Elapsed Time [s]')
        ax.set_title("%i,%i - %s" % (i, j, args.polToPlot))
        ax.set_xlim((freq[0] / 1e6, freq[-1] / 1e6))
        ax.set_ylim((dTimes[0], dTimes[-1]))

        ax = fig3.add_subplot(nRow, nCol, k + 1)
        ax.plot(freq / 1e6, numpy.ma.abs(vis.mean(axis=0)))
        ax.set_xlabel('Frequency [MHz]')
        ax.set_ylabel('Mean Vis. Amp. [lin.]')
        ax.set_title("%i,%i - %s" % (i, j, args.polToPlot))
        ax.set_xlim((freq[0] / 1e6, freq[-1] / 1e6))

        ax = fig4.add_subplot(nRow, nCol, k + 1)
        ax.plot(numpy.ma.angle(vis[:, good].mean(axis=1)) * 180 / numpy.pi,
                dTimes,
                linestyle='',
                marker='+')
        ax.set_xlim((-180, 180))
        ax.set_xlabel('Mean Vis. Phase [deg]')
        ax.set_ylabel('Elapsed Time [s]')
        ax.set_title("%i,%i - %s" % (i, j, args.polToPlot))
        ax.set_ylim((dTimes[0], dTimes[-1]))

        ax = fig5.add_subplot(nRow, nCol, k + 1)
        # Amplitude, unlike the phase panel above, needs no rad->deg scaling.
        ax.plot(numpy.ma.abs(vis[:, good].mean(axis=1)),
                dTimes,
                linestyle='',
                marker='+')
        ax.set_xlabel('Mean Vis. Amp. [lin.]')
        ax.set_ylabel('Elapsed Time [s]')
        ax.set_title("%i,%i - %s" % (i, j, args.polToPlot))
        ax.set_ylim((dTimes[0], dTimes[-1]))

        k += 1

    for f in (fig1, fig2, fig3, fig4, fig5):
        f.suptitle(
            "%s to %s UTC" %
            (datetime.utcfromtimestamp(times[0]).strftime("%Y/%m/%d %H:%M"),
             datetime.utcfromtimestamp(times[-1]).strftime("%Y/%m/%d %H:%M")))

    plt.show()
Example #26
def main(args):
    # Parse the command line
    ## Baseline list
    if args.baseline is not None:
        ## Fill the baseline list with the conjugates, if needed
        newBaselines = []
        for pair in args.baseline:
            newBaselines.append((pair[1], pair[0]))
        args.baseline.extend(newBaselines)
    ## Polarization
    plot_pols = []
    if args.xx:
        plot_pols.append('XX')
    if args.xy:
        plot_pols.append('XY')
    if args.yx:
        plot_pols.append('YX')
    if args.yy:
        plot_pols.append('YY')
    filename = args.filename

    figs = {}
    first = True
    for filename in args.filename:
        print("Working on '%s'" % os.path.basename(filename))
        # Open the FITS IDI file and access the UV_DATA extension
        hdulist = astrofits.open(filename, mode='readonly')
        andata = hdulist['ANTENNA']
        fqdata = hdulist['FREQUENCY']
        fgdata = None
        for hdu in hdulist[1:]:
            if hdu.header['EXTNAME'] == 'FLAG':
                fgdata = hdu
        uvdata = hdulist['UV_DATA']

        # Pull out various bits of information we need to flag the file
        ## Antenna look-up table
        antLookup = {}
        for an, ai in zip(andata.data['ANNAME'], andata.data['ANTENNA_NO']):
            antLookup[an] = ai
        ## Frequency and polarization setup
        nBand, nFreq, nStk = uvdata.header['NO_BAND'], uvdata.header[
            'NO_CHAN'], uvdata.header['NO_STKD']
        stk0 = uvdata.header['STK_1']
        ## Baseline list
        bls = uvdata.data['BASELINE']
        ## Time of each integration
        obsdates = uvdata.data['DATE']
        obstimes = uvdata.data['TIME']
        inttimes = uvdata.data['INTTIM']
        ## Source list
        srcs = uvdata.data['SOURCE']
        ## Band information
        fqoffsets = fqdata.data['BANDFREQ'].ravel()
        ## Frequency channels
        freq = (numpy.arange(nFreq) -
                (uvdata.header['CRPIX3'] - 1)) * uvdata.header['CDELT3']
        freq += uvdata.header['CRVAL3']
        ## UVW coordinates
        try:
            u, v, w = uvdata.data['UU'], uvdata.data['VV'], uvdata.data['WW']
        except KeyError:
            u, v, w = uvdata.data['UU---SIN'], uvdata.data[
                'VV---SIN'], uvdata.data['WW---SIN']
        uvw = numpy.array([u, v, w]).T
        ## The actual visibility data
        flux = uvdata.data['FLUX'].astype(numpy.float32)

        # Convert the visibilities to something that we can easily work with
        nComp = flux.shape[1] // nBand // nFreq // nStk
        if nComp == 2:
            ## Case 1) - Just real and imaginary data
            flux = flux.view(numpy.complex64)
        else:
            ## Case 2) - Real, imaginary data + weights (drop the weights)
            flux = flux[:, 0::nComp] + 1j * flux[:, 1::nComp]
        flux.shape = (flux.shape[0], nBand, nFreq, nStk)

        # Find unique baselines, times, and sources to work with
        ubls = numpy.unique(bls)
        utimes = numpy.unique(obstimes)
        usrc = numpy.unique(srcs)

        # Convert times to real times
        times = utcjd_to_unix(obsdates + obstimes)
        times = numpy.unique(times)

        # Build a mask
        mask = numpy.zeros(flux.shape, dtype=bool)
        if fgdata is not None and not args.drop:
            reltimes = obsdates - obsdates[0] + obstimes
            maxtimes = reltimes + inttimes / 2.0 / 86400.0
            mintimes = reltimes - inttimes / 2.0 / 86400.0

            bls_ant1 = bls // 256
            bls_ant2 = bls % 256

            for row in fgdata.data:
                ant1, ant2 = row['ANTS']

                ## Only deal with flags that we need for the plots
                process_flag = False
                if args.include_auto or ant1 != ant2 or ant1 == 0 or ant2 == 0:
                    if ant1 == 0 and ant2 == 0:
                        process_flag = True
                    elif args.baseline is not None:
                        if ant2 == 0 and ant1 in [
                                a0 for a0, a1 in args.baseline
                        ]:
                            process_flag = True
                        elif (ant1, ant2) in args.baseline:
                            process_flag = True
                    elif args.ref_ant is not None:
                        if ant1 == args.ref_ant or ant2 == args.ref_ant:
                            process_flag = True
                    else:
                        process_flag = True
                if not process_flag:
                    continue

                tStart, tStop = row['TIMERANG']
                band = row['BANDS']
                try:
                    len(band)
                except TypeError:
                    band = [
                        band,
                    ]
                cStart, cStop = row['CHANS']
                if cStop == 0:
                    cStop = -1
                pol = row['PFLAGS'].astype(bool)

                if ant1 == 0 and ant2 == 0:
                    btmask = numpy.where(
                        ((maxtimes >= tStart) & (mintimes <= tStop)))[0]
                elif ant1 == 0 or ant2 == 0:
                    ant1 = max([ant1, ant2])
                    btmask = numpy.where( ( (bls_ant1 == ant1) | (bls_ant2 == ant1) ) \
                                          & ( (maxtimes >= tStart) & (mintimes <= tStop) ) )[0]
                else:
                    btmask = numpy.where( ( (bls_ant1 == ant1) & (bls_ant2 == ant2) ) \
                                          & ( (maxtimes >= tStart) & (mintimes <= tStop) ) )[0]
                for b, v in enumerate(band):
                    if not v:
                        continue
                    mask[btmask, b, cStart - 1:cStop, :] |= pol

        plot_bls = []
        cross = []
        for i in range(len(ubls)):
            bl = ubls[i]
            ant1, ant2 = (bl >> 8) & 0xFF, bl & 0xFF
            if args.include_auto or ant1 != ant2:
                if args.baseline is not None:
                    if (ant1, ant2) in args.baseline:
                        plot_bls.append(bl)
                        cross.append(i)
                elif args.ref_ant is not None:
                    if ant1 == args.ref_ant or ant2 == args.ref_ant:
                        plot_bls.append(bl)
                        cross.append(i)
                else:
                    plot_bls.append(bl)
                    cross.append(i)
        nBL = len(cross)

        # Decimation, if needed
        if args.decimate > 1:
            if nFreq % args.decimate != 0:
                raise RuntimeError(
                    "Invalid freqeunce decimation factor:  %i %% %i = %i" %
                    (nFreq, args.decimate, nFreq % args.decimate))

            nFreq //= args.decimate
            freq.shape = (freq.size // args.decimate, args.decimate)
            freq = freq.mean(axis=1)

            flux.shape = (flux.shape[0], flux.shape[1],
                          flux.shape[2] // args.decimate, args.decimate,
                          flux.shape[3])
            flux = flux.mean(axis=3)

            mask.shape = (mask.shape[0], mask.shape[1],
                          mask.shape[2] // args.decimate, args.decimate,
                          mask.shape[3])
            mask = mask.mean(axis=3)

        good = numpy.arange(freq.size // 8,
                            freq.size * 7 // 8)  # Inner 75% of the band

        if first:
            ref_time = obsdates[0] + obstimes[0]

        # NOTE: Assumes that the Stokes parameters increment by -1
        namMapper = {}
        for i in range(nStk):
            stk = stk0 - i
            namMapper[i] = NUMERIC_STOKES[stk]
        polMapper = {'XX': 0, 'YY': 1, 'XY': 2, 'YX': 3}

        for b in range(len(plot_bls)):
            bl = plot_bls[b]
            valid = numpy.where(bls == bl)[0]
            i, j = (bl >> 8) & 0xFF, bl & 0xFF
            dTimes = obsdates[valid] + obstimes[valid]
            dTimes -= ref_time
            dTimes *= 86400.0

            for p in plot_pols:
                blName = (i, j)
                blName = '%s-%s - %s' % (
                    'EA%02i' % blName[0] if blName[0] < 51 else 'LWA%i' %
                    (blName[0] - 50),
                    'EA%02i' % blName[1] if blName[1] < 51 else 'LWA%i' %
                    (blName[1] - 50), namMapper[polMapper[p]])

                if first or blName not in figs:
                    fig = plt.figure()
                    fig.suptitle('%s' % blName)
                    fig.subplots_adjust(hspace=0.001)
                    axA = fig.add_subplot(1, 2, 1)
                    axP = fig.add_subplot(1, 2, 2)
                    figs[blName] = (fig, axA, axP)
                fig, axA, axP = figs[blName]

                for band, offset in enumerate(fqoffsets):
                    frq = freq + offset
                    vis = numpy.ma.array(flux[valid, band, :, polMapper[p]],
                                         mask=mask[valid, band, :,
                                                   polMapper[p]])

                    amp = numpy.ma.abs(vis)
                    vmin, vmax = percentile(amp, 1), percentile(amp, 99)
                    axA.imshow(amp,
                               extent=(frq[0] / 1e6, frq[-1] / 1e6, dTimes[0],
                                       dTimes[-1]),
                               origin='lower',
                               interpolation='nearest',
                               vmin=vmin,
                               vmax=vmax)

                    axP.imshow(numpy.ma.angle(vis),
                               extent=(frq[0] / 1e6, frq[-1] / 1e6, dTimes[0],
                                       dTimes[-1]),
                               origin='lower',
                               vmin=-numpy.pi,
                               vmax=numpy.pi,
                               interpolation='nearest')

        first = False

    for blName in figs:
        fig, axA, axP = figs[blName]

        fig.suptitle("%s UTC\n%s" % (datetime.utcfromtimestamp(
            times[0]).strftime("%Y/%m/%d %H:%M"), blName))

        axA.axis('auto')
        axA.set_title('Amp.')
        axA.set_xlabel('Frequency [MHz]')
        axA.set_ylabel('Amp. - Elapsed Time [s]')

        axP.axis('auto')
        axP.set_title('Phase')
        axP.set_xlabel('Frequency [MHz]')

        if args.save_images:
            fig.savefig('fringes-%s.png' % (blName.replace(' ', ''), ))

    if not args.save_images:
        plt.show()
Example #27
def get_rank(x):
    return percentile(sample, x) / 100.0
Example #28
def getTeamGameStats(nhl_seasons):
    nhl_seasons = {
        i: sorted(nhl_seasons[i], key=lambda x: x.get("date"))
        for i in nhl_seasons
    }
    team_seasons = defaultdict(lambda: defaultdict(list))
    for team_id in TEAMS:
        print(TEAMS[team_id])
        for season in nhl_seasons:
            team_season_games = sorted([
                game for game in nhl_seasons[season]
                if game["home"] == team_id or game["away"] == team_id
            ],
                                       key=lambda x: x.get("date"))
            for idx, game in enumerate(team_season_games):

                game_df = TEAMSTATS[TEAMSTATS.game_id == game["id"]]
                for_df = game_df[game_df.team_id == team_id]
                against_df = game_df[game_df.team_id != team_id]

                game["won"] = int(for_df["won"].values[0])
                game["stats"]["travel"] = defaultdict(lambda: 0)
                game["stats"]["travel"]["home"] = int(
                    "home" == for_df["HoA"].values[0])
                game["stats"]["travel"]["game_day"] = datetime.strptime(
                    game["date"], "%Y-%m-%d").weekday()
                game["stats"]["travel"]["game_reg"] = int(
                    str(for_df["settled_in"]) == "REG")
                game["stats"]["travel"]["game_ot"] = int(
                    str(for_df["settled_in"]) == "OT")
                game["stats"]["travel"]["game_so"] = int(
                    str(for_df["settled_in"]) == "SO")

                if idx == 0:
                    game["stats"]["travel"]["rest_days"] = 1.0

                    if game["stats"]["travel"]["home"]:
                        game["stats"]["travel"]["timezone"] = 0
                        game["stats"]["travel"]["distance"] = 0

                    else:
                        prev_tz = ARENA_ZONES[TEAMS[team_id]]
                        curr_tz = ARENA_ZONES[TEAMS[game["home"]]]
                        game["stats"]["travel"]["timezone"] = abs(curr_tz -
                                                                  prev_tz)

                        prev_loc = ARENAS[TEAMS[team_id]]
                        curr_loc = ARENAS[TEAMS[game["home"]]]
                        game["stats"]["travel"]["distance"] = (
                            geodesic(prev_loc, curr_loc).miles / DST_MAX)

                else:
                    prev_game = team_season_games[idx - 1]

                    prev_date = prev_game["date"]
                    curr_date = game["date"]
                    d1 = datetime.strptime(prev_date, "%Y-%m-%d")
                    d2 = datetime.strptime(curr_date, "%Y-%m-%d")
                    game["stats"]["travel"]["rest_days"] = min(
                        abs((d1 - d2).days), 10) / DAY_MAX

                    prev_tz = ARENA_ZONES[TEAMS[prev_game["home"]]]
                    curr_tz = ARENA_ZONES[TEAMS[game["home"]]]
                    game["stats"]["travel"]["timezone"] = abs(curr_tz -
                                                              prev_tz)

                    prev_loc = ARENAS[TEAMS[prev_game["home"]]]
                    curr_loc = ARENAS[TEAMS[game["home"]]]
                    game["stats"]["travel"]["distance"] = geodesic(
                        curr_loc, prev_loc).miles / DST_MAX

                game["stats"]["game"] = getGameStats(for_df, against_df)

            team_season_games = getCumulative(team_season_games)
            team_season_games = getTeamPercentile(team_season_games)
            team_seasons[team_id][season] = team_season_games

    for team_id in team_seasons:
        for season in team_seasons[team_id]:
            for idx, game in enumerate(team_seasons[team_id][season]):
                league_percentile = getLeagueDistribution(
                    team_seasons, season, idx)
                game["stats"]["league_percentile"] = {
                    i: float(
                        percentile(league_percentile[i],
                                   game["stats"]["cumulative"][i]) / 100)
                    for i in dict(league_percentile)
                }

    return team_seasons
Example #29
def quicklook(filename, flatten, ant='252A'):
    h5 = tb.open_file(filename)

    T_ant = apply_calibration(h5)
    f_leda = T_ant['f']

    ant_ids = [
        ant,
    ]

    print("Plotting %s..." % ant_ids[0])
    fig, axes = plt.subplots(figsize=(12, 6), nrows=1, ncols=1)
    #plt.suptitle(h5.filename)

    lst_stamps = T_ant['lst']
    utc_stamps = T_ant['utc']
    xlims = (f_leda[0], f_leda[-1])
    #ylims = mdates.date2num((T_ant['utc'][0], T_ant['utc'][-1]))
    #hfmt = mdates.DateFormatter('%m/%d %H:%M')
    ylims = (T_ant['lst'][0], T_ant['lst'][-1])
    T_flagged = T_ant[ant_ids[0]]
    #T_flagged = np.fft.fft(T_flagged, axis=0)
    #T_flagged -= T_flagged.mean(axis=0)
    #T_flagged = 10*np.log10(np.abs(np.fft.ifft(T_flagged)))

    T_flagged = rfi_flag(T_flagged, freqs=f_leda)

    if flatten:
        abp = np.ma.median(T_flagged.data, axis=0)
        abp /= np.ma.median(abp)
        T_flagged /= abp
    clim = (percentile(T_flagged.compressed(), 5),
            percentile(T_flagged.compressed(), 95))

    im = plt.imshow(
        T_flagged,  # / np.median(xx, axis=0), 
        cmap='jet',
        aspect='auto',
        interpolation='nearest',
        clim=clim,
        extent=(xlims[0], xlims[1], ylims[1], ylims[0]))
    plt.title(ant_ids[0])
    plt.xlabel("Frequency [MHz]")

    plt.ylabel("LST [hr]")
    plt.colorbar()
    plt.text(0.005,
             0.005,
             get_repo_fingerprint(),
             transform=fig.transFigure,
             size=8)
    plt.savefig("figures/rfi-flagged.pdf")
    plt.show()

    plt.figure()
    #plt.plot(f_leda, np.sum(T_flagged.mask, axis=0).astype('float') / T_flagged.mask.shape[0], label='total')
    day = T_flagged[0:2000].mask
    night = T_flagged[2250:2750].mask
    plt.plot(f_leda,
             np.sum(night, axis=0).astype('float') / night.shape[0],
             label='night')
    plt.plot([0])  # presumably just advances the color cycle between lines
    plt.plot(f_leda,
             np.sum(day, axis=0).astype('float') / day.shape[0],
             label='day')
    plt.xlim(40, 85)
    plt.ylim(-0.025, 0.25)

    plt.title(ant_ids[0])
    plt.xlabel("Frequency [MHz]")
    plt.ylabel("Flagged fraction")
    plt.minorticks_on()
    plt.legend(frameon=True, loc=2)
    plt.tight_layout()
    plt.text(0.005,
             0.005,
             get_repo_fingerprint(),
             transform=fig.transFigure,
             size=8)
    plt.savefig("figures/rfi-fraction.pdf")
    plt.show()

    plt.figure()
    plt.plot(f_leda, kurtosis(T_flagged, axis=0))
    plt.title(ant_ids[0])
    plt.ylabel("Kurtosis")
    plt.xlabel("Frequency [MHz]")
    plt.xlim(40, 85)
    plt.ylim(-50, 1600)
    plt.minorticks_on()
    plt.text(0.005,
             0.005,
             get_repo_fingerprint(),
             transform=fig.transFigure,
             size=8)
    plt.show()

    plt.figure()
Example #30
        Y_hat, Y_true, auc_scores, bottom_k_auc_scores = run_classifier(
            clf, classifier_features, cases, bottom_inds,
            optimize_hyperparams=False)
        # Save the data for next time
        print(' saving data...')
        pickle.dump((Y_hat, Y_true, auc_scores, bottom_k_auc_scores),
                    open(results_path + 'clf_results_%s.pickle' % clf_name,
                         'wb'))
    finally:
        if plot_output:
            print(' plotting data...')
            # Plot the Precision Recall Curve
            # Scikit's Precision Recall
            p, r, thresh = precision_recall_curve(Y_true.flatten(),
                                                  Y_hat.flatten())
            plt.plot(r, p, label=clf_name, **plot_ops[i])
            # Now get AUC bounds via bootstrap resampling
        print(' AUC bootstrap resampling...')
        print("[%.3f,%.3f]: %s AUC 95 bounds" %
              (percentile(auc_scores, 2.5), percentile(auc_scores, 97.5),
               clf_name))
        print("[%.3f,%.3f]: %s AUC 95 bounds - bottom %d methods" %
              (percentile(bottom_k_auc_scores, 2.5),
               percentile(bottom_k_auc_scores, 97.5), clf_name, k))

# Save and display the overall figure
if plot_output:
    #plt.legend(loc=1,fontsize=20)
    fix_legend(handlelength=7)
    fix_axes()
    plt.ylim(0, 1)
    plt.xlim(0, 1)
    plt.hold(False)
    plt.tight_layout()
    PR_fig.savefig(figure_path + 'precision_recall.pdf', bbox_inches=0)
    PR_fig.show()

# Now print out all the results
Example #31
    def getPercentile(self, inArticleDict, percent):
        """Return a dict of the given percentile for each subject."""
        medDict = {}
        for subject in inArticleDict:
            # Index the passed-in dict; the original read self.inArticleDict,
            # ignoring the argument.
            medDict[subject] = percentile(inArticleDict[subject], percent)
        return medDict
Example #32
    def stat(self, a):
        #if self.mask != None:
        #    a = np.compress(a.flatten(), self.mask.flatten() > 0)
        #vmin, vmax, vmid = a.min(), a.max(), a.mean()
        vmin, vmax, vmid = (percentile(a.flatten(), 1),
                            percentile(a.flatten(), 99),
                            percentile(a.flatten(), 50))
        return vmin, vmax, vmid
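Clipping the display range at the 1st and 99th percentiles is a common robust alternative to raw min/max scaling; the same idea standalone:

import numpy as np
from scipy.stats import scoreatpercentile as percentile

img = np.random.randn(64, 64)
vmin, vmax = percentile(img.flatten(), 1), percentile(img.flatten(), 99)
# pass vmin/vmax as imshow's clim to ignore the extreme 1% tail on each side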
Example #33
def plot(out_fd,
         data_sets,
         title='Expected bankroll change over time',
         xlabel='Roll number',
         ylabel='Change in bankroll',
         file_format='png',
         transparent=False):
    ''' Plot the median value across many sets of data, as well as the area
    between the 1st and 3rd quartiles.

    Each input data set should be a dictionary of x,y pairs as specified below.
    All input data sets must have the same x values. At each specified x value,
    plot the median of the y values across all data sets. Also plot the 1st and
    3rd quartiles for the y values at this x value.

    out_fd: the file-like object to which to write the graph
    data_sets: an iterable, containing one or more data set dictionary

    file_format: file format to output. Must be one of:
        - png
        - svg
        - svgz

    transparent: whether or not the file should have a transparent background

    An example data set dictionary:
        {
            0: 1,
            1: 4,
            2: 2,
            6: 7,
            9: 10,
            ...
        }
    Where each key is an x value and the key's value is the corresponding y
    value. All data sets must have the same exact set of keys.
    '''
    assert file_format in 'png svg svgz'.split(' ')
    plt.figure()
    d = None
    for data_set in data_sets:
        if d is None:
            d = {}
            for x in data_set:
                d[x] = [data_set[x]]
            continue
        for x in data_set:
            d[x].append(data_set[x])
    stats_d = {}
    for x in d:
        stats_d[x] = (
            min(d[x]),
            percentile(d[x], 5),
            percentile(d[x], 25),
            percentile(d[x], 50),
            percentile(d[x], 75),
            percentile(d[x], 95),
            max(d[x]),
        )
    # colors selected to be good for colorblind people
    # http://www.somersault1824.com/tips-for-designing-scientific-figures-for-color-blind-readers/
    # http://mkweb.bcgsc.ca/biovis2012/
    # http://mkweb.bcgsc.ca/colorblind/
    dark_purple = rgb_conv(73, 0, 146)
    dark_blue = rgb_conv(0, 109, 219)
    purple = rgb_conv(182, 109, 255)
    blue = rgb_conv(109, 182, 255)
    light_blue = rgb_conv(182, 219, 255)
    uppest_color = *dark_purple, 0.9
    upper_color = *dark_blue, 0.9
    med_color = *purple, 1
    middle_color = *purple, 0.5
    lower_color = *blue, 0.9
    lowest_color = *light_blue, 0.9
    xs = list(stats_d.keys())  # matplotlib needs a sequence, not a dict view
    per_0 = [v[0] for v in stats_d.values()]
    per_5 = [v[1] for v in stats_d.values()]
    per_25 = [v[2] for v in stats_d.values()]
    per_50 = [v[3] for v in stats_d.values()]
    per_75 = [v[4] for v in stats_d.values()]
    per_95 = [v[5] for v in stats_d.values()]
    per_100 = [v[6] for v in stats_d.values()]
    # plt.plot(xs, per_100, color=max_color, label='max')
    plt.plot(xs, per_50, color=med_color, label='median')
    # plt.plot(xs, per_0, color=min_color, label='min')
    plt.fill_between(xs, per_100, per_95, color=uppest_color, label='top 5%')
    plt.fill_between(xs, per_95, per_75, color=upper_color, label='next 20%')
    plt.fill_between(xs, per_75, per_25, color=middle_color,
                     label='middle 50%')
    plt.fill_between(xs, per_25, per_5, color=lower_color, label='next 20%')
    plt.fill_between(xs, per_5, per_0, color=lowest_color, label='bottom 5%')
    plt.xlim(left=0, right=max(xs))
    ymag = max(max(per_100), -1 * min(per_0))
    plt.ylim(top=ymag, bottom=-1 * ymag)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.legend(loc='best', fontsize=8)
    plt.title(title)
    plt.savefig(out_fd, transparent=transparent, format=file_format)
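A usage sketch with synthetic random walks, assuming an `rgb_conv` helper (referenced by plot() but not shown above) that scales 8-bit RGB into the [0, 1] floats matplotlib expects, and that `percentile` (scipy's scoreatpercentile) is in scope as throughout these examples:

import random

def rgb_conv(r, g, b):
    # assumed helper: plot() references it but it is not shown above
    return (r / 255., g / 255., b / 255.)

data_sets = []
for _ in range(100):
    bankroll, walk = 0, {}
    for x in range(500):
        bankroll += random.choice((-1, 1))
        walk[x] = bankroll
    data_sets.append(walk)

with open('bankroll.png', 'wb') as out_fd:
    plot(out_fd, data_sets)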
Example #34
def quicklook(filename,
              save,
              dump,
              flag,
              merge,
              flatten,
              no_show,
              all_lsts,
              new_cal,
              sky=False,
              lfsm=False,
              emp=False):
    h5 = tb.open_file(filename)

    if new_cal: T_ant = apply_new_calibration(h5)
    else: T_ant = apply_calibration(h5)
    f_leda = T_ant['f']

    ant_ids = ['252', '254', '255']

    print("Plotting...")
    fig = plt.figure(figsize=(20, 20))
    #plt.suptitle(h5.filename)

    lst_stamps = T_ant['lst']
    indexes = np.arange(len(lst_stamps), dtype=int)

    if len(lst_stamps) == 0:
        raise RuntimeError("No LSTs in file")

    # Report discontinuities in time
    for i in range(1, len(lst_stamps)):
        if lst_stamps[i] - lst_stamps[i - 1] > 1 / 60.0:  # 1 minute
            print "Discontinuity at LST", lst_stamps[i], (
                lst_stamps[i] - lst_stamps[i - 1]) * 60 * 60, "seconds"

    utc_stamps = T_ant['utc']
    xlims = (f_leda[0], f_leda[-1])
    #ylims = mdates.date2num((T_ant['utc'][0], T_ant['utc'][-1]))
    #hfmt = mdates.DateFormatter('%m/%d %H:%M')
    ylims = (T_ant['lst'][0], T_ant['lst'][-1])

    # Work out altitude of Gal center and Sun. Use whichever is highest
    # and put that in the padding, which is the stripe.
    pad_length = 70
    padding = np.full((len(lst_stamps), pad_length), 10000)
    timing = lst_timing.LST_Timing(lst_stamps, utc_stamps)
    border_bottom, night_bottom, night_top, border_top = timing.calc_night()
    padding[night_bottom:night_top, :] = 1000

    #for ant in ant_ids:
    #  lst_stamps, T_ant[ant+"A"] = timing.align(T_ant[ant+"A"])
    #  lst_stamps, T_ant[ant+"B"] = timing.align(T_ant[ant+"B"])

    if night_bottom:
        print("Night", lst_stamps[night_bottom], "-",
              lst_stamps[night_top - 1])
    else:
        print("Night 0 - 0")

    # Use night only
    if not all_lsts:
        if not border_top:
            raise RuntimeError(
                "No LSTs available at night time (use --all_lsts to see all)")
        lst_stamps = lst_stamps[night_bottom:night_top]
        utc_stamps = utc_stamps[night_bottom:night_top]
        indexes = indexes[night_bottom:night_top]
        padding = padding[night_bottom:night_top]
        ylims = (lst_stamps[0], lst_stamps[-1])
        print len(lst_stamps), "usable LSTs"
    else:
        print "Using all LSTs"

    if len(lst_stamps) == 0:
        raise RuntimeError(
            "There are no data to display (number of LSTs is 0)")

    yloc = []
    ylabel = []
    try:
        for i in range(0, len(lst_stamps), len(lst_stamps) // 7):
            yloc.append(lst_stamps[i])
            ylabel.append("%.1f" % lst_stamps[i])
    except ValueError:
        # Fewer than 7 LSTs makes the range step 0; label first and last only
        yloc.append(lst_stamps[0])
        ylabel.append("%.1f" % lst_stamps[0])
        yloc.append(lst_stamps[-1])
        ylabel.append("%.1f" % lst_stamps[-1])
    if all_lsts:
        new_x_high = xlims[1] + pad_length * (xlims[1] -
                                              xlims[0]) / len(f_leda)
    else:
        new_x_high = xlims[1]

    dump_data = {}

    if sky:
        if lfsm and emp:
            smdl = SkyModelLFSMEmp
            smlbl = 'LFSM+Emp'
        elif lfsm and not emp:
            smdl = SkyModelLFSM
            smlbl = 'LFSM'
        elif not lfsm and emp:
            smdl = SkyModelGSMEmp
            smlbl = 'GSM+Emp'
        else:
            smdl = SkyModelGSM
            smlbl = 'GSM'
        sy = smdl(pol='y')
        sx = smdl(pol='x')
        T_y_asm = sy.generate_tsky(lst_stamps, f_leda * 1e6)
        T_x_asm = sx.generate_tsky(lst_stamps, f_leda * 1e6)

    if flag and merge:
        # If we are going to merge the flags across antennas, we need to flag them all now
        for p in (0, 1):
            for ii, key in enumerate(ant_ids):
                ant = key + ("B" if p else "A")
                T_flagged = T_ant[ant]
                if not all_lsts:
                    # Do flagging with a border around the data in time
                    masks = rfi_flag(T_flagged[border_bottom:border_top],
                                     freqs=f_leda)
                    new_mask = masks.combine(do_not_excise_dtv=True)

                    new_mask = new_mask[night_bottom -
                                        border_bottom:night_top -
                                        border_bottom]  # remove border
                else:
                    masks = rfi_flag(T_flagged, freqs=f_leda)
                    new_mask = masks.combine(do_not_excise_dtv=True)

                    print ant, "Biggest DTV gap", lst_stamps[biggest_gap(
                        masks.dtv_tms)[1]], "-", lst_stamps[biggest_gap(
                            masks.dtv_tms)[0]], "waterfall"
                try:
                    merged_mask |= new_mask
                except NameError:
                    merged_mask = new_mask

    for p in [0, 1]:
        for ii, key in enumerate(ant_ids):
            if p == 0 and ii == 0:
                ax = fig.add_subplot(2, 3, 3 * p + ii + 1)
                origAX = ax
            else:
                ax = fig.add_subplot(2,
                                     3,
                                     3 * p + ii + 1,
                                     sharex=origAX,
                                     sharey=origAX)

            if p == 0:
                ant = key + "A"
            else:
                ant = key + "B"

            T_flagged = T_ant[ant]
            if not all_lsts:
                T_flagged = T_flagged[night_bottom:night_top]

            print "Max", np.max(T_flagged), "Min", np.min(T_flagged)

            masks = {}
            if flag:
                if merge:
                    ## Already done
                    T_flagged = np.ma.array(T_flagged, mask=merged_mask)
                else:
                    ## Need to do it now - there's probably a way to deal with
                    ## this all in one pass
                    if not all_lsts:
                        masks = rfi_flag(T_ant[ant][border_bottom:border_top],
                                         freqs=f_leda)
                        T_flagged = masks.apply_as_mask(
                            T_ant[ant][border_bottom:border_top],
                            do_not_excise_dtv=True)
                        T_flagged = T_flagged[night_bottom -
                                              border_bottom:night_top -
                                              border_bottom]  # Remove border

                        masks.chop(night_bottom - border_bottom,
                                   night_top - border_bottom)
                    else:
                        masks = rfi_flag(T_flagged, freqs=f_leda)
                        T_flagged = masks.apply_as_mask(T_flagged,
                                                        do_not_excise_dtv=True)

                        print ant, "Biggest DTV gap", lst_stamps[biggest_gap(
                            masks.dtv_tms)[1]], "-", lst_stamps[biggest_gap(
                                masks.dtv_tms)[0]], "waterfall"
                print "After flagging", "Max", np.ma.max(
                    T_flagged), "Min", np.ma.min(T_flagged)

            try:
                T_asm = T_y_asm if p == 0 else T_x_asm
                scale_offset_asm = robust.mean(T_asm / T_flagged)
                T_flagged = T_flagged - T_asm / scale_offset_asm
            except NameError:
                pass

            T_flagged = pad_data(T_flagged)  # Up to 2400 channels

            if dump:
                if not all_lsts:
                    if masks:
                        dump_data[ant + "_flagged"] = masks.apply_as_nan(
                            T_ant[ant][night_bottom:night_top])

                    dump_data[ant] = T_ant[ant][night_bottom:night_top]
                else:
                    if masks:
                        dump_data[ant + "_flagged"] = masks.apply_as_nan(
                            T_ant[ant])
                    dump_data[ant] = T_ant[ant]
                dump_data[ant + "_rms"] = add_uncertainties(T_flagged)
                av = np.ma.average(T_flagged, axis=0)
                weighted = av / dump_data[ant + "_rms"]**2
                dump_data[ant + "_weighted"] = weighted
                if masks:
                    dump_data[ant + "_dtv_times"] = np.array(masks.dtv_tms)
                    dump_data[ant + "_masks"] = masks.masks

            if flag:
                total = T_flagged.shape[0] * T_flagged.shape[1]
                num_in = np.ma.MaskedArray.count(T_flagged)
                print ant, ("%.1f%%" % (100 * float(total - num_in) / total)
                            ), "flagged.", "Count:", total - num_in

            # Add the stripe onto the right edge of the data and adjust the extent of the x-axis (frequency) to cover the stripe.
            if all_lsts:
                T_flagged_plot = np.ma.concatenate((T_flagged, padding),
                                                   axis=1)
            else:
                T_flagged_plot = T_flagged

            ax.set_yticks(yloc)
            ax.set_yticklabels(ylabel)
            ax.tick_params(axis='y', pad=2)

            if flatten:
                if type(T_flagged_plot) is np.ma.core.MaskedArray:
                    abp = np.ma.median(T_flagged_plot.data, axis=0)
                else:
                    abp = np.ma.median(T_flagged_plot, axis=0)
                abp /= np.ma.median(abp)
                T_flagged_plot /= abp
                try:
                    clim = (percentile(T_flagged_plot.compressed(), 5),
                            percentile(T_flagged_plot.compressed(), 95))
                except AttributeError:
                    clim = (percentile(T_flagged_plot,
                                       5), percentile(T_flagged_plot, 95))

            elif sky:
                clim = (-250, 500)
            else:
                clim = (1000, 10000)
            if ant != "252B":
                im = ax.imshow(
                    T_flagged_plot,  # / np.median(xx, axis=0), 
                    cmap="viridis",
                    aspect='auto',
                    interpolation='nearest',
                    clim=clim,
                    extent=(xlims[0], new_x_high, ylims[1], ylims[0]))

            ax.set_title(ant)
            if p == 1:
                ax.set_xlabel("Frequency [MHz]")
            if ii == 0:
                ax.set_ylabel("LST [hr]")
            #ax.yaxis_date()
            #ax.yaxis.set_major_formatter(hfmt)
            #

    if not flatten:
        fig.subplots_adjust(left=0.07)
        fig.subplots_adjust(right=0.875)
        cbar_ax = fig.add_axes([0.9, 0.125, 0.025, 0.75])
        cbar = fig.colorbar(im, cax=cbar_ax)

        #plt.subplot(2,3,3)
        #cbar = plt.colorbar()
        if sky:
            cbar.set_label("Temperature - %s [K]" % smlbl)
        else:
            cbar.set_label("Temperature [K]")
        cbar.ax.tick_params(axis='y', pad=2)
        #plt.tight_layout()

    plt.text(0.005,
             0.005,
             get_repo_fingerprint(),
             transform=fig.transFigure,
             size=8)

    if save:
        plt.savefig(os.path.basename(filename)[:-3] + ".png")
    if not no_show:
        plt.show()

    if dump:
        dump_data["lsts"] = lst_stamps
        dump_data["utcs"] = np.array([str(pytime) for pytime in utc_stamps])
        dump_data["indexes"] = indexes
        dump_data["frequencies"] = pad_frequencies(f_leda)
        dump_data["options"] = "Flag="+str(flag) \
          + " Filename="+filename \
          + " New cal="+str(new_cal) \
                               + " Merge="+str(merge) \
                               + " Flatten="+str(flatten) \
                               + " All LSTs="+str(all_lsts) \
                               + " Sky Model Substract="+str(sky) \
                               + " Use LFSM="+str(lfsm) \
                               + " Apply empirical gain correction="+str(emp)
        dump_data["fingerprint"] = get_repo_fingerprint()
        import json

        def jdefault(o):
            return o.__dict__

        dump_data["params"] = json.dumps(params, default=jdefault)

        hickle.dump(dump_data, os.path.basename(filename)[:-3] + ".hkl")
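The merged-flag pass in quicklook() above accumulates a single mask across all antennas with merged_mask |= new_mask, bootstrapping the first iteration via the NameError handler. The same idiom in isolation, with toy boolean masks standing in for rfi_flag() output:

import numpy as np

masks = [np.random.rand(4, 8) > 0.9 for _ in range(6)]  # toy per-antenna masks

merged_mask = None
for new_mask in masks:
    if merged_mask is None:
        merged_mask = new_mask.copy()
    else:
        merged_mask |= new_mask  # flag a cell if any antenna flags it

# merged_mask now covers every (time, freq) cell flagged on at least one antenna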
def quicklook(filename, save, dump, flag, merge, flatten, no_show, all_lsts):
    h5 = tb.open_file(filename)

    T_ant = apply_calibration(h5)
    f_leda = T_ant['f']

    ant_ids = ['252', '254', '255']

    print("Plotting...")
    fig = plt.figure(figsize=(12, 12))
    #plt.suptitle(h5.filename)

    lst_stamps = T_ant['lst']
    if len(lst_stamps) == 0:
        print "No LSTS in file"
        exit(1)

    # Report discontinuities in time
    for i in range(1, len(lst_stamps)):
        if lst_stamps[i] - lst_stamps[i - 1] > 1 / 60.0:  # 1 minute
            print "Discontinuity at LST", lst_stamps[i], (
                lst_stamps[i] - lst_stamps[i - 1]) * 60 * 60, "seconds"

    utc_stamps = T_ant['utc']
    xlims = (f_leda[0], f_leda[-1])
    #ylims = mdates.date2num((T_ant['utc'][0], T_ant['utc'][-1]))
    #hfmt = mdates.DateFormatter('%m/%d %H:%M')
    ylims = (T_ant['lst'][0], T_ant['lst'][-1])

    # Work out altitude of Gal center and Sun. Use whichever is highest
    # and put that in the padding, which is the stripe.
    unusable_lsts = []
    pad_length = 70
    padding = np.zeros((len(lst_stamps), pad_length))
    for i, d in enumerate(utc_stamps):
        ovro.date = d
        sun.compute(ovro)
        gal_center.compute(ovro)
        if sun.alt > -15 * np.pi / 180 or gal_center.alt > -15 * np.pi / 180:
            padding[i, :] = 10000
            unusable_lsts.append(i)
        else:
            padding[i, :] = 1000

    # Delete sun up LSTS
    if not all_lsts:
        print "Cutting out times when sun/galaxy up"
        padding = np.delete(padding, unusable_lsts, axis=0)
        lst_stamps = np.delete(lst_stamps, unusable_lsts, axis=0)
        utc_stamps = np.delete(utc_stamps, unusable_lsts, axis=0)
        if len(lst_stamps) == 0:
            print "No LSTs available at night time (use --all_lsts to see all)"
            exit(1)
        ylims = (lst_stamps[0], lst_stamps[-1])
        print len(lst_stamps), "usable LSTs"
    else:
        print "Using all LSTs"
    if len(lst_stamps) == 0:
        print "There is no data to display (number of LSTs is 0)"
        exit(1)

    yloc = []
    ylabel = []
    for i in range(0, len(lst_stamps), max(1, len(lst_stamps) // 7)):
        yloc.append(lst_stamps[i])
        ylabel.append("%.1f" % lst_stamps[i])
    if all_lsts:
        new_x_high = xlims[1] + pad_length * (xlims[1] -
                                              xlims[0]) / len(f_leda)
    else:
        new_x_high = xlims[1]

    dump_data = {}

    if flag and merge:
        # If we are going to merge the flags across antennas, we need to flag them all now
        for p in (0, 1):
            for ii, key in enumerate(ant_ids):
                ant = key + ("B" if p else "A")
                T_flagged = T_ant[ant]
                if not all_lsts:
                    T_flagged = np.delete(T_flagged, unusable_lsts, axis=0)
                new_mask = rfi_flag(T_flagged, freqs=f_leda).mask
                try:
                    merged_mask |= new_mask
                except NameError:
                    merged_mask = new_mask

    for p in [0, 1]:

        for ii, key in enumerate(ant_ids):
            if p == 0 and ii == 0:
                ax = fig.add_subplot(2, 3, 3 * p + ii + 1)
                origAX = ax
            else:
                ax = fig.add_subplot(2,
                                     3,
                                     3 * p + ii + 1,
                                     sharex=origAX,
                                     sharey=origAX)

            if p == 0: ant = key + "A"
            else: ant = key + "B"

            T_flagged = T_ant[ant]
            if not all_lsts:
                T_flagged = np.delete(T_flagged, unusable_lsts, axis=0)

            print "Max", np.max(T_flagged), "Min", np.min(T_flagged)

            if flag:
                if merge:
                    ## Already done
                    T_flagged = np.ma.array(T_flagged, mask=merged_mask)
                else:
                    ## Need to do it now - there's probably a way to deal with
                    ## this all in one pass
                    T_flagged = rfi_flag(T_flagged, freqs=f_leda)
                print "After flagging", "Max", np.ma.max(
                    T_flagged), "Min", np.ma.min(T_flagged)

            if dump:
                dump_data[ant] = T_flagged
                dump_data[ant + "_rms"] = add_uncertainties(T_flagged)
                av = np.ma.average(T_flagged, axis=0)
                weighted = av / dump_data[ant + "_rms"]**2
                dump_data[ant + "_weighted"] = weighted

            if flag:
                total = T_flagged.shape[0] * T_flagged.shape[1]
                num_in = np.ma.MaskedArray.count(T_flagged)
                print ant, ("%.1f%%" % (100 * (total - num_in) / total)
                            ), "flagged.", "Count:", total - num_in

            # Add the stripe onto the right edge of the data and adjust the extent of the x-axis (frequency) to cover the stripe.
            if all_lsts:
                T_flagged_plot = np.ma.concatenate((T_flagged, padding),
                                                   axis=1)
            else:
                T_flagged_plot = T_flagged

            ax.set_yticks(yloc)
            ax.set_yticklabels(ylabel)
            ax.tick_params(axis='y', pad=2)

            if flatten:
                if type(T_flagged_plot) is np.ma.core.MaskedArray:
                    abp = np.ma.median(T_flagged_plot.data, axis=0)
                else:
                    abp = np.ma.median(T_flagged_plot, axis=0)
                abp /= np.ma.median(abp)
                T_flagged_plot /= abp
                try:
                    clim = (percentile(T_flagged_plot.compressed(), 5),
                            percentile(T_flagged_plot.compressed(), 95))
                except AttributeError:
                    clim = (percentile(T_flagged_plot,
                                       5), percentile(T_flagged_plot, 95))

            else:
                clim = (1000, 10000)

            im = ax.imshow(
                T_flagged_plot,  # / np.median(xx, axis=0), 
                cmap='jet',
                aspect='auto',
                interpolation='nearest',
                clim=clim,
                extent=(xlims[0], new_x_high, ylims[1], ylims[0]))

            ax.set_title(ant)
            if p == 1: ax.set_xlabel("Frequency [MHz]")
            if ii == 0: ax.set_ylabel("LST [hr]")
            #ax.yaxis_date()
            #ax.yaxis.set_major_formatter(hfmt)
            #

    if not flatten:
        fig.subplots_adjust(left=0.07)
        fig.subplots_adjust(right=0.875)
        cbar_ax = fig.add_axes([0.9, 0.125, 0.025, 0.75])
        cbar = fig.colorbar(im, cax=cbar_ax)

        #plt.subplot(2,3,3)
        #cbar = plt.colorbar()
        cbar.set_label("Temperature [K]")
        cbar.ax.tick_params(axis='y', pad=2)
        #plt.tight_layout()

    if save:
        plt.savefig(os.path.basename(filename)[:-3] + ".png")
    if not no_show:
        plt.show()

    if dump:
        dump_data["lsts"] = lst_stamps
        dump_data["utcs"] = np.array([str(pytime) for pytime in utc_stamps])
        dump_data["frequencies"] = f_leda
        dump_data["options"] = "Flag=" + str(flag) + " Merge=" + str(
            merge) + " Flatten=" + str(flatten) + " All LSTSs=" + str(all_lsts)
        hickle.dump(dump_data, os.path.basename(filename)[:-3] + ".hkl")
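The day/night test in the loop above flags any LST at which the Sun or the Galactic center sits above -15 degrees. A standalone sketch of that test with PyEphem; the OVRO coordinates and the Sgr A* position used here are approximate assumptions, not values taken from this codebase.

import numpy as np
import ephem

# Observer at OVRO (approximate coordinates, assumed for illustration)
ovro = ephem.Observer()
ovro.lon, ovro.lat, ovro.elevation = '-118.282', '37.240', 1184.0

sun = ephem.Sun()
gal_center = ephem.FixedBody()
gal_center._ra, gal_center._dec = '17:45:40', '-29:00:28'  # approx. Sgr A*

def is_night(utc):
    # True when both the Sun and the Galactic center are below -15 degrees
    ovro.date = utc
    sun.compute(ovro)
    gal_center.compute(ovro)
    limit = -15 * np.pi / 180  # ephem altitudes are in radians
    return sun.alt < limit and gal_center.alt < limit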
Example 36
def main(args):
    # Parse the command line
    ## Baseline list
    if args.baseline is not None:
        ## Fill the baseline list with the conjugates, if needed
        newBaselines = []
        for pair in args.baseline:
            newBaselines.append((pair[1], pair[0]))
        args.baseline.extend(newBaselines)
    ## Polarization
    args.polToPlot = 'XX'
    if args.xy:
        args.polToPlot = 'XY'
    elif args.yx:
        args.polToPlot = 'YX'
    elif args.yy:
        args.polToPlot = 'YY'
    filename = args.filename

    print("Working on '%s'" % os.path.basename(filename))
    # Open the FITS IDI file and access the UV_DATA extension
    hdulist = astrofits.open(filename, mode='readonly')
    andata = hdulist['ANTENNA']
    fqdata = hdulist['FREQUENCY']
    fgdata = None
    for hdu in hdulist[1:]:
        if hdu.header['EXTNAME'] == 'FLAG':
            fgdata = hdu
    uvdata = hdulist['UV_DATA']

    # Pull out various bits of information we need to flag the file
    ## Antenna look-up table
    antLookup = {}
    for an, ai in zip(andata.data['ANNAME'], andata.data['ANTENNA_NO']):
        antLookup[an] = ai
    ## Frequency and polarization setup
    nBand, nFreq, nStk = uvdata.header['NO_BAND'], uvdata.header[
        'NO_CHAN'], uvdata.header['NO_STKD']
    stk0 = uvdata.header['STK_1']
    ## Baseline list
    bls = uvdata.data['BASELINE']
    ## Time of each integration
    obsdates = uvdata.data['DATE']
    obstimes = uvdata.data['TIME']
    inttimes = uvdata.data['INTTIM']
    ## Source list
    srcs = uvdata.data['SOURCE']
    ## Band information
    fqoffsets = fqdata.data['BANDFREQ'].ravel()
    ## Frequency channels
    freq = (numpy.arange(nFreq) -
            (uvdata.header['CRPIX3'] - 1)) * uvdata.header['CDELT3']
    freq += uvdata.header['CRVAL3']
    ## UVW coordinates
    try:
        u, v, w = uvdata.data['UU'], uvdata.data['VV'], uvdata.data['WW']
    except KeyError:
        u, v, w = uvdata.data['UU---SIN'], uvdata.data[
            'VV---SIN'], uvdata.data['WW---SIN']
    uvw = numpy.array([u, v, w]).T
    ## The actual visibility data
    flux = uvdata.data['FLUX'].astype(numpy.float32)

    # Convert the visibilities to something that we can easily work with
    nComp = flux.shape[1] // nBand // nFreq // nStk
    if nComp == 2:
        ## Case 1) - Just real and imaginary data
        flux = flux.view(numpy.complex64)
    else:
        ## Case 2) - Real, imaginary data + weights (drop the weights)
        flux = flux[:, 0::nComp] + 1j * flux[:, 1::nComp]
    flux.shape = (flux.shape[0], nBand, nFreq, nStk)

    # Find unique baselines, times, and sources to work with
    ubls = numpy.unique(bls)
    utimes = numpy.unique(obstimes)
    usrc = numpy.unique(srcs)

    # Convert times to real times
    times = utcjd_to_unix(obsdates + obstimes)
    times = numpy.unique(times)

    # Build a mask
    mask = numpy.zeros(flux.shape, dtype=bool)
    if fgdata is not None and not args.drop:
        reltimes = obsdates - obsdates[0] + obstimes
        maxtimes = reltimes + inttimes / 2.0 / 86400.0
        mintimes = reltimes - inttimes / 2.0 / 86400.0

        bls_ant1 = bls // 256
        bls_ant2 = bls % 256

        for row in fgdata.data:
            ant1, ant2 = row['ANTS']

            ## Only deal with flags that we need for the plots
            process_flag = False
            if args.include_auto or ant1 != ant2 or ant1 == 0 or ant2 == 0:
                if ant1 == 0 and ant2 == 0:
                    process_flag = True
                elif args.baseline is not None:
                    if ant2 == 0 and ant1 in [a0 for a0, a1 in args.baseline]:
                        process_flag = True
                    elif (ant1, ant2) in args.baseline:
                        process_flag = True
                elif args.ref_ant is not None:
                    if ant1 == args.ref_ant or ant2 == args.ref_ant:
                        process_flag = True
                else:
                    process_flag = True
            if not process_flag:
                continue

            tStart, tStop = row['TIMERANG']
            band = row['BANDS']
            try:
                len(band)
            except TypeError:
                band = [
                    band,
                ]
            cStart, cStop = row['CHANS']
            if cStop == 0:
                cStop = -1
            pol = row['PFLAGS'].astype(bool)

            if ant1 == 0 and ant2 == 0:
                btmask = numpy.where(
                    ((maxtimes >= tStart) & (mintimes <= tStop)))[0]
            elif ant1 == 0 or ant2 == 0:
                ant1 = max([ant1, ant2])
                btmask = numpy.where( ( (bls_ant1 == ant1) | (bls_ant2 == ant1) ) \
                                      & ( (maxtimes >= tStart) & (mintimes <= tStop) ) )[0]
            else:
                btmask = numpy.where( ( (bls_ant1 == ant1) & (bls_ant2 == ant2) ) \
                                      & ( (maxtimes >= tStart) & (mintimes <= tStop) ) )[0]
            for b, v in enumerate(band):
                if not v:
                    continue
                mask[btmask, b, cStart - 1:cStop, :] |= pol

    plot_bls = []
    cross = []
    for i in xrange(len(ubls)):
        bl = ubls[i]
        ant1, ant2 = (bl >> 8) & 0xFF, bl & 0xFF
        if args.include_auto or ant1 != ant2:
            if args.baseline is not None:
                if (ant1, ant2) in args.baseline:
                    plot_bls.append(bl)
                    cross.append(i)
            elif args.ref_ant is not None:
                if ant1 == args.ref_ant or ant2 == args.ref_ant:
                    plot_bls.append(bl)
                    cross.append(i)
            else:
                plot_bls.append(bl)
                cross.append(i)
    nBL = len(cross)

    # Decimation, if needed
    if args.decimate > 1:
        if nFreq % args.decimate != 0:
            raise RuntimeError(
                "Invalid freqeunce decimation factor:  %i %% %i = %i" %
                (nFreq, args.decimate, nFreq % args.decimate))

        nFreq //= args.decimate
        freq.shape = (freq.size // args.decimate, args.decimate)
        freq = freq.mean(axis=1)

        flux.shape = (flux.shape[0], flux.shape[1],
                      flux.shape[2] // args.decimate, args.decimate,
                      flux.shape[3])
        flux = flux.mean(axis=3)

        mask.shape = (mask.shape[0], mask.shape[1],
                      mask.shape[2] // args.decimate, args.decimate,
                      mask.shape[3])
        mask = mask.mean(axis=3)

    good = numpy.arange(freq.size // 8,
                        freq.size * 7 // 8)  # Inner 75% of the band

    # NOTE: Assumes that the Stokes parameters increment by -1
    namMapper = {}
    for i in xrange(nStk):
        stk = stk0 - i
        namMapper[i] = NUMERIC_STOKES[stk]
    polMapper = {'XX': 0, 'YY': 1, 'XY': 2, 'YX': 3}

    fig1 = plt.figure()
    fig2 = plt.figure()
    fig3 = plt.figure()
    fig4 = plt.figure()
    fig5 = plt.figure()

    k = 0
    nRow = int(numpy.sqrt(len(plot_bls)))
    nCol = int(numpy.ceil(len(plot_bls) * 1.0 / nRow))
    for b in xrange(len(plot_bls)):
        bl = plot_bls[b]
        valid = numpy.where(bls == bl)[0]
        i, j = (bl >> 8) & 0xFF, bl & 0xFF
        dTimes = obsdates[valid] + obstimes[valid]
        dTimes -= dTimes[0]
        dTimes *= 86400.0

        ax1, ax2, ax3, ax4, ax5 = None, None, None, None, None
        for band, offset in enumerate(fqoffsets):
            frq = freq + offset
            vis = numpy.ma.array(flux[valid, band, :,
                                      polMapper[args.polToPlot]],
                                 mask=mask[valid, band, :,
                                           polMapper[args.polToPlot]])

            ax1 = fig1.add_subplot(nRow,
                                   nCol * nBand,
                                   nBand * k + 1 + band,
                                   sharey=ax1)
            ax1.imshow(numpy.ma.angle(vis),
                       extent=(frq[0] / 1e6, frq[-1] / 1e6, dTimes[0],
                               dTimes[-1]),
                       origin='lower',
                       vmin=-numpy.pi,
                       vmax=numpy.pi,
                       interpolation='nearest')
            ax1.axis('auto')
            ax1.set_xlabel('Frequency [MHz]')
            if band == 0:
                ax1.set_ylabel('Elapsed Time [s]')
            ax1.set_title("%i,%i - %s" %
                          (i, j, namMapper[polMapper[args.polToPlot]]))
            ax1.set_xlim((frq[0] / 1e6, frq[-1] / 1e6))
            ax1.set_ylim((dTimes[0], dTimes[-1]))

            ax2 = fig2.add_subplot(nRow,
                                   nCol * nBand,
                                   nBand * k + 1 + band,
                                   sharey=ax2)
            amp = numpy.ma.abs(vis)
            vmin, vmax = percentile(amp, 1), percentile(amp, 99)
            ax2.imshow(amp,
                       extent=(frq[0] / 1e6, frq[-1] / 1e6, dTimes[0],
                               dTimes[-1]),
                       origin='lower',
                       interpolation='nearest',
                       vmin=vmin,
                       vmax=vmax)
            ax2.axis('auto')
            ax2.set_xlabel('Frequency [MHz]')
            if band == 0:
                ax2.set_ylabel('Elapsed Time [s]')
            ax2.set_title("%i,%i - %s" %
                          (i, j, namMapper[polMapper[args.polToPlot]]))
            ax2.set_xlim((frq[0] / 1e6, frq[-1] / 1e6))
            ax2.set_ylim((dTimes[0], dTimes[-1]))

            ax3 = fig3.add_subplot(nRow,
                                   nCol * nBand,
                                   nBand * k + 1 + band,
                                   sharey=ax3)
            ax3.plot(frq / 1e6, numpy.ma.abs(vis.mean(axis=0)))
            ax3.set_xlabel('Frequency [MHz]')
            if band == 0:
                ax3.set_ylabel('Mean Vis. Amp. [lin.]')
            ax3.set_title("%i,%i - %s" %
                          (i, j, namMapper[polMapper[args.polToPlot]]))
            ax3.set_xlim((frq[0] / 1e6, frq[-1] / 1e6))

            ax4 = fig4.add_subplot(nRow,
                                   nCol * nBand,
                                   nBand * k + 1 + band,
                                   sharey=ax4)
            ax4.plot(numpy.ma.angle(vis[:, good].mean(axis=1)) * 180 /
                     numpy.pi,
                     dTimes,
                     linestyle='',
                     marker='+')
            ax4.set_xlim((-180, 180))
            ax4.set_xlabel('Mean Vis. Phase [deg]')
            if band == 0:
                ax4.set_ylabel('Elapsed Time [s]')
            ax4.set_title("%i,%i - %s" %
                          (i, j, namMapper[polMapper[args.polToPlot]]))
            ax4.set_ylim((dTimes[0], dTimes[-1]))

            ax5 = fig5.add_subplot(nRow,
                                   nCol * nBand,
                                   nBand * k + 1 + band,
                                   sharey=ax5)
            # Amplitude, not phase, so no radian-to-degree scaling here
            ax5.plot(numpy.ma.abs(vis[:, good].mean(axis=1)),
                     dTimes,
                     linestyle='',
                     marker='+')
            ax5.set_xlabel('Mean Vis. Amp. [lin.]')
            if band == 0:
                ax5.set_ylabel('Elapsed Time [s]')
            ax5.set_title("%i,%i - %s" %
                          (i, j, namMapper[polMapper[args.polToPlot]]))
            ax5.set_ylim((dTimes[0], dTimes[-1]))

            if band > 0:
                for ax in (ax1, ax2, ax3, ax4, ax5):
                    plt.setp(ax.get_yticklabels(), visible=False)
            if band < nBand - 1:
                for ax in (ax1, ax2, ax3, ax4, ax5):
                    xticks = ax.xaxis.get_major_ticks()
                    xticks[-1].label1.set_visible(False)

        k += 1

    for f in (fig1, fig2, fig3, fig4, fig5):
        f.suptitle(
            "%s to %s UTC" %
            (datetime.utcfromtimestamp(times[0]).strftime("%Y/%m/%d %H:%M"),
             datetime.utcfromtimestamp(times[-1]).strftime("%Y/%m/%d %H:%M")))
        if nBand > 1:
            f.subplots_adjust(wspace=0.0)

    plt.show()
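The BASELINE column unpacked above uses the FITS IDI convention of packing both antenna numbers into one integer, bl = 256 * ant1 + ant2 for antenna numbers below 256, which is why the code extracts them with shifts and masks. The round trip in isolation:

def pack_baseline(ant1, ant2):
    # Standard packing for antenna numbers < 256
    return (ant1 << 8) | ant2

def unpack_baseline(bl):
    return (bl >> 8) & 0xFF, bl & 0xFF

bl = pack_baseline(3, 7)
assert bl == 256 * 3 + 7
assert unpack_baseline(bl) == (3, 7)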
Example 37
def main(args):
    # Set the station
    if args.metadata is not None:
        station = stations.parse_ssmif(args.metadata)
        ssmifContents = open(args.metadata).readlines()
    else:
        station = stations.lwa1
        ssmifContents = open(os.path.join(dataPath,
                                          'lwa1-ssmif.txt')).readlines()
    antennas = station.antennas

    toKeep = []
    for g in (1, 10, 54, 248, 251, 258):
        for i, ant in enumerate(antennas):
            if ant.stand.id == g and ant.pol == 0:
                toKeep.append(i)
    for i, j in enumerate(toKeep):
        print(i, j, antennas[j].stand.id)

    # Length of the FFT
    LFFT = args.fft_length

    # Make sure that the file chunk size is an integer multiple
    # of the FFT length so that no data gets dropped
    maxFrames = int((30000 * 260) / float(LFFT)) * LFFT
    # It seems like that would be a good idea, however...  TBW data comes one
    # capture at a time so doing something like this actually truncates data
    # from the last set of stands for the first integration.  So, we really
    # should stick with
    maxFrames = (30000 * 260)

    fh = open(args.filename, "rb")
    nFrames = os.path.getsize(args.filename) // tbw.FRAME_SIZE
    dataBits = tbw.get_data_bits(fh)
    # The number of ant/pols in the file is hard coded because I cannot figure out
    # a way to get this number in a systematic fashion
    antpols = len(antennas)
    nChunks = int(math.ceil(1.0 * nFrames / maxFrames))
    if dataBits == 12:
        nSamples = 400
    else:
        nSamples = 1200

    # Read in the first frame and get the date/time of the first sample
    # of the frame.  This is needed to get the list of stands.
    junkFrame = tbw.read_frame(fh)
    fh.seek(0)
    beginTime = junkFrame.time
    beginDate = junkFrame.time.datetime

    # File summary
    print("Filename: %s" % args.filename)
    print("Date of First Frame: %s" % str(beginDate))
    print("Ant/Pols: %i" % antpols)
    print("Sample Length: %i-bit" % dataBits)
    print("Frames: %i" % nFrames)
    print("Chunks: %i" % nChunks)
    print("===")

    nChunks = 1

    # Skip over any non-TBW frames at the beginning of the file
    i = 0
    junkFrame = tbw.read_frame(fh)
    while not junkFrame.header.is_tbw:
        try:
            junkFrame = tbw.read_frame(fh)
        except errors.SyncError:
            fh.seek(0)
            while True:
                try:
                    junkFrame = tbn.read_frame(fh)
                    i += 1
                except errors.SyncError:
                    break
            fh.seek(-2 * tbn.FRAME_SIZE, 1)
            junkFrame = tbw.read_frame(fh)
        i += 1
    fh.seek(-tbw.FRAME_SIZE, 1)
    print("Skipped %i non-TBW frames at the beginning of the file" % i)

    # Master loop over all of the file chunks
    masterSpectra = numpy.zeros((nChunks, antpols, LFFT))
    for i in range(nChunks):
        # Find out how many frames remain in the file.  If this number is larger
        # than the maximum of frames we can work with at a time (maxFrames),
        # only deal with that chunk
        framesRemaining = nFrames - i * maxFrames
        if framesRemaining > maxFrames:
            framesWork = maxFrames
        else:
            framesWork = framesRemaining
        print("Working on chunk %i, %i frames remaining" %
              ((i + 1), framesRemaining))

        data = numpy.zeros((12, 12000000), dtype=numpy.int16)
        # If there are fewer frames than we need to fill an FFT, skip this chunk
        if data.shape[1] < 2 * LFFT:
            break
        # Inner loop that actually reads the frames into the data array
        for j in range(framesWork):
            # Read in the next frame and anticipate any problems that could occur
            try:
                cFrame = tbw.read_frame(fh)
            except errors.EOFError:
                break
            except errors.SyncError:
                #print("WARNING: Mark 5C sync error on frame #%i" % (int(fh.tell())/tbw.FRAME_SIZE-1))
                continue
            if not cFrame.header.is_tbw:
                continue

            stand = cFrame.header.id
            # In the current configuration, stands start at 1 and go up to 10.  So, we
            # can use this little trick to populate the data array
            aStand = 2 * (stand - 1)
            #if cFrame.header.frame_count % 10000 == 0 and config['verbose']:
            #print("%3i -> %3i  %6.3f  %5i  %i" % (stand, aStand, cFrame.time, cFrame.header.frame_count, cFrame.payload.timetag))

            # Actually load the data.  x pol goes into the even numbers, y pol into the
            # odd numbers
            count = cFrame.header.frame_count - 1
            if aStand not in toKeep:
                continue

            # Convert to reduced index
            aStand = 2 * toKeep.index(aStand)

            data[aStand, count * nSamples:(count + 1) *
                 nSamples] = cFrame.payload.data[0, :]
            data[aStand + 1, count * nSamples:(count + 1) *
                 nSamples] = cFrame.payload.data[1, :]

        # Time series analysis - mean, std. dev, saturation count
        tsMean = data.mean(axis=1)
        tsStd = data.std(axis=1)
        tsSat = numpy.where((data == 2047) | (data == -2047), 1, 0).sum(axis=1)

        # Time series analysis - percentiles
        p = [50, 75, 90, 95, 99]
        tsPct = numpy.zeros((data.shape[0], len(p)))
        for i in xrange(len(p)):
            for j in xrange(data.shape[0]):
                tsPct[j, i] = percentile(numpy.abs(data[j, :]), p[i])

        # Frequency domain analysis - spectra
        freq = numpy.fft.fftfreq(2 * args.fft_length, d=1.0 / 196e6)
        freq = freq[:args.fft_length]

        delays = numpy.zeros((data.shape[0], freq.size))
        signalsF, validF = FEngine(data,
                                   freq,
                                   delays,
                                   LFFT=args.fft_length,
                                   Overlap=1,
                                   sample_rate=196e6,
                                   clip_level=0)

        # Cleanup to save memory
        del validF, data
        print(signalsF.shape)

        # SK control values
        skM = signalsF.shape[2]
        skN = 1

        # Frequency domain analysis -  spectral kurtosis
        k = numpy.zeros((signalsF.shape[0], signalsF.shape[1]))
        for l in xrange(signalsF.shape[0]):
            for m in xrange(freq.size):
                k[l, m] = kurtosis.spectral_fft(signalsF[l, m, :])
        kl, kh = kurtosis.get_limits(4, skM, skN)
        print(kl, kh)

        # Integrate the spectra for as long as we can
        masterSpectra = (numpy.abs(signalsF)**2).mean(axis=2)
        del signalsF

        # Mask out bad values (high spectral kurtosis) for the plot
        mask = numpy.where((k < kl) | (k > kh), 1, 0)
        mask = expandMask(mask, radius=4, merge=True)

        masterSpectra = numpy.ma.array(masterSpectra, mask=mask)

        # Save the data to an HDF5 file
        outname = os.path.splitext(args.filename)[0]
        outname = "%s-RFI.hdf5" % outname

        f = h5py.File(outname, 'w')
        f.attrs['filename'] = args.filename
        f.attrs['mode'] = 'TBW'
        f.attrs['station'] = 'LWA-1'
        f.attrs['dataBits'] = dataBits
        f.attrs['startTime'] = beginTime
        f.attrs['startTime_units'] = 's'
        f.attrs['startTime_sys'] = 'unix'
        f.attrs['sample_rate'] = 196e6
        f.attrs['sample_rate_units'] = 'Hz'
        f.attrs['RBW'] = freq[1] - freq[0]
        f.attrs['RBW_Units'] = 'Hz'

        f.attrs['SK-M'] = skM
        f.attrs['SK-N'] = skN

        for l in xrange(len(toKeep)):
            antX = antennas[toKeep[l]]
            antY = antennas[toKeep[l] + 1]

            stand = f.create_group('Stand%03i' % antX.stand.id)
            stand['freq'] = freq
            stand['freq'].attrs['Units'] = 'Hz'

            polX = stand.create_group('X')
            polY = stand.create_group('Y')
            polX.attrs['tsMean'] = tsMean[2 * l]
            polY.attrs['tsMean'] = tsMean[2 * l + 1]
            polX.attrs['tsStd'] = tsStd[2 * l]
            polY.attrs['tsStd'] = tsStd[2 * l + 1]
            polX.attrs['tsSat'] = tsSat[2 * l]
            polY.attrs['tsSat'] = tsSat[2 * l + 1]
            for i, v in enumerate(p):
                polX.attrs['ts%02i' % v] = tsPct[2 * l][i]
                polY.attrs['ts%02i' % v] = tsPct[2 * l + 1][i]

            polX['spectrum'] = masterSpectra[2 * l, :]
            polX['spectrum'].attrs['axis0'] = 'frequency'
            polY['spectrum'] = masterSpectra[2 * l + 1, :]
            polY['spectrum'].attrs['axis0'] = 'frequency'

            polX['kurtosis'] = k[2 * l, :]
            polX['kurtosis'].attrs['axis0'] = 'frequency'
            polY['kurtosis'] = k[2 * l + 1, :]
            polY['kurtosis'].attrs['axis0'] = 'frequency'

        # The plot
        fig = plt.figure()
        ax1 = fig.add_subplot(2, 1, 1)
        ax2 = fig.add_subplot(2, 1, 2)
        for l in xrange(k.shape[0]):
            ant = antennas[toKeep[l // 2]]

            ax1.plot(freq / 1e6,
                     numpy.log10(masterSpectra[l, :]) * 10,
                     label='Stand %i, Pol %i' %
                     (ant.stand.id, ant.pol + l % 2))

            ax2.plot(freq / 1e6,
                     k[l, :],
                     label='Stand %i, Pol %i' %
                     (ant.stand.id, ant.pol + l % 2))

        ax2.hlines(kl,
                   freq[0] / 1e6,
                   freq[-1] / 1e6,
                   linestyle=':',
                   label=r'Kurtosis Limit 4$\sigma$')
        ax2.hlines(kh,
                   freq[0] / 1e6,
                   freq[-1] / 1e6,
                   linestyle=':')

        ax1.set_xlabel('Frequency [MHz]')
        ax1.set_ylabel('PSD [arb. dB/RBW]')
        ax1.legend(loc=0)

        ax2.set_ylim((kl / 2, kh * 2))
        ax2.set_xlabel('Frequency [MHz]')
        ax2.set_ylabel('Spectral Kurtosis')
        ax2.legend(loc=0)

        plt.show()
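The kurtosis helpers used above come from LSL. As a point of reference, a numpy-only sketch of the generalized spectral kurtosis estimator (Nita & Gary 2010) for M raw power estimates per channel; this is an assumed stand-in, not LSL's actual implementation.

import numpy as np

def spectral_kurtosis(power, axis=-1):
    # SK estimator for M raw (N=1) power estimates along `axis`.
    # Gaussian noise gives SK ~ 1; RFI pushes SK away from 1.
    m = power.shape[axis]
    s1 = power.sum(axis=axis)
    s2 = (power**2).sum(axis=axis)
    return ((m + 1.0) / (m - 1.0)) * (m * s2 / s1**2 - 1.0)

# Toy check: 64 channels x 512 spectra of Gaussian-noise power
x = np.random.randn(64, 512) + 1j * np.random.randn(64, 512)
print(spectral_kurtosis(np.abs(x)**2).mean())  # close to 1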
Example 38
def calculateCI(Vr,
                years,
                nodata,
                minRecords,
                yrsPerSim=1,
                sample_size=50,
                prange=90):
    """
    Fit a GEV to the wind speed records for a 2-D extent of
    wind speed values, providing a confidence range by resampling at
    random from the input values.

    :param Vr: `numpy.ndarray` of wind speeds (3-D - event, lat, lon)
    :param years: `numpy.ndarray` of years for which to evaluate
                  return period values.
    :param float nodata: missing data value.
    :param int minRecords: minimum number of valid wind speed values required
                           to fit distribution.
    :param int yrsPerSim: Values represent block maxima - this value indicates
                          the time span of the block (default 1).
    :param int sample_size: number of records to randomly sample for calculating
                            confidence interval of the fit.
    :param float prange: percentile range.


    :return: `numpy.ndarray` of return period wind speed values

    """

    lower = (100 - prange) / 2.
    upper = 100. - lower

    nrecords = Vr.shape[0]
    nsamples = nrecords / sample_size
    RpUpper = nodata * np.ones(
        (len(years), Vr.shape[1], Vr.shape[2]), dtype='f')
    RpLower = nodata * np.ones(
        (len(years), Vr.shape[1], Vr.shape[2]), dtype='f')

    w = np.zeros((len(years), nsamples), dtype='f')
    wUpper = np.zeros((len(years)), dtype='f')
    wLower = np.zeros((len(years)), dtype='f')

    for i in xrange(Vr.shape[1]):
        for j in xrange(Vr.shape[2]):
            if Vr[:, i, j].max() > 0.0:
                random.shuffle(Vr[:, i, j])
                for n in xrange(nsamples):
                    nstart = n * sample_size
                    nend = (n + 1) * sample_size  # slice end is exclusive
                    vsub = Vr[nstart:nend, i, j]

                    vsub.sort()
                    if vsub.max() > 0.:
                        w[:, n], loc, scale, shp = evd.estimateEVD(
                            vsub, years, nodata, minRecords / 10, yrsPerSim)

                for n in range(len(years)):
                    wUpper[n] = percentile(w[n, :], upper)
                    wLower[n] = percentile(w[n, :], lower)

                RpUpper[:, i, j] = wUpper
                RpLower[:, i, j] = wLower

    return RpUpper, RpLower
Example 39
def calculateCI(Vr,
                years,
                nodata,
                minRecords,
                yrsPerSim=1,
                sample_size=50,
                prange=90):
    """
    Fit a GEV to the wind speed records for a 2-D extent of
    wind speed values, providing a confidence range by resampling at
    random from the input values.

    :param Vr: `numpy.ndarray` of wind speeds (3-D - event, lat, lon)
    :param years: `numpy.ndarray` of years for which to evaluate
                  return period values.
    :param float nodata: missing data value.
    :param int minRecords: minimum number of valid wind speed values required
                           to fit distribution.
    :param int yrsPerSim: Values represent block maxima - this value indicates
                          the time span of the block (default 1).
    :param int sample_size: number of records to randomly sample for calculating
                            confidence interval of the fit.
    :param float prange: percentile range.


    :return: Tuple (RpUpper, RpLower) of upper and lower confidence-interval
             return period wind speed values for each year and lat/lon cell.

    """

    lower = (100 - prange) / 2.  # 5th percentile default
    upper = 100. - lower  # 95th percentile default

    nrecords = Vr.shape[0]  # number of years (aggregated to one event per yr)
    nsamples = nrecords / sample_size  # number of iterations to perform

    # RpUpper/RpLower = years x lat x lon
    RpUpper = nodata * np.ones(
        (len(years), Vr.shape[1], Vr.shape[2]), dtype='f')
    RpLower = nodata * np.ones(
        (len(years), Vr.shape[1], Vr.shape[2]), dtype='f')

    # w: years x number of iterations
    w = np.zeros((len(years), nsamples), dtype='f')
    wUpper = np.zeros((len(years)), dtype='f')
    wLower = np.zeros((len(years)), dtype='f')

    for i in xrange(Vr.shape[1]):  # lat
        for j in xrange(Vr.shape[2]):  # lon
            if Vr[:, i, j].max() > 0.0:  # check for valid data
                random.shuffle(Vr[:, i, j])  # shuffle the years
                for n in xrange(nsamples):  # fit each random sample in turn
                    nstart = n * sample_size
                    nend = (n + 1) * sample_size  # slice end is exclusive
                    vsub = Vr[nstart:nend, i, j]  # sample_size random events

                    vsub.sort()
                    if vsub.max() > 0.:
                        # Perform the fitting on a random subset of samples
                        w[:, n], loc, scale, shp = evd.gevfit(
                            vsub, years, nodata, minRecords / 10, yrsPerSim)

                # Pull out the upper and lower percentiles from the random sample fits
                for n in range(len(years)):
                    wUpper[n] = percentile(w[n, :], upper)
                    wLower[n] = percentile(w[n, :], lower)

                # Store upper and lower percentiles for each return period, for each grid cell
                RpUpper[:, i, j] = wUpper
                RpLower[:, i, j] = wLower

    return RpUpper, RpLower
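Both versions of calculateCI follow the same recipe: shuffle the events, fit a GEV to each fixed-size block, then take upper/lower percentiles of the fitted return levels as the confidence band. A minimal 1-D sketch of that recipe, with scipy's genextreme standing in for the project's evd fitting routine (an assumption, not the module used above):

import numpy as np
from scipy.stats import genextreme

def bootstrap_rp_ci(v, years, sample_size=50, prange=90):
    # Percentile confidence band on return levels from block-wise GEV fits
    lower = (100 - prange) / 2.0
    upper = 100.0 - lower
    v = v.copy()
    np.random.shuffle(v)
    nsamples = len(v) // sample_size
    w = np.empty((len(years), nsamples))
    for n in range(nsamples):
        sub = v[n * sample_size:(n + 1) * sample_size]
        shape, loc, scale = genextreme.fit(sub)
        # Return level exceeded once per T years of annual maxima
        w[:, n] = genextreme.isf(1.0 / np.asarray(years), shape, loc, scale)
    return np.percentile(w, upper, axis=1), np.percentile(w, lower, axis=1)

years = np.array([5, 10, 50, 100])
v = genextreme.rvs(-0.1, loc=40.0, scale=5.0, size=500)  # toy annual maxima
rp_hi, rp_lo = bootstrap_rp_ci(v, years)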