def plot_correlations(self):
    # Smooth with a window of ~10% of the series length (at least 1 sample).
    smoothing_window = max(1, int(len(self.raw_data[self.targets[0]]) / 10))
    for c_f in self.features:
        grouped_features = mlab.rec_groupby(self.normalized_data, [c_f], [(c_f, len, 'count')])
        # Features with fewer than 100 distinct values are treated as discrete.
        is_discrete = len(grouped_features) < 100
        if c_f in self.raw_data:
            self.raw_data.sort(order=c_f)
        is_date = 'date' in c_f
        if not is_date:
            self.normalized_data.sort(order=c_f)
        for c_t in self.targets:
            try:
                f = plt.figure()
                if is_discrete:
                    # Bar chart of the mean target value per feature level.
                    d = mlab.rec_groupby(self.normalized_data, [c_f], [(c_t, numpy.average, 'avg')])
                    plt.bar(d[c_f], d['avg'])
                else:
                    # Moving-average curve of the target over the sorted feature.
                    if c_t in self.raw_data and self.raw_data[c_t].dtype == '>f4':
                        y = self.raw_data[c_t]
                    else:
                        y = self.normalized_data[c_t]
                    convolved_y = numpy.convolve(
                        numpy.ones(smoothing_window, 'd') / smoothing_window, y, mode='valid')
                    x = self.raw_data[c_f] if is_date else self.normalized_data[c_f]
                    plt.plot(x[0:convolved_y.shape[0]], convolved_y)
                    if is_date:
                        f.autofmt_xdate()
                plt.ylabel(c_t)
                plt.xlabel(c_f)
                plt.savefig("%s/%s_x_%s" % ('plots', c_f, c_t))
            except Exception:
                print("Error creating plot (%s, %s)" % (c_f, c_t))
Example #2
def get_recommended_pricing(price_history):
    # Score each availability zone by the larger of its 3-sigma price bound
    # and its 99th-percentile price; the lowest-scoring zone is recommended.
    zones_stats = []
    for zone in price_history:
        price_arr = mlab.rec_groupby(
            price_history[zone], ('timestamp',), (('price', np.max, 'max_price'),))['max_price']
        three_sigma = np.mean(price_arr) + (3 * np.std(price_arr))
        percentile = np.percentile(price_arr, 99)
        zones_stats.append((zone, max(three_sigma, percentile)))
    zone, _ = min(zones_stats, key=lambda x: x[1])

    price_arr = mlab.rec_groupby(
        price_history[zone], ('timestamp',), (('price', np.max, 'max_price'),))['max_price']

    n = len(price_arr)
    # Build tanh-shaped weights that ramp up toward the most recent samples,
    base = np.arange(n, dtype=float) / n
    weights = np.tanh((base - 0.7) / 0.2)
    # rescale them into [1, 2], and normalize them to sum to 1.
    weights = (weights - np.min(weights)) / (np.max(weights) - np.min(weights)) + 1
    weights /= np.sum(weights)

    # Bid the highest raw price among samples whose recency-weighted price
    # falls in the top percentile.
    weighted = np.multiply(weights, price_arr)

    price = np.max(
        price_arr[np.where(weighted >= np.percentile(weighted, 99, interpolation='nearest'))])

    return (zone, price)
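# A hedged usage sketch: the code above implies price_history maps each zone
# name to a record array with 'timestamp' and 'price' fields. The zone name
# and prices below are made up.
import numpy as np

prices = np.rec.fromrecords(
    [(np.datetime64('2015-01-01T00'), 0.12),
     (np.datetime64('2015-01-01T01'), 0.15),
     (np.datetime64('2015-01-01T02'), 0.11),
     (np.datetime64('2015-01-01T03'), 0.13)],
    dtype=[('timestamp', 'datetime64[h]'), ('price', float)])
zone, price = get_recommended_pricing({'us-east-1a': prices})
print(zone, price)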
def estimate_var(new_data, groupbyyear=False):
    summary_list = [(nm, summarize_simple, nm + '_var')
                    for nm in new_data.dtype.names
                    if nm not in ['year', 'weekoftheyear']]
    if groupbyyear:
        return rec_groupby(new_data, ['year'], summary_list)
    else:
        return rec_groupby(new_data, [], summary_list)
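# summarize_simple is defined elsewhere in the original module and is not
# shown in this snippet; going by the '_var' output names, a hypothetical
# stand-in for experimentation could be a plain variance:
import numpy as np

def summarize_simple(x):
    return np.var(x)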
def gethistprices(query, numrows=1000, **kwargs):
    import matplotlib.mlab as mlab
    import numpy as np

    rec_arr = sqlite2rec(query, **kwargs)

    # Assign each symbol an integer index ('idx') and drop the symbol column.
    (syms, posuniq, pos) = np.unique(rec_arr.sym, True, True)
    new_rec_arr = mlab.rec_append_fields(rec_arr, 'idx', pos)
    nosym = mlab.rec_drop_fields(new_rec_arr, ['sym'])

    # Count rows per symbol and keep only symbols with at least numrows rows.
    recnumrecs = mlab.rec_groupby(nosym, ('idx',), (('idx', len, 'idxcount'),))
    idx = np.nonzero(recnumrecs.idxcount >= numrows)[0]
    idxcount = len(recnumrecs[idx])

    # One (numrows x fields) block per surviving symbol; the trailing 'idx'
    # column is stripped again at the end.
    xs = np.empty((idxcount, numrows, len(nosym[0]) - 1), dtype=float)
    for i in range(idxcount):
        if kwargs.get('verbose') and i % 50 == 0:
            print('%d of %d' % (i, idxcount))
        curdata = nosym[nosym.idx == idx[i]]
        curdata_arr = np.array(curdata.tolist(), dtype=float)
        xs[i] = curdata_arr[0:numrows, 0:-1]

    return (syms[idx], xs)
    def write_hex_stats(self, data, id_field, stat_fields, min_plots_per_hex,
            out_file):

        # Summarize the observed output
        stats = mlab.rec_groupby(data, (id_field,), stat_fields)

        # Filter so that the minimum number of plots per hex is maintained
        stats = stats[stats.PLOT_COUNT >= min_plots_per_hex]

        # Write out the file
        utilities.rec2csv(stats, out_file)
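# A hedged example of the stat_fields argument, called from inside the class:
# the filtering step above requires the grouped output to contain a
# 'PLOT_COUNT' field, so one tuple must produce that name. All other field
# names here are illustrative.
import numpy as np

stat_fields = (
    ('PLOT_ID', len, 'PLOT_COUNT'),
    ('BASAL_AREA', np.mean, 'MEAN_BASAL_AREA'),
)
self.write_hex_stats(data, 'HEX_ID', stat_fields,
                     min_plots_per_hex=3, out_file='hex_stats.csv')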
Example #8
def print_price_history(price_history, recommend=True):
    fields = ['Zone', 'Current', 'Min', 'Max', 'Mean', 'Std']
    print('{:13}{:9}{:8}{:8}{:8}{:8}'.format(*fields))
    for zone in sorted(price_history.keys()):
        price_arr = mlab.rec_groupby(
            price_history[zone], ('timestamp',), (('price', np.max, 'max_price'),))['max_price']
        row = [zone, price_arr[-1], np.min(price_arr), np.max(price_arr),
               np.mean(price_arr), np.std(price_arr)]
        print('{:13}${:<8.2f}${:<7.2f}${:<7.2f}${:<7.2f}${:<7.2f}'.format(*row))
    if recommend:
        rec_zone, rec_price = get_recommended_pricing(price_history)
        print('RECOMMENDED BIDDING (ZONE: {}, PRICE: {:.2f})'.format(rec_zone, rec_price))
Example #9
def compute_probability_distribution(arr, is_cum_sum=True):
    # Count occurrences of each distinct key.
    agg_data = utils.array2recarray(arr)
    stats = (
        ('key', len, 'total'),
    )

    res = mlab.rec_groupby(agg_data, ('key',), stats)
    if is_cum_sum:
        cumsum = np.cumsum(res.total / float(len(arr)))
    else:
        cumsum = res.total / float(len(arr))
    # Return an array with the keys and the cumulative distribution.
    return np.array([res.key, cumsum])
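# A sketch of a call, assuming utils.array2recarray wraps a 1-D array into a
# record array with a single 'key' field (which is what the groupby implies):
import numpy as np

arr = np.array([1, 1, 2, 3, 3, 3])
keys, cdf = compute_probability_distribution(arr)
# keys -> the distinct values; cdf -> cumulative fraction of samples per key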
Example #10
def plot_price_history(price_history, plot_hist=False):
    num_zones = len(price_history.keys())

    plt.ion()
    fig_price = plt.figure(figsize=(15, 8))
    ax_price = fig_price.add_subplot(111)
    ax_price.xaxis.set_major_locator(DayLocator())
    ax_price.xaxis.set_minor_locator(HourLocator())
    ax_price.xaxis.set_major_formatter(DateFormatter('%b %d'))
    ax_price.autoscale_view()
    ax_price.grid(True)

    if plot_hist:
        num_rows = int(num_zones / 2) + (num_zones % 2)
        fig_hist = plt.figure(figsize=(15, 8))
        fig_hist.set_tight_layout(True)

    colors = plt.cm.Spectral(np.linspace(0, 1, num_zones))
    min_date, max_date = datetime.today(), datetime(1970, 1, 1)
    for zone, color, i in zip(sorted(price_history.keys()), colors, range(1, num_zones + 1)):
        # Plot price history curves
        grouped = mlab.rec_groupby(
            price_history[zone], ('timestamp',), (('price', np.max, 'max_price'),))
        price_arr, date_arr = grouped['max_price'], [
            x.astype(datetime) for x in grouped['timestamp']]
        price_stats = [zone, price_arr[-1],
                       np.min(price_arr), np.max(price_arr), np.mean(price_arr), np.std(price_arr)]
        label = '{:14}current: ${:<6.2f}min: ${:<6.2f}max: ${:<6.2f}mean: ${:<6.2f}std: ${:<6.2f}'.format(
            *price_stats)
        ax_price.plot_date(date_arr, price_arr, '-', color=color, linewidth=1.5, label=label)
        # Plot price history histogram
        if plot_hist:
            ax_hist = fig_hist.add_subplot(num_rows, 2, i)
            ax_hist.hist(price_arr, 200, range=(0, np.max(price_arr) + 0.5), color=color, alpha=0.7)
            ax_hist.set_title('{} (examples: {})'.format(zone, price_arr.size))
            ax_hist.set_xlabel("Price")
            ax_hist.set_ylabel("Frequency")
        # Calculating time boundaries
        min_date, max_date = min(min_date, np.min(date_arr)), max(max_date, np.max(date_arr))

    rec_zone, rec_price = get_recommended_pricing(price_history)
    label = 'RECOMMENDED BIDDING (ZONE: {}, PRICE: {:.2f})'.format(rec_zone, rec_price)
    ax_price.plot_date([min_date, max_date], [rec_price, rec_price], 'r-', linewidth=2, label=label)

    ax_price.legend(loc='upper center', fancybox=True, shadow=True, ncol=1).draggable()

    return fig_price, ax_price, fig_hist if plot_hist else None
Example #11
    def measure_fit(self):
        ''' Provide metrics of fit to determine how well the model performed '''
        # TODO: code up RMSE for non-holdout predictions
        if self.training_type == 'make predictions':
            print('RMSE for non-holdout data not yet implemented')

        # calculate age-adjusted rates on the test data
        else:
            predicted = self.predictions[['country', 'year', 'age', 'pop', 'actual_deaths',
                                          'mean_deaths', 'upper_deaths', 'lower_deaths']].view(np.recarray)
            predicted = recfunctions.append_fields(
                predicted, 'mean_rate',
                predicted.mean_deaths / predicted.pop * 100000.).view(np.recarray)
            predicted = recfunctions.append_fields(
                predicted, 'actual_rate',
                predicted.actual_deaths / predicted.pop * 100000.).view(np.recarray)
            predicted = recfunctions.append_fields(
                predicted, 'weight', np.ones(predicted.shape[0])).view(np.recarray)
            # weight each age group by the standard population weights
            for a in self.age_list:
                predicted.weight[np.where(predicted.age == a)[0]] = \
                    self.age_weights.weight[np.where(self.age_weights.age == a)[0]]
            predicted.mean_rate = predicted.mean_rate * predicted.weight
            predicted.actual_rate = predicted.actual_rate * predicted.weight
            from matplotlib import mlab
            adj_rates = mlab.rec_groupby(
                predicted, ('country', 'year'),
                (('mean_rate', np.sum, 'adj_mean_rate'),
                 ('actual_rate', np.sum, 'adj_actual_rate')))

            # calculate RMSE/RMdSE
            err = adj_rates.adj_mean_rate - adj_rates.adj_actual_rate
            sq_err = err ** 2.
            rmse = np.sqrt(np.mean(sq_err))
            rmdse = np.sqrt(np.median(sq_err))

            # calculate AARE/MdARE
            abs_rel_err = np.abs(err / adj_rates.adj_actual_rate)
            aare = np.mean(abs_rel_err)
            mdare = np.median(abs_rel_err)

            # calculate coverage (age-specific, not age-adjusted)
            coverage = ((predicted.upper_deaths >= predicted.actual_deaths) &
                        (predicted.lower_deaths <= predicted.actual_deaths)).astype(int).mean()

            # output fit metrics
            print('Root Mean Square Error: ' + str(rmse),
                  '\nRoot Median Square Error: ' + str(rmdse),
                  '\nAverage Absolute Relative Error: ' + str(aare),
                  '\nMedian Absolute Relative Error: ' + str(mdare),
                  '\nCoverage: ' + str(coverage))
            pl.rec2csv(
                np.core.records.fromarrays(
                    [np.array(('rmse', 'rmdse', 'aare', 'mdare', 'coverage')),
                     np.array((rmse, rmdse, aare, mdare, coverage))],
                    names=['metric', 'value']),
                '/home/j/Project/Causes of Death/CoDMod/tmp/' + self.name +
                '_fits_' + self.cause + '_' + self.sex + '.csv')
# Generate numpy array
tx_recs = log_util.log_data_to_np_arrays(log_data, tx_log_index)

# Define the fields to group by
group_fields = ('addr1',)

# Define the aggregation functions
stat_calc = (
    ('retry_count',  np.mean, 'avg_num_tx'),
    ('length',       len,     'num_pkts'),
    ('length',       np.mean, 'avg_len'),
    ('length',       sum,     'tot_len'),
    ('time_to_done', np.mean, 'avg_time'))

# Calculate the aggregate statistics
tx_stats = mlab.rec_groupby(tx_recs, group_fields, stat_calc)

# Display the results
print('\nTx Statistics for {0}:\n'.format(LOGFILE))

print('{0:^18} | {1:^8} | {2:^10} | {3:^14} | {4:^11} | {5:^5}'.format(
    'Dest Addr',
    'Num MPDUs',
    'Avg Length',
    'Total Tx Bytes',
    'Avg Tx Time',
    'Avg Num Tx'))

for ii in range(len(tx_stats)):
    print('{0:<18} | {1:8d} | {2:10.1f} | {3:14} | {4:11.3f} | {5:5.1f}'.format(
        wlan_exp_util.mac_addr_to_str(tx_stats['addr1'][ii]),
        tx_stats['num_pkts'][ii],
        tx_stats['avg_len'][ii],
        tx_stats['tot_len'][ii],
        tx_stats['avg_time'][ii],
        tx_stats['avg_num_tx'][ii]))
Example #13
log_np = log_util.log_data_to_np_arrays(log_data, tx_log_index)
log_tx = log_np['TX_HIGH']

# Define the fields to group by
group_fields = ('addr1',)

# Define the aggregation functions
stat_calc = (
    ('num_tx',       np.mean, 'avg_num_tx'),
    ('length',       len,     'num_pkts'),
    ('length',       np.mean, 'avg_len'),
    ('length',       sum,     'tot_len'),
    ('time_to_done', np.mean, 'avg_time'))

# Calculate the aggregate statistics
tx_stats = mlab.rec_groupby(log_tx, group_fields, stat_calc)

# Display the results
print('\nTx Statistics for {0}:\n'.format(os.path.basename(LOGFILE)))

print('{0:^18} | {1:^9} | {2:^10} | {3:^14} | {4:^11} | {5:^5}'.format(
    'Dest Addr',
    'Num MPDUs',
    'Avg Length',
    'Total Tx Bytes',
    'Avg Tx Time',
    'Avg Num Tx'))

for ii in range(len(tx_stats)):
    print('{0:<18} | {1:9d} | {2:10.1f} | {3:14} | {4:11.3f} | {5:5.2f}'.format(
        wlan_exp_util.mac_addr_to_str(tx_stats['addr1'][ii]),
        tx_stats['num_pkts'][ii],
        tx_stats['avg_len'][ii],
        tx_stats['tot_len'][ii],
        tx_stats['avg_time'][ii],
        tx_stats['avg_num_tx'][ii]))
Example #14
def profile(
        aCGH,
        signal=None,
        ymin=None,
        ymax=None,
        cmin=-1,
        cmax=1,
        cmap=plt.cm.RdYlBu_r):
    """
    Plots the aCGH signal (the log ratio of intensities) in a way that values are ordered according to the distribution of the probes throughout the genome.
    
    Parameters
    ----------
    
    aCGH : :class:`pycgh.datatypes.ArrayCGH`
        The object representing the Array CGH.
    
    signal : array_like, optional (default: None)
        The signal representing the log 2 ratio of the intensities of the test and the reference signal. If not provided, it is reconstructed using the test and reference signal contained in the aCGH object.
    
    ymin : float, optional (default: None)
        The lower limit of the y axis. If None, it is set as -ymax.
    
    ymax : float, optional (default: None)
        The upper limit of the y axis. If None, it is chosen the value of the signal array having maximum absolute value.
    
    cmin : float, optional (default: -1)
        The lower color limit of the scatter plot.
    
    cmax : float, optional (default: +1)
        The upper color limit of the scatter plot.
    
    cmap : :class:`matplotlib.colors.Colormap`, optional (default: matplotlib.pylab.cm.RdYlBu_r)
        The color map to be passed to the actual plotting function.
    
    cbticks : array_like, optional (default: None)
        The *ticks* in the color bar. If None, the color bar is divided in 9 equal parts, including all values (colors) present in the plot.
        
    Returns
    -------
    
    coords : array_like
        The *position* of the probes shifted by an amount which depends on the location of the DNA fragment in the genome.
    
    """

    chr_map = dict((str(c), c) for c in range(1, 25))
    chr_map['X'] = 23
    chr_map['Y'] = 24

    inverse_chr_map = dict((c, str(c)) for c in range(1, 23))
    inverse_chr_map[23] = 'X'
    inverse_chr_map[24] = 'Y'

    # Signal Calculation
    if signal is None:
        test_signal = aCGH.M['test_signal']
        reference_signal = aCGH.M['reference_signal']
        signal = (np.log2(test_signal) -
                  np.log2(reference_signal)).compressed()
    elif isinstance(signal, str):
        signal = aCGH.F[signal]
    elif np.ma.getmask(signal) is np.ma.nomask:
        if len(signal) > aCGH.size:  # Not filtered signal wrt current aCGH
            signal = signal[~aCGH['mask']]  # Manual extraction
    else:
        signal = signal.compressed()

    # Plot-Coordinates Calculation
    from matplotlib import mlab as ml
    positions = aCGH.F[['chromosome', 'start_base']]  # A compressed copy
    chromosomes = np.unique(positions['chromosome'])

    # Calculation of the max start_base for each Chromosome as shift
    coords = positions['start_base']
    summary = ml.rec_groupby(positions,
                             groupby=('chromosome', ),
                             stats=(('start_base', np.max, 'shifts'), ))
    shifts = np.zeros(len(summary) + 1)
    shifts[1:] = np.cumsum(summary['shifts'])

    # Applying shifts
    for i, chr in enumerate(chromosomes):
        coords[positions['chromosome'] == chr] += shifts[i]

    # Chromosomes ticks and separators
    ticks = (shifts[:-1] + shifts[1:]) / 2.0
    separators = shifts[1:-1]

    # CGH Scatter plot
    plt.scatter(coords,
                signal,
                c=signal,
                cmap=cmap,
                vmin=cmin,
                vmax=cmax,
                s=1,
                edgecolors='none')

    # Axis limits
    if ymax is None:
        ymax = max(abs(min(np.nanmin(signal), -1.1)),
                   max(np.nanmax(signal), 1.1))
    if ymin is None:
        ymin = -ymax
    plt.axis([coords.min(), coords.max(), ymin, ymax])

    # Visual effects
    plt.colorbar(extend='both')
    plt.axhline(0.0, lw=1, color='gray', ls='--')
    plt.axhline(np.log2(3 / 2.), lw=1, color='orange', ls='--')  # gain
    plt.axhline(1.0, lw=1, color='red', ls='--')
    plt.axhline(-1.0, lw=1, color='blue', ls='--')

    for sep in separators:
        plt.axvline(sep - 1, lw=1, color='gray', ls='-')

    label_ticks = ['Chr %s' % inverse_chr_map[k] for k in chromosomes]
    plt.xticks(ticks, label_ticks, rotation=90, size=8)
    #plt.tick_params(axis='x', direction='out', length=3, colors='black',
    #                labelbottom='on')

    return coords
Example #15
def volume_code(volume):
    'code the continuous volume data categorically'
    ind = np.searchsorted([1e5,1e6, 5e6,10e6, 1e7], volume)
    return ind
summaryfuncs = (
    ('date', lambda x: [thisdate.year for thisdate in x], 'years'),
    ('date', lambda x: [thisdate.month for thisdate in x], 'months'),
    ('date', lambda x: [thisdate.weekday() for thisdate in x], 'weekday'),
    ('adj_close', daily_return, 'dreturn'),
    ('volume', volume_code, 'volcode'),
    )
rsum = mlab.rec_summarize(r, summaryfuncs)

# stats is a list of (dtype_name, function, output_dtype_name).
# rec_groupby will summarize the attribute identified by dtype_name
# over the groups in the groupby list and assign the result to
# output_dtype_name.
stats = (
    ('dreturn', len, 'rcnt'),
    ('dreturn', np.mean, 'rmean'),
    ('dreturn', np.median, 'rmedian'),
    ('dreturn', np.std, 'rsigma'),
    )

# summarize over a single variable, like years or months
print('summary by years')
ry = mlab.rec_groupby(rsum, ('years',), stats)
print(mlab.rec2txt(ry))

print('summary by months')
rm = mlab.rec_groupby(rsum, ('months',), stats)
print(mlab.rec2txt(rm))

# or over multiple variables, like years and months
print('summary by year and month')
rym = mlab.rec_groupby(rsum, ('years', 'months'), stats)
print(mlab.rec2txt(rym))

print('summary by volume')
rv = mlab.rec_groupby(rsum, ('volcode',), stats)
print(mlab.rec2txt(rv))
Example #17
def agg_compactness(array):
    stats = (('traj_compactness', len, 'total'), )

    return mpl.rec_groupby(array, ('traj_compactness', ), stats)
Example #18
colors = brewer2mpl.get_map('YlGnBu', 'sequential', 9).mpl_colors
colors = [colors[5], colors[8]]
i = 0

dows = ["wd", "we"]
labels = ["WD", "WE"]

stats = (
    ('node_count', len, 'total'),
)

for dow in dows:

    table = "/Users/igobrilhante/Documents/workspace/research/ComeTogether/experiments/network."+city+"_fs_poiclusterf_traj_"+dow+"_"+str(hours)+"h_trajs_communities.csv"
    data = mlab.csv2rec(table)
    agg = mlab.rec_groupby(data, ('node_count',), stats)
    print(agg)
    plt.plot(agg.node_count, agg.total, color=colors[i], label=labels[i],
             marker=markers[i], markersize=marker_size, linewidth=line_width,
             markeredgewidth=0.0, alpha=alpha)
    i += 1


plt.tick_params(axis='both', which='major', labelsize=16, colors="#000000")
# plt.xlim([-100, 1000])
# plt.ylim([-0.1, 1.1])


plt.xlabel('Number of nodes')
plt.ylabel('Number of communities')

fig.tight_layout()
def agg_degree( array ):
    stats = (
        ('degree', len, 'total'),
    )

    return mpl.rec_groupby(array, ('degree',), stats)
Example #20
    def make_aggregation(self, lista, grouping):
        # Take the timestamp from the first taxi
        time = list(lista.values())[0].state['dtime']
        #time = calendar.timegm(time.timetuple())

        # A fair amount of juggling to get a data structure suitable for
        # numpy operations; this could be made much more efficient.
        pars_list = []
        for item in Car.PARS:
            pars_list.append(item)

        dtype = []
        for par in pars_list:
            dtype.append((par, Car.PARS[par]))
        
        # print pars_list
        # print dtype

        all_states = []
        for car in lista.values():
            all_states.append(tuple([car.state[par] for par in pars_list]))

        arr = np.rec.array(all_states, dtype)

        def queue(x):
            # Count cars whose waiting time exceeds 1 (the queue length).
            return np.sum(x > 1)

        # Aggregate by area or areaPic depending on which layer it is
        # if layer == "Money":
        #     grouping = ('areanumPick', 'statusf')
        # else:
        #     grouping = ('areanum', 'statusf')

        # define which metrics to calculate with which function
        metrics = (('areanum', np.count_nonzero, 'aggregate'),
                   ('waitingtimef', np.min, 'minwait'),
                   ('waitingtimef', np.mean, 'meanwait'),
                   ('waitingtimef', np.max, 'maxwait'),

                   ('runlengthf', np.mean, 'meanrun'),
                   ('runlengthf', np.max, 'maxrun'),

                   ('runDistance', np.mean, 'meanRunDistance'),

                   ('searchlengthf', np.mean, 'meansearch'),
                   ('searchlengthf', np.max, 'maxsearch'),

                   ('waitingtimef', queue, 'queuelength'),

                   ('areaSearchingTime', np.min, 'minareaSearchingTime'),
                   ('areaSearchingTime', np.mean, 'meanareaSearchingTime'),
                   ('areaSearchingTime', np.max, 'maxareaSearchingTime'),

                   ('areaTime', np.min, 'minareaTime'),
                   ('areaTime', np.mean, 'meanareaTime'),
                   ('areaTime', np.max, 'maxareaTime'),

                   ('areaIniOccupiedTime', np.min, 'minareaIniOccupiedTime'),
                   ('areaIniOccupiedTime', np.mean, 'meanareaIniOccupiedTime'),
                   ('areaIniOccupiedTime', np.max, 'maxareaIniOccupiedTime'),

                   ('areaPercentage', np.min, 'minareaPercentage'),
                   ('areaPercentage', np.mean, 'meanareaPercentage'),
                   ('areaPercentage', np.max, 'maxareaPercentage'),
                   )
        # and actually make the calculations
        result = mlab.rec_groupby(arr, grouping, metrics)

        # and convert the result back to a list of dictionaries
        keys = result.dtype.names
        aggregates = [dict(zip(keys, record)) for record in result]
        f.insertColumAggregado(aggregates, 'dtime', time)
        return aggregates
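# Hypothetical invocation, using the grouping hinted at by the commented-out
# choices inside make_aggregation:
aggregates = self.make_aggregation(lista, ('areanum', 'statusf'))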