Example #1
0
 def panel4d(self):
     """Build a random Panel4D fixture with named labels, items and minor axis."""
     # 2 labels x 4 items x 10000 major rows x 3 minor columns of uniform noise.
     values = np.random.rand(2, 4, 10000, 3)
     return pd.Panel4D(values,
                       labels=('gallahad', 'lancelot'),
                       items=('I', 'II', 'III', 'IV'),
                       minor_axis=('A', 'B', 'C'))
Example #2
0
 def test_pandas(self):
     """Round-trip several pandas objects through stash/unstash and check
     that every stashed global reappears in the returned vault."""
     # These names are created as module-level globals so stash() can find them.
     global df, p, p4d, s_int32, s_int64, s_int16, s_int8
     # One Series per integer width to exercise dtype preservation.
     s_int8 = pd.Series([1, 2, 3], dtype=np.int8)
     s_int16 = pd.Series([1, 2, 3], dtype=np.int16)
     s_int32 = pd.Series([1, 2, 3], dtype=np.int32)
     s_int64 = pd.Series([1, 2, 3], dtype=np.int64)
     # DataFrame with columns s0..s3 (one per Series above) plus two float columns.
     d = dict(('s' + str(i), s) for i, s in enumerate([s_int8, s_int16, s_int32, s_int64]))
     df = pd.DataFrame(d)
     df['float32'] = np.array([1.0, 2.0, 3.0], dtype=np.float32)
     df['float64'] = np.array([1.0, 2.0, 3.0], dtype=np.float64)
     # Higher-dimensional containers built from the same frame.
     p = pd.Panel({'A': df, 'B': df})
     p4d = pd.Panel4D({'i': p, 'ii': p})
     vars = ['s_int8', 's_int16', 's_int32', 's_int64', 'df', 'p', 'p4d']
     with ensure_clean() as path:
         # Persist current globals, then delete them to prove unstash restores them.
         stash(path, verbose=False)
         _g = globals()
         for v in vars:
             del _g[v]
         vault = unstash(path, verbose=False)
         self.assertTrue('s_int8' in vault)
         self.assertTrue('s_int16' in vault)
         self.assertTrue('s_int32' in vault)
         self.assertTrue('s_int64' in vault)
         self.assertTrue('df' in vault)
         self.assertTrue('p' in vault)
         self.assertTrue('p4d' in vault)
         # NOTE(review): deleting again presumably undoes globals that
         # unstash() re-injected — confirm unstash's side effect on globals().
         for v in vars:
             del _g[v]
Example #3
0
def aggregate_maps(maplist):
    """Aggregate per-sample relation maps into one normalized Panel4D.

    Parameters
    ----------
    maplist : list of dict
        Each element maps class -> class -> 2-D count array of shape
        (2*n, 2*n).

    Returns
    -------
    pandas.Panel4D
        Labels and items are the observed classes; counts are summed over
        all input maps and normalized along the labels axis.
    """
    classes = np.unique([k for m in maplist for k in m.keys()])
    # Bug fix: dict views are not subscriptable on Python 3, and `/ 2`
    # yields a float there; take the first entry explicitly and use
    # integer division (both forms also behave correctly on Python 2).
    first_inner = next(iter(maplist[0].values()))
    n = next(iter(first_inner.values())).shape[0] // 2

    # allocate normalized relation maps
    maps = pandas.Panel4D(labels=classes,
                          items=classes,
                          major_axis=range(2 * n),
                          minor_axis=range(2 * n))
    # Use a simple, small, uniform prior to deal with empty (zero) counts
    # in the histogram.
    maps = maps.fillna(1)

    # Accumulate raw counts from every per-sample map.
    # (Bug fix: .items() instead of Py2-only .iteritems().)
    for mapi in maplist:
        for c, mapi_from in mapi.items():
            for c0, mapi_to in mapi_from.items():
                maps[c][c0] += mapi_to

    # normalization over the labels axis
    tot = maps.sum(axis='labels')
    for c1 in classes:
        maps.ix[:, c1, :, :] = maps.ix[:, c1, :, :].divide(tot[c1]).as_matrix()

    return maps
def panel4d_from_filenames(alg_filenames):
    """Load each JSON results file and assemble all of them into one Panel4D.

    Each file contributes a single label keyed by its basename without
    the extension; the per-file panel comes from problem_results_panel().
    """
    panels = {}
    for fname in alg_filenames:
        with open(fname) as fh:
            raw = json.load(fh)
        shortname = os.path.splitext(os.path.basename(fname))[0]
        panels[shortname] = problem_results_panel(raw)
    return pandas.Panel4D(panels)
    def __init__(self, alpha, gamma):
        """Initialize the Q-learning agent: a 4-D Q-table plus bookkeeping state."""
        # One 4x64x2 panel of float zeros, shared as the initial table for
        # every action label (AN/BF/CR/DL).
        zeros = 0.0 + np.zeros((4, 64, 2))
        base_panel = pd.Panel(zeros)
        self.qtable = pd.Panel4D(dict(AN=base_panel, BF=base_panel,
                                      CR=base_panel, DL=base_panel))
        self.alpha = alpha
        self.gamma = gamma
        self.trial_count = 0
        self.qvalue = np.zeros(4)
        self.next_qvalue = np.zeros(4)
        self.total_time = 0
        # np.random.randint(1, ...) only ever yields 0, i.e. a 101x3 zero frame.
        self.trial_steps_df = pd.DataFrame(np.random.randint(1, size=(101, 3)))
        pd.set_option("display.max_rows", 200)
        self.success_rate = 0.0
Example #6
0
    def read_variable_netcdf(self, fpath, s, variable=None):
        """Read a (sliced) variable from a netCDF file into a pandas object.

        Parameters
        ----------
        fpath : str
            Path to the netCDF file.
        s : slice or tuple of slices
            Per-dimension selection; a bare slice is promoted to a 1-tuple.
        variable : str, optional
            Variable name; defaults to ``self.varname``.

        Returns
        -------
        pd.Series, pd.DataFrame, pd.Panel or pd.Panel4D
            Pandas structure with one axis per retained dimension.

        Raises
        ------
        NotImplementedError
            If the variable's dimensions are unknown or exceed four axes.
        """

        if not isinstance(s, tuple):
            s = (s,)

        if variable is None:
            var = self.varname
        else:
            var = variable

        df = None
        with netCDF4.Dataset(fpath, 'r') as nc:

            # read axes
            dims = AeoLiS.get_dims(var)
            if dims is None:
                # Bug fix: the original called ``NotImplemented(...)``, which
                # raises TypeError (NotImplemented is not callable); raise the
                # intended exception instead.
                raise NotImplementedError('No support for variable %s' % var)
            axs = []
            for i, dim in enumerate(dims):
                if dim == 'time':
                    # Convert raw time values to datetime objects using the
                    # units attribute stored in the file.
                    ax = netCDF4.num2date(nc.variables['time'][s[i]],
                                          nc.variables['time'].units)
                else:
                    ax = nc.variables[dim][s[i]]
                # Scalar selections are dropped; only array axes are kept.
                if isinstance(ax, np.ndarray):
                    axs.append(pd.Index(ax, name=dim))

            # read data
            data = nc.variables[var][s]

            # construct pandas object matching the number of retained axes
            if len(axs) == 1:
                df = pd.Series(data, index=axs[0])
            elif len(axs) == 2:
                df = pd.DataFrame(data, index=axs[0], columns=axs[1])
            elif len(axs) == 3:
                df = pd.Panel(data, items=axs[0], major_axis=axs[1], minor_axis=axs[2])
            elif len(axs) == 4:
                df = pd.Panel4D(data, labels=axs[0], items=axs[1], major_axis=axs[2], minor_axis=axs[3])
            else:
                # Bug fix: ``raise NotImplemented(...)`` also raises TypeError;
                # raise the proper exception class.
                raise NotImplementedError('No pandas structure with more than four dimensions, reduce dimensionality')

        return df
    entity_classes = randomize_occurrences(entity_classes,
                                           preserve_years=preserve_years)
# Networks for the (possibly randomized) entity-class assignment.
networks = calculate_cooccurrence_networks(entity_classes, target_years)

# In[62]:

if randomized_control and chain:  #If we have a chained randomization process, then keep going!
    # Iteration 0 is the network already computed above; each subsequent
    # iteration re-randomizes the previous assignment (a chained shuffle).
    randomizations = {0: networks}
    for iteration in range(1, chain):
        # Progress marker every 100 iterations.
        if not iteration % 100:
            print(iteration)
        entity_classes = randomize_occurrences(entity_classes,
                                               preserve_years=preserve_years)
        randomizations[iteration] = calculate_cooccurrence_networks(
            entity_classes, target_years)
    # Stack all iterations into one 4-D structure (iteration as labels).
    networks = pd.Panel4D(randomizations)

# In[63]:

# Replace integer class codes on both node axes with their class labels.
networks.major_axis = class_lookup.index[networks.major_axis]
networks.minor_axis = class_lookup.index[networks.minor_axis]

# In[64]:

if randomized_control:
    # Output file name encodes the year window, entity column, class system
    # and whether the shuffle preserved years.
    if preserve_years:
        file_name = 'synthetic_control_cooccurrence_%s%s_preserve_years_%s' % (
            n_years_label, entity_column, class_system)
    else:
        file_name = 'synthetic_control_cooccurrence_%s%s_no_preserve_years_%s' % (
            n_years_label, entity_column, class_system)
Example #8
0
def load_dataframe(fname,
                   shape=None,
                   start=0,
                   stop=np.inf,
                   step=1,
                   verbose=False):
    '''Read blocks from an output file into a pandas structure.

    Parameters
    ----------
    fname : string
        Path to output file
    shape : tuple or list, optional
        Shape of data in file; determined with ``get_dims`` when omitted
    start : int, optional
        First block position to read
    stop : int, optional
        Last block position to read
    step : int, optional
        Stride between blocks
    verbose : bool, optional
        Flag to enable process output

    Returns
    -------
    pandas.DataFrame, pandas.Panel or pandas.Panel4D
        Pandas object containing data from output file

    Examples
    --------
    >>> load_dataframe('mass.out', stop=10) # read first 10 blocks
    >>> load_dataframe('supply.out', step=10) # read every 10th block
    >>> load_dataframe('supply.out', start=100, step=2, stop=200) # read every even block from 100th to 200th
    '''

    if shape is None:
        shape = get_dims(fname)

    dims = load_dimensions(fname)
    data = read_fortran(fname, shape=shape, start=start,
                        stop=stop, step=step, verbose=verbose)

    # One block every dt_out * step seconds along the time axis.
    #ix = pd.TimedeltaIndex(start=0, periods=z.shape[0]-1, freq='H')
    ix = pd.DatetimeIndex(start=0, periods=data.shape[0],
                          freq='%dS' % round(dims['dt_out'] * step))

    ndim = len(shape)
    if ndim == 1:
        return pd.DataFrame(data, index=ix)
    if ndim == 2:
        return pd.DataFrame(data, index=ix, columns=dims['ax_x'])
    if ndim == 3:
        return pd.Panel(data, items=ix, major_axis=dims['ax_x'])
    if ndim == 4:
        return pd.Panel4D(data, labels=ix, items=dims['ax_x'])
    raise ValueError('Unsupported dimension count [%d]' % ndim)
def calculate_citation_networks(citations,
                                metrics,
                                target_years,
                                classes=classes,
                                n_years=n_years):
    """Build a Panel4D of class-by-class citation metrics, one slice per
    (metric, year) pair.

    For each target year the citations are windowed (cumulatively or to the
    trailing ``n_years``) and each requested metric fills one labels-slice.
    """
    # Output container: metric x year x class x class, float-valued.
    networks = pd.Panel4D(labels=metrics,
                          items=target_years,
                          major_axis=classes,
                          minor_axis=classes,
                          dtype='float64')

    for year in target_years:
        print(year)
        #         these_citations = citations[citations['Year_Citing_Patent']<=year]
        # Cumulative window when n_years is unset/'all'/'cumulative';
        # otherwise a trailing window of n_years ending at `year`.
        if n_years is None or n_years == 'all' or n_years == 'cumulative':
            these_citations = citations[
                citations['Year_Citing_Patent'] <= year]
        else:
            these_citations = citations[(
                (citations['Year_Citing_Patent'] <= year) &
                (citations['Year_Citing_Patent'] > (year - n_years)))]

        if 'Class_CoCitation_Count' in metrics:
            print('Class_CoCitation_Count')
            networks.ix['Class_CoCitation_Count',
                        year, :, :] = cocitation_counts(these_citations)

        # Class -> class direct citation counts (sparse; densified on write).
        citation_counts = calculate_citation_counts(
            these_citations, relation='class_cites_class')
        if 'Class_Cites_Class_Count' in metrics:
            print('Class_Cites_Class_Count')
            networks.ix['Class_Cites_Class_Count',
                        year, :, :] = array(citation_counts.todense())
        if 'Class_Cited_by_Class_Count' in metrics:
            print('Class_Cited_by_Class_Count')
            # Transpose flips the direction: cited-by instead of cites.
            networks.ix['Class_Cited_by_Class_Count',
                        year, :, :] = array(citation_counts.todense().T)

        if 'Class_Cites_Class_Input_Cosine_Similarity' in metrics:
            print('Class_Cites_Class_Input_Cosine_Similarity')
            networks.ix['Class_Cites_Class_Input_Cosine_Similarity',
                        year, :, :] = cosine_similarities(citation_counts)
        if 'Class_Cites_Class_Output_Cosine_Similarity' in metrics:
            print('Class_Cites_Class_Output_Cosine_Similarity')
            networks.ix['Class_Cites_Class_Output_Cosine_Similarity',
                        year, :, :] = cosine_similarities(citation_counts.T)

        # Class -> patent counts feed the patent-level similarity metrics.
        citation_counts = calculate_citation_counts(
            these_citations, relation='class_cites_patent')
        if 'Class_Cites_Patent_Input_Cosine_Similarity' in metrics:
            print('Class_Cites_Patent_Input_Cosine_Similarity')
            networks.ix['Class_Cites_Patent_Input_Cosine_Similarity',
                        year, :, :] = cosine_similarities(citation_counts.T)

        if 'Class_Cites_Patent_Input_Jaccard_Similarity' in metrics:
            print('Class_Cites_Patent_Input_Jaccard_Similarity')
            networks.ix['Class_Cites_Patent_Input_Jaccard_Similarity',
                        year, :, :] = jaccard_similarities(citation_counts.T)

        # Patent -> class counts for the output-side similarities.
        citation_counts = calculate_citation_counts(
            these_citations, relation='patent_cites_class')
        if 'Patent_Cites_Class_Output_Cosine_Similarity' in metrics:
            print('Patent_Cites_Class_Output_Cosine_Similarity')
            networks.ix['Patent_Cites_Class_Output_Cosine_Similarity',
                        year, :, :] = cosine_similarities(citation_counts.T)

        if 'Patent_Cites_Class_Output_Jaccard_Similarity' in metrics:
            print('Patent_Cites_Class_Output_Jaccard_Similarity')
            networks.ix['Patent_Cites_Class_Output_Jaccard_Similarity',
                        year, :, :] = jaccard_similarities(citation_counts.T)

    return networks
    static = not time_periods.loc[period, 'DTA']
    if static:
        # Static periods load their trip table from disk, restricted to the
        # known node ids (columns are string-typed in the CSV header).
        name = time_periods.loc[period, 'Period']
        static_periods.append(name)
        TRIP_TABLE_FILE = r'D:\ShoresideTDM\TimePeriods\{}\trip_table.csv'.format(name)
        trip_tables[name] = pd.read_csv(TRIP_TABLE_FILE, index_col = 0).loc[node_ids, np.array(node_ids).astype(str)]
# Stack the per-period trip tables into a 3-D structure.
trip_tables = pd.Panel(trip_tables)

P = len(static_periods)
L = len(link_ids)
N = len(node_ids)

#Create 4-dimensional table (Static Periods x Links x Origin Nodes x Destination Nodes)
od_flows = pd.Panel4D(np.zeros((P, L, N, N)),
                      labels = static_periods,
                      items = link_ids,
                      major_axis = node_ids,
                      minor_axis = node_ids.astype(str))

#For each time period, if a link is in a shortest path between two nodes, put all of that time period's trip between said nodes in the 4D table
for period in static_periods:
    for l in link_ids:
        # shortest_paths[l] presumably is a 0/1 OD mask for link l — confirm.
        od_flows.ix[period, l, :, :] = trip_tables[period] * shortest_paths[l]
#print('TEST: {}'.format(od_flows.ix['MD', 1295, 104, '530']))

#Sum over origin and destination nodes to get flows on each link for each time period
link_flows = od_flows.sum(3).sum(2)
link_flows.to_csv(OUTPUT_FILE)

end_time = time.time()
runtime = round(end_time - start_time, 1)
Example #11
0
# In[11]:

if output_cooccurrence:
    # Accumulate randomized-control statistics (mean/std/max/min) across all
    # entity types into four Panel4Ds, one labels-slice per entity type.
    M = None
    for entity in entity_types:
        print(entity)
        (M_entity,
         standard_deviation_entity,
         all_max_entity,
         all_min_entity) = running_stats('synthetic_cooccurrence_%s_%s'%(entity, class_system),
                                          cooccurrence_base_file_name%(entity, class_system),
                                          coocurrence_controls_directory
                                         )
        if M is None:
            # First entity type: create the containers.
            M = pd.Panel4D({'Class_CoOccurrence_Count_%s'%entity: M_entity})
            standard_deviation = pd.Panel4D({'Class_CoOccurrence_Count_%s'%entity: standard_deviation_entity})
            all_max = pd.Panel4D({'Class_CoOccurrence_Count_%s'%entity: all_max_entity})
            all_min = pd.Panel4D({'Class_CoOccurrence_Count_%s'%entity: all_min_entity})
        else:
            # Subsequent entity types: add a new labels-slice to each.
            M['Class_CoOccurrence_Count_%s'%entity] = M_entity
            standard_deviation['Class_CoOccurrence_Count_%s'%entity] = standard_deviation_entity
            all_max['Class_CoOccurrence_Count_%s'%entity] = all_max_entity
            all_min['Class_CoOccurrence_Count_%s'%entity] = all_min_entity

    # Persist results into the HDF store (append mode).
    store = pd.HDFStore(data_directory+'Class_Relatedness_Networks/cooccurrence/%s.h5'%(output_cooccurrence),
                        mode='a', table=True)
    store.put('/randomized_mean_%s%s'%(n_years_label, class_system), M, 'table', append=False)
    store.put('/randomized_std_%s%s'%(n_years_label, class_system), standard_deviation, 'table', append=False)

    # NOTE(review): all_min is computed but never written here — confirm
    # whether a randomized_min put was intended (may be outside this view).
    store.put('/randomized_max_%s%s'%(n_years_label, class_system), all_max, 'table', append=False)
Example #12
0
def align(data, reference_frame=0, upsample_factor=10, center=True, **kwargs):
    """ Perform image alignment on data.

        usage: aligned_data, shifts = align(data)

        Performs sub-pixel image translation registration (image alignment)
        on the data. Each frame of each element is registered seperately.
        skimage.features.register_translation() is used for obtaining the
        shifts and scipy.ndimage.shift() is used to apply the shifts to the
        data. By default, the upsample_factor is set to 10.

        The data can be given as a pandas Panel4D (sims default) or Panel
        (a single element), or as a numpy array.

        The reference frame is the frame to which all other images in the
        stack are adjusted. By default this is 0, the first frame. A list
        may also be given, in which case a different reference frame for
        each element can be set.

        If center=True (the default), the shifts are centered to the median
        of the shifts to minimize blank edges.

        Returns the aligned data and the shifts.
    """
    # NOTE(review): **kwargs is accepted but never forwarded to
    # register_translation()/shift(), despite the original docstring's claim
    # of pass-through — confirm intended behavior before forwarding.
    was_panel = False
    was_numpy = False
    if isinstance(data, pd.Panel):
        # Promote a single-element Panel to a Panel4D under a dummy 'x' label.
        data = pd.Panel4D(data=data.values,
                          labels=['x'],
                          items=data.items,
                          major_axis=data.major_axis,
                          minor_axis=data.minor_axis)
        was_panel = True
    elif isinstance(data, np.ndarray):
        if data.ndim == 3:
            data = pd.Panel4D(data=[data])
        elif data.ndim == 4:
            data = pd.Panel4D(data=data)
        else:
            msg = 'data must be 3 dimensional (one stack of 2D images), or '
            msg += '4 dimentional (multiple stacks of 2D images).'
            raise TypeError(msg)
        was_numpy = True

    # A scalar reference frame applies to every element.
    if isinstance(reference_frame, int):
        reference_frame = [reference_frame] * data.shape[0]

    # Calculate per-frame (y, x) shifts relative to each element's reference.
    shifts = []
    for lbl, ref in zip(data.labels, reference_frame):
        shifts_per_lbl = []
        for lyr in data.loc[lbl]:
            sh = register_translation(data.loc[lbl, ref],
                                      data.loc[lbl, lyr],
                                      upsample_factor=upsample_factor)[0]
            shifts_per_lbl.append(sh)
        shifts.append(np.vstack(shifts_per_lbl))
    shifts = pd.Panel(shifts, items=data.labels, minor_axis=['y', 'x'])

    # Center shifts.
    # NOTE(review): this *adds* the median; centering usually subtracts it —
    # confirm the sign convention of register_translation before changing.
    if center:
        xmedian = shifts.loc[:, :, 'x'].median()
        ymedian = shifts.loc[:, :, 'y'].median()
        shifts.loc[:, :, 'x'] += xmedian
        shifts.loc[:, :, 'y'] += ymedian

    # Apply shifts to data.
    # Bug fix: propagate all four axes (the original passed only `labels`,
    # so shifted.loc[lbl, frm] failed whenever the input Panel carried
    # non-default item/axis labels).
    shifted = pd.Panel4D(np.zeros(data.shape),
                         labels=data.labels,
                         items=data.items,
                         major_axis=data.major_axis,
                         minor_axis=data.minor_axis)
    for lbl in data.labels:
        for frm in data.items:
            shifted.loc[lbl, frm] = shift(data.loc[lbl, frm],
                                          shifts.loc[lbl, frm])
    if was_panel:
        return (shifted['x'], shifts['x'])
    elif was_numpy:
        return (shifted.values.squeeze(), shifts.values.squeeze())
    else:
        return (shifted, shifts)
Example #13
0
    def get_seasonal_clim_cross_section_with_ttest_data(
            self,
            start_year=None,
            end_year=None,
            season_to_months=None,
            varname="votemper",
            start_point=None,
            end_point=None):
        """Compute seasonal climatology statistics for a 4-D model variable.

        For every year in [start_year, end_year] the variable is averaged by
        season (defined by season_to_months), then the per-year seasonal
        fields are reduced to mean, std and sample count.

        :param start_year: first year; defaults to the earliest available
        :param end_year: last year; defaults to the latest available
        :param season_to_months: mapping season name -> iterable of months
        :param varname: netCDF variable name (must be 4-D: t, z, y, x)
        :param start_point: unused here — TODO confirm (cross-section endpoints?)
        :param end_point: unused here — TODO confirm
        :return: dict season -> (masked mean field, std field, n-obs)
        """

        if start_year is None:
            start_year = min(self.year_to_path.keys())

        if end_year is None:
            end_year = max(self.year_to_path.keys())

        # Set up month to season relation; unmapped months get "no-season".
        month_to_season = defaultdict(lambda: "no-season")
        for m in range(1, 13):
            for s, months in season_to_months.items():
                if m in months:
                    month_to_season[m] = s
                    break

        # Collect one seasonal-mean field per year and season.
        season_to_field_list = defaultdict(list)
        for y in range(start_year, end_year + 1):
            fpath = self.year_to_path[y]

            with MFDataset(fpath) as ds:

                data_var = ds.variables[varname]

                assert data_var.ndim == 4

                data = data_var[:]  # (t, z, y, x)

                nt, nz, ny, nx = data.shape

                time_var = ds.variables["time_counter"]

                dates = num2date(time_var[:], time_var.units)

                # Wrap the raw array so time steps can be grouped by season.
                panel = pd.Panel4D(data=data,
                                   labels=dates,
                                   items=range(nz),
                                   major_axis=range(ny),
                                   minor_axis=range(nx))

                # Average all time steps belonging to the same season.
                seas_mean = panel.groupby(lambda d: month_to_season[d.month],
                                          axis="labels").mean()

                print(seas_mean)

                for the_season in seas_mean:
                    season_to_field_list[the_season].append(
                        seas_mean[the_season].values)

        # Reduce the per-year fields: mean and std across years, plus the
        # number of contributing years (for a later t-test).
        result = {}
        for the_season, field_list in season_to_field_list.items():
            # Transpose (z, y, x) -> (z, x, y); presumably to match the
            # orientation of self.lake_mask — confirm.
            mean_field = np.mean(field_list, axis=0).transpose((0, 2, 1))
            std_field = np.std(field_list, axis=0).transpose((0, 2, 1))
            nobs = len(field_list)

            print(mean_field.shape)

            result[the_season] = (np.ma.masked_where(~self.lake_mask,
                                                     mean_field), std_field,
                                  nobs)

        return result
Example #14
0
def dict_to_panel(maps):
    """Bundle a mapping of per-key 3-D structures into one pandas Panel4D."""
    panel = pandas.Panel4D(maps)
    return panel