def panel4d(self):
    """Build a random Panel4D test fixture.

    Returns a ``pd.Panel4D`` of uniform random values with shape
    (2 labels, 4 items, 10000 major-axis rows, 3 minor-axis columns).
    """
    values = np.random.rand(2, 4, 10000, 3)
    return pd.Panel4D(
        values,
        labels=('gallahad', 'lancelot'),
        items=('I', 'II', 'III', 'IV'),
        minor_axis=('A', 'B', 'C'),
    )
def test_pandas(self):
    """Round-trip pandas objects of several dtypes through stash/unstash.

    Creates Series (int8/16/32/64), a mixed-dtype DataFrame, a Panel and a
    Panel4D as module-level globals, stashes them to a temp file, deletes
    them from globals, then unstashes and verifies each name is recovered.
    NOTE(review): relies on external helpers ``ensure_clean``, ``stash`` and
    ``unstash`` — presumably stash serializes the calling module's globals;
    confirm against their definitions.
    """
    global df, p, p4d, s_int32, s_int64, s_int16, s_int8
    # One Series per integer width so dtype preservation can be checked
    s_int8 = pd.Series([1, 2, 3], dtype=np.int8)
    s_int16 = pd.Series([1, 2, 3], dtype=np.int16)
    s_int32 = pd.Series([1, 2, 3], dtype=np.int32)
    s_int64 = pd.Series([1, 2, 3], dtype=np.int64)
    d = dict(('s' + str(i), s) for i, s in enumerate([s_int8, s_int16, s_int32, s_int64]))
    df = pd.DataFrame(d)
    df['float32'] = np.array([1.0, 2.0, 3.0], dtype=np.float32)
    df['float64'] = np.array([1.0, 2.0, 3.0], dtype=np.float64)
    # Higher-dimensional containers built from the same frame
    p = pd.Panel({'A': df, 'B': df})
    p4d = pd.Panel4D({'i': p, 'ii': p})
    vars = ['s_int8', 's_int16', 's_int32', 's_int64', 'df', 'p', 'p4d']
    with ensure_clean() as path:
        stash(path, verbose=False)
        # Remove the originals so unstash is proven to restore them
        _g = globals()
        for v in vars:
            del _g[v]
        vault = unstash(path, verbose=False)
        self.assertTrue('s_int8' in vault)
        self.assertTrue('s_int16' in vault)
        self.assertTrue('s_int32' in vault)
        self.assertTrue('s_int64' in vault)
        self.assertTrue('df' in vault)
        self.assertTrue('p' in vault)
        self.assertTrue('p4d' in vault)
        # Clean up: unstash apparently re-injected the names into globals —
        # TODO confirm against unstash's implementation
        for v in vars:
            del _g[v]
def aggregate_maps(maplist):
    """Aggregate a list of nested per-class relation maps into one Panel4D.

    Parameters
    ----------
    maplist : list of dict
        Each element maps class -> class -> 2D count array; all count arrays
        are assumed to share the same shape (2n x 2n) — TODO confirm.

    Returns
    -------
    pandas.Panel4D
        Counts (plus a uniform prior of 1) normalized so each target-class
        slice sums to 1 over the ``labels`` axis.
    """
    classes = np.unique([k for m in maplist for k in m.keys()])
    # BUG FIX: dict views are not indexable in Python 3 — the original
    # ``maplist[0].values()[0]`` is Python-2-only.  ``//`` keeps the integer
    # floor-division semantics Python 2's ``/`` had for ints.
    first_inner = next(iter(maplist[0].values()))
    n = next(iter(first_inner.values())).shape[0] // 2
    # allocate normalized relation maps
    maps = pandas.Panel4D(labels=classes, items=classes,
                          major_axis=range(2 * n), minor_axis=range(2 * n))
    # Use a simple, small, uniform prior to deal with empty (zero) counts
    # in the histogram
    maps = maps.fillna(1)
    for mapi in maplist:
        # BUG FIX: .iteritems() was removed in Python 3; .items() is
        # equivalent here
        for c, mapi_from in mapi.items():
            for c0, mapi_to in mapi_from.items():
                maps[c][c0] += mapi_to
    # normalization: divide each target-class slice by its total over labels
    tot = maps.sum(axis='labels')
    for c1 in classes:
        # BUG FIX: .as_matrix() was deprecated/removed; .values is the
        # supported equivalent
        maps.ix[:, c1, :, :] = maps.ix[:, c1, :, :].divide(tot[c1]).values
    return maps
def panel4d_from_filenames(alg_filenames):
    """Load per-problem JSON result files and stack them into a Panel4D.

    Each file is parsed with ``problem_results_panel`` and keyed by its
    basename without extension.
    """
    panels = {}
    for fname in alg_filenames:
        with open(fname) as fh:
            raw_results = json.load(fh)
        shortname = os.path.splitext(os.path.basename(fname))[0]
        panels[shortname] = problem_results_panel(raw_results)
    return pandas.Panel4D(panels)
def __init__(self, alpha, gamma):
    """Initialize the learner state.

    Builds a zeroed Q-table as a Panel4D keyed by the four action labels,
    stores the learning parameters, and resets all trial bookkeeping.
    """
    zeros_3d = np.zeros((4, 64, 2))
    base_panel = pd.Panel(zeros_3d)
    # Same zero panel under each action label -> 4D Q-table
    self.qtable = pd.Panel4D({'AN': base_panel, 'BF': base_panel,
                              'CR': base_panel, 'DL': base_panel})
    self.alpha = alpha
    self.gamma = gamma
    self.trial_count = 0
    self.qvalue = np.zeros(4)
    self.next_qvalue = np.zeros(4)
    self.total_time = 0
    # randint(1) always yields 0, so this is a 101x3 frame of zeros
    self.trial_steps_df = pd.DataFrame(np.random.randint(1, size=(101, 3)))
    pd.set_option("display.max_rows", 200)
    self.success_rate = 0.0
def read_variable_netcdf(self, fpath, s, variable=None):
    """Read a slice of a netCDF variable into a matching pandas structure.

    Parameters
    ----------
    fpath : str
        Path to the netCDF file.
    s : slice or tuple of slices
        Slice(s) selecting data along the variable's dimensions; a single
        slice is wrapped in a tuple.
    variable : str, optional
        Variable name; defaults to ``self.varname``.

    Returns
    -------
    pandas.Series, DataFrame, Panel or Panel4D depending on how many
    non-scalar axes survive the slicing.

    Raises
    ------
    NotImplementedError
        If the variable's dimensions are unknown or more than four axes
        remain after slicing.
    """
    if not isinstance(s, tuple):
        s = (s,)
    if variable is None:
        var = self.varname
    else:
        var = variable
    df = None
    with netCDF4.Dataset(fpath, 'r') as nc:
        # read axes
        dims = AeoLiS.get_dims(var)
        if dims is None:
            # BUG FIX: the original evaluated ``NotImplemented('...')``
            # without raising it — and the NotImplemented singleton is not
            # callable, so that line raised an unrelated TypeError.
            raise NotImplementedError('No support for variable %s' % var)
        axs = []
        for i, dim in enumerate(dims):
            if dim == 'time':
                ax = netCDF4.num2date(nc.variables['time'][s[i]],
                                      nc.variables['time'].units)
            else:
                ax = nc.variables[dim][s[i]]
            # Scalar slices drop the axis; only array-valued axes become
            # pandas indexes
            if isinstance(ax, np.ndarray):
                axs.append(pd.Index(ax, name=dim))
        # read data
        data = nc.variables[var][s]
        # construct pandas object matching the remaining dimensionality
        if len(axs) == 1:
            df = pd.Series(data, index=axs[0])
        elif len(axs) == 2:
            df = pd.DataFrame(data, index=axs[0], columns=axs[1])
        elif len(axs) == 3:
            df = pd.Panel(data, items=axs[0], major_axis=axs[1],
                          minor_axis=axs[2])
        elif len(axs) == 4:
            df = pd.Panel4D(data, labels=axs[0], items=axs[1],
                            major_axis=axs[2], minor_axis=axs[3])
        else:
            # BUG FIX: ``raise NotImplemented(...)`` raises TypeError (the
            # singleton is not an exception class); use NotImplementedError.
            raise NotImplementedError('No pandas structure with more than '
                                      'four dimensions, reduce dimensionality')
    return df
# Notebook fragment: build co-occurrence networks from (optionally chained)
# randomizations of the entity-class table.
# NOTE(review): `entity_classes`, `randomize_occurrences`,
# `calculate_cooccurrence_networks`, `randomized_control`, `chain`,
# `class_lookup`, `preserve_years`, `target_years` and the file-name pieces
# are all defined in earlier cells — confirm against the full notebook.
entity_classes = randomize_occurrences(entity_classes, preserve_years=preserve_years)
networks = calculate_cooccurrence_networks(entity_classes, target_years)

# In[62]:

if randomized_control and chain:
    # If we have a chained randomization process, then keep going!
    # Iteration 0 keeps the networks computed above; each further iteration
    # re-randomizes the (already randomized) table, so draws are chained.
    randomizations = {0: networks}
    for iteration in range(1, chain):
        if not iteration % 100:  # progress marker every 100 iterations
            print(iteration)
        entity_classes = randomize_occurrences(entity_classes, preserve_years=preserve_years)
        randomizations[iteration] = calculate_cooccurrence_networks(
            entity_classes, target_years)
    # Stack iteration -> Panel into a 4D container keyed by iteration number
    networks = pd.Panel4D(randomizations)

# In[63]:

# Relabel the class axes from positional indices to class names
networks.major_axis = class_lookup.index[networks.major_axis]
networks.minor_axis = class_lookup.index[networks.minor_axis]

# In[64]:

if randomized_control:
    # Output file name encodes whether year structure was preserved
    if preserve_years:
        file_name = 'synthetic_control_cooccurrence_%s%s_preserve_years_%s' % (
            n_years_label, entity_column, class_system)
    else:
        file_name = 'synthetic_control_cooccurrence_%s%s_no_preserve_years_%s' % (
            n_years_label, entity_column, class_system)
def load_dataframe(fname, shape=None, start=0, stop=np.inf, step=1, verbose=False):
    '''Load data from output file in Pandas dataframe or equivalent

    Parameters
    ----------
    fname : string
        Path to output file
    shape : tuple or list, optional
        Shape of data in file
    start : int, optional
        Start block position for read
    stop : int, optional
        Stop block position for read
    step : int, optional
        Step or stride blocks for read
    verbose : bool, optional
        Flag to enable process output

    Returns
    -------
    pandas.DataFrame, pandas.Panel or pandas.Panel4D
        Pandas object containing data from output file

    Raises
    ------
    ValueError
        If the data has more than four dimensions.

    Examples
    --------
    >>> load_dataframe('mass.out', stop=10) # read first 10 blocks
    >>> load_dataframe('supply.out', step=10) # read every 10th block
    >>> load_dataframe('supply.out', start=100, step=2, stop=200) # read every even block from 100th to 200th
    '''
    if shape is None:
        shape = get_dims(fname)
    dims = load_dimensions(fname)
    data = read_fortran(fname, shape=shape, start=start, stop=stop,
                        step=step, verbose=verbose)
    # Time axis: one entry per block read, spaced by the model output
    # interval times the read stride.
    # BUG FIX: the DatetimeIndex(start=..., periods=..., freq=...) constructor
    # was deprecated and later removed; pd.date_range is the supported
    # equivalent and accepts the same arguments.
    ix = pd.date_range(start=0, periods=data.shape[0],
                       freq='%dS' % round(dims['dt_out'] * step))
    if len(shape) == 1:
        return pd.DataFrame(data, index=ix)
    elif len(shape) == 2:
        return pd.DataFrame(data, index=ix, columns=dims['ax_x'])
    elif len(shape) == 3:
        return pd.Panel(data, items=ix, major_axis=dims['ax_x'])
    elif len(shape) == 4:
        return pd.Panel4D(data, labels=ix, items=dims['ax_x'])
    else:
        raise ValueError('Unsupported dimension count [%d]' % len(shape))
def calculate_citation_networks(citations, metrics, target_years,
                                classes=classes, n_years=n_years):
    """Build class-by-class citation networks, one slice per metric and year.

    Parameters: ``citations`` is a table with at least a
    'Year_Citing_Patent' column; ``metrics`` selects which network slices to
    fill; ``target_years`` are the Panel4D item labels.  ``classes`` and
    ``n_years`` default to module-level globals captured at definition time
    — NOTE(review): confirm those globals exist before this def runs.

    Returns the populated ``pd.Panel4D`` (metrics x years x classes x classes).
    """
    networks = pd.Panel4D(labels=metrics, items=target_years,
                          major_axis=classes, minor_axis=classes,
                          dtype='float64')
    for year in target_years:
        print(year)
        # these_citations = citations[citations['Year_Citing_Patent']<=year]
        # Either use all citations up to `year`, or only a trailing window
        # of `n_years` years ending at `year`.
        if n_years is None or n_years == 'all' or n_years == 'cumulative':
            these_citations = citations[
                citations['Year_Citing_Patent'] <= year]
        else:
            these_citations = citations[(
                (citations['Year_Citing_Patent'] <= year) &
                (citations['Year_Citing_Patent'] > (year - n_years)))]
        if 'Class_CoCitation_Count' in metrics:
            print('Class_CoCitation_Count')
            networks.ix['Class_CoCitation_Count',
                        year, :, :] = cocitation_counts(these_citations)
        # class->class counts feed both directed-count and cosine metrics
        citation_counts = calculate_citation_counts(
            these_citations, relation='class_cites_class')
        if 'Class_Cites_Class_Count' in metrics:
            print('Class_Cites_Class_Count')
            networks.ix['Class_Cites_Class_Count',
                        year, :, :] = array(citation_counts.todense())
        if 'Class_Cited_by_Class_Count' in metrics:
            print('Class_Cited_by_Class_Count')
            # Transpose flips the direction of the citation relation
            networks.ix['Class_Cited_by_Class_Count',
                        year, :, :] = array(citation_counts.todense().T)
        if 'Class_Cites_Class_Input_Cosine_Similarity' in metrics:
            print('Class_Cites_Class_Input_Cosine_Similarity')
            networks.ix['Class_Cites_Class_Input_Cosine_Similarity',
                        year, :, :] = cosine_similarities(citation_counts)
        if 'Class_Cites_Class_Output_Cosine_Similarity' in metrics:
            print('Class_Cites_Class_Output_Cosine_Similarity')
            networks.ix['Class_Cites_Class_Output_Cosine_Similarity',
                        year, :, :] = cosine_similarities(citation_counts.T)
        # Recompute counts at class->patent granularity
        citation_counts = calculate_citation_counts(
            these_citations, relation='class_cites_patent')
        if 'Class_Cites_Patent_Input_Cosine_Similarity' in metrics:
            print('Class_Cites_Patent_Input_Cosine_Similarity')
            networks.ix['Class_Cites_Patent_Input_Cosine_Similarity',
                        year, :, :] = cosine_similarities(citation_counts.T)
        if 'Class_Cites_Patent_Input_Jaccard_Similarity' in metrics:
            print('Class_Cites_Patent_Input_Jaccard_Similarity')
            networks.ix['Class_Cites_Patent_Input_Jaccard_Similarity',
                        year, :, :] = jaccard_similarities(citation_counts.T)
        # Recompute counts at patent->class granularity
        citation_counts = calculate_citation_counts(
            these_citations, relation='patent_cites_class')
        if 'Patent_Cites_Class_Output_Cosine_Similarity' in metrics:
            print('Patent_Cites_Class_Output_Cosine_Similarity')
            networks.ix['Patent_Cites_Class_Output_Cosine_Similarity',
                        year, :, :] = cosine_similarities(citation_counts.T)
        if 'Patent_Cites_Class_Output_Jaccard_Similarity' in metrics:
            print('Patent_Cites_Class_Output_Jaccard_Similarity')
            networks.ix['Patent_Cites_Class_Output_Jaccard_Similarity',
                        year, :, :] = jaccard_similarities(citation_counts.T)
    return networks
# Script fragment: accumulate static-period trip tables, then spread each
# period's OD trips over the links on shortest paths and sum to link flows.
# NOTE(review): the first statements reference `period`, so this fragment
# appears to sit inside an enclosing loop over periods that is not visible
# here; `time_periods`, `static_periods`, `trip_tables`, `node_ids`,
# `link_ids`, `shortest_paths`, `OUTPUT_FILE` and `start_time` come from
# upstream code — confirm against the full script.
static = not time_periods.loc[period, 'DTA']
if static:
    name = time_periods.loc[period, 'Period']
    static_periods.append(name)
    # Trip table rows are node ids; columns are the same ids as strings
    TRIP_TABLE_FILE = r'D:\ShoresideTDM\TimePeriods\{}\trip_table.csv'.format(name)
    trip_tables[name] = pd.read_csv(TRIP_TABLE_FILE, index_col = 0).loc[node_ids, np.array(node_ids).astype(str)]
trip_tables = pd.Panel(trip_tables)
P = len(static_periods)
L = len(link_ids)
N = len(node_ids)
#Create 4-dimensional table (Static Periods x Links x Origin Nodes x Destination Nodes)
od_flows = pd.Panel4D(np.zeros((P, L, N, N)), labels = static_periods, items = link_ids, major_axis = node_ids, minor_axis = node_ids.astype(str))
#For each time period, if a link is in a shortest path between two nodes, put all of that time period's trip between said nodes in the 4D table
for period in static_periods:
    for l in link_ids:
        # shortest_paths[l] presumably is a 0/1 OD mask for link l — TODO confirm
        od_flows.ix[period, l, :, :] = trip_tables[period] * shortest_paths[l]
#print('TEST: {}'.format(od_flows.ix['MD', 1295, 104, '530']))
#Sum over origin and destination nodes to get flows on each link for each time period
link_flows = od_flows.sum(3).sum(2)
link_flows.to_csv(OUTPUT_FILE)
end_time = time.time()
runtime = round(end_time - start_time, 1)
# In[11]: if output_cooccurrence: M = None for entity in entity_types: print(entity) (M_entity, standard_deviation_entity, all_max_entity, all_min_entity) = running_stats('synthetic_cooccurrence_%s_%s'%(entity, class_system), cooccurrence_base_file_name%(entity, class_system), coocurrence_controls_directory ) if M is None: M = pd.Panel4D({'Class_CoOccurrence_Count_%s'%entity: M_entity}) standard_deviation = pd.Panel4D({'Class_CoOccurrence_Count_%s'%entity: standard_deviation_entity}) all_max = pd.Panel4D({'Class_CoOccurrence_Count_%s'%entity: all_max_entity}) all_min = pd.Panel4D({'Class_CoOccurrence_Count_%s'%entity: all_min_entity}) else: M['Class_CoOccurrence_Count_%s'%entity] = M_entity standard_deviation['Class_CoOccurrence_Count_%s'%entity] = standard_deviation_entity all_max['Class_CoOccurrence_Count_%s'%entity] = all_max_entity all_min['Class_CoOccurrence_Count_%s'%entity] = all_min_entity store = pd.HDFStore(data_directory+'Class_Relatedness_Networks/cooccurrence/%s.h5'%(output_cooccurrence), mode='a', table=True) store.put('/randomized_mean_%s%s'%(n_years_label, class_system), M, 'table', append=False) store.put('/randomized_std_%s%s'%(n_years_label, class_system), standard_deviation, 'table', append=False) store.put('/randomized_max_%s%s'%(n_years_label, class_system), all_max, 'table', append=False)
def align(data, reference_frame=0, upsample_factor=10, center=True, **kwargs):
    """ Perform image alignment on data.

        usage: aligned_data, shifts = align(data)

        Performs sub-pixel image translation registration (image alignment)
        on the data. Each frame of each element is registered seperately.
        skimage.features.register_translation() is used for obtaining the
        shifts and scipy.ndimage.shift() is used to apply the shifts to the
        data. See the documentation of those functions for more information;
        keyword arguments are passed through. By default, the
        upsample_factor is set to 10.

        The data can be given as a pandas Panel4D (sims default) or Panel (a
        single element), or as a numpy array. The reference frame is the
        frame to which all other images in the stack are adjusted. By
        default this is 0, the first frame. A list may also be given, in
        which case a different reference frame for each element can be set.
        If center=True (the default), the shifts are centered to the median
        of the shifts to minimize blank edges.

        Returns the aligned data and the shifts.

        NOTE(review): despite the docstring, **kwargs is accepted but never
        forwarded to register_translation or shift in this body — confirm
        whether that is a regression.
    """
    # Remember the input container type so the result can be unwrapped to match
    was_panel = False
    was_numpy = False
    if isinstance(data, pd.Panel):
        # Promote a single element (Panel) to a Panel4D with dummy label 'x'
        data = pd.Panel4D(data=data.values, labels=['x'], items=data.items,
                          major_axis=data.major_axis,
                          minor_axis=data.minor_axis)
        was_panel = True
    elif isinstance(data, np.ndarray):
        if data.ndim == 3:
            data = pd.Panel4D(data=[data])
        elif data.ndim == 4:
            data = pd.Panel4D(data=data)
        else:
            msg = 'data must be 3 dimensional (one stack of 2D images), or '
            msg += '4 dimentional (multiple stacks of 2D images).'
            raise TypeError(msg)
        was_numpy = True
    # Broadcast a single reference frame to one per element (label)
    if isinstance(reference_frame, int):
        reference_frame = [reference_frame] * data.shape[0]

    # Calculate shifts
    shifts = []
    for lbl, ref in zip(data.labels, reference_frame):
        shifts_per_lbl = []
        for lyr in data.loc[lbl]:
            # register_translation returns (shift, error, diffphase); keep shift
            sh = register_translation(data.loc[lbl, ref],
                                      data.loc[lbl, lyr],
                                      upsample_factor=upsample_factor)[0]
            shifts_per_lbl.append(sh)
        shifts.append(np.vstack(shifts_per_lbl))
    shifts = pd.Panel(shifts, items=data.labels, minor_axis=['y', 'x'])

    # Center shifts
    if center:
        # NOTE(review): the median is ADDED to every shift; whether centering
        # should add or subtract depends on register_translation's sign
        # convention — verify against the scikit-image docs.
        xmedian = shifts.loc[:, :, 'x'].median()
        ymedian = shifts.loc[:, :, 'y'].median()
        shifts.loc[:, :, 'x'] += xmedian
        shifts.loc[:, :, 'y'] += ymedian

    # Apply shifts to data
    # NOTE(review): only `labels` is carried over here; items/major/minor
    # default to integer ranges, so .loc[lbl, frm] assumes data.items are
    # already 0..n-1 — TODO confirm for non-default Panel4D inputs.
    shifted = pd.Panel4D(np.zeros(data.shape), labels=data.labels)
    for lbl in data.labels:
        for frm in data.items:
            shifted.loc[lbl, frm] = shift(data.loc[lbl, frm],
                                          shifts.loc[lbl, frm])

    # Unwrap to the caller's original container type
    if was_panel:
        return (shifted['x'], shifts['x'])
    elif was_numpy:
        return (shifted.values.squeeze(), shifts.values.squeeze())
    else:
        return (shifted, shifts)
def get_seasonal_clim_cross_section_with_ttest_data(
        self, start_year=None, end_year=None, season_to_months=None,
        varname="votemper", start_point=None, end_point=None):
    """Compute seasonal climatological mean/std fields over a span of years.

    :param start_year: first year; defaults to the earliest key of
        ``self.year_to_path``
    :param end_year: last year; defaults to the latest key
    :param season_to_months: dict season-name -> iterable of month numbers
        (required despite the None default — TODO confirm)
    :param varname: netCDF variable to read (4D: t, z, y, x)
    :param start_point: unused in this body — TODO confirm intended use
    :param end_point: unused in this body — TODO confirm intended use
    :return: dict season -> (masked mean field, std field, n observations),
        with fields transposed to (z, x, y)
    """
    if start_year is None:
        start_year = min(self.year_to_path.keys())
    if end_year is None:
        end_year = max(self.year_to_path.keys())

    # Set up month to season relation (unlisted months map to "no-season")
    month_to_season = defaultdict(lambda: "no-season")
    for m in range(1, 13):
        for s, months in season_to_months.items():
            if m in months:
                month_to_season[m] = s
                break

    season_to_field_list = defaultdict(list)
    for y in range(start_year, end_year + 1):
        fpath = self.year_to_path[y]
        with MFDataset(fpath) as ds:
            data_var = ds.variables[varname]
            assert data_var.ndim == 4
            data = data_var[:]  # (t, z, y, x)
            nt, nz, ny, nx = data.shape
            time_var = ds.variables["time_counter"]
            dates = num2date(time_var[:], time_var.units)
            # One Panel4D per year: dates on the labels axis so the
            # groupby below can average records by season
            panel = pd.Panel4D(data=data, labels=dates, items=range(nz),
                               major_axis=range(ny), minor_axis=range(nx))
            seas_mean = panel.groupby(
                lambda d: month_to_season[d.month], axis="labels").mean()
            print(seas_mean)
            for the_season in seas_mean:
                season_to_field_list[the_season].append(
                    seas_mean[the_season].values)

    result = {}
    for the_season, field_list in season_to_field_list.items():
        # Average over years; transpose (z, y, x) -> (z, x, y)
        mean_field = np.mean(field_list, axis=0).transpose((0, 2, 1))
        std_field = np.std(field_list, axis=0).transpose((0, 2, 1))
        nobs = len(field_list)
        print(mean_field.shape)
        # Mask points outside the lake; assumes self.lake_mask matches the
        # transposed field shape — TODO confirm
        result[the_season] = (np.ma.masked_where(~self.lake_mask, mean_field),
                              std_field, nobs)
    return result
def dict_to_panel(maps):
    """Wrap a dict of panel-like values in a pandas Panel4D."""
    panel = pandas.Panel4D(maps)
    return panel