class PandasHDFHandler(FileHandler):
    r"""
    Handler for HDF5 files using Pandas.
    """
    def _open_for_read(self):
        self.handle = HDFStore(self.fname, mode='r')

    def _open_for_write(self):
        self.handle = HDFStore(self.fname)

    def list_items(self):
        keys = [key.strip('/') for key in self.handle.keys()]
        items = [(key, _get_type_from_attrs(self.handle.get_storer(key).attrs))
                 for key in keys if '/' not in key]
        # ---- for backward compatibility (LArray < 0.33) ----
        # axes
        items += [(key.split('/')[-1], 'Axis_Backward_Comp')
                  for key in keys if '__axes__' in key]
        # groups
        items += [(key.split('/')[-1], 'Group_Backward_Comp')
                  for key in keys if '__groups__' in key]
        return items

    def _read_item(self, key, typename, *args, **kwargs):
        if typename in _supported_typenames:
            hdf_key = '/' + key
        # ---- for backward compatibility (LArray < 0.33) ----
        elif typename == 'Axis_Backward_Comp':
            hdf_key = '__axes__/' + key
        elif typename == 'Group_Backward_Comp':
            hdf_key = '__groups__/' + key
        else:
            raise TypeError()
        return read_hdf(self.handle, hdf_key, *args, **kwargs)

    def _dump_item(self, key, value, *args, **kwargs):
        hdf_key = '/' + key
        if isinstance(value, (Array, Axis)):
            value.to_hdf(self.handle, hdf_key, *args, **kwargs)
        elif isinstance(value, Group):
            hdf_axis_key = '/' + value.axis.name
            value.to_hdf(self.handle, hdf_key, hdf_axis_key, *args, **kwargs)
        elif isinstance(value, _supported_scalars_types):
            s = pd.Series(data=value)
            self.handle.put(hdf_key, s)
            self.handle.get_storer(hdf_key).attrs.type = type(value).__name__
        else:
            raise TypeError()

    def _read_metadata(self):
        metadata = Metadata.from_hdf(self.handle)
        if metadata is None:
            metadata = Metadata()
        return metadata

    def _dump_metadata(self, metadata):
        metadata.to_hdf(self.handle)

    def close(self):
        self.handle.close()

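# A minimal self-contained sketch (not part of the handler above) of the scalar
# round-trip pattern used in _dump_item: store the scalar as a one-element
# Series and stamp the storer's attrs with the value's type name so it can be
# recognized on read. Requires PyTables; the file name and key are illustrative.
import pandas as pd

with pd.HDFStore('scalars.h5', mode='w') as store:
    store.put('/answer', pd.Series(data=42))
    store.get_storer('/answer').attrs.type = type(42).__name__

with pd.HDFStore('scalars.h5', mode='r') as store:
    typename = store.get_storer('/answer').attrs.type  # 'int'
    value = store['/answer'].iloc[0]                   # 42
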
def save(self, filename='store.hdf', path='results'):
    """ Saves `self.results` into the HDF file `filename` in the tree `path`. """
    if self.results.empty:
        return
    print('Saving to {} ({})'.format(filename, path))
    if path == 'config':
        logging.error('Cannot use "config" as path, using "config2" instead.')
        path = "config2"
    store = HDFStore(filename)
    store[path] = self.results
    store.get_storer(path).attrs.config = self.config
    store.get_storer(path).attrs.seed = self.seed
    store.get_storer(path).attrs.parallel = self.parallel
    store.close()

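# A hedged companion sketch for reading such a file back; the attribute names
# mirror those set in save() above, while the file name and path are assumptions.
from pandas import HDFStore

store = HDFStore('store.hdf', mode='r')
results = store['results']                 # the DataFrame saved above
attrs = store.get_storer('results').attrs  # PyTables attribute set
config, seed, parallel = attrs.config, attrs.seed, attrs.parallel
store.close()
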
def save(self, store: pandas.HDFStore) -> None:
    """
    Save a model to an open HDFStore.

    Notes:
        Performs an IO operation.

    Args:
        store (pandas.HDFStore)

    Returns:
        None

    """
    # save the config as an attribute
    config = self.get_config()
    store.put('model', pandas.DataFrame())
    store.get_storer('model').attrs.config = config
    # save the parameters
    for i in range(self.num_weights):
        key = os.path.join('weights', 'weights' + str(i))
        self.weights[i].save_params(store, key)
    for i in range(self.num_layers):
        key = os.path.join('layers', 'layers' + str(i))
        self.layers[i].save_params(store, key)

def from_saved(cls, store: pandas.HDFStore) -> "PCA":
    """
    Create the PCA from its saved parameters.

    Notes:
        Performs an IO operation.

    Args:
        store (pandas.HDFStore)

    Returns:
        PCA

    """
    config = store.get_storer('pca').attrs.config
    pca = cls(config['num_components'], config['stepsize'])
    pca.W = be.float_tensor(store.get('pca/W').values)
    pca.var = be.float_tensor(store.get('pca/var').values[:, 0])
    # check the mean is present; membership via `in store` handles the
    # leading '/' that store.keys() adds to every key
    if 'pca/mean' in store:
        pca.mean = be.float_tensor(store.get('pca/mean').values[:, 0])
    # if the saved PCA was fit from SVD, there is no calculator defined
    if 'pca/var_calc' in store:
        pca.var_calc = math_utils.MeanVarianceArrayCalculator.from_dataframe(
            store.get('pca/var_calc'))
    else:
        pca.var_calc = math_utils.MeanVarianceArrayCalculator()
    return pca

def from_saved(cls, store: pandas.HDFStore):
    """
    Build a model by reading from an open HDFStore.

    Notes:
        Performs an IO operation.

    Args:
        store (pandas.HDFStore)

    Returns:
        model: the model reconstructed from the store.

    """
    # create the model from the config
    config = store.get_storer('model').attrs.config
    model = cls.from_config(config)
    # load the weights
    for i in range(len(model.weights)):
        key = os.path.join('weights', 'weights' + str(i))
        model.weights[i].load_params(store, key)
    # load the layer parameters
    for i in range(len(model.layers)):
        key = os.path.join('layers', 'layers' + str(i))
        model.layers[i].load_params(store, key)
    return model

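# A hedged round-trip sketch for the save()/from_saved() pair above; `Model`
# stands in for the actual class, `model` for an existing instance, and the
# file name is an assumption.
import pandas

with pandas.HDFStore('model.h5', mode='w') as store:
    model.save(store)                  # writes the config attr plus parameters

with pandas.HDFStore('model.h5', mode='r') as store:
    restored = Model.from_saved(store)
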
class Serialization(object):
    def __init__(self, filename, mode='r', compress=True):
        self._filename = filename
        self._compress = compress
        self._mode = mode

    def __enter__(self):
        if self._compress:
            self._store = HDFStore(self._filename, complib='blosc:lz4',
                                   complevel=9, mode=self._mode)
        else:  # pragma: no cover
            self._store = HDFStore(self._filename, mode=self._mode)
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self._store.close()

    @property
    def keys(self):
        return self._store.keys()

    def store_pandas_object(self, path, obj, **metadata):
        self._store.put(path, obj, format='fixed')
        self._store.get_storer(path).attrs.metadata = metadata

    def retrieve_pandas_object(self, path):
        # Get the metadata
        metadata = self._store.get_storer(path).attrs.metadata
        # Get the object
        obj = self._store.get(path)
        return obj, metadata

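# A hedged usage sketch for the context manager above; the file name, key, and
# metadata are illustrative, and the default 'blosc:lz4' compressor must be
# available in the local PyTables build.
import pandas as pd

df = pd.DataFrame({'a': [1, 2, 3]})

with Serialization('data.h5', mode='w') as s:
    s.store_pandas_object('frames/example', df, source='sensor dump')

with Serialization('data.h5') as s:  # default mode='r'
    obj, metadata = s.retrieve_pandas_object('frames/example')
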
def from_saved(cls, store: pandas.HDFStore):
    config = store.get_storer('model').attrs.config
    model = cls.from_config(config)
    for i in range(len(model.layers)):
        key = os.path.join('layers', 'layers_' + str(i))
        model.layers[i].load_params(store, key)
    for i in range(len(model.connections)):
        key = os.path.join('connections', 'weights_' + str(i))
        model.connections[i].weights.load_params(store, key)
    return model

def save(self, store: pandas.HDFStore) -> None:
    config = self.get_config()
    store.put('model', pandas.DataFrame())
    store.get_storer('model').attrs.config = config
    for i in range(self.num_layers):
        key = os.path.join('layers', 'layers_' + str(i))
        self.layers[i].save_params(store, key)
    for i in range(self.num_connections):
        key = os.path.join('connections', 'weights_' + str(i))
        self.connections[i].weights.save_params(store, key)

def Merge():
    if len(sys.argv) < 3:
        print('No valid filenames given!')
        print(__pdoc__['Merge'])
        sys.exit(1)
    infiles = [glob(f) for f in sys.argv[1:-1]]
    outfile = sys.argv[-1]
    LHAPATH = getenv('LHAPATH') if getenv('LHAPATH') else 'results'
    print("Will concatenate into {}.".format(outfile))
    store = HDFStore(outfile)
    df = DataFrame()
    store_conf = None
    for fs in infiles:
        for f in fs:
            print('Reading %s ...' % f)
            tmp_store = HDFStore(f)
            tmp_conf = None
            try:
                tmp_conf = tmp_store.get_storer(LHAPATH).attrs.config
                if not store_conf:
                    store_conf = tmp_conf
            except AttributeError:
                print('No config attribute found in {}'.format(f))
            if store_conf and tmp_conf and 'scatterplot' in store_conf:
                tmp_conf['scatterplot'] = store_conf['scatterplot']
            if store_conf and store_conf != tmp_conf:
                print('Warning: merge file with different config {}'.format(f))
            tmp_df = tmp_store['results']
            try:
                tmp_df['scan_seed'] = tmp_store.get_storer(LHAPATH).attrs.seed
                tmp_df['scan_parallel'] = tmp_store.get_storer(LHAPATH).attrs.parallel
            except AttributeError:
                pass
            # DataFrame.append was removed in pandas 2.0; on current pandas use
            # df = concat([df, tmp_df], ignore_index=True) instead
            df = df.append(tmp_df, ignore_index=True)
            tmp_store.close()
    store[LHAPATH] = df
    store.get_storer(LHAPATH).attrs.config = store_conf
    store.close()

def save_xarray_to_HDF5(dataArray, filename, complib=None):
    """Save the xarray DataArray to an HDF file using pandas HDFStore.

    attrs will be saved as metadata via pickle

    requires pytables

    complib : {'zlib', 'bzip2', 'lzo', 'blosc', None}, default None
    """
    from pandas import HDFStore
    f = HDFStore(filename, mode='w', complib=complib)
    f.put('data', dataArray.to_pandas())
    if len(dataArray.attrs) > 0:
        f.get_storer('data').attrs.metadata = dataArray.attrs
    f.close()

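# A hedged companion loader sketch; the function name is an assumption, and it
# only reverses save_xarray_to_HDF5 for arrays that survive to_pandas() (1D/2D).
import xarray as xr
from pandas import HDFStore

def load_xarray_from_HDF5(filename):
    """Rebuild a DataArray saved by save_xarray_to_HDF5."""
    f = HDFStore(filename, mode='r')
    data = xr.DataArray(f.get('data'))
    storer = f.get_storer('data')
    if hasattr(storer.attrs, 'metadata'):
        data.attrs = storer.attrs.metadata
    f.close()
    return data
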
class Serialization(object):
    def __init__(self, filename):
        self._filename = filename

    def __enter__(self):
        self._store = HDFStore(self._filename, complib='blosc', complevel=9)
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self._store.close()

    @property
    def keys(self):
        return self._store.keys()

    def store_pandas_object(self, name, obj, **metadata):
        self._store.put(name, obj)
        self._store.get_storer(name).attrs.metadata = metadata

    def retrieve_pandas_object(self, name):
        # Get the metadata
        metadata = self._store.get_storer(name).attrs.metadata
        # Get the object
        obj = self._store[name]
        return obj, metadata

def Edit():
    parser = ArgumentParser(description='Interactively load/edit/save/plot HDF files.')
    parser.add_argument('files', metavar='h5file.h5', type=str, nargs='+',
                        help='HDF file(s) to edit.')
    args = parser.parse_args()
    HDFFILES = [k for f in args.files for k in glob(f)]
    LHAPATH = getenv('LHAPATH') if getenv('LHAPATH') else 'results'
    store = False
    DATA = {}
    header = "Your data files are stored in 'DATA'"
    for f in HDFFILES:
        print('Reading %s ...' % f)
        DATA[f] = read_hdf(f)
    if len(DATA) == 1:
        HDFFILE = HDFFILES[0]
        DATA = DATA[HDFFILE]
        store = HDFStore(HDFFILE)
        try:
            conf = store.get_storer(LHAPATH).attrs.config
        except (KeyError, AttributeError):
            print("no config stored in hdf file")
    else:
        header += " and accessible via DATA['path/to/filename.h5']"
    if len(DATA) == 0:
        print('No valid data files specified.\n')
    else:
        HDFDIR = path.dirname(path.abspath(HDFFILES[0])) + '/'
        print('Changing working directory to {}.\n'.format(HDFDIR))
        chdir(HDFDIR)
        if ipy:
            embed(header=header)
        else:
            print(header)
            code.interact(local=locals())
    if store:
        store.close()

def save(self, store: pandas.HDFStore, num_components_save: int = None) -> None:
    """
    Save the PCA transform in an HDFStore.
    Allows saving only the first num_components_save components.

    Notes:
        Performs an IO operation.

    Args:
        store (pandas.HDFStore)
        num_components_save (int): the number of principal components to save.
            If None, all are saved.

    Returns:
        None

    """
    n = (num_components_save if num_components_save is not None
         else self.num_components)
    assert n <= self.num_components
    # the config
    config = {'num_components': n, 'stepsize': self.stepsize}
    store.put('pca', pandas.DataFrame())
    store.get_storer('pca').attrs.config = config
    # the parameters
    store.put('pca/W', pandas.DataFrame(be.to_numpy_array(self.W[:, :n])))
    store.put('pca/var', pandas.DataFrame(be.to_numpy_array(self.var[:n])))
    # check if the mean exists before saving
    if self.mean is not None:
        store.put('pca/mean', pandas.DataFrame(be.to_numpy_array(self.mean)))
    var_calc_df = self.var_calc.to_dataframe()
    # if fit from SVD, there is no calculator used
    if var_calc_df is not None:
        store.put('pca/var_calc', var_calc_df.iloc[:n])

def test_multiple_open_close(setup_path):
    # gh-4409: open & close multiple times
    with ensure_clean_path(setup_path) as path:
        df = tm.makeDataFrame()
        df.to_hdf(path, "df", mode="w", format="table")

        # single
        store = HDFStore(path)
        assert "CLOSED" not in store.info()
        assert store.is_open

        store.close()
        assert "CLOSED" in store.info()
        assert not store.is_open

    with ensure_clean_path(setup_path) as path:
        if pytables._table_file_open_policy_is_strict:
            # multiples
            store1 = HDFStore(path)
            msg = (
                r"The file [\S]* is already opened\. Please close it before "
                r"reopening in write mode\."
            )
            with pytest.raises(ValueError, match=msg):
                HDFStore(path)

            store1.close()
        else:
            # multiples
            store1 = HDFStore(path)
            store2 = HDFStore(path)

            assert "CLOSED" not in store1.info()
            assert "CLOSED" not in store2.info()
            assert store1.is_open
            assert store2.is_open

            store1.close()
            assert "CLOSED" in store1.info()
            assert not store1.is_open
            assert "CLOSED" not in store2.info()
            assert store2.is_open

            store2.close()
            assert "CLOSED" in store1.info()
            assert "CLOSED" in store2.info()
            assert not store1.is_open
            assert not store2.is_open

        # nested close
        store = HDFStore(path, mode="w")
        store.append("df", df)

        store2 = HDFStore(path)
        store2.append("df2", df)
        store2.close()
        assert "CLOSED" in store2.info()
        assert not store2.is_open

        store.close()
        assert "CLOSED" in store.info()
        assert not store.is_open

        # double closing
        store = HDFStore(path, mode="w")
        store.append("df", df)

        store2 = HDFStore(path)
        store.close()
        assert "CLOSED" in store.info()
        assert not store.is_open

        store2.close()
        assert "CLOSED" in store2.info()
        assert not store2.is_open

    # ops on a closed store
    with ensure_clean_path(setup_path) as path:
        df = tm.makeDataFrame()
        df.to_hdf(path, "df", mode="w", format="table")

        store = HDFStore(path)
        store.close()

        msg = r"[\S]* file is not open!"
        with pytest.raises(ClosedFileError, match=msg):
            store.keys()

        with pytest.raises(ClosedFileError, match=msg):
            "df" in store

        with pytest.raises(ClosedFileError, match=msg):
            len(store)

        with pytest.raises(ClosedFileError, match=msg):
            store["df"]

        with pytest.raises(ClosedFileError, match=msg):
            store.select("df")

        with pytest.raises(ClosedFileError, match=msg):
            store.get("df")

        with pytest.raises(ClosedFileError, match=msg):
            store.append("df2", df)

        with pytest.raises(ClosedFileError, match=msg):
            store.put("df3", df)

        with pytest.raises(ClosedFileError, match=msg):
            store.get_storer("df2")

        with pytest.raises(ClosedFileError, match=msg):
            store.remove("df2")

        with pytest.raises(ClosedFileError, match=msg):
            store.select("df")

        msg = "'HDFStore' object has no attribute 'df'"
        with pytest.raises(AttributeError, match=msg):
            store.df

def Plot():
    """
    Basic usage: `PlotLHA --help`

    Requires a YAML config file that specifies at least the `'scatterplot'`
    dict with the list `'plots'`.

      * Automatically uses the `'latex'` attribute of specified LHA blocks for labels.
      * Fields for x/y/z axes can be specified by either `BLOCKNAME.values.LHAID`
        or the specified `'parameter'` attribute.
      * New fields to plot can be computed using existing fields.
      * Optional constraints on the different fields may be specified.
      * Various options can be passed to `matplotlib`'s `legend`, `scatter`,
        `colorbar` functions.
      * Optional ticks can be set manually.

    __Example config.yml__

        ---
        scatterplot:
          conf:
            datafile: "mssm.h5"
            newfields:
              TanBeta: "DATA['HMIX.values.2'].apply(abs).apply(tan)"
            constraints:
              - "PDATA['TREELEVELUNITARITYwTRILINEARS.values.1']<0.5" # enforces e.g. unitarity
          plots:
            - filename: "mssm_TanBetaMSUSYmH.png" # one scatterplot
              y-axis: {field: TanBeta, label: '$\\tan\\beta$'}
              x-axis:
                field: MSUSY
                label: "$m_{SUSY}$ (TeV)"
                lognorm: True
                ticks:
                  - [1000,2000,3000,4000]
                  - ['$1$','$2$','$3$','$4$']
              z-axis:
                field: MASS.values.25
                colorbar: True
                label: "$m_h$ (GeV)"
              alpha: 0.8
              textbox: {x: 0.9, y: 0.3, text: 'some info'}
            - filename: "mssm_mhiggs.png" # multiple lines in one plot with legend
              constraints: [] # ignore all global constraints
              x-axis: {field: MSUSY, label: 'Massparameter (GeV)'}
              y-axis: {lognorm: True, label: '$m_{SUSY}$ (GeV)'}
              plots:
                - y-axis: MASS.values.25
                  color: red
                  label: '$m_{h_1}$'
                - y-axis: MASS.values.26
                  color: green
                  label: '$m_{h_2}$'
                - y-axis: MASS.values.35
                  color: blue
                  label: '$m_{A}$'
    """
    parser = ArgumentParser(description='Plot ScanLHA results.')
    parser.add_argument(
        "config", type=str,
        help="path to YAML file config.yml containing the plot (and optional scan) config.")
    parser.add_argument("-v", "--verbose", action="store_true",
                        help="increase output verbosity")
    args = parser.parse_args()
    logging.getLogger().setLevel(logging.INFO)
    if args.verbose:
        logging.getLogger().setLevel(logging.DEBUG)
    c = Config(args.config)
    DIR = os.path.dirname(os.path.abspath(args.config)) + '/'
    if 'scatterplot' not in c:
        logging.error('config file must contain "scatterplot" dict.')
        exit(1)
    if 'plots' not in c['scatterplot']:
        logging.error('no plots to plot')
        exit(1)
    conf = PlotConf()
    conf = conf.new_child(c['scatterplot'].get('conf', {}))
    if not os.path.isfile(conf['datafile']):
        logging.error('Data file {} does not exist.'.format(conf['datafile']))
        exit(1)
    store = HDFStore(conf['datafile'])
    path = 'results'  # TODO
    DATA = store[path]
    attrs = store.get_storer(path).attrs
    if hasattr(attrs, 'config') and conf.get('conf_overwrite', False):
        attrs.config['scatterplot'] = {}
        c.append(attrs.config)
    if not DATA.empty and 'newfields' in conf:
        for field, expr in conf['newfields'].items():
            logging.debug("executing DATA[{}] = {}".format(field, expr))
            DATA[field] = eval(expr)
            logging.debug("done.")
    pcount = 0
    for p in c['scatterplot']['plots']:
        lcount = 0
        pconf = conf.new_child(p)
        plt.cla()
        plt.clf()
        plt.rcParams.update(pconf['rcParams'])
        if pconf['fontsize'] != conf['fontsize']:
            plt.rcParams.update({'font.size': pconf['fontsize']})
        if pconf['colorbar_only']:
            plt.figure(figsize=(8, 0.25))
            ax = plt.gca()
            norm = Normalize(vmin=pconf['z-axis']['vmin'],
                             vmax=pconf['z-axis']['vmax'])
            if pconf['z-axis']['lognorm']:
                norm = LogNorm(vmin=pconf['z-axis']['vmin'],
                               vmax=pconf['z-axis']['vmax'])
            cbar = ColorbarBase(
                ax, norm=norm, cmap=pconf['cmap'],
                orientation=pconf['z-axis']['colorbar_orientation'])
            if pconf['z-axis']['colorbar_orientation'] == 'horizontal':
                ax.xaxis.set_label_position('top')
                ax.xaxis.set_ticks_position('top')
            if pconf['z-axis']['label']:
                cbar.set_label(pconf['z-axis']['label'])
            if pconf['z-axis']['ticks']:
                cbar.set_ticks(pconf['z-axis']['ticks'])
            plt.savefig(pconf['filename'], bbox_inches='tight')
            plt.figure()
            continue
        if pconf['title']:
            plt.title(conf['title'])
        if 'plots' not in p:
            p['plots'] = [p]
        for l in p['plots']:
            lconf = pconf.new_child(l)
            label = lconf['label']
            label = label if label else None
            cmap = lconf['cmap']
            zorder = lconf.get('zorder', lcount)
            color = lconf.get('color', "C{}".format(lcount))
            x = lconf.get('x-field', lconf['x-axis'].get('field', None))
            y = lconf.get('y-field', lconf['y-axis'].get('field', None))
            z = lconf.get('z-field', lconf['z-axis'].get('field', None))
            xlabel = lconf['x-axis']['label']
            ylabel = lconf['y-axis']['label']
            zlabel = lconf['z-axis']['label']
            if hasattr(c, 'parameters'):
                xlabel = c.parameters.get(x, {'latex': xlabel})['latex'] if not xlabel else xlabel
                ylabel = c.parameters.get(y, {'latex': ylabel})['latex'] if not ylabel else ylabel
                zlabel = c.parameters.get(z, {'latex': zlabel})['latex'] if not zlabel else zlabel
            if xlabel:
                plt.xlabel(xlabel)
            if ylabel:
                plt.ylabel(ylabel)
            if lconf['hline']:
                plt.axhline(y=y, color=color, linestyle='-', lw=lconf['lw'],
                            label=label, zorder=zorder, alpha=lconf['alpha'])
                continue
            if lconf['vline']:
                plt.axvline(x=x, color=color, linestyle='-', lw=lconf['lw'],
                            label=label, zorder=zorder, alpha=lconf['alpha'])
                continue
            if hasattr(c, 'parameters'):
                x = c.parameters.get(x, {'lha': x})['lha']
                y = c.parameters.get(y, {'lha': y})['lha']
                z = c.parameters.get(z, {'lha': z})['lha']
            PDATA = DATA
            if lconf['datafile'] and lconf['datafile'] != conf['datafile']:
                conf['datafile'] = lconf['datafile']  # TODO
                DATA = HDFStore(lconf['datafile'])['results']  # TODO
                PDATA = DATA
            if not PDATA.empty and 'newfields' in conf:
                for field, expr in conf['newfields'].items():
                    logging.debug("executing PDATA[{}] = {}".format(field, expr))
                    PDATA[field] = eval(expr)
                    logging.debug("done.")
            for ax, field in {'x-axis': x, 'y-axis': y, 'z-axis': z}.items():
                bounds = lconf[ax]['boundaries']
                if len(bounds) == 2:
                    PDATA = PDATA[(PDATA[field] >= bounds[0])
                                  & (PDATA[field] <= bounds[1])]
            for constr in lconf['constraints']:
                PDATA = PDATA[eval(constr)]
            if lconf['x-axis']['lognorm']:
                plt.xscale('log')
            if lconf['y-axis']['lognorm']:
                plt.yscale('log')
            if z:
                color = PDATA[z]
                vmin = PDATA[z].min() if not lconf['z-axis']['vmin'] else lconf['z-axis']['vmin']
                vmax = PDATA[z].max() if not lconf['z-axis']['vmax'] else lconf['z-axis']['vmax']
            else:
                vmin = None
                vmax = None
            znorm = LogNorm(vmin=vmin, vmax=vmax) if lconf['z-axis']['lognorm'] else None
            cs = plt.scatter(PDATA[x], PDATA[y], zorder=zorder, label=label,
                             cmap=cmap, c=color, vmin=vmin, vmax=vmax,
                             norm=znorm, s=lconf['s'], alpha=lconf['alpha'])
            plt.margins(x=0.01, y=0.01)  # TODO
            if lconf['x-axis']['ticks']:
                plt.xticks(lconf['x-axis']['ticks'][0], lconf['x-axis']['ticks'][1])
            if lconf['y-axis']['ticks']:
                plt.yticks(lconf['y-axis']['ticks'][0], lconf['y-axis']['ticks'][1])
            if lconf['z-axis']['colorbar']:
                cbar = plt.colorbar(cs, orientation=lconf['z-axis']['colorbar_orientation'])
                if zlabel:
                    cbar.set_label(zlabel)
                if lconf['z-axis']['ticks']:
                    cbar.set_ticks(lconf['z-axis']['ticks'])
            lcount += 1
        if any([l.get('label', False) for l in p['plots']]):
            plt.legend(**pconf['legend'])
        if pconf['textbox'] and 'text' in pconf['textbox']:
            bbox = pconf['textbox'].get(
                'bbox', dict(boxstyle='round', facecolor='white', alpha=0.2))
            va = pconf['textbox'].get('va', 'top')
            ha = pconf['textbox'].get('ha', 'left')
            textsize = pconf['textbox'].get(
                'fontsize', pconf['rcParams'].get('font.size', 15))
            xtext = pconf['textbox'].get('x', 0.95)
            ytext = pconf['textbox'].get('y', 0.85)
            plt.gcf().text(xtext, ytext, pconf['textbox']['text'],
                           fontsize=textsize, va=va, ha=ha, bbox=bbox)
        plotfile = DIR + p.get('filename', 'plot{}.png'.format(pcount))
        logging.info("Saving {}.".format(plotfile))
        plt.savefig(plotfile, bbox_inches="tight", dpi=pconf['dpi'])
        pcount += 1
    store.close()

def Plot():
    """
    Basic usage: `PlotLHA --help`

    Requires a YAML config file that specifies at least the `'scatterplot'`
    dict with the list `'plots'`.

      * Automatically uses the `'latex'` attribute of specified LHA blocks for labels.
      * Fields for x/y/z axes can be specified by either `BLOCKNAME.values.LHAID`
        or the specified `'parameter'` attribute.
      * New fields to plot can be computed using existing fields.
      * Optional constraints on the different fields may be specified.
      * Various options can be passed to `matplotlib`'s `legend`, `scatter`,
        `colorbar` functions.
      * Optional ticks can be set manually.

    __Example config.yml__

        ---
        scatterplot:
          conf:
            datafile: "mssm.h5"
            newfields:
              TanBeta: "DATA['HMIX.values.2'].apply(abs).apply(tan)"
            constraints:
              - "PDATA['TREELEVELUNITARITYwTRILINEARS.values.1']<0.5" # enforces e.g. unitarity
          plots:
            - filename: "mssm_TanBetaMSUSYmH.png" # one scatterplot
              y-axis: {field: TanBeta, label: '$\\tan\\beta$'}
              x-axis:
                field: MSUSY
                label: "$m_{SUSY}$ (TeV)"
                lognorm: True
                ticks:
                  - [1000,2000,3000,4000]
                  - ['$1$','$2$','$3$','$4$']
              z-axis:
                field: MASS.values.25
                colorbar: True
                label: "$m_h$ (GeV)"
              alpha: 0.8
              textbox: {x: 0.9, y: 0.3, text: 'some info'}
            - filename: "mssm_mhiggs.png" # multiple lines in one plot with legend
              constraints: [] # ignore all global constraints
              x-axis: {field: MSUSY, label: 'Massparameter (GeV)'}
              y-axis: {lognorm: True, label: '$m_{SUSY}$ (GeV)'}
              plots:
                - y-axis: MASS.values.25
                  color: red
                  label: '$m_{h_1}$'
                - y-axis: MASS.values.26
                  color: green
                  label: '$m_{h_2}$'
                - y-axis: MASS.values.35
                  color: blue
                  label: '$m_{A}$'
    """
    # the docstring must precede any statement (including `global`) to count
    # as a docstring
    global PDATA, DATA, c, conf, logging, args, path, DIR, store
    parser = ArgumentParser(description='Plot ScanLHA results.')
    parser.add_argument(
        "config", type=str,
        help="path to YAML file config.yml containing the plot (and optional scan) config.")
    parser.add_argument(
        "-i", "--interactive", action="store_true",
        help="opens interactive plot environment with IPython: plot using the 'plot()' function")
    parser.add_argument("-v", "--verbose", action="store_true",
                        help="increase output verbosity")
    args = parser.parse_args()
    logging.getLogger().setLevel(logging.INFO)
    if args.verbose:
        logging.getLogger().setLevel(logging.DEBUG)
    c = Config(args.config)
    DIR = os.path.dirname(os.path.abspath(args.config)) + '/'
    if 'scatterplot' not in c:
        logging.error('config file must contain "scatterplot" dict.')
        exit(1)
    if 'plots' not in c['scatterplot']:
        logging.error('no plots to plot')
        exit(1)
    conf = PlotConf()
    conf = conf.new_child(c['scatterplot'].get('conf', {}))
    if not os.path.isfile(conf['datafile']):
        logging.error('Data file {} does not exist.'.format(conf['datafile']))
        exit(1)
    store = HDFStore(conf['datafile'])
    path = 'results'  # TODO
    DATA = store[path]
    attrs = store.get_storer(path).attrs
    if hasattr(attrs, 'config') and conf.get('conf_overwrite', False):
        attrs.config['scatterplot'] = {}
        c.append(attrs.config)
    if args.interactive:
        embed()
    else:
        plot()
    store.close()

import save_dist_m
import traintest as tt
import plot_results

df = gdb.get_current_data()
df.to_csv("per_day_from_pandas.csv")
# df = pd.read_csv("per_day_from_pandas.csv")
preprocess.normalize_by_event_count(df)
# hdf5 doesn't like unicode
df['country'] = df['country'].apply(lambda x: x.encode('ascii', 'ignore'))
countrydict = preprocess.get_country_lookup(df)
hdf = HDFStore('project_data.h5')
hdf.put('per_day_preprocessed', df, format='table', data_columns=True)
hdf.get_storer('per_day_preprocessed').attrs.country_lookup = countrydict
## END PREPROCESSING

train_years = 5
test_years = 1
hdf = HDFStore('project_data.h5')
df = hdf['per_day_preprocessed']
basename_out = "last_6_years"
train_start = 20091030
trainxy, testxy, countrylist = gtt.get_train_test(df, train_start,
                                                  train_years, test_years)
train_x = trainxy[0]
train_y = trainxy[1]
test_x = testxy[0]
test_y = testxy[1]
np.savez("train_test_" + basename_out + ".npz",
         train_x=train_x, train_y=train_y,
         test_x=test_x, test_y=test_y,
         countrylist=countrylist)

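# A hedged sketch of reading the lookup back out of the store; the key and
# attribute names follow the script above.
from pandas import HDFStore

hdf = HDFStore('project_data.h5', mode='r')
df = hdf['per_day_preprocessed']
countrydict = hdf.get_storer('per_day_preprocessed').attrs.country_lookup
hdf.close()
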
'''
Returns the settings used to run an interactive TPCF

Usage:
    python tpcfSettings.py <tpcf filename>
'''
from __future__ import print_function
from pandas import HDFStore
import pprint
import sys

if len(sys.argv) != 2:
    print('\nUsage: python tpcfSettings.py <tpcf filename>\n')
    sys.exit()

fname = sys.argv[1]
store = HDFStore(fname)
attrs = store.get_storer('data').attrs.metadata
pp = pprint.PrettyPrinter()
pp.pprint(attrs)
store.close()