def readdata_nosplit_scaled_subject(input_size, subjects, feature):
    import deepdish.io as ddio
    import pandas as pd
    mkdir_recursive('dataset')
    trainData = ddio.load('dataset/targetdata_scaled.hdf5')
    testlabelData = ddio.load('dataset/labeldata_scaled.hdf5')
    indexData = ddio.load('dataset/index_scaled.hdf5')
    X = np.float32(trainData[feature])
    y = np.float32(testlabelData[feature])
    att = np.concatenate((X, y), axis=1)
    #np.random.shuffle(att)
    X, y = att[:, :input_size], att[:, input_size:]
    subjectLabel = np.array(pd.DataFrame(indexData)[1])
    print("==============")
    print(subjectLabel)
    nums = [
        '100', '101', '103', '105', '106', '107', '108', '109', '111', '112',
        '113', '115', '116', '117', '118', '119', '121', '122', '123', '124',
        '200', '201', '202', '203', '205', '207', '208', '209', '210', '212',
        '213', '214', '215', '217', '219', '220', '221', '222', '223', '228',
        '230', '231', '232', '233', '234'
    ]
    num_index = 0
    group = []
    for x in subjectLabel:
        for beat in range(x):
            group.append(nums[num_index])
        num_index += 1
    #group = np.array(group)
    return (X, y, group)
def parse_kmap(self, key='coeffs'):
    """ Retrieve the parameters to construct the momentum conversion function.
    """

    self.parse_bfile()
    self.fr, self.fc = dio.load(self.kfile)['calibration'][key]
    self.xcent, self.ycent = dio.load(self.kfile)['pcent']
def readdata_nosplit(input_size, feature):
    import deepdish.io as ddio
    mkdir_recursive('dataset')
    trainData = ddio.load('dataset/targetdata.hdf5')
    testlabelData = ddio.load('dataset/labeldata.hdf5')
    X = np.float32(trainData[feature])
    y = np.float32(testlabelData[feature])
    att = np.concatenate((X, y), axis=1)
    np.random.shuffle(att)
    X, y = att[:, :input_size], att[:, input_size:]
    return (X, y)
def loaddata(input_size, feature):
    import deepdish.io as ddio
    mkdir_recursive('dataset')
    trainData = ddio.load('dataset/train.hdf5')
    testlabelData = ddio.load('dataset/trainlabel.hdf5')
    X = np.float32(trainData[feature])
    y = np.float32(testlabelData[feature])
    att = np.concatenate((X, y), axis=1)
    np.random.shuffle(att)
    X, y = att[:, :input_size], att[:, input_size:]
    valData = ddio.load('dataset/test.hdf5')
    vallabelData = ddio.load('dataset/testlabel.hdf5')
    Xval = np.float32(valData[feature])
    yval = np.float32(vallabelData[feature])
    return (X, y, Xval, yval)
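A minimal usage sketch for the loader above; the input_size value and the 'MLII' feature key are illustrative assumptions, not values taken from the source.

# Hypothetical call: assumes the four hdf5 files above already exist in
# ./dataset and that each contains an 'MLII' entry.
X, y, Xval, yval = loaddata(input_size=256, feature='MLII')
print(X.shape, y.shape, Xval.shape, yval.shape)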
def compute_roc_auc_from_sim(category, path_sim_matrix, is_quiet=False):
    """
    Args:
      category: category name
      path_sim_matrix: path to the similarity matrix
      is_quiet: if False output extra information
    Returns:
      roc_auc: average ROC AUC for all labeled anchors.
    """
    if not is_quiet:
        print 'Sim matrix path:', path_sim_matrix
    try:
        sim = scipy.io.loadmat(path_sim_matrix)
    except NotImplementedError:
        # matlab v7.3 file
        sim = dio.load(path_sim_matrix)

    labels_path = join(HDF5_LABELS_DIR, 'labels_{}.hdf5'.format(category))
    with h5py.File(labels_path, mode='r') as f:
        d = covert_labels_to_dict(f)
    roc_auc, roc_auc_list = compute_roc(d, sim)
    print '{} n_anchors: {} ROC_AUC: {:.3f}'.format(category, len(d['anchors']), roc_auc)
    return roc_auc
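A hypothetical call for the function above; the category name and similarity-matrix path are placeholders rather than values from the source.

# Average ROC AUC over the labeled anchors of one category.
# scipy.io.loadmat handles older .mat files; deepdish.io.load is the v7.3 fallback.
roc_auc = compute_roc_auc_from_sim('long_jump', '/path/to/sim_matrix.mat', is_quiet=True)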
def loaddata_LOGO(input_size, feature):
    import deepdish.io as ddio
    mkdir_recursive('dataset')
    trainData = ddio.load('dataset/targetdata.hdf5')
    testlabelData = ddio.load('dataset/labeldata.hdf5')
    indexData = ddio.load('dataset/index.hdf5')
    X = np.float32(trainData[feature])
    y = np.float32(testlabelData[feature])
    att = np.concatenate((X, y), axis=1)
    np.random.shuffle(att)
    X, y = att[:, :input_size], att[:, input_size:]
    import pandas as pd
    subjectLabel = np.array(pd.DataFrame(indexData)[1])
    group = []
    for x in subjectLabel:
        for beat in range(x):
            group.append(x)
    group = np.array(group)
    return (X, y, group)
def read_saved_state(self, continuing=False):
    """
    Read a saved state of the sampler from disk.

    The required information to reconstruct the state of the run is read
    from an hdf5 file. This currently adds the whole chain to the sampler.
    We then remove the old checkpoint and write all unnecessary items back
    to disk.
    FIXME: Load only the necessary quantities, rather than read/write?

    Parameters
    ----------
    sampler: `dynesty.NestedSampler`
        NestedSampler instance to reconstruct from the saved state.
    continuing: bool
        Whether the run is continuing or terminating, if True, the loaded
        state is mostly written back to disk.
    """
    resume_file = '{}/{}_resume.h5'.format(self.outdir, self.label)

    if os.path.isfile(resume_file):
        saved = load(resume_file)

        self.sampler.saved_u = list(saved['unit_cube_samples'])
        self.sampler.saved_v = list(saved['physical_samples'])
        self.sampler.saved_logl = list(saved['sample_likelihoods'])
        self.sampler.saved_logvol = list(saved['sample_log_volume'])
        self.sampler.saved_logwt = list(saved['sample_log_weights'])
        self.sampler.saved_logz = list(saved['cumulative_log_evidence'])
        self.sampler.saved_logzvar = list(saved['cumulative_log_evidence_error'])
        self.sampler.saved_id = list(saved['id'])
        self.sampler.saved_it = list(saved['it'])
        self.sampler.saved_nc = list(saved['nc'])
        self.sampler.saved_boundidx = list(saved['boundidx'])
        self.sampler.saved_bounditer = list(saved['bounditer'])
        self.sampler.saved_scale = list(saved['scale'])
        self.sampler.saved_h = list(saved['cumulative_information'])
        self.sampler.ncall = saved['ncall']
        self.sampler.live_logl = list(saved['live_logl'])
        self.sampler.it = saved['iteration'] + 1
        self.sampler.live_u = saved['live_u']
        self.sampler.live_v = saved['live_v']
        self.sampler.nlive = saved['nlive']
        self.sampler.live_bound = saved['live_bound']
        self.sampler.live_it = saved['live_it']
        self.sampler.added_live = saved['added_live']
        self._remove_checkpoint()
        if continuing:
            self.write_current_state()
        return True
    else:
        return False
def load(cls, path):
    if path is None:
        return cls.load_from_dict({})
    else:
        d = io.load(path)
        # Check class type
        class_name = d.get('name')
        if class_name is not None:
            return cls.getclass(class_name).load_from_dict(d)
        else:
            return cls.load_from_dict(d)
def load(cls, path):
    if path is None:
        return cls.load_from_dict({})
    else:
        d = io.load(path)
        # Check class type
        class_name = d.get("name")
        if class_name is not None:
            return cls.getclass(class_name).load_from_dict(d)
        else:
            return cls.load_from_dict(d)
def existing_file_background(filepath):
    """ Returns a numpy array from an image stored at filepath """
    if filepath.endswith(".h5"):
        return dio.load(filepath)
    else:
        # If using OpenCV, we have to get RGB, not BGR
        try:
            return cv2.imread(filepath)[:, :, [2, 1, 0]]
        except TypeError:
            log = logging.getLogger()
            log.info("Could not load " + filepath)
            return np.zeros((10, 10), dtype=np.uint8)
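A brief usage sketch; both file paths are placeholders, and the image branch assumes OpenCV can read the file.

bg_h5 = existing_file_background('background.h5')    # loaded with deepdish.io.load
bg_img = existing_file_background('background.png')  # loaded with cv2, reordered BGR -> RGB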
def readdata_nosplit(input_size, feature):
    import deepdish.io as ddio
    import pandas as pd
    mkdir_recursive('dataset')
    #trainData = ddio.load('dataset/targetdata_std.hdf5')
    #testlabelData = ddio.load('dataset/labeldata_std.hdf5')
    #indexData = ddio.load('dataset/index_std.hdf5')
    trainData = ddio.load('dataset/targetdata_debug.hdf5')
    testlabelData = ddio.load('dataset/labeldata_debug.hdf5')
    indexData = ddio.load('dataset/index_debug.hdf5')
    X = np.float32(trainData[feature])
    y = np.float32(testlabelData[feature])
    att = np.concatenate((X, y), axis=1)
    np.random.shuffle(att)
    X, y = att[:, :input_size], att[:, input_size:]
    subjectLabel = np.array(pd.DataFrame(indexData)[1])
    group = []
    for x in subjectLabel:
        for beat in range(x):
            group.append(x)
    group = np.array(group)
    print(np.unique(group, return_counts=True))
    return (X, y, group)
def test():
    '''
    '''
    data = io.load(open('test_data.h5', 'rb'))
    #data = remove_tau(data)

    # -- Load scikit classifier
    classifier = joblib.load('sklBDT_trk2.pkl')

    # -- Get classifier predictions
    yhat = classifier.predict_proba(data['X'])[:, 2]

    io.save(open('yhat_test.h5', 'wb'), yhat)
def loaddata(input_size, feature):
    import deepdish.io as ddio
    mkdir_recursive('dataset')
    data = ddio.load('dataset/targetdata.hdf5')
    label = ddio.load('dataset/labeldata.hdf5')
    X = np.float32(data[feature])
    y = np.float32(label[feature])
    att = np.concatenate((X, y), axis=1)
    np.random.shuffle(att)
    X, y = att[:, :input_size], att[:, input_size:]
    from sklearn.model_selection import train_test_split
    X, Xval, y, yval = train_test_split(X, y, test_size=0.3, random_state=1)
    # trainData = ddio.load('dataset/train.hdf5')
    # testlabelData = ddio.load('dataset/trainlabel.hdf5')
    # X = np.float32(trainData[feature])
    # y = np.float32(testlabelData[feature])
    # att = np.concatenate((X,y), axis=1)
    # np.random.shuffle(att)
    # X , y = att[:,:input_size], att[:, input_size:]
    return (X, y, Xval, yval)
def __init__(self, Ps, Es, Vs=None):
    if type(Ps) == str:
        self.Psfname = Ps
        self.p = dd.load(Ps)
    else:
        self.Psfname = None
        self.p = Ps
    self.setup = Es
    self.setup['nvar'] = 2
    # self.Vs=Vs
    self.verbose = Es['verbose']
    if self.verbose:
        start = time.time()
    self.set_equations()
    self.dt = Es['dt']
    self.time_elapsed = 0
    if self.setup['setPDE']:
        self.rhs = self.rhs_pde
        self.p['nd'] = len(Es['n'])
        if self.p['nd'] == 2:
            self.p['nx'], self.p['ny'] = Es['n']
            self.p['lx'], self.p['ly'] = Es['l']
            self.l = [self.p['lx'], self.p['ly']]
            self.n = [self.p['nx'], self.p['ny']]
            self.dg = tuple([l / float(n) for l, n in zip(self.l, self.n)])
            self.dx = self.dg[0]
        elif self.p['nd'] == 1:
            self.dg = [Es['l'][0] / float(Es['n'][0])]
            self.dx = self.dg[0]
        self.dx2 = self.dx**2
        self.dt = Es['dt'] * self.dx2 / self.p['delta_s']
        self.X = np.linspace(0, Es['l'][0], Es['n'][0])
        from utilities.laplacian_sparse import create_laplacian  #,create_gradient
        self.lapmat = create_laplacian(
            self.setup['n'], self.setup['l'], self.setup['bc'],
            [1.0, self.p['delta_s'], self.p['delta_s']],
            verbose=self.verbose)
        # self.gradmat=create_gradient(self.setup['n'],self.setup['l'], self.setup['bc'] , [1.0,self.p['Dw'],self.p['Dh']])
        if self.verbose:
            print("Laplacian created")
    else:
        self.rhs = self.rhs_ode
    self.set_integrator()
    if Vs is not None:
        self.setup_initial_condition(Vs)
    if self.verbose:
        print("Time to setup: ", time.time() - start)
def batch(paths, iptagger, batch_size, random=True):
    while True:
        if random:
            np.random.shuffle(paths)
        for fp in paths:
            d = io.load(fp)
            X = np.concatenate([d['X'], d[iptagger + '_vars']], axis=1)
            le = LabelEncoder()
            y = le.fit_transform(d['y'])
            w = d['w']
            if random:
                ix = range(X.shape[0])
                np.random.shuffle(ix)
                X, y, w = X[ix], y[ix], w[ix]
            for i in xrange(int(np.ceil(X.shape[0] / float(batch_size)))):
                yield X[(i * batch_size):((i + 1) * batch_size)], \
                      y[(i * batch_size):((i + 1) * batch_size)], \
                      w[(i * batch_size):((i + 1) * batch_size)]
def parse_bfile(self):
    """ Retrieve the binning parameters.
    """

    binDict = dio.load(self.bfile)
    binaxes = list(map(lambda x: x.decode('utf-8'), binDict['binaxes'].tolist()))
    binranges = binDict['binranges'].tolist()
    binsteps = binDict['binsteps'].tolist()

    # Retrieve the binning steps along X and Y axes
    self.xstep = self.listfind(binaxes, 'X', binsteps)
    self.ystep = self.listfind(binaxes, 'Y', binsteps)

    # Retrieve the binning ranges (br) along X and Y axes
    self.xbr_start, self.xbr_end = self.listfind(binaxes, 'X', binranges)
    self.ybr_start, self.ybr_end = self.listfind(binaxes, 'Y', binranges)
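A sketch of the kind of binning file parse_bfile appears to expect, written with deepdish; the axis names and numeric values are illustrative assumptions, not values from the source.

import numpy as np
import deepdish.io as dio

bin_dict = {
    'binaxes': np.array([b'X', b'Y', b't']),                          # byte strings; decoded above
    'binsteps': np.array([0.5, 0.5, 1.0]),                            # one step size per axis
    'binranges': np.array([[-5.0, 5.0], [-5.0, 5.0], [0.0, 100.0]]),  # (start, end) per axis
}
dio.save('binning.h5', bin_dict)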
def __init__(self, Ps, Es, Vs=None):
    if type(Ps) == str:
        self.Psfname = Ps
        if Ps.endswith('csv'):
            import pandas as pd
            self.p = pd.read_csv(Ps, index_col=0, squeeze=True).to_dict()
        elif Ps.endswith('hdf'):
            self.p = dd.load(Ps)
    else:
        self.Psfname = None
        self.p = Ps
    self.setup = Es
    # self.Vs=Vs
    self.verbose = Es['verbose']
    if self.verbose:
        start = time.time()
    self.set_equations()
    self.dt = 0.1
    self.time_elapsed = 0
    if self.setup['setPDE']:
        self.p['nd'] = len(Es['n'])
        if self.p['nd'] == 2:
            self.p['nx'], self.p['ny'] = Es['n']
            self.p['lx'], self.p['ly'] = Es['l']
            self.l = [self.p['lx'], self.p['ly']]
            self.n = [self.p['nx'], self.p['ny']]
            self.dg = tuple([l / float(n) for l, n in zip(self.l, self.n)])
            self.dx = self.dg[0]
        elif self.p['nd'] == 1:
            self.dg = [Es['l'][0] / float(Es['n'][0])]
            self.dx = self.dg[0]
        self.dx2 = self.dx**2
        self.dt = Es['dt'] * self.dx2 / np.amax(self.diffusion_coeffs)
        self.X = np.linspace(0, Es['l'][0], Es['n'][0])
        from utilities.laplacian_sparse import create_laplacian  #,create_gradient
        self.lapmat = create_laplacian(
            self.setup['n'], self.setup['l'], self.setup['bc'],
            self.diffusion_coeffs, verbose=self.verbose)
        # self.gradmat=create_gradient(self.setup['n'],self.setup['l'], self.setup['bc'] , [1.0,self.p['Dw'],self.p['Dh']])
        self.set_integrator()
        if self.verbose:
            print("Laplacian created")
    if Vs is not None:
        self.setup_initial_condition(Vs)
    if self.verbose:
        print("Time to setup: ", time.time() - start)
def main():
    opts = parse_options()
    inFile = opts.inputFile
    tree = opts.treeName
    df = root2pandas(inFile, tree)

    # -- save a pandas df to hdf5 (better to first convert it back to ndarray, to be fair)
    import deepdish.io as io
    outFile = inFile.replace(".root", ".h5")
    io.save(outFile, df)

    # -- let's load it back in to make sure it actually worked!
    new_df = io.load(outFile)

    # -- check the shape again -- nice check to run every time you create a df
    print "File check!"
    print "(Number of events, Number of branches): ", new_df.shape
def integrate_from_steady_state(init_cond, alpha, Tmax, ito, idx_finish, step, version):
    Ps = dd.load(Ps_normal)
    Ps['Tmax'] = Tmax
    Ps['alpha'] = 0.0
    Es = Es_normal.copy()
    Es['rhs'] = version
    m = BenincaModel(Es=Es, Ps=Ps, Vs=None)
    if init_cond == 0:
        init_cond = calc_for_constant(m)
    elif init_cond == 1:
        init_cond = np.array([0.103, 0.019, 0.033, 0.040])
    elif init_cond == 2:
        init_cond = np.array([0.8, 0.1, 0.05, 0.1])
    print("Initial condition:", init_cond)
    print("Integrating with SDEINT")
    tspan, result, forcing = calc_for_oscillation_with_Ito(
        m, init_cond, alpha, Tmax, ito, idx_finish, step)
    forcing_tspan = m.Ft(tspan)
    return tspan, result, forcing, forcing_tspan
def batch(paths, iptagger, batch_size, random=True):
    while True:
        if random:
            np.random.shuffle(paths)
        for fp in paths:
            d = io.load(fp)
            X = np.concatenate([d['X'], d[iptagger + '_vars']], axis=1)
            le = LabelEncoder()
            y = le.fit_transform(d['y'])
            w = d['w']
            if random:
                ix = range(X.shape[0])
                np.random.shuffle(ix)
                X, y, w = X[ix], y[ix], w[ix]
            for i in xrange(int(np.ceil(X.shape[0] / float(batch_size)))):
                yield X[(i * batch_size):((i + 1) * batch_size)], \
                      y[(i * batch_size):((i + 1) * batch_size)], \
                      w[(i * batch_size):((i + 1) * batch_size)]
def train():
    '''
    '''
    data = io.load(open('train_data.h5', 'rb'))
    #data = remove_tau(data)

    if CROSS_VAL:
        param_grid = {
            'n_estimators': [50, 100],
            'max_depth': [3, 5, 10],
            'min_samples_split': [2, 5]
        }
        fit_params = {
            'sample_weight': data['w'],
        }
        metaclassifier = GridSearchCV(GradientBoostingClassifier(),
                                      param_grid=param_grid, fit_params=fit_params,
                                      cv=2, n_jobs=4, verbose=2)  #, scoring=roc_score)
        metaclassifier.fit(data['X'], data['y'])
        classifier = metaclassifier.best_estimator_
        print 'Best classifier:', metaclassifier.best_params_
    else:
        classifier = GradientBoostingClassifier(n_estimators=200, min_samples_split=2,
                                                max_depth=10, verbose=1)
        classifier.fit(data['X'], data['y'], sample_weight=data['w'])

    joblib.dump(classifier, 'sklBDT_trk2.pkl', protocol=cPickle.HIGHEST_PROTOCOL)
def load(cls, path):
    """
    Loads an instance of the class from a file.

    Parameters
    ----------
    path : str
        Path to an HDF5 file.

    Examples
    --------
    This is an abstract data type, but let us say that ``Foo`` inherits
    from ``Saveable``. To construct an object of this class from a file,
    we do:

    >>> foo = Foo.load('foo.h5')  #doctest: +SKIP
    """
    if path is None:
        return cls.load_from_dict({})
    else:
        d = io.load(path)
        return cls.load_from_dict(d)
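For context, a minimal hypothetical subclass showing how this classmethod is typically used; Foo and its single field are assumptions based only on the docstring, and the load_from_dict/save_to_dict hooks follow the Saveable pattern implied above.

class Foo(Saveable):
    def __init__(self, value=0):
        self.value = value

    @classmethod
    def load_from_dict(cls, d):
        # Rebuild the object from the dict stored in the HDF5 file.
        return cls(value=d.get('value', 0))

    def save_to_dict(self):
        return {'value': self.value}

# foo = Foo.load('foo.h5')  # reads the dict with deepdish and rebuilds Foo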
def main(embed_size, normed, input_id, run_name):
    configure_logging()
    logger = logging.getLogger("RNNIP Training")

    logger.info("Loading hdf5's")
    test_dict = io.load(os.path.join('data', 'test_dict_' + input_id + '.h5'))
    train_dict = io.load(os.path.join('data', 'train_dict_' + input_id + '.h5'))

    X_train_stream0 = train_dict['grade']
    X_train_stream1 = train_dict['X']
    y_train = train_dict['y']

    X_test_stream0 = test_dict['grade']
    X_test_stream1 = test_dict['X']
    y_test = test_dict['y']

    ip3d = test_dict['ip3d']

    logger.info('Building model')
    model = build_model(X_train_stream0, X_train_stream1, embed_size, normed)
    model.summary()

    logger.info('Compiling model')
    model.compile('adam', 'categorical_crossentropy', metrics=['accuracy'])

    # -- if the pre-trained model exists, load it in, otherwise start from scratch
    safe_mkdir('weights')
    weights_file = os.path.join('weights', 'rnnip_' + run_name + '.h5')
    try:
        model.load_weights(weights_file)
        logger.info('Loaded pre-trained model from ' + weights_file)
    except IOError:
        logger.info('No pre-trained model found in ' + weights_file)

    logger.info('Training:')
    try:
        model.fit([X_train_stream0, X_train_stream1], y_train,
                  batch_size=512,
                  callbacks=[
                      EarlyStopping(verbose=True, patience=20, monitor='val_loss'),
                      ModelCheckpoint(weights_file, monitor='val_loss',
                                      verbose=True, save_best_only=True)
                  ],
                  epochs=300,
                  validation_split=0.2)
    except KeyboardInterrupt:
        logger.info('Training ended early.')

    # -- load in best network
    logger.info('Loading best epoch')
    model.load_weights(weights_file)

    json_string = model.to_json()
    safe_mkdir('json_models')
    open(os.path.join('json_models', run_name + '.json'), 'w').write(json_string)

    logger.info('Testing')
    safe_mkdir('predictions')
    yhat = model.predict([X_test_stream0, X_test_stream1], verbose=True, batch_size=10000)
    io.save(os.path.join('predictions', 'yhat' + run_name + '.h5'), yhat)

    logger.info('Plotting ROC')
    plot_ROC(y_test, yhat, ip3d, run_name)
def _fit_and_score_ckpt(workdir=None, checkpoint=True,
                        force_refresh=False, **fit_and_score_kwargs):
    """Fit estimator and compute scores for a given dataset split.

    This function wraps
    :func:`sklearn:sklearn.model_selection._validation._fit_and_score`,
    while also saving checkpoint files containing the estimator, parameters,
    and scores. This is useful if fitting and scoring is costly or if it is
    being performed within a large cross-validation experiment.

    To avoid collisions with scores computed for other CV splits, this
    function computes a hash from a nested dictionary containing all keyword
    arguments as well as estimator parameters. It then saves the scores and
    parameters in <hash>_params.h5 and the estimator itself in
    <hash>_estimator.pkl

    Parameters
    ----------
    workdir : path-like object, default=None
        A string or :term:`python:path-like-object` indicating the directory
        in which to store checkpoint files

    checkpoint : bool, default=True
        If True, checkpoint the parameters, estimators, and scores.

    force_refresh : bool, default=False
        If True, recompute scores even if the checkpoint file already exists.
        Otherwise, load scores from checkpoint files and return.

    **fit_and_score_kwargs : kwargs
        Key-word arguments passed to
        :func:`sklearn:sklearn.model_selection._validation._fit_and_score`

    Returns
    -------
    train_scores : dict of scorer name -> float
        Score on training set (for all the scorers),
        returned only if `return_train_score` is `True`.

    test_scores : dict of scorer name -> float
        Score on testing set (for all the scorers).

    n_test_samples : int
        Number of test samples.

    fit_time : float
        Time spent for fitting in seconds.

    score_time : float
        Time spent for scoring in seconds.

    parameters : dict or None
        The parameters that have been evaluated.

    estimator : estimator object
        The fitted estimator
    """
    if not checkpoint:
        return _fit_and_score(**fit_and_score_kwargs)

    if workdir is None:
        raise ValueError(
            "If checkpoint is True, you must supply a working directory "
            "through the ``workdir`` argument.")

    estimator = fit_and_score_kwargs.pop("estimator", None)
    estimator_params = _serialize_estimator_params(estimator.get_params())
    all_params = {
        "estimator_params": estimator_params,
        "fit_and_score_kwargs": fit_and_score_kwargs,
    }

    cv_hash = hashlib.md5(
        json.dumps(all_params, sort_keys=True, ensure_ascii=True,
                   default=str).encode()).hexdigest()

    h5_file = os.path.join(workdir, cv_hash + "_params.h5")
    pkl_file = os.path.join(workdir, cv_hash + "_estimator.pkl")

    if not force_refresh and os.path.exists(h5_file):
        ckpt_dict = ddio.load(h5_file)
        scores = ckpt_dict["scores"]
        if fit_and_score_kwargs.get("return_estimator", False):
            with open(pkl_file, "rb") as fp:
                estimator = pickle.load(fp)
            scores.append(estimator)
        return scores
    else:
        scores = _fit_and_score(estimator, **fit_and_score_kwargs)
        os.makedirs(workdir, exist_ok=True)
        if fit_and_score_kwargs.get("return_estimator", False):
            estimator = scores[-1]
            with open(pkl_file, "wb") as fp:
                pickle.dump(estimator, fp)

            ckpt_scores = scores[:-1]
            if isinstance(estimator, Pipeline):
                model = estimator.steps[-1]
            else:
                model = estimator

            estimator_params = _serialize_estimator_params(estimator.get_params())
            fitted_params = {
                "alpha_": getattr(model, "alpha_", None),
                "alphas_": getattr(model, "alphas_", None),
                "l1_ratio_": getattr(model, "l1_ratio_", None),
                "mse_path_": getattr(model, "mse_path_", None),
                "scoring_path_": getattr(model, "scoring_path_", None),
                "intercept_": getattr(model, "intercept_", None),
                "coef_": getattr(model, "coef_", None),
            }
        else:
            estimator_params = None
            fitted_params = None
            ckpt_scores = scores

        fit_and_score_kwargs.pop("X")
        fit_and_score_kwargs.pop("y")

        if "scorer" in fit_and_score_kwargs:
            fit_and_score_kwargs["scorer"] = list(fit_and_score_kwargs["scorer"].keys())

        ckpt_dict = {
            "scores": ckpt_scores,
            "fit_and_score_kwargs": fit_and_score_kwargs,
            "estimator_params": estimator_params,
            "fitted_params": fitted_params,
        }

        ddio.save(h5_file, ckpt_dict)
        return scores
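A hedged sketch of inspecting the checkpoints written above; the working directory is a placeholder, and the only assumption is the <hash>_params.h5 naming used by the function.

import os
import deepdish.io as ddio

workdir = '/tmp/ckpt_workdir'  # placeholder working directory
for fname in os.listdir(workdir):
    if fname.endswith('_params.h5'):
        # Each checkpoint holds the scores plus the serialized parameters.
        ckpt = ddio.load(os.path.join(workdir, fname))
        print(fname, ckpt['scores'])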
def main():
    import argparse
    parser = argparse.ArgumentParser(
        description=("Look inside HDF5 files. Works particularly well "
                     "for HDF5 files saved with deepdish.io.save()."),
        prog='ddls',
        epilog='example: ddls test.h5 -i /foo/bar --ipython')
    parser.add_argument('file', nargs='+',
                        help='filename of HDF5 file')
    parser.add_argument('-d', '--depth', type=int, default=4,
                        help='max depth, defaults to 4')
    parser.add_argument('-nc', '--no-color', action='store_true',
                        help='turn off bash colors')
    parser.add_argument('-i', '--inspect', metavar='GRP',
                        help='prints a specific variable (e.g. /data)')
    parser.add_argument('--ipython', action='store_true',
                        help=('loads file into an IPython session. '
                              'Works with -i'))
    parser.add_argument('--raw', action='store_true',
                        help=('prints the raw HDF5 structure for complex '
                              'data types, such as sparse matrices and pandas '
                              'data frames'))
    parser.add_argument('-f', '--filter', type=str,
                        help=('Print only entries that match this regular expression'))
    parser.add_argument('-l', '--leaves-only', action='store_true',
                        help=('Only print leaves'))
    parser.add_argument('-s', '--summarize', action='store_true',
                        help=('Print summary statistics of numpy arrays'))
    parser.add_argument('-c', '--compression', action='store_true',
                        help=('Print compression method for each array'))
    parser.add_argument('-v', '--version', action='version',
                        version='deepdish {} (io protocol {})'.format(__version__, IO_VERSION))

    args = parser.parse_args()

    colorize = sys.stdout.isatty() and not args.no_color

    settings = {}
    if args.filter:
        settings['filter'] = args.filter

    if args.leaves_only:
        settings['leaves-only'] = True

    if args.summarize:
        settings['summarize'] = True

    if args.compression:
        settings['compression'] = True

    def single_file(files):
        if len(files) >= 2:
            s = 'Error: Select a single file when using --inspect'
            print(paint(s, 'red', colorize=colorize))
            sys.exit(1)
        return files[0]

    def run_ipython(fn, group=None, data=None):
        file_desc = paint(fn, 'yellow', colorize=colorize)
        if group is None:
            path_desc = file_desc
        else:
            path_desc = '{}:{}'.format(
                file_desc,
                paint(group, 'white', colorize=colorize))

        welcome = "Loaded {} into '{}':".format(
            path_desc,
            paint('data', 'blue', colorize=colorize))

        # Import deepdish for the session
        import deepdish as dd
        import IPython
        IPython.embed(header=welcome)

    i = 0
    if args.inspect is not None:
        fn = single_file(args.file)
        try:
            data = io.load(fn, args.inspect)
        except ValueError:
            s = 'Error: Could not find group: {}'.format(args.inspect)
            print(paint(s, 'red', colorize=colorize))
            sys.exit(1)
        if args.ipython:
            run_ipython(fn, group=args.inspect, data=data)
        else:
            print(data)
    elif args.ipython:
        fn = single_file(args.file)
        data = io.load(fn)
        run_ipython(fn, data=data)
    else:
        for f in args.file:
            # State that will be incremented
            settings['filtered_count'] = 0

            s = get_tree(f, raw=args.raw, settings=settings)
            if s is not None:
                if i > 0:
                    print()
                if len(args.file) >= 2:
                    print(paint(f, 'yellow', colorize=colorize))
                s.print(colorize=colorize, max_level=args.depth, settings=settings)
                i += 1

            if settings.get('filter'):
                print('Filtered on: {} ({} rows omitted)'.format(
                    paint(args.filter, 'purple', colorize=colorize),
                    paint(str(settings['filtered_count']), 'white', colorize=colorize)))
def write_current_state(self):
    """
    Write the current state of the sampler to disk.

    The required information to reconstruct the state of the run is written
    to an hdf5 file.
    All but the most recent removed live point in the chain are removed from
    the sampler to reduce memory usage.
    This means it is necessary to not append the first live point to the
    file if updating a previous checkpoint.

    Parameters
    ----------
    sampler: `dynesty.NestedSampler`
        NestedSampler to write to disk.
    """
    check_directory_exists_and_if_not_mkdir(self.outdir)
    resume_file = '{}/{}_resume.h5'.format(self.outdir, self.label)

    if os.path.isfile(resume_file):
        saved = load(resume_file)

        current_state = dict(
            unit_cube_samples=np.vstack([
                saved['unit_cube_samples'], self.sampler.saved_u[1:]]),
            physical_samples=np.vstack([
                saved['physical_samples'], self.sampler.saved_v[1:]]),
            sample_likelihoods=np.concatenate([
                saved['sample_likelihoods'], self.sampler.saved_logl[1:]]),
            sample_log_volume=np.concatenate([
                saved['sample_log_volume'], self.sampler.saved_logvol[1:]]),
            sample_log_weights=np.concatenate([
                saved['sample_log_weights'], self.sampler.saved_logwt[1:]]),
            cumulative_log_evidence=np.concatenate([
                saved['cumulative_log_evidence'], self.sampler.saved_logz[1:]]),
            cumulative_log_evidence_error=np.concatenate([
                saved['cumulative_log_evidence_error'],
                self.sampler.saved_logzvar[1:]]),
            cumulative_information=np.concatenate([
                saved['cumulative_information'], self.sampler.saved_h[1:]]),
            id=np.concatenate([saved['id'], self.sampler.saved_id[1:]]),
            it=np.concatenate([saved['it'], self.sampler.saved_it[1:]]),
            nc=np.concatenate([saved['nc'], self.sampler.saved_nc[1:]]),
            boundidx=np.concatenate([
                saved['boundidx'], self.sampler.saved_boundidx[1:]]),
            bounditer=np.concatenate([
                saved['bounditer'], self.sampler.saved_bounditer[1:]]),
            scale=np.concatenate([
                saved['scale'], self.sampler.saved_scale[1:]]),
        )

    else:
        current_state = dict(
            unit_cube_samples=self.sampler.saved_u,
            physical_samples=self.sampler.saved_v,
            sample_likelihoods=self.sampler.saved_logl,
            sample_log_volume=self.sampler.saved_logvol,
            sample_log_weights=self.sampler.saved_logwt,
            cumulative_log_evidence=self.sampler.saved_logz,
            cumulative_log_evidence_error=self.sampler.saved_logzvar,
            cumulative_information=self.sampler.saved_h,
            id=self.sampler.saved_id,
            it=self.sampler.saved_it,
            nc=self.sampler.saved_nc,
            boundidx=self.sampler.saved_boundidx,
            bounditer=self.sampler.saved_bounditer,
            scale=self.sampler.saved_scale,
        )

    current_state.update(
        ncall=self.sampler.ncall, live_logl=self.sampler.live_logl,
        iteration=self.sampler.it - 1, live_u=self.sampler.live_u,
        live_v=self.sampler.live_v, nlive=self.sampler.nlive,
        live_bound=self.sampler.live_bound, live_it=self.sampler.live_it,
        added_live=self.sampler.added_live)

    weights = np.exp(current_state['sample_log_weights'] -
                     current_state['cumulative_log_evidence'][-1])
    current_state['posterior'] = self.external_sampler.utils.resample_equal(
        np.array(current_state['physical_samples']), weights)

    save(resume_file, current_state)

    self.sampler.saved_id = [self.sampler.saved_id[-1]]
    self.sampler.saved_u = [self.sampler.saved_u[-1]]
    self.sampler.saved_v = [self.sampler.saved_v[-1]]
    self.sampler.saved_logl = [self.sampler.saved_logl[-1]]
    self.sampler.saved_logvol = [self.sampler.saved_logvol[-1]]
    self.sampler.saved_logwt = [self.sampler.saved_logwt[-1]]
    self.sampler.saved_logz = [self.sampler.saved_logz[-1]]
    self.sampler.saved_logzvar = [self.sampler.saved_logzvar[-1]]
    self.sampler.saved_h = [self.sampler.saved_h[-1]]
    self.sampler.saved_nc = [self.sampler.saved_nc[-1]]
    self.sampler.saved_boundidx = [self.sampler.saved_boundidx[-1]]
    self.sampler.saved_it = [self.sampler.saved_it[-1]]
    self.sampler.saved_bounditer = [self.sampler.saved_bounditer[-1]]
    self.sampler.saved_scale = [self.sampler.saved_scale[-1]]
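The two checkpoint methods above reduce to a plain deepdish round trip; here is a minimal standalone sketch of that pattern with illustrative keys and values.

import numpy as np
import deepdish as dd

state = {
    'unit_cube_samples': np.random.rand(10, 2),
    'sample_likelihoods': np.random.rand(10),
    'iteration': 9,
}
dd.io.save('example_resume.h5', state)      # analogue of write_current_state
restored = dd.io.load('example_resume.h5')  # analogue of read_saved_state
assert restored['iteration'] == 9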
def main():
    import argparse
    parser = argparse.ArgumentParser(
        description=("Look inside HDF5 files. Works particularly well "
                     "for HDF5 files saved with deepdish.io.save()."),
        prog='ddls',
        epilog='example: ddls test.h5 -i /foo/bar --ipython')
    parser.add_argument('file', nargs='+', help='filename of HDF5 file')
    parser.add_argument('-d', '--depth', type=int, default=4,
                        help='max depth, defaults to 4')
    parser.add_argument('-nc', '--no-color', action='store_true',
                        help='turn off bash colors')
    parser.add_argument('-i', '--inspect', metavar='GRP',
                        help='prints a specific variable (e.g. /data)')
    parser.add_argument('--ipython', action='store_true',
                        help=('loads file into an IPython session. '
                              'Works with -i'))
    parser.add_argument('--raw', action='store_true',
                        help=('prints the raw HDF5 structure for complex '
                              'data types, such as sparse matrices and pandas '
                              'data frames'))
    parser.add_argument('-f', '--filter', type=str,
                        help=('Print only entries that match this regular expression'))
    parser.add_argument('-l', '--leaves-only', action='store_true',
                        help=('Only print leaves'))
    parser.add_argument('-s', '--summarize', action='store_true',
                        help=('Print summary statistics of numpy arrays'))
    parser.add_argument('-c', '--compression', action='store_true',
                        help=('Print compression method for each array'))
    parser.add_argument('-v', '--version', action='version',
                        version='deepdish {} (io protocol {})'.format(__version__, IO_VERSION))

    args = parser.parse_args()

    colorize = sys.stdout.isatty() and not args.no_color

    settings = {}
    if args.filter:
        settings['filter'] = args.filter

    if args.leaves_only:
        settings['leaves-only'] = True

    if args.summarize:
        settings['summarize'] = True

    if args.compression:
        settings['compression'] = True

    def single_file(files):
        if len(files) >= 2:
            s = 'Error: Select a single file when using --inspect'
            print(paint(s, 'red', colorize=colorize))
            sys.exit(1)
        return files[0]

    def run_ipython(fn, group=None, data=None):
        file_desc = paint(fn, 'yellow', colorize=colorize)
        if group is None:
            path_desc = file_desc
        else:
            path_desc = '{}:{}'.format(
                file_desc,
                paint(group, 'white', colorize=colorize))

        welcome = "Loaded {} into '{}':".format(
            path_desc,
            paint('data', 'blue', colorize=colorize))

        # Import deepdish for the session
        import deepdish as dd
        import IPython
        IPython.embed(header=welcome)

    i = 0
    if args.inspect is not None:
        fn = single_file(args.file)
        try:
            data = io.load(fn, args.inspect)
        except ValueError:
            s = 'Error: Could not find group: {}'.format(args.inspect)
            print(paint(s, 'red', colorize=colorize))
            sys.exit(1)
        if args.ipython:
            run_ipython(fn, group=args.inspect, data=data)
        else:
            print(data)
    elif args.ipython:
        fn = single_file(args.file)
        data = io.load(fn)
        run_ipython(fn, data=data)
    else:
        for f in args.file:
            # State that will be incremented
            settings['filtered_count'] = 0

            s = get_tree(f, raw=args.raw, settings=settings)
            if s is not None:
                if i > 0:
                    print()
                if len(args.file) >= 2:
                    print(paint(f, 'yellow', colorize=colorize))
                s.print(colorize=colorize, max_level=args.depth, settings=settings)
                i += 1

            if settings.get('filter'):
                print('Filtered on: {} ({} rows omitted)'.format(
                    paint(args.filter, 'purple', colorize=colorize),
                    paint(str(settings['filtered_count']), 'white', colorize=colorize)))
def extract(filepath, keys):
    # with open(filepath, 'rb') as buf:
    #     d = io.load(buf)
    d = io.load(filepath)
    new_d = {k: v for k, v in d.iteritems() if k in keys}
    return new_d
def get_n_vars(train_paths, iptagger):
    # with open(train_paths[0], 'rb') as buf:
    #     d = io.load(buf)
    d = io.load(train_paths[0])
    return np.concatenate([d['X'], d[iptagger + '_vars']], axis=1).shape[1]
def parse_wmap(self, key='warping'):
    """ Retrieve the parameters to construct the distortion correction function
    """

    self.warping = dio.load(self.kfile)[key]
def parse_Emap(self, key='coeffs'):
    """ Retrieve the parameters to construct the energy conversion function.
    """

    self.poly_a = dio.load(self.Efile)['calibration'][key]
def loadParmSet(fname):
    from deepdish.io import load
    return load(fname)
def main(MODEL_FILE):
    test_dict = io.load('./data/test_dict_IPConv.h5')
    train_dict = io.load('./data/train_dict_IPConv.h5')

    X_train = train_dict['X']
    y_train = train_dict['y']
    n_features = X_train.shape[2]

    X_test = test_dict['X']
    y_test = test_dict['y']

    ip3d = test_dict['ip3d']  # this is a df

    print 'Building model...'

    if (MODEL_FILE == 'CRNN'):
        graph = build_graph(n_features)
        model = Sequential()
        model.add(graph)
        model.add(Dense(64))

    elif (MODEL_FILE == 'RNN'):
        graph = build_graph_noCNN(n_features)
        model = Sequential()
        model.add(graph)
        model.add(Dense(64))

    model.add(Dropout(0.4))
    model.add(Highway(activation='relu'))
    model.add(Dropout(0.4))  #3
    model.add(Dense(4))
    model.add(Activation('softmax'))

    print 'Compiling model...'
    model.compile('adam', 'categorical_crossentropy')
    model.summary()

    print 'Training:'
    try:
        model.fit(X_train, y_train, batch_size=512,
                  callbacks=[
                      EarlyStopping(verbose=True, patience=20, monitor='val_loss'),
                      ModelCheckpoint(MODEL_FILE + '-progress', monitor='val_loss',
                                      verbose=True, save_best_only=True)
                  ],
                  nb_epoch=200, validation_split=0.2, show_accuracy=True)
    except KeyboardInterrupt:
        print 'Training ended early.'

    # -- load in best network
    model.load_weights(MODEL_FILE + '-progress')

    print 'Saving weights...'
    model.save_weights('./weights/ip3d-replacement_' + MODEL_FILE + '.h5', overwrite=True)

    print 'Testing...'
    yhat = model.predict(X_test, verbose=True, batch_size=512)

    print 'Plotting ROC...'
    fg = plot_ROC(y_test, yhat, ip3d, MODEL_FILE)
    #plt.show()
    fg.savefig('./plots/roc_' + MODEL_FILE + '.pdf')
def main():
    import argparse
    parser = argparse.ArgumentParser(
        description=("Look inside HDF5 files. Works particularly well "
                     "for HDF5 files saved with deepdish.io.save()."),
        prog='ddls',
        epilog='example: ddls test.h5 -i /foo/bar --ipython')
    parser.add_argument('file', nargs='+', help='filename of HDF5 file')
    parser.add_argument('-d', '--depth', type=int, default=4,
                        help='max depth, defaults to 4')
    parser.add_argument('-nc', '--no-color', action='store_true',
                        help='turn off bash colors')
    parser.add_argument('-i', '--inspect', metavar='GRP',
                        help='prints a specific variable (e.g. /data)')
    parser.add_argument('--ipython', action='store_true',
                        help=('loads file into an IPython session. '
                              'Works with -i'))
    parser.add_argument('--raw', action='store_true',
                        help=('prints the raw HDF5 structure for complex '
                              'data types, such as sparse matrices and pandas '
                              'data frames'))
    parser.add_argument('-v', '--version', action='version',
                        version='deepdish {} (io protocol {})'.format(__version__, IO_VERSION))

    args = parser.parse_args()

    colorize = sys.stdout.isatty() and not args.no_color

    def single_file(files):
        if len(files) >= 2:
            s = 'Error: Select a single file when using --inspect'
            print(paint(s, 'red', colorize=colorize))
            sys.exit(1)
        return files[0]

    def run_ipython(fn, group=None, data=None):
        file_desc = paint(fn, 'yellow', colorize=colorize)
        if group is None:
            path_desc = file_desc
        else:
            path_desc = '{}:{}'.format(
                file_desc,
                paint(group, 'white', colorize=colorize))

        welcome = "Loaded {} into '{}':".format(
            path_desc,
            paint('data', 'blue', colorize=colorize))

        # Import deepdish for the session
        import deepdish as dd
        import IPython
        IPython.embed(header=welcome)

    i = 0
    if args.inspect is not None:
        fn = single_file(args.file)
        try:
            data = io.load(fn, args.inspect)
        except ValueError:
            s = 'Error: Could not find group: {}'.format(args.inspect)
            print(paint(s, 'red', colorize=colorize))
            sys.exit(1)
        if args.ipython:
            run_ipython(fn, group=args.inspect, data=data)
        else:
            print(data)
    elif args.ipython:
        fn = single_file(args.file)
        data = io.load(fn)
        run_ipython(fn, data=data)
    else:
        for f in args.file:
            s = get_tree(f, raw=args.raw)
            if s is not None:
                if i > 0:
                    print()
                if len(args.file) >= 2:
                    print(paint(f, 'yellow', colorize=colorize))
                s.print(colorize=colorize, max_level=args.depth)
                i += 1
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from tqdm import tqdm
import deepdish.io as ddio

from utils import *
from config import get_config

config = get_config()

classes = ['A', 'E', 'j', 'L', 'N', 'P', 'R', 'V']
input_shape = (config.input_size, 1)

input_train = ddio.load('dataset/traindata_tri.hdf5')
target_train = ddio.load('dataset/trainlabel_tri.hdf5')
input_test = ddio.load('dataset/testdata_tri.hdf5')
target_test = ddio.load('dataset/testlabel_tri.hdf5')

# Data & model configuration
batch_size = config.batch
no_epochs = config.ae_epochs
validation_split = 0.25
verbosity = 1
latent_dim = 2
num_channels = 1

# # =================
# # Encoder
# # =================
def main(embed_size, normed, input_id, run_name):
    configure_logging()
    logger = logging.getLogger("RNNIP Training")

    logger.info("Loading hdf5's")
    test_dict = io.load(os.path.join('data', 'test_dict_' + input_id + '.h5'))
    train_dict = io.load(os.path.join('data', 'train_dict_' + input_id + '.h5'))

    X_train_stream0 = train_dict['grade']
    X_train_stream1 = train_dict['X']
    y_train = train_dict['y']

    X_test_stream0 = test_dict['grade']
    X_test_stream1 = test_dict['X']
    y_test = test_dict['y']

    ip3d = test_dict['ip3d']

    logger.info('Building model')
    model = build_model(X_train_stream0, X_train_stream1, embed_size, normed)
    model.summary()

    logger.info('Compiling model')
    model.compile('adam', 'categorical_crossentropy', metrics=['accuracy'])

    # -- if the pre-trained model exists, load it in, otherwise start from scratch
    safe_mkdir('weights')
    weights_file = os.path.join('weights', 'rnnip_' + run_name + '.h5')
    try:
        model.load_weights(weights_file)
        logger.info('Loaded pre-trained model from ' + weights_file)
    except IOError:
        logger.info('No pre-trained model found in ' + weights_file)

    logger.info('Training:')
    try:
        model.fit([X_train_stream0, X_train_stream1], y_train,
                  batch_size=512,
                  callbacks=[
                      EarlyStopping(verbose=True, patience=20, monitor='val_loss'),
                      ModelCheckpoint(weights_file, monitor='val_loss',
                                      verbose=True, save_best_only=True)
                  ],
                  epochs=300,
                  validation_split=0.2)
    except KeyboardInterrupt:
        logger.info('Training ended early.')

    # -- load in best network
    logger.info('Loading best epoch')
    model.load_weights(weights_file)

    json_string = model.to_json()
    safe_mkdir('json_models')
    open(os.path.join('json_models', run_name + '.json'), 'w').write(json_string)

    logger.info('Testing')
    safe_mkdir('predictions')
    yhat = model.predict([X_test_stream0, X_test_stream1], verbose=True, batch_size=10000)
    io.save(os.path.join('predictions', 'yhat' + run_name + '.h5'), yhat)

    logger.info('Plotting ROC')
    plot_ROC(y_test, yhat, ip3d, run_name)
def extract(filepath, keys):
    # with open(filepath, 'rb') as buf:
    #     d = io.load(buf)
    d = io.load(filepath)
    new_d = {k: v for k, v in d.iteritems() if k in keys}
    return new_d
def main(MODEL_FILE):
    test_dict = io.load('./data/test_dict_IPConv.h5')
    train_dict = io.load('./data/train_dict_IPConv.h5')

    X_train = train_dict['X']
    y_train = train_dict['y']
    n_features = X_train.shape[2]

    X_test = test_dict['X']
    y_test = test_dict['y']

    # this is a df
    ip3d = test_dict['ip3d']

    print 'Building model...'

    if (MODEL_FILE == 'CRNN'):
        graph = build_graph(n_features)
        model = Sequential()
        model.add(graph)
        # removing because of tensorflow
        #model.add(MaxoutDense(64, 5, input_shape=graph.nodes['dropout'].output_shape[1:]))
        model.add(Dense(64))

    elif (MODEL_FILE == 'RNN'):
        model = Sequential()
        model.add(GRU(25, input_shape=(N_TRACKS, n_features)))  #GRU
        model.add(Dropout(0.2))
        # removing because of tensorflow
        #model.add(MaxoutDense(64, 5))  #, input_shape=graph.nodes['dropout'].output_shape[1:]))
        model.add(Dense(64))

    model.add(Dropout(0.4))
    model.add(Highway(activation='relu'))
    model.add(Dropout(0.3))
    model.add(Dense(4))
    model.add(Activation('softmax'))

    print 'Compiling model...'
    model.compile('adam', 'categorical_crossentropy')
    model.summary()

    print 'Training:'
    try:
        model.fit(X_train, y_train, batch_size=512,
                  callbacks=[
                      EarlyStopping(verbose=True, patience=20, monitor='val_loss'),
                      ModelCheckpoint(MODEL_FILE + '-progress', monitor='val_loss',
                                      verbose=True, save_best_only=True)
                  ],
                  nb_epoch=2, validation_split=0.2, show_accuracy=True)
    except KeyboardInterrupt:
        print 'Training ended early.'

    # -- load in best network
    model.load_weights(MODEL_FILE + '-progress')

    print 'Saving protobuf'
    # write out to a new directory called models
    # the actual graph file is graph.pb
    # the graph def is in the global session
    import tensorflow as tf
    import keras.backend.tensorflow_backend as tfbe

    sess = tfbe._SESSION

    saver = tf.train.Saver()
    tf.train.write_graph(sess.graph_def, 'models/', 'graph.pb', as_text=False)

    save_path = saver.save(sess, "./model-weights.ckpt")
    print "Model saved in file: %s" % save_path

    print saver.as_saver_def().filename_tensor_name
    print saver.as_saver_def().restore_op_name

    print model.get_output()

    print 'Saving weights...'
    model.save_weights('./weights/ip3d-replacement_' + MODEL_FILE + '.h5', overwrite=True)

    json_string = model.to_json()
    open(MODEL_FILE + '.json', 'w').write(json_string)

    print 'Testing...'
    yhat = model.predict(X_test, verbose=True, batch_size=512)

    print 'Plotting ROC...'
    fg = plot_ROC(y_test, yhat, ip3d, MODEL_FILE)
    #plt.show()
    fg.savefig('./plots/roc_' + MODEL_FILE + '.pdf')