def main():
    """ Execution logic """
    x_0, y_0 = 0.5, 2.2
    segment_end = 5.5
    step = 0.1
    x_nodes_list, y_nodes_list = [], []
    # accuracy 2..4 selects the Runge-Kutta order
    for accuracy in range(2, 5):
        x_nodes, y_nodes = runge_kutta_interpolation(
            given_function, x_0, y_0, segment_end, step, accuracy)
        x_nodes_list.append(x_nodes)
        y_nodes_list.append(y_nodes)
    display_plot_async(x_nodes_list, y_nodes_list)

    # Y is the exact solution; Y2..Y4 are the order-2..4 Runge-Kutta results
    print('Runge-Kutta interpolation results')
    print(tp.header(['X', 'Y', 'Y2', 'Y3', 'Y4']))
    for x, y, y2, y3, y4 in zip(
            x_nodes_list[0],
            [explicit_solution(x) for x in x_nodes_list[0]],
            y_nodes_list[0],
            y_nodes_list[1],
            y_nodes_list[2]):
        print(tp.row([x, y, y2, y3, y4]))
def minimize(self, f_df, x0, display=sys.stdout, maxiter=1e3):
    self.display = display
    self.theta = x0

    # initialize the optimizer state
    xk = self.algorithm.send(destruct(x0).copy())
    store = defaultdict(list)
    runtimes = []
    if len(self.operators) == 0:
        self.operators = [proxops.identity()]

    # wrap the objective/gradient and compose the proximal operators
    obj, grad = wrap(f_df, x0)
    transform = compose(destruct, *reversed(self.operators), self.restruct)

    self.optional_print(tp.header(['Iteration', 'Objective', '||Grad||', 'Runtime']))
    try:
        for k in count():

            # evaluate the objective and gradient
            tstart = perf_counter()
            f = obj(xk)
            df = grad(xk)

            xk = transform(self.algorithm.send(df))
            runtimes.append(perf_counter() - tstart)
            store['f'].append(f)

            # Update display
            self.optional_print(tp.row(
                [k, f, np.linalg.norm(destruct(df)), tp.humantime(runtimes[-1])]))

            if k >= maxiter:
                break

    except KeyboardInterrupt:
        pass

    self.optional_print(tp.bottom(4))

    # cleanup
    self.optional_print(u'\u279b Final objective: {}'.format(store['f'][-1]))
    self.optional_print(u'\u279b Total runtime: {}'.format(tp.humantime(sum(runtimes))))
    self.optional_print(u'\u279b Per iteration runtime: {} +/- {}'.format(
        tp.humantime(np.mean(runtimes)),
        tp.humantime(np.std(runtimes)),
    ))

    # result
    return OptimizeResult({
        'x': self.restruct(xk),
        'f': f,
        'df': self.restruct(df),
        'k': k,
        'obj': np.array(store['f']),
    })
def update_display(self, iteration, disp_level, col_width=12):
    """
    Prints information about the optimization procedure to standard output

    Parameters
    ----------
    iteration : int
        The current iteration. Must be either a positive integer or -1,
        which indicates the end of the algorithm

    disp_level : int
        An integer which controls how much information to display, ranging
        from 0 (nothing) to 3 (lots of stuff)

    col_width : int
        The width of each column in the data table, used if disp_level > 1
    """
    # exit and print nothing if disp_level is zero
    if disp_level == 0:
        return

    else:

        # simple update, no table
        if disp_level == 1 and iteration >= 0:
            print('[Iteration %i]' % iteration)

        # fancy table updates
        if disp_level > 1:

            # get the metadata from this iteration (.iloc replaces the removed .irow)
            data = self.metadata.tail(1).iloc[0].to_dict()

            # choose what keys to use
            keys = ['Time (s)', 'Primal resid', 'Dual resid']
            if disp_level > 2:
                keys += ['Momentum (rho)', 'Primal runtimes']

            # initial update: print out the table headers
            if iteration == 1:
                hr = tableprint.hr(len(keys), column_width=col_width)
                print('\n' + hr)
                print(tableprint.header(keys, column_width=col_width))
                print(hr)

            # print data
            tabledata = map(
                lambda d: float(d) if d.size == 1
                else ', '.join(map(lambda i: '{:.2f}'.format(i), d)),
                [np.array(data[key]) for key in keys])
            print(tableprint.row(tabledata, column_width=col_width, precision='4g'))

            if iteration == -1:
                print(tableprint.hr(len(keys), column_width=col_width) + '\n')

        # print convergence statement
        if iteration == -1 and self.converged:
            print('Converged after %i iterations!' % len(self.metadata))
def sample(data_path: str,
           encoder_path: str,
           vocab_path: str,
           sample_length: int = 30,
           output: str = None):
    dump = torch.load(encoder_path, map_location=lambda storage, loc: storage)
    encodermodel = dump['encodermodel']
    decodermodel = dump['decodermodel']
    # Scaler (sklearn StandardScaler) fitted during training
    scaler = dump['scaler']
    # Also load the previous training config
    config_parameters = dump['config']
    vocab = torch.load(vocab_path)
    print(encodermodel)
    print(decodermodel)
    # Move models to the target device and switch to evaluation mode
    encodermodel = encodermodel.to(DEVICE).eval()
    decodermodel = decodermodel.to(DEVICE).eval()
    kaldi_string = parsecopyfeats(data_path, **config_parameters['feature_args'])
    width_length = sample_length * 4
    with stdout_or_file(output) as writer:
        writer.write(
            tp.header(["InputUtterance", "Output Sentence"],
                      style='grid',
                      width=width_length))
        writer.write('\n')
        for k, features in kaldi_io.read_mat_ark(kaldi_string):
            features = scaler.transform(features)
            # Add single batch dimension
            features = torch.from_numpy(features).to(DEVICE).unsqueeze(0)
            # Generate a caption embedding
            encoded_feature, hiddens = encodermodel(features)
            sampled_ids = decodermodel.sample(encoded_feature,
                                              states=hiddens,
                                              maxlength=sample_length)
            # (1, max_seq_length) -> (max_seq_length)
            sampled_ids = sampled_ids[0].cpu().numpy()
            # Convert word ids to words, stopping at the end token
            sampled_caption = []
            for word_id in sampled_ids:
                word = vocab.idx2word[word_id]
                sampled_caption.append(word)
                if word == '<end>':
                    break
            sentence = ''.join(sampled_caption)
            # Print the utterance key and the generated caption
            writer.write(
                tp.row([k, sentence], style='grid', width=width_length))
            writer.write('\n')
            writer.flush()
        writer.write(tp.bottom(2, style='grid', width=width_length))
def update_display(self, iteration, disp_level, col_width=12):  # pragma: no cover
    """
    Prints information about the optimization procedure to standard output

    Parameters
    ----------
    iteration : int
        The current iteration. Must be either a positive integer or -1,
        which indicates the end of the algorithm

    disp_level : int
        An integer which controls how much information to display, ranging
        from 0 (nothing) to 3 (lots of stuff)

    col_width : int
        The width of each column in the data table, used if disp_level > 1
    """
    # exit and print nothing if disp_level is zero
    if disp_level == 0:
        return

    else:

        # simple update, no table
        if disp_level == 1 and iteration >= 0:
            print('[Iteration %i]' % iteration)

        # fancy table updates
        if disp_level > 1:

            # get the metadata from this iteration
            data = valmap(last, self.metadata)

            # choose what keys to use
            keys = ['Time (s)', 'Primal resid', 'Dual resid', 'rho']

            # initial update: print out the table headers
            if iteration == 1:
                print(tableprint.header(keys, width=col_width))

            # print data
            print(tableprint.row([data[k] for k in keys],
                                 width=col_width,
                                 format_spec='4g'))

            if iteration == -1:
                print(tableprint.bottom(len(keys), width=col_width) + '\n')

        # print convergence statement
        if iteration == -1 and self.converged:
            print('Converged after %i iterations!' %
                  len(self.metadata['Primal resid']))
def buildTable(a, b, c, d):
    n = len(a)
    print(tp.header(['i', 'ai', 'bi', 'ci', 'di'], 10))
    num = 1
    for i in range(1, n - 1):
        print(tp.row([num,
                      "{:.3f}".format(a[i]),
                      "{:.3f}".format(b[i]),
                      "{:.3f}".format(c[i]),
                      "{:.3f}".format(d[i - 1])], 10))
        num += 1
    print(tp.row([num,
                  "{:.3f}".format(a[n - 1]),
                  "{:.3f}".format(b[0]),
                  "{:.3f}".format(c[n - 1]),
                  "{:.3f}".format(d[n - 1])], 10))
    print(tp.bottom(5, 10))
def _display_result(sample_mean, sample_variance, standard_deviation):
    print('\n')
    print(tp.header([
        'sample mean',
        'sample variance',
        'standard deviation',
    ]))
    print(tp.row([
        sample_mean,
        sample_variance,
        standard_deviation,
    ]))
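# A minimal sketch, not taken from the snippet above: when every value is
# already available, tableprint can render the whole framed table (header,
# rows, bottom rule) in a single tp.table call. The numbers here are
# made-up placeholders.
import numpy as np
import tableprint as tp

stats = np.array([[4.98, 1.27, 1.13]])  # one example row: mean, variance, std
tp.table(stats,
         ['sample mean', 'sample variance', 'standard deviation'],
         width=20)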
def _printfields(fields, print_header=True):
    names, vals = [], []
    for name, val, _ in fields:
        names.append(name)
        if val is None:
            # Display None as an empty cell
            vals.append('')
        else:
            vals.append(val)

    # Print header
    if print_header:
        print(tableprint.header(names, width=11))

    # Print value row
    print(tableprint.row(vals, width=11))
def geocode_crime_data(data_entries):
    """ Geocodes a list of crime data entries """
    print(tableprint.header(['City Name', 'Lat', 'Long'], width=30))
    for entry in data_entries:
        coords = geocode_city_name(entry['city_name'])
        entry['coords'] = coords
        # Sleep briefly to stay under the geocoding API's rate limit
        time.sleep(0.1)
    print(tableprint.bottom(3, width=30))
    return data_entries
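# The function above opens a three-column frame but never emits a row itself,
# so the per-city rows are presumably printed elsewhere (perhaps inside
# geocode_city_name). A hypothetical variant that prints one row per entry,
# assuming geocode_city_name returns a (lat, long) pair:
def geocode_crime_data_verbose(data_entries):
    print(tableprint.header(['City Name', 'Lat', 'Long'], width=30))
    for entry in data_entries:
        lat, lng = geocode_city_name(entry['city_name'])  # assumed return value
        entry['coords'] = (lat, lng)
        print(tableprint.row([entry['city_name'], lat, lng], width=30))
        time.sleep(0.1)  # stay under the geocoding API's rate limit
    print(tableprint.bottom(3, width=30))
    return data_entries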
def run_explicit_euler_method():
    """ Run explicit Euler interpolation """
    step = 0.05
    x_0, y_0 = 0, 1
    segment_end = 1
    x_nodes, y_nodes = explicit_euler_interpolation(given_function, x_0, y_0,
                                                    segment_start=x_0,
                                                    segment_end=segment_end,
                                                    step=step)
    display_plot_async(x_nodes, y_nodes, "Explicit Euler interpolation")

    # Y is the closed-form solution; Interpolated is the Euler approximation
    print('\nExplicit Euler method results')
    print(tp.header([
        'X',
        'Y',
        'Interpolated',
    ]))
    for x, y in zip(x_nodes, y_nodes):
        print(tp.row([x, onhand_solution(x), y]))
def _display_result(observable_chi, critical_chi):
    print('\nHypothesis of a normal distribution')
    print(tp.header(['Observable chi', 'Critical chi']))
    print(tp.row([observable_chi, critical_chi]))
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "serial_number",
        type=int,
        help="the 10-digit serial number found under the magnetic backplate "
             "of your Wave Plus",
    )
    parser.add_argument(
        "--sample-period",
        type=int,
        default=300,
        help="the number of seconds between reading the current values. "
             "Default: %(default)s",
    )
    parser.add_argument("--pipe",
                        action="store_true",
                        help="pipe the results to a file")
    parser.add_argument(
        "--statusbar",
        action="store_true",
        help="print air quality status suitable for statusbar",
    )
    parser.add_argument(
        "--mac-addr",
        help="the MAC address of the Wave Plus device",
    )
    args = parser.parse_args()

    if len(str(args.serial_number)) != 10:
        print("ERROR: Invalid SN format.")
        parser.print_usage()
        sys.exit(1)

    if args.sample_period <= 0:
        print("ERROR: Invalid SAMPLE-PERIOD. Must be larger than zero.")
        parser.print_usage()
        sys.exit(1)

    try:
        waveplus = WavePlus(args.serial_number, args.mac_addr)

        header = [
            "Humidity",
            "Radon ST avg",
            "Radon LT avg",
            "Temperature",
            "Pressure",
            "CO2 level",
            "VOC level",
        ]

        if args.pipe:
            print(*header, sep=",")
        elif not args.statusbar:
            print(tableprint.header(header, width=TABLEPRINT_WIDTH))

        while True:
            waveplus.connect()
            sensors = waveplus.read()
            data = {var: sensors.get_variable(var) for var in VARIABLES}

            if args.statusbar:
                statusbar_print(data)
                sys.exit(0)

            if args.pipe:
                print(*data.values(), sep=",")
            else:
                print(
                    tableprint.row(list(map(str, data.values())),
                                   width=TABLEPRINT_WIDTH))

            waveplus.disconnect()
            time.sleep(args.sample_period)
    finally:
        waveplus.disconnect()
def sample(self,
           experiment_path: str,
           feature_file: str,
           feature_scp: str,
           output: str = "output_word.txt",
           **kwargs):
    """Generate captions given experiment model

    kwargs: {'max_length': int, 'method': str, 'beam_size': int}
    """
    import tableprint as tp
    dump = torch.load(os.path.join(experiment_path, "saved.pth"),
                      map_location="cpu")
    # Load previous training config
    config = dump["config"]
    vocab_size = len(torch.load(config["vocab_file"]))
    model = self._get_model(config, vocab_size)
    model.load_state_dict(dump["model"])
    # Scaler (sklearn StandardScaler) fitted during training
    scaler = dump["scaler"]
    vocabulary = torch.load(config["vocab_file"])
    zh = config["zh"]
    model = model.to(self.device)

    dataset = SJTUDatasetEval(feature=feature_file,
                              eval_scp=feature_scp,
                              transform=scaler.transform)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             shuffle=False,
                                             collate_fn=collate_fn((1, )),
                                             batch_size=16,
                                             num_workers=0)

    width_length = 80
    pbar = ProgressBar(persist=False, ascii=True)
    writer = open(os.path.join(experiment_path, output), "w")
    writer.write(
        tp.header(["InputUtterance", "Output Sentence"],
                  width=[len("InputUtterance"), width_length]))
    writer.write('\n')
    sentences = []

    def _sample(engine, batch):
        # batch: [keys, feats, feat_lens]
        with torch.no_grad():
            model.eval()
            keys = batch[0]
            output = self._forward(model, batch, mode="sample", **kwargs)
            seqs = output["seqs"].cpu().numpy()
            for idx, seq in enumerate(seqs):
                caption = self._convert_idx2sentence(seq, vocabulary, zh=zh)
                if zh:
                    sentence = " ".join(caption)
                else:
                    sentence = caption
                writer.write(
                    tp.row([keys[idx], sentence],
                           width=[len("InputUtterance"), width_length]) + "\n")
                sentences.append(sentence)

    sample_engine = Engine(_sample)
    pbar.attach(sample_engine)
    sample_engine.run(dataloader)
    writer.write(
        tp.bottom(2, width=[len("InputUtterance"), width_length]) + "\n")
    writer.write("Unique sentence number: {}\n".format(len(set(sentences))))
    writer.close()
for serial in device_serials:
    waveplus_devices.append(WavePlus(int(serial)))

if (Mode == 'terminal'):
    print("\nPress ctrl+C to exit program\n")
    # print "Device serial number: %s" % (SerialNumber)

header = [
    'Timestamp', 'Sensor Serial Number', 'Humidity - %rH',
    'Radon ST avg - Bq/m3', 'Radon LT avg - Bq/m3', 'T - degC', 'P - hPa',
    'CO2 - ppm', 'VOC - ppb'
]

if (Mode == 'terminal'):
    print(tableprint.header(header, width=12))
elif (Mode == 'pipe'):
    print(header)
elif (Mode == 'file'):
    file = open(outfile, 'a+')
    file.write(','.join(header) + "\n")
    file.close()

while True:
    for waveplus in waveplus_devices:
        tries = 0
        while tries <= MaxRetries:
            try:
                tries = tries + 1
                connected = waveplus.connect()
                if not connected:
def on_training_started(engine, outputfun=sys.stdout.write, header=[]):
    outputfun("<== Training Started ==>")
    for line in tp.header(header, style="grid").split("\n"):
        outputfun(line)
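# A hypothetical companion handler, not part of the snippet above: the grid
# opened by tp.header would typically be closed at the end of training with
# tp.bottom, where `ncols` matches len(header) used when training started.
def on_training_ended(engine, outputfun=sys.stdout.write, ncols=1):
    for line in tp.bottom(ncols, style="grid").split("\n"):
        outputfun(line)
    outputfun("<== Training Ended ==>")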
def main(config='config/train.yaml', **kwargs):
    """Trains a model on the given features and vocab.

    :config: A training configuration. Note that all parameters in the config
             can also be manually adjusted with --ARG VALUE
    :returns: None
    """
    config_parameters = parse_config_or_kwargs(config, **kwargs)
    outputdir = os.path.join(
        config_parameters['outputpath'], config_parameters['model'],
        datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S-%f'))
    try:
        os.makedirs(outputdir)
    except IOError:
        pass
    logger = genlogger(outputdir, 'train.log')
    logger.info("Storing data at: {}".format(outputdir))
    logger.info("<== Passed Arguments ==>")
    # Print arguments into logs
    for line in pformat(config_parameters).split('\n'):
        logger.info(line)
    # seed setting
    seed = config_parameters['seed']  # 1~5
    np.random.seed(seed)
    torch.manual_seed(seed)
    kaldi_string = parsecopyfeats(config_parameters['features'],
                                  **config_parameters['feature_args'])
    scaler = getattr(pre, config_parameters['scaler'])(
        **config_parameters['scaler_args'])
    logger.info("<== Estimating Scaler ({}) ==>".format(
        scaler.__class__.__name__))
    inputdim = -1
    for kid, feat in kaldi_io.read_mat_ark(kaldi_string):
        scaler.partial_fit(feat)
        inputdim = feat.shape[-1]
    assert inputdim > 0, "Reading inputstream failed"
    logger.info("Features: {} Input dimension: {}".format(
        config_parameters['features'], inputdim))
    logger.info("<== Labels ==>")
    # Can be label, DAT, DADA ... default is 'label'
    target_label_name = config_parameters.get('label_type', 'label')
    if target_label_name == 'label':
        label_df = pd.read_csv(
            config_parameters['labels'], sep=' ',
            names=['speaker', 'filename', 'physical', 'system', 'label'])
    else:  # 'DAT' or 'DADA'
        label_df = pd.read_csv(
            config_parameters['labels'], sep=' ',
            names=['speaker', 'filename', 'physical', 'system', 'label',
                   'domain'])
    label_encoder = pre.LabelEncoder()
    if target_label_name == 'label':
        label_encoder.fit(label_df[target_label_name].values.astype(str))
        # LabelEncoder needs an iterable, so wrap the value in a list and
        # fetch the 0-th element again (just the encoded string)
        label_df['class_encoded'] = label_df[target_label_name].apply(
            lambda x: label_encoder.transform([x])[0])
        train_labels = label_df[['filename', 'class_encoded']].set_index(
            'filename').loc[:, 'class_encoded'].to_dict()
    else:  # 'DAT' or 'DADA'
        label_encoder_sub = pre.LabelEncoder()
        label_encoder.fit(label_df['label'].values.astype(str))
        label_df['lab_encoded'] = label_df['label'].apply(
            lambda x: label_encoder.transform([x])[0])
        label_encoder_sub.fit(label_df['domain'].values.astype(str))
        label_df['domain_encoded'] = label_df['domain'].apply(
            lambda x: label_encoder_sub.transform([x])[0])
        train_labels = label_df[[
            'filename', 'lab_encoded', 'domain_encoded'
        ]].set_index('filename').to_dict('index')
        train_labels = {
            k: np.array(list(v.values()))
            for k, v in train_labels.items()
        }
    if target_label_name == 'label':
        train_dataloader, cv_dataloader = create_dataloader_train_cv(
            kaldi_string, train_labels,
            transform=scaler.transform,
            target_label_name=target_label_name,
            **config_parameters['dataloader_args'])
    else:  # 'DAT' or 'DADA'
        # The out-of-domain label only exists for the DAT/DADA setups
        outdomain = config_parameters['outdomain']
        outdomain_label = label_encoder_sub.transform([outdomain])[0]
        logger.info("Outdomain: {}, Outdomain label: {}".format(
            outdomain, outdomain_label))
        outdomain_train_dataloader, indomain_train_dataloader, cv_dataloader = \
            create_dataloader_train_cv(
                kaldi_string, train_labels,
                transform=scaler.transform,
                target_label_name=target_label_name,
                outdomain_label=outdomain_label,
                **config_parameters['dataloader_args'])
    if target_label_name == 'label':
        model = getattr(models, config_parameters['model'])(
            inputdim=inputdim,
            outputdim=len(label_encoder.classes_),
            **config_parameters['model_args'])
    else:  # 'DAT' or 'DADA'
        model = getattr(models, config_parameters['model'])(
            inputdim=inputdim,
            outputdim1=len(label_encoder.classes_),
            outputdim2=len(label_encoder_sub.classes_),
            **config_parameters['model_args'])
    logger.info("<== Model ==>")
    for line in pformat(model).split('\n'):
        logger.info(line)
    optimizer = getattr(torch.optim, config_parameters['optimizer'])(
        model.parameters(), **config_parameters['optimizer_args'])
    scheduler = getattr(torch.optim.lr_scheduler,
                        config_parameters['scheduler'])(
                            optimizer, **config_parameters['scheduler_args'])
    criterion = getattr(loss, config_parameters['loss'])(
        **config_parameters['loss_args'])
    trainedmodelpath = os.path.join(outputdir, 'model.th')
    model = model.to(device)
    criterion_improved = criterion_improver(
        config_parameters['improvecriterion'])
    header = ['Epoch', 'Lr', 'Loss(T)', 'Loss(CV)', 'Acc(T)', 'Acc(CV)']
    for line in tp.header(header, style='grid').split('\n'):
        logger.info(line)
    for epoch in range(1, config_parameters['epochs'] + 1):
        if target_label_name == 'label':
            train_loss, train_acc = runepoch(train_dataloader, None, model,
                                             criterion, target_label_name,
                                             optimizer, dotrain=True,
                                             epoch=epoch)
        else:  # 'DAT' or 'DADA'
            train_loss, train_acc = runepoch(outdomain_train_dataloader,
                                             indomain_train_dataloader, model,
                                             criterion, target_label_name,
                                             optimizer, dotrain=True,
                                             epoch=epoch)
        cv_loss, cv_acc = runepoch(cv_dataloader, None, model, criterion,
                                   target_label_name, dotrain=False,
                                   epoch=epoch)
        logger.info(
            tp.row((epoch,) + (optimizer.param_groups[0]['lr'],) +
                   (str(train_loss), str(cv_loss), str(train_acc),
                    str(cv_acc)),
                   style='grid'))
        epoch_meanloss = cv_loss[0] if type(cv_loss) == tuple else cv_loss
        if epoch % config_parameters['saveinterval'] == 0:
            torch.save({'model': model,
                        'scaler': scaler,
                        'encoder': label_encoder,
                        'config': config_parameters},
                       os.path.join(outputdir, 'model_{}.th'.format(epoch)))
        # ReduceLROnPlateau needs a value to work
        schedarg = epoch_meanloss if scheduler.__class__.__name__ == \
            'ReduceLROnPlateau' else None
        scheduler.step(schedarg)
        if criterion_improved(epoch_meanloss):
            torch.save({'model': model,
                        'scaler': scaler,
                        'encoder': label_encoder,
                        'config': config_parameters},
                       trainedmodelpath)
        if optimizer.param_groups[0]['lr'] < 1e-7:
            break
    logger.info(tp.bottom(len(header), style='grid'))
    logger.info("Results are in: {}".format(outputdir))
def on_epoch_begin(self, epoch, logs={}):
    tp.banner(f"Epoch {epoch}")
    print(tp.header(['iter', 'loss']))
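# The callback above only opens the table. A hedged sketch of the remaining
# hooks such a keras.callbacks.Callback might define; the row contents below
# are assumptions, not part of the original snippet.
def on_batch_end(self, batch, logs={}):
    # One table row per batch with the running loss reported by Keras
    print(tp.row([batch, logs.get('loss', float('nan'))]))

def on_epoch_end(self, epoch, logs={}):
    # Close the two-column table opened in on_epoch_begin
    print(tp.bottom(2))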
def train(model, experiment, monitor, num_epochs, augment=False):
    """Train the given network against the given data

    Parameters
    ----------
    model : keras.models.Model or glms.GLM
        A GLM or Keras Model object

    experiment : experiments.Experiment
        An Experiment object

    monitor : io.Monitor
        Saves the model parameters and plots of performance progress

    num_epochs : int
        Number of epochs to train for

    reduce_lr_every : int
        How often to reduce the learning rate

    reduce_rate : float
        A fraction (constant) to multiply the learning rate by
    """
    assert isinstance(model, (Model, GLM)), "'model' must be a GLM or Keras model"

    # initialize training iteration
    iteration = 0
    train_start = time()

    # loop over epochs
    try:
        for epoch in range(num_epochs):
            tp.banner('Epoch #{} of {}'.format(epoch + 1, num_epochs))
            print(tp.header(["Iteration", "Loss", "Runtime"]), flush=True)

            # loop over data batches for this epoch
            for X, y in experiment.train(shuffle=True):

                # update on save_every, assuming it is positive
                if (monitor is not None) and (iteration % monitor.save_every == 0):
                    # performs validation, updates performance plots, saves results to dropbox
                    monitor.save(epoch, iteration, X, y, model.predict)

                # train on the batch
                tstart = time()
                loss = model.train_on_batch({'stim': X, 'loss': y})[0]
                elapsed_time = time() - tstart

                # update
                iteration += 1
                print(tp.row([iteration, float(loss), tp.humantime(elapsed_time)]),
                      flush=True)

            print(tp.bottom(3))

    except KeyboardInterrupt:
        print('\nCleaning up')

    # allows the monitor to perform any post-training visualization
    if monitor is not None:
        elapsed_time = time() - train_start
        monitor.cleanup(iteration, elapsed_time)

    tp.banner('Training complete!')
def check_grad(f_df, xref, stepsize=1e-6, tol=1e-6, width=15, style='round',
               out=sys.stdout):
    """
    Compares the numerical gradient to the analytic gradient

    Parameters
    ----------
    f_df : function
        The analytic objective and gradient function to check

    xref : array_like
        Parameter values to check the gradient at

    stepsize : float, optional
        Stepsize for the numerical gradient. Too big and this will poorly
        estimate the gradient. Too small and you will run into precision
        issues (default: 1e-6)

    tol : float, optional
        Tolerance to use when coloring correct/incorrect gradients
        (default: 1e-6)

    width : int, optional
        Width of the table columns (default: 15)

    style : string, optional
        Style of the printed table, see tableprint for a list of styles
        (default: 'round')
    """
    CORRECT = u'\x1b[32m\N{CHECK MARK}\x1b[0m'
    INCORRECT = u'\x1b[31m\N{BALLOT X}\x1b[0m'

    obj, grad = wrap(f_df, xref, size=0)
    x0 = destruct(xref)
    df = grad(x0)

    # header
    out.write(tp.header(["Numerical", "Analytic", "Error"],
                        width=width, style=style) + "\n")
    out.flush()

    # helper function to color-code an error value
    def parse_error(error):
        # colors
        failure = "\033[91m"
        passing = "\033[92m"
        warning = "\033[93m"
        end = "\033[0m"
        base = "{}{:0.3e}{}"

        # correct
        if error < 0.1 * tol:
            return base.format(passing, error, end)

        # warning
        elif error < tol:
            return base.format(warning, error, end)

        # failure
        else:
            return base.format(failure, error, end)

    # check each dimension
    num_errors = 0
    for j in range(x0.size):

        # take a small step in one dimension
        dx = np.zeros(x0.size)
        dx[j] = stepsize

        # compute the centered difference formula
        df_approx = (obj(x0 + dx) - obj(x0 - dx)) / (2 * stepsize)
        df_analytic = df[j]

        # absolute error
        abs_error = np.linalg.norm(df_approx - df_analytic)

        # relative error
        error = abs_error if np.allclose(abs_error, 0) else abs_error / \
            (np.linalg.norm(df_analytic) + np.linalg.norm(df_approx))

        num_errors += error >= tol
        errstr = CORRECT if error < tol else INCORRECT
        out.write(tp.row([df_approx, df_analytic,
                          parse_error(error) + ' ' + errstr],
                         width=width, style=style) + "\n")
        out.flush()

    out.write(tp.bottom(3, width=width, style=style) + "\n")
    return num_errors
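# A minimal usage sketch (my example, not from the source): f_df is expected
# to return the objective value and its gradient for a given parameter array,
# so a simple quadratic makes the calling convention concrete.
import numpy as np

def quadratic(x):
    """f(x) = 0.5 * ||x||^2 with gradient df/dx = x."""
    return 0.5 * np.sum(x ** 2), x

# Should report zero dimensions failing the check within the default tolerance
num_bad = check_grad(quadratic, np.random.randn(5))
print("dimensions failing the gradient check:", num_bad)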
def main(features: str,
         vocab_file: str,
         config='config/trainconfig.yaml',
         **kwargs):
    """Trains a model on the given features and vocab.

    :features: str: Input features. Needs to be kaldi formatted file
    :vocab_file: str: Vocabulary generated by using build_vocab.py
    :config: A training configuration. Note that all parameters in the config
             can also be manually adjusted with --ARG=VALUE
    :returns: None
    """
    config_parameters = parse_config_or_kwargs(config, **kwargs)
    outputdir = os.path.join(
        config_parameters['encodermodel'] + '_' +
        config_parameters['decodermodel'],
        datetime.datetime.now().strftime('%Y-%m-%d_%H-%M'))
    try:
        os.makedirs(outputdir)
    except IOError:
        pass
    logger = genlogger(outputdir, 'train.log')
    logger.info("Storing data at: {}".format(outputdir))
    logger.info("<== Passed Arguments ==>")
    # Print arguments into logs
    for line in pformat(config_parameters).split('\n'):
        logger.info(line)
    kaldi_string = parsecopyfeats(features,
                                  **config_parameters['feature_args'])
    scaler = getattr(pre, config_parameters['scaler'])(
        **config_parameters['scaler_args'])
    inputdim = -1
    logger.info("<== Estimating Scaler ({}) ==>".format(
        scaler.__class__.__name__))
    for kid, feat in kaldi_io.read_mat_ark(kaldi_string):
        scaler.partial_fit(feat)
        inputdim = feat.shape[-1]
    assert inputdim > 0, "Reading inputstream failed"
    vocabulary = torch.load(vocab_file)
    vocab_size = len(vocabulary)
    logger.info("Features: {} Input dimension: {} Vocab Size: {}".format(
        features, inputdim, vocab_size))
    if 'load_pretrained' in config_parameters and config_parameters['load_pretrained']:
        encodermodeldump = torch.load(
            config_parameters['load_pretrained'],
            map_location=lambda storage, loc: storage)
        pretrainedmodel = encodermodeldump['encodermodel']
        encodermodel = models.PreTrainedCNN(
            inputdim=inputdim,
            pretrained_model=pretrainedmodel,
            **config_parameters['encodermodel_args'])
    else:
        encodermodel = getattr(models, config_parameters['encodermodel'])(
            inputdim=inputdim, **config_parameters['encodermodel_args'])
    decodermodel = getattr(models, config_parameters['decodermodel'])(
        vocab_size=vocab_size, **config_parameters['decodermodel_args'])
    logger.info("<== EncoderModel ==>")
    for line in pformat(encodermodel).split('\n'):
        logger.info(line)
    logger.info("<== DecoderModel ==>")
    for line in pformat(decodermodel).split('\n'):
        logger.info(line)
    params = list(encodermodel.parameters()) + list(decodermodel.parameters())
    train_dataloader, cv_dataloader = create_dataloader_train_cv(
        kaldi_string,
        config_parameters['captions_file'],
        vocab_file,
        transform=scaler.transform,
        **config_parameters['dataloader_args'])
    optimizer = getattr(torch.optim, config_parameters['optimizer'])(
        params, **config_parameters['optimizer_args'])
    scheduler = getattr(torch.optim.lr_scheduler,
                        config_parameters['scheduler'])(
                            optimizer, **config_parameters['scheduler_args'])
    criterion = torch.nn.CrossEntropyLoss()
    trainedmodelpath = os.path.join(outputdir, 'model.th')
    encodermodel = encodermodel.to(device)
    decodermodel = decodermodel.to(device)
    criterion_improved = criterion_improver(
        config_parameters['improvecriterion'])
    for line in tp.header(
            ['Epoch', 'MeanLoss(T)', 'StdLoss(T)', 'Loss(CV)', 'StdLoss(CV)',
             'Acc(T)', 'Acc(CV)', 'Forcing?'],
            style='grid').split('\n'):
        logger.info(line)
    teacher_forcing_ratio = config_parameters['teacher_forcing_ratio']
    for epoch in range(1, config_parameters['epochs'] + 1):
        use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False
        train_loss_mean_std, train_acc = trainepoch(
            train_dataloader, encodermodel, decodermodel, criterion,
            optimizer, vocabulary, use_teacher_forcing)
        cv_loss_mean_std, cv_acc = sample_cv(
            cv_dataloader, encodermodel, decodermodel, criterion)
        logger.info(
            tp.row((epoch,) + train_loss_mean_std + cv_loss_mean_std +
                   (train_acc, cv_acc, use_teacher_forcing),
                   style='grid'))
        epoch_meanloss = cv_loss_mean_std[0]
        if epoch % config_parameters['saveinterval'] == 0:
            torch.save({'encodermodel': encodermodel,
                        'decodermodel': decodermodel,
                        'scaler': scaler,
                        'config': config_parameters},
                       os.path.join(outputdir, 'model_{}.th'.format(epoch)))
        # ReduceLROnPlateau needs a value to work
        schedarg = epoch_meanloss if scheduler.__class__.__name__ == \
            'ReduceLROnPlateau' else None
        scheduler.step(schedarg)
        if criterion_improved(epoch_meanloss):
            torch.save({'encodermodel': encodermodel,
                        'decodermodel': decodermodel,
                        'scaler': scaler,
                        'config': config_parameters},
                       trainedmodelpath)
        else:
            # No improvement: reload the best checkpoint so far
            dump = torch.load(trainedmodelpath)
            encodermodel.load_state_dict(dump['encodermodel'].state_dict())
            decodermodel.load_state_dict(dump['decodermodel'].state_dict())
        if optimizer.param_groups[0]['lr'] < 1e-6:
            break
    logger.info(tp.bottom(8, style='grid'))
    # Sample results
    from sample import sample
    sample(data_path=features,
           encoder_path=trainedmodelpath,
           vocab_path=vocab_file,
           output=os.path.join(outputdir, 'output_word.txt'))
try:
    #---- Connect to device ----#
    wave = Wave(SerialNumber)
    wave.scan()

    if (Mode == 'terminal'):
        print("\nPress ctrl-C to exit program\n")
        print("Device serial number: %s" % (SerialNumber))

    header = ['Datetime', 'Humidity', 'Temperature', 'Radon ST avg',
              'Radon LT avg']

    if (Mode == 'terminal'):
        print(tableprint.header(header, width=20))
    elif (Mode == 'pipe'):
        print(header)

    while True:
        wave.connect()

        # read current values
        date_time = wave.read(SENSOR_IDX_DATETIME)
        humidity = wave.read(SENSOR_IDX_HUMIDITY)
        temperature = wave.read(SENSOR_IDX_TEMPERATURE)
        radon_st_avg = wave.read(SENSOR_IDX_RADON_ST_AVG)
        radon_lt_avg = wave.read(SENSOR_IDX_RADON_LT_AVG)

        data = [date_time, humidity, temperature, radon_st_avg, radon_lt_avg]

        # Print data
waveplus = WavePlus(SerialNumber)
ledController = LedController(LedMode)

if (Mode == 'terminal'):
    print("\nPress ctrl+C to exit program\n")
    print(f"Device serial number: {SerialNumber}")

header = ['Date, time', 'Humidity', 'Radon ST avg', 'Radon LT avg',
          'Temperature', 'Pressure', 'CO2 level', 'VOC level']

COLUMN_WIDTH = 12

if (Mode == 'terminal'):
    print(tableprint.header(header, width=COLUMN_WIDTH))
elif (Mode == 'pipe'):
    print(header)

MAX_FAILURES = 5
failure_count = 0

while True:
    ledController.OnCommsStart()
    try:
        waveplus.connect()
        failure_count = 0
    except:
        ledController.OnCommsEnd()
        if failure_count < MAX_FAILURES:
        # shift = self.mean * (now - self.last_computed_sys_time)
        # tti[0] = tti[0] + shift
        # tti[1] = tti[1] + shift
        # tti[2] = tti[1] + shift
        tti.append(now)
        return tti


samples = NTPMonitor(5, sys.argv[1], 3, 300, .999999999)
width = 22
print(
    tableprint.header([
        'Last Offset (us)', 'Last Delay', 'Mean Offset (us)', 'RMS Offset',
        'Offset St. Dev. (us)', 'Interval Width (us)', 'Interval Lower',
        'Interval Upper'
    ],
                      style='clean',
                      width=width))
print(
    tableprint.row([
        samples.last.offset * 1e+6,
        np.average([x.offset for x in samples.data[-30:]]) * 1e6,
        samples.mean * 1e+6,
        samples.rms_offset * 1e6,
        samples.stdev * 1e+6,
        samples.tt_interval()[3] * 1e6,
        samples.tt_interval()[0],
        samples.tt_interval()[1]
    ],
        width=width,
        style='clean',
try:
    #---- Initialize ----#
    waveplus = WavePlus(SerialNumber)

    if (Mode == 'terminal'):
        print("\nPress ctrl+C to exit program\n")
        print("Device serial number: %s" % (SerialNumber))

    header = ['Humidity', 'Radon ST avg', 'Radon LT avg', 'Temperature',
              'Pressure', 'CO2 level', 'VOC level']

    if (Mode == 'terminal'):
        print(tableprint.header(header, width=12))
    elif (Mode == 'pipe'):
        print(header)

    while True:
        waveplus.connect()

        # read values
        sensors = waveplus.read()

        # extract
        humidity = str(sensors.getValue(SENSOR_IDX_HUMIDITY)) + " " + str(
            sensors.getUnit(SENSOR_IDX_HUMIDITY))
        radon_st_avg = str(
            sensors.getValue(SENSOR_IDX_RADON_SHORT_TERM_AVG)) + " " + str(
def start(self):
    print('\n'.join((self.hr,
                     tp.header(self.column_names, self.width),
                     self.hr)),
          flush=True)
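# A minimal alternative sketch, assuming a recent tableprint release: the
# header/row/bottom streaming pattern used throughout these snippets can also
# be written with the TableContext context manager, which prints the header on
# entry, one row per call, and the bottom rule on exit.
import numpy as np
import tableprint as tp

headers = ['Column A', 'Column B', 'Column C']
with tp.TableContext(headers, width=12) as table:
    for _ in range(5):
        table(np.random.randn(3))  # each call renders one formatted row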
def table_header(self, header, *args, **kwargs):
    self.write(tableprint.header(header, *args, **kwargs))
def main(config='config/ReLU/0Pool/crnn_maxpool.yaml', **kwargs):
    """Trains a model on the given features and vocab.

    :config: A training configuration. Note that all parameters in the config
             can also be manually adjusted with --ARG=VALUE
    :returns: None
    """
    config_parameters = parse_config_or_kwargs(config, **kwargs)
    outputdir = os.path.join(
        config_parameters['outputpath'], config_parameters['model'],
        datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%f'))
    try:
        os.makedirs(outputdir)
    except IOError:
        pass
    logger = genlogger(outputdir, 'train.log')
    logger.info("Storing data at: {}".format(outputdir))
    logger.info("<== Passed Arguments ==>")
    # Print arguments into logs
    for line in pformat(config_parameters).split('\n'):
        logger.info(line)
    kaldi_string = parsecopyfeats(config_parameters['features'],
                                  **config_parameters['feature_args'])
    scaler = getattr(pre, config_parameters['scaler'])(
        **config_parameters['scaler_args'])
    inputdim = -1
    logger.info("<== Estimating Scaler ({}) ==>".format(
        scaler.__class__.__name__))
    for kid, feat in kaldi_io.read_mat_ark(kaldi_string):
        scaler.partial_fit(feat)
        inputdim = feat.shape[-1]
    assert inputdim > 0, "Reading inputstream failed"
    logger.info("Features: {} Input dimension: {}".format(
        config_parameters['features'], inputdim))
    logger.info("<== Labels ==>")
    label_df = pd.read_csv(config_parameters['labels'], sep='\t')
    label_df.event_labels = label_df.event_labels.str.split(',')
    label_df = label_df.set_index('filename')
    uniquelabels = list(np.unique(
        [item for row in label_df.event_labels.values for item in row]))
    many_hot_encoder = ManyHotEncoder(label_list=uniquelabels,
                                      time_resolution=1)
    label_df['manyhot'] = label_df['event_labels'].apply(
        lambda x: many_hot_encoder.encode(x, 1).data.flatten())
    utt_labels = label_df.loc[:, 'manyhot'].to_dict()
    train_dataloader, cv_dataloader = create_dataloader_train_cv(
        kaldi_string, utt_labels,
        transform=scaler.transform,
        **config_parameters['dataloader_args'])
    model = getattr(models, config_parameters['model'])(
        inputdim=inputdim,
        output_size=len(uniquelabels),
        **config_parameters['model_args'])
    logger.info("<== Model ==>")
    for line in pformat(model).split('\n'):
        logger.info(line)
    optimizer = getattr(torch.optim, config_parameters['optimizer'])(
        model.parameters(), **config_parameters['optimizer_args'])
    scheduler = getattr(torch.optim.lr_scheduler,
                        config_parameters['scheduler'])(
                            optimizer, **config_parameters['scheduler_args'])
    criterion = getattr(losses, config_parameters['loss'])(
        **config_parameters['loss_args'])
    trainedmodelpath = os.path.join(outputdir, 'model.th')
    model = model.to(device)
    criterion_improved = criterion_improver(
        config_parameters['improvecriterion'])
    header = ['Epoch', 'UttLoss(T)', 'UttLoss(CV)', 'UttAcc(T)', 'UttAcc(CV)',
              'mAUC(CV)']
    for line in tp.header(header, style='grid').split('\n'):
        logger.info(line)
    poolingfunction_name = config_parameters['poolingfunction']
    pooling_function = parse_poolingfunction(poolingfunction_name)
    for epoch in range(1, config_parameters['epochs'] + 1):
        train_utt_loss_mean_std, train_utt_acc, train_auc_utt = runepoch(
            train_dataloader, model, criterion, optimizer, dotrain=True,
            poolfun=pooling_function)
        cv_utt_loss_mean_std, cv_utt_acc, cv_auc_utt = runepoch(
            cv_dataloader, model, criterion, dotrain=False,
            poolfun=pooling_function)
        logger.info(
            tp.row((epoch,) + (train_utt_loss_mean_std[0],
                               cv_utt_loss_mean_std[0],
                               train_utt_acc, cv_utt_acc, cv_auc_utt),
                   style='grid'))
        epoch_meanloss = cv_utt_loss_mean_std[0]
        if epoch % config_parameters['saveinterval'] == 0:
            torch.save({'model': model,
                        'scaler': scaler,
                        'encoder': many_hot_encoder,
                        'config': config_parameters},
                       os.path.join(outputdir, 'model_{}.th'.format(epoch)))
        # ReduceLROnPlateau needs a value to work
        schedarg = epoch_meanloss if scheduler.__class__.__name__ == \
            'ReduceLROnPlateau' else None
        scheduler.step(schedarg)
        if criterion_improved(epoch_meanloss):
            torch.save({'model': model,
                        'scaler': scaler,
                        'encoder': many_hot_encoder,
                        'config': config_parameters},
                       trainedmodelpath)
        if optimizer.param_groups[0]['lr'] < 1e-7:
            break
    logger.info(tp.bottom(len(header), style='grid'))
    logger.info("Results are in: {}".format(outputdir))
    return outputdir