def on_epoch_end(self, epoch, logs=None):
    self._verbose_print("Calculating metrics...")
    last_weights_path = self._load_weights_for_model()
    images, gt_boxes, gt_class_ids, gt_masks, results = detect(self.inference_model, self.dataset)
    metrics = compute_metrics(images, gt_boxes, gt_class_ids, gt_masks, results)
    pprint.pprint(metrics)

    # Images
    for i, img in enumerate(images):
        if img.shape[2] != 3:
            img = cv.cvtColor(img, cv.COLOR_GRAY2BGR)
        visualize_result(img, results[i], gt_masks[i], scores=True)
        neptune.log_image(f'image_epoch_{epoch}', img[..., ::-1])  # BGR -> RGB

    # Metrics
    for key, value in metrics:
        neptune.log_metric(key, epoch, value)

    # Save best result
    name, mAP = metrics[0]
    if mAP > self.best_mAP:
        self.best_mAP = mAP
        self.best_epoch = epoch
        self.best_model = last_weights_path

def get_bayes_scikit_score(X_train, y_train, X_test, y_test, X_val=None, y_val=None,
                           max_evals=25, folds=5):
    model = BaesianSklearnSelector('classification', X_test=X_test, y_test=y_test,
                                   max_evals=max_evals)
    model.fit(X_train, y_train)
    score = accuracy_score(y_val, model.predict(X_val))
    neptune.log_metric(f'skopt-{max_evals}-iterations', score)
    return score

def main(arguments):
    with open(arguments.filepath, 'r') as fp:
        json_exp = json.load(fp)

    neptune.init(api_token=arguments.api_token,
                 project_qualified_name=arguments.project_name)

    with neptune.create_experiment(
            name=json_exp['name'],
            description=json_exp['description'],
            params=json_exp['params'],
            properties=json_exp['properties'],
            tags=json_exp['tags'],
            upload_source_files=json_exp['upload_source_files']):
        for name, channel_xy in json_exp['log_metric'].items():
            for x, y in zip(channel_xy['x'], channel_xy['y']):
                neptune.log_metric(name, x=x, y=y)
        for name, channel_xy in json_exp['log_text'].items():
            for x, y in zip(channel_xy['x'], channel_xy['y']):
                neptune.log_text(name, x=x, y=y)
        for name, channel_xy in json_exp['log_image'].items():
            for x, y in zip(channel_xy['x'], channel_xy['y']):
                neptune.log_image(name, x=x, y=y)
        for filename in json_exp['log_artifact']:
            neptune.log_artifact(filename)

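# A minimal example of the experiment JSON that main() above expects. The field
# names come straight from the loader code; the values are illustrative only.
#
# {
#     "name": "example-experiment",
#     "description": "re-logged from JSON",
#     "params": {"lr": 0.01},
#     "properties": {"data_version": "v1"},
#     "tags": ["import"],
#     "upload_source_files": ["main.py"],
#     "log_metric": {"loss": {"x": [0, 1, 2], "y": [0.9, 0.5, 0.3]}},
#     "log_text": {"note": {"x": [0], "y": ["first epoch"]}},
#     "log_image": {"sample": {"x": [0], "y": ["sample.png"]}},
#     "log_artifact": ["model.pkl"]
# }
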
def get_grid_score(X_train, y_train, X_test, y_test, folds=5):
    np.random.seed(200)
    model = GridSelector('classification', folds=folds, steps=6)
    model.fit(X_train, y_train)
    score = accuracy_score(y_test, model.predict(X_test))
    neptune.log_metric('grid', score)
    return score

def test(self, model, test_loader, worker_id, round_no):
    self.getback_model(model)
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(self.device), target.to(self.device, dtype=torch.int64)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()
    test_loss /= len(test_loader.dataset)
    test_acc = 100. * correct / len(test_loader.dataset)
    if self.neptune_enable:
        neptune.log_metric("test_loss_" + str(worker_id), test_loss)
        neptune.log_metric("test_acc_" + str(worker_id), test_acc)
    if self.log_enable:
        with open(self.log_file_path + str(worker_id) + "_test", "a") as file:
            TO_FILE = '{} {} "{{/*Accuracy:}}\\n{}%" {}\n'.format(
                round_no, test_loss, test_acc, test_acc)
            file.write(TO_FILE)
    logging.info('Test Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset), test_acc))
    return test_acc

def write_results(config: configure_finetuning.FinetuningConfig, results):
    """Write evaluation metrics to disk."""
    utils.log("Writing results to", config.results_txt)
    utils.mkdir(config.results_txt.rsplit("/", 1)[0])
    utils.write_pickle(results, config.results_pkl)
    with tf.io.gfile.GFile(config.results_txt, "a") as f:
        results_str = ""
        for trial_results in results:
            for task_name, task_results in trial_results.items():
                if task_name == "time" or task_name == "global_step":
                    continue
                results_str += task_name + ": " + " - ".join([
                    "{:}: {:.2f}".format(k, v) for k, v in task_results.items()
                ]) + "\n"
                # Neptune metric logging
                neptune.append_tag('ft')
                neptune.append_tag('tensorflow')
                neptune.set_property('task', task_name)
                for k, v in task_results.items():
                    neptune.log_metric(k, v)
        f.write(results_str)

def on_epoch_end(self, trainer):
    for metric, value in trainer.metrics.items():
        if 'val' in metric:
            neptune.log_metric(metric, value, timestamp=trainer.global_step)
    if self.vis_function:
        vis = self.vis_function(trainer.out['inputs'],
                                trainer.out['outputs'],
                                trainer.out['targets'])
        for name, value in vis.items():
            # value is assumed to be a CHW numpy array; downscale large
            # visualizations before logging (PIL expects HWC, so convert
            # back and forth around thumbnail()).
            if max(value.shape[1:]) > 512:
                thumb = Image.fromarray(value.transpose(1, 2, 0))
                thumb.thumbnail((512, 512))
                value = np.asarray(thumb).transpose(2, 0, 1)
            neptune.log_image(name, value.transpose(1, 2, 0))
    cb = self.get_callback(trainer.callbacks, ConfusionMatrix)
    if cb:
        train_vis = plot_confusion_matrix(cb.train_matrix, cb.class_names, as_array=True)
        val_vis = plot_confusion_matrix(cb.val_matrix, cb.class_names, as_array=True)
        neptune.log_image('train_confusion_matrix', train_vis.transpose(1, 2, 0),
                          timestamp=trainer.global_step)
        neptune.log_image('val_confusion_matrix', val_vis.transpose(1, 2, 0),
                          timestamp=trainer.global_step)

def training_epoch_end(self, training_step_outputs):
    self.training = True
    self.train()
    # pick a random 50% of data for each epoch
    # we don't know the right thresholds and right learning rate
    # and find the thresholds
    train_acc = np.mean([x['train_acc'] for x in training_step_outputs])
    train_acc = torch.tensor(train_acc, dtype=torch.float32)
    print("train_acc", train_acc)
    train_loss = torch.stack([x['loss'] for x in training_step_outputs]).mean()
    neptune.log_metric('train_loss', train_loss)
    neptune.log_metric('train acc', train_acc)
    # self.logger.experiment.add_scalar("Loss/Train", avg_loss, self.epoch)
    self.eval()
    return {
        'log': {'train_loss': train_loss, 'train_acc': train_acc},
        'progress_bar': {'train_loss': train_loss, 'train_acc': train_acc},
    }

def train_epochs(self, epochs, scheduler=None):
    for _ in range(epochs):
        epoch_name = f"{self.leaf_model.model_prefix}-{self.epoch}"
        losses, accs = train_one_epoch(self.leaf_model, self.train_dataloader,
                                       log_steps=self.log_steps, epoch_name=epoch_name,
                                       steps_offset=self.steps_offset, neptune=self.neptune,
                                       grad_norm=self.grad_norm, fp16=self.fp16)
        self.steps_offset += len(self.train_dataloader)
        self.epoch += 1
        val_loss, val_acc = validate_one_epoch(self.leaf_model, self.val_dataloader)
        print(f"Validation after step {self.steps_offset}: loss {val_loss}, acc {val_acc}")
        if self.neptune:
            neptune.log_metric("loss/val", y=val_loss, x=self.steps_offset)
            neptune.log_metric("acc/val", y=val_acc, x=self.steps_offset)
        self.leaf_model.save_checkpoint(epoch_name, epoch_name=epoch_name)
        if scheduler is not None:
            scheduler.step(np.mean(losses))

def dump_tabular(self):
    """
    Write all of the diagnostics from the current iteration.

    Writes both to stdout, and to the output file.
    """
    if proc_id() == 0:
        vals = []
        key_lens = [len(key) for key in self.log_headers]
        max_key_len = max(15, max(key_lens))
        keystr = '%' + '%d' % max_key_len
        fmt = "| " + keystr + "s | %15s |"
        n_slashes = 22 + max_key_len
        print("-" * n_slashes)
        for key in self.log_headers:
            val = self.log_current_row.get(key, "")
            valstr = "%8.3g" % val if hasattr(val, "__float__") else val
            print(fmt % (key, valstr))
            vals.append(val)
            neptune.log_metric(key, val)
        print("-" * n_slashes, flush=True)
        if self.output_file is not None:
            if self.first_row:
                self.output_file.write("\t".join(self.log_headers) + "\n")
            self.output_file.write("\t".join(map(str, vals)) + "\n")
            self.output_file.flush()
    self.log_current_row.clear()
    self.first_row = False

def run_train(self, train_generator, val_generator, n_epoches, weights_file, factor,
              start_lr, min_lr, lr_patience, overall_patience, loss_delta=0.):
    self.best_loss = 100
    self.best_metric = 0
    self.best_epoch = 0
    self.curr_lr_loss = 100
    self.best_lr_epoch = 0
    self.train_model.to(self.device)
    # params = [p for p in self.train_model.parameters() if p.requires_grad]
    optimizer = optim.AdamW(params=self.train_model.parameters(), lr=start_lr)
    for epoch in range(n_epoches):
        print('!!!! Epoch {}'.format(epoch))
        train_loss = self.train_epoch(optimizer, train_generator)
        print(f'Train loss: {train_loss}, lr: {get_lr(optimizer)}')
        neptune.log_metric('Train loss', train_loss)
        neptune.log_metric('Lr', get_lr(optimizer))
        if not self.on_epoch_end(epoch, optimizer, val_generator, weights_file, factor,
                                 min_lr, lr_patience, overall_patience, loss_delta):
            break

def training_epoch_end(self, training_step_outputs):
    self.training = True
    self.train()
    train_acc = np.mean([x['train_acc'] for x in training_step_outputs])
    train_acc = torch.tensor(train_acc, dtype=torch.float32)
    print("train_acc", train_acc)
    train_loss = torch.stack([x['loss'] for x in training_step_outputs]).mean()
    if self.log:
        neptune.log_metric('train_loss', train_loss)
        neptune.log_metric('train acc', train_acc)
        # self.logger.experiment.add_scalar("Loss/Train", avg_loss, self.epoch)
    torch.cuda.empty_cache()
    self.eval()
    return {
        'log': {'train_loss': train_loss, 'train_acc': train_acc},
        'progress_bar': {'train_loss': train_loss, 'train_acc': train_acc},
    }

def evaluate_single():
    neptune.create_experiment(name="Bayesian_hyperopt cv")
    path = r'datasets/regression'
    names = os.listdir(path)
    datasets = [{"name": x, "target_column": "class"} for x in names]
    failed_names = []
    for dataset in tqdm.tqdm(datasets):
        try:
            data = pd.read_csv(path + '/' + dataset["name"])
            change_df_column(data, dataset['target_column'], 'class')
            X, y = data.drop(columns=['class']), data['class']
            X, y = preproces_data(X, y)
            X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42,
                                                                test_size=0.2)
            # model = BayesSelector(objective, cv=5, max_evals=10)
            # model = BaesianSklearnSelector(objective, X_test=X_test, y_test=y_test, max_evals=10)
            # model = GridSelector(objective)
            # model = LGBMRegressor()
            model = BaesianSklearnSelector(objective, X_test=X_test, y_test=y_test,
                                           max_evals=100)
            model.fit(X_train, y_train)
            score_xgb = r2_score(y_test, model.predict(X_test))
            neptune.log_metric(dataset['name'], score_xgb)
        except Exception as ex:
            print(f'{dataset["name"]} failed')
            print(ex)
            failed_names.append(dataset["name"])

def train_one_epoch(config, epoch, device, model, optimizer, criterion, loader):
    print('\nEpoch: %d' % epoch)
    model.train()
    train_total, train_loss = 0, 0
    for batch_idx, (x, x_occu, y, y_occu) in enumerate(loader):
        x, x_occu, y, y_occu = x.to(device), x_occu.to(device), y.to(device), y_occu.to(device)
        optimizer.zero_grad()
        y_pred, mu, log_var = model(x, x_occu, y, y_occu, train=True)
        loss = criterion(mu, log_var, y_pred, y)
        loss.backward()
        optimizer.step()
        # train_ade += ade * x.size(0)
        # train_fde += fde * x.size(0)
        train_total += x.size(0)
        train_loss += loss.item() * x.size(0)
        if config['neptune']:
            # neptune.log_metric('train_batch_ADE', ade)
            # neptune.log_metric('train_batch_FDE', fde)
            neptune.log_metric('train_batch_Loss', loss.item())
        # progress_bar(batch_idx, len(loader), 'Lr: %.4e | Loss: %.3f | ADE[m]: %.3f | FDE[m]: %.3f'
        #              % (get_lr(optimizer), train_loss / train_total, train_ade / train_total, train_fde / train_total))
        progress_bar(batch_idx, len(loader), 'Lr: %.4e | Loss: %.3f'
                     % (get_lr(optimizer), train_loss / train_total))

def update(self, **kwargs):
    itr = 0
    for name, value in kwargs.items():
        if name == 'itr':
            itr = value
            continue
        try:
            self.metrics[name].append(value)
            self.rmetrics[name].append(value)
        except KeyError:
            self.metrics[name] = [value]
            self.rmetrics[name] = [value]
    if itr % self.log_interval == 0:
        if self.use_neptune:
            for key in self.rmetrics.keys():
                mean = np.mean(self.rmetrics[key])
                self.rmetrics[key] = []
                neptune.log_metric(f'{key}_{self.phase}',
                                   itr + self.epoch * self.total_batches, mean)
        else:
            for key in self.rmetrics.keys():
                mean = np.mean(self.rmetrics[key])
                self.rmetrics[key] = []
                print(f' - {key}: {mean}')

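# Hypothetical usage of update() above: the reserved 'itr' keyword carries the
# batch index, every other keyword is treated as a metric; running means are
# flushed to Neptune (or stdout) every log_interval iterations. The names
# `meter` and `train_step` are assumptions for illustration.
#
# for i, batch in enumerate(loader):
#     loss, acc = train_step(batch)
#     meter.update(itr=i, loss=loss, acc=acc)
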
def log2neptune(trainer, logfile: str) -> None:
    filename = os.path.join(trainer.out, logfile)
    with open(filename, "r") as f:
        log_info = json.load(f)
    log_info = log_info[-1]  # most recent log entry
    iteration = log_info["iteration"]
    for k, v in log_info.items():
        neptune.log_metric(k, iteration, v)

def train(self):
    for epoch in tqdm(range(self.num_epochs)):
        for actions in self.dataset_loader:
            loss = self.generator_handler.train_on_action_batch(actions=actions,
                                                                device=self.device)
            neptune.log_metric("loss", loss)
        self.generator_handler.save(self.save_dir)

def main(env_name, n_epochs, eval_frequency, actor_net_dim, critic_net_dim,
         dsicriminator_net_dim, lr, gamma, tau, grad_clip, batch_size, entropy_weight,
         min_buffer_size, clip, ppo_updates, expert, activation, value_coef, betas,
         max_steps, tag, record):
    seed = np.random.randint(0, 1000000)
    import pybulletgym  # noqa: F401 -- importing registers the PyBullet Gym envs
    discriminator_updates = 1
    expert, activation = initiate_run(
        env_name, actor_net_dim, critic_net_dim, dsicriminator_net_dim, lr, gamma, tau,
        grad_clip, batch_size, entropy_weight, min_buffer_size, clip, ppo_updates,
        discriminator_updates, expert, activation, value_coef, betas, max_steps, seed,
        tag, record)
    env = Env(env_name)
    actor = Actor(env, actor_net_dim, activation, env.env.action_space.high,
                  env.env.action_space.low)
    critic = Critic(env, critic_net_dim, activation)
    discriminator = Discriminator(env, dsicriminator_net_dim, lr, batch_size, activation, betas)
    agent = Agent(gamma, clip, actor, critic, lr, batch_size, grad_clip, entropy_weight,
                  value_coef, betas)
    memory = PPOMemory(gamma, tau)
    args = [min_buffer_size, eval_frequency, ppo_updates, discriminator_updates, expert, seed]
    gail = GAIL(env, actor, critic, discriminator, agent, memory, *args)
    epoch_to_best = gail.update(n_epochs, max_steps, record)
    if record:
        neptune.log_metric('best_epoch', epoch_to_best)
        neptune.stop()

def evaluate(config, device, model, optimizer, criterion, loader):
    model.eval()
    # eval_ade, eval_fde, eval_total = 0, 0, 0
    eval_total, eval_loss = 0, 0
    with torch.no_grad():  # no gradients are needed during evaluation
        for batch_idx, (x, x_occu, y, y_occu) in enumerate(loader):
            x, x_occu, y, y_occu = x.to(device), x_occu.to(device), y.to(device), y_occu.to(device)
            y_pred, mu, log_var = model(x, x_occu, y, y_occu, train=True)
            loss = criterion(mu, log_var, y_pred, y)
            eval_total += x.size(0)
            eval_loss += loss.item() * x.size(0)
            progress_bar(batch_idx, len(loader), 'Lr: %.4e | Loss: %.3f'
                         % (get_lr(optimizer), eval_loss / eval_total))
            # progress_bar(batch_idx, len(loader), 'Lr: %.4e | ADE[m]: %.3f | FDE[m]: %.3f'
            #              % (get_lr(optimizer), eval_ade / eval_total, eval_fde / eval_total))
    if config['neptune']:
        neptune.log_metric('val_Loss', eval_loss / eval_total)
        # neptune.log_metric('{}_ADE'.format(loader.dataset.mode), eval_ade / eval_total)
        # neptune.log_metric('{}_FDE'.format(loader.dataset.mode), eval_fde / eval_total)
    return eval_loss / eval_total

def __call__(self, trainer: BaseTrainer, batch_x: torch.Tensor, batch_y: torch.Tensor,
             loss: torch.Tensor, metric_results: tp.DefaultDict[str, tp.List[tp.Any]],
             *args, **kwargs):
    neptune.log_metric('validation loss', loss.item())
    for key, value in metric_results.items():
        neptune.log_metric(key, value[-1])

def validation_epoch_end(self, validation_step_outputs):
    self.training = False
    self.eval()
    val_loss = torch.stack([x['val_loss'] for x in validation_step_outputs]).mean()
    # val_tot = [x['val_acc'] for x in validation_step_outputs]
    # val_acc = np.mean(val_tot)
    print("HERE\n\n\n\nValidation in each step\n")
    # print([x['val_acc'] for x in validation_step_outputs])
    val_acc = np.mean([x['val_acc'] for x in validation_step_outputs])
    val_acc = torch.tensor(val_acc, dtype=torch.float32)
    print("val_loss", val_loss)
    print("val_acc", val_acc)
    if self.log:
        neptune.log_metric('val_loss', val_loss)
        neptune.log_metric('val acc', val_acc)
    self.epoch += 1
    self.train()
    return {
        'log': {'val_loss': val_loss, 'val_acc': val_acc},
        'progress_bar': {'val_loss': val_loss, 'val_acc': val_acc},
    }

def train_evaluate(search_params):
    hyperparameters = {}
    pick_kwargs = {}
    for k in list(search_params.keys()):
        if k in ['w_dfh', 'w_sharpe', 'w_100d', 'v_100d', 'v_dfh', 'v_rfl']:
            pick_kwargs[k] = search_params[k]
        else:
            hyperparameters[k] = search_params[k]
    hyperparameters['pick_kwargs'] = pick_kwargs
    print('------------')
    print(json.dumps(hyperparameters, indent=2, sort_keys=True))

    sim = Sim(neptune=neptune, period='2y', timedelay=100, window=100, timestep=1,
              budget=5000, stockPicks=5, avoidDowntrends=True, sellAllOnCrash=False,
              **hyperparameters)
    stats = sim.run()

    analysis = Analysis(neptune=neptune, stats=stats, positions=sim.portfolio.holdings,
                        prices=sim.downloader.prices)
    # analysis.chart()
    output, advanced_stats, obj_stats = analysis.positionStats()
    for k in list(obj_stats.keys()):
        neptune.log_metric(k, obj_stats[k])
    print(output)
    # neptune.log_artifact('data/output_1y.pkl')

    sharpe = analysis.sharpe()
    stats = sim.portfolio.summary()
    # Clamp NaN/inf/out-of-range Sharpe values so the tuner sees a finite, bounded score
    if math.isnan(sharpe) or math.isinf(sharpe) or sharpe <= -2 or sharpe >= 5:
        sharpe = -5
    # neptune.log_metric('sharpe', sharpe)
    # neptune.log_metric('start_value', 5000)
    # neptune.log_metric('end_value', stats['total_value'])
    report = {
        'hyperparameters': hyperparameters,
        'sharpe': sharpe,
        'end_value': stats['total_value'],
        'gains': (stats['total_value'] - 5000.0) / 5000.0,
    }
    neptune.log_text('report', json.dumps(report, indent=2, sort_keys=True))
    return sharpe

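# One way to drive train_evaluate() above is a dict-based tuner such as Ax; the
# original project's optimizer is not shown here, so this wiring, the parameter
# names, and the bounds are all assumptions for illustration.
#
# from ax.service.managed_loop import optimize
#
# best_parameters, values, experiment, model = optimize(
#     parameters=[
#         {"name": "w_sharpe", "type": "range", "bounds": [0.0, 1.0]},
#         {"name": "w_dfh", "type": "range", "bounds": [0.0, 1.0]},
#     ],
#     evaluation_function=train_evaluate,
#     objective_name="sharpe",
#     total_trials=25,
# )
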
def validate(enc, dec, device, val_loader, wordmap, epoch):
    '''
    Calculate validation metric
    :param val_loader: pytorch loader of images
    :param wordmap: dictionary mapping from word to word index
    :param epoch: current epoch of training
    :return: None
    '''
    enc.eval()
    dec.eval()
    dec = dec.to(device)
    enc = enc.to(device)
    references = list()  # True captions
    hypotheses = list()  # Predicted captions
    with torch.no_grad():
        for batch_n, (imgs, caps, caplens, allcaps) in enumerate(val_loader):
            print(batch_n)
            imgs = imgs.to(device)
            caps = caps.to(device)
            caplens = caplens.to(device)
            enc_output = enc(imgs)
            dec_out, captions, captions_lengths, sort_ind = dec(captions=caps,
                                                                encoder_out=enc_output,
                                                                captions_lengths=caplens)
            scores_copy = dec_out.clone()
            allcaps = allcaps[sort_ind]  # Resort because captions were sorted in decoder
            for j in range(allcaps.shape[0]):
                img_caps = allcaps[j].tolist()
                img_captions = list(
                    map(lambda c: [w for w in c if w not in {wordmap['<start>'], wordmap['<pad>']}],
                        img_caps))  # remove <start> and pads
                references.append(img_captions)
            # Take predicted captions for each image
            _, preds = torch.max(scores_copy, dim=2)
            preds = preds.tolist()
            temp_preds = list()
            for j, p in enumerate(preds):
                temp_preds.append(preds[j][:captions_lengths[j]])  # remove pads
            preds = temp_preds
            hypotheses.extend(preds)
    # Calculate BLEU-4 score
    bleu4 = corpus_bleu(references, hypotheses)
    # Log score to neptune and print metric
    neptune.log_metric('bleu4', bleu4)
    print('Epoch {}, BLEU4'.format(epoch), bleu4)

def log_metric(self, metric_name, y, **kwargs):
    if 'x' not in kwargs:
        self.last_x += 1
        x = self.last_x
    else:
        x = kwargs['x']
    neptune.log_metric(metric_name, x, y)
    print(f'{x} | {metric_name} = {y}')

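# Hypothetical usage of the wrapper above (assuming `logger` is an instance of
# the surrounding class): omitting x auto-increments the internal step counter,
# while passing x pins the point explicitly.
#
# logger.log_metric('val_loss', 0.42)        # x = last_x + 1
# logger.log_metric('val_loss', 0.40, x=10)  # explicit step
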
def lr_scheduler(epoch):
    if epoch < 20:
        new_lr = PARAMS['learning_rate']
    else:
        new_lr = PARAMS['learning_rate'] * np.exp(0.05 * (20 - epoch))
    neptune.log_metric('learning_rate', new_lr)
    return new_lr

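# A minimal sketch of attaching lr_scheduler above to Keras training. `model`
# and the training data are assumptions; tf.keras.callbacks.LearningRateScheduler
# is the standard hook for an epoch -> learning-rate function like this one.
#
# import tensorflow as tf
#
# lr_callback = tf.keras.callbacks.LearningRateScheduler(lr_scheduler)
# model.fit(x_train, y_train, epochs=PARAMS['epochs'], callbacks=[lr_callback])
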
def on_epoch_end(self, epoch: int, state: DotDict):
    if state.core.mode == "val":
        neptune.log_metric('val_epoch_loss', state.core.loss.item())
    else:
        try:
            neptune.log_metric('train_epoch_loss', state.core.loss.item())
        except Exception:
            # loss may be missing on non-training passes; skip logging
            pass

def objective(trial, params):
    # Suggest values of the hyperparameters using a trial object.
    n_bot_layers = trial.suggest_int('n_bot_layers', 2, 5)
    n_top_layers = trial.suggest_int('n_top_layers', 2, 4)
    bot_layers = []
    top_layers = []
    arch_sparse_feature_size = trial.suggest_int('arch_sparse_feature_size', 16, 32)
    for i in range(n_bot_layers):
        if i == 0:
            # Tied to the number of numerical columns (fixed by the input data)
            bot_layers.append(params["den_fea"])
        elif i == (n_bot_layers - 1):
            # The last bottom width must equal arch_sparse_feature_size
            bot_layers.append(arch_sparse_feature_size)
        else:
            bot_features = trial.suggest_int('n_bot_units_l{}'.format(i), 32, 512)
            bot_layers.append(bot_features)
    for i in range(n_top_layers):
        if i == (n_top_layers - 1):
            # Always 1, as this is binary classification
            top_layers.append(1)
        else:
            top_features = trial.suggest_int('n_top_units_l{}'.format(i), 32, 512)
            top_layers.append(top_features)
    arch_mlp_bot = '-'.join(str(x) for x in bot_layers)
    arch_mlp_top = '-'.join(str(x) for x in top_layers)
    learning_rate = trial.suggest_float('learning_rate', 0.001, 0.1)
    # loss_function = trial.suggest_categorical('loss_function', ['mse', 'bce'])

    # Assign trial hyperparameters to params
    params["arch_sparse_feature_size"] = arch_sparse_feature_size
    params["arch_mlp_bot"] = arch_mlp_bot
    params["arch_mlp_top"] = arch_mlp_top
    params["learning_rate"] = learning_rate

    # Run DLRM and get results
    dlrm_model = DLRM_Model(**params)
    validation_results = dlrm_model.run()
    for key in validation_results:
        if key not in ['classification_report', 'confusion_matrix']:
            neptune.log_metric(key, validation_results[key])

    # Print trial (if verbose)
    if self.verbose:
        print('Parameters: ', params, '\n Results: ', validation_results)
    return validation_results['best_pre_auc_test']  # ['best_auc_test'] Need to decide which metric is best

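# A minimal sketch of driving objective() above with Optuna; `params` must hold
# the fixed DLRM settings (including 'den_fea', the number of dense features),
# and the placeholder here is an assumption. Note that objective() references
# self.verbose, so in the original it is presumably a method or closure with
# access to an enclosing object.
#
# import optuna
#
# study = optuna.create_study(direction='maximize')
# study.optimize(lambda trial: objective(trial, params), n_trials=50)
# print(study.best_params)
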
def lr_lambda(self, iteration: int) -> float:
    if iteration < self.num_warmup:
        step = (self.max_factor - self.min_factor) / float(self.num_warmup)
        fac = self.min_factor + (1 + iteration) * step
    else:
        it = 1 + (iteration - self.num_warmup) // self.mini_epoch_sz
        fac = self.max_factor / max(1.0, np.sqrt(it / self.temp))
        fac = max(fac, self.min_factor)
    neptune.log_metric('InvSqrtLR_factor', x=iteration, y=fac)
    return fac

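# A sketch of plugging lr_lambda above into PyTorch's LambdaLR, which multiplies
# each parameter group's base LR by the returned factor; `optimizer`, `sched`
# (an instance of the surrounding class), and `train_step` are assumptions.
#
# scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=sched.lr_lambda)
# for step in range(num_steps):
#     train_step()
#     scheduler.step()
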
def log(self):
    score = self.evaluate(rd_filtered=False)
    neptune.log_metric(f"{self.prefix}record_unfiltered_score", score)
    if self.record_filtered:
        filtered_score = self.evaluate(rd_filtered=True)
        neptune.log_metric(f"{self.prefix}record_filtered_score", filtered_score)
    self.t += 1

def step(self, scoring_function, device, pool):
    apprentice_smis, apprentice_scores = self.update_storage_by_apprentice(
        scoring_function, device, pool)
    expert_smis, expert_scores = self.update_storage_by_expert(scoring_function, pool)
    loss, fit_size = self.train_apprentice_step(device)
    neptune.log_metric("apprentice_loss", loss)
    neptune.log_metric("fit_size", fit_size)
    return apprentice_smis + expert_smis, apprentice_scores + expert_scores