def display_macro_page():
    """Displays the macro lexical information page."""
    metrics_list = Metrics.query.all()
    wl_avg_data = Metrics.get_wl_average_data(metrics_list)
    wl_range_data = Metrics.get_wl_range_data(metrics_list)
    ll_avg_data = Metrics.get_ll_average_data(metrics_list)
    ll_range_data = Metrics.get_ll_range_data(metrics_list)
    pl_lines_data = Metrics.get_pl_lines_data(metrics_list)
    pl_char_data = Metrics.get_pl_char_data(metrics_list)
    pl_word_data = Metrics.get_pl_words_data(metrics_list)
    sl_avg_data = Metrics.get_stanza_length_data(metrics_list)
    stanza_num_data = Metrics.get_stanza_num_data(metrics_list)
    sl_range_data = Metrics.get_stanza_range_data(metrics_list)
    return render_template("macro.html",
                           wl_avg_data=wl_avg_data,
                           wl_range_data=wl_range_data,
                           ll_avg_data=ll_avg_data,
                           ll_range_data=ll_range_data,
                           pl_lines_data=pl_lines_data,
                           pl_char_data=pl_char_data,
                           pl_word_data=pl_word_data,
                           sl_avg_data=sl_avg_data,
                           sl_range_data=sl_range_data,
                           stanza_num_data=stanza_num_data)
def send_metric():
    from model import Metrics, Client, get_or_create
    if request.method == 'POST':
        data = request.json
        if data.get('client') and data.get('value'):
            client = get_or_create(db.session, Client, name=data['client'])
            Metrics.create_metric(db.session, client.id, data['value'])
            return 'OK', 200
    return 'FAIL', 500
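# A minimal sketch of exercising send_metric() from a client. The '/metric'
# route path and the host are assumptions -- the route decorator is not shown
# above. The payload shape matches what the handler checks for.
def send_metric_example(base_url='http://localhost:5000'):
    import requests
    resp = requests.post(base_url + '/metric',
                         json={'client': 'host-01', 'value': 42})
    # send_metric() answers 'OK'/200 only when both 'client' and 'value'
    # are present in the JSON body; otherwise it returns 'FAIL'/500.
    return resp.status_code == 200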
def display_sentiment_page():
    """Displays the sentiment information page."""
    metrics_list = Metrics.query.all()
    pos_neg = Metrics.get_pos_neg_data(metrics_list)
    obj_abs = Metrics.get_obj_abs_data(metrics_list)
    common = Metrics.get_common_data(metrics_list)
    gender = Metrics.get_gender_data(metrics_list)
    active = Metrics.get_active_data(metrics_list)
    return render_template("sentiment.html",
                           pos_neg=pos_neg,
                           obj_abs=obj_abs,
                           common=common,
                           gender=gender,
                           active=active)
def display_micro_page():
    """Displays the micro lexical information page."""
    metrics_list = Metrics.query.all()
    rhyme_rep = Metrics.get_rhyme_rep_data(metrics_list)
    lex_div = Metrics.get_lex_data(metrics_list)
    filler = Metrics.get_filler_data(metrics_list)
    narrator = Metrics.get_narrator_data(metrics_list)
    alliteration = Metrics.get_alliteration_data(metrics_list)
    return render_template("micro.html",
                           rhyme_rep=rhyme_rep,
                           lex_div=lex_div,
                           filler=filler,
                           narrator=narrator,
                           alliteration=alliteration)
def index():
    from model import Metrics
    local_metrics = {}
    total_metrics = {}
    metrics = Metrics.get_metrics(limit=100)
    if metrics:
        local_metrics = Metrics.get_metrics_info(db.session, limit=100)
        total_metrics = Metrics.get_metrics_info(db.session, limit=None)
    return render_template('index.html',
                           title='Система сбора информации',  # "Information collection system"
                           metrics=metrics,
                           total_metrics=total_metrics,
                           local_metrics=local_metrics)
def test_one_epoch(net,
                   dataloader,
                   device,
                   epoch,
                   epochlength,
                   wandblog=True,
                   dst_path=None,
                   dst_format=None):
    """Go through the testing data once, measure performance and send the
    results to Weights & Biases.

    Arguments
    ---------
    net : torch neural net
    dataloader : torch dataloader
    device : str
        Options: 'cpu', 'cuda'
    epoch : int
        Current epoch.
    epochlength : int
        Total number of samples in one epoch, including testing.
    [wandblog] : bool
        Default: True
        Send monitoring data to Weights & Biases.
    !![return_pred] : bool!!
        Not usable because of memory consumption.
        Default: False
        If True, predictions are returned.
    [dst_path] : str path
        Default: None, i.e. do not store predictions.
        Path to the destination folder to store predictions at.
    [dst_format] : str
        Format of storage. Options are decided in utils.store().

    Returns
    -------
    infodict : dict
        Dict of info about the process, which is sent to Weights & Biases
        to monitor the process there.
    """
    net.eval()
    cuminfodict = {
        "epoch": [],
        "test_fnr": [],
        "test_fpr": [],
        "test_voe": [],
        "test_rvd": [],
        "test_dice": [],
        "test_iou": [],
        "test_dice_numerator": [],
        "test_dice_denominator": [],
        "test_classification_accuracy": [],
        # "test_conmat": [],
    }
    for i, sample in enumerate(dataloader):
        vol = sample['vol'].to(device, non_blocking=True)
        lab_seg = sample['lab'].to(device, non_blocking=True)
        pred_soft = net(vol)
        pred = torch.round(pred_soft)
        # onehot_lab = utils.one_hot(lab_seg, nclasses=3)

        ## Log metrics
        metrics = Metrics(pred, lab_seg, mode='test')
        infodict = metrics.get_metric_dict()
        for key in infodict:
            infodict[key] = infodict[key].detach().cpu().numpy()
        infodict.update({"epoch": epoch})
        utils.update_cumu_dict(cuminfodict, infodict)

        ## Classification accuracy
        if (config["label_type"] == "binary") and wandblog:
            pred = pred.view(pred.size(0), -1).sum(-1) > 0
            lab = lab_seg.view(lab_seg.size(0), -1).sum(-1) > 0
            acc = (pred == lab).float().mean().detach().cpu().numpy()
            wandb.log({
                "Test Positive Predictions": pred.sum(),
                "Test Positive Labels": lab.sum(),
                "Test Accuracy": float(acc)
            })

        ## Store prediction
        if dst_path is not None:
            # to_store = torch.cat([vol, lab, pred])
            utils.store(pred_soft,
                        dst_path,
                        sample['store_idx'],
                        format=dst_format,
                        epoch=epoch,
                        focus=config["focus"])
        if wandblog:
            for j in range(pred_soft.size(0)):
                wandb.log({
                    "Example Prediction": [
                        wandb.Image(
                            pred_soft[j, 0, ...].detach().cpu().numpy() * 255,
                            caption="Prediction " + str(config["runid"]) +
                            str(epoch) + str(sample["store_idx"][j]))
                    ]
                })
                wandb.log({
                    "Example Image": [
                        wandb.Image(vol[j, 0, ...].cpu().numpy() * 255,
                                    caption="Image " + str(config["runid"]) +
                                    str(epoch) + str(sample["store_idx"][j]))
                    ]
                })
                wandb.log({
                    "Example Label": [
                        wandb.Image(lab_seg[j, 0, ...].cpu().numpy() * 255,
                                    caption="Label " + str(config["runid"]) +
                                    str(epoch) + str(sample["store_idx"][j]))
                    ]
                })

    ## Infologging
    for key in cuminfodict:
        cuminfodict[key] = np.mean(cuminfodict[key], axis=0)
    if wandblog:
        wandb.log(cuminfodict)
    return cuminfodict
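# A minimal sketch of utils.update_cumu_dict as it is used above (an
# assumption -- the real helper is not shown and may differ). It appends each
# per-batch value to the running list stored under the same key, so that
# np.mean(..., axis=0) can reduce every key to a per-epoch average. The
# membership guard mirrors the commented-out "test_conmat" key: values whose
# key is absent from the cumulative dict are silently skipped.
def update_cumu_dict(cumudict, infodict):
    for key, value in infodict.items():
        if key in cumudict:
            cumudict[key].append(value)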
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)  # collect batch norm update operations
with tf.control_dependencies(update_ops):
    optimizer = tf.train.MomentumOptimizer(per_sample_learning_rate,
                                           momentum=0.9,
                                           use_nesterov=True)
    gradients = optimizer.compute_gradients(target_vars.opt_loss)
    adjusted_gradients = []
    for (grad, x) in gradients:
        adjusted_grad = grad
        if x.name in lr_adjusted_variables and grad is not None:
            adj_factor = lr_adjusted_variables[x.name]
            adjusted_grad = grad * adj_factor
            trainlog("Adjusting gradient for " + x.name + " by " + str(adj_factor))
        adjusted_gradients.append((adjusted_grad, x))
    train_step = optimizer.apply_gradients(adjusted_gradients)

metrics = Metrics(model, target_vars, include_debug_stats=True)

def reduce_norm(x, axis=None, keepdims=False):
    return tf.sqrt(tf.reduce_mean(tf.square(x), axis=axis, keepdims=keepdims))

relative_update_by_var = dict([
    (v.name, per_sample_learning_rate * reduce_norm(grad) / (1e-10 + reduce_norm(v)))
    for (grad, v) in adjusted_gradients if grad is not None
])

total_parameters = 0
for variable in tf.trainable_variables():
    shape = variable.get_shape()
    variable_parameters = 1
    for dim in shape:
        variable_parameters *= dim.value
    total_parameters += variable_parameters
    trainlog("Model variable %s, %d parameters" % (variable.name, variable_parameters))
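# For illustration only (an assumption, not taken from the original code):
# lr_adjusted_variables maps TF variable names to per-variable learning-rate
# scale factors, applied to the gradients in the loop above. The variable
# names below are hypothetical; real names come from tf.trainable_variables()
# and end in ":0".
lr_adjusted_variables = {
    "output_head/weights:0": 4.0,  # hypothetical: train the head faster
    "output_head/biases:0": 4.0,   # hypothetical: train the head faster
}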
def main():
    args = parse_args()

    # logging definition
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    model_name = time.strftime("%Y%m%d%H%M", time.localtime(time.time()))
    log_dir = os.path.join(os.getcwd(), args.logdir)
    if not os.path.exists(log_dir):
        os.mkdir(log_dir)
    log_file = os.path.join(log_dir, model_name + ".log")
    fh = logging.FileHandler(log_file, mode="w")
    fh.setLevel(logging.DEBUG)
    formatter = logging.Formatter(
        "%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s"
    )
    fh.setFormatter(formatter)
    logger.addHandler(fh)

    # argument validation
    args.cuda = args.cuda and torch.cuda.is_available()
    device = torch.device("cuda:0" if args.cuda else "cpu")
    if args.sparse and args.wd != 0:
        logger.error('Sparsity and weight decay are incompatible, pick one!')
        exit()
    logger.debug(args)

    torch.manual_seed(args.seed)
    random.seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)
        torch.backends.cudnn.benchmark = True

    # download dataset
    download_sick(args.datadir)
    download_wordvecs(args.glove)

    # build vocabulary
    filenames = [
        os.path.join(args.datadir, "SICK_train.txt"),
        os.path.join(args.datadir, "SICK_trial.txt"),
        os.path.join(args.datadir, "SICK_test_annotated.txt")
    ]
    build_vocab(filenames, os.path.join(args.datadir, "vocab.txt"))

    # preparing vocabulary
    vocabulary = Vocab(filename=os.path.join(args.datadir, "vocab.txt"),
                       data=[
                           Constants.PAD_WORD, Constants.UNK_WORD,
                           Constants.BOS_WORD, Constants.EOS_WORD
                       ])
    logger.info('==> SICK vocabulary size : %d ' % vocabulary.size())

    # preparing dataset
    train_set = SICKDataset(vocabulary, args.seq_len, args.num_classes,
                            os.path.join(args.datadir, "SICK_train.txt"))
    logger.info('==> Size of train data : %d ' % len(train_set))
    dev_set = SICKDataset(vocabulary, args.seq_len, args.num_classes,
                          os.path.join(args.datadir, "SICK_trial.txt"))
    logger.info('==> Size of dev data : %d ' % len(dev_set))
    test_set = SICKDataset(vocabulary, args.seq_len, args.num_classes,
                           os.path.join(args.datadir, "SICK_test_annotated.txt"))
    logger.info('==> Size of test data : %d ' % len(test_set))

    # preparing model
    model = RNNSimilarity(vocab_size=vocabulary.size(),
                          embedding_dim=args.embedding_dim,
                          mem_dim=args.mem_dim,
                          hid_dim=args.hid_dim,
                          num_layers=args.num_layers,
                          rnn_type=args.rnn_type,
                          dropout=args.dropout,
                          bidirectional=args.bidirectional,
                          seq_len=args.seq_len,
                          num_classes=args.num_classes,
                          sparsity=args.sparse,
                          freeze=args.freeze_embed,
                          name=model_name)
    criterion = nn.KLDivLoss()

    # preparing embeddings:
    # for words common to the dataset vocab and GloVe, use GloVe vectors;
    # for other words in the dataset vocab, use random normal vectors
    emb_file = os.path.join(args.datadir, 'sick_embed.pth')
    if os.path.isfile(emb_file):
        emb = torch.load(emb_file)
    else:
        # load GloVe embeddings and vocab
        glove_vocab, glove_emb = load_word_vectors(
            os.path.join(args.glove, 'glove.840B.300d'))
        logger.info('==> GloVe vocabulary size: %d ' % glove_vocab.size())
        emb = torch.zeros(vocabulary.size(),
                          glove_emb.size(1),
                          dtype=torch.float,
                          device=device)
        emb.normal_(0, 0.05)
        # zero out the embeddings for padding and other special words
        # if they are absent in vocab
        for idx, item in enumerate([
                Constants.PAD_WORD, Constants.UNK_WORD, Constants.BOS_WORD,
                Constants.EOS_WORD
        ]):
            emb[idx].zero_()
        for word in vocabulary.labelToIdx.keys():
            if glove_vocab.getIndex(word):
                emb[vocabulary.getIndex(word)] = glove_emb[
                    glove_vocab.getIndex(word)]
        torch.save(emb, emb_file)
    # plug these into the embedding matrix inside the model
    model.word_emb.weight.data.copy_(emb)

    # preparing optimizer
    model.to(device), criterion.to(device)
    if args.optim == 'adam':
        optimizer = optim.Adam(filter(lambda p: p.requires_grad,
                                      model.parameters()),
                               lr=args.lr,
                               weight_decay=args.wd)
    elif args.optim == 'adagrad':
        optimizer = optim.Adagrad(filter(lambda p: p.requires_grad,
                                         model.parameters()),
                                  lr=args.lr,
                                  weight_decay=args.wd)
    elif args.optim == 'sgd':
        optimizer = optim.SGD(filter(lambda p: p.requires_grad,
                                     model.parameters()),
                              lr=args.lr,
                              weight_decay=args.wd)
    else:
        raise TypeError("Unknown optimizer type %s" % str(args.optim))
    metrics = Metrics(args.num_classes)

    # create trainer object for training and testing
    trainer = Trainer(args, model, criterion, optimizer, device)

    best = -float('inf')
    pearson_list = []
    mse_list = []
    loss_list = []
    for epoch in range(args.epochs):
        train_loss = trainer.train(train_set)
        train_loss, train_pred, train_target = trainer.test(train_set)
        dev_loss, dev_pred, dev_target = trainer.test(dev_set)
        test_loss, test_pred, test_target = trainer.test(test_set)

        train_pearson = metrics.pearson(train_pred, train_target)
        train_mse = metrics.mse(train_pred, train_target)
        logger.info(
            '==> Epoch {}, Train \tLoss: {}\tPearson: {}\tMSE: {}'.format(
                epoch + 1, train_loss, train_pearson, train_mse))
        dev_pearson = metrics.pearson(dev_pred, dev_target)
        dev_mse = metrics.mse(dev_pred, dev_target)
        logger.info(
            '==> Epoch {}, Dev \tLoss: {}\tPearson: {}\tMSE: {}'.format(
                epoch + 1, dev_loss, dev_pearson, dev_mse))
        test_pearson = metrics.pearson(test_pred, test_target)
        test_mse = metrics.mse(test_pred, test_target)
        logger.info(
            '==> Epoch {}, Test \tLoss: {}\tPearson: {}\tMSE: {}'.format(
                epoch + 1, test_loss, test_pearson, test_mse))

        # plotting data
        pearson_list.append((train_pearson, dev_pearson, test_pearson))
        mse_list.append((train_mse, dev_mse, test_mse))
        loss_list.append((train_loss, dev_loss, test_loss))

        if best < test_pearson:
            best = test_pearson
            checkpoint = {
                'model': trainer.model.state_dict(),
                'optim': trainer.optimizer,
                'pearson': test_pearson,
                'mse': test_mse,
                'args': args,
                'epoch': epoch
            }
            logger.info('==> New optimum found, checkpointing everything now...')
            torch.save(checkpoint,
                       '%s.pt' % os.path.join(args.logdir, model.name))

    # draw the plots
    draw(pearson_list, "Pearson value", "pearson", model_name, args.logdir)
    draw(mse_list, "MSE value", "mse", model_name, args.logdir)
    draw(loss_list, "Loss value", "loss", model_name, args.logdir)
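# A minimal sketch of the draw() helper called above (an assumption -- the
# real implementation is not shown). It plots the (train, dev, test) tuples
# collected per epoch and saves the figure under the log directory.
import os
import matplotlib
matplotlib.use('Agg')  # render without a display
import matplotlib.pyplot as plt

def draw(values, title, name, model_name, logdir):
    # values is a list of (train, dev, test) tuples, one per epoch
    train, dev, test = zip(*values)
    plt.figure()
    for series, label in ((train, 'train'), (dev, 'dev'), (test, 'test')):
        plt.plot(range(1, len(series) + 1), series, label=label)
    plt.xlabel('epoch')
    plt.ylabel(title)
    plt.legend()
    plt.savefig(os.path.join(logdir, '%s_%s.png' % (model_name, name)))
    plt.close()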
def train_one_epoch(net,
                    optimizer,
                    critic,
                    dataloader,
                    device,
                    epoch,
                    epochlength,
                    wandblog=True):
    """Go through the training data once and adjust the weights of net.

    Arguments
    ---------
    net : torch neural net
    optimizer : torch optimizer
    critic : torch loss object
    dataloader : torch dataloader
    device : str
        Options: 'cpu', 'cuda'
    epoch : int
        Current epoch.
    epochlength : int
        Total number of samples in one epoch, including testing.
    [wandblog] : bool
        Default: True
        Update monitoring values to Weights & Biases.
    !![return_pred] : bool!!
        Not usable because of memory consumption.
        Default: False
        If True, the prediction is returned.

    Returns
    -------
    infodict : dict
        Dict of info about the process, which is sent to Weights & Biases
        to monitor the process there.
    [pred] : List[torch.Tensor]
        Returned only if return_pred is set to True.
    """
    net.train()
    cuminfodict = {
        "epoch": [],
        "loss": [],
        "train_FNR": [],
        "train_FPR": [],
        "train_RVD": [],
        "train_dice": [],
        "train_dice_numerator": [],
        "train_dice_denominator": [],
        "train_iou": [],
        "train_conmat": []
    }
    alpha = config["alpha"]
    for i, sample in enumerate(dataloader):
        optimizer.zero_grad()
        vol = sample['vol'].to(device, non_blocking=True)
        lab = sample['lab'].to(device, non_blocking=True)

        ## Convert lab to class labels
        lab = (lab == 1).view(lab.size(0), lab.size(1), -1).any(-1).float()
        pred, pred_img = net.forward(vol, pooling="gap")

        ##### Comment out for VNet2d or VNet2dAsDrawn #####
        loss = critic(pred, lab)
        ###################################################

        # #### Uncomment for VNet2d or VNet2dAsDrawn #####
        # losses = []
        # for output_part in outputs:
        #     losses.append(critic(output_part, lab))
        # loss = sum(losses[:-1]) * alpha + losses[-1]
        # alpha *= config["alpha_decay_rate"]
        ################################################

        ####### Erasing discriminative features ########
        if config["erase_discriminative_features"] and config[
                "label_type"] == "binary":
            erased_input = torch.where(pred_img > config["tau"], vol,
                                       torch.zeros_like(pred_img))
            erased_output, _ = net.forward(erased_input, pooling="gap")
            loss += critic(erased_output, lab)
        ################################################

        loss.backward()
        optimizer.step()
        # onehot_lab = utils.one_hot(lab, nclasses=3)

        ## Monitoring in loop (once per batch)
        metrics = Metrics(torch.round(pred), lab)
        diceparts = metrics.get_dice_coefficient()
        infodict = {
            "epoch": epoch,  # + i/epochlength,
            "loss": loss.item(),
            "train_FNR": metrics.get_FNR().detach().cpu().numpy(),
            "train_FPR": metrics.get_FPR().detach().cpu().numpy(),
            "train_RVD": metrics.get_RVD().detach().cpu().numpy(),
            "train_dice": diceparts[0].detach().cpu().numpy(),
            "train_dice_numerator": diceparts[1].detach().cpu().numpy(),
            "train_dice_denominator": diceparts[2].detach().cpu().numpy(),
            "train_iou": metrics.get_jaccard_index().detach().cpu().numpy(),
            "train_conmat": metrics.get_conmat().detach().cpu().numpy()
        }
        utils.update_cumu_dict(cuminfodict, infodict)

        ## Classification accuracy
        # if (config["label_type"] == 'binary') and wandblog:
        if wandblog:
            pred = (pred > config["tau"]).view(pred.size(0), -1).sum(-1) > 0
            lab = lab.view(lab.size(0), -1).sum(-1) > 0
            acc = (pred == lab).float().mean().detach().cpu().numpy()
            wandb.log({
                "preds": pred.sum(),
                "labs": lab.sum(),
                "Accuracy": float(acc),
                "detailed_loss": [loss.item()]
            })

    ## Monitoring after loop (once per epoch)
    ## Infologging
    for key in cuminfodict:
        cuminfodict[key] = np.mean(cuminfodict[key], axis=0)
    if wandblog:
        wandb.log(cuminfodict)
    return cuminfodict
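# A minimal sketch of how train_one_epoch() and test_one_epoch() might be
# composed into a full run. make_net() and make_dataloaders() are hypothetical
# factories, and the optimizer/loss choices are assumptions -- only the two
# epoch functions above come from the original code.
import torch

def run_training(num_epochs, device='cuda'):
    net = make_net().to(device)                     # hypothetical factory
    train_loader, test_loader = make_dataloaders()  # hypothetical factory
    optimizer = torch.optim.Adam(net.parameters(), lr=1e-4)
    critic = torch.nn.BCELoss()
    # epochlength counts all samples seen per epoch, including testing
    epochlength = len(train_loader.dataset) + len(test_loader.dataset)
    for epoch in range(num_epochs):
        train_one_epoch(net, optimizer, critic, train_loader, device,
                        epoch, epochlength, wandblog=False)  # skip wandb.init()
        test_one_epoch(net, test_loader, device, epoch, epochlength,
                       wandblog=False)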