Ejemplo n.º 1
0
def display_macro_page():
    """Render the macro lexical information page.

    Loads every ``Metrics`` row once, derives each data series from that
    list via the ``Metrics.get_*_data`` helpers, and passes the results to
    the ``macro.html`` template under the names the template expects.
    """
    all_metrics = Metrics.query.all()

    # Dict literals evaluate top to bottom, so the helper calls run in the
    # same order as before: word length, line length, poem length, stanzas.
    context = {
        "wl_avg_data": Metrics.get_wl_average_data(all_metrics),
        "wl_range_data": Metrics.get_wl_range_data(all_metrics),
        "ll_avg_data": Metrics.get_ll_average_data(all_metrics),
        "ll_range_data": Metrics.get_ll_range_data(all_metrics),
        "pl_lines_data": Metrics.get_pl_lines_data(all_metrics),
        "pl_char_data": Metrics.get_pl_char_data(all_metrics),
        "pl_word_data": Metrics.get_pl_words_data(all_metrics),
        "sl_avg_data": Metrics.get_stanza_length_data(all_metrics),
        "stanza_num_data": Metrics.get_stanza_num_data(all_metrics),
        "sl_range_data": Metrics.get_stanza_range_data(all_metrics),
    }

    return render_template("macro.html", **context)
Ejemplo n.º 2
0
def send_metric():
    """Store a metric value posted by a client.

    Expects a POST request whose JSON body is an object with a ``client``
    name and a ``value``. The client row is looked up (or created) by name
    and a metric is recorded for it.

    Returns:
        ``('OK', 200)`` when the metric was stored, ``('FAIL', 500)`` for
        any other request (wrong method, missing/invalid body or fields).
    """
    from model import Metrics, Client, get_or_create

    if request.method != 'POST':
        return 'FAIL', 500

    data = request.json
    # The body may be absent or may be valid JSON that is not an object
    # (e.g. a list); calling .get() on it would raise instead of failing
    # cleanly.
    if not isinstance(data, dict):
        return 'FAIL', 500

    client_name = data.get('client')
    value = data.get('value')
    # A numeric zero is a legitimate measurement; a plain truthiness test
    # would reject it. bool is excluded so that ``false`` is still refused.
    is_numeric_zero = (isinstance(value, (int, float))
                       and not isinstance(value, bool))
    if not client_name or not (value or is_numeric_zero):
        return 'FAIL', 500

    client = get_or_create(db.session, Client, name=client_name)
    Metrics.create_metric(db.session, client.id, value)
    return 'OK', 200
Ejemplo n.º 3
0
def display_sentiment_page():
    """Render the sentiment information page.

    Loads every ``Metrics`` row once, derives the sentiment-related data
    series from that list, and passes them to ``sentiment.html``.
    """
    all_metrics = Metrics.query.all()

    # Evaluated top to bottom, matching the original call order.
    context = {
        "pos_neg": Metrics.get_pos_neg_data(all_metrics),
        "obj_abs": Metrics.get_obj_abs_data(all_metrics),
        "common": Metrics.get_common_data(all_metrics),
        "gender": Metrics.get_gender_data(all_metrics),
        "active": Metrics.get_active_data(all_metrics),
    }

    return render_template("sentiment.html", **context)
Ejemplo n.º 4
0
def display_micro_page():
    """Render the micro lexical information page.

    Loads every ``Metrics`` row once, derives the micro-level data series
    from that list, and passes them to ``micro.html``.
    """
    all_metrics = Metrics.query.all()

    # Evaluated top to bottom, matching the original call order.
    context = {
        "rhyme_rep": Metrics.get_rhyme_rep_data(all_metrics),
        "lex_div": Metrics.get_lex_data(all_metrics),
        "filler": Metrics.get_filler_data(all_metrics),
        "narrator": Metrics.get_narrator_data(all_metrics),
        "alliteration": Metrics.get_alliteration_data(all_metrics),
    }

    return render_template("micro.html", **context)
Ejemplo n.º 5
0
def index():
    """Render the index page with the stored metrics and their summaries.

    Fetches metrics with ``limit=100``. When any are returned, two summaries
    are computed via ``Metrics.get_metrics_info``: one with ``limit=100``
    (``local_metrics``) and one with ``limit=None`` (``total_metrics``).
    Otherwise both summaries are empty dicts.
    """
    from model import Metrics

    metrics = Metrics.get_metrics(limit=100)

    if metrics:
        local_metrics = Metrics.get_metrics_info(db.session, limit=100)
        total_metrics = Metrics.get_metrics_info(db.session, limit=None)
    else:
        # Nothing stored yet: skip the summary queries entirely.
        local_metrics, total_metrics = {}, {}

    context = dict(
        title='Система сбора информации',
        metrics=metrics,
        total_metrics=total_metrics,
        local_metrics=local_metrics,
    )
    return render_template('index.html', **context)
Ejemplo n.º 6
0
def test_one_epoch(net,
                   dataloader,
                   device,
                   epoch,
                   epochlength,
                   wandblog=True,
                   dst_path=None,
                   dst_format=None):
    """Go through the testing data once, measure performance and
    optionally send the results to weights and biases.

    The network is put in eval mode and the whole pass runs under
    ``torch.no_grad()`` so that no autograd graph is kept for the
    predictions (evaluation never backpropagates).

        Arguments
        ---------
            net : torch neural net
            dataloader : torch dataloader
                Yields dict samples with keys 'vol' and 'lab' (and
                'store_idx' when predictions are stored).
            device : str
                Options: 'cpu', 'cuda'
            epoch : int
                Current epoch
            epochlength : int
                Total number of samples in one epoch including testing.
                Not used by this function; kept for interface parity.
            [wandblog] : bool
                Default: True
                Send monitoring data to weights and biases.
            [dst_path] : str path
                Default: None, i.e. do not store predictions.
                Path to destination folder to store predictions at.
            [dst_format] : str
                Format of storage. Options are decided in utils.store().

        Returns
        -------
            infodict : dict
                Per-key mean (``np.mean(..., axis=0)``) of the values
                accumulated over all batches. This is also what is sent to
                weights and biases when ``wandblog`` is True.

    """

    net.eval()
    cuminfodict = {
        "epoch": [],
        "test_fnr": [],
        "test_fpr": [],
        "test_voe": [],
        "test_rvd": [],
        "test_dice": [],
        "test_iou": [],
        "test_dice_numerator": [],
        "test_dice_denominator": [],
        "test_classification_accuracy": [],
        # "test_conmat": [],
    }

    # Gradients are never needed here; disabling autograd keeps the forward
    # pass from retaining activations for a backward pass.
    with torch.no_grad():
        for sample in dataloader:
            vol = sample['vol'].to(device, non_blocking=True)
            lab_seg = sample['lab'].to(device, non_blocking=True)
            pred_soft = net(vol)
            pred = torch.round(pred_soft)

            ## Log metrics
            metrics = Metrics(pred, lab_seg, mode='test')
            infodict = metrics.get_metric_dict()
            for key in infodict:
                infodict[key] = infodict[key].detach().cpu().numpy()
            infodict.update({"epoch": epoch})

            utils.update_cumu_dict(cuminfodict, infodict)

            ## Classification accuracy
            if (config["label_type"] == "binary") and wandblog:
                # A sample counts as positive when any element of its
                # flattened prediction / label is non-zero.
                pred_any = pred.view(pred.size(0), -1).sum(-1) > 0
                lab_any = lab_seg.view(lab_seg.size(0), -1).sum(-1) > 0
                acc = (pred_any == lab_any).float().mean().detach().cpu().numpy()
                wandb.log({
                    "Test Positive Predictions": pred_any.sum(),
                    "Test Positive Labels": lab_any.sum(),
                    "Test Accuracy": float(acc)
                })

            ## Store prediction
            if dst_path is not None:
                utils.store(pred_soft,
                            dst_path,
                            sample['store_idx'],
                            format=dst_format,
                            epoch=epoch,
                            focus=config["focus"])
                if wandblog:
                    # Log prediction, input and label of every sample in the
                    # batch as images (channel 0 only, scaled by 255).
                    for idx in range(pred_soft.size(0)):
                        tag = (str(config["runid"]) + str(epoch) +
                               str(sample["store_idx"][idx]))
                        wandb.log({
                            "Example Prediction": [
                                wandb.Image(
                                    pred_soft[idx, 0, ...].detach().cpu().numpy() *
                                    255,
                                    caption="Prediction " + tag)
                            ]
                        })
                        wandb.log({
                            "Example Image": [
                                wandb.Image(vol[idx, 0, ...].cpu().numpy() * 255,
                                            caption="Image " + tag)
                            ]
                        })
                        wandb.log({
                            "Example Label": [
                                wandb.Image(lab_seg[idx, 0, ...].cpu().numpy() * 255,
                                            caption="Label " + tag)
                            ]
                        })

    ## Infologging
    for key in cuminfodict:
        cuminfodict[key] = np.mean(cuminfodict[key], axis=0)
    if wandblog:
        wandb.log(cuminfodict)

    return cuminfodict
Ejemplo n.º 7
0
# Build the training op: a Nesterov momentum optimizer whose gradients are
# scaled per variable before being applied. Everything created inside the
# control_dependencies block depends on the ops in the UPDATE_OPS collection.
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) #collect batch norm update operations
with tf.control_dependencies(update_ops):
  optimizer = tf.train.MomentumOptimizer(per_sample_learning_rate, momentum=0.9, use_nesterov=True)
  gradients = optimizer.compute_gradients(target_vars.opt_loss)
  # (gradient, variable) pairs after the optional per-variable scaling.
  adjusted_gradients = []
  for (grad,x) in gradients:
    adjusted_grad = grad
    # Variables listed in lr_adjusted_variables (keyed by variable name) get
    # their gradient multiplied by the configured factor. Pairs whose
    # gradient is None are passed through unchanged.
    if x.name in lr_adjusted_variables and grad is not None:
      adj_factor = lr_adjusted_variables[x.name]
      adjusted_grad = grad * adj_factor
      trainlog("Adjusting gradient for " + x.name + " by " + str(adj_factor))

    adjusted_gradients.append((adjusted_grad,x))
  train_step = optimizer.apply_gradients(adjusted_gradients)

# Metrics object built from the model and its target variables, with debug
# statistics enabled.
metrics = Metrics(model,target_vars,include_debug_stats=True)

def reduce_norm(x, axis=None, keepdims=False):
  """Return the root-mean-square of `x`.

  Squares `x` elementwise, averages over `axis` (`axis` and `keepdims` are
  forwarded to `tf.reduce_mean`), and takes the square root of the result.
  """
  mean_of_squares = tf.reduce_mean(tf.square(x), axis=axis, keepdims=keepdims)
  return tf.sqrt(mean_of_squares)
# Per-variable relative update size, keyed by variable name: the learning
# rate times the RMS of the (adjusted) gradient, divided by the RMS of the
# variable itself. The 1e-10 term guards against division by zero.
# Variables whose gradient is None are left out.
relative_update_by_var = dict([
  (v.name,per_sample_learning_rate * reduce_norm(grad) / (1e-10 + reduce_norm(v))) for (grad,v) in adjusted_gradients if grad is not None
])

# Count the parameters of every trainable variable (product of its shape's
# dimensions), log the per-variable count and accumulate the overall total.
total_parameters = 0
for variable in tf.trainable_variables():
  shape = variable.get_shape()
  variable_parameters = 1
  for dim in shape:
    # NOTE(review): assumes every dimension is statically known; an unknown
    # dimension would presumably yield a None value here -- TODO confirm.
    variable_parameters *= dim.value
  total_parameters += variable_parameters
  trainlog("Model variable %s, %d parameters" % (variable.name,variable_parameters))
Ejemplo n.º 8
0
def main():
    """Train and evaluate an ``RNNSimilarity`` model on the SICK dataset.

    Steps, in order:

    1. Parse arguments and attach a file handler to the root logger; the
       log file is named after the start timestamp inside ``args.logdir``.
    2. Validate arguments (CUDA availability, sparsity vs. weight decay)
       and seed the RNGs.
    3. Download the dataset and word vectors, build the vocabulary and the
       train / dev / test datasets.
    4. Build the model and initialise its embedding matrix, using a cached
       ``sick_embed.pth`` in ``args.datadir`` when present and otherwise
       building it from the GloVe vectors and caching it.
    5. Train for ``args.epochs`` epochs, logging loss / Pearson / MSE on all
       three splits after each epoch and checkpointing whenever the test
       Pearson score improves.
    6. Plot the collected Pearson, MSE and loss curves.

    Raises:
        SystemExit: with status 1 when both sparsity and a non-zero weight
            decay are requested.
        TypeError: when ``args.optim`` is not 'adam', 'adagrad' or 'sgd'.
    """
    args = parse_args()
    # logging definition
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    model_name = time.strftime("%Y%m%d%H%M", time.localtime(time.time()))
    log_dir = os.path.join(os.getcwd(), args.logdir)
    # makedirs (rather than mkdir) so that a nested --logdir such as
    # "runs/exp1" works even when the parent does not exist yet.
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    log_file = os.path.join(log_dir, model_name + ".log")
    fh = logging.FileHandler(log_file, mode="w")
    fh.setLevel(logging.DEBUG)
    formatter = logging.Formatter(
        "%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s"
    )
    fh.setFormatter(formatter)
    logger.addHandler(fh)
    # argument validation
    args.cuda = args.cuda and torch.cuda.is_available()
    device = torch.device("cuda:0" if args.cuda else "cpu")
    if args.sparse and args.wd != 0:
        logger.error('Sparsity and weight decay are incompatible, pick one!')
        # Exit with a failure status; the interactive ``exit()`` helper is
        # not guaranteed to exist and reported success (status 0).
        raise SystemExit(1)
    logger.debug(args)
    torch.manual_seed(args.seed)
    random.seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)
        torch.backends.cudnn.benchmark = True
    # download dataset
    download_sick(args.datadir)
    download_wordvecs(args.glove)
    # build vocabulary
    train_file = os.path.join(args.datadir, "SICK_train.txt")
    dev_file = os.path.join(args.datadir, "SICK_trial.txt")
    test_file = os.path.join(args.datadir, "SICK_test_annotated.txt")
    vocab_file = os.path.join(args.datadir, "vocab.txt")
    build_vocab([train_file, dev_file, test_file], vocab_file)
    # preparing vocabulary
    special_words = [
        Constants.PAD_WORD, Constants.UNK_WORD, Constants.BOS_WORD,
        Constants.EOS_WORD
    ]
    vocabulary = Vocab(filename=vocab_file, data=list(special_words))
    logger.info('==> SICK vocabulary size : %d ', vocabulary.size())
    # preparing dataset
    train_set = SICKDataset(vocabulary, args.seq_len, args.num_classes,
                            train_file)
    logger.info('==> Size of train data   : %d ', len(train_set))
    dev_set = SICKDataset(vocabulary, args.seq_len, args.num_classes,
                          dev_file)
    logger.info('==> Size of dev data   : %d ', len(dev_set))
    test_set = SICKDataset(vocabulary, args.seq_len, args.num_classes,
                           test_file)
    logger.info('==> Size of test data   : %d ', len(test_set))

    # preparing model
    model = RNNSimilarity(vocab_size=vocabulary.size(),
                          embedding_dim=args.embedding_dim,
                          mem_dim=args.mem_dim,
                          hid_dim=args.hid_dim,
                          num_layers=args.num_layers,
                          rnn_type=args.rnn_type,
                          dropout=args.dropout,
                          bidirectional=args.bidirectional,
                          seq_len=args.seq_len,
                          num_classes=args.num_classes,
                          sparsity=args.sparse,
                          freeze=args.freeze_embed,
                          name=model_name)
    criterion = nn.KLDivLoss()
    # preparing embeddings

    # for words common to dataset vocab and GLOVE, use GLOVE vectors
    # for other words in dataset vocab, use random normal vectors
    emb_file = os.path.join(args.datadir, 'sick_embed.pth')
    if os.path.isfile(emb_file):
        # The cache is saved on whatever device built it; remap so that a
        # file written on a CUDA machine still loads on a CPU-only one.
        emb = torch.load(emb_file, map_location=device)
    else:
        # load glove embeddings and vocab
        glove_vocab, glove_emb = load_word_vectors(
            os.path.join(args.glove, 'glove.840B.300d'))
        logger.info('==> GLOVE vocabulary size: %d ', glove_vocab.size())
        emb = torch.zeros(vocabulary.size(),
                          glove_emb.size(1),
                          dtype=torch.float,
                          device=device)
        emb.normal_(0, 0.05)
        # zero out the embeddings for padding and other special words
        # (rows 0 .. len(special_words) - 1)
        for idx in range(len(special_words)):
            emb[idx].zero_()
        for word in vocabulary.labelToIdx.keys():
            glove_idx = glove_vocab.getIndex(word)
            # NOTE(review): a truthiness test also skips a word stored at
            # GloVe index 0; presumably getIndex returns None for unknown
            # words -- confirm before switching to ``is not None``.
            if glove_idx:
                emb[vocabulary.getIndex(word)] = glove_emb[glove_idx]
        torch.save(emb, emb_file)
    # plug these into embedding matrix inside model
    model.word_emb.weight.data.copy_(emb)
    # preparing optimizer
    model.to(device)
    criterion.to(device)
    optimizer_classes = {
        'adam': optim.Adam,
        'adagrad': optim.Adagrad,
        'sgd': optim.SGD,
    }
    if args.optim not in optimizer_classes:
        raise TypeError("Unknown optimizer type %s" % str(args.optim))
    # Only parameters that require gradients are optimised (e.g. frozen
    # embeddings are excluded).
    optimizer = optimizer_classes[args.optim](
        filter(lambda p: p.requires_grad, model.parameters()),
        lr=args.lr,
        weight_decay=args.wd)
    metrics = Metrics(args.num_classes)
    # create trainer object for training and testing
    trainer = Trainer(args, model, criterion, optimizer, device)
    best = -float('inf')
    peason_list = []
    mse_list = []
    loss_list = []
    for epoch in range(args.epochs):
        # The loss returned by train() is not reported; the train loss that
        # is logged comes from the evaluation pass right below.
        trainer.train(train_set)
        train_loss, train_pred, train_target = trainer.test(train_set)
        dev_loss, dev_pred, dev_target = trainer.test(dev_set)
        test_loss, test_pred, test_target = trainer.test(test_set)
        train_pearson = metrics.pearson(train_pred, train_target)
        train_mse = metrics.mse(train_pred, train_target)
        logger.info(
            '==> Epoch {}, Train \tLoss: {}\tPearson: {}\tMSE: {}'.format(
                epoch + 1, train_loss, train_pearson, train_mse))
        dev_pearson = metrics.pearson(dev_pred, dev_target)
        dev_mse = metrics.mse(dev_pred, dev_target)
        logger.info(
            '==> Epoch {}, Dev \tLoss: {}\tPearson: {}\tMSE: {}'.format(
                epoch + 1, dev_loss, dev_pearson, dev_mse))
        test_pearson = metrics.pearson(test_pred, test_target)
        test_mse = metrics.mse(test_pred, test_target)
        logger.info(
            '==> Epoch {}, Test \tLoss: {}\tPearson: {}\tMSE: {}'.format(
                epoch + 1, test_loss, test_pearson, test_mse))

        # drawing data
        peason_list.append((train_pearson, dev_pearson, test_pearson))
        mse_list.append((train_mse, dev_mse, test_mse))
        loss_list.append((train_loss, dev_loss, test_loss))
        # NOTE(review): the best checkpoint is selected on the *test*
        # Pearson score; selecting on the dev split may be intended.
        if best < test_pearson:
            best = test_pearson
            checkpoint = {
                'model': trainer.model.state_dict(),
                'optim': trainer.optimizer,
                'pearson': test_pearson,
                'mse': test_mse,
                'args': args,
                'epoch': epoch
            }
            logger.info(
                '==> New optimum found, checkpointing everything now...')
            torch.save(checkpoint,
                       '%s.pt' % os.path.join(args.logdir, model.name))
    # draw the picture
    draw(peason_list, "Peason value", "peason", model_name, args.logdir)
    draw(mse_list, "MSE value", "mse", model_name, args.logdir)
    draw(loss_list, "Loss value", "loss", model_name, args.logdir)
Ejemplo n.º 9
0
def train_one_epoch(net,
                    optimizer,
                    critic,
                    dataloader,
                    device,
                    epoch,
                    epochlength,
                    wandblog=True):
    """Run one pass over the training data and update the weights of net.

        Arguments
        ---------
            net : torch neural net
                Its ``forward(vol, pooling="gap")`` must return a pair; the
                first element is scored against the labels, the second is
                used for the optional erasing step.
            optimizer : torch optimizer
            critic : torch loss object
            dataloader : torch dataloader
                Yields dict samples with keys 'vol' and 'lab'.
            device : str
                Options: 'cpu', 'cuda'
            epoch : int
                Current epoch
            epochlength : int
                Total number of samples in one epoch including testing.
                Not used by this function.
            [wandblog] : bool
                Default: True
                Send monitoring values to weights and biases.

        Returns
        -------
            infodict : dict
                Per-key mean (``np.mean(..., axis=0)``) of the values
                collected for every batch. Also sent to weights and biases
                when ``wandblog`` is True.
    """
    net.train()
    tracked_keys = ("epoch", "loss", "train_FNR", "train_FPR", "train_RVD",
                    "train_dice", "train_dice_numerator",
                    "train_dice_denominator", "train_iou", "train_conmat")
    cuminfodict = {name: [] for name in tracked_keys}

    # Only needed by the VNet2d / VNet2dAsDrawn loss variant sketched below;
    # still read here so a missing config entry fails as before.
    alpha = config["alpha"]

    def _as_numpy(tensor):
        # Detach from the graph and move to host memory for logging.
        return tensor.detach().cpu().numpy()

    for batch in dataloader:
        optimizer.zero_grad()
        vol = batch['vol'].to(device, non_blocking=True)
        raw_lab = batch['lab'].to(device, non_blocking=True)
        ## Convert lab to class labels: per (sample, channel), 1.0 if any
        ## element equals 1, else 0.0
        is_one = (raw_lab == 1)
        lab = is_one.view(raw_lab.size(0), raw_lab.size(1), -1).any(-1).float()

        class_pred, pred_img = net.forward(vol, pooling="gap")

        ##### Comment out for VNet2d or VNet2dAsDrawn #####
        loss = critic(class_pred, lab)
        ###################################################

        # #### Uncomment for VNet2d or VNet2dAsDrawn #####
        # losses = []
        # for output_part in outputs:
        #     losses.append(critic(output_part, lab))

        # loss = sum(losses[:-1])*alpha + losses[-1]
        # alpha *= config["alpha_decay_rate"]
        ################################################

        ####### Erasing discriminative features ########
        do_erase = (config["erase_discriminative_features"]
                    and config["label_type"] == "binary")
        if do_erase:
            # Keeps `vol` where pred_img exceeds tau and zeros it elsewhere,
            # then adds the loss of a second forward pass on that input.
            keep_mask = pred_img > config["tau"]
            erased_input = torch.where(keep_mask, vol,
                                       torch.zeros_like(pred_img))
            erased_output, _ = net.forward(erased_input, pooling="gap")
            loss += critic(erased_output, lab)
        ################################################

        loss.backward()
        optimizer.step()

        ## Monitoring in loop (once per batch)
        batch_metrics = Metrics(torch.round(class_pred), lab)
        dice_parts = batch_metrics.get_dice_coefficient()
        infodict = {
            "epoch": epoch,
            "loss": loss.item(),
            "train_FNR": _as_numpy(batch_metrics.get_FNR()),
            "train_FPR": _as_numpy(batch_metrics.get_FPR()),
            "train_RVD": _as_numpy(batch_metrics.get_RVD()),
            "train_dice": _as_numpy(dice_parts[0]),
            "train_dice_numerator": _as_numpy(dice_parts[1]),
            "train_dice_denominator": _as_numpy(dice_parts[2]),
            "train_iou": _as_numpy(batch_metrics.get_jaccard_index()),
            "train_conmat": _as_numpy(batch_metrics.get_conmat()),
        }
        utils.update_cumu_dict(cuminfodict, infodict)

        ## Classification accuracy
        if not wandblog:
            continue

        # A sample is positive when any thresholded prediction / any label
        # entry is non-zero.
        thresholded = class_pred > config["tau"]
        pred_positive = thresholded.view(class_pred.size(0), -1).sum(-1) > 0
        lab_positive = lab.view(lab.size(0), -1).sum(-1) > 0
        acc = _as_numpy((pred_positive == lab_positive).float().mean())
        wandb.log({
            "preds": pred_positive.sum(),
            "labs": lab_positive.sum(),
            "Accuracy": float(acc),
            "detailed_loss": [loss.item()]
        })

    ## Monitoring after loop (once per epoch)
    ## Infologging
    for name, collected in cuminfodict.items():
        cuminfodict[name] = np.mean(collected, axis=0)
    if wandblog:
        wandb.log(cuminfodict)

    return cuminfodict