Example 1
def train(symbol, train_iter, valid_iter, data_names, label_names):
    devs = mx.cpu() if args.gpus is None or args.gpus == '' else [mx.gpu(int(i)) for i in args.gpus.split(',')]
    module = mx.mod.Module(symbol, data_names=data_names, label_names=label_names, context=devs)
    module.bind(data_shapes=train_iter.provide_data, label_shapes=train_iter.provide_label)
    module.init_params(mx.initializer.Uniform(0.1))
    module.init_optimizer(optimizer=args.optimizer, optimizer_params={'learning_rate': args.lr})

    for epoch in range(1, args.num_epochs+1):
        train_iter.reset()
        valid_iter.reset()
        for batch in train_iter:
            module.forward(batch, is_train=True)  # compute predictions
            module.backward()  # compute gradients
            module.update() # update parameters

        train_pred = module.predict(train_iter).asnumpy()
        train_label = train_iter.label[0][1].asnumpy()
        print('\nMetrics: Epoch %d, Training %s' % (epoch, metrics.evaluate(train_pred, train_label)))

        val_pred = module.predict(valid_iter).asnumpy()
        val_label = valid_iter.label[0][1].asnumpy()
        print('Metrics: Epoch %d, Validation %s' % (epoch, metrics.evaluate(val_pred, val_label)))

        if epoch % args.save_period == 0 and epoch > 1:
            module.save_checkpoint(prefix=os.path.join("../models/", args.model_prefix), epoch=epoch, save_optimizer_states=False)
        if epoch == args.num_epochs:
            module.save_checkpoint(prefix=os.path.join("../models/", args.model_prefix), epoch=epoch, save_optimizer_states=False)
Example 2
def evaluateAll():
    testds = open("../dataset/sick/test.txt")
    levendistances = []
    scores = []
    for l in testds.readlines():
        splits = l.split("\t")
        sen1 = splits[1]
        sen2 = splits[2]
        score = float("%.1f" % float(splits[3]))
        scores.append(score)
        levendistances.append(-levendistance.leven(sen1, sen2))
    calibrated = metrics.calibration(levendistances)
    metrics.evaluate(calibrated, scores)

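    # Bucket the pairs by gold relatedness score ([1,2), [2,3), [3,4), [4,5])
    # and evaluate the calibrated predictions within each bucket separately.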
    partitionScores = [[], [], [], []]
    calibratedScores = [[], [], [], []]
    for i in range(len(scores)):
        if scores[i] == 5.0:
            partitionScores[3].append(scores[i])
            calibratedScores[3].append(calibrated[i])
        else:
            position = int(scores[i]) - 1
            partitionScores[position].append(scores[i])
            calibratedScores[position].append(calibrated[i])
    print(partitionScores[1][1:4])
    print(calibratedScores[1][1:4])
    for i in range(4):
        metrics.evaluate(partitionScores[i], calibratedScores[i])
Example 3
def runAllModels(X_train, X_test, y_test, y_train):
    y_pred = xgb(X_train, y_train, X_test)
    plotter(y_test, y_pred)
    cm, fscore, a = evaluate(y_test, y_pred)
    y_pred = rf(X_train, y_train, X_test)
    plotter(y_test, y_pred)
    cm, fscore, a = evaluate(y_test, y_pred)
    y_pred = nn(X_train, y_train, X_test)
    plotter(y_test, y_pred)
    cm, fscore, a = evaluate(y_test, y_pred)
    y_pred = svm(X_train, y_train, X_test)
    plotter(y_test, y_pred)
    cm, fscore, a = evaluate(y_test, y_pred)
Example 4
def predict(numt):
    lda = models.LdaModel.load("../dataset/sick/model.lda")
    #lda = models.LdaModel.load("../dataset/sick/modeltfidf.lda")
    dictionary = corpora.Dictionary.load("../dataset/sick/sick.dict")
    testds = open("../dataset/sick/test.txt")

    def splitSent(sent):
        words = re.split(",| ", sent)
        wordlist = []
        for word in words:
            if word == "":
                continue
            else:
                wordlist.append(word)
        return wordlist

    simscores = []
    scores = []
    for l in testds.readlines():
        items = l.split("\t")
        sent1 = items[1]
        txt1 = dictionary.doc2bow(splitSent(sent1))
        sent2 = items[2]
        txt2 = dictionary.doc2bow(splitSent(sent2))
        corpus = [txt1, txt2]
        index = similarities.MatrixSimilarity(lda[corpus], num_features=numt)
        sim = index[lda[txt2]]
        simscores.append(sim[0])

        score = float("%.1f" % float(items[3]))
        scores.append(score)
    calibrated = metrics.calibration(simscores)
    #print calibrated
    #print scores
    metrics.evaluate(calibrated, scores)

    partitionScores = [[], [], [], []]
    calibratedScores = [[], [], [], []]
    for i in range(len(scores)):
        if scores[i] == 5.0:
            partitionScores[3].append(scores[i])
            calibratedScores[3].append(calibrated[i])
        else:
            position = int(scores[i]) - 1
            partitionScores[position].append(scores[i])
            calibratedScores[position].append(calibrated[i])
    print(partitionScores[1][1:4])
    print(calibratedScores[1][1:4])
    for i in range(4):
        metrics.evaluate(partitionScores[i], calibratedScores[i])
Example 5
def evaluate(sess,testmodel):
    (databatch1, masks1, databatch2, masks2, labelsbatch) = batcher.next_test_batch()
    feed_dict = {
        testmodel.input_data_s1: databatch1,
        testmodel.input_data_s2: databatch2,
        testmodel.mask_s1: masks1,
        testmodel.mask_s2: masks2,
        testmodel.target: labelsbatch
    }
    result=sess.run(testmodel.prediction, feed_dict=feed_dict)
    newScores = []
    for item in result:
        newScores.append(item[0])
    print(newScores)
    scores=batcher.test_score()
    metrics.evaluate(newScores, scores)
Example 6
def evaluate_all(prediction_path, annotation_path, yaml_path, mode='coarse'):

    metrics = {mode: {}}

    df_dict = evaluate(prediction_path, annotation_path, yaml_path, mode)

    micro_auprc, eval_df = micro_averaged_auprc(df_dict, return_df=True)
    macro_auprc, class_auprc = macro_averaged_auprc(df_dict,
                                                    return_classwise=True)

    # Get index of first threshold that is at least 0.5
    thresh_0pt5_idx = (eval_df['threshold'] >= 0.5).nonzero()[0][0]

    metrics[mode]["micro_auprc"] = micro_auprc
    metrics[mode]["micro_f1"] = eval_df["F"][thresh_0pt5_idx]
    metrics[mode]["macro_auprc"] = macro_auprc

    print("{} level evaluation:".format(mode.capitalize()))
    print("======================")
    print(" * Micro AUPRC:           {}".format(metrics[mode]["micro_auprc"]))
    print(" * Micro F1-score (@0.5): {}".format(metrics[mode]["micro_f1"]))
    print(" * Macro AUPRC:           {}".format(metrics[mode]["macro_auprc"]))
    print(" * Coarse Tag AUPRC:")

    metrics[mode]["class_auprc"] = {}
    for coarse_id, auprc in class_auprc.items():
        coarse_name = taxonomy['coarse'][int(coarse_id)]
        metrics[mode]["class_auprc"][coarse_name] = auprc
        print("      - {}: {}".format(coarse_name, auprc))
Example 7
def predict(predict_conf):
    # load data
    _, data = load_pkl_data(predict_conf.path_data)

    # load model meta data
    meta = load_pkl_data(predict_conf.path_meta)
    meta_image_shape = meta['ModelConf'].img_shape
    meta_re_sample_type = meta['ModelConf'].img_re_sample
    meta_text_len = meta['ModelConf'].text_length
    meta_label_num = len(meta['label2id'])
    meta_id2label = {v: k for k, v in meta['label2id'].items()}

    # load model
    model = keras.models.load_model(predict_conf.path_model, custom_objects={
        "CoAttentionParallel": CoAttentionParallel
    })

    # prepare data
    _, _, data_test = prepare_data(data, meta_image_shape, meta_re_sample_type,
                                   meta_text_len, meta_label_num, 0, 0)

    # predict with trained model
    x_test, y_test = data_test
    y_predict = model.predict(x_test)
    y_true = y_test.tolist()

    # save predictions
    save_pkl_data(predict_conf.path_predictions, [y_predict, y_test])

    # print metric results
    scores = evaluate(y_true, y_predict, predict_conf.threshold)
    label_names = [meta_id2label[i] for i in range(len(meta_id2label))]
    display_scores(scores, label_names)
Example 8
def eval(model, args, epoch, train_loss):
    eval_examples = processor.get_dev_examples(args.data_dir)
    eval_features = convert_examples_to_features(
        eval_examples, label_list, args.max_seq_length, tokenizer)
    logger.info("***** Running evaluation *****")
    logger.info("  Num examples = %d", len(eval_examples))
    logger.info("  Batch size = %d", args.eval_batch_size)
    all_input_ids = torch.tensor([f.input_ids for f in eval_features], dtype=torch.long)
    all_input_mask = torch.tensor([f.input_mask for f in eval_features], dtype=torch.long)
    all_segment_ids = torch.tensor([f.segment_ids for f in eval_features], dtype=torch.long)
    all_label_ids = torch.tensor([f.label_id for f in eval_features], dtype=torch.long)
    eval_data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_label_ids)
    # Run prediction for full data
    eval_sampler = SequentialSampler(eval_data)
    eval_dataloader = DataLoader(eval_data, sampler=eval_sampler, batch_size=args.eval_batch_size)

    model.eval()
    eval_loss, eval_accuracy = 0, 0
    nb_eval_steps, nb_eval_examples = 0, 0
    tp, ap, bp = 0., 0., 0.
    logits_total, labels_total = [], []
    for input_ids, input_mask, segment_ids, label_ids in eval_dataloader:
        input_ids = input_ids.to(device)
        input_mask = input_mask.to(device)
        segment_ids = segment_ids.to(device)
        label_ids = label_ids.to(device)

        with torch.no_grad():
            tmp_eval_loss = model(input_ids, segment_ids, input_mask, label_ids)
            logits = model(input_ids, segment_ids, input_mask)

        logits = logits.detach().cpu().numpy()
        label_ids = label_ids.to('cpu').numpy()
        logits_total.append(logits)
        labels_total.extend(label_ids.tolist())

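    # Stack the per-batch logits and keep only the second column (positive class),
    # which is what the ranking metrics below are computed on.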
    logits_total = np.vstack(logits_total)
    logits_total = logits_total[:, 1].tolist()
    probs_pred_file = os.path.join(args.output_dir, 'probs_pred.txt')
    with open(probs_pred_file, 'w') as f:
        for logit, label in zip(logits_total, labels_total):
            f.write(str(logit) + '\t' + str(label) + '\n')
    import metrics
    map, mrr, p1, r1, r2, r5 = metrics.evaluate(probs_pred_file, 10)

    result = {'map': map,
              'mrr': mrr,
              'p1': p1,
              'r1': r1,
              'r2': r2,
              'r5': r5,
              }

    output_eval_file = os.path.join(args.output_dir, "test_results.txt")
    with open(output_eval_file, "a") as writer:
        logger.info("***** Eval results *****")
        for key in sorted(result.keys()):
            logger.info("  %s = %s", key, str(result[key]))
            writer.write("%s = %s\n" % (key, str(result[key])))
        writer.write('-' * 30 + '\n')
Example 9
def get_info(file_path):
    tmp = file_path.split("/")
    filename = tmp[-1]
    sub_data = tmp[-2]
    data = tmp[-3]
    loss_type = tmp[-4]
    algo = tmp[-5]
    tuning_algo = tmp[-6]
    df = pd.read_csv(file_path)

    actual = df.iloc[:, 0].values

    predicts = df.iloc[:, 1:].values
    predict_mean = np.mean(predicts, axis=1)
    predict_std = np.std(predicts, axis=1)
    mean_std = np.mean(predict_std)
    result = {
        "filename": filename,
        "data": data,
        "sub_data": sub_data,
        "algo": algo,
        "loss_type": loss_type,
        "tuning_algo": tuning_algo,
        "mean_std": mean_std
    }
    res_eval = evaluate(actual,
                        predict_mean,
                        metrics=('mae', 'rmse', 'mape', 'smape', "std_ae",
                                 'std_ape', "jsd"))
    result.update(res_eval)
    return result
Example 10
def main():
    path = 'D:\\data\\M3\\M3Other\\N2836.csv'
    data = np.genfromtxt(path)
    print('Data len: {0}'.format(len(data)))
    predict_points = 8

    model = Model()

    ts = tsutils.TimeSeries(data, test_size=predict_points, scaler=processing.StandardScaler())

    x_train, y_train, t_train = ts.train_data(input_window=model.input_window, output_window=model.output_window, expand=True)
    model.train(x_train, y_train, epochs=200)

    #x_test, y_test, t_test = ts.train_data(input_window=model.input_window, output_window=model.output_window)

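    # Free-run (autoregressive) forecast over the test horizon using the trained model.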
    ctx = np.expand_dims(ts.get_test_context(model.input_window, expand=True), axis=0)
    y_pred = tsutils.free_run_batch(model.predict, ctx, predict_points, ts, batch_size=1)
    y_true = ts.get_test_data()

    y_pred_flat = ts.inverse_y(np.squeeze(y_pred))
    y_true_flat = ts.inverse_y(np.squeeze(y_true))

    print(metrics.evaluate(y_true_flat, y_pred_flat, metrics=('smape', 'mae', 'umbrae')))

Example 11
def train(symbol, train_iter, valid_iter, data_names, label_names):
    devs = mx.cpu() if args.gpus is None or args.gpus == '' else [
        mx.gpu(int(i)) for i in args.gpus.split(',')
    ]
    module = mx.mod.Module(symbol,
                           data_names=data_names,
                           label_names=label_names,
                           context=devs)
    module.bind(data_shapes=train_iter.provide_data,
                label_shapes=train_iter.provide_label)
    module.init_params(mx.initializer.Uniform(0.1))
    module.init_optimizer(optimizer=args.optimizer,
                          optimizer_params={'learning_rate': args.lr})

    for epoch in range(1, args.num_epochs + 1):
        train_iter.reset()
        valid_iter.reset()
        for batch in train_iter:
            module.forward(batch, is_train=True)  # compute predictions
            module.backward()  # compute gradients
            module.update()  # update parameters

        train_pred = module.predict(train_iter).asnumpy()
        train_label = train_iter.label[0][1].asnumpy()
        print('\nMetrics: Epoch %d, Training %s' %
              (epoch, metrics.evaluate(train_pred, train_label)))

        val_pred = module.predict(valid_iter).asnumpy()
        val_label = valid_iter.label[0][1].asnumpy()
        print('Metrics: Epoch %d, Validation %s' %
              (epoch, metrics.evaluate(val_pred, val_label)))

        if epoch % args.save_period == 0 and epoch > 1:
            module.save_checkpoint(prefix=os.path.join("../models/",
                                                       args.model_prefix),
                                   epoch=epoch,
                                   save_optimizer_states=False)
        if epoch == args.num_epochs:
            module.save_checkpoint(prefix=os.path.join("../models/",
                                                       args.model_prefix),
                                   epoch=epoch,
                                   save_optimizer_states=False)
Example 12
def main():
    args = get_args()
    truth = pd.read_csv(args.truth)
    predicts = pd.read_csv(args.predicts)
    truth['spans'] = truth.spans.apply(literal_eval)
    predicts['spans'] = predicts.spans.apply(literal_eval)

    predictions = predicts['spans'].tolist()
    gold = truth['spans'].tolist()
    f1, p, r = evaluate(gold, predictions)
    print("F1-Score :", f1)
    print("Precision:", p)
    print("Recall   :", r)
Example 13
    def _eval(outputs_cat):
        if 'lcnt' in outputs_cat:
            lcnts = _norm_cnt_lst(outputs_cat['lcnt'])
            gt = _norm_cnt_lst(outputs_cat['label-count'])
            cmb = list(zip(lcnts, gt))
            random.shuffle(cmb)
            print('lcnt', *cmb[:5])

        metric, score = metrics.evaluate(outputs_cat)
        metric['num_samples'] = list(outputs_cat.values())[0].shape[0]
        metric['timestamp'] = time.time()

        return metric, score
Example 14
def predict(self, validationX, validationY, error_buffer=5):
    rmse, error_by_index = evaluate(validationX, validationY, self.model)

    if error_buffer > error_by_index.shape[0]:
        error_buffer = error_by_index.shape[0]

    self.smallest_error = error_by_index[0:error_buffer, 1].astype(numpy.int32)
    self.greatest_error = error_by_index[(len(error_by_index) - error_buffer):len(error_by_index), 1].astype(numpy.int32)
    self.error_buffer = error_buffer
    #print("self.smallest_error", self.smallest_error)
    #print("self.smallest_error.shape", self.smallest_error.shape)

    return rmse
Example 15
def run_tuning(model, config_init, config_train, dataset: DataSets):
    x_train, x_test, y_train, y_test = dataset.get_data()
    # y_train = y_train.reshape((-1, y_train.shape[-1]))

    model = getattr(model_zoo, model)(**config_init)
    model.fit(x_train, y_train, **config_train)

    pred = model.predict(x_test)
    # pred = np.reshape(pred, (-1, y_test.shape[-1]))
    #
    # y_test = np.reshape(y_test, (-1, y_test.shape[-1]))

    # plt.figure()
    # start = 0
    # end = 2000
    # step = 1
    # if step == 1:
    #     plt.plot(pred[start:end, 0])
    # for i in range(start, end, step):
    #     plt.plot(range(i, i+step), pred[i])
    # plt.plot(range(start, end), y_test[start:end, 0, 0], label='actual')
    # plt.legend()
    # plt.show()
    # return 1

    pred_invert = dataset.invert_transform(pred)
    y_test_invert = dataset.invert_transform(y_test)

    output = np.concatenate([pred, y_test, pred_invert, y_test_invert], axis=1)
    df = pd.DataFrame(
        output,
        columns=['predict', 'actual', 'predict_invert', 'actual_invert'])

    filename = "/home/tienthien/Desktop/Mine/gan_timeseries/logs/tuning/gru_gan/"
    for k, v in config_init.items():
        if k == 'model_dir':
            continue
        if not isinstance(v, dict):
            filename += "{}_".format(v)
        else:
            for k1, v1 in v.items():
                filename += "{}_".format(v1)
    filename += ".csv"
    df.to_csv(filename, index=False)

    result_metrics = evaluate(y_test_invert,
                              pred_invert,
                              metrics=('mae', 'rmse'))

    return result_metrics['mae']
Example 16
def main():
    submit_dir = sys.argv[1]
    gt_dir = sys.argv[2]

    time_start = time.time()

    filelist = [
        filename for filename in sorted(os.listdir(gt_dir))
        if filename.endswith('txt')
    ]

    # check complete and valid submission
    for filename in filelist:
        if not os.path.exists(os.path.join(submit_dir, filename)):
            sys.exit('Could not find submission file {0}'.format(filename))
        preds = np.loadtxt(os.path.join(submit_dir, filename), dtype=np.int)

        # x in [0, 1024), y in [0, 512)
        if not ((np.alltrue(
                np.logical_and(preds[:, 0] >= 0, preds[:, 0] < WIDTH))) and
                (np.alltrue(
                    np.logical_and(preds[:, 1] >= 0, preds[:, 1] < HEIGHT)))):
            sys.exit('Invalid submission file {0}'.format(filename))

        # a pair of ceiling and floor junctions should share the same x coordinate
        if not np.alltrue(preds[::2, 0] == preds[1::2, 0]):
            sys.exit('Invalid submission file {0}'.format(filename))

        # x coordinates should be a monotonically non-decreasing sequence
        if not np.alltrue(preds[::2, 0][1:] - preds[::2, 0][:-1] >= 0):
            sys.exit('Invalid submission file {0}'.format(filename))

    # compute final results
    results = np.zeros((len(filelist), 3))
    for index, filename in enumerate(sorted(filelist)):
        preds = np.loadtxt(os.path.join(submit_dir, filename), dtype=np.int)
        gts = np.loadtxt(os.path.join(gt_dir, filename), dtype=np.int)
        Fs = evaluate(gts, preds, THRES)
        results[index] = np.array(
            (np.mean(Fs['junction']), np.mean(Fs['wireframe']),
             np.mean(Fs['plane'])))

    total_time = time.time() - time_start

    print(f"F (Mean):\t{np.mean(results):.4f}\n"
          f"F (Junction):\t{np.mean(results[:, 0]):.4f}\n"
          f"F (Wireframe):\t{np.mean(results[:, 1]):.4f}\n"
          f"F (Plane):\t{np.mean(results[:, 2]):.4f}\n"
          f"\nTotal time:\t{total_time:.4f} (s)")
Example 17
def train(split, x, y, x_index, embeddings, log_dir):
    f1_scores = []
    for i, (train_index, test_index) in enumerate(split):
        fold_dir = "%s/fold_%d" % (log_dir, i + 1)
        os.makedirs(fold_dir, exist_ok=True)
        print("training fold %d" % (i + 1))
        weights_path = "%s/weights.best.h5" % fold_dir

        np.save("%s/train_index.npy" % fold_dir, train_index)
        np.save("%s/test_index.npy" % fold_dir, test_index)

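        # Monitor F1 via the F1score callback: keep only the best weights and
        # stop early once F1 stops improving.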
        callbacks = [
            TensorBoard(fold_dir),
            F1score(),
            ModelCheckpoint(weights_path,
                            monitor='f1',
                            verbose=1,
                            save_best_only=True,
                            save_weights_only=True,
                            mode='max'),
            EarlyStopping(patience=5, monitor='f1', mode='max')
        ]

        x_train = [d[x_index[train_index]] for d in x]
        y_train = y[train_index]
        x_test = [d[x_index[test_index]] for d in x]
        y_test = y[test_index]
        model = build_model(embeddings)
        model.fit(x_train,
                  y_train,
                  batch_size=BATCH_SIZE,
                  epochs=NB_EPOCHS,
                  verbose=2,
                  callbacks=callbacks,
                  validation_data=[x_test, y_test])

        print("testing fold %d" % (i + 1))
        model.load_weights(weights_path)
        scores = model.predict(x_test, verbose=False)
        predictions = scores.argmax(-1)
        f1 = evaluate(y_test, predictions, "%s/result.json" % fold_dir)
        print("f1_score: %.2f" % f1)
        f1_scores.append(f1)
    f1_avg = np.average(f1_scores)
    max_f1 = max(f1_scores)
    best_fold = int(np.argmax(f1_scores)) + 1
    best_weights = "%s/fold_%d/weights.best.h5" % (log_dir, best_fold)
    result = make_dict(f1_avg, max_f1, best_fold, best_weights)
    print(result)
Example 18
def test(model, dataset, computing_device):
    with torch.no_grad():
        temp_loss = 0
        temp_acc = 0
        temp_precision = 0
        temp_recall = 0
        temp_BCR = 0
        temp_acc_list = np.zeros(14)
        temp_precision_list = np.zeros(14)
        temp_recall_list = np.zeros(14)
        temp_BCR_list = np.zeros(14)
        for minibatch_count, (images, labels) in enumerate(dataset, 0):
            images, labels = images.to(computing_device), labels.to(
                computing_device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            (acc,pre,rec,BCR),(acc_list,pre_list,rec_list,BCR_list) = evaluate(outputs.cpu().data.numpy(),\
                                                                               label = labels.cpu().data.numpy())

            temp_acc_list += acc_list
            temp_precision_list += pre_list
            temp_recall_list += rec_list
            temp_BCR_list += BCR_list

            temp_loss += loss
            temp_acc += acc
            temp_precision += pre
            temp_recall += rec
            temp_BCR += BCR

        temp_acc_list /= (minibatch_count + 1)
        temp_precision_list /= (minibatch_count + 1)
        temp_recall_list /= (minibatch_count + 1)
        temp_BCR_list /= (minibatch_count + 1)

        temp_loss = temp_loss / (minibatch_count + 1)
        temp_acc = temp_acc / (minibatch_count + 1)
        temp_precision = temp_precision / (minibatch_count + 1)
        temp_recall = temp_recall / (minibatch_count + 1)
        temp_BCR = temp_BCR / (minibatch_count + 1)
        print(temp_BCR_list)
        print(
            "loss after %d minibatch is %.3f,acc is %.3f,precision is %.3f,recall is %.3f,BCR is %.3f"
            % (minibatch_count, temp_loss, temp_acc, temp_precision,
               temp_recall, temp_BCR))
        return (temp_loss, (temp_acc, temp_precision, temp_recall, temp_BCR),
                (temp_acc_list, temp_precision_list, temp_recall_list,
                 temp_BCR_list))
Example 19
def validate(valid_loader, model):
    model.eval()
    recalls = []
    mrrs = []
    with torch.no_grad():
        for seq, target, lens in tqdm(valid_loader):
            seq = seq.to(device)
            target = target.to(device)
            outputs = model(seq, lens)
            logits = F.softmax(outputs, dim=1)
            recall, mrr = evaluate(logits, target, k=topk)
            recalls.append(recall)
            mrrs.append(mrr)

    mean_recall = np.mean(recalls)
    mean_mrr = np.mean(mrrs)
    return mean_recall, mean_mrr
Example 20
def run_test(model, config_init, config_train, dataset: DataSets):
    x_train, x_test, y_train, y_test = dataset.get_data()
    # y_train = y_train.reshape((-1, y_train.shape[-1]))

    name = model
    model = getattr(model_zoo, model)(**config_init)
    model.fit(x_train, y_train, **config_train)

    plt.figure()
    preds = []
    pred_raw = []
    for i in range(10):
        pred = model.predict(x_test)
        # print(pred.shape)
        pred = np.reshape(pred, (-1, y_test.shape[-1]))
        pred_raw.append(pred)
        pred = dataset.invert_transform(pred)
        preds.append(pred)
    model.close_session()

    y_test = np.reshape(y_test, (-1, y_test.shape[-1]))
    plt.plot(y_test, label='actual')
    y_test = dataset.invert_transform(y_test)

    pred_raw_concat = np.concatenate(pred_raw, axis=1)
    pred_raw = pred_raw_concat.mean(axis=1)
    pred_raw = np.expand_dims(pred_raw, axis=1)
    plt.plot(pred_raw, label='predict')

    plt.legend()

    pred_concat = np.concatenate(preds, axis=1)
    pred = pred_concat.mean(axis=1)
    pred = np.expand_dims(pred, axis=1)
    print(pred.shape)

    result_eval = evaluate(y_test,
                           pred,
                           metrics=['mae', 'rmse', 'mape', 'smape', 'jsd'])

    print("Result test:", result_eval)

    plot(y_test, preds, pred, title=name)
    plot_distribution(y_test, preds, pred)
    plt.show()
Example 21
def forecasting(model_name, resultsDict, predictionsDict, df, df_training,
                df_testcase):
    #index = len(df_training)
    yhat = list()

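    # Expanding-window, one-step-ahead forecasting: refit the model on all data
    # up to step t and predict the next Ambient_Temp value.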
    for t in tqdm(range(len(df_testcase.Ambient_Temp))):
        temp_train = df[:len(df_training) + t]

        if model_name == "SES":
            model = SimpleExpSmoothing(temp_train.Ambient_Temp)
        elif model_name == "HWES":
            model = ExponentialSmoothing(temp_train.Ambient_Temp)
        elif model_name == "AR":
            model = AR(temp_train.Ambient_Temp)
        # elif model_name == "MA":
        #     model = ARMA(temp_train.Ambient_Temp, order=(0, 1))
        # elif model_name == "ARMA":
        #     model = ARMA(temp_train.Ambient_Temp, order=(1, 1))
        elif model_name == "ARIMA":
            model = ARIMA(temp_train.Ambient_Temp, order=(1, 0, 0))
        elif model_name == "SARIMAX":
            model = SARIMAX(temp_train.Ambient_Temp,
                            order=(1, 0, 0),
                            seasonal_order=(0, 0, 0, 3))

        model_fit = model.fit()

        if model_name == "SES" or "HWES":
            predictions = model_fit.predict(start=len(temp_train),
                                            end=len(temp_train))
        elif model_name == "AR" or "ARIMA" or "SARIMAX":
            predictions = model_fit.predict(start=len(temp_train),
                                            end=len(temp_train),
                                            dynamic=False)
        yhat = yhat + [predictions]

    yhat = pd.concat(yhat)
    resultsDict[model_name] = metrics.evaluate(df_testcase.Ambient_Temp,
                                               yhat.values)
    predictionsDict[model_name] = yhat.values
    plt.plot(df_testcase.Ambient_Temp.values, label='Original')
    plt.plot(yhat.values, color='red', label=model_name + ' predicted')
    plt.legend()
    plt.show()
Example 22
def get_stats(data, ratio=100):
    count_1, count_2, total_a2 = 0, 0, 0
    exact_match, f1_score = 0, 0
    for a1, a2 in get_ann(data):
        total_a2 += len(a2)
        if not a2:
            count_1 += 1
        else:
            count_2 += 1
            assert len(a1) == len(a2), (a1, a2)
            em, f1 = metrics.evaluate(a1, a2)
            exact_match += em
            f1_score += f1
    return {
        'single-ann': count_1,
        'double-ann': count_2,
        'em': round(exact_match / total_a2, 2),
        'f1': round(f1_score / total_a2, 2)
    }
Example 23
def val(student, val_load, i):

    classes = [
        "1_engine", "2_machinery-impact", "3_non-machinery-impact",
        "4_powered-saw", "5_alert-signal", "6_music", "7_human-voice", "8_dog"
    ]

    student.eval()
    predictions = pd.DataFrame(columns=[
        "audio_filename", "1_engine", "2_machinery-impact",
        "3_non-machinery-impact", "4_powered-saw", "5_alert-signal", "6_music",
        "7_human-voice", "8_dog"
    ])
    with torch.no_grad():
        for j, sample in enumerate(tqdm(val_load)):
            student_input = sample['student'].to(device)
            target = sample['target'].to(device)
            filenames = sample['filename']
            student_input = student_input.float()
            target = target.float()
            output, _ = student(student_input)
            output = nn.Sigmoid()(output)
            for k in range(output.shape[0]):
                curr = output[k].detach().cpu().numpy()
                temp = {}
                temp["audio_filename"] = filenames[k]
                for p, class_name in enumerate(classes):
                    temp[class_name] = curr[p]
                predictions = predictions.append(temp, ignore_index=True)

    predictions.to_csv('pred/predictions_{}.csv'.format(i), index=False)
    df_dict = evaluate('pred/predictions_{}.csv'.format(i),
                       'annotations-dev.csv', 'dcase-ust-taxonomy.yaml',
                       "coarse")

    micro_auprc, eval_df = micro_averaged_auprc(df_dict, return_df=True)
    macro_auprc, class_auprc = macro_averaged_auprc(df_dict,
                                                    return_classwise=True)
    return micro_auprc
Example 24
def run(model, config_init, config_train, dataset: DataSets, filename, plot_pred=True, plot_dis=False):
    x_train, x_test, y_train, y_test = dataset.get_data()

    model = getattr(model_zoo, model)(**config_init)
    model.fit(x_train, y_train, **config_train)

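    # Collect num_predict forecasts from the model; their mean and standard
    # deviation are computed below after inverting the scaling transform.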
    preds = []
    num_predict = 100
    for i in range(num_predict):
        pred = np.reshape(model.predict(x_test), (-1, y_test.shape[-1]))
        preds.append(pred)

    preds_invert = []
    for pred in preds:
        preds_invert.append(dataset.invert_transform(pred))

    model.close_session()

    preds_invert = np.concatenate(preds_invert, axis=1)
    pred_mean = np.mean(preds_invert, axis=1)
    pred_std = np.std(preds_invert, axis=1)

    y_test = np.reshape(y_test, (-1, y_test.shape[-1]))
    actual_invert = dataset.invert_transform(y_test)

    result_eval = evaluate(actual_invert.reshape(pred_mean.shape), pred_mean, ["mae", 'smape', 'jsd'])

    if filename is not None:
        df = pd.DataFrame(np.concatenate([actual_invert, preds_invert], axis=1),
                          columns=['actual'] + [f"predict{i}" for i in range(num_predict)])
        df.to_csv(filename + ".csv", index=False)

    if plot_pred:
        plot_predict(actual_invert, pred_mean, pred_std, title=str(result_eval), path=filename)
    if plot_dis:
        plot_distribution(actual_invert, pred_mean, title=str(result_eval), path=None)
    del model, preds_invert, pred_mean, pred_std, dataset
    return result_eval['mae']
Example 25
	def np_evaluate(self):
		self.logger.info('NP Canonicalizing Evaluation');

		cesi_clust2ent = {}
		for rep, cluster in self.ent_clust.items():
			cesi_clust2ent[rep] = set(cluster)
		cesi_ent2clust = invertDic(cesi_clust2ent, 'm2os')

		cesi_ent2clust_u = {}
		for trp in self.side_info.triples:
			sub_u, sub = trp['triple_unique'][0], trp['triple'][0]
			cesi_ent2clust_u[sub_u] = cesi_ent2clust[self.side_info.ent2id[sub]]
		cesi_clust2ent_u = invertDic(cesi_ent2clust_u, 'm2os')

		eval_results = evaluate(cesi_ent2clust_u, cesi_clust2ent_u, self.true_ent2clust, self.true_clust2ent)

		self.logger.info('Macro F1: {}, Micro F1: {}, Pairwise F1: {}'.format(eval_results['macro_f1'], eval_results['micro_f1'], eval_results['pair_f1']))
		self.logger.info('CESI: #Clusters: %d, #Singletons %d'    % (len(cesi_clust2ent_u), 	len([1 for _, clust in cesi_clust2ent_u.items()    if len(clust) == 1])))
		self.logger.info('Gold: #Clusters: %d, #Singletons %d \n' % (len(self.true_clust2ent),  len([1 for _, clust in self.true_clust2ent.items() if len(clust) == 1])))

		# Dump the final results
		fname = self.p.out_path + self.p.file_results
		with open(fname, 'w') as f: f.write(json.dumps(eval_results))
Example 26
def test(model, queryloader, galleryloader, ranks=[1, 5, 10, 20]):
    batch_time = AverageMeter()

    model.eval()

    with torch.no_grad():
        qf, q_pids, q_camids = [], [], []
        for batch, (imgs, pids, camids) in enumerate(queryloader):
            imgs = imgs.cuda()

            end = time.time()

            fc0_preds = model(imgs)

            batch_time.update(time.time() - end)

            output_fc = "fc0"
            fc0 = fc0_preds[output_fc]
            fc0 = fc0.data.cpu()

            qf.append(fc0)
            q_pids.extend(pids)
            q_camids.extend(camids)
        qf = torch.cat(qf, 0)
        q_pids = np.asarray(q_pids)
        q_camids = np.asarray(q_camids)

        print("Extracted features for query set, obtained {}-by-{} matrix".
              format(qf.size(0), qf.size(1)))

        gf, g_pids, g_camids = [], [], []
        end = time.time()
        for batch, (imgs, pids, camids) in enumerate(galleryloader):
            imgs = imgs.cuda()

            end = time.time()

            fc0_preds = model(imgs)

            output_fc = "fc0"
            fc0 = fc0_preds[output_fc]

            batch_time.update(time.time() - end)

            fc0 = fc0.data.cpu()
            gf.append(fc0)
            g_pids.extend(pids)
            g_camids.extend(camids)
        gf = torch.cat(gf, 0)
        g_pids = np.asarray(g_pids)
        g_camids = np.asarray(g_camids)

        print("Extracted features for gallery set, obtained {}-by-{} matrix".
              format(gf.size(0), gf.size(1)))
        ##############################################################################################################################
    print("==> BatchTime(s)/BatchSize(img): {:.3f}/{}".format(
        batch_time.avg, test_batch))
    print("==> BatchTime(s)/BatchSize(img): {:.3f}/{}".format(
        batch_time.avg, test_batch))

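    # Squared Euclidean distance matrix between query and gallery features:
    # ||q||^2 + ||g||^2 - 2 * q . g^T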
    m, n = qf.size(0), gf.size(0)
    distmat = torch.pow(qf, 2).sum(dim=1, keepdim=True).expand(m, n) + \
              torch.pow(gf, 2).sum(dim=1, keepdim=True).expand(n, m).t()
    distmat.addmm_(1, -2, qf, gf.t())
    distmat = distmat.numpy()

    print("Computing CMC and mAP")
    cmc, mAP = evaluate(distmat,
                        q_pids,
                        g_pids,
                        q_camids,
                        g_camids,
                        use_metric_cuhk03=use_metric_cuhk03)

    print("Results ----------")
    print("mAP: {:.1%}".format(mAP))
    print("CMC curve")
    for r in ranks:
        print("Rank-{:<3}: {:.1%}".format(r, cmc[r - 1]))
    print("------------------")

    return cmc[0]
Example 27
        train_loss = np.mean(train_loss)
        
        t1 = time()

        # evaluate the performance of the model on the validation set
        model.eval()

        val_preds, val_labels, val_keys = [], [], []
        for batch_idx_list in val_batch_sampler:
            his_input_title, pred_input_title, labels, keys = \
                val_dataset._get_batch(batch_idx_list)
            preds = model.predict(his_input_title, pred_input_title)

            val_preds.extend(tensorToScalar(preds))
            val_labels.extend(labels)
            val_keys.extend(keys)

        #import pdb; pdb.set_trace()
        auc, mrr, ndcg_5, ndcg_10 = evaluate(val_labels, val_preds, val_keys)
        t2 = time()

        # save model when auc > max_auc
        if epoch == 1:
            max_auc = auc
        if auc > max_auc:
            torch.save(model.state_dict(), train_model_path)
        max_auc = max(auc, max_auc)

        log.record('Training Stage: Epoch:%d, compute loss cost:%.4fs, evaluation cost:%.4fs' % (epoch, (t1-t0), (t2-t1)))
        log.record('Train loss:{:.4f}'.format(train_loss))
        log.record('auc:%.4f, mrr:%.4f, ndcg@5:%.4f, ndcg@10:%.4f' % (auc, mrr, ndcg_5, ndcg_10))
Example 28
def train(annotation_path,
          taxonomy_path,
          train_feature_dir,
          val_feature_dir,
          output_dir,
          load_checkpoint,
          load_checkpoint_path,
          exp_id,
          label_mode,
          batch_size=32,
          n_epochs=100,
          kernel_size=3,
          layer_depth=[64, 128, 256, 512],
          chs=1,
          max_ckpt=20,
          lr=1e-3,
          hidden_layer_size=256,
          snapshot=5,
          num_hidden_layers=1,
          standardize=True,
          timestamp=None):
    """
    Train and evaluate a MIL MLP model.
    Parameters
    ----------
    annotation_path
    emb_dir
    output_dir
    label_mode
    batch_size
    num_epochs
    patience
    learning_rate
    hidden_layer_size
    l2_reg
    standardize
    timestamp
    random_state

    Returns
    -------
    """

    # Load annotations and taxonomy
    print("* Loading dataset.")
    annotation_data = pd.read_csv(annotation_path).sort_values(
        'audio_filename')
    with open(taxonomy_path, 'r') as f:
        taxonomy = yaml.load(f, Loader=yaml.Loader)

    annotation_data_trunc = annotation_data[[
        'audio_filename', 'latitude', 'longitude', 'week', 'day', 'hour'
    ]].drop_duplicates()
    file_list = annotation_data_trunc['audio_filename'].to_list()
    latitude_list = annotation_data_trunc['latitude'].to_list()
    longitude_list = annotation_data_trunc['longitude'].to_list()
    week_list = annotation_data_trunc['week'].to_list()
    day_list = annotation_data_trunc['day'].to_list()
    hour_list = annotation_data_trunc['hour'].to_list()

    full_fine_target_labels = [
        "{}-{}_{}".format(coarse_id, fine_id, fine_label)
        for coarse_id, fine_dict in taxonomy['fine'].items()
        for fine_id, fine_label in fine_dict.items()
    ]
    fine_target_labels = [
        x for x in full_fine_target_labels
        if x.split('_')[0].split('-')[1] != 'X'
    ]
    coarse_target_labels = [
        "_".join([str(k), v]) for k, v in taxonomy['coarse'].items()
    ]

    print("* Preparing training data.")

    # For fine, we include incomplete labels in targets for computing the loss
    fine_target_list = get_file_targets(annotation_data,
                                        full_fine_target_labels)
    coarse_target_list = get_file_targets(annotation_data,
                                          coarse_target_labels)
    train_file_idxs, valid_file_idxs = get_subset_split(annotation_data)

    if label_mode == "fine":
        target_list = fine_target_list
        labels = fine_target_labels
        num_classes = len(labels)
        y_true_num = len(full_fine_target_labels)
    elif label_mode == "coarse":
        target_list = coarse_target_list
        labels = coarse_target_labels
        num_classes = len(labels)
        y_true_num = num_classes
    else:
        raise ValueError("Invalid label mode: {}".format(label_mode))




    X_train_meta, y_train, X_valid_meta, y_valid_meta, scaler \
        = prepare_data(train_file_idxs, valid_file_idxs,
                       latitude_list, longitude_list,
                       week_list, day_list, hour_list,
                       target_list, standardize=standardize)

    print('X_train meta shape', X_train_meta.shape)
    print('y_train shape', y_train.shape)
    print('X_valid_meta shape', X_valid_meta.shape)
    print('y_valid shape', y_valid_meta.shape)

    meta_dims = X_train_meta.shape[2]

    X_train = load_train_data(file_list, train_file_idxs, train_feature_dir)
    X_valid = load_train_data(file_list, valid_file_idxs, val_feature_dir)
    _, frames, bins = X_train.shape
    print('X_train shape', X_train.shape)
    print('X_valid shape', X_valid.shape)

    (mean_train,
     std_train) = calculate_scalar_of_tensor(np.concatenate(X_train, axis=0))

    model = CNN9_Res_train(kernel_size, layer_depth, num_classes,
                           hidden_layer_size)

    if not timestamp:
        timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")

    model_path = os.path.join(output_dir, 'exp' + exp_id)

    if scaler is not None:
        scaler_path = os.path.join(model_path, 'stdizer.pkl')
        with open(scaler_path, 'wb') as f:
            pk.dump(scaler, f)

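    # For the fine label mode, precompute index boundaries mapping each coarse
    # category to its fine labels so the loss can mask out fine labels whose
    # coarse category carries an incomplete ('X') annotation.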
    if label_mode == "fine":
        full_coarse_to_fine_terminal_idxs = np.cumsum(
            [len(fine_dict) for fine_dict in taxonomy['fine'].values()])
        incomplete_fine_subidxs = [
            len(fine_dict) - 1 if 'X' in fine_dict else None
            for fine_dict in taxonomy['fine'].values()
        ]
        coarse_to_fine_end_idxs = np.cumsum([
            len(fine_dict) - 1 if 'X' in fine_dict else len(fine_dict)
            for fine_dict in taxonomy['fine'].values()
        ])

        # Create loss function that only adds loss for fine labels for which
        # we don't have any incomplete labels
        def masked_loss(y_true, y_pred):
            loss = None
            for coarse_idx in range(len(full_coarse_to_fine_terminal_idxs)):
                true_terminal_idx = full_coarse_to_fine_terminal_idxs[
                    coarse_idx]
                true_incomplete_subidx = incomplete_fine_subidxs[coarse_idx]
                pred_end_idx = coarse_to_fine_end_idxs[coarse_idx]

                if coarse_idx != 0:
                    true_start_idx = full_coarse_to_fine_terminal_idxs[
                        coarse_idx - 1]
                    pred_start_idx = coarse_to_fine_end_idxs[coarse_idx - 1]
                else:
                    true_start_idx = 0
                    pred_start_idx = 0

                if true_incomplete_subidx is None:
                    true_end_idx = true_terminal_idx

                    sub_true = y_true[:, true_start_idx:true_end_idx]
                    sub_pred = y_pred[:, pred_start_idx:pred_end_idx]

                else:
                    # Don't include incomplete label
                    true_end_idx = true_terminal_idx - 1
                    true_incomplete_idx = true_incomplete_subidx + true_start_idx
                    assert true_end_idx - true_start_idx == pred_end_idx - pred_start_idx
                    assert true_incomplete_idx == true_end_idx

                    # 1 if not incomplete, 0 if incomplete
                    mask = K.expand_dims(1 - y_true[:, true_incomplete_idx])

                    # Mask the target and predictions. If the mask is 0,
                    # all entries will be 0 and the BCE will be 0.
                    # This has the effect of masking the BCE for each fine
                    # label within a coarse label if an incomplete label exists
                    sub_true = y_true[:, true_start_idx:true_end_idx] * mask
                    sub_pred = y_pred[:, pred_start_idx:pred_end_idx] * mask

                if loss is not None:
                    loss += K.sum(K.binary_crossentropy(sub_true, sub_pred))
                else:
                    loss = K.sum(K.binary_crossentropy(sub_true, sub_pred))

            return loss

        loss_func = masked_loss
    else:

        def unmasked_loss(y_true, y_pred):

            loss = None
            loss = K.sum(K.binary_crossentropy(y_true, y_pred))
            return loss

        loss_func = unmasked_loss

    ###     placeholder
    x = tf.placeholder(tf.float32, shape=[None, frames, bins, chs], name='x')
    meta_x = tf.placeholder(tf.float32, shape=[None, meta_dims], name='meta_x')
    y = tf.placeholder(tf.float32, shape=[None, y_true_num], name='y')
    is_training = tf.placeholder(tf.bool, shape=None, name='is_training')

    ###     net output
    output = model.forward(input_tensor=x,
                           input_meta=meta_x,
                           is_training=is_training)
    sigmoid_output = tf.nn.sigmoid(output, name='sigmoid_output')
    loss = loss_func(y, sigmoid_output)
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    learning_rate = tf.Variable(float(lr), trainable=False, dtype=tf.float32)
    learning_rate_decay_op = learning_rate.assign(learning_rate * 0.9)
    with tf.control_dependencies(update_ops):
        #        train_op = tf.train.MomentumOptimizer(learning_rate=lr,momentum=momentum).minimize(loss)
        train_op = tf.train.AdamOptimizer(
            learning_rate=learning_rate).minimize(loss)

    ###     start session
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    saver = tf.train.Saver(max_to_keep=max_ckpt)
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())
    if load_checkpoint:
        saver.restore(sess, load_checkpoint_path)

    ###     tensorboard summary

    train_summary_dir = os.path.join(model_path, 'summaries', 'train')
    train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)

    loss_all = tf.placeholder(tf.float32, shape=None, name='loss_all')

    tf.add_to_collection("loss", loss_all)

    loss_summary = tf.summary.scalar('loss', loss_all)

    val_summary_dir = os.path.join(model_path, 'summaries', 'val')
    val_micro_auprc_summary_writer = tf.summary.FileWriter(
        os.path.join(val_summary_dir, 'micro_auprc'), sess.graph)
    val_macro_auprc_summary_writer = tf.summary.FileWriter(
        os.path.join(val_summary_dir, 'macro_auprc'), sess.graph)
    val_val_micro_F1score_summary_writer = tf.summary.FileWriter(
        os.path.join(val_summary_dir, 'micro_F1score'), sess.graph)
    val_summary = tf.placeholder(tf.float32, shape=None, name='loss_all')
    tf.add_to_collection("val_summary", val_summary)
    val_summary_op = tf.summary.scalar('val_summary', val_summary)

    ###     train loop
    print("* Training model.")
    class_auprc_dict = {}
    for epoch in range(n_epochs):
        train_loss = 0
        n_batch = 0
        for X_train_batch, X_meta_batch, y_train_batch in gen_train_batch(
                X_train, X_train_meta, y_train, batch_size):

            X_meta_batch = X_meta_batch.reshape(-1, meta_dims)
            X_train_batch = scale(X_train_batch, mean_train, std_train)
            X_train_batch = X_train_batch.reshape(-1, frames, bins, chs)
            _, train_loss_batch = sess.run(
                [train_op, loss],
                feed_dict={
                    x: X_train_batch,
                    meta_x: X_meta_batch,
                    y: y_train_batch,
                    is_training: True
                })
            train_loss += train_loss_batch
            n_batch += 1
        train_loss = train_loss / n_batch
        train_summary_op = tf.summary.merge([loss_summary])
        train_summaries = sess.run(train_summary_op,
                                   feed_dict={loss_all: train_loss})
        train_summary_writer.add_summary(train_summaries, epoch)

        print("step %d" % (epoch))
        print("   train loss: %f" % (train_loss))

        pre = []
        if ((epoch + 1) % snapshot == 0
                and epoch > 0) or epoch == n_epochs - 1:
            sess.run(learning_rate_decay_op)

            for val_data_batch, val_meta_batch in gen_val_batch(
                    X_valid, X_valid_meta, batch_size):

                val_meta_batch = val_meta_batch.reshape(-1, meta_dims)
                val_data_batch = scale(val_data_batch, mean_train, std_train)
                val_data_batch = val_data_batch.reshape(-1, frames, bins, chs)
                prediction = sess.run(sigmoid_output,
                                      feed_dict={
                                          x: val_data_batch,
                                          meta_x: val_meta_batch,
                                          is_training: False
                                      })
                pre.extend(prediction)
            # print(len(pre))
            generate_output_file(pre, valid_file_idxs, model_path, file_list,
                                 label_mode, taxonomy)
            submission_path = os.path.join(model_path, "output.csv")
            df_dict = metrics.evaluate(prediction_path=submission_path,
                                       annotation_path=annotation_path,
                                       yaml_path=taxonomy_path,
                                       mode=label_mode)
            val_micro_auprc, eval_df = metrics.micro_averaged_auprc(
                df_dict, return_df=True)
            val_macro_auprc, class_auprc = metrics.macro_averaged_auprc(
                df_dict, return_classwise=True)
            thresh_idx_05 = (eval_df['threshold'] >= 0.5).nonzero()[0][0]
            val_micro_F1score = eval_df['F'][thresh_idx_05]

            val_summaries = sess.run(val_summary_op,
                                     feed_dict={val_summary: val_micro_auprc})
            val_micro_auprc_summary_writer.add_summary(val_summaries, epoch)
            val_summaries = sess.run(val_summary_op,
                                     feed_dict={val_summary: val_macro_auprc})
            val_macro_auprc_summary_writer.add_summary(val_summaries, epoch)
            val_summaries = sess.run(
                val_summary_op, feed_dict={val_summary: val_micro_F1score})
            val_val_micro_F1score_summary_writer.add_summary(
                val_summaries, epoch)
            class_auprc_dict['class_auprc_' + str(epoch)] = class_auprc
            print('official')
            print('micro', val_micro_auprc)
            print('micro_F1', val_micro_F1score)
            print('macro', val_macro_auprc)

            print('-----save:{}-{}'.format(
                os.path.join(model_path, 'checkpoint', 'model'), epoch))
            saver.save(sess,
                       os.path.join(model_path, 'checkpoint', 'model'),
                       global_step=epoch)

            np.save(os.path.join(model_path, 'class_auprc_dict.npy'),
                    class_auprc_dict)
    sess.close()
Example 29
def _run_experiment(
        gpu_device,
        dataset,
        dataset_path,
        results_path,
        csv_filepath,
        metrics,
        epochs,
        normalization_method,
        past_history_factor,
        max_steps_per_epoch,
        batch_size,
        learning_rate,
        model_name,
        model_index,
        model_args,
):
    import gc
    import tensorflow as tf
    from models import create_model

    tf.keras.backend.clear_session()

    def select_gpu_device(gpu_number):
        gpus = tf.config.experimental.list_physical_devices("GPU")
        if len(gpus) >= 2 and gpu_number is not None:
            device = gpus[gpu_number]
            tf.config.experimental.set_memory_growth(device, True)
            tf.config.experimental.set_visible_devices(device, "GPU")

    select_gpu_device(gpu_device)

    results = read_results_file(csv_filepath, metrics)

    x_train, y_train, x_test, y_test, y_test_denorm, norm_params = read_data(
        dataset_path, normalization_method, past_history_factor
    )
    x_train = tf.convert_to_tensor(x_train)
    y_train = tf.convert_to_tensor(y_train)
    x_test = tf.convert_to_tensor(x_test)
    y_test = tf.convert_to_tensor(y_test)
    y_test_denorm = tf.convert_to_tensor(y_test_denorm)

    forecast_horizon = y_test.shape[1]
    past_history = x_test.shape[1]
    steps_per_epoch = min(
        int(np.ceil(x_train.shape[0] / batch_size)), max_steps_per_epoch,
    )

    optimizer = tf.optimizers.Adam(learning_rate=learning_rate)
    model = create_model(
        model_name,
        x_train.shape,
        output_size=forecast_horizon,
        optimizer=optimizer,
        loss="mae",
        **model_args
    )
    print(model.summary())

    training_time_0 = time.time()
    history = model.fit(
        x_train,
        y_train,
        batch_size=batch_size,
        epochs=epochs,
        steps_per_epoch=steps_per_epoch,
        validation_data=(x_test, y_test),
        shuffle=True,
    )
    training_time = time.time() - training_time_0

    # Get validation metrics
    test_time_0 = time.time()
    test_forecast = model(x_test).numpy()
    test_time = time.time() - test_time_0

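    # Denormalize each forecast with the stored normalization parameters before
    # computing the evaluation metrics against y_test_denorm.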
    for i in range(test_forecast.shape[0]):
        nparams = norm_params[0]
        test_forecast[i] = denormalize(
            test_forecast[i], nparams, method=normalization_method,
        )
    if metrics:
        test_metrics = evaluate(y_test_denorm, test_forecast, metrics)
    else:
        test_metrics = {}

    # Save results
    predictions_path = "{}/{}/{}/{}/{}/{}/{}/{}/".format(
        results_path,
        dataset,
        normalization_method,
        past_history_factor,
        epochs,
        batch_size,
        learning_rate,
        model_name,
    )
    if not os.path.exists(predictions_path):
        os.makedirs(predictions_path)
    np.save(
        predictions_path + str(model_index) + ".npy", test_forecast,
    )
    results = results.append(
        {
            "DATASET": dataset,
            "MODEL": model_name,
            "MODEL_INDEX": model_index,
            "MODEL_DESCRIPTION": str(model_args),
            "FORECAST_HORIZON": forecast_horizon,
            "PAST_HISTORY_FACTOR": past_history_factor,
            "PAST_HISTORY": past_history,
            "BATCH_SIZE": batch_size,
            "EPOCHS": epochs,
            "STEPS": steps_per_epoch,
            "OPTIMIZER": "Adam",
            "LEARNING_RATE": learning_rate,
            "NORMALIZATION": normalization_method,
            "TEST_TIME": test_time,
            "TRAINING_TIME": training_time,
            **test_metrics,
            "LOSS": str(history.history["loss"]),
            "VAL_LOSS": str(history.history["val_loss"]),
        },
        ignore_index=True,
    )

    results.to_csv(
        csv_filepath, sep=";",
    )

    gc.collect()
    del model, x_train, x_test, y_train, y_test, y_test_denorm, test_forecast
Example 30
        type=int,
        default=1)

    args = parser.parse_args()

    os.makedirs(args.outdir, exist_ok=True)

    print("Starting track...")
    avg_time, avg_cpu_time, frames = track(args.model,
                                           args.folder,
                                           args.outdir,
                                           processes=args.processors)

    print("Evaluating...")
    Success_Average, Precision_Average, NPrecision_Average = evaluate(
        sorted(glob.glob(os.path.join(args.outdir, "*.txt"))),
        sorted(glob.glob(os.path.join(args.folder, "anno", "*.txt"))))

    if not os.path.exists(args.summary):
        with open(args.summary, 'w') as f:
            f.write(
                '"Model","Folder name","Success Average","Precision Average","NPrecision_average","Frames per second","CPU Usage per frame", "Frames computed"\n'
            )

    with open(args.summary, 'a') as f:
        f.write(",".join(
            map(lambda s: '"%s"' % s,
                (args.model, args.folder, Success_Average, Precision_Average,
                 NPrecision_Average, 1 / avg_time, avg_cpu_time, frames))) +
                "\n")
Example 31
                oracle = oracles.authors(adj, labels)
            elif bridges:
                oracle = oracles.bridges(adj)
            elif cuts is not None:
                (source, k) = cuts
                sink = str(min([int(nid) for nid in adj]))
                oracle = oracles.cuts(adj, source, sink, k)
            elif empty:
                oracle = lambda query, cid, nid: False
            elif reversions:
                oracle = oracles.reversions(adj, labels)
            elif sources:
                oracle = oracles.sources(adj, labels)

            # Evaluate.
            result = metrics.evaluate(adj, scoremap, oracle, labels, leaves=leaves, maximum=maximum, testset=authors)

            # Save.
            postfix = 'e'
            if authors is not None: postfix += 'a'
            elif bridges: postfix += 'b'
            elif cuts is not None: postfix += source + 'c' + str(k)
            elif empty: postfix += 'e'
            elif reversions: postfix += 'r'
            elif sources: postfix += 's'
            if leaves: postfix += 'l'
            if maximum is not None: postfix += 'm' + str(maximum)

            evalfile = open(outfile + postfix + '.dat', 'w')
            for (nid, (t_size, o_size, i_size)) in result:
                evalfile.write(nid + ' ' + str(t_size) + ' ' + str(o_size) + ' ' + str(i_size) + ' ' + str(abs(t_size - o_size)) + ' ' + str(max(i_size - o_size, o_size)) + '\n')
Example 32
    #losses = tf.reduce_mean(loss)

    train_op = tf.train.GradientDescentOptimizer(0.03).minimize(losses)
    lenth = len(inputs1)

    with tf.Session() as sess:
        sess.run(init)
        print("\n")
        for epoch in range(num_epoch):
            for i in range(lenth // batch_num):
                (data1, data2, labels) = next_batch(batch_num, inputs1,
                                                    inputs2, originalTraining)
                sess.run(train_op,
                         feed_dict={
                             x1: data1,
                             x2: data2,
                             pivot: labels
                         })
            transform_result = sess.run(prediction,
                                        feed_dict={
                                            x1: test1[:lenthtest],
                                            x2: test2[:lenthtest],
                                            pivot: [[0, 0, 0, 0, 0]]
                                        })
            newScores = []
            for item in transform_result:
                newScores.append(item[0])

            calibrated = metrics.calibration(newScores)
            metrics.evaluate(calibrated, originalScores)