Ejemplo n.º 1
0
def validate(m, X, y):
    """Score ``m`` on ``X`` against ``y`` and return ``(auc, eer)``.

    When the global ``args.task`` equals 1, ``m`` is used directly through
    its ``predict`` method.  Otherwise ``m`` is indexed as a pair and each
    sample is labelled by whether ``m[0].score_samples`` is greater than
    ``m[1].score_samples``.

    The metrics themselves come from ``get_metrics(y_pred, y_true)``.
    """
    if args.task != 1:
        # Two-model case: compare the per-sample scores of both models.
        first_scores = m[0].score_samples(X)
        second_scores = m[1].score_samples(X)
        predicted = (first_scores > second_scores).tolist()
    else:
        predicted = m.predict(X).tolist()

    auc, eer = get_metrics(predicted, y.tolist())
    return auc, eer
Ejemplo n.º 2
0
def validate(test_model, num_samples, writer, Y_test, batch_size=100):
    """Evaluate ``test_model`` over the test set in mini-batches.

    Args:
        test_model: Callable invoked as ``test_model(start, end, 0)`` for
            each sample range; it must return
            ``(pred, cost, loss, acc, sim)``.
        num_samples: Total number of test samples to cover.
        writer: Object whose ``write`` method receives the summary line.
        Y_test: Label matrix; ``numpy.argmax(Y_test, axis=1)`` is used as
            the true class index of each sample.
        batch_size: Maximum number of samples per ``test_model`` call.

    Returns:
        The accuracy averaged over all samples, each batch being weighted
        by its size.
    """
    cost_total, acc_total, loss_total = 0., 0., 0.
    sim_chunks = []
    batch_index = 0

    while True:
        start = batch_index * batch_size
        end = min(start + batch_size, num_samples)
        batch_index += 1

        _pred, cost, loss, acc, sim = test_model(start, end, 0)
        sim_chunks.append(sim)

        # Weight every batch by its size so that a short final batch does
        # not count as much as a full one.
        batch_len = end - start
        cost_total += cost * batch_len
        acc_total += acc * batch_len
        loss_total += loss * batch_len

        # ``end`` is treated as exclusive (the batch size is end - start),
        # so all samples are covered only once it reaches ``num_samples``.
        # Stopping one earlier would skip a trailing single-sample batch
        # and leave the predictions shorter than the labels.
        if end >= num_samples:
            break

    cost_total /= num_samples
    acc_total /= num_samples
    loss_total /= num_samples

    summary = '\tTesting\tAccuracy = %.4f\tCost = %f\tLoss = %f' % (
        acc_total, cost_total, loss_total)
    writer.write(summary + '\n')
    # Single-argument call form prints identically on Python 2 and 3.
    print(summary)

    # Join the per-batch outputs once instead of re-copying on every batch.
    if len(sim_chunks) == 1:
        predictions = sim_chunks[0]
    else:
        predictions = numpy.concatenate(sim_chunks, axis=0)

    labels = numpy.argmax(Y_test, axis=1)

    roc_auc, ap, top1_accu, top5_accu = get_metrics(predictions, labels)
    print('\tROC-AUC = %.4f\tAP = %.4f\tTop-1 Acc = %.4f\tTop-5 Acc = %.4f' % (
        roc_auc, ap, top1_accu, top5_accu))

    return acc_total
Ejemplo n.º 3
0
def validate(test_model, writer, Y_test, batch_size=100, seen='seen'):
    """Evaluate ``test_model`` in mini-batches and report ranking metrics.

    Args:
        test_model: Callable invoked as ``test_model(start, end, 0)`` for
            each sample range; it must return
            ``(pred, cost, loss, acc, sim)``.  Only ``sim`` is used here.
        writer: Object whose ``write`` method receives the summary line.
        Y_test: Label matrix; its first dimension gives the number of
            samples and ``numpy.argmax(Y_test, axis=1)`` the true class
            index of each sample.
        batch_size: Maximum number of samples per ``test_model`` call.
        seen: Tag placed at the start of the summary line.

    Returns:
        The tuple ``(roc_auc, pr_auc, top1_accu, top5_accu)`` as returned
        by ``get_metrics``.
    """
    num_samples = Y_test.shape[0]
    sim_chunks = []
    batch_index = 0

    while True:
        start = batch_index * batch_size
        end = min(start + batch_size, num_samples)
        batch_index += 1

        _pred, _cost, _loss, _acc, sim = test_model(start, end, 0)
        sim_chunks.append(sim)

        # ``end`` is treated as exclusive, so all samples are covered only
        # once it reaches ``num_samples``.  Stopping one earlier would skip
        # a trailing single-sample batch and leave the predictions shorter
        # than the labels.
        if end >= num_samples:
            break

    # Join the per-batch outputs once instead of re-copying on every batch.
    if len(sim_chunks) == 1:
        predictions = sim_chunks[0]
    else:
        predictions = numpy.concatenate(sim_chunks, axis=0)

    labels = numpy.argmax(Y_test, axis=1)

    roc_auc, pr_auc, top1_accu, top5_accu = get_metrics(predictions, labels)

    # ``%`` binds tighter than ``+``: only the metric template is formatted,
    # the ``seen`` tag is inserted verbatim.
    summary = ('\t' + seen +
               '\tROC-AUC = %.4f\tPR-AUC = %.4f\tTop-1 Acc = %.4f\tTop-5 Acc = %.4f'
               % (roc_auc, pr_auc, top1_accu, top5_accu))
    # Single-argument call form prints identically on Python 2 and 3.
    print(summary)
    writer.write(summary + '\n')

    return roc_auc, pr_auc, top1_accu, top5_accu
Ejemplo n.º 4
0
def main():
    """Train and evaluate a three-class tweet sentiment classifier.

    Steps: load the dataset and labels, request one embedding per text from
    the vectorizer HTTP service, fit the model for five epochs, print the
    metrics and the confusion matrix, then save the confusion-matrix plot
    (``tweet_sentiment_class_confusion_matrix.png``) and a per-tweet
    prediction table (``tweet_sentiment_prediction_table.csv``) in the
    current working directory.

    Raises:
        requests.HTTPError: If the vectorizer answers with a 4xx/5xx status.
        KeyError: If the model predicts a class index other than 0, 1 or 2.
    """
    class_names = ['Negative', 'Neutral', 'Positive']
    name_by_index = dict(enumerate(class_names))
    dots = "." * 6

    print('loading dataset{}'.format(dots))
    dataset = load_data(DATA_PATH)
    labels = load_labels(LABEL_PATH)

    # Uses the same test_size / random_state as the embedding split further
    # down; the evaluation relies on both splits selecting the same rows.
    _, test_text, _, test_labels = train_test_split(
        dataset.text, labels.labels, test_size=0.2, random_state=40)

    print('generating embeddings{}'.format(dots))
    print('calling vectorizer api{}'.format(dots))
    matrix_embedding = np.zeros((len(dataset), 400))
    for row, text in enumerate(dataset['text']):
        response = requests.get('http://vectorizer.host/embed',
                                data={'text': text})
        # Fail loudly on an HTTP error instead of parsing an error body as
        # if it were an embedding.
        response.raise_for_status()
        matrix_embedding[row] = json.loads(response.text)

    embedding_size = matrix_embedding.shape[1]

    # specifying exact numbers now, need to convert to variables
    print('initializing model{}'.format(dots))
    model = models.keras_model(embedding_size)

    categorical_labels = to_categorical(labels, num_classes=3)

    print('train test split')
    X_train, X_test, y_train, _ = train_test_split(matrix_embedding,
                                                   categorical_labels,
                                                   test_size=0.2,
                                                   random_state=40)

    # fit model
    print('fitting model{}'.format(dots))
    model.fit(X_train, y_train, epochs=5, verbose=0)

    # evaluate model
    print('generating predictions{}'.format(dots))
    y_predicted = model.predict_classes(X_test)
    # An unexpected class index raises here rather than silently producing
    # a list shorter than the test set.
    y_string_predict = [name_by_index[int(prediction)]
                        for prediction in y_predicted]

    accuracy, precision, recall, f1 = evaluate.get_metrics(
        test_labels, y_predicted)
    print('accuracy: {} precision: {} recall: {} f1: {}'.format(
        accuracy, precision, recall, f1))

    print('plotting confusion matrix{}'.format(dots))
    cm = confusion_matrix(test_labels, y_predicted)
    print('generated confusion matrix')
    print(cm)
    plt.figure(figsize=(10, 10))
    evaluate.plot_confusion_matrix(
        cm,
        classes=class_names,
        normalize=False,
        title='Confusion matrix')
    plt.savefig('tweet_sentiment_class_confusion_matrix.png')
    plt.close()

    print('generating output dataframe{}'.format(dots))
    output_df = pd.DataFrame(test_text)
    output_df.columns = ['Test Tweets']
    output_df['Target Sentiment'] = test_labels
    output_df['Predicted Sentiment'] = y_string_predict
    output_df.to_csv('tweet_sentiment_prediction_table.csv')
Ejemplo n.º 5
0
def dev_prediction():
    """Run prediction on the development set and report the metrics.

    Every labelled file under ``wavs/dev`` is loaded, passed through
    ``pipeline``, plotted, and scored.  The per-file AUC, EER and precision
    are shown on the progress bar; their averages are printed at the end and
    the three series are plotted side by side.
    """
    dev_dir = "wavs/dev"

    # default arguments
    target_sr = 22050
    frame_len = 256
    hop = 64

    # Maps a file path (without extension) to its VAD label sequence.
    label: dict = read_label_from_file(frame_size=frame_len / target_sr,
                                       frame_shift=hop / target_sr)

    aucs, eers, prec = [], [], []

    progress = tqdm(enumerate(list(label.keys())))
    for _, path in progress:
        label_y = label[path]

        wav_path = os.path.join(dev_dir, path + ".wav")
        data, sample_rate = librosa.load(wav_path, sr=target_sr)

        pred, weights, frame_time = pipeline(data, sample_rate, frame_len, hop)

        # The labels are not annotated after the last voiced segment, so
        # pad them with zeros up to the number of frames.
        for _ in range(len(label_y), frame_time.shape[0]):
            label_y.append(0)

        _roc_x, _roc_y = ROC(weights, label_y)

        pres = precise(pred, label_y)
        prec.append(pres)

        plt.plot(frame_time, pred, 'r')
        plt.legend(["signal", "pred"])
        plt.show()
        plt.waitforbuttonpress(10)
        plt.close()

        auc, eer = get_metrics(weights, label_y)
        aucs.append(auc)
        eers.append(eer)

        progress.set_postfix({
            f'auc of {path}': f"{auc:0.4f}",
            f"eer of {path}": f"{eer * 100:0.4f}%",
            f'precision of {path}': f"{pres*100:0.4f}%",
        })

    # Show each metric.
    mean_auc = float(np.float32(aucs).mean())
    mean_eer = float(np.float32(eers).mean())
    mean_prec = float(np.float32(prec).mean())
    print(
        f"average auc is {mean_auc:0.4f} , average eer is {mean_eer * 100:0.4f}% and average of precision is {mean_prec*100:.4f}%")

    panels = ((aucs, "auc"), (eers, "eer"), (prec, "precision"))
    for slot, (series, title) in enumerate(panels, start=1):
        plt.subplot(1, 3, slot)
        plt.plot(range(len(label)), series, '-')
        plt.title(title)
    plt.show()
    plt.waitforbuttonpress(10)
    plt.close()
Ejemplo n.º 6
0
        # cv2.waitKey(0)
        # cv2.imwrite('./results_imgs/{}'.format(img_name),img)

    except Exception as e:
        print("Error with ", img_name)
        print(e)
    if idx % 100 == 0:
        print("{} images done".format(idx))

# Progress message emitted before the statistics step below.
print("Done with the predictions. Finding the stats now")

# The statistics are only computed when an evaluation file list was given.
if eval_files_list_path:
    # Persist the collected predictions next to the other outputs.
    with open(os.path.join(output_dir, "predictions.pkl"), "wb") as f:
        pickle.dump(predictions_dict, f)

    # NOTE(review): pickle.load can execute arbitrary code from the file;
    # bbox_dict_path must only ever point at trusted data.
    # Presumably this holds the reference bounding boxes -- confirm.
    with open(bbox_dict_path, "rb") as f:
        bbox_coords_dict = pickle.load(f)

    # Compare the loaded boxes with the predictions at a fixed threshold of 0.1.
    tp, fp, fn, precision, recall, f_score = get_metrics(bbox_coords_dict,
                                                         predictions_dict,
                                                         threshold=0.1)

    # The same report is printed and written to results.txt.
    result = "Precision: {}\nRecall: {}\nF_score: {}\nTP: {}, FP: {}, FN: {}".format(
        precision, recall, f_score, tp, fp, fn)
    print(result)

    with open(os.path.join(output_dir, "results.txt"), "w") as f:
        f.write(result)

# print('Elapsed time = {}'.format(time.time() - st))
Ejemplo n.º 7
0
def main():
    """Train and evaluate a two-class tweet sentiment classifier.

    Steps: load the dataset (column 0 holds the labels, column 5 the texts),
    request the embedding of every text from the vectorizer HTTP service and
    average it over axis 0, fit the model for five epochs while logging to
    ``log.csv``, save the model as ``full_data_epoch5.h5``, write the metrics
    to ``metrics_output.txt`` and save the confusion-matrix plot as
    ``tweet_sentiment_class_confusion_matrix_full_data.png``.

    Raises:
        requests.HTTPError: If the vectorizer answers with a 4xx/5xx status.
    """
    dots = "." * 6
    print('loading dataset{}'.format(dots))
    dataset = load_data(DATA_PATH)
    labels = dataset[0]
    texts = dataset[5]

    # Uses the same stratify / test_size / random_state as the embedding
    # split further down; the evaluation relies on both splits selecting
    # the same rows.
    _, _, _, test_labels = train_test_split(
        texts, labels, stratify=labels, test_size=0.2, random_state=40)

    print('generating embeddings{}'.format(dots))
    print('calling vectorizer api{}'.format(dots))
    matrix_embedding = np.zeros((len(dataset), 300))
    for row, text in enumerate(texts):
        response = requests.get('http://vectorizer.host/embed',
                                data={'text': text})
        # Fail loudly on an HTTP error instead of parsing an error body as
        # if it were an embedding.
        response.raise_for_status()
        # Collapse the returned vectors into a single one per text.
        matrix_embedding[row] = np.mean(json.loads(response.text), axis=0)

    embedding_size = matrix_embedding.shape[1]

    # specifying exact numbers now, need to convert to variables
    print('initializing model{}'.format(dots))
    model = models.keras_model(embedding_size)

    categorical_labels = to_categorical(labels, num_classes=2)

    print('train test split')
    X_train, X_test, y_train, _ = train_test_split(matrix_embedding,
                                                   categorical_labels,
                                                   stratify=labels,
                                                   test_size=0.2,
                                                   random_state=40)

    # fit model
    print('fitting model{}'.format(dots))
    csv_logger = CSVLogger('log.csv', append=True, separator=';')
    model.fit(X_train, y_train, epochs=5, callbacks=[csv_logger])
    model.save('full_data_epoch5.h5')

    # evaluate model
    print('generating predictions{}'.format(dots))
    y_predicted = model.predict_classes(X_test)

    accuracy, precision, recall, f1 = evaluate.get_metrics(
        test_labels, y_predicted)

    with open("metrics_output.txt", "w") as text_file:
        print('accuracy: {} precision: {} recall: {} f1: {}'.format(
            accuracy, precision, recall, f1),
              file=text_file)

    print('plotting confusion matrix{}'.format(dots))
    cm = confusion_matrix(test_labels, y_predicted)
    print('generated confusion matrix')
    print(cm)
    plt.figure(figsize=(10, 10))
    evaluate.plot_confusion_matrix(cm,
                                   classes=['Negative', 'Positive'],
                                   normalize=False,
                                   title='Confusion matrix')
    plt.savefig('tweet_sentiment_class_confusion_matrix_full_data.png')
    plt.close()