def test_serve():
    """Smoke-test the serving path: load the model, then predict on a string.

    Nothing is asserted; the test passes as long as ``model_fn`` and
    ``predict_fn`` run without raising.
    """
    loaded_model = model_fn(os.path.join(here, './data/modelDir'))
    print(loaded_model)

    # The plain string goes straight to predict_fn; input_fn is not exercised.
    predict_fn(input_data="Best movie ever", model=loaded_model)
Exemple #2
0
def test_predict():
    """Load the model, print the encoded form of a review, then predict on it.

    The encoded tensor is built and printed for inspection only; the
    prediction itself is made from the raw review text.
    """
    review = 'The simplest pleasures in life are the best, and this film is one of them. Combining a rather basic storyline of love and adventure this movie transcends the usual weekend fair with wit and unmitigated charm.'

    model = model_fn(os.path.join(here, './data/modelDir'))
    print(model)

    padded_ids, review_len = convert_and_pad(model.word_dict, review)
    print('data_X ', padded_ids, sep='\n')
    print('data_len', review_len, sep='\n')

    # Prepend the length to the padded review: the model expects input data
    # of the form 'len, review[500]'.
    packed = np.hstack((review_len, padded_ids))
    print('data_pack (shape: {})'.format(packed.shape), packed, sep='\n')

    # Add a leading batch dimension of size one.
    batch = packed.reshape(1, -1)
    print('data_pack reshaped (shape: {})'.format(batch.shape), batch, sep='\n')

    tensor = torch.from_numpy(batch)
    print('data (shape: {})'.format(tensor.shape), tensor, sep='\n')

    result = predict_fn(review, model)
    print(result)
def predict():
    """Run the loaded model on the raw request body and return the prediction.

    For POST requests the handler first logs which payload kinds are present
    (form, raw data, JSON). A POST carrying a non-empty JSON body is
    acknowledged with ``{"message": "OK"}`` and no prediction is made. Every
    other request has its raw body decoded as UTF-8 and passed to
    ``predict_fn`` together with the module-level ``model``.

    Returns:
        The ``jsonify({"message": "OK"})`` response for JSON POSTs, otherwise
        the prediction converted with ``str``.

    Raises:
        UnicodeDecodeError: If the request body is not valid UTF-8.
    """
    if request.method == "POST":
        LOG.info("I am a post")
        if request.form:
            LOG.info("I have form data")
        if request.data:
            LOG.info("I have data")
            LOG.info(request.data)
        # Only touch request.json when the content type is JSON: on recent
        # Flask releases accessing it for any other content type aborts the
        # request with an HTTP error, so plain-text posts would never reach
        # the prediction below.
        if request.is_json and request.json:
            LOG.info("I have json")
            # Do stuff with the data...
            return jsonify({"message": "OK"})
        LOG.info("fail")

    # Decode once and reuse the text for both logging and prediction.
    text = request.data.decode('utf-8')
    LOG.info("Form data is: \n %s", text)

    # get an output prediction from the pretrained model, model
    result = predict_fn(text, model)
    # Logger-side formatting also copes with a tuple-valued result, which
    # eager "%s" % result would not.
    LOG.info("Prediction value is: %s", result)
    return str(result)
    # NOTE(review): the enclosing function header is not visible in this
    # excerpt; model, train_dataloader, validation_dataloader, args, device,
    # test_df and tokenizer are assumed to be set up earlier — TODO confirm.
    # Train the model. The four values returned by train_fn are unpacked as
    # training/validation losses and accuracies; they are only consumed by the
    # commented-out plot call below.
    train_losses, train_accuracies, validation_losses, validation_accuracies = train_fn(
        model,
        train_dataloader,
        validation_dataloader,
        args.epochs,
        args.lr,
        device,
        args.best_model_path,
        args.torch_manual_seed,
        freeze_pretrained_encoder=args.freeze_pretrained_encoder)
    print('Training complete')

    # Plot training and validation losses and accuracies for n_epochs
    # plot(args.epochs, train_losses, train_accuracies, validation_losses, validation_accuracies)

    # Get model predictions on test-set data
    # NOTE(review): `data` here is presumably a project module providing
    # encode(), not a local variable — verify against the file's imports.
    test_input = data.encode(test_df,
                             tokenizer,
                             max_len=args.max_sequence_length,
                             testing=True)
    # Only the word ids and the attention mask are wrapped; no labels are
    # attached to the test dataset.
    test_data = TensorDataset(test_input['input_word_ids'],
                              test_input['input_mask'])
    test_dataloader = DataLoader(test_data, batch_size=args.batch_size)
    # NOTE(review): predict_fn is not given `model` here; presumably it loads
    # the weights itself (e.g. from args.best_model_path) — TODO confirm.
    predictions = predict_fn(test_dataloader, device)

    # Save the test-set predictions: one row per test id, written to
    # test_predictions.csv in the current working directory.
    submission = test_df.id.copy().to_frame()
    submission['prediction'] = predictions
    submission.to_csv("test_predictions.csv", index=False)
Exemple #5
0
    # NOTE(review): the enclosing function header is not visible in this
    # excerpt; work_dir, model and short_classes are assumed to be defined
    # earlier — TODO confirm.
    # Every entry of work_dir, sorted by name.
    images = sorted([s for s in os.listdir(work_dir)])

    # list of visualizations for each frame in the test dataset
    frame_visualizations = []

    # image_index is not used in the visible part of the loop.
    for image_index, image in enumerate(images):
        curr_img = os.path.join(work_dir, image)

        with open(curr_img, 'rb') as f:
            # Rebinds the loop variable `image` (a file name) to the opened
            # PIL image.
            image = Image.open(f)
            data = np.asarray(image)

            image_data = input_fn(data)

            prediction = predict_fn(image_data, model)
            output_body = output_fn(prediction)

            # prediction in log probabilities as output from the last step
            # (first row of the array deserialized from output_body)
            stream = BytesIO(output_body)
            pred_output = np.load(stream)[0]
            # Exponentiate, normalize so the values sum to 1, scale to a
            # budget of 20 and truncate to integers.
            out_exps = np.exp(pred_output)
            out_normalized = (out_exps / sum(out_exps) * 20).astype(int)

            # visualization showing a breakdown of probabilities, what is shown in an image:
            # each of the first five classes contributes its short_classes
            # entry repeated by its scaled share (capped at 20); the result is
            # left-padded with '_' to a width of at least 20.
            frame_visualization = ''.join([
                min(int(out_normalized[x]), 20) * short_classes[x]
                for x in range(0, 5)
            ]).rjust(20, '_')
            frame_visualizations.append(frame_visualization)
def verify(model, data_dir, percentage=1):
    """
    uses a model to predict the categories of a dataset, compares them with
    the true values and returns the resulting reports.

    Every sub-directory of ``data_dir`` is treated as one label; labels are
    numbered by the alphabetical order of the directory names.

    :param model: the model to analyze, passed through to ``predict_fn``
    :param data_dir: the directory containing one sub-directory per label
    :param percentage: the fraction of data to analyze (0-1); each image is
        sampled independently, so the number processed varies between runs
    :return: a tuple ``(report, np_conf_matrix, dubious_preds)``: the result
        of ``classification_report``, the confusion matrix indexed as
        ``[true label, predicted label]``, and a list of
        ``(image path, log probability)`` pairs for images whose true label
        scored below ``THRESHOLD``, sorted by ascending log probability
    """

    # images for which a prediction was made
    images_processed = 0

    # images seen, whether or not they were sampled
    images_total = 0

    # directories and label names, sorted alphabetically
    dirs = [
        s for s in sorted(os.listdir(data_dir))
        if os.path.isdir(os.path.join(data_dir, s))
    ]

    # confusion matrix in numpy format
    np_conf_matrix = np.zeros((len(dirs), len(dirs)), dtype='uint')

    # true values and predictions
    y_true, y_pred = [], []

    dubious_preds = []

    # loop all directory / label names; the position in `dirs` is the label
    for label_index, label_dir in enumerate(dirs):
        curr_img_dir = os.path.join(data_dir, label_dir)

        # loop on all images in a directory, belonging to a label
        for image_name in os.listdir(curr_img_dir):
            curr_img = os.path.join(curr_img_dir, image_name)
            images_total += 1

            # only for a given percentage of images
            if random.uniform(0, 1) <= percentage:
                images_processed += 1

                # The file only needs to stay open while the pixels are read
                # into the array.
                with open(curr_img, 'rb') as f:
                    data = np.asarray(Image.open(f))

                # goes through input_fn, predict_fn and output_fn as the
                # serving path does, but only using the model
                image_data = input_fn(data)
                prediction = predict_fn(image_data, model)
                output_body = output_fn(prediction)

                # prediction in log probabilities as output from the last step
                output_sv = np.load(BytesIO(output_body))[0]
                pred_index = np.argmax(output_sv)

                # the log probability assigned to the true label
                label_log_prob = output_sv[label_index]

                if label_log_prob < THRESHOLD:
                    dubious_preds.append((curr_img, label_log_prob))

                # comparing predictions and labels, updating the confusion
                # matrix and the inputs of the classification report
                np_conf_matrix[label_index, pred_index] += 1
                y_true.append(label_index)
                y_pred.append(pred_index)

                if images_processed % 500 == 0:
                    print("{} processed up to {}".format(
                        images_processed, images_total))

    report = classification_report(y_true=y_true, y_pred=y_pred)
    dubious_preds.sort(key=lambda x: x[1])
    return report, np_conf_matrix, dubious_preds