def test_serve():
    """Smoke-test the serving path: load the saved model and score one raw string.

    Nothing is asserted; the test passes as long as ``model_fn`` and
    ``predict_fn`` run without raising.
    """
    loaded_model = model_fn(os.path.join(here, './data/modelDir'))
    print(loaded_model)

    # predict_fn receives the plain string directly; input_fn is not exercised here.
    predict_fn(input_data="Best movie ever", model=loaded_model)
def test_predict():
    """Walk one review through the manual preprocessing steps, then through ``predict_fn``.

    Every intermediate value is printed for inspection; nothing is asserted.
    """
    def show(label, value):
        # Print a caption line followed by the value it describes.
        print(label)
        print(value)

    review_text = 'The simplest pleasures in life are the best, and this film is one of them. Combining a rather basic storyline of love and adventure this movie transcends the usual weekend fair with wit and unmitigated charm.'

    loaded_model = model_fn(os.path.join(here, './data/modelDir'))
    print(loaded_model)

    encoded_review, review_length = convert_and_pad(loaded_model.word_dict, review_text)
    show('data_X ', encoded_review)
    show('data_len', review_length)

    # The model expects its input laid out as 'len, review[500]', so the
    # length is placed in front of the padded review.
    packed = np.hstack((review_length, encoded_review))
    show('data_pack (shape: {})'.format(packed.shape), packed)

    # Turn the flat vector into a batch containing a single row.
    batch = packed.reshape(1, -1)
    show('data_pack reshaped (shape: {})'.format(batch.shape), batch)

    tensor = torch.from_numpy(batch)
    show('data (shape: {})'.format(tensor.shape), tensor)

    # predict_fn is given the raw review text, not the tensor built above.
    print(predict_fn(review_text, loaded_model))
def predict():
    """Call the predict function on a loaded LSTM model.

    For POST requests the presence of form data, a raw body and a JSON body
    is logged. A request carrying a non-empty JSON body is only acknowledged
    with ``{"message": "OK"}``; no prediction is made for it. In every other
    case the raw request body is decoded as UTF-8 and handed to ``predict_fn``
    together with the module-level ``model``.

    :return: a JSON acknowledgement for JSON requests, otherwise
        ``str(result)`` of the prediction
    :raises UnicodeDecodeError: if the request body is not valid UTF-8
    """
    if request.method == "POST":
        LOG.info("I am a post")
        if request.form:
            LOG.info("I have form data")
        if request.data:
            LOG.info("I have data")
            LOG.info(request.data)
        # Accessing `request.json` on a body whose content type is not JSON
        # is rejected with an HTTP error by newer Flask releases, which would
        # stop plain-text reviews from ever reaching the model. Checking
        # `is_json` first keeps the JSON probe away from those bodies.
        if request.is_json and request.json:
            LOG.info("I have json")
            # JSON payloads are acknowledged but not scored.
            return jsonify({"message": "OK"})
        else:
            LOG.info("fail")

    # Decode once and reuse the text for both the log line and the model.
    text = request.data.decode('utf-8')
    LOG.info("Form data is: \n %s", text)

    # get an output prediction from the pretrained model, model
    result = predict_fn(text, model)
    # Passing the value as a logging argument (instead of eager `%`) also
    # keeps the log call safe when the result is a tuple.
    LOG.info("Prediction value is: %s", result)
    return str(result)
# Train the model train_losses, train_accuracies, validation_losses, validation_accuracies = train_fn( model, train_dataloader, validation_dataloader, args.epochs, args.lr, device, args.best_model_path, args.torch_manual_seed, freeze_pretrained_encoder=args.freeze_pretrained_encoder) print('Training complete') # Plot training and validation losses and accuracies for n_epochs # plot(args.epochs, train_losses, train_accuracies, validation_losses, validation_accuracies) # Get model predictions on test-set data test_input = data.encode(test_df, tokenizer, max_len=args.max_sequence_length, testing=True) test_data = TensorDataset(test_input['input_word_ids'], test_input['input_mask']) test_dataloader = DataLoader(test_data, batch_size=args.batch_size) predictions = predict_fn(test_dataloader, device) # Save the test-set predictions submission = test_df.id.copy().to_frame() submission['prediction'] = predictions submission.to_csv("test_predictions.csv", index=False)
images = sorted([s for s in os.listdir(work_dir)]) # list of visualizations for each frame in the teddst dataset frame_visualizations = [] for image_index, image in enumerate(images): curr_img = os.path.join(work_dir, image) with open(curr_img, 'rb') as f: image = Image.open(f) data = np.asarray(image) image_data = input_fn(data) prediction = predict_fn(image_data, model) output_body = output_fn(prediction) # prediction in log probabilities as output from the last step stream = BytesIO(output_body) pred_output = np.load(stream)[0] out_exps = np.exp(pred_output) out_normalized = (out_exps / sum(out_exps) * 20).astype(int) # visualization showing a breakdown of probabilities, what is shown in an image frame_visualization = ''.join([ min(int(out_normalized[x]), 20) * short_classes[x] for x in range(0, 5) ]).rjust(20, '_') frame_visualizations.append(frame_visualization)
def verify(model, data_dir, percentage=1):
    """
    uses a model to predict the categories of a dataset, compare them with
    the true values and to return appropriate reports

    ``data_dir`` is expected to hold one sub-directory per label. The
    sub-directories are visited in sorted order and their position in that
    order is used as the label index. Each image is analyzed with probability
    ``percentage``.

    :param model: the model to analyze
    :param data_dir: the directory containing data
    :param percentage: the percentage of data to analyze (0-1)
    :return: a classification report, a confusion matrix indexed as
        [true label, predicted label], and a list of
        ``(image path, log probability of the true label)`` tuples for the
        images whose log probability is below ``THRESHOLD``, sorted from the
        lowest log probability upwards
    """
    # directories and label names, sorted alphabetically
    label_dirs = [
        s for s in sorted(os.listdir(data_dir))
        if os.path.isdir(os.path.join(data_dir, s))
    ]
    # confusion matrix in numpy format
    np_conf_matrix = np.zeros((len(label_dirs), len(label_dirs)), dtype='uint')
    # true values and predictions
    y_true, y_pred = [], []
    dubious_preds = []
    # images for which a prediction was made / images seen so far
    images_processed = 0
    images_total = 0

    # loop all directory / label names
    for label_index, label_dir in enumerate(label_dirs):
        curr_img_dir = os.path.join(data_dir, label_dir)
        # loop on all images in a directory, belonging to a label
        for image_name in os.listdir(curr_img_dir):
            curr_img = os.path.join(curr_img_dir, image_name)
            images_total += 1
            # only for a given percentage of images
            if random.uniform(0, 1) > percentage:
                continue
            images_processed += 1

            output_sv = _predict_log_probs(curr_img, model)
            pred_index = np.argmax(output_sv)

            # the log probability the model assigned to the true label
            label_log_prob = output_sv[label_index]
            if label_log_prob < THRESHOLD:
                dubious_preds.append((curr_img, label_log_prob))

            # comparing predictions and labels, updating the confusion
            # matrix and the inputs of the classification report
            np_conf_matrix[label_index, pred_index] += 1
            y_true.append(label_index)
            y_pred.append(pred_index)

            if images_processed % 500 == 0:
                print("{} processed up to {}".format(
                    images_processed, images_total))

    report = classification_report(y_true=y_true, y_pred=y_pred)
    # least confident predictions first
    dubious_preds.sort(key=lambda x: x[1])
    return report, np_conf_matrix, dubious_preds


def _predict_log_probs(image_path, model):
    """Return the first row of the model output for one image file."""
    with open(image_path, 'rb') as f:
        # goes through input_fn, predict_fn and output_fn as in predict,
        # but only using the model
        image_data = input_fn(np.asarray(Image.open(f)))
        prediction = predict_fn(image_data, model)
        output_body = output_fn(prediction)
    # prediction in log probabilities as output from the last step;
    # output_body is read back with np.load
    return np.load(BytesIO(output_body))[0]