def test_rouge():
    data_paths = ("arxiv/inputs/", "arxiv/human-abstracts/", "arxiv/labels/")
    glove_dir = "embeddings"
    embedding_size = 300
    model_path = sys.argv[1]
    weight_matrix, word2idx = create_embeddings(f"{glove_dir}/glove.6B.{embedding_size}d.txt")
    test_set = load_data(word2idx, data_paths, data_type="test")
    test_docs = load_test_docs(data_paths, data_type="test")
    print_rouge(model_path, test_set, test_docs)
def test_model():
    # Load the trained model and evaluate it on the test set
    model = ExtSummModel.load("model/extsumm.bin")
    data_paths = ("arxiv/inputs/", "arxiv/human-abstracts/", "arxiv/labels/")
    # (doc, start_end, abstract, label)
    test_set = load_data(data_paths, data_type="test")
    # find the test accuracy of the model
    accuracy = model.predict_and_eval(test_set)
    print(f"Testing accuracy: {accuracy}")
def dtree_predict(params):
    global boost_dt
    global train_dtree
    # Train a model on the fly only if training was not requested and no saved model exists;
    # otherwise load the saved pickle
    if train_dtree == False and not os.path.isfile('./models/dtree.pkl'):
        boost_dt = dtree_train()
    else:
        boost_dt = load_data('./models/dtree.pkl')
        print('DTree After:', boost_dt.coef_)
    features = prepare_test_features(params)
    pred = boost_dt.predict(features)
    return pred[0]
def svm_predict(params):
    global svm_clf
    global train_svm
    # Train a model on the fly only if training was not requested and no saved model exists;
    # otherwise load the saved pickle
    if train_svm == False and not os.path.isfile('./models/svm.pkl'):
        svm_clf = svm_train()
    else:
        svm_clf = load_data('./models/svm.pkl')
        print('SVM After:', svm_clf.coef_)
    features = prepare_test_features(params)
    pred = svm_clf.predict(features)
    return pred[0]
def svm_test():
    global svm_clf
    global train_svm
    if train_svm == False and not os.path.isfile('./models/svm.pkl'):
        svm_clf = svm_train()  # keep the trained classifier so it is available below
    else:
        svm_clf = load_data('./models/svm.pkl')
    features, test_modified = prepare_test_dataset()
    svm_test = svm_clf.predict(features)
    svm_df = pd.DataFrame(test_modified['PassengerId']).join(pd.DataFrame(svm_test, columns=['Survived']))
    display(svm_df.head())
    svm_df.to_csv(path_or_buf='./csv/svm_predictions.csv', index=False)
def dtree_test():
    global boost_dt
    global train_dtree
    if train_dtree == False and not os.path.isfile('./models/dtree.pkl'):
        boost_dt = dtree_train()
    else:
        boost_dt = load_data('./models/dtree.pkl')
    features, test_modified = prepare_test_dataset()
    dt_test = boost_dt.predict(features)
    dt_df = pd.DataFrame(test_modified['PassengerId']).join(pd.DataFrame(dt_test, columns=['Survived']))
    display(dt_df.head())
    dt_df.to_csv(path_or_buf='./csv/dt_predictions.csv', index=False)
def predict_model(image_directory, labels_path, season, asset_type, model, crop):
    labels = pd.read_csv(labels_path, index_col=0)
    if model == 'InceptionV3':
        model_name = model
        img_width = 299
        img_height = 299
        model_class = InceptionV3
        preprocess_func = iv3_preproc
        freeze_depth = 172
    elif model == 'VGG16':
        model_name = model
        img_width = 224
        img_height = 224
        model_class = VGG16
        preprocess_func = lambda x: x
        freeze_depth = 25
    else:
        raise Exception('{} is an unsupported model.'.format(model))
    output_base = '{}_{}_{}_{}'.format(model_name, asset_type, season, crop)
    model_path = os.path.join(PROJ_ROOT, 'models', output_base + '_keras.h5')
    model = load_model(model_path)
    images, targets, existing_image_ids = load_data(labels, img_width, img_height, asset_type,
                                                    image_directory, season, '{}_yield'.format(crop))
    preds = model.predict(images)
    print(preds.shape)
    predicted_images = labels.loc[existing_image_ids, :].copy()
    predicted_images['prediction'] = preds
    preds_out_path = os.path.join(PROJ_ROOT, 'models', output_base + '_preds.csv')
    predicted_images.to_csv(preds_out_path)
    geojson_preds_out = os.path.join(PROJ_ROOT, 'models', output_base + '_preds.geojson')
    write_geojson_predictions(image_directory, "geojson_epsg4326_{}.geojson".format(crop), crop,
                              predicted_images, geojson_preds_out)
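# Hypothetical invocation sketch for predict_model; every argument value below is an
# assumption for illustration, not taken from the source:
# predict_model(image_directory='data/images', labels_path='data/labels.csv',
#               season='summer', asset_type='field', model='VGG16', crop='wheat')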
def main():
    # Parse input data
    args = parsing_inputs()
    # Obtain the dataloaders and a dictionary class_to_idx we will use during prediction
    trainloader, validloader, testloader, class_to_idx = load_data(args)
    # Now we download the model based on the input and select the device we will train it on
    possible_inputs = {'vgg16': 25088, 'alexnet': 9216}
    model, device = build_model(possible_inputs, args)
    # The next step is to define the criterion and train the model
    criterion = nn.NLLLoss()
    train(model, device, args, trainloader, validloader, criterion)
    # We then perform a validation test on new unseen data
    with torch.no_grad():
        validation_test(model, testloader, device, criterion)
    # Finally we save the checkpoint
    save_check(args, model, class_to_idx, possible_inputs)
def main():
    args = parse_args()
    # args.dataset_size = 100000
    print('-----------------------------------------------------------')
    print('Dataset size: ', args.dataset_size)
    args.batch_size = 1
    gender = 'male'  # female
    print('Gender: ', gender)
    device = torch.device("cuda:%d" % args.gpu if torch.cuda.is_available() else "cpu")
    torch.cuda.set_device(device)
    parent_dic = "/home/yifu/workspace/data/synthetic/noise_free"
    print('Data path: ', parent_dic)
    dataloader = load_data(args.dataset_size, parent_dic, args)
    model = myresnet50(device, num_output=args.num_output, use_pretrained=True, num_views=args.num_views)
    # save_name = 'out:%d_data:%d_par_w:%.1f.pth' % (args.num_output, args.dataset_size, args.par_loss_weight)
    # folder: network weights
    parent_dic = "/home/yifu/workspace/data/test/model_1"
    save_name = 'data:%d.pth' % (100000)
    save_path = os.path.join(parent_dic, save_name)
    print('Load state dict from save path: ', save_path)
    model.load_state_dict(torch.load(save_path))
    print('-----------------------------------------------------------')
    if input('Confirm the above setting? (yes/no): ') != 'yes':
        print('Terminated')
        exit()
    print('validation starts')
    print('------------------------')
    path = parent_dic
    evaluate_model(model, dataloader, args.num_views, path, device, args)
def main():
    filepath = './saved_models/model'
    model = load_model(filepath)
    # Get image arrays and labels for all image files
    images, labels = load_data(sys.argv[1])
    # Split data into training and testing sets
    labels = tf.keras.utils.to_categorical(list(labels))
    x_train, x_test, y_train, y_test = train_test_split(np.array(images), np.array(labels), test_size=TEST_SIZE)
    for i in range(len(x_test[:3])):
        plt.imsave(f"Image #{i}.jpg", x_test[i])
    predictions = model.predict(x_test[:1])
    print(x_test[:1].shape, x_test[0].shape)
    classes = np.argmax(predictions, -1)
    truth_table = {0: "Atom", 1: "Sanay", 2: "Aarav"}
    for i in range(len(classes)):
        print(f"Image #{i} is {truth_table[classes[i]]}")
def main():
    args = parse_args()
    args.dataset_size = 100000
    args.batch_size = 1
    args.num_output = 82
    gender = 'male'  # female
    m = load_model('../../models/basicModel_%s_lbs_10_207_0_v1.0.0.pkl' % gender[0])
    parent_dic = "/home/yifu/workspace/data_smpl/A_pose_5/male/noise_free"
    dataloader = load_data(args.dataset_size, parent_dic, args)
    device = torch.device("cuda:%d" % args.gpu if torch.cuda.is_available() else "cpu")
    model = myresnet50(device, num_output=args.num_output, use_pretrained=True, num_views=args.num_views)
    # model = myresnet50(num_output=80)
    save_name = 'trained_resnet_%d_%d.pth' % (args.num_output, args.dataset_size)
    path = os.path.join(parent_dic, save_name)
    model.load_state_dict(torch.load(path))
    path = parent_dic
    evaluate_model(m, model, dataloader, args.num_views, path, device, args)
        self.crop_to = crop_to

    def _get_batches_of_transformed_samples(self, index_array):
        batch_x, batch_y = super()._get_batches_of_transformed_samples(index_array)
        if self.crop_to > 0:
            batch_x = self.image_data_generator.crop_data_bacth(batch_x)
        return batch_x, batch_y


if __name__ == '__main__':
    from train_model import load_data

    data_dir = '/home/skliff13/work/PTD_Xray/datasets/tuberculosis/v2.2'
    data_shape = (256, 256)
    (x_train, y_train), (x_val, y_val) = load_data(data_dir, data_shape)

    train_gen = ModifiedDataGenerator(rotation_range=10, width_shift_range=0.1, height_shift_range=0.1,
                                      rescale=1., zoom_range=0.2, fill_mode='nearest', cval=0, crop_to=224)

    for q, v in train_gen.flow(x_train, y_train, batch_size=8):
        print(q.shape)
        exit(14)
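# The generator above delegates cropping to image_data_generator.crop_data_bacth, whose
# implementation is not shown here. Below is a minimal sketch of what such a batch center-crop
# could look like, assuming (N, H, W, C) input batches; it is an illustration, not the
# project's actual helper:
def center_crop_batch(batch, crop_to):
    # Center-crop every image in the batch to (crop_to, crop_to) spatially
    _, h, w, _ = batch.shape
    top = (h - crop_to) // 2
    left = (w - crop_to) // 2
    return batch[:, top:top + crop_to, left:left + crop_to, :]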
print("Stacking started with run sequence:", run_sequence, "\n") # if the stacking root path doesn't exist, create it if not os.path.exists(STACK_PATH): os.makedirs(STACK_PATH) train_model.MODEL_PATH = STACK_PATH predict.MODEL_PATH = STACK_PATH for f, tr, te in zip([True, False], [TRAIN30_DATA_FILE, TRAIN8_DATA_FILE], [TEST30_DATA_FILE, TEST8_DATA_FILE]): #for f, tr, te in zip([False, True], [TRAIN8_DATA_FILE, TRAIN30_DATA_FILE], [TEST8_DATA_FILE, TEST30_DATA_FILE]): # load the training and validation datasets train, validation = train_model.load_data( pickle_path=PICKLE_PATH, train_file=tr, validation_file=VALIDATION_DATA_FILE, use_validation=USE_VALIDATION, verbose=False) # Load the test dataset (required for prediction calls) test, ids, overlap = predict.load_data(pickle_path=PICKLE_PATH, test_file=te, id_file=TEST_IDS_FILE, overlap_file=OVERLAP_FILE, verbose=False) # perform KFold stacking of N models kfold_stack(train=train, validation=validation, kfold_splits=K_SPLITS, model_names=MODEL_NAMES,
def deep_extraction():
    # extract deep features
    model = tf.keras.models.load_model('./models/extraction_model')
    new_model = tf.keras.models.Model(inputs=model.input, outputs=model.layers[6].output)
    deep_features = new_model.predict_on_batch(windows)
    return deep_features


if __name__ == "__main__":
    filtered_path = './dataset/filtered'
    extracted_path = './dataset/extracted'

    # load each dataset and their labels
    mu_windows, mu_labels = load_data(f'{filtered_path}/move_up.npy', 0)
    md_windows, md_labels = load_data(f'{filtered_path}/move_down.npy', 1)
    nm_windows, nm_labels = load_data(f'{filtered_path}/no_movement.npy', 2)
    windows, labels = balance_datasets([mu_windows, md_windows, nm_windows], [mu_labels, md_labels, nm_labels])

    # concatenate data
    windows = windows[0] + windows[1] + windows[2]
    labels = np.asarray(labels[0] + labels[1] + labels[2])

    # form np array
    windows = np.array([window_df_to_array(window) for window in windows])
    np.save(f'{extracted_path}/windows.npy', windows)
    print(windows[1])
    print(windows.shape)
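    # A minimal usage sketch (an assumption, not part of the original script): once the extraction
    # model in './models/extraction_model' exists, the deep features for the windows built above
    # could be extracted and saved alongside them; the 'deep_features.npy' filename is hypothetical.
    deep_features = deep_extraction()
    np.save(f'{extracted_path}/deep_features.npy', deep_features)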