def process_classifier(clf, name):
    """Train a classifier, report its quality, and classify one raw entry.

    Relies on names defined elsewhere in the module: ``train_set``,
    ``test_set``, ``raw``, ``args``, ``nltk`` and ``predict``.

    Args:
        clf: Object exposing ``train(train_set)`` that returns a trained
            classifier.
        name: Human-readable classifier name used in the progress output.
    """
    print("training {0} with {1} items...".format(name, len(train_set)))
    classifier = clf.train(train_set)

    print("done; measuring classifier accuracy...")
    print("accuracy is {0}".format(str(nltk.classify.accuracy(classifier, test_set))))

    print("determining first 5 most valuable features...")
    # NLTK's show_most_informative_features() prints the feature table itself
    # and returns None; wrapping it in print() only added a stray "None" line.
    classifier.show_most_informative_features(5)

    print("done; classifying entry #{0} from raw dataset:".format(args.predict_index))
    target_item = raw[args.predict_index]
    predict(classifier, target_item, args.is_binary, args.ne_detection_type)
def train_ann(DATA, LABELS, HIDDEN_LAYER_SIZE, iterations, TEST):
    """Train the network until its prediction equals ``TEST`` or iterations run out.

    Every training column is echoed first. Each iteration runs a forward
    pass and prints the prediction; if the prediction matches ``TEST``
    element-wise the loop stops, otherwise the cost is printed and the
    parameters are updated from the back-propagated gradients.

    Args:
        DATA: Indexable as ``DATA[row][col]`` with a ``shape`` attribute;
            rows 0 and 1 are printed for every column.
        LABELS: Indexable as ``LABELS[0][col]``.
        HIDDEN_LAYER_SIZE: Forwarded to ``helpers.initialize_parameters``.
        iterations: Maximum number of training iterations.
        TEST: Target prediction that ends training early when reached.

    Returns:
        The parameters as they stood when the loop ended.
    """
    for col in range(DATA.shape[1]):
        print(f"Data entry {DATA[0][col]!s}&{DATA[1][col]!s} with label {LABELS[0][col]!s}")

    matched_prediction = None
    matched_at = None
    parameters = helpers.initialize_parameters(DATA, LABELS, HIDDEN_LAYER_SIZE)

    for step in range(iterations):
        activations, cache = helpers.forward_propagation(DATA, parameters)
        guess = helpers.predict(activations)
        print(f"Predict: {guess!s}")

        if (guess == TEST).all():
            # Target reproduced: remember when and what, then stop training.
            matched_at = step
            matched_prediction = guess
            break

        cost = helpers.compute_cost(activations, LABELS)
        print(f"Cost: {cost!s}")
        gradients = helpers.backward_propagation(parameters, cache, DATA, LABELS)
        parameters = helpers.update_parameters(parameters, gradients)

    print(f"ANN arrived at conclusion: {matched_prediction!s} after {matched_at!s} iterations ")
    return parameters
def get_accuracy(test_list, label_encoder, signDetector):
    """Return the fraction of test examples whose predicted label is correct.

    Args:
        test_list: Users whose data forms the test set (read from 'Dataset/').
        label_encoder: Encoder exposing ``transform``; applied to both the
            true labels and each predicted label.
        signDetector: Trained detector forwarded to ``helpers.predict``.

    Returns:
        ``correct / total`` over the test examples.
    """
    # Load the test images, then the examples and their encoded labels.
    imageset, _ = helpers.getHandSet(test_list, 'Dataset/')
    X, Y = get_data(test_list, imageset, 'Dataset/')
    Y = label_encoder.transform(Y)
    total = len(X)

    def _encoded_prediction(example):
        # Model output is a raw label; encode it so it compares against Y.
        label = helpers.predict(label_encoder, signDetector, example)
        return label_encoder.transform([label])[0]

    # Count the examples where the encoded prediction equals the true label.
    correct = sum(1 for i in range(total) if Y[i] == _encoded_prediction(X[i]))
    return correct / total


# Example usage (kept for reference):
#label_encoder = LabelEncoder().fit(['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'N',
#                                    'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y'])
#
#user_list =['user_3', 'user_4','user_5','user_6','user_7','user_9','user_10']
#
#trainlistsize = len(user_list)//2
#
#train_list = user_list[:trainlistsize]
#test_list = user_list[trainlistsize:]
#
#signDetector = trainSignDetector(train_list, label_encoder)
#matrix = get_confusion_matrix(test_list, label_encoder, signDetector)
#accuracy = get_accuracy(test_list, label_encoder, signDetector)
def main():
    """Classify one image from the command line and print the top-k result.

    Example inputs used during development:
        image:      flowers/test/1/image_06743.jpg
        checkpoint: checkpoints/cp_tmp.pth
    """
    began = time()

    # The criterion is not used below; the call is kept as in the original flow.
    get_criterion()

    cli = get_args_predict()
    target_device = get_device(cli.gpu)
    network = load_checkpoint(cli.checkpoint_path, target_device)
    names_by_category = load_names(cli.category_names)

    probabilities, classes = predict(
        image_path=cli.image_path,
        model=network,
        cat_to_name=names_by_category,
        device=target_device,
        topk=cli.top_k,
    )
    print(probabilities)
    print(classes)

    elapsed = time() - began
    print(f"\n** Total Elapsed Runtime: {elapsed:.3f} seconds")
def observe_train(hyper_params, classifier, training_losses, train_X, train_Y, test_X, test_Y):
    """Print the epoch's mean loss and, on selected epochs, train/test performance.

    Train performance is reported on every 10th epoch and on the final
    epoch. Test performance is reported on every 5th epoch and whenever
    ``currentEpoch`` is below 10.

    Args:
        hyper_params: Object with ``currentEpoch`` (0-based) and
            ``classifier_epoch`` (total number of epochs).
        classifier: Model forwarded to ``predict``.
        training_losses: Losses whose mean is printed.
        train_X, train_Y: Training inputs and targets.
        test_X, test_Y: Test inputs and targets.
    """
    epoch_number = hyper_params.currentEpoch + 1
    print('[%d/%d]Loss: %.3f' % (epoch_number, hyper_params.classifier_epoch,
                                 np.mean(training_losses)))

    if epoch_number % 10 == 0 or epoch_number == hyper_params.classifier_epoch:
        print('train performance')
        train_predictions = predict(classifier, train_X)
        print_predict(train_Y, train_predictions, hyper_params)

    if (hyper_params.currentEpoch + 1) % 5 == 0 or hyper_params.currentEpoch < 10:
        print('test performance')
        test_predictions = predict(classifier, test_X)
        print_predict(test_Y, test_predictions, hyper_params)
def infer(self, input):
    """Preprocess ``input``, run the ONNX model on it and return the result image.

    Args:
        input: Forwarded unchanged to the image processor's
            ``loadAndPreprocess``.

    Returns:
        ``np.array`` built from the result image returned by ``predict``.
    """
    processor = self._imageProcessor

    # Preprocess first: the processor exposes the image and result shape
    # that the following steps read.
    prepared = processor.loadAndPreprocess(input)
    image = processor._im
    output_shape = processor._result_shape

    # The model can only be created once the input has been processed,
    # because instantiating it requires the size of the input image.
    network = get_model(self._ctx, 'model/model.onnx', image)

    # predict yields (confidence, result image, blended image, raw output);
    # only the result image is returned to the caller.
    _conf, result_img, _blended, _raw = predict(prepared, output_shape, network, image)
    return np.array(result_img)
def test(batch_size, img_size, test_images_path, model_path):
    """Evaluate a saved model on the test images.

    Args:
        batch_size: Batch size for the test data generator.
        img_size: Image size for the test data generator.
        test_images_path: Location of the test images.
        model_path: Location of the saved model.

    Returns:
        Tuple ``(dataframe, accuracy)``: the dataframe with columns
        'image', 'class' and 'nnet_M' filled by ``helpers.add_records``,
        and the accuracy computed by ``helpers.get_accuracy``.
    """
    batches = helpers.generate_test_data(batch_size, img_size, preprocess_input, test_images_path)
    network = helpers.get_model(model_path)

    # Raw model output reduced to the single best guess per sample.
    best_guesses = helpers.get_best_guess(helpers.predict(network, batches))
    accuracy = helpers.get_accuracy(best_guesses, batches)

    # Translate the guesses into class labels via the generator's mapping.
    labels = helpers.final_prediction(helpers.get_class_mapping(batches), best_guesses)

    frame = helpers.create_dataframe(['image', 'class', 'nnet_M'])
    frame = helpers.add_records(frame, batches, labels, 'nnet_M')
    return frame, accuracy
def model(X_train, Y_train, X_test, Y_test, num_iter=2000, learn_rate=0.5, print_cost=False):
    """Fit the model by gradient descent and report train/test accuracy.

    Args:
        X_train, Y_train: Training inputs and labels; ``X_train.shape[0]``
            sizes the parameter vector.
        X_test, Y_test: Test inputs and labels.
        num_iter: Number of optimisation iterations.
        learn_rate: Gradient-descent learning rate.
        print_cost: Forwarded to ``optimize``.

    Returns:
        Dict with keys "costs", "Y_predict_test", "Y_predict_train", "w",
        "b", "learn_rate" and "num_iter".
    """

    def _accuracy_percent(predicted, actual):
        # 100 minus the mean absolute prediction error, as a percentage.
        return 100 - np.mean(np.abs(predicted - actual)) * 100

    # Zero-initialised parameters, then gradient descent.
    initial_w, initial_b = init_with_zeros(X_train.shape[0])
    params, _grads, costs = optimize(initial_w, initial_b, X_train, Y_train,
                                     num_iter, learn_rate, print_cost)
    w, b = params["w"], params["b"]

    # Predictions with the learned parameters.
    Y_predict_test = predict(w, b, X_test)
    Y_predict_train = predict(w, b, X_train)

    print("train accuracy: {} %".format(_accuracy_percent(Y_predict_train, Y_train)))
    print("test accuracy: {} %".format(_accuracy_percent(Y_predict_test, Y_test)))

    return {
        "costs": costs,
        "Y_predict_test": Y_predict_test,
        "Y_predict_train": Y_predict_train,
        "w": w,
        "b": b,
        "learn_rate": learn_rate,
        "num_iter": num_iter,
    }
def get_confusion_matrix(test_list, label_encoder, signDetector):
    """Build a 24x24 confusion matrix for the sign detector on the test users.

    Rows are indexed by the encoded true label, columns by the encoded
    predicted label.

    Args:
        test_list: Users whose data forms the test set (read from 'Dataset/').
        label_encoder: Encoder exposing ``transform``.
        signDetector: Trained detector forwarded to ``helpers.predict``.

    Returns:
        The ``numpy`` matrix of counts.
    """
    counts = np.zeros((24, 24))

    # Load the test images, then the examples and their encoded labels.
    imageset, _ = helpers.getHandSet(test_list, 'Dataset/')
    X, Y = get_data(test_list, imageset, 'Dataset/')
    Y = label_encoder.transform(Y)

    for idx in range(len(X)):
        predicted_label = helpers.predict(label_encoder, signDetector, X[idx])
        predicted = label_encoder.transform([predicted_label])[0]
        # One more observation of (true class, predicted class).
        counts[int(Y[idx]), int(predicted)] += 1

    return counts
def main(arguments):
    """
    Main function to run the stock market tester.

    Depending on ``arguments`` this either downloads data and returns, or
    trains and evaluates the ``Historic`` model and plots its predictions
    for the S&P 500 series.

    Args:
        arguments: Parsed command-line arguments with ``download_data``
            (truthy: download everything and return) and ``download_stock``
            (not None: install that stock's data and return).
    """
    if arguments.download_data:
        download_all_data()
        print("Downloaded Data!")
        # clean_data()
        # print("Cleaned Data")
        return
    if arguments.download_stock is not None:
        print(arguments.download_stock)
        install_data(arguments.download_stock)
        return

    DAYS_OUT = 5
    covid_data = get_covid_data()
    model = Historic()
    train_data, test_data = get_all_stocks(covid_data)

    losses = []
    for i in range(0, model.num_epochs):
        train_loss = train(model, train_data, DAYS_OUT)
        print("EPOCH {} training loss: {}".format(i, train_loss))
        test_loss = test(model, test_data, DAYS_OUT)
        print("EPOCH {} Test loss: {}".format(i, test_loss))
        losses.append(test_loss)
    visualize_loss(losses)

    # Average over the epochs actually available: dividing by a fixed 10
    # under-reported the mean whenever fewer than 10 epochs were run.
    recent_losses = losses[-10:]
    average_loss = sum(recent_losses) / len(recent_losses) if recent_losses else 0.0
    print("Last 10 Epochs Average MAPE: {}".format(average_loss))

    sp_data = pd.read_csv("../data/^GSPC.csv")
    sp_data = join(sp_data, covid_data)
    sp_data = normalize(sp_data)

    # Inputs are all rows except the last DAYS_OUT; labels are the
    # "Adjusted Close" values shifted DAYS_OUT rows ahead.
    base_data = sp_data.iloc[:-DAYS_OUT]
    base_data = tf.convert_to_tensor(base_data)
    base_data = tf.expand_dims(base_data, 0)
    labels = sp_data.iloc[DAYS_OUT:]
    labels = labels["Adjusted Close"]
    labels = labels.values.tolist()
    # print(labels)

    predictions = predict(model, base_data)
    # print(len(predictions))
    visualize_predictions(predictions, labels)
def get_accuracy(test_list, label_encoder, signDetector):
    """Compute the sign detector's accuracy on the given test users.

    Args:
        test_list: Users whose data forms the test set (read from 'Dataset/').
        label_encoder: Encoder exposing ``transform``; applied to the true
            labels and to every predicted label.
        signDetector: Trained detector forwarded to ``helpers.predict``.

    Returns:
        Number of correctly predicted examples divided by the number of
        examples.
    """
    # Test images first, then the examples with their encoded labels.
    imageset, _ = helpers.getHandSet(test_list, 'Dataset/')
    X, Y = get_data(test_list, imageset, 'Dataset/')
    Y = label_encoder.transform(Y)
    total = len(X)

    hits = 0
    for position in range(total):
        raw_label = helpers.predict(label_encoder, signDetector, X[position])
        encoded = label_encoder.transform([raw_label])[0]
        if Y[position] == encoded:
            hits += 1

    return hits / total
model.exact = True exact_time = benchmark_predict(model, dataloaders, mode='valid') print( json.dumps({ "exact_time": exact_time, "approx_time": approx_time, "approx_speedup": exact_time / approx_time, })) else: # Exact accuracy model.exact = True preds, _ = predict(model, dataloaders, mode='valid') # >> # top_k = fast_topk(preds, X_train) # exact_precisions = precision_at_ks(X_test, top_k) # -- top_k = fast_topk(preds) exact_precisions = precision_at_ks(X_train, top_k) # << # Approx accuracy model.exact = False model.approx_linear.dense = True preds, _ = predict(model, dataloaders, mode='valid') # >> # top_k = fast_topk(preds, X_train)
image_path = args.image_path model_file = args.saved_model top_k = args.top_k class_names = {} if args.category_names: category_names = args.category_names with open(category_names, 'r') as f: class_names = json.load(f) #dict #load model model = tf.keras.models.load_model( model_file, custom_objects={'KerasLayer': hub.KerasLayer}) #predict probs, classes = h.predict(image_path, model, top_k) #print out results print(f"probabilities: {probs}") if class_names: classes = [class_names[str(n)] for n in classes] print(f"classes: {classes}") """ for testing and debugging python predict.py ./test_images/wild_pansy.jpg test_model.h5 python predict.py ./test_images/wild_pansy.jpg test_model.h5 --top_k 9 --category_names label_map.json python predict.py ./test_images/wild_pansy.jpg test_model.h5 --top_k 2 python predict.py ./test_images/wild_pansy.jpg test_model.h5 --category_names label_map.json python predict.py ./test_images/hard-leaved_pocket_orchid.jpg image_net_oxford_flowers_20200511_174947_0.81.h5 python predict.py ./test_images/hard-leaved_pocket_orchid.jpg image_net_oxford_flowers_20200511_174947_0.81.h5 --top_k 10
#!/usr/bin/env python3
# PROGRAMMER: Diego da Costa Oliveira
# DATE CREATED: Mar, 25, 2019.
# REVISED DATE:
# PURPOSE: Uses a trained network to predict the class for an input image
# BASIC USAGE: python predict.py /path/to/image checkpoint
#
# Parameters:
#   1. Return top K most likely cases as --top_k with default value 5
#   2. Use a mapping of categories to real names as --category_names with
#      default value 'cat_to_name.json'
#   3. Set GPU usage (CUDA) as --gpu
from helpers import get_input_args_predict, predict

# Read the command-line arguments.
in_arg = get_input_args_predict()

# Run the prediction for the requested image and checkpoint.
top_p_list, flower_names = predict(
    in_arg.path_to_image,
    in_arg.path_to_checkpoint,
    in_arg.top_k,
    in_arg.category_names,
    in_arg.gpu,
)

# Print a ranked list: flower name followed by its probability.
print('Flower name and associated probability:\n')
for rank, probability in enumerate(top_p_list, start=1):
    print(f'{rank}. {flower_names[rank - 1]} - {probability:.3f}')
input('Program paused. Press enter to continue.\n')

## ================= Part 9: Visualize Weights =================
#  Display the hidden units to see which features of the data the
#  network has learned to capture.

print('\nVisualizing Neural Network... \n')

# The first column of Theta1 is left out of the display.
hidden_unit_weights = Theta1[:, 1:]
displayData(hidden_unit_weights)

input('Program paused. Press enter to continue.\n')

## ================= Part 10: Implement Predict =================
#  With the network trained, predict the labels of the training set
#  and report the training set accuracy.

pred = predict(Theta1, Theta2, X)
training_accuracy = np.mean(pred == y) * 100
print(f'Training Set Accuracy: {training_accuracy:f}')
else: assert False, "Unhandled option " + o # Train if not FOREST: train_tree(FILE, 'model', output='quality', numvalid=200, loss=LOSS, min_leaf=MIN_LEAF, max_depth=15, min_depth=3, loss_prob=True, verbose=VERBOSE) else: train_forest(FILE, 'model', output='quality', numvalid=200, loss=LOSS, min_leaf=MIN_LEAF, num_trees=32, dropout=0.2, max_depth=15, min_depth=3, loss_prob=True, verbose=VERBOSE) predict('model', TEST_FILE, 'output.csv', 'quality')
#  Try the following values of lambda (0, 1, 10, 100).
#
#  How does the decision boundary change when you vary lambda? How does
#  the training set accuracy vary?

# Start the optimisation from an all-zero parameter vector.
initial_theta = np.zeros((n, 1))

# Regularization strength; 1 here, meant to be varied.
lambda_reg = 1

# Minimise the regularised cost with BFGS. As called here (no full_output),
# fmin_bfgs returns only the optimised parameters.
myargs = (X, y, lambda_reg)
theta = fmin_bfgs(costFunctionReg, x0=initial_theta, args=myargs)

# Draw the decision boundary, then label the plot.
plotDecisionBoundary(theta, X, y)
plt.xlabel('Microchip Test 1')
plt.ylabel('Microchip Test 2')
plt.title(f'lambda = {lambda_reg:f}')

# Accuracy on the training set.
p = predict(theta, X)
train_accuracy = np.mean(p == y) * 100
print(f'Train Accuracy: {train_accuracy:f}')

input('Program paused. Press enter to continue.\n')
model, loss_function, conllu_sentences[args.language]['test'], args.language) print(test_loss) elif args.mode == 'predict': prediction_file = open(RESULTS_RELATIVE_PATH, mode='a', encoding='UTF-8') formatted_test_file = open(FORMATTED_TEST_RELATIVE_PATH, mode='a', encoding='UTF-8') for conllu_sentence in conllu_sentences[args.language]['test']: # save formatted version fo test file formatted_test_file.write(str(conllu_sentence)) formatted_test_file.flush() # predict arc scores and labels predicted_arcs, predicted_labels = predict(model, conllu_sentence, args.language) # generate predicted sentence for word in conllu_sentence.words: word.HEAD = str(predicted_arcs[int(word.ID)]) word.DEPREL = str(em.i2l[args.language][predicted_labels[int( word.ID)]]) word.DEPS = word.HEAD + ':' + word.DEPREL prediction_file.write(str(conllu_sentence)) prediction_file.flush() # DEBUG # print([em.i2l[l] for l in np.argmax(nn.Softmax()(train_label_scores).data.numpy(), axis=1)]) # print([em.i2l[l] for l in np.argmax(nn.Softmax()(validate_label_scores).data.numpy(), axis=1)]) # print(conllu_sentences_train.get_label_list()) # plot_matrix(nn.Softmax()(train_label_scores))
def predict_ann(DATA, parameters):
    """Run a forward pass with ``parameters`` and return the prediction.

    Args:
        DATA: Network input forwarded to ``helpers.forward_propagation``.
        parameters: Trained network parameters.

    Returns:
        Whatever ``helpers.predict`` derives from the output activations.
    """
    # The cache produced by the forward pass is not needed for inference.
    output_activations, _cache = helpers.forward_propagation(DATA, parameters)
    return helpers.predict(output_activations)
plotDecisionBoundary(theta, X_padded, y)
# pyplot.hold() was deprecated in Matplotlib 2.0 and removed in 3.0, where
# calling it raises AttributeError. Only call it where it still exists.
if hasattr(plt, 'hold'):
    plt.hold(False)  # prevents further drawing on plot
plt.show(block=False)

input('Program paused. Press enter to continue.\n')

## ============== Part 4: Predict and Accuracies ==============
#  After learning the parameters, you'll like to use it to predict the outcomes
#  on unseen data. In this part, you will use the logistic regression model
#  to predict the probability that a student with score 45 on exam 1 and
#  score 85 on exam 2 will be admitted.
#
#  Furthermore, you will compute the training and test set accuracies of
#  our model.

# Predict probability for a student with score 45 on exam 1
# and score 85 on exam 2 (the leading 1 is the intercept term).
prob = sigmoid(np.dot(np.array([1, 45, 85]), theta))
print(
    'For a student with scores 45 and 85, we predict an admission probability of {:f}'
    .format(prob))

# Compute accuracy on our training set
p = predict(theta, X_padded)
print('Train Accuracy: {:f}'.format(np.mean(p == y) * 100))

input('Program paused. Press enter to continue.\n')
def post(self):
    """Predict the gender for the name in the request payload.

    Returns:
        ``{'result': 'Male'}`` or ``{'result': 'Female'}``, chosen by using
        the value returned from ``helpers.predict`` as an index.
    """
    labels = ('Male', 'Female')
    label_index = helpers.predict(api.payload['name'])
    return {'result': labels[label_index]}
# Classifier dimensions follow the feature and label widths of the training data.
hyper_params.classifier_input_dim = train_X.shape[1]
hyper_params.classifier_output_dim = train_Y.shape[1]
hyper_params.model_name = 'DSLL'

# Summary of the dataset sizes.
title1 = {
    dataset,
    f'N = {hyper_params.N}',
    f'D = {hyper_params.D}',
    f'M = {hyper_params.M}',
    f'N_test = {hyper_params.N_test}',
}
print(title1)

# Streaming Label Distillation
print(
    '\n****************** Streaming Feature Distillation ******************\n')
print('load past-label classifer\n')
classifier_W_m = torch.load('models/past-label-classifier')
classifier_W_m.eval()

# Soft labels from the past-label classifier for both splits.
soft_train_Y = predict(classifier_W_m, train_X)  # sigmoid
soft_test_Y = predict(classifier_W_m, test_X)

# Hook the classifier, run one forward pass over the training inputs so the
# hook records its features, then detach the hook.
relu_hook_train = LayerActivations(classifier_W_m.W_m, 2)
output = classifier_W_m(torch.FloatTensor(train_X))
relu_hook_train.remove()
relu_out_train = relu_hook_train.features

# Same procedure for the test inputs.
relu_hook_test = LayerActivations(classifier_W_m.W_m, 2)
output = classifier_W_m(torch.FloatTensor(test_X))
relu_hook_test.remove()
relu_out_test = relu_hook_test.features

# Train the distillation model on the recorded features.
hyper_params.KD_epoch = 10
featureKD_model = train_KD(hyper_params, train_X, relu_out_train, test_X,
                           relu_out_test)
elif o in ("-l", "--min_leaf_size"): MIN_LEAF = int(a) else: assert False, "Unhandled option " + o # Train if not FOREST: train_tree(FILE, 'model', output='output', numvalid=150, loss=LOSS, min_leaf=MIN_LEAF, max_depth=15, min_depth=3, verbose=VERBOSE) else: train_forest(FILE, 'model', output='output', numvalid=150, loss=LOSS, min_leaf=MIN_LEAF, num_trees=32, dropout=0.2, max_depth=15, min_depth=3, verbose=VERBOSE) predict('model', TEST_FILE, 'output.csv', 'output')
parse.add_argument(
    '--category_names',
    type=str,
    default='cat_to_name.json',
    help='Mapping of categories to real names',
)
parse.add_argument(
    '--checkpoint',
    type=str,
    default='network_checkpoint.pth',
    help='Checkpoint to start network at',
)
parse.add_argument('--top_k', type=int, default=5)

# Parse the command line and pull out the individual settings.
parse_args = parse.parse_args()
image_path = parse_args.image_path
category_names = parse_args.category_names
checkpoint = parse_args.checkpoint
top_k = parse_args.top_k

# Restore the network from the checkpoint.
nn_model = load_nn_checkpoint(checkpoint)

# Category-to-name mapping.
cat_to_name = label_mapping(category_names)

# Top-k probabilities and labels, with labels translated to flower names.
ps, labels = predict(image_path, nn_model, top_k)
label_names = [cat_to_name[label] for label in labels]

for position, flower in enumerate(label_names):
    print(f"Flower: {flower} Probability: {ps[position] * 100:0.2f}%")
def analyze():
    """Run the predictor on the submitted form text and render the results page.

    The 'inputText' form field is wrapped in a one-element list before
    being passed to ``predict``.
    """
    submitted = request.form['inputText']
    return render_template("results.html", predictions=predict([submitted]))
def main(**kwargs):
    """Run the DSLL pipeline: past-label classifier, distillation, label mapping, training.

    Steps, in order:
      1. Seed the random generators and read the run configuration.
      2. Load the dataset and derive the model dimensions from its shapes.
      3. Load (or train) the classifier for the already-known labels.
      4. Record its hooked activations and train the knowledge-distillation
         model on them.
      5. Train (or load) the mapping from old labels to new labels.
      6. Train the DSLL model, with or without active learning.

    Keyword Args:
        dataset: Dataset name; "yeast", "nus", "mirfl" and "leda" get
            dataset-specific handling below.
        use_al: ``True`` to train in active-learning mode.
        label_split: Share of labels treated as already known (a lower split
            uses fewer old labels).
        All keyword arguments are also forwarded to ``get_params``.
    """
    seednr = 125  # 123
    random.seed(seednr)
    torch.manual_seed(seednr)
    torch.cuda.manual_seed(seednr)
    np.random.seed(seednr)
    torch.backends.cudnn.enabled = False
    torch.backends.cudnn.deterministic = True

    # ---------- dataset and active-learning configuration ----------
    # datasets = ['yeast', 'nus', 'mirfl','leda']
    dataset = kwargs['dataset']
    print(dataset)
    print(kwargs)
    # use active learning yes or no
    use_al = kwargs['use_al']
    # lower split is less old labels used, higher split is more old labels used
    split = kwargs['label_split']
    hyper_params = get_params(dataset, **kwargs)

    # train_Y/test_Y are old y labels, *_Y_rest hold the new labels.
    train_X, train_Y, train_Y_rest, test_X, test_Y, test_Y_rest = load_dataset(
        dataset, split, hyper_params)
    print(f"Working with the {dataset} dataset")
    train_X_tensor = torch.from_numpy(train_X).float()
    print('read dataset')

    # Model dimensions follow directly from the dataset shapes.
    hyper_params.dataset_name = dataset
    hyper_params.N = train_X.shape[0]
    hyper_params.D = train_X.shape[1]
    hyper_params.M_full = train_Y.shape[1] + train_Y_rest.shape[1]
    hyper_params.M = train_Y.shape[1]
    hyper_params.N_test = test_X.shape[0]
    hyper_params.label_mapping_input_dim = train_Y.shape[1]
    hyper_params.classifier_input_dim = train_X.shape[1]
    hyper_params.classifier_output_dim = train_Y.shape[1]
    hyper_params.model_name = 'DSLL'
    hyper_params.KD_input_dim = train_X.shape[1]
    hyper_params.KD_output_dim = 200
    hyper_params.label_mapping_output_dim = train_Y.shape[1]
    hyper_params.label_representation_output_dim = train_Y.shape[1]
    if dataset == "nus":
        hyper_params.classifier_hidden1 = 512
        hyper_params.classifier_hidden2 = 256

    title1 = {
        dataset, 'N = {}'.format(hyper_params.N),
        'D = {}'.format(hyper_params.D), 'M = {}'.format(hyper_params.M),
        'N_test = {}'.format(hyper_params.N_test)
    }
    print(title1)

    print(
        '\n****************** Streaming Feature Distillation ******************\n'
    )
    # Import a pretrained model for the old labels, or train it from scratch
    # (takes a few minutes).
    if dataset == "yeast":
        try:
            print('loading past-label classifer\n')
            classifier_W_m = torch.load(
                'models/past-label-classifier-upd2').to(hyper_params.device)
        except IOError:
            print('learning past-label classifer\n')
            classifier_W_m = _classifier2(hyper_params)
            classifier_W_m = train_new(classifier_W_m, train_X, train_Y,
                                       hyper_params)
            # NOTE(review): saved under a different name than the one loaded
            # above, so this file is never picked up on the next run - confirm.
            torch.save(classifier_W_m,
                       'models/past-label-classifier-yeast_v02')
    elif dataset == "leda":
        try:
            print('loading past-label classifer\n')
            classifier_W_m = torch.load(
                'models/past-label-classifier-leda_v1').to(hyper_params.device)
        except IOError:
            print('learning past-label classifer\n')
            classifier_W_m = _classifier2(hyper_params)
            classifier_W_m = train_new(classifier_W_m, train_X, train_Y,
                                       hyper_params)
    else:
        print('learning past-label classifer\n')
        classifier_W_m = _classifier2(hyper_params)
        classifier_W_m = train_new(classifier_W_m, train_X, train_Y,
                                   hyper_params)
        # torch.save(classifier_W_m, 'models/past-label-classifier-leda_v1')

    classifier_W_m.eval()
    soft_train_Y = predict(classifier_W_m, train_X)  # sigmoid of forward pass
    # test X is predicted using the model trained on train_X and train_Y
    soft_test_Y = predict(classifier_W_m, test_X)

    # The hook collects the values coming out of the hooked ReLU layer (not
    # weights): attach it, run one forward pass, detach, read the features.
    relu_hook_train = LayerActivations(classifier_W_m.W_m, 2)
    output = classifier_W_m(torch.FloatTensor(train_X).to(hyper_params.device))
    relu_hook_train.remove()
    relu_out_train = relu_hook_train.features

    # Same for the test inputs; relu_out_test is currently only needed if
    # train_KD is called with the test arguments again.
    relu_hook_test = LayerActivations(classifier_W_m.W_m, 2)
    output = classifier_W_m(torch.FloatTensor(test_X).to(hyper_params.device))
    relu_hook_test.remove()
    relu_out_test = relu_hook_test.features

    # Learn to predict the post-ReLU feature values of classifier_W_m.
    # hyper_params.KD_output_dim has the same dimension as that ReLU layer
    # (hyper_params.classifier_hidden1).
    print(
        '\n****************** TRAIN KNOWLEDGE DISTILLATION ******************\n'
    )
    if dataset == "yeast":
        try:
            print('loading past-label classifer\n')
            featureKD_model = torch.load(
                'models/knowledge_distillation-yeast_v02').to(
                    hyper_params.device)
        except IOError:
            print('learning past-label classifer\n')
            featureKD_model = train_KD(hyper_params, train_X, relu_out_train)
            torch.save(featureKD_model,
                       'models/knowledge_distillation-yeast_v02')
    else:
        featureKD_model = train_KD(hyper_params, train_X,
                                   relu_out_train)  # , test_X, relu_out_test

    # Streaming Label Mapping
    print('\n****************** Streaming Label Mapping ******************\n')
    hyper_params.label_mapping_hidden1 = 200
    hyper_params.label_mapping_hidden2 = 0
    hyper_params.loss = 'correlation_aware'  # label correlation-aware loss
    device = hyper_params.device

    # test_Y_rest is unknown, only the test_Y is known
    rest_iterations = train_Y_rest.shape[1]
    # One exclusive chain: previously these were independent `if`s with the
    # `else` bound only to the "leda" test, so the "nus" and "mirfl" values
    # were always overwritten with 4.
    if dataset == "nus":
        start_iterations = rest_iterations - 5
    elif dataset == "mirfl":
        start_iterations = rest_iterations - 5
    elif dataset == "leda":
        start_iterations = 0
    else:
        start_iterations = 4

    for i in range(start_iterations, rest_iterations):
        print(f"New Labels number {i} from {rest_iterations}")

        # These are the labels still to be learned; all new label columns
        # are used at once (the per-iteration slice would be [:, :i+1]).
        train_Y_new = train_Y_rest[:, :]
        test_Y_new = test_Y_rest[:, :]

        # define shapes
        hyper_params.M_new = train_Y_new.shape[1]
        hyper_params.label_mapping_output_dim = train_Y_new.shape[1]
        hyper_params.label_representation_output_dim = train_Y_new.shape[1]

        print('apply label mapping')
        # train_Y is available; soft_train_Y and soft_test_Y were predicted
        # by the old-label classifier above.
        if dataset == "yeast" and split == 0.7:
            print("loading 0.7 split model")
            mapping_model = torch.load(f'models/{i+1}mapping-pep-{split}-upd',
                                       map_location=torch.device(device))
        else:
            # Input is half predicted by classifier_W_m and half original y;
            # the NEW labels are predicted from the PREVIOUS labels.
            mapping_model = train_S_label_mapping(
                hyper_params, 0.5 * train_Y + 0.5 * soft_train_Y,
                train_Y_new)  # soft_test_Y
            # torch.save(mapping_model, f'models/{i+1}mapping-pep-{split}-upd')

        # Mapping output for the training set: new labels from previous labels.
        mapping_train_Y_new = predict(mapping_model,
                                      0.1 * soft_train_Y + 0.9 * train_Y)
        mapping_model.eval()
        # prediction of new test labels
        mapping_test_Y_new = predict(mapping_model, soft_test_Y)

        # Senior Student
        mapping_train_Y_new_tensor = torch.from_numpy(
            mapping_train_Y_new).float()
        train_Y_new_tensor = torch.from_numpy(train_Y_new).float()
        train_Y_old_tensor = torch.from_numpy(train_Y).float()

        # With active learning no loader can be used, so the whole data set
        # has to be held in memory. A possible fix is to hand the loader a
        # changing random subset each time.
        # The comparison with True is kept as in the original: a truthy
        # value that does not compare equal to True does not enable AL mode.
        if use_al == True:
            seed_pool = CustomActiveLearningDataset(
                train_X_tensor, mapping_train_Y_new_tensor, train_Y_old_tensor,
                train_Y_new_tensor, hyper_params.batch_size)
            test_ds = (torch.from_numpy(test_X).float(),
                       torch.from_numpy(mapping_test_Y_new).float(),
                       torch.from_numpy(test_Y).float(),
                       torch.from_numpy(test_Y_new).float())
        else:
            train_data_DSLL = CustomDataset(train_X_tensor,
                                            mapping_train_Y_new_tensor,
                                            train_Y_new_tensor)

        hyper_params.classifier_dropout = 0.5
        hyper_params.classifier_L2 = 1e-08
        hyper_params.batchNorm = False
        hyper_params.changeloss = False
        # options: correlation_aware, correlation_entropy, entropy
        hyper_params.loss = 'correlation_aware'

        # Run the DSLL model with one batch size.
        if use_al == True:
            myclass = AL_train_DSLL_model(hyper_params, featureKD_model,
                                          train_X, train_Y,
                                          mapping_train_Y_new, train_Y_new,
                                          test_X, mapping_test_Y_new,
                                          test_Y_new, seed_pool, test_ds,
                                          use_al, seednr)
        else:
            # hyper_params.classifier_epoch = int(40 + 1 * hyper_params.batch_size)
            train_DSLL_loader = DataLoader(dataset=train_data_DSLL,
                                           batch_size=hyper_params.batch_size,
                                           shuffle=True,
                                           num_workers=5)
            hyper_params.label_representation_hidden1 = 200
            train_DSLL_model(hyper_params, featureKD_model, train_X, train_Y,
                             mapping_train_Y_new, train_Y_new, test_X,
                             mapping_test_Y_new, test_Y_new, train_DSLL_loader,
                             use_al)

        # All new labels are handled in a single pass, so stop after the
        # first iteration.
        # NOTE(review): assumed to apply to both modes - confirm it was not
        # meant for the non-AL branch only.
        break