def predict(self, model_path, test_txt_path, prediction_dir, *args, **kwargs):
    '''
    Args:
        model_path: Path to the trained model.
        test_txt_path: Path to the formatted test data to be predicted.
        prediction_dir: Directory the prediction file is written to.

    Returns:
        Path to the generated .con prediction file.
    '''
    predict.predict_model(model_path, test_txt_path, prediction_dir)
    fname = os.path.splitext(os.path.basename(test_txt_path))[0] + '.con'
    print("Prediction Completed.")
    return prediction_dir + fname
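# --- Hedged usage sketch (not from the source): calling the predict() method
# above and reading back the generated .con file. The owning class name
# (Predictor) and all paths below are hypothetical.
predictor = Predictor()
con_file = predictor.predict(model_path='models/model.crf',
                             test_txt_path='data/test/record-13.txt',
                             prediction_dir='predictions/')
# con_file == 'predictions/record-13.con'
with open(con_file) as f:
    predictions = f.read().splitlines()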
def predict_models(model_names, stack_path, pickle_path, normalize_labels,
                   run_sequence, test, ids, overlap, full=True,
                   verbose=False):
    # model artifacts are suffixed by dataset variant ("full" vs reduced)
    if full:
        model_suffix = "_30"
    else:
        model_suffix = "_8"

    print("".join(
        ["\n", "=" * 50, "\nFull Data Prediction Phase\n", "=" * 50, "\n"]))

    for model_name in model_names:
        print("\n", "-" * 50,
              "\n MODEL: %s\n" % "".join([model_name, model_suffix]),
              "-" * 50, "\n")
        predict.MODEL_NAME = model_name

        # derive the prediction file location
        output_path = str(stack_path).replace("\\", "/").strip()
        if not output_path.endswith('/'):
            output_path = "".join((output_path, "/"))
        if not os.path.exists(output_path):
            raise RuntimeError(
                "Cannot predict for model '%s' - output path '%s' does not exist."
                % (model_name, output_path))
        predict_file = "".join([
            output_path, "PREDICT_", model_name, model_suffix, "_",
            run_sequence, ".csv"
        ])

        # predict with the model trained on the full dataset
        pred = predict.predict_model(model_path=stack_path,
                                     pickle_path=pickle_path,
                                     model_name=model_name,
                                     normalize_labels=normalize_labels,
                                     test=test,
                                     ids=ids,
                                     overlap=overlap,
                                     predict_file=predict_file,
                                     skip_output=True,
                                     skip_overlap=False,
                                     full=full,
                                     verbose=verbose)
        print("Model '%s' full data prediction complete.\n" % model_name)

    print("".join([
        "\n", "=" * 50, "\nFull Data Prediction Phase Complete\n", "=" * 50,
        "\n"
    ]))
    return
def showFunction(contract_address, function_hash):
    function = get_contract_function(contract_address, function_hash)
    functions_exact = get_functions_in_contracts(function['tree_hash'])
    function_sources = contract_function_code(contract_address, function_hash)

    # rank the predicted matches by probability, highest first
    top_probs, top_fhashes = predict_model(function['tree'])
    top_funcs = [get_function(h) for h in top_fhashes]
    functions_prediction = sorted(zip(top_probs, top_funcs),
                                  key=itemgetter(0), reverse=True)

    return render_template('function.html',
                           contract_address=contract_address,
                           function_hash=function_hash,
                           function=function,
                           functions_exact=functions_exact,
                           functions_prediction=functions_prediction,
                           function_sources=function_sources)
if __name__ == '__main__':
    input_option = read_args().parse_args()
    input_help = read_args().print_help()

    commits = extract_commit(path_file=input_option.data)
    commits = reformat_commit_code(commits=commits, num_file=1,
                                   num_hunk=input_option.code_hunk,
                                   num_loc=input_option.code_line,
                                   num_leng=input_option.code_length)

    if input_option.train:
        train_model(commits=commits, params=input_option)
        print('-' * 80)
        print('--------------------------Finish the training process---------------------------')
        print('-' * 80)
        exit()
    elif input_option.predict:
        predict_model(commits=commits, params=input_option)
        print('-' * 80)
        print('--------------------------Finish the prediction---------------------------------')
        print('-' * 80)
        exit()
    else:
        print('-' * 80)
        print('Something is wrong with your command; run with -h to see the usage of PatchNet.')
        print('-' * 80)
        exit()
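# --- Hedged sketch (not from the source): read_args() is not shown above.
# Judging from the options referenced (data, code_hunk, code_line,
# code_length, train, predict), it plausibly resembles the argparse setup
# below; defaults and help strings are assumptions.
import argparse

def read_args():
    parser = argparse.ArgumentParser(description='PatchNet')
    parser.add_argument('--data', type=str,
                        help='path to the commit data file')
    parser.add_argument('--code_hunk', type=int, default=8,
                        help='max number of hunks per file')
    parser.add_argument('--code_line', type=int, default=10,
                        help='max number of lines per hunk')
    parser.add_argument('--code_length', type=int, default=120,
                        help='max number of tokens per line')
    parser.add_argument('--train', action='store_true',
                        help='train a model')
    parser.add_argument('--predict', action='store_true',
                        help='predict with a trained model')
    return parser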
def main(path):
    images, frames = preprocessing_file(path)
    result = predict_model(images, frames)
    return result
val_dataset = datagen.flow(train_images, train_labels_one_hot,
                           subset='validation')

print("IS_ORIGINAL_TRAINABLE: ", IS_ORIGINAL_TRAINABLE)
print("NUM_EPOCHS: ", NUM_EPOCHS)
print("BATCH_SIZE: ", BATCH_SIZE)

print("Training densenet")
if IS_ORIGINAL_TRAINABLE:
    # resetting the model
    dense_net.set_weights(old_weights_dense_net)
    train.fit_model(dense_net, NUM_EPOCHS, BATCH_SIZE, IS_ORIGINAL_TRAINABLE,
                    train_dataset, val_dataset, test_images,
                    test_labels_one_hot)
    predict.predict_model(dense_net, NUM_EPOCHS, BATCH_SIZE,
                          IS_ORIGINAL_TRAINABLE)
else:
    # resetting the model
    dense_net_tnb_false.set_weights(old_weights_dense_net_tnb_false)
    train.fit_model(dense_net_tnb_false, NUM_EPOCHS, BATCH_SIZE,
                    IS_ORIGINAL_TRAINABLE, train_dataset, val_dataset,
                    test_images, test_labels_one_hot)
    predict.predict_model(dense_net_tnb_false, NUM_EPOCHS, BATCH_SIZE,
                          IS_ORIGINAL_TRAINABLE)

'''print("Training vgg32s")
if IS_ORIGINAL_TRAINABLE:
    # resetting the model
    vgg_32s.set_weights(old_weights_vgg32s)
    train.fit_model(vgg_32s, NUM_EPOCHS, BATCH_SIZE, IS_ORIGINAL_TRAINABLE,
                    train_dataset, val_dataset, test_images,
                    test_labels_one_hot)
from flask import Flask, request, jsonify, make_response
from predict import predict_model
import time
import json
import flask

app = Flask(__name__)
predict_ = predict_model()
# warm-up prediction; 'Khởi tạo lần đầu' means 'first-time initialization'
predict_.predict('Khởi tạo lần đầu')


@app.route('/api', methods=['POST'])
def suggest():
    predict_.log.debug('### Request coming, info = ' + str(request.headers))
    # print('json:', request.get_json())
    predict_.log.debug('### json: ' + str(request.get_json()))
    # print('data', request.get_data())
    predict_.log.debug('### data: ' + str(request.get_data()))
    data = json.loads(request.get_data().decode(encoding='utf-8'))
    predict_.log.debug('### encoding data : ' + str(data))

    result = {}
    sent = data['sentence']
    result = predict_.predict(sent)
    # print(jsonify(result))
    res = make_response(jsonify(result))
    res.headers['Access-Control-Allow-Origin'] = '*'
    return res


if __name__ == '__main__':
    app.run()  # assumed entry point; host/port were not given in the source
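# --- Hedged client sketch (not from the source): the /api endpoint above
# accepts a JSON body with a 'sentence' key and returns a JSON result. A
# minimal client, assuming the app runs on Flask's default localhost:5000:
import requests

resp = requests.post('http://localhost:5000/api',
                     json={'sentence': 'example input sentence'})
print(resp.json())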
# test the model
U_hat = model.predict([X_test, NN_test], verbose=1)
U_hat = U_hat.reshape((len(U_hat)))
loss_and_metrics = model.evaluate([X_test, NN_test], y_test[:, 0])
print("test error is: ", loss_and_metrics)

# plot the predicted versus the actual U values
toPlot = np.column_stack((U_hat, y_test[:, 0]))
plt.plot(toPlot)
plt.show()

# Testing the model with Plant Model
time_steps = 1000  # points to predict in the future
ykstack = np.zeros(shape=(time_steps, 1))

# take the first 12 points in the test set to start the prediction
utest_start = X_test[0, :, :]
utest_start = utest_start.reshape((1, lstm_length - 1, 1))
nntest_start = NN_test[0]
nntest_start = nntest_start.reshape(1, 3)  # 3 denotes 3 dimensions

# array which holds the predicted values
# specify the setpoint in the predict function:
#   predict_model  for a constant setpoint
#   predict_model2 for a varying setpoint
ykstack = pdt.predict_model(model, y_test, time_steps, utest_start,
                            nntest_start, lstm_length, ykstack)

# plot the points predicted with the LSTM and the model
plt.plot(ykstack)
plt.show()
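# --- Hedged sketch (not from the source): pdt.predict_model is defined
# elsewhere. The closed-loop pattern it appears to implement is: predict one
# step ahead, store the prediction, slide the LSTM input window forward by
# one, and repeat. The function below is a generic illustration of that
# pattern only; the real signature and setpoint handling differ.
import numpy as np

def closed_loop_predict(model, time_steps, u_window, nn_state, ykstack):
    # u_window: shape (1, lstm_length - 1, 1); nn_state: shape (1, 3)
    for k in range(time_steps):
        yk = model.predict([u_window, nn_state], verbose=0)  # one step ahead
        ykstack[k, 0] = yk[0, 0]
        # drop the oldest input and append the newest prediction
        u_window = np.concatenate([u_window[:, 1:, :],
                                   yk.reshape(1, 1, 1)], axis=1)
    return ykstack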
    )
    train_data["intermediates"] = iter_train(0)
    pickle.dump(train_data, open(metadata_path + "-dump", "wb"))
    return


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description=__doc__)
    required = parser.add_argument_group('required arguments')
    required.add_argument('-c', '--config', help='configuration to run',
                          required=True)
    args = parser.parse_args()

    set_configuration(args.config)

    expid = utils.generate_expid(args.config)

    log_file = LOGS_PATH + "%s.log" % expid
    with print_to_file(log_file):
        print("Running configuration:", config().__name__)
        print("Current git version:", utils.get_git_revision_hash())

        train_model(expid)
        print("log saved to '%s'" % log_file)

        predict_model(expid)
        print("log saved to '%s'" % log_file)
def kfold_stack(train, validation, kfold_splits, model_names, stack_path,
                pickle_path, epochs, batch_size, normalize_labels,
                use_validation, validation_split, run_sequence, full=True,
                verbose=False):
    # capture the list of label names
    label_cols = [c for c in train.columns if not 'image' == c]

    # model artifacts are suffixed by dataset variant ("full" vs reduced)
    if full:
        model_suffix = "_30"
    else:
        model_suffix = "_8"

    # empty frame (no index equals -1) used to accumulate hold-out labels
    actual_labels = train[(train.index == -1)].copy()
    predicted_labels = {'id': []}

    kf = KFold(n_splits=kfold_splits, random_state=42, shuffle=False)
    for k, (train_idx, test_idx) in enumerate(kf.split(train)):
        print("".join([
            "\n", "=" * 50,
            "\nFold Iteration %d of %d\n" % (k + 1, kfold_splits),
            "=" * 50, "\n"
        ]))

        # get our KFold split of "train" and "test" for stacking
        #stack_train = train[(train.index.values.isin(train_idx))].copy()
        #stack_test = train[(train.index.values.isin(test_idx))].copy()
        stack_train = train.iloc[train_idx].copy()
        stack_test = train.iloc[test_idx].copy()

        # capture the actual labels for the input vector
        actual_labels = actual_labels.append(stack_test)
        if k == 0:
            predicted_labels['id'] = test_idx
        else:
            predicted_labels['id'] = np.vstack(
                (predicted_labels['id'].reshape(-1, 1),
                 test_idx.reshape(-1, 1))).ravel()

        # short-circuit logic if you need to restart at a specific iteration
        # due to a hang/crash
        #if k < 3:
        #    print("Skipping k == %d..." % k)
        #    continue

        # for each model, train on the large K-fold split and predict on the
        # hold-out
        for model_name in model_names:
            print("\n", "-" * 50,
                  "\n MODEL: %s\n" % "".join([model_name, model_suffix]),
                  "-" * 50, "\n")
            train_model.MODEL_NAME = model_name
            predict.MODEL_NAME = model_name

            # specify the prediction file
            output_path = str(stack_path).replace("\\", "/").strip()
            if not output_path.endswith('/'):
                output_path = "".join((output_path, "/"))
            predict_file = "".join([
                output_path, "STACK_", model_name, model_suffix, "_",
                str(k + 1), "_of_", str(kfold_splits), "_", run_sequence,
                ".csv"
            ])

            # derive the model performance file location
            output_path = str(stack_path).replace("\\", "/").strip()
            if not output_path.endswith('/'):
                output_path = "".join((output_path, "/", model_name, "/"))
            if not os.path.exists(output_path):
                print("Creating output path: '%s'." % output_path)
                os.makedirs(output_path)
            model_performance_file = "".join(
                [output_path, "performance_", str(k), ".csv"])

            # CDB : 3/30/2020 - why did I even make use_validation an option?
            # It MUST be True, and we MUST pass in the kth holdout... sigh
            models, features = train_model.train_model(
                model_path=stack_path,
                pickle_path=pickle_path,
                model_name=model_name,
                batch_size=batch_size,
                epochs=epochs,
                normalize_labels=normalize_labels,
                train=stack_train,
                validation=stack_test,
                use_validation=True,
                validation_split=validation_split,
                skip_history=True,
                model_performance_file=model_performance_file,
                full=full,
                verbose=verbose)

            # dummy up a "test" dataframe, as our current utility functions
            # expect a specific format for test and ids
            temp_test = stack_test.copy()
            temp_test = temp_test.reset_index().rename(
                columns={'index': 'image_id'})
            image_id = temp_test.image_id.values
            feature_name = np.array(label_cols)
            temp_ids = pd.DataFrame(np.transpose([
                np.tile(image_id, len(feature_name)),
                np.repeat(feature_name, len(image_id))
            ]), columns=['image_id', 'feature_name'])
            temp_ids['location'] = temp_ids.image_id
            temp_ids['row_id'] = temp_ids.image_id
            temp_ids.image_id = temp_ids.image_id.astype(np.int64)
            temp_ids.row_id = temp_ids.row_id.astype(np.int64)
            temp_ids.location = temp_ids.location.astype(np.float32)
            temp_ids = temp_ids[[
                'row_id', 'image_id', 'feature_name', 'location'
            ]]

            # for each kfold iteration, we need to predict and store the
            # predictions
            pred = predict.predict_model(model_path=stack_path,
                                         pickle_path=pickle_path,
                                         model_name=model_name,
                                         normalize_labels=normalize_labels,
                                         test=temp_test,
                                         ids=temp_ids,
                                         overlap=None,
                                         predict_file=predict_file,
                                         skip_output=True,
                                         skip_overlap=True,
                                         full=full,
                                         verbose=verbose)

    # KFold training iterations complete; write the final labels file
    print("".join(
        ["\n", "=" * 50, "\nFolds Complete. Writing Labels\n", "=" * 50,
         "\n"]))
    output_path = str(stack_path).replace("\\", "/").strip()
    if not output_path.endswith('/'):
        output_path = "".join((output_path, "/"))
    actual_labels = actual_labels.drop(columns=['image'])
    actual_labels.index.rename('image_id', inplace=True)
    labels_file = "".join(
        [output_path, "STACK", model_suffix, "_labels_", run_sequence,
         ".csv"])
    actual_labels.to_csv(labels_file)
    print("Labels file written to '%s'." % labels_file)
    return
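# --- Hedged sketch (not from the source): one way the out-of-fold
# STACK_*.csv predictions and the STACK*_labels_*.csv file written above
# could be combined into a level-2 training set for stacking. All file
# names, the run_sequence value, and the merge key are assumptions.
import glob
import pandas as pd

labels = pd.read_csv('stack/STACK_30_labels_0001.csv', index_col='image_id')
fold_files = sorted(glob.glob('stack/STACK_model1_30_*_of_*_0001.csv'))
oof_preds = pd.concat([pd.read_csv(f) for f in fold_files],
                      ignore_index=True)
# join predictions back to the hold-out labels to train a level-2 model
level2 = oof_preds.merge(labels, left_on='image_id', right_index=True,
                         suffixes=('_pred', '_true'))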
test_dataset = MyDataset(batch_size=32, data_type="test",
                         word2id=word2id, tag2id=tag2id)

if sys.argv[1] == "train_model":
    # define the model
    model = BiLSTMCRF(tag2id=tag2id, word2id_size=len(word2id),
                      batch_size=32, embedding_dim=100, hidden_dim=128)
    # train the model
    train_model(train_dataset=train_dataset, test_dataset=test_dataset,
                model=model, tag2id=tag2id)
elif sys.argv[1] == "predict_model":
    # define the model
    model = BiLSTMCRF(tag2id=tag2id, word2id_size=len(word2id),
                      batch_size=1, embedding_dim=100, hidden_dim=128)
    # load the trained model parameters
    model.load_state_dict(torch.load("models/params.pkl"))
    print("model restore success!")
    # predict
    predict_model(model=model, word2id=word2id, tag2id=tag2id)
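# --- Hedged usage note (not from the source): the block above dispatches on
# sys.argv[1], so the script (name assumed) would be invoked as:
#
#   python bilstm_crf.py train_model     # train and save models/params.pkl
#   python bilstm_crf.py predict_model   # load models/params.pkl and predict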
#---------------------------------------------------------------------------
#---------------------------------------------------------------------------
# Testing the model with Plant Model with LSTM and NN
time_steps = 1000  # points to predict in the future
ykstack = np.zeros(shape=(time_steps, 1))

# take the first 12 points in the test set to start the prediction
utest_start = X_test[0, :, :]
utest_start = utest_start.reshape((1, lstm_length - 1, 1))
nntest_start = NN_test[0]
nntest_start = nntest_start.reshape(1, 3)  # 3 denotes 3 dimensions

# array which holds the predicted values
# specify the setpoint in the predict function:
#   predict_model  for a constant setpoint
#   predict_model2 for a varying setpoint
ykstack = pdt.predict_model(model, y_test, time_steps, utest_start,
                            nntest_start, lstm_length, ykstack)

# plot the points predicted with the LSTM and the model
plt.plot(ykstack)
plt.show()

#---------------------------------------------------------------------------
#---------------------------------------------------------------------------
# Testing the model with Plant Model with only LSTM
# points to predict in the future

# varying setpoint (comment this section out if you want to use a constant
# setpoint)
yreftest = X_test[:, :, 2]
yreftest_comparison = y_test[:, 2]  # varying setpoint