def case_worksheet_10B():
    """Worksheet 10B: response-surface exploration of concrete strength.

    Code for this system: https://rsmopt.com/system/concrete-strength/

    Factors (real-world units):
        C: amount of cement, in kg (the original note gave the limits
           1.8 and 4.2 kg -- presumably the allowed range; confirm).
        W: amount of water (between 0.4 and 1.1 L).

    A 2x2 factorial is fitted with an interaction model, the model is used
    to predict the factorial points, and two extrapolated points are then
    examined, one in each direction of the saddle.
    """
    c1 = c(2.5, 3, 2.5, 3, center=2.75, range=[2.5, 3], name="cement",
           units='kg')
    # The water factor was previously labelled 'Throughput' in 'parts/hour',
    # a copy-paste left-over from another worksheet; label it as water in L.
    w1 = c(0.5, 0.5, 0.9, 0.9, center=0.7, range=[0.5, 0.9], name='water',
           units='L')

    C1 = c1.to_coded()
    W1 = w1.to_coded()
    y1 = c(14476, 14598, 14616, 14465, name="Strength", units="-")
    expt1 = gather(C=C1, W=W1, y=y1, title="First experiment")

    mod_base1 = lm("y ~ C * W", data=expt1)
    summary(mod_base1)
    contour_plot(mod_base1, "C", "W")

    # Predict the points, using the model:
    prediction_1 = predict(mod_base1, C=C1, W=W1)
    print(prediction_1)
    print(y1 - prediction_1)

    # Very nonlinear: saddle: up left, or bottom right
    # Bottom right: (C, W) = (2, -2)
    C2 = C1.extend([2])
    W2 = W1.extend([-2])

    # Predict at this point: 14794
    predict(mod_base1, C=C2, W=W2)
    c2 = C2.to_realworld()
    w2 = W2.to_realworld()

    # Actual: at c=3.25; w=0.4 (constraint): 14362. So wrong direction
    y1 = c(14476, 14598, 14616, 14465, name="Strength", units="-")
    expt1 = gather(C=C1, W=W1, y=y1, title="First experiment")

    # Try the other way: C, W = -2, 2
    C2 = C1.extend([-2])
    W2 = W1.extend([+2])

    # Predict at this point: 14830
    predict(mod_base1, C=C2, W=W2)
    c2 = C2.to_realworld()  # 2.25
    w2 = W2.to_realworld()  # 1.1
def main():
    """Compare an LSTM and a linear map on simulated pendulum trajectories.

    Three trajectories are simulated (initial angles pi/6, pi/4 and 0), the
    two pre-built models are loaded from ``bin_models/`` and each model is
    rolled forward from the first samples of every trajectory. One figure
    per model is produced through ``plot``.

    Returns:
        0 once the figures have been shown.
    """
    # generate data from physical model
    T = 16    # time interval
    dt = 0.1  # integration step

    # train data (oscillation with initial angle = pi/6):
    _, train = phys.simulate_pendulum(0, np.array([np.pi / 6, 0]), T, dt=dt)
    # test data (oscillation with initial angle = pi/4):
    _, test1 = phys.simulate_pendulum(0, np.array([np.pi / 4, 0]), T, dt=dt)
    # test data (fixed point with angle = 0):
    _, test2 = phys.simulate_pendulum(0, np.array([0.0, 0.0]), T, dt=dt)

    N = 5  # size of the history window (for LSTM)

    # To retrain instead of loading the pre-built LSTM:
    #   model_lstm = models.train_lstm(train, N)
    #   model_lstm.save('bin_models/pendulum_lstm')
    model_lstm = load_model('bin_models/pendulum_lstm')

    # To retrain instead of loading the pre-built linear model:
    #   model_linear = models.train_linear(train, N)
    #   model_linear.save('bin_models/pendulum_linear')
    model_linear = load_model('bin_models/pendulum_linear')

    experiments = (
        (model_lstm, N,
         "LSTM: non-physical predictions, training data are memorized"),
        (model_linear, 1,
         "Linear map: physical behaviour generalization"),
    )
    for model, n, title in experiments:
        # Each rollout is seeded with the first n samples and covers the
        # remainder of the simulated interval.
        remaining_steps = int(T / dt - n)
        rollouts = []
        for trajectory in (train, test1, test2):
            rollouts.append(models.predict(model, trajectory[:n],
                                           step_count=remaining_steps, N=n))
        pred, pred1, pred2 = rollouts

        plot(title, train, test1, test2, pred, pred1, pred2)

    plt.show()
    return 0
def get(self):
    """Handle a GET request by running a prediction on the query content.

    The request arguments ``content``, ``model`` and ``lang`` are read from
    the module-level ``parser``.

    Returns:
        The value returned by ``predict(content, lang, model)``, or the
        string ``"message : Missing Parameters"`` when ``content`` is
        missing or empty.
    """
    # GET PARAMS
    args = parser.parse_args()

    ## General
    user_query = args['content']
    user_model = args['model']
    user_lang = args['lang']

    if not user_query:
        return "message : Missing Parameters"

    print('[INFO REQUEST] Received Parameters')
    print('[INFO REQUEST - content ] ', user_query)
    print('[INFO REQUEST - Model ] ', user_model)
    print('[INFO REQUEST - Lang ] ', user_lang)
    # The logging API formats lazily with %-style placeholders; without the
    # '%s' the extra argument triggers a formatting error inside logging and
    # the query never reaches the log.
    application.logger.info('[LOG] Query: %s', user_query)

    # PREDICT
    result_prediction = predict(user_query, user_lang, user_model)
    print(result_prediction)
    return result_prediction
def get_file(name):
    """Predict maps with the stored model ``name`` and save them to disk.

    Timestamps are read from ``var_tec_reshape.npz``, turned into features,
    fed to the model loaded from ``models/<name>``, and the timestamps plus
    predictions are written with ``np.savez`` under ``data/<name>``.
    """
    timestamps, _ = load_data('var_tec_reshape.npz')
    fitted_model = load_model('models/%s' % name)

    predicted_maps = predict(timestamp_to_features(timestamps), fitted_model)

    output_path = 'data/' + name
    np.savez(output_path, times=timestamps, var_tec_maps=predicted_maps)
def run(task):
    """Render the train/predict page for `task`.

    Shows a title, a sidebar "Train" button with status placeholders, and,
    once a trained model exists for the task's slug, an optional metrics
    listing plus an input box whose content is passed to ``predict`` and
    rendered with the task's entry in ``DISPLAY_FUNCTIONS``.
    """
    st.markdown(f'<h1 align="center">{task}</h1>', unsafe_allow_html=True)

    train_clicked = st.sidebar.button("Train")
    sidebar_status = st.sidebar.empty()
    main_status = st.empty()

    slug = slugify(task)
    trained = is_trained(slug)

    if not trained:
        sidebar_status.warning(NO_TRAINED_MODEL_MESSAGE)
        main_status.warning(NO_TRAINED_MODEL_MESSAGE)
    else:
        sidebar_status.success(TRAINED_MODEL_MESSAGE)

    if train_clicked:
        for placeholder in (sidebar_status, main_status):
            placeholder.info(TRAINING_MESSAGE)
        train(slug, sidebar_status, TRAINING_MESSAGE)
        sidebar_status.success(TRAINED_MODEL_MESSAGE)
        main_status.empty()
        # Training may have just produced the model; check again.
        trained = is_trained(slug)

    if not trained:
        return

    if st.sidebar.checkbox("Show metrics"):
        for key, value in get_metrics(slug).items():
            st.sidebar.text(f"{key}: {value}")

    user_input = st.text_area("Input")
    st.text("Output")
    if user_input:
        output = predict(slug, user_input)
        DISPLAY_FUNCTIONS[slug](output)
def hello_world():
    """Render the index page, including a prediction for a submitted form.

    When the form validates, its ``input_text`` is passed to ``predict`` and
    the result is handed to the template as ``res``; otherwise ``res`` is an
    empty dict.
    """
    form = InputForm()
    if form.validate_on_submit():
        res = predict(form.input_text.data)
    else:
        res = {}
    return render_template('index.html', form=form, res=res)
def case_w2():
    """Teaching case week 2: https://yint.org/w2

    Fits a two-factor model with interaction to four crispiness
    measurements, shows its contour plot, and prints the prediction at one
    point outside the factorial.
    """
    # Baking time: coded -1 is 80 minutes, coded +1 is 100 minutes.
    bake_time = c(-1, +1, -1, +1, lo=80, hi=100)

    # Quantity of fat: coded -1 is 20 g, coded +1 is 30 g.
    fat = c(-1, -1, +1, +1, lo=20, hi=30)

    # The response is the crispiness.
    crispiness = c(37, 57, 49, 53, units='crispiness')

    # Fit a linear model to the gathered experiment.
    model_crispy = lm("y ~ T + F + T*F",
                      gather(T=bake_time, F=fat, y=crispiness))
    summary(model_crispy)

    # See how the two factors affect the response.
    # (interaction_plot(T, F, y) / interaction_plot(F, T, y) are alternatives.)
    contour_plot(model_crispy)

    # Make a prediction with this model:
    # T = +2 corresponds to 110 minutes, F = -1 to 20 grams of fat.
    target = {'T': +2, 'F': -1}
    y_hat = predict(model_crispy, **target)
    print(f'Predicted value is: {y_hat} crispiness.')
def nlp(self):
    """Keyword extraction wrapper: classify the parsed text.

    Transforms ``self.text`` with ``self.mapping``, runs ``predict`` with
    ``self.model`` and stores the first element of the result through
    ``self.set_category``.

    Raises:
        Exception: if the object has not been both downloaded and parsed.
    """
    ready = self.is_downloaded and self.is_parsed
    if not ready:
        raise Exception('You should download and parse first!')

    features = transform(self.text, self.mapping)
    prediction = predict(self.model, features)
    self.set_category(prediction[0])
def classify():
    """Render the classification page, with results for a submitted form.

    On a valid submission the form content is passed to ``predict`` together
    with ``config``; the result is serialised to JSON, printed, and given to
    the template. Otherwise ``results`` is ``None``.
    """
    form = InputForm()
    results = None

    if form.validate_on_submit():
        raw_results = predict(config, form.content.data)
        results = json.dumps(raw_results)
        print(results)

    return render_template('classify.html', form=form, results=results)
def main():
    """Compare an LSTM and a Lie-transform map on simulated epidemic data.

    Three trajectories are simulated from different initial conditions. An
    LSTM is trained on the first one, saved to
    ``bin_models/epidemiology_lstm`` and loaded back; the second model is a
    pre-built ``SIR_Lie_Transform``. Each model is rolled forward from the
    first samples of every trajectory and one figure per model is plotted.

    Returns:
        0 once the figures have been shown.
    """
    # generate data from physical model
    T = 10    # time interval
    dt = 0.1  # integration step

    # train data:
    _, train = phys.simulate_epidemiology(0, np.array([0.99, 0.01, 0]), T,
                                          dt=dt)
    # test data 1st:
    _, test1 = phys.simulate_epidemiology(0, np.array([0.4, 0.1, 0]), T,
                                          dt=dt)
    # test data 2nd:
    _, test2 = phys.simulate_epidemiology(0, np.array([1, 0.0, 0]), T, dt=dt)

    N = 5  # size of the history window (for LSTM)

    # Train the LSTM, persist it, then reload it from disk.
    models.train_lstm(train, N).save('bin_models/epidemiology_lstm')
    model_lstm = load_model('bin_models/epidemiology_lstm')

    # For training a polynomial neural network (matrix Lie transform) see
    # https://github.com/andiva/DeepLieNet/blob/master/demo/SIR_Identification.ipynb
    # Here the pre-built 3rd order Lie transform is used:
    model_linear = SIR_Lie_Transform()

    experiments = (
        (model_lstm, N,
         "LSTM: non-physical predictions, training data are memorized"),
        (model_linear, 1,
         "Linear map: physical behaviour generalization"),
    )
    for model, n, title in experiments:
        # Seed every rollout with the first n samples of the trajectory and
        # predict the remainder of the interval.
        remaining_steps = int(T/dt-n)
        pred, pred1, pred2 = [
            models.predict(model, trajectory[:n],
                           step_count=remaining_steps, N=n)
            for trajectory in (train, test1, test2)
        ]
        plot(title, train, test1, test2, pred, pred1, pred2)

    plt.show()
    return 0
def competition_run():
    """Fit the default model on the training data and save test predictions.

    Loads training and test features through ``GalaxyData``, fits
    ``models.default_model`` (third argument 5), predicts the test set and
    stores the result with ``data.save_solution``.
    """
    data = GalaxyData()
    training_features, training_solutions = data.get_training_data()
    test_features = data.get_test_data()[0]

    # Fit, then predict on the held-out features.
    clf, columns = models.default_model(training_features,
                                        training_solutions, 5)
    data.save_solution(models.predict(clf, test_features, columns))
def stream_train(train_dataset, stream_dataset):
    """Train a novelty detector, then refine it while consuming a stream.

    An initial detector is trained on ``train_dataset`` and evaluated on
    ``stream_dataset``. The stream is then read one shuffled sample at a
    time; samples flagged as novel are buffered, and every time the buffer
    holds 1000 samples a selection of it is added to the novelty dataset
    and the detector is retrained on that dataset.

    Relies on the module-level ``net``, ``prototypes``, ``config``,
    ``logger``, ``soft`` and ``use_log``.

    Args:
        train_dataset: dataset used for the initial training and as the
            seed of the novelty dataset.
        stream_dataset: dataset iterated as the incoming stream.

    Returns:
        The most recently trained novelty detector.
    """
    logger.info('---------------- stream train ----------------')
    logger.info('---------------- initial train ----------------')
    # Train on the dataset that was passed in. The previous code referenced
    # an undefined/global ``trainset`` and ignored the ``train_dataset``
    # argument for this step.
    novelty_detector = train(train_dataset)

    logger.info('---------------- initial test ----------------')
    test(stream_dataset, novelty_detector)

    novelty_dataset = dataset.NoveltyDataset(train_dataset)
    iter_streamloader = enumerate(
        DataLoader(dataset=stream_dataset, batch_size=1, shuffle=True))

    buffer = []

    for i, (feature, label) in iter_streamloader:
        # Keep the un-batched sample so it can be added to the dataset later.
        sample = (feature.squeeze(dim=0), label.squeeze(dim=0))

        with torch.no_grad():
            # Re-applied on every sample: the incremental training below may
            # leave the network in a different mode (assumption -- ``train``
            # is not visible here).
            net.eval()
            feature, label = feature.to(net.device), label.item()
            feature, out = net(feature)
            predicted_label, distance = models.predict(feature, prototypes)
            prob = models.probability(feature, predicted_label, prototypes,
                                      gamma=config.gamma)

        detected_novelty = novelty_detector(predicted_label, distance)
        real_novelty = label not in novelty_detector.known_labels

        if detected_novelty:
            buffer.append(sample)

        logger.debug("[stream %5d]: %d, %d, %7.4f, %7.4f, %5s, %5s, %4d",
                     i + 1, label, predicted_label, prob, distance,
                     real_novelty, detected_novelty, len(buffer))

        if len(buffer) == 1000:
            logger.info("novelty dataset size before extending: %d",
                        len(novelty_dataset))
            # todo try different sample methods by YW.
            # novelty_dataset.extend(buffer, config.novelty_buffer_sample_rate)
            novelty_dataset.extend_by_select(
                buffer, config.novelty_buffer_sample_rate, prototypes, net,
                soft, use_log)
            logger.info("novelty dataset size after extending: %d",
                        len(novelty_dataset))

            logger.info(
                '---------------- incremental train ----------------')
            novelty_detector = train(novelty_dataset)
            buffer.clear()

    return novelty_detector
def index():
    """Score one listing sent as JSON and append the result to a log file.

    The request must be JSON and contain every key in ``values_list``. The
    Keras model is rebuilt from ``model.json`` / ``model.h5`` on each call,
    the listing fields are passed to ``models.predict`` and the request data
    plus prediction is appended to ``predict.log``.

    Returns:
        ``create_json(203, ...)`` when the request is not JSON,
        ``create_json(204, ...)`` when required values are missing,
        ``create_json(400, exc)`` when the fields cannot be read,
        ``create_json(200, "Listing Updated", {...})`` on success, or the
        exception text if prediction or logging fails.
    """
    if not request.is_json:
        return create_json(203, "Format is not a JSON. Check headers.")

    payload = request.json
    missing = [value for value in values_list if value not in payload]
    if missing:
        return create_json(204, "Missing values in request",
                           {"values": missing})

    # Imported here, as before, so the heavy dependencies are only loaded
    # when a well-formed request arrives.
    from models import predict
    from keras.models import model_from_json

    # load json and create model; the context manager closes the file even
    # if reading or model construction fails.
    with open('model.json', 'r') as json_file:
        model = model_from_json(json_file.read())
    # load weights into new model
    model.load_weights("model.h5")

    # Request keys in the positional order expected by ``predict``.
    field_names = (
        'id', 'summary', 'host_is_superhost', 'latitude', 'longitude',
        'property_type', 'room_type', 'accomodates', 'bathrooms',
        'bedrooms', 'beds', 'security_deposit', 'cleaning_fee',
        'extra_people', 'minimum_nights', 'cancellation_policy',
    )
    try:
        req_data = request.get_json(force=True)
        listing_fields = [req_data[name] for name in field_names]
    except Exception as e:
        return create_json(400, e)

    try:
        result = predict(*(listing_fields + [model]))
        req_data['prediction'] = result[1]
        with open("predict.log", "a") as log_file:
            log_file.write(json.dumps(req_data) + "\n")
        t = {"listing_id": result[0], "listing_prediction": result[1]}
        return create_json(200, "Listing Updated", t)
    except Exception as e:
        return "{}".format(e)
    # A trailing ``shutdown_server()`` call used to follow here. Every path
    # above returns, so it could never run and has been dropped.
def index():
    """Serve the form page and, on POST, the prediction for its values.

    On POST a 13-slot vector is filled from form fields whose name contains
    ``'v'``: the text after the first character is the 1-based slot index
    and an empty value counts as 0. The first element of
    ``models.predict(vector)`` is rendered.

    Returns:
        The rendered template for GET and POST, or ``(request.url, 404)``
        for any other method.
    """
    method = request.method

    if method == 'GET':
        return render_template('index.html', data = '')

    if method != 'POST':
        return request.url, 404

    data = [0]*13
    for field_name, raw_value in request.form.items():
        if 'v' not in field_name:
            continue
        slot = int(field_name[1:].strip()) - 1
        data[slot] = int(raw_value) if raw_value else 0

    data = models.predict(data)[0]
    return render_template('index.html', data = data)
def login_page():
    """Render the login page and process login attempts.

    On POST the stored password hash is looked up by username, a logistic
    model and a ``StatisticClassifier`` are evaluated on a fixed row of
    ``/var/www/FlaskApp/data.csv``, and the user is logged in when the
    password verifies and the mean of the two scores exceeds 0.5.

    Returns:
        A redirect to ``reg`` on success, otherwise the rendered
        ``login.html`` with an error message.
    """
    error = ''
    try:
        c, conn = connection()
        if request.method == "POST":
            # Bind the username as a query parameter instead of formatting
            # it into the SQL text, so user input cannot alter the query.
            # NOTE(review): '%s' is the MySQLdb/PyMySQL placeholder style --
            # confirm it matches the driver behind ``connection()``.
            c.execute("SELECT * FROM users WHERE username = %s",
                      (request.form['username'],))
            row = c.fetchone()

            if row is None:
                # Unknown user: report it like any other failed login rather
                # than relying on a TypeError from indexing None.
                error = "Invalid credentials, try again."
            else:
                password_hash = row[2]

                kd = pd.read_csv("/var/www/FlaskApp/data.csv")
                subjects = kd["subject"].unique()
                subject = subjects[2]
                vector = kd.loc[kd.subject == subject,
                                "H.period":"H.Return"].iloc[34].values

                pv = prepare_data(kd, subject)
                d = model(pv[0], pv[2], pv[1], pv[3], num_iterations=4000,
                          learning_rate=0.05, print_cost=False)
                arr = np.array([vector, vector])
                lr_res = predict(d['w'], d['b'], arr.transpose())

                sc = StatisticClassifier(kd, 0.95)
                sc_res = sc.singleClassification(kd, vector)

                password_ok = sha256_crypt.verify(request.form['password'],
                                                  password_hash)
                if password_ok and (lr_res[0, 0] + sc_res) / 2 > 0.5:
                    session['logged_in'] = True
                    session['username'] = request.form['username']
                    flash("You are now logged in")
                    return redirect(url_for("reg"))

                error = "Invalid credentials, try again."

        gc.collect()
        return render_template("login.html", error=error)

    except Exception as e:
        # NOTE(review): flashing the raw exception exposes internal details
        # to the visitor; kept as-is because the page may rely on it.
        flash(e)
        error = "Invalid credentials, try again."
        return render_template("login.html", error=error)
def prediction(frame, answer, name):
    """Classify the text typed into `name` and show the verdict on `answer`.

    The entry is cleared after reading. An empty entry, or the literal text
    "None", produces a prompt to fill in the field; otherwise the text is
    passed to ``predict`` and a result of 1 is shown as abusive.

    Args:
        frame: unused.
        answer: widget updated through ``config(text=..., fg=...)``.
        name: entry widget providing ``get()`` and ``delete()``.
    """
    text = name.get()
    name.delete(0, "end")

    if not text or text == "None":
        answer.config(text="please fill every field", fg="red")
        return

    if predict(text) == 1:
        verdict, colour = "ABUSIVE", "red"
    else:
        verdict, colour = "NON ABUSIVE", "green"
    answer.config(text=verdict, fg=colour)
def pred_edges(df, pkl, baseline='n'):
    """Write an ``edges`` column into `df`, in place.

    With ``baseline == 'y'`` every row gets 0. Otherwise the columns not
    listed in ``exclude_cols`` are used as features, the object loaded from
    `pkl` supplies its ``'grid'`` entry, and ``predict(X, grid)`` fills the
    column.

    Returns:
        None; `df` is modified in place.
    """
    if baseline == 'y':
        print('Performing baseline segmentaiton...')
        df['edges'] = 0
        return None

    # Features are every column that is not explicitly excluded.
    is_feature = ~df.columns.isin(exclude_cols)
    X = df[df.columns[is_feature]]

    # The stored object carries the estimator under 'grid'.
    grid = load_model(pkl)['grid']

    df['edges'] = predict(X, grid)
    return None
def test(test_dataset, novelty_detector):
    """Run the novelty detector over `test_dataset` and log the outcome.

    Every sample is passed through the module-level ``net`` (in eval mode,
    without gradients), classified against ``prototypes`` and checked by
    ``novelty_detector``. The collected results are handed to
    ``novelty_detector.evaluate`` and the resulting figures are logged.

    NOTE(review): precision and recall are computed with ``+ 1`` added to
    the denominator, so they are slightly below the textbook values;
    presumably a guard against division by zero -- confirm.
    """
    logger.info('---------------- test ----------------')
    dataloader = DataLoader(dataset=test_dataset, batch_size=1, shuffle=False)

    detector_state = (
        ("known labels: %s", novelty_detector.known_labels),
        ("distance average: %s", novelty_detector.average_distances),
        ("distance std: %s", novelty_detector.std_distances),
        ("detector threshold: %s", novelty_detector.thresholds),
    )
    for template, value in detector_state:
        logger.info(template, value)

    detection_results = []

    with torch.no_grad():
        net.eval()
        for index, (feature, label) in enumerate(dataloader):
            feature = feature.to(net.device)
            label = label.item()
            feature, out = net(feature)

            predicted_label, distance = models.predict(feature, prototypes)
            prob = models.probability(feature, predicted_label, prototypes,
                                      gamma=config.gamma)

            detected_novelty = novelty_detector(predicted_label, distance)
            real_novelty = label not in novelty_detector.known_labels

            detection_results.append(
                (label, predicted_label, real_novelty, detected_novelty))

            logger.debug("[test %5d]: %d, %d, %7.4f, %7.4f, %5s, %5s",
                         index + 1, label, predicted_label, prob, distance,
                         real_novelty, detected_novelty)

    tp, fp, fn, tn, cm, acc, acc_all = novelty_detector.evaluate(
        detection_results)
    precision = tp / (tp + fp + 1)
    recall = tp / (tp + fn + 1)

    report = (
        ("accuracy of known labels: %.4f", acc),
        ("accuracy of all labels: %.4f", acc_all),
        ("true positive: %d", tp),
        ("false positive: %d", fp),
        ("false negative: %d", fn),
        ("true negative: %d", tn),
        ("precision: %7.4f", precision),
        ("recall: %7.4f", recall),
        ("confusion matrix: \n%s", cm),
    )
    for template, value in report:
        logger.info(template, value)
def execute_trade(n_intervals, buy_sell_data, entry_exit_df, model):
    """Refresh the signals, run the trade strategy and rebuild the figures.

    Signals are regenerated from ``crypto_stream.get_data_from_table()``.
    For any model other than ``'SMA10'``, and when more than 20 rows are
    available, they are passed through ``models.predict``. The strategy is
    executed against the last entry of `buy_sell_data`, and a truthy result
    is appended to that list.

    Args:
        n_intervals: unused here.
        buy_sell_data: list of account records; extended in place.
        entry_exit_df: previously stored signal data.
        model: model name; ``'SMA10'`` skips the prediction step.

    Returns:
        Tuple of (buy_sell_data, signals as a dict of series, trade figure,
        SMA figure).

    Raises:
        PreventUpdate: when fewer than 10 signal rows are available.
    """
    # NOTE(review): the converted frame is replaced by freshly generated
    # signals just below, so this conversion appears to have no effect.
    if entry_exit_df:
        entry_exit_df = pd.DataFrame.from_dict(entry_exit_df)

    uses_sma = model == 'SMA10'
    signals = crypto_stream.generate_signals(
        crypto_stream.get_data_from_table())

    if len(signals) > 20 and not uses_sma:
        signals = models.predict(signals, model, 20)

    if len(signals) < 10:
        raise PreventUpdate

    account = crypto_stream.execute_trade_strategy(signals,
                                                   buy_sell_data[-1])
    print(account)
    if account:
        buy_sell_data.append(account)

    return (buy_sell_data, signals.to_dict('series'),
            get_trade_fig(signals), get_sma_fig(signals))
def inference(params, dataloaders, use_tqdm=True):
    """Run MC-dropout inference and combine it with a validation estimate.

    The trained model is loaded, ``mc_dropout`` is run on the ``'test'``
    loader, then dropout is switched off and the model is evaluated on the
    first batch of the ``'valid'`` loader.

    Args:
        params: configuration passed to ``load_trained_model`` and
            ``mc_dropout``.
        dataloaders: mapping with at least ``'test'`` and ``'valid'``
            loaders yielding ``(x, y)`` batches.
        use_tqdm: whether ``mc_dropout`` shows a progress bar. This flag
            used to be accepted but never forwarded.

    Returns:
        Tuple ``(ymc_hats, etas)`` where ``etas`` is
        ``sqrt(eta_1s + eta_2sq)``.
    """
    device = utils.get_device()

    # mc dropout
    predict = load_trained_model(params, device)
    ymc_hats, eta_1s = mc_dropout(params, predict, dataloaders['test'],
                                  device, use_tqdm=use_tqdm)

    # inherent noise: evaluated with dropout off on the first validation
    # batch only.
    predict.apply(dropout_off)
    x, y = next(iter(dataloaders['valid']))
    x, y = x.to(device), y.to(device)
    eta_2sq = np.mean(cpu(predict((x, y[:, 0, 1:])))[:, 0])

    # total noise
    etas = np.sqrt(eta_1s + eta_2sq)
    return ymc_hats, etas
def mc_dropout(params, predict, dataloader, device, use_tqdm=True):
    """Average repeated stochastic forward passes over one batch.

    Dropout is enabled through ``predict.apply(dropout_on)`` and the model
    is evaluated ``params['inference']['B']`` times, each time on the first
    batch yielded by `dataloader`.

    Args:
        params: configuration; ``params['inference']['B']`` is the number
            of passes.
        predict: model, called as ``predict((x, y[:, 0, 1:]))``.
        dataloader: iterable of ``(x, y)`` batches.
        device: device the batch is moved to.
        use_tqdm: wrap the pass counter in a ``tqdm`` progress bar.

    Returns:
        Tuple ``(ymc_hats, eta_1s)``: the mean over passes, and the mean
        squared deviation of column 0 from that mean.
    """
    predict = predict.apply(dropout_on)

    passes = range(params['inference']['B'])
    if use_tqdm:
        from tqdm import tqdm
        passes = tqdm(passes)

    samples = []
    for _ in passes:
        # Only the first batch of a fresh pass over the loader is used.
        for x, y in dataloader:
            x, y = x.to(device), y.to(device)
            break
        samples.append(cpu(predict((x, y[:, 0, 1:]))))

    ymc_hats = np.mean(samples, axis=0)
    deviations = ymc_hats[:, 0] - np.stack(samples)[:, :, 0]
    eta_1s = np.mean(deviations**2, axis=0)
    return ymc_hats, eta_1s
def run_training_test(model, verbose=0):
    """Train `model` on the training set and print its RMSE on the test set.

    Args:
        model: model function to run; called as
            ``model(training_features, training_solutions, verbose)`` and
            expected to return ``(clf, columns)``.
        verbose: passed through to `model`.
    """
    # Load the test and training sets built from the raw_9 features.
    galaxy = GalaxyData(feature_extraction.raw_9, scale_features=False)
    test_features, test_solutions = galaxy.get_test_data()
    training_features, training_solutions = galaxy.get_training_data()

    # Train, then predict the held-out set.
    clf, columns = model(training_features, training_solutions, verbose)
    predicted_solutions = models.predict(clf, test_features, columns)

    # Evaluate the predictions.
    print(evaluate.get_rmse(test_solutions, predicted_solutions))
def run(model, verbose=0):
    """Train `model` and print its validation and training RMSE.

    Args:
        model: model function to run; called as
            ``model(training_features, training_solutions, verbose)`` and
            expected to return ``(clf, columns)``.
        verbose: passed through to `model`.
    """
    # Load the data and split into training and validation sets
    data = GalaxyData(feature_extraction.hog_features, scale_features=False)
    (training_features, training_solutions,
     validation_features, validation_solutions) = \
        data.split_training_and_validation_data(50)

    # Train and Predict Model
    (clf, columns) = model(training_features, training_solutions, verbose)
    # NOTE(review): the prediction result is not used below; the call is
    # kept in case ``models.predict`` is relied on for its side effects.
    predicted_validation_solutions = models.predict(clf, validation_features,
                                                    columns)

    # Evaluate Predictions
    valid_rmse = evaluate.get_errors_clf(clf, validation_features,
                                         validation_solutions)
    train_rmse = evaluate.get_errors_clf(clf, training_features,
                                         training_solutions)

    # Python 2 ``print`` statements are a SyntaxError on Python 3. A single
    # pre-formatted string prints the same text on both interpreters.
    print(" Validation RMSE:  %s" % (valid_rmse,))
    print(" Training RMSE:  %s" % (train_rmse,))
def _predict(model_type, params):
    """Prediction on the test set.

    Args:
        model_type: one of ``NN_k``, ``SVM_k``, ``kernel_ridge_k`` or
            ``ridge_k``.
        params: mapping holding at least the batch size and, for the neural
            network, the last-layer width, depth and both activations.

    Returns:
        The result of the matching ``predict`` function, or ``None`` when
        `model_type` is none of the supported values.
    """
    _check_key(params, batch_size_k)
    batch_size = int(params[batch_size_k])

    common_args = (paths.model_loc, paths.test_prepared_input_loc,
                   paths.test_labels_loc, batch_size)

    # Predictions of a neural network model
    if model_type == NN_k:
        # The network needs its architecture parameters as well.
        for required_key in (last_layer_width_k, depth_k, hidden_act_k,
                             outlayer_act_k):
            _check_key(params, required_key)

        last_layer_width = int(params[last_layer_width_k])
        depth = int(params[depth_k])
        return model_nn.predict(*common_args,
                                last_layer_width, depth,
                                params[hidden_act_k], params[outlayer_act_k])

    if model_type in (SVM_k, kernel_ridge_k, ridge_k):
        return models.predict(*common_args)

    return None
def train(sess, model, optimizer, log_dir, batch_size, num_sweeps_per_summary,
          num_sweeps_per_save, train_input_seqs, train_reset_seqs,
          train_label_seqs, test_input_seqs, test_reset_seqs, test_label_seqs):
    """ Train a model and export summaries.

    `log_dir` will be *replaced* if it already exists, so it certainly
    shouldn't be anything generic like `/home/user`.

    Every `num_sweeps_per_summary` sweeps the model is evaluated on the full
    training and test sets; a TensorFlow summary, a `status.txt` line, pickled
    test labels/predictions and a PNG visualization are written to `log_dir`.
    Every `num_sweeps_per_save` sweeps a checkpoint is saved. Training runs
    until the sweep counter exceeds `optimizer.num_train_sweeps`.

    Args:
        sess: A TensorFlow `Session`.
        model: An `LSTMModel`.
        optimizer: An `Optimizer`.
        log_dir: A string. The full path to the log directory.
        batch_size: An integer. The number of sequences in a batch.
        num_sweeps_per_summary: An integer. The number of sweeps between
            summaries.
        num_sweeps_per_save: An integer. The number of sweeps between saves.
        train_input_seqs: A list of 2-D NumPy arrays, each with shape
            `[duration, input_size]`.
        train_reset_seqs: A list of 2-D NumPy arrays, each with shape
            `[duration, 1]`.
        train_label_seqs: A list of 2-D NumPy arrays, each with shape
            `[duration, 1]`.
        test_input_seqs: A list of 2-D NumPy arrays, each with shape
            `[duration, input_size]`.
        test_reset_seqs: A list of 2-D NumPy arrays, each with shape
            `[duration, 1]`.
        test_label_seqs: A list of 2-D NumPy arrays, each with shape
            `[duration, 1]`.
    """

    # Exponential moving average of the training loss, exported as a summary.
    ema = tf.train.ExponentialMovingAverage(decay=0.5)
    update_train_loss_ema = ema.apply([model.loss])
    train_loss_ema = ema.average(model.loss)
    tf.scalar_summary('train_loss_ema', train_loss_ema)

    # The metrics are computed outside the graph and fed in through these
    # placeholders so that they can be written as summaries.
    train_accuracy = tf.placeholder(tf.float32, name='train_accuracy')
    train_edit_dist = tf.placeholder(tf.float32, name='train_edit_dist')
    test_accuracy = tf.placeholder(tf.float32, name='test_accuracy')
    test_edit_dist = tf.placeholder(tf.float32, name='test_edit_dist')
    values = [train_accuracy, train_edit_dist, test_accuracy, test_edit_dist]
    tags = [value.op.name for value in values]

    tf.scalar_summary('learning_rate', optimizer.learning_rate)
    tf.scalar_summary(tags, tf.pack(values))

    summary_op = tf.merge_all_summaries()

    # Start from an empty log directory: any existing one is deleted.
    if os.path.exists(log_dir):
        shutil.rmtree(log_dir)
    summary_writer = tf.train.SummaryWriter(logdir=log_dir, graph=sess.graph)
    saver = tf.train.Saver()

    sess.run(tf.initialize_all_variables())

    num_sweeps_visited = 0
    start_time = time.time()
    train_gen = data.sweep_generator(
        [train_input_seqs, train_reset_seqs, train_label_seqs],
        batch_size=batch_size, shuffle=True, num_sweeps=None)
    while num_sweeps_visited <= optimizer.num_train_sweeps:

        if num_sweeps_visited % num_sweeps_per_summary == 0:

            # Evaluate on the complete training and test sets.
            train_prediction_seqs = models.predict(
                sess, model, train_input_seqs, train_reset_seqs)
            train_accuracy_, train_edit_dist_ = metrics.compute_metrics(
                train_prediction_seqs, train_label_seqs)
            test_prediction_seqs = models.predict(
                sess, model, test_input_seqs, test_reset_seqs)
            test_accuracy_, test_edit_dist_ = metrics.compute_metrics(
                test_prediction_seqs, test_label_seqs)
            summary = sess.run(summary_op,
                               feed_dict={train_accuracy: train_accuracy_,
                                          train_edit_dist: train_edit_dist_,
                                          test_accuracy: test_accuracy_,
                                          test_edit_dist: test_edit_dist_})
            summary_writer.add_summary(summary, global_step=num_sweeps_visited)

            # One-line status: elapsed minutes, sweep count, then the train
            # and test accuracy / edit distance. The file is overwritten
            # each time.
            status_path = os.path.join(log_dir, 'status.txt')
            with open(status_path, 'w') as f:
                line = '%05.1f      ' % ((time.time() - start_time)/60)
                line += '%04d      ' % num_sweeps_visited
                line += '%.6f  %08.3f     ' % (train_accuracy_,
                                               train_edit_dist_)
                line += '%.6f  %08.3f' % (test_accuracy_,
                                          test_edit_dist_)
                print(line, file=f)

            # NOTE(review): the pickle files are opened in text mode ('w');
            # under Python 3 this would need 'wb' -- confirm the target
            # interpreter (cPickle and .next() below suggest Python 2).
            label_path = os.path.join(log_dir, 'test_label_seqs.pkl')
            with open(label_path, 'w') as f:
                cPickle.dump(test_label_seqs, f)

            pred_path = os.path.join(log_dir, 'test_prediction_seqs.pkl')
            with open(pred_path, 'w') as f:
                cPickle.dump(test_prediction_seqs, f)

            # Save a visualization named after the current sweep count, with
            # the status line as the title of the first axis.
            vis_filename = 'test_visualizations_%06d.png' % num_sweeps_visited
            vis_path = os.path.join(log_dir, vis_filename)
            fig, axes = data.visualize_predictions(test_prediction_seqs,
                                                   test_label_seqs,
                                                   model.target_size)
            axes[0].set_title(line)
            plt.tight_layout()
            plt.savefig(vis_path)
            plt.close(fig)

        if num_sweeps_visited % num_sweeps_per_save == 0:
            saver.save(sess, os.path.join(log_dir, 'model.ckpt'))

        train_inputs, train_resets, train_labels = train_gen.next()
        # We squeeze here because otherwise the targets would have shape
        # [batch_size, duration, 1, num_classes].
        train_targets = data.one_hot(train_labels, model.target_size)
        train_targets = train_targets.squeeze(axis=2)

        # One optimization step; the sweep counter is read back from the
        # graph in the same run call.
        _, _, num_sweeps_visited = sess.run(
            [optimizer.optimize_op,
             update_train_loss_ema,
             optimizer.num_sweeps_visited],
            feed_dict={model.inputs: train_inputs,
                       model.resets: train_resets,
                       model.targets: train_targets,
                       model.training: True})
# Script: fit a logistic regression on the training files given on the
# command line and write predictions for the test file.
#
# Usage: <script> X_TRAIN_PATH Y_TRAIN_PATH X_TEST_PATH ANS_PATH

MODEL_PATH = './model/model'
X_TRAIN_PATH = sys.argv[1]
Y_TRAIN_PATH = sys.argv[2]
X_TEST_PATH = sys.argv[3]
ANS_PATH = sys.argv[4]

# Load the training features and labels and the test features.
x = utils.load_data(X_TRAIN_PATH)
y = utils.load_data(Y_TRAIN_PATH).flatten()
x_test = utils.load_data(X_TEST_PATH)

# Rescale the training features and apply the same scaling to the test set.
# NOTE(review): `max` and `min` shadow the builtins for the rest of this
# module; any later call to max()/min() would hit these values instead.
x, max, min = utils.rescaling(x)
x_test = utils.scaling(x_test, max, min)

b, w = models.logistic_regression(x, y, lr=1, epoch=10000, validation_rate=0.1, optimizer='adagrad', early_stopping=True, patience=10)

y_pred = models.predict(x_test, b, w)

# print(y_pred)
# utils.save_ans(y_pred)
utils.save_ans_dir(y_pred, ANS_PATH)
# utils.save_model(b, w, MODEL_PATH)
# utils.save_scaler(max, min, SCALER_PATH)
def case_worksheet_10():
    """Worksheet 10: sequential experiments to maximise profit per hour.

    Two factors are studied, the selling price (P, $/part) and the
    throughput (T, parts/hour). The function walks through the steps:

    1. A first factorial with centre points and an interaction model.
    2. A verification run showing strong non-linearity in T.
    3. A second, smaller factorial that re-uses earlier runs.
    4. A quadratic model, followed by two extrapolation runs, each of which
       is added to the data before the model is refitted.
    """
    # Price: centred at 0.75 $/part, varied 0.10 $/part above and below.
    p = c(0.75, 0.75, 0.65, 0.85, 0.65, 0.85, center=0.75,
          range=[0.65, 0.85], name="Price", units='$/part')
    t = c(325, 325, 250, 250, 400, 400, center=325, range=[250, 400],
          name='Throughput', units='parts/hour')
    P1 = p.to_coded()
    T1 = t.to_coded()
    y1 = c(7740, 7755, 5651, 5812, 7363, 7397,
           name="Response: profit per hour", units="$/hour")
    expt1 = gather(P=P1, T=T1, y=y1, title="First experiment")

    mod_base1 = lm("y ~ P * T", data=expt1)
    summary(mod_base1)
    contour_plot(mod_base1, "P", "T", show=False)

    # Predict the points, using the model:
    prediction_1 = predict(mod_base1, P=P1, T=T1)
    print(prediction_1)
    print(y1 - prediction_1)

    # We see clear non-linearity, especially when viewed in the direction of T

    # Try anyway to make a prediction, to verify it
    # P ~ 0.15 and T ~ 2.0:
    P2 = P1.extend([0.15])
    T2 = T1.extend([2.0])
    p2 = P2.to_realworld()
    t2 = T2.to_realworld()
    print(t2)  # 475
    print(p2)  # 0.765
    print(predict(mod_base1, P=P2, T=T2))

    # Should have a predicted profit of 8599, but actual is 4654.
    # Confirms our model is in a very nonlinear region in the T=Throughput
    # direction.

    # Perhaps our factorial was far too big. Make the range smaller in T.
    # Prior range = [250; 400]; now try [325; 400].
    # Second factorial: re-use some of the points
    # * Original center point become bottom left
    # * Original (+1, +1) become top right
    p3 = c(0.75, 0.85, 0.75, 0.85, 0.65, 0.85, 0.765, center=0.80,
           range=[0.75, 0.85], name="Price", units='$/part')
    t3 = c(325, 325, 400, 400, 400, 250, 475, center=(325 + 400) / 2,
           range=(325, 400), name='Throughput', units='parts/hour')
    y3 = c(7755, 7784, 7373, 7397, 7363, 5812, 4654,
           name="Response: profit per hour", units="$/hour")
    P3 = p3.to_coded()
    T3 = t3.to_coded()
    expt3 = gather(P=P3, T=T3, y=y3, title="Smaller ranges")
    mod_base3 = lm("y ~ P * T", data=expt3)
    summary(mod_base3)
    contour_plot(mod_base3, "P", "T")

    # Predict directly from least squares model, the next experiment
    # at coded values of (+2, +2) seems good
    predict(mod_base3, P=+2, T=+2)

    # Prediction is 7855
    # In RW units that corresponds to: p=0.9 and t=437.5 = 438 parts/hour
    P4 = P3.extend([+2])
    T4 = T3.extend([+2])
    print(P4.to_realworld())
    print(T4.to_realworld())

    # ACTUAL value achieved is 6325. Not a good prediction yet either.
    # Add this point to the model. This point is below any of the base
    # factorial points!
    y4 = y3.extend([6325])
    expt4 = gather(P=P4, T=T4, y=y4, title="Adding the next exploration")
    mod_base4 = lm("y ~ P * T", data=expt4)
    contour_plot(mod_base4, "P", "T")

    # It is clear that this model does not meet our needs. We need a model
    # with quadratic fitting, nonlinear terms, to estimate the nonlinear
    # surface.
    expt5 = expt4.copy()
    mod_base5 = lm("y ~ P*T + I(P**2) + I(T**2)", data=expt5)
    print(summary(mod_base5))

    # add the xlim input in a second round
    contour_plot(mod_base5, "P", "T", xlim=(-2, 4))

    # Run at (P=3, T=-0.3) for the next run
    P6 = P4.extend([+3])
    T6 = T4.extend([-0.3])
    print(P6.to_realworld())
    print(T6.to_realworld())

    # Corresponds to p = 0.95 $/part, t=351 parts/hour
    # Predict = 7939
    # Actual = 7969. Really good matching.

    # Update the model and check
    y6 = y4.extend([7969])
    expt6 = gather(P=P6, T=T6, y=y6,
                   title="After extrapolation, based on quadratic term")
    mod_base6 = lm("y ~ P*T + I(P**2) + I(T**2)", data=expt6)
    contour_plot(mod_base6, "P", "T", xlim=(-2, 5))

    # Extrapolate again to (P=5, T=-0.3) for the next run
    P7 = P6.extend([+5])
    T7 = T6.extend([-0.3])
    print(P7.to_realworld())
    print(T7.to_realworld())
    predict(mod_base6, P=5, T=-0.3)

    # to P = 1.05, T=351 parts/hour
    # Predict = 7982
    # Actual = 8018. Better than predicted. Perhaps surface is a steeper
    # quadratic.

    # Update the model and check. The measured profit (8018) is what gets
    # added, exactly as for the earlier runs; the previous code appended the
    # model's own prediction (7982), which adds no new information.
    y7 = y6.extend([8018])
    expt7 = gather(P=P7, T=T7, y=y7, title="With 2 extrapolations")
    mod_base7 = lm("y ~ P*T + I(P**2) + I(T**2)", data=expt7)
    # NOTE(review): an upper x-limit of 148 coded units looks like a typo
    # next to the earlier limits of 4 and 5 -- confirm the intended value.
    contour_plot(mod_base7, "P", "T", xlim=(-2, 148))
def main():
    """Train a DenseNet with the GCPL prototype loss on CIFAR-10.

    For each of 10 epochs the network is trained on the CIFAR-10 training
    split, its state is saved to ``models.Config.pkl_path``, the number of
    prototypes is logged, and the test split is classified sample by sample
    with ``models.predict`` to log the average distance and accuracy.

    An existing state file is loaded before training; a failed load is
    logged and training starts from the fresh network.
    """
    logger = setup_logger(filename='log.txt')

    train_epoch_number = 10
    batch_size = 100

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

    trainset = torchvision.datasets.CIFAR10(root='./data/cifar10',
                                            train=True, download=False,
                                            transform=transform)
    trainloader = DataLoader(dataset=trainset, batch_size=batch_size,
                             shuffle=True, num_workers=24)

    testset = torchvision.datasets.CIFAR10(root='./data/cifar10',
                                           train=False, download=False,
                                           transform=transform)
    testloader = DataLoader(dataset=testset, batch_size=1, shuffle=False,
                            num_workers=24)

    prototypes = {}

    # net = models.CNNNet(device=device)
    net = models.DenseNet(device=device, number_layers=8, growth_rate=12,
                          drop_rate=0.0)
    logger.info("DenseNet Channels: %d", net.channels)

    gcpl = models.GCPLLoss(threshold=models.Config.threshold,
                           gamma=models.Config.gamma,
                           b=models.Config.threshold, tao=1.0, beta=0.5,
                           lambda_=0.001)
    sgd = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

    if not os.path.exists("pkl"):
        os.mkdir("pkl")

    if os.path.exists(models.Config.pkl_path):
        state_dict = torch.load(models.Config.pkl_path)
        try:
            net.load_state_dict(state_dict)
            logger.info("Load state from file %s.", models.Config.pkl_path)
        except RuntimeError:
            logger.error("Loading state from file %s failed.",
                         models.Config.pkl_path)

    for epoch in range(train_epoch_number):
        logger.info("Trainset size: %d, Epoch number: %d", len(trainset),
                    epoch + 1)

        for i, (features, labels) in enumerate(trainloader):
            features = features.to(net.device)
            # Use the size of this batch rather than the configured one, so
            # a final partial batch does not break the reshape.
            n_samples = features.size(0)

            sgd.zero_grad()
            features = net(features).view(n_samples, 1, -1)
            loss = gcpl(features, labels, prototypes)
            loss.backward()
            sgd.step()

            logger.debug("[%3d, %5d] loss: %7.4f", epoch + 1, i + 1,
                         loss.item() / n_samples)

        torch.save(net.state_dict(), models.Config.pkl_path)

        prototype_count = sum(len(prototypes[c]) for c in prototypes)
        logger.info("Prototypes Count: %d", prototype_count)

        # Evaluation after every epoch.
        # NOTE(review): the network mode (train/eval) is not switched here;
        # left unchanged because doing so would alter the reported numbers.
        distance_sum = 0.0
        correct = 0

        # No gradients are needed for evaluation. Without this, each
        # distance added to ``distance_sum`` keeps its autograd graph alive
        # for the whole pass over the test set.
        with torch.no_grad():
            for i, (feature, label) in enumerate(testloader):
                feature = net(feature.to(net.device)).view(1, -1)
                predicted_label, probability, min_distance = models.predict(
                    feature, prototypes)

                if label == predicted_label:
                    correct += 1

                distance_sum += min_distance

                logger.debug(
                    "%5d: Label: %d, Prediction: %d, Probability: %7.4f, Distance: %7.4f, Accuracy: %7.4f",
                    i + 1, label, predicted_label, probability, min_distance,
                    correct / (i + 1))

        logger.info("Distance Average: %7.4f", distance_sum / len(testloader))
        logger.info("Accuracy: %7.4f\n", correct / len(testloader))
def train(sess, model, optimizer, log_dir, batch_size,
          num_sweeps_per_summary, num_sweeps_per_save,
          train_input_seqs, train_reset_seqs, train_label_seqs,
          test_input_seqs, test_reset_seqs, test_label_seqs):
    """ Run the training loop, periodically writing summaries and checkpoints.

    WARNING: an existing `log_dir` is deleted and recreated, so never point
    it at a directory that holds anything else (e.g. `/home/user`).

    Args:
        sess: A TensorFlow `Session`.
        model: An `LSTMModel`.
        optimizer: An `Optimizer`.
        log_dir: A string. The full path to the log directory.
        batch_size: An integer. The number of sequences in a batch.
        num_sweeps_per_summary: An integer. The number of sweeps between
            summaries.
        num_sweeps_per_save: An integer. The number of sweeps between saves.
        train_input_seqs: A list of 2-D NumPy arrays, each with shape
            `[duration, input_size]`.
        train_reset_seqs: A list of 2-D NumPy arrays, each with shape
            `[duration, 1]`.
        train_label_seqs: A list of 2-D NumPy arrays, each with shape
            `[duration, 1]`.
        test_input_seqs: A list of 2-D NumPy arrays, each with shape
            `[duration, input_size]`.
        test_reset_seqs: A list of 2-D NumPy arrays, each with shape
            `[duration, 1]`.
        test_label_seqs: A list of 2-D NumPy arrays, each with shape
            `[duration, 1]`.
    """
    # Smoothed training loss, refreshed on every optimisation step.
    loss_averager = tf.train.ExponentialMovingAverage(decay=0.5)
    update_train_loss_ema = loss_averager.apply([model.loss])
    train_loss_ema = loss_averager.average(model.loss)
    tf.summary.scalar('train_loss_ema', train_loss_ema)

    # Metrics are computed in Python and fed back in through placeholders so
    # that they land in the same summary stream as the graph-side scalars.
    train_accuracy = tf.placeholder(tf.float32, name='train_accuracy')
    train_edit_dist = tf.placeholder(tf.float32, name='train_edit_dist')
    test_accuracy = tf.placeholder(tf.float32, name='test_accuracy')
    test_edit_dist = tf.placeholder(tf.float32, name='test_edit_dist')

    tf.summary.scalar('learning_rate', optimizer.learning_rate)
    metric_placeholders = (train_accuracy, train_edit_dist,
                           test_accuracy, test_edit_dist)
    for placeholder in metric_placeholders:
        tf.summary.scalar(placeholder.op.name, placeholder)

    summary_op = tf.summary.merge_all()

    # Start from a clean log directory.
    if os.path.exists(log_dir):
        shutil.rmtree(log_dir)
    summary_writer = tf.summary.FileWriter(logdir=log_dir, graph=sess.graph)
    saver = tf.train.Saver()

    sess.run(tf.global_variables_initializer())

    num_sweeps_visited = 0
    start_time = time.time()
    train_gen = data.sweep_generator(
        [train_input_seqs, train_reset_seqs, train_label_seqs],
        batch_size=batch_size, shuffle=True, num_sweeps=None)

    while num_sweeps_visited <= optimizer.num_train_sweeps:

        summary_due = num_sweeps_visited % num_sweeps_per_summary == 0
        if summary_due:
            train_prediction_seqs = models.predict(
                sess, model, train_input_seqs, train_reset_seqs)
            train_accuracy_, train_edit_dist_ = metrics.compute_metrics(
                train_prediction_seqs, train_label_seqs)

            test_prediction_seqs = models.predict(
                sess, model, test_input_seqs, test_reset_seqs)
            test_accuracy_, test_edit_dist_ = metrics.compute_metrics(
                test_prediction_seqs, test_label_seqs)

            metric_feed = {
                train_accuracy: train_accuracy_,
                train_edit_dist: train_edit_dist_,
                test_accuracy: test_accuracy_,
                test_edit_dist: test_edit_dist_,
            }
            summary = sess.run(summary_op, feed_dict=metric_feed)
            summary_writer.add_summary(summary,
                                       global_step=num_sweeps_visited)

            # One-line status: elapsed minutes, sweep count, then the
            # train and test (accuracy, edit distance) pairs.
            status_path = os.path.join(log_dir, 'status.txt')
            with open(status_path, 'w') as status_file:
                line = ''.join([
                    '%05.1f ' % ((time.time() - start_time) / 60),
                    '%04d ' % num_sweeps_visited,
                    '%.6f %08.3f ' % (train_accuracy_, train_edit_dist_),
                    '%.6f %08.3f ' % (test_accuracy_, test_edit_dist_),
                ])
                print(line, file=status_file)

            # Dump the test labels and latest predictions for offline use.
            label_path = os.path.join(log_dir, 'test_label_seqs.pkl')
            with open(label_path, 'wb') as label_file:
                cPickle.dump(test_label_seqs, label_file)

            pred_path = os.path.join(log_dir, 'test_prediction_seqs.pkl')
            with open(pred_path, 'wb') as pred_file:
                cPickle.dump(test_prediction_seqs, pred_file)

            # Save a figure of predictions vs. labels, titled with the
            # status line.
            vis_path = os.path.join(
                log_dir, 'test_visualizations_%06d.png' % num_sweeps_visited)
            fig, axes = data.visualize_predictions(
                test_prediction_seqs, test_label_seqs, model.target_size)
            axes[0].set_title(line)
            plt.tight_layout()
            plt.savefig(vis_path)
            plt.close(fig)

        checkpoint_due = num_sweeps_visited % num_sweeps_per_save == 0
        if checkpoint_due:
            saver.save(sess, os.path.join(log_dir, 'model.ckpt'))

        train_inputs, train_resets, train_labels = next(train_gen)
        # Without the squeeze the one-hot targets would have shape
        # [batch_size, duration, 1, num_classes].
        train_targets = data.one_hot(train_labels, model.target_size)
        train_targets = train_targets.squeeze(axis=2)

        fetches = [optimizer.optimize_op,
                   update_train_loss_ema,
                   optimizer.num_sweeps_visited]
        batch_feed = {
            model.inputs: train_inputs,
            model.resets: train_resets,
            model.targets: train_targets,
            model.training: True,
        }
        _, _, num_sweeps_visited = sess.run(fetches, feed_dict=batch_feed)
def case_worksheet_10C():
    """Worksheet 10C: sequential experiments on price (P) and throughput (T).

    Walks through a response-surface study of profit per hour: fit an
    interaction model to the first experiment, test a prediction, add axial
    points in the T direction, refit with a quadratic term in T, then
    extrapolate step by step in the P direction, appending each measured
    outcome to the response vector.

    Lower-case names (``p1``, ``t1``, ...) are real-world units and
    upper-case names (``P1``, ``T1``, ...) are coded units. The function
    takes no arguments and returns nothing; it prints values and calls
    ``summary`` / ``contour_plot``. The numbers in trailing comments are the
    values noted by the worksheet author.
    """
    # Price: centred at 0.75 $/part, 0.05 above and 0.05 $/part below
    p1 = c(0.75, 0.75, 0.7, 0.8, 0.7, 0.80,
           center=0.75, range=[0.70, 0.80], name="Price", units='$/part')
    t1 = c(325, 325, 300, 300, 350, 350,
           center=325, range=[300, 350], name='Throughput',
           units='parts/hour')
    P1 = p1.to_coded()
    T1 = t1.to_coded()
    y1 = c(7082, 7089, 6637, 6686, 7181, 7234,
           name="Response: profit per hour", units="$/hour")
    expt1 = gather(P=P1, T=T1, y=y1, title="First experiment")

    mod_base1 = lm("y ~ P * T", data=expt1)
    summary(mod_base1)
    contour_plot(mod_base1, "P", "T")

    # Predict the points, using the model:
    prediction_1 = predict(mod_base1, P=P1, T=T1)
    print(prediction_1)
    print(y1 - prediction_1)

    # We see clear non-linearity, especially when viewed in the direction of T
    # Try anyway to make a prediction, to verify it
    # P ~ 0.7 and T ~ 2.0:
    P2 = P1.extend([0.7])
    T2 = T1.extend([2.0])
    p2 = P2.to_realworld()
    t2 = T2.to_realworld()
    print(p2)  # 0.785
    print(t2)  # 375
    print(predict(mod_base1, P=P2, T=T2))
    # Should have a predicted profit of 7550, but actual is 7094.
    # Confirms our model is in a very nonlinear region in the T=Throughput
    # direction.

    # Add axial points, starting in the T direction:
    P3 = P2.extend([0, 0])
    T3 = T2.extend([1.68, -1.68])
    p3 = P3.to_realworld()
    t3 = T3.to_realworld()
    print(p3)  # 0.75, 0.75
    print(t3)  # 367, 283

    # Now build model with quadratic term in the T direction
    y3 = y1.extend([7094, 7174, 6258])
    expt3 = gather(P=P3, T=T3, y=y3, title="With axial points")
    mod_base3 = lm("y ~ P * T + I(T**2)", data=expt3)
    summary(mod_base3)
    contour_plot(mod_base3, "P", "T", xlim=(-1.5, 5))

    # Try extrapolating far out: (P, T) = (4, 1)
    P4 = P3.extend([4])
    T4 = T3.extend([1])
    p4 = P4.to_realworld()
    t4 = T4.to_realworld()
    print(p4)  # 0.95
    print(t4)  # 350
    predict(mod_base3, P=P4, T=T4)  # 7301
    # Actual: 7291
    # great! Keep going
    y4 = y3.extend([7291])

    # Try extrapolating far out: (P, T) = (6, 1)
    P5 = P4.extend([6])
    T5 = T4.extend([1])
    p5 = P5.to_realworld()
    t5 = T5.to_realworld()
    print(p5)  # 1.05
    print(t5)  # 350
    predict(mod_base3, P=P5, T=T5)  # 7344
    # Actual: 7324
    # great! Keep going
    y5 = y4.extend([7324])

    # Visualize model first.
    # (A stray ``y5 = y`` used to sit here; ``y`` is never defined in this
    # function, so it either raised NameError or threw away the response
    # vector extended just above.)
    expt5 = gather(P=P5, T=T5, y=y5, title="With extrapolated points")
    mod_base5 = lm("y ~ P * T + I(T**2)", data=expt5)
    summary(mod_base5)
    contour_plot(mod_base5, "P", "T", xlim=(-1.5, 18))

    # Try extrapolating further out: (P, T) = (10, 1)
    P6 = P5.extend([10])
    T6 = T5.extend([1])
    p6 = P6.to_realworld()
    t6 = T6.to_realworld()
    print(p6)  # 1.25
    print(t6)  # 350
    # NOTE(review): this still predicts with mod_base3, not the refitted
    # mod_base5 - confirm which model the 7431 figure came from.
    predict(mod_base3, P=P6, T=T6)  # 7431
    # Actual: 7378
    # Not matching; rebuild the model eventually.
    y6 = y5.extend([7378])
def predict(store=None, data=None, predict_index=None, **kwargs):
    """Run ``models.predict`` for the given index.

    Args:
        store: Passed as the first argument to ``DataContext``.
        data: Passed as the second argument to ``DataContext``.
        predict_index: Required; forwarded to ``models.predict``.
        **kwargs: Forwarded unchanged to ``Configuration``.

    Returns:
        Whatever ``models.predict`` returns.

    Raises:
        ValueError: If ``predict_index`` is ``None``.
    """
    if predict_index is not None:
        configuration = Configuration(**kwargs)
        context = DataContext(store, data)
        return models.predict(configuration,
                              context,
                              predict_index=predict_index)

    raise ValueError("You must specify a predict_index kw arg")
def modelo():
    """Fit linear, lasso and ridge models on an uploaded file and return JSON.

    On a POST request the uploaded ``file`` and the ``test`` form field are
    passed to ``models.prepararDatos``; the resulting splits are then given
    to ``models.predict`` once per model ('lr', 'lasso', 'ridge'). The
    response is ``{'result': {...}}`` with one entry per model.

    When ``test`` equals 'Simple' the scores are reported as returned and
    the lasso/ridge ``coef``/``alpha`` are placeholders (``{}`` and ``''``);
    otherwise the scores are converted with ``.tolist()`` and the fitted
    ``coef_``/``alpha_`` values are reported.

    Any other request method falls through and returns ``None``.
    """
    if request.method != 'POST':
        return None

    file = request.files['file']
    test_run = request.form['test']

    X_train, X_test, y_train, y_test, df_recoleta = models.prepararDatos(
        file, test_run)

    data_input = {
        'X_train': X_train,
        'y_train': y_train,
        'X_test': X_test,
        'y_test': y_test,
        'test_run': test_run,
        'df': df_recoleta
    }

    # Response key -> raw result of the corresponding model, in report order.
    fitted = {
        'LinearRegression': models.predict(data_input, 'lr'),
        'Lasso': models.predict(data_input, 'lasso'),
        'Ridge': models.predict(data_input, 'ridge'),
    }

    is_simple = (test_run == 'Simple')

    results = {}
    for label, outcome in fitted.items():
        regularised = label != 'LinearRegression'

        if is_simple:
            score = outcome['score']
        else:
            score = outcome['score'].tolist()

        if not regularised:
            coef = outcome['coef_'].tolist()
        elif is_simple:
            # 'coef_' is deliberately not read in the simple run.
            coef = {}
        else:
            coef = outcome['coef_']

        entry = {
            'score': score,
            'intercept': outcome['intercept_'],
            'coef': coef,
            'r2': outcome['r2'],
            'mae': outcome['MAE'],
            'mse': outcome['MSE'],
            'rmse': outcome['RMSE'],
        }
        if regularised:
            entry['alpha'] = '' if is_simple else outcome['alpha_']

        results[label] = entry

    return jsonify({'result': results})
# Report the cross-validation metrics on screen and on disk.
print_cv_metrics(metrics)
save_cv_metrics(metrics=results,
                file_name=args.results_folder +
                'exp1_raw-data_cv_cv-results.txt')

# Box plot comparing the score distribution of every algorithm.
print('\nPlotting algorithm comparison...')
fig = plt.figure()
fig.suptitle('Algorithm Comparison')
ax = fig.add_subplot(111)
plt.boxplot(scores)
ax.set_xticklabels(names)
ax.yaxis.grid(True,
              linestyle='-',
              which='major',
              color='lightgrey',
              alpha=0.5)
# Save BEFORE showing: once a blocking show() returns the figure may already
# be closed, and savefig() would then write an empty image.
plt.savefig(args.graphs_folder + 'exp1_raw-data_alg-comparison.png')
plt.show()

# Produce test-set predictions with each spot-check model and collect
# its metrics.
print('\nPredicting...')
test_results = []
for name, model in SPOT_CHECK_MODELS:
    print("\nMaking predictions with %s..." % name)
    predictions = predict(model, X_train, y_train, X_test)
    metrics = calculate_metrics(y_test, predictions)
    metrics['model_name'] = name
    test_results.append(metrics)
    print_model_metrics(metrics)

save_model_metrics(metrics=test_results,
                   file_name=args.results_folder +
                   'exp1_raw-data_cv_model-results.txt')
def run_from_config(args):
    """Run the pipeline stages enabled in a configuration file.

    The configuration is read from ``main_folder_path + args.path``. Three
    independent stages are each switched on by a boolean entry:

    * ``make-training-set``: generate (and optionally balance) random
      examples and write them to ``training-set-path``;
    * ``fit-model``: read the training set back, build a predictor and
      write it to ``trained-model-path``;
    * ``predict``: load the stored predictor and run ``models.predict`` on
      the selected PDB ids, writing to ``result-folder``.

    Args:
        args: Parsed command-line arguments. ``args.path`` is the config
            file path relative to ``main_folder_path``; a non-empty
            ``args.pdb_ids`` overrides the ``predict-ids`` config entry.
    """
    params = parse_config_file(main_folder_path + args.path)

    # ---- training set -------------------------------------------------
    if params["make-training-set"]:
        training_ids = params["training-ids"]
        # A leading 'all' means: use every protein in the dataset.
        if training_ids[0] == "all":
            training_ids = prot_dataset.get_prot_list()

        res, X, y = prot_dataset.generate_random_examples(
            training_ids,
            short_win=params["short-window"],
            large_win=params["large-window"],
            contact_threshold=params["contact-threshold"],
            ex_per_chain=params["examples-per-chain"])

        # Optionally even out the positive and negative examples.
        if params["balance"]:
            res, X, y = prot_dataset.balance_neg_pos(res, X, y,
                                                     params["positive-lb"])

        training_set_file = main_folder_path + params["training-set-path"]
        prot_dataset.training_set_out(X, y, training_set_file)

    # ---- model fitting ------------------------------------------------
    if params["fit-model"]:
        training_set = prot_dataset.training_set_in(
            main_folder_path + params["training-set-path"])

        model_type = params["model-type"]
        predictor = models.make_predictor(model_type=model_type,
                                          config=params[model_type],
                                          training_set=training_set,
                                          features=params["features"])
        models.model_out(predictor,
                         main_folder_path + params["trained-model-path"])

    # ---- prediction ---------------------------------------------------
    if params["predict"]:
        predictor = models.model_in(main_folder_path +
                                    params["trained-model-path"])

        # Ids given on the command line substitute the configured ones.
        configured_ids = params["predict-ids"]
        pdb_ids = args.pdb_ids if args.pdb_ids else configured_ids

        # A leading 'all' means: every protein not listed in training-ids.
        if pdb_ids[0] == "all":
            pdb_ids = [
                pdb for pdb in prot_dataset.get_prot_list()
                if pdb not in params["training-ids"]
            ]

        # Make sure the PDB and RING files are available.
        prot_dataset.download_pdb(pdb_ids)
        prot_dataset.download_ring(pdb_ids)

        models.predict(clf=predictor,
                       pdb_ids=pdb_ids,
                       features=params["features"],
                       short_win=params["short-window"],
                       large_win=params["large-window"],
                       contact_threshold=params["contact-threshold"],
                       path=main_folder_path + params["result-folder"],
                       blur=params["probability-blur"],
                       blur_w=params["probability-blur-len"])