import pickle
import wave

import numpy as np
from scipy import signal

import train


def main():
    # Pickled models must be opened in binary mode.
    with open("pipe.model", "rb") as f:
        model = pickle.load(f)
    preds = []
    """
    samplerate, samples = read("/Users/sunny/workspace/3d-printer/recordings/smartphone/1430177546499/1430177546499.wav")
    for i in range(0, len(samples), 220500):
        data, _ = train.prepare_data(samples[i:i+220500], samplerate, "00", 220500)
        preds.append(model.predict(data))
    """
    #f = wave.open("/Users/sunny/workspace/3d-printer/recordings/smartphone/1430177546499/1430177546499.wav")
    #f = wave.open('/Users/sunny/workspace/3d-printer/recordings/smartphone/1461799968313/1461801907950.wav')
    f = wave.open("/Users/sunny/Desktop/1461950958243.wav")
    data_tot = []
    # Process the recording in chunks of 4,410,000 frames (100 s at 44.1 kHz).
    for i in range(0, f.getnframes(), 4410000):
        print(i)
        waveData = f.readframes(4410000)
        # np.fromstring is deprecated for binary data; frombuffer is the supported equivalent.
        data_raw = np.reshape(np.frombuffer(waveData, dtype='int16'), (-1, 2))
        # Average the two stereo channels; cast first to avoid int16 overflow.
        mono = (data_raw[:, 0].astype(np.int32) + data_raw[:, 1]) // 2
        data = train.prepare_data(mono, f.getframerate(), "00", 4410)
        data_tot.append(data[0])
        preds.extend([int(elem) for elem in model.predict(data[0])])
        #data = struct.unpack('hh', np.array_split(np.fromstring(waveData), 220500))
    # Median-filter the per-chunk predictions to suppress isolated misclassifications.
    preds_smoothed = signal.medfilt(preds, kernel_size=21)
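# Quick illustration of the median-filter smoothing used above (toy values):
# an isolated spurious prediction is removed while longer runs are preserved.
#
#   from scipy import signal
#   noisy = [0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1]
#   signal.medfilt(noisy, kernel_size=3)   # -> [0 0 0 0 0 0 1 1 1 1 1]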
import os
import pickle
import time
from datetime import datetime

import numpy as np
from hyperopt import STATUS_OK, Trials, fmin, space_eval, tpe


def tune_single_model(parameter_space, config_name, max_evals, trials=None):
    # Prepare train data.
    X, y = prepare_data(parameter_space['features'],
                        parameter_space['image_feature_folders'],
                        test=False)

    def train_wrapper(params):
        cv_losses, cv_train_losses = cross_validate(params, X, y)
        # Return an object to be recorded in the hyperopt trials for future use.
        return {
            'loss': np.mean(cv_losses),
            'train_loss': np.mean(cv_train_losses),
            'status': STATUS_OK,
            'eval_time': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            'params': params
        }

    if trials is None:
        trials = Trials()

    # Tune the parameters.
    t1 = time.time()
    timestamp = datetime.now().strftime("%m-%d_%H:%M:%S")
    best = fmin(train_wrapper, parameter_space, algo=tpe.suggest,
                max_evals=max_evals, trials=trials)
    t2 = time.time()
    print('best trial found at round: ' + str(trials.best_trial['tid']))
    print('best loss: ' + str(trials.best_trial['result']['loss']))
    print(best)
    print(space_eval(parameter_space, best))
    print("time: %s minutes" % ((t2 - t1) / 60))

    # Save the experiment trials in a pickle.
    if not os.path.exists(TRIALS_FOLDER):
        os.makedirs(TRIALS_FOLDER)
    # TODO: save tuning config when dumping the trials pickle.
    pickle.dump(trials, open("%s%s_%s" % (TRIALS_FOLDER, config_name, timestamp), "wb"))

    return trials
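# A minimal usage sketch. The feature names and hyperparameter range below are
# hypothetical; only the two keys read by tune_single_model itself
# ('features' and 'image_feature_folders') are required by the code above.
#
#   from hyperopt import hp
#
#   parameter_space = {
#       'features': ['price', 'title_len'],   # hypothetical feature columns
#       'image_feature_folders': [],          # no image features in this sketch
#       'learning_rate': hp.loguniform('learning_rate', -5, 0),
#   }
#   trials = tune_single_model(parameter_space, 'baseline', max_evals=10)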
def classify(self, X):
    x, x_m = prepare_data(X, self.chardict, n_chars=self.n_char)
    vp = self.predict(x, x_m)
    print(vp)
    # Sort class indices by descending score, one row per example.
    ranks = np.argsort(vp)[:, ::-1]
    preds = []
    for idx, item in enumerate(X):
        preds.append(ranks[idx, :])
    # Return the top-ranked label for the first example.
    return preds[0][0]
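# How the ranking above works (toy scores: two examples, three classes):
#
#   import numpy as np
#   vp = np.array([[0.1, 0.7, 0.2],
#                  [0.5, 0.3, 0.2]])
#   ranks = np.argsort(vp)[:, ::-1]   # argsort ascending, then reverse each row
#   # ranks == [[1, 2, 0],            # class 1 scores highest for example 0
#   #           [0, 1, 2]]            # class 0 scores highest for example 1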
def test_model(Xt, yt, model_path=MODEL_PATH):
    # Load model and dictionaries
    print("Loading model params...")
    params = load_params('%s/best_model.npz' % model_path)

    print("Loading dictionaries...")
    with open('%s/dict.pkl' % model_path, 'rb') as f:
        chardict = pkl.load(f)
    with open('%s/label_dict.pkl' % model_path, 'rb') as f:
        labeldict = pkl.load(f)
    n_char = len(chardict) + 1
    n_classes = len(labeldict)
    print("#classes:", n_classes)
    print(labeldict)

    print("Building network...")
    # Tweet variables
    tweet = T.itensor3()
    targets = T.imatrix()  # unused below; kept from the training setup
    # masks
    t_mask = T.fmatrix()
    # network for prediction
    predictions = classify(tweet, t_mask, params, n_classes, n_char)

    # Theano function
    print("Compiling theano functions...")
    predict = theano.function([tweet, t_mask], predictions)

    # Test
    print("Testing...")
    preds = []
    targs = []
    # iterator over batches
    xr, y = list(BatchTweets(Xt, yt, labeldict, batch_size=N_BATCH))[0]
    print(xr, y)
    x, x_m = prepare_data(xr, chardict, n_chars=n_char)
    vp = predict(x, x_m)
    ranks = np.argsort(vp)[:, ::-1]
    for idx, item in enumerate(xr):
        preds.append(ranks[idx, :])
        targs.append(y[idx])
    print([r[0] for r in preds])

    # compute precision @1
    validation_cost = precision(np.asarray(preds), targs, 1)
    print(validation_cost)
def main(): print("Running train_aml.py") parser = argparse.ArgumentParser("train") parser.add_argument( "--model_name", type=str, help="Name of the Model", default="diabetes_model.pkl", ) parser.add_argument("--step_output", type=str, help=("output for passing data to next step")) parser.add_argument("--dataset_version", type=str, help=("dataset version")) parser.add_argument("--data_file_path", type=str, help=("data file path, if specified,\ a new version of the dataset will be registered")) parser.add_argument( "--caller_run_id", type=str, help=("caller run id, for example ADF pipeline run id")) parser.add_argument("--dataset_name", type=str, help=("Dataset name. Dataset must be passed by name\ to always get the desired dataset version\ rather than the one used while the pipeline creation")) args = parser.parse_args() print("Argument [model_name]: %s" % args.model_name) print("Argument [step_output]: %s" % args.step_output) print("Argument [dataset_version]: %s" % args.dataset_version) print("Argument [data_file_path]: %s" % args.data_file_path) print("Argument [caller_run_id]: %s" % args.caller_run_id) print("Argument [dataset_name]: %s" % args.dataset_name) model_name = args.model_name step_output_path = args.step_output dataset_version = args.dataset_version data_file_path = args.data_file_path dataset_name = args.dataset_name run = Run.get_context() print("Getting training parameters") # Load the training parameters from the parameters file with open("parameters.json") as f: pars = json.load(f) try: train_args = pars["training"] except KeyError: print("Could not load training values from file") train_args = {} # Log the training parameters print(f"Parameters: {train_args}") for (k, v) in train_args.items(): run.log(k, v) run.parent.log(k, v) # Get the dataset if (dataset_name): if (data_file_path == 'none'): dataset = Dataset.get_by_name(run.experiment.workspace, dataset_name, dataset_version) # NOQA: E402, E501 else: dataset = register_dataset(run.experiment.workspace, dataset_name, os.environ.get("DATASTORE_NAME"), data_file_path) else: e = ("No dataset provided") print(e) raise Exception(e) # Link dataset to the step run so it is trackable in the UI run.input_datasets['training_data'] = dataset run.parent.tag("dataset_id", value=dataset.id) # Split the data into test/train df0 = dataset.to_pandas_dataframe() df = prepare_data(df0) data = split_data(df) # Train the model model = train_model(data, train_args) explainer = TabularExplainer(model, data["train"]["X"], features=df0.drop(['car name', 'mpg'], axis=1).columns) global_explanation = explainer.explain_global(data["test"]["X"]) client = ExplanationClient.from_run(run) client.upload_model_explanation(global_explanation, comment='MPG Predication Explanation') # Evaluate and log the metrics returned from the train function metrics = get_model_metrics(model, data) for (k, v) in metrics.items(): run.log(k, v) run.parent.log(k, v) # Pass model file to next step os.makedirs(step_output_path, exist_ok=True) model_output_path = os.path.join(step_output_path, model_name) joblib.dump(value=model, filename=model_output_path) # Also upload model file to run outputs for history os.makedirs('outputs', exist_ok=True) output_path = os.path.join('outputs', model_name) joblib.dump(value=model, filename=output_path) run.tag("run_type", value="train") print(f"tags now present for run: {run.tags}") run.complete()
import logging

import numpy as np
from keras.models import load_model  # assumed: the .h5 model file matches the Keras format

batch_size = 500


def predict_on_thresholds(model, X_train, y_train, X_test, y_test):
    # NOTE: num=1 yields only the single threshold 0.499; raise num to sweep the range.
    thresholds = np.linspace(0.499, 0.503, 1)
    for threshold in thresholds:
        logs = predict_and_eval(model, X_train, y_train, X_test, y_test,
                                threshold=threshold)
        metrics_line = 'threshold: %.5f - ' % threshold
        for s in ['loss', 'acc', 'precision', 'recall', 'fbeta_score']:
            metrics_line += "%s: %.5f %s: %.5f - " % (s, logs[s],
                                                      'val_' + s, logs['val_' + s])
        print(metrics_line)


if __name__ == '__main__':
    model_filename = "../results/00061-saved-model.h5"
    sequences_file = "../data/protein-seqs-2017-01-23-203946.txt"
    functions_file = "../data/protein-functions-2017-01-23-203946.txt"

    # reset logging config
    logging.basicConfig(format='%(asctime)s [%(levelname)7s] %(message)s',
                        level=logging.DEBUG)

    X_train, y_train, X_test, y_test = prepare_data(sequences_file=sequences_file,
                                                    functions_file=functions_file,
                                                    target_function='0005524')
    print(X_train.shape)

    # load model
    model = load_model(model_filename)
    predict_on_thresholds(model, X_train, y_train, X_test, y_test)
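# Sketch of a denser sweep under the same loop (the 0.40-0.60 range is an
# assumption; the code above only probes immediately around 0.5):
#
#   thresholds = np.linspace(0.40, 0.60, 21)   # 21 evenly spaced cutoffs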
def predict(model, poses_test, test_data):
    predictions_parents, predictions_rels = model.predict(poses_test, verbose=0)
    flat_predictions_parents = [i for x in predictions_parents for i in x]
    flat_predictions_rels = [i for x in predictions_rels for i in x]
    return flat_predictions_parents, flat_predictions_rels
    # return postprocess(flat_predictions_parents, flat_predictions_rels,
    #                    predictions_parents, predictions_rels, test_data)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='Predict parents & relations.')
    # parser.add_argument('model_path', help='Path to file with saved model', type=str)
    parser.add_argument('path_test', help='Path to CONLL test file', type=str)
    args = parser.parse_args()

    model = load_model('generated/model_30e.h5')
    poses_test, parents_test, rels_test, _ = prepare_data(args.path_test,
                                                          max_len=MAX_LEN)
    test_data = get_conll(args.path_test, max_len=MAX_LEN)
    flat_predictions_parents, flat_predictions_rels = predict(
        model, poses_test, test_data)
    write_predicted_output_to_conll(flat_predictions_parents,
                                    flat_predictions_rels,
                                    test_data, MAX_LEN,
                                    'generated/output_test.conllu')
import argparse

from train import prepare_data, train_and_save_model
from real_time_inference import predict_real_time

if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Work on Neural Network Models")
    parser.add_argument("--command", required=True, type=str)
    args = parser.parse_args()

    if args.command == "train_conv1":
        x_tr, x_val, y_tr, y_val = prepare_data(True)
        train_and_save_model(1, x_tr, x_val, y_tr, y_val)
    elif args.command == "train_conv2":
        x_tr, x_val, y_tr, y_val = prepare_data(True)
        train_and_save_model(2, x_tr, x_val, y_tr, y_val)
    elif args.command == "train_GRU":
        x_tr, x_val, y_tr, y_val = prepare_data(False)
        train_and_save_model(3, x_tr, x_val, y_tr, y_val)
    elif args.command == "predict":
        model_num = int(
            input("Enter [1] for 1-Conv, [2] for 2-Conv, [3] for GRU: "))
        predict_real_time(model_num)
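# Example invocations (the script filename is assumed):
#
#   python main.py --command train_conv1   # train the 1-layer conv model
#   python main.py --command predict       # then choose a model interactively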
def request():
    import requests, json

    base_url = "http://localhost:8502/v1/models/mnist"

    # Model status: a GET on the base model URL.
    status_url = base_url
    print(requests.get(status_url).json())

    # Model metadata (signatures, input shapes).
    metadata_url = base_url + "/metadata"
    print(requests.get(metadata_url).json())

    predict_url = base_url + ":predict"

    from data_iter import TrainDataIter
    test_data = TrainDataIter(file_path="test_filter.csv", batch_size=10)

    from train import prepare_data
    tmp = None
    tt = None
    for data, target in test_data:
        tmp = data
        tt = target
        break

    user_ids, ad_ids, code_ids, ad_his, code_his, ad_mask, lengths_xx, target = prepare_data(
        tmp, tt, choose_len=0)

    # Placeholder names expected by the serving signature.
    base_ph = [
        "uid_batch_ph",
        "mid_batch_ph",
        "cat_batch_ph",
        "mid_his_batch_ph",
        "cat_his_batch_ph",
        "mask",
        "seq_len_ph",
    ]

    data = {}
    data["uid_batch_ph"] = user_ids.tolist()      # e.g. [3953]
    data["mid_batch_ph"] = ad_ids.tolist()        # e.g. [267]
    data["cat_batch_ph"] = code_ids.tolist()      # e.g. [6]
    data["mid_his_batch_ph"] = ad_his.tolist()    # e.g. [[246]]
    data["cat_his_batch_ph"] = code_his.tolist()  # e.g. [[7]]
    data["seq_len_ph"] = lengths_xx.tolist()      # e.g. [1]
    data["mask"] = ad_mask.tolist()               # e.g. [[1.0]]

    import pickle
    with open("data.pkl", "wb") as f:
        pickle.dump(data, f, 2)

    # Dummy payload; swap in the prepared feed dict via "inputs" to send real data.
    dd = {
        "signature_name": "serving",
        "instances": [{
            "x": [1, 1, 1],
            "xx": [2, 2, 2]
        }]
        # "inputs": data.copy()
    }

    import time
    begin = time.time()
    resp = requests.post(
        predict_url,
        data=json.dumps(dd),
    ).json()
    print(resp)
    # data = resp["outputs"]
    # print(len(data))
    # try:
    #     print(len(data[0][0]))
    # except:
    #     pass
    print(time.time() - begin)
import pickle

import torch

from train import prepare_data

if __name__ == "__main__":
    number_of_generated_names = 10
    minimal_generated_name_length = 6

    model = torch.load('trained_models/english/cat_names/model.pt')
    model.load_state_dict(torch.load('trained_models/english/cat_names/model_dicts.pt'))
    char_to_ix, ix_to_char = pickle.load(open('trained_models/english/cat_names/dicts.pickle', 'rb'))
    model.eval()

    names = prepare_data('data/english/cat_names.txt')

    generated_names = []
    while len(generated_names) < number_of_generated_names:
        prime_str = '<SOS>'
        prime_input = torch.tensor(char_to_ix[prime_str]).to(dtype=torch.long)
        model.init_hidden()
        _ = model(prime_input)
        input = prime_input
        predicted_char = ''
        word = prime_str
        i = 0
        while predicted_char != '<EOS>':
            output = model(input)
            # --- The original snippet ends here. What follows is an assumed
            # completion for illustration: sample the next character from the
            # softmax over the model output and feed it back in. ---
            probs = torch.softmax(output.view(-1), dim=0)
            next_ix = torch.multinomial(probs, 1).item()
            predicted_char = ix_to_char[next_ix]
            if predicted_char != '<EOS>':
                word += predicted_char
            input = torch.tensor(next_ix).to(dtype=torch.long)
            i += 1
        # Assumed: keep only names meeting the minimum-length setting above.
        name = word[len(prime_str):]
        if len(name) >= minimal_generated_name_length:
            generated_names.append(name)
from multiprocessing import Process, Queue  # assumed: Process/Queue below come from multiprocessing

from sigopt import Connection

from config import (SIGOPT_API_TOKEN, PARAMETERS, EXPERIMENT_NAME,
                    PROJECT_NAME, METRICS, OBSERVATION_BUDGET, DATASET_PATH)
from train import prepare_data, evaluate_assignments

conn = Connection(client_token=SIGOPT_API_TOKEN)
experiment = conn.experiments().create(name=EXPERIMENT_NAME,
                                       project=PROJECT_NAME,
                                       parameters=PARAMETERS,
                                       metrics=METRICS,
                                       observation_budget=OBSERVATION_BUDGET)

nb_classes, x_train, Y_train, x_test, Y_test = prepare_data(DATASET_PATH)

q = Queue()
while experiment.progress.observation_count < experiment.observation_budget:
    suggestion = conn.experiments(experiment.id).suggestions().create()
    # Run each evaluation in a fresh process so GPU/TF state is released.
    p = Process(target=evaluate_assignments,
                args=(q, experiment, suggestion, x_train, Y_train,
                      x_test, Y_test, nb_classes))
    p.start()
    p.join()
    metrics, metadata = q.get()
    conn.experiments(experiment.id).observations().create(
        suggestion=suggestion.id, values=metrics, metadata=metadata)
    # Refresh the experiment so observation_count reflects the new observation;
    # otherwise the loop condition never changes.
    experiment = conn.experiments(experiment.id).fetch()
# (Fragment: the script is truncated above this point; args.mode is defined by
# an earlier, elided parser argument. The '--plot' option below is
# reconstructed from the commented args.plot usage further down.)
parser.add_argument('--plot',
                    type=bool,
                    default=False,
                    help='display algorithm output')
args = parser.parse_args()

warnings.filterwarnings('ignore')

total_years = 20
total_dates = total_years * 365
intervals = range(6, 27)  # 21 interval lengths, 6 through 26
ticker = 'IBM'

data = read_data(ticker, total_dates, intervals)
labels = create_labels(data)
data['labels'] = labels
data.dropna(inplace=True)

#if args.plot:
#    show_data(data[-300:])

if args.mode != 'n':
    feature_idx, start_col, end_col = select_features(data)
    model, params, mcp, rlp, es = create_model_cnn()
    x_test, y_test, x_cv, y_cv, x_train, y_train, sample_weights = prepare_data(
        data, start_col, end_col, feature_idx)
    if args.mode in ('t', 'te'):
        train(model, x_train, y_train, params, x_cv, y_cv, mcp, rlp, es,
              sample_weights)
    if args.mode in ('e', 'te'):
        evaluate(x_test, y_test)
    # (Fragment: tail of the cross-validated ensembling function; its
    # definition is truncated above this point.)
    np.clip(predict, 0, 1, out=predict)
    val_error = math.sqrt(mean_squared_error(y_val, predict))
    print('validation error: %f' % val_error)
    val_errors.append(val_error)

    predict = pd.Series(predict, name='predict')
    val_id = val_id.reset_index(drop=True)
    predict = pd.concat([val_id, predict], axis=1)
    predicts.append(predict)
    return predicts, val_errors


config = config_map['xgboost_config']
ENSEMBLE_FOLDER = 'ensemble/'

X, y = prepare_data(config['features'] + ['item_id'],
                    config['image_feature_folders'],
                    test=False)
predicts, val_errors = ensemble_for_lgb(config, X, y)
print('Avg validation error: %f' % np.mean(val_errors))

for predict in predicts:
    print(predict.shape)
ensembled = pd.concat(predicts)
print(ensembled.shape)
assert ensembled.shape == (len(X), 2)

if not os.path.exists(ENSEMBLE_FOLDER):
    os.makedirs(ENSEMBLE_FOLDER)
ensembled.to_csv(ENSEMBLE_FOLDER + 'xgboost.csv', index=False)