예제 #1
0
def main():
    """Classify a WAV recording chunk by chunk and median-filter the labels.

    The pickled model is loaded from ``pipe.model``.  The recording is read
    in blocks of 4410000 frames, each block is interpreted as interleaved
    two-channel int16 samples, the two channels are averaged, and the result
    is passed through ``train.prepare_data`` before ``model.predict``.  One
    integer prediction per returned element is collected in ``preds`` and
    finally smoothed with a 21-wide median filter.

    The function returns nothing; the ``pdb.set_trace()`` calls are kept as
    interactive inspection points for ``preds``, ``data_tot`` and
    ``preds_smoothed``.
    """
    # Pickles are binary data, so the file must be opened in binary mode.
    # NOTE: pickle.load executes arbitrary code; only load trusted models.
    with open("pipe.model", "rb") as model_file:
        model = pickle.load(model_file)

    preds = []
    data_tot = []
    chunk_frames = 4410000

    # Other recordings used previously:
    #   /Users/sunny/workspace/3d-printer/recordings/smartphone/1430177546499/1430177546499.wav
    #   /Users/sunny/workspace/3d-printer/recordings/smartphone/1461799968313/1461801907950.wav
    wav = wave.open("/Users/sunny/Desktop/1461950958243.wav")
    try:
        pdb.set_trace()
        framerate = wav.getframerate()
        for i in range(0, wav.getnframes(), chunk_frames):
            print(i)
            wave_data = wav.readframes(chunk_frames)
            # frombuffer replaces the deprecated binary mode of np.fromstring.
            data_raw = np.reshape(np.frombuffer(wave_data, dtype='int16'), (-1, 2))

            # Widen before adding: int16 + int16 wraps around on loud samples.
            # Floor division and the cast back keep the int16 integer result
            # the original Python 2 expression produced when no overflow hit.
            channel_sum = data_raw[:, 1].astype(np.int32) + data_raw[:, 0]
            mono = (channel_sum // 2).astype(np.int16)

            data = train.prepare_data(mono, framerate, "00", 4410)
            data_tot.append(data[0])
            preds.extend([int(elem) for elem in model.predict(data[0])])
    finally:
        # Always release the file handle, even if a chunk fails.
        wav.close()

    pdb.set_trace()

    preds_smoothed = signal.medfilt(preds, kernel_size=21)
    pdb.set_trace()
예제 #2
0
def tune_single_model(parameter_space, config_name, max_evals, trials=None):
    """Tune one model configuration with hyperopt and persist the trials.

    Args:
        parameter_space: hyperopt search space; its ``'features'`` and
            ``'image_feature_folders'`` entries are also used to build the
            training data.
        config_name: prefix of the pickle file written to ``TRIALS_FOLDER``.
        max_evals: evaluation budget forwarded to ``fmin``.
        trials: optional existing ``Trials`` object to continue from; a new
            one is created when omitted.

    Returns:
        The ``Trials`` object holding every evaluation of this run.
    """
    # Prepare train data.
    X, y = prepare_data(parameter_space['features'], parameter_space['image_feature_folders'], test=False)

    def train_wrapper(params):
        cv_losses, cv_train_losses = cross_validate(params, X, y)
        # return an object to be recorded in hyperopt trials for future uses
        return {
            'loss': np.mean(cv_losses),
            'train_loss': np.mean(cv_train_losses),
            'status': STATUS_OK,
            'eval_time': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            'params': params
        }

    if trials is None:
        trials = Trials()

    # tuning parameters
    t1 = time.time()
    timestamp = datetime.now().strftime("%m-%d_%H:%M:%S")
    best = fmin(train_wrapper, parameter_space, algo=tpe.suggest, max_evals=max_evals, trials=trials)
    t2 = time.time()
    print('best trial get at round: ' + str(trials.best_trial['tid']))
    print('best loss: ' + str(trials.best_trial['result']['loss']))
    print(best)
    print(space_eval(parameter_space, best))
    print("time: %s" %((t2-t1) / 60))

    # save the experiment trials in a pickle
    if not os.path.exists(TRIALS_FOLDER):
        os.makedirs(TRIALS_FOLDER)
    # TODO: save tuning config when dump trials pickle.
    # os.path.join keeps the file inside TRIALS_FOLDER whether or not the
    # constant ends with a path separator; the context manager makes sure
    # the pickle is flushed and the handle closed.
    trials_path = os.path.join(TRIALS_FOLDER, "%s_%s" % (config_name, timestamp))
    with open(trials_path, "wb") as trials_file:
        pickle.dump(trials, trials_file)

    return trials
예제 #3
0
 def classify(self, X):
     """Return the best-scoring index for the first item of ``X``.

     ``X`` is encoded with ``prepare_data`` using this object's character
     dictionary, scored with ``self.predict`` (the raw scores are printed),
     and each row of scores is ranked from highest to lowest.
     """
     x, x_m = prepare_data(X, self.chardict, n_chars=self.n_char)
     scores = self.predict(x, x_m)
     print(scores)
     # argsort is ascending; reversing axis 1 puts the best score first.
     order = np.argsort(scores)[:, ::-1]
     per_item = [order[row, :] for row, _ in enumerate(X)]
     top_choices = [ranking[0] for ranking in per_item]
     return top_choices[0]
예제 #4
0
def test_model(Xt, yt, model_path=MODEL_PATH):
    # Load model and dictionaries
    print("Loading model params...")
    params = load_params('%s/best_model.npz' % model_path)
    print("Loading dictionaries...")
    with open('%s/dict.pkl' % model_path, 'rb') as f:
        chardict = pkl.load(f)
    with open('%s/label_dict.pkl' % model_path, 'rb') as f:
        labeldict = pkl.load(f)

    n_char = len(chardict.keys()) + 1
    n_classes = len(labeldict.keys())
    print "#classes:", n_classes
    print labeldict

    print("Building network...")

    # Tweet variables
    tweet = T.itensor3()
    targets = T.imatrix()

    # masks
    t_mask = T.fmatrix()

    # network for prediction
    predictions = classify(tweet, t_mask, params, n_classes, n_char)

    # Theano function
    print("Compiling theano functions...")
    predict = theano.function([tweet, t_mask], predictions)

    # Test
    print("Testing...")
    preds = []
    targs = []

    # iterator over batches
    xr, y = list(BatchTweets(Xt, yt, labeldict, batch_size=N_BATCH))[0]
    print xr, y

    x, x_m = prepare_data(xr, chardict, n_chars=n_char)
    vp = predict(x, x_m)
    ranks = np.argsort(vp)[:, ::-1]

    for idx, item in enumerate(xr):
        preds.append(ranks[idx, :])
        targs.append(y[idx])

    print[ranks[0] for ranks in preds]
    # compute precision @1
    validation_cost = precision(np.asarray(preds), targs, 1)
    print validation_cost
예제 #5
0
def _parse_train_args():
    """Parse the command-line arguments accepted by the training step."""
    parser = argparse.ArgumentParser("train")
    parser.add_argument(
        "--model_name",
        type=str,
        help="Name of the Model",
        default="diabetes_model.pkl",
    )

    parser.add_argument("--step_output",
                        type=str,
                        help=("output for passing data to next step"))

    parser.add_argument("--dataset_version",
                        type=str,
                        help=("dataset version"))

    parser.add_argument("--data_file_path",
                        type=str,
                        help=("data file path, if specified,\
               a new version of the dataset will be registered"))

    parser.add_argument(
        "--caller_run_id",
        type=str,
        help=("caller run id, for example ADF pipeline run id"))

    parser.add_argument("--dataset_name",
                        type=str,
                        help=("Dataset name. Dataset must be passed by name\
              to always get the desired dataset version\
              rather than the one used while the pipeline creation"))

    return parser.parse_args()


def _load_train_params(path="parameters.json"):
    """Return the "training" section of the parameters file, or {} if absent."""
    with open(path) as f:
        pars = json.load(f)
    try:
        return pars["training"]
    except KeyError:
        print("Could not load training values from file")
        return {}


def _get_training_dataset(run, dataset_name, dataset_version, data_file_path):
    """Fetch the named dataset, registering a new version when a file is given.

    Raises:
        ValueError: if ``dataset_name`` is empty.
    """
    if not dataset_name:
        e = ("No dataset provided")
        print(e)
        # ValueError is still an Exception, so existing handlers keep working.
        raise ValueError(e)

    workspace = run.experiment.workspace
    # The literal string 'none' means "use the already registered dataset".
    if data_file_path == 'none':
        return Dataset.get_by_name(workspace, dataset_name, dataset_version)
    return register_dataset(workspace, dataset_name,
                            os.environ.get("DATASTORE_NAME"),
                            data_file_path)


def main():
    """Run the training step: load data, train, explain, log and save the model.

    Steps, in order: parse and echo the command-line arguments, log the
    training parameters from ``parameters.json`` to the run and its parent,
    resolve the dataset, train the model, upload a global explanation, log
    the evaluation metrics and dump the model both to the step output
    folder and to ``outputs``.

    Raises:
        ValueError: if no dataset name was supplied.
    """
    print("Running train_aml.py")

    args = _parse_train_args()

    print("Argument [model_name]: %s" % args.model_name)
    print("Argument [step_output]: %s" % args.step_output)
    print("Argument [dataset_version]: %s" % args.dataset_version)
    print("Argument [data_file_path]: %s" % args.data_file_path)
    print("Argument [caller_run_id]: %s" % args.caller_run_id)
    print("Argument [dataset_name]: %s" % args.dataset_name)

    model_name = args.model_name
    step_output_path = args.step_output

    run = Run.get_context()

    print("Getting training parameters")

    # Load the training parameters from the parameters file
    train_args = _load_train_params()

    # Log the training parameters
    print(f"Parameters: {train_args}")
    for (k, v) in train_args.items():
        run.log(k, v)
        run.parent.log(k, v)

    # Get the dataset
    dataset = _get_training_dataset(run, args.dataset_name,
                                    args.dataset_version,
                                    args.data_file_path)

    # Link dataset to the step run so it is trackable in the UI
    run.input_datasets['training_data'] = dataset
    run.parent.tag("dataset_id", value=dataset.id)

    # Split the data into test/train
    df0 = dataset.to_pandas_dataframe()
    df = prepare_data(df0)
    data = split_data(df)

    # Train the model
    model = train_model(data, train_args)

    # Explain the model globally; feature names are the raw columns minus
    # 'car name' and 'mpg'.
    explainer = TabularExplainer(model,
                                 data["train"]["X"],
                                 features=df0.drop(['car name', 'mpg'],
                                                   axis=1).columns)
    global_explanation = explainer.explain_global(data["test"]["X"])
    client = ExplanationClient.from_run(run)
    client.upload_model_explanation(global_explanation,
                                    comment='MPG Predication Explanation')

    # Evaluate and log the metrics returned from the train function
    metrics = get_model_metrics(model, data)
    for (k, v) in metrics.items():
        run.log(k, v)
        run.parent.log(k, v)

    # Pass model file to next step
    os.makedirs(step_output_path, exist_ok=True)
    model_output_path = os.path.join(step_output_path, model_name)
    joblib.dump(value=model, filename=model_output_path)

    # Also upload model file to run outputs for history
    os.makedirs('outputs', exist_ok=True)
    output_path = os.path.join('outputs', model_name)
    joblib.dump(value=model, filename=output_path)

    run.tag("run_type", value="train")
    print(f"tags now present for run: {run.tags}")

    run.complete()
예제 #6
0
# Module-level batch size. Nothing in the visible part of this script reads
# it -- NOTE(review): confirm it is still needed.
batch_size = 500

def predict_on_thresholds(model, X_train, y_train, X_test, y_test):
    """Evaluate the model at each candidate threshold and print its metrics.

    For every threshold, ``predict_and_eval`` is called and one line is
    printed containing the training and ``val_``-prefixed value of each of
    loss, acc, precision, recall and fbeta_score.  Nothing is returned.
    """
    # With num=1, linspace yields only the start value (0.499).
    # NOTE(review): a misspelled, never-read ``threhold = 0.5`` assignment
    # was removed here; if the intent was to force 0.5, change the range.
    thresholds = np.linspace(0.499, 0.503, 1)
    metric_names = ['loss', 'acc', 'precision', 'recall', 'fbeta_score']

    for threshold in thresholds:
        logs = predict_and_eval(model, X_train, y_train, X_test, y_test, threshold=threshold)

        parts = ["%s: %.5f %s: %.5f - " % (s, logs[s], 'val_' + s, logs['val_' + s])
                 for s in metric_names]
        metrics_line = 'threshold: %.5f - ' % threshold + "".join(parts)

        print(metrics_line)

if __name__ == '__main__':
    # Reset the logging configuration before any other work.
    logging.basicConfig(level=logging.DEBUG,
                        format='%(asctime)s [%(levelname)7s] %(message)s')

    # Input locations: saved model plus the sequence/function data files.
    model_filename = "../results/00061-saved-model.h5"
    sequences_file = "../data/protein-seqs-2017-01-23-203946.txt"
    functions_file = "../data/protein-functions-2017-01-23-203946.txt"

    X_train, y_train, X_test, y_test = prepare_data(
        sequences_file=sequences_file,
        functions_file=functions_file,
        target_function='0005524',
    )

    print(X_train.shape)

    # Load the saved model and report its metrics.
    model = load_model(model_filename)
    predict_on_thresholds(model, X_train, y_train, X_test, y_test)
예제 #7
0
def predict(model, poses_test, test_data):
    """Run ``model.predict`` on ``poses_test`` and flatten both outputs.

    The model is expected to return two sequences of sequences (parents and
    relations).  Each is flattened by one level and the two flat lists are
    returned as a tuple.  ``test_data`` is accepted but not used.
    """
    outputs = model.predict(poses_test, verbose=0)
    predictions_parents, predictions_rels = outputs

    flat_predictions_parents = []
    for row in predictions_parents:
        flat_predictions_parents.extend(row)

    flat_predictions_rels = []
    for row in predictions_rels:
        flat_predictions_rels.extend(row)

    return flat_predictions_parents, flat_predictions_rels


if __name__ == '__main__':
    # Command line: one positional argument, the CoNLL test file.
    parser = argparse.ArgumentParser(description='Predict parents & relations.')
    parser.add_argument('path_test', type=str, help='Path to CONLL test file')
    args = parser.parse_args()

    # The model location is fixed rather than read from the command line.
    model = load_model('generated/model_30e.h5')

    poses_test, parents_test, rels_test, _ = prepare_data(
        args.path_test, max_len=MAX_LEN)
    test_data = get_conll(args.path_test, max_len=MAX_LEN)

    flat_predictions_parents, flat_predictions_rels = predict(
        model, poses_test, test_data)

    # Write the flattened predictions next to the model artefacts.
    write_predicted_output_to_conll(
        flat_predictions_parents,
        flat_predictions_rels,
        test_data,
        MAX_LEN,
        'generated/output_test.conllu',
    )
예제 #8
0
파일: main.py 프로젝트: RCopJr/speech_recog
import argparse

from train import prepare_data, train_and_save_model
from real_time_inference import predict_real_time

if __name__ == "__main__":
    # Each training command maps to (model number passed to
    # train_and_save_model, boolean passed to prepare_data).
    train_commands = {
        "train_conv1": (1, True),
        "train_conv2": (2, True),
        "train_GRU": (3, False),
    }

    parser = argparse.ArgumentParser(
        description="Work on Neural Network Models")
    parser.add_argument("--command", required=True, type=str)
    args = parser.parse_args()

    if args.command in train_commands:
        model_num, prepare_flag = train_commands[args.command]
        x_tr, x_val, y_tr, y_val = prepare_data(prepare_flag)
        train_and_save_model(model_num, x_tr, x_val, y_tr, y_val)
    elif args.command == "predict":
        model_num = int(
            input("Enter [1] for 1-Conv, [2] for 2-Conv, [3] for GRU: "))
        predict_real_time(model_num)
    else:
        # An unrecognised command used to be ignored silently; report it.
        parser.error("unknown command %r (expected one of: %s)" % (
            args.command, ", ".join(sorted(train_commands) + ["predict"])))
예제 #9
0
def request():
    """Smoke-test the model server at localhost:8502 for the ``mnist`` model.

    Prints the model status and metadata responses, converts the first batch
    of ``test_filter.csv`` to plain lists and pickles it to ``data.pkl``,
    then posts a predict request and prints the response and elapsed time.

    NOTE(review): the posted payload is the hard-coded ``instances`` example
    below, not the prepared batch; the batch is only written to ``data.pkl``.
    """
    import json
    import pickle
    import time

    import requests

    from data_iter import TrainDataIter
    from train import prepare_data

    base_url = "http://localhost:8502/v1/models/mnist"
    print(requests.get(base_url).json())
    print(requests.get(base_url + "/metadata").json())
    predict_url = base_url + ":predict"

    test_data = TrainDataIter(file_path="test_filter.csv", batch_size=10)
    # Only the first batch is needed; an empty iterator falls back to
    # (None, None), matching what the previous for/break loop left behind.
    batch, batch_target = next(iter(test_data), (None, None))

    (user_ids, ad_ids, code_ids, ad_his, code_his, ad_mask, lengths_xx,
     _target) = prepare_data(batch, batch_target, choose_len=0)

    # Keys are inserted in the same order as before so the pickle is unchanged.
    data = {
        "uid_batch_ph": user_ids.tolist(),
        "mid_batch_ph": ad_ids.tolist(),
        "cat_batch_ph": code_ids.tolist(),
        "mid_his_batch_ph": ad_his.tolist(),
        "cat_his_batch_ph": code_his.tolist(),
        "seq_len_ph": lengths_xx.tolist(),
        "mask": ad_mask.tolist(),
    }

    with open("data.pkl", "wb") as f:
        pickle.dump(data, f, 2)

    dd = {
        "signature_name": "serving",
        "instances": [{
            "x": [1, 1, 1],
            "xx": [2, 2, 2]
        }]
        # "inputs": data.copy()
    }

    begin = time.time()
    resp = requests.post(
        predict_url,
        data=json.dumps(dd),
    ).json()
    print(resp)
    print(time.time() - begin)
예제 #10
0
import pickle

import torch

from train import prepare_data

if __name__ == "__main__":
    number_of_generated_names = 10
    minimal_generated_name_length = 6
    model = torch.load('trained_models/english/cat_names/model.pt')
    model.load_state_dict(torch.load('trained_models/english/cat_names/model_dicts.pt'))
    char_to_ix, ix_to_char = pickle.load(open('trained_models/english/cat_names/dicts.pickle', 'rb'))

    model.eval()

    names = prepare_data('data/english/cat_names.txt')
    generated_names = []

    while len(generated_names) < number_of_generated_names:
        prime_str = '<SOS>'
        prime_input = torch.tensor(char_to_ix[prime_str]).to(dtype=torch.long)

        model.init_hidden()
        _ = model(prime_input)

        input = prime_input
        predicted_char = ''
        word = prime_str
        i = 0
        while predicted_char != '<EOS>':
            output = model(input)
예제 #11
0
from sigopt import Connection

from config import (SIGOPT_API_TOKEN, PARAMETERS, EXPERIMENT_NAME,
                    PROJECT_NAME, METRICS, OBSERVATION_BUDGET, DATASET_PATH)

from train import prepare_data, evaluate_assignments

# Process and Queue are used below but were never imported by this script.
from multiprocessing import Process, Queue

# NOTE(review): this script starts child processes at import time; on
# platforms that use the "spawn" start method it should be wrapped in an
# ``if __name__ == "__main__":`` guard.
conn = Connection(client_token=SIGOPT_API_TOKEN)

experiment = conn.experiments().create(name=EXPERIMENT_NAME,
                                       project=PROJECT_NAME,
                                       parameters=PARAMETERS,
                                       metrics=METRICS,
                                       observation_budget=OBSERVATION_BUDGET)

nb_classes, x_train, Y_train, x_test, Y_test = prepare_data(DATASET_PATH)
q = Queue()

while experiment.progress.observation_count < experiment.observation_budget:

    suggestion = conn.experiments(experiment.id).suggestions().create()

    # Each suggestion is evaluated in its own process; the result comes back
    # through the queue.
    p = Process(target=evaluate_assignments,
                args=(q, experiment, suggestion, x_train, Y_train, x_test,
                      Y_test, nb_classes))
    p.start()
    # Read the result before joining: a child that has put data on a queue
    # may not exit until that data is consumed, so join-then-get can hang.
    metrics, metadata = q.get()
    p.join()

    conn.experiments(experiment.id).observations().create(
        suggestion=suggestion.id, values=metrics, metadata=metadata)

    # Refresh the experiment so the loop condition sees the new observation
    # count; without this the local object never changes and the loop does
    # not terminate.
    experiment = conn.experiments(experiment.id).fetch()
예제 #12
0
                        type=bool,
                        default=False,
                        help='display algorithm output')

    args = parser.parse_args()
    warnings.filterwarnings('ignore')

    total_years = 20
    total_dates = total_years * 365
    intervals = range(6, 27)  # 21
    ticker = 'IBM'

    data = read_data(ticker, total_dates, intervals)
    labels = create_labels(data)
    data['labels'] = labels
    data.dropna(inplace=True)

    #if args.plot:
    #    show_data(data[-300:])

    if args.mode != 'n':
        feature_idx, start_col, end_col = select_features(data)
        model, params, mcp, rlp, es = create_model_cnn()
        x_test, y_test, x_cv, y_cv, x_train, y_train, sample_weights = prepare_data(
            data, start_col, end_col, feature_idx)
        if args.mode == 't' or args.mode == 'te':
            train(model, x_train, y_train, params, x_cv, y_cv, mcp, rlp, es,
                  sample_weights)
        if args.mode == 'e' or args.mode == 'te':
            evaluate(x_test, y_test)
예제 #13
0
        np.clip(predict, 0, 1, out=predict)

        val_error = math.sqrt(mean_squared_error(y_val, predict))
        print('validate caculated: %f' % val_error)
        val_errors.append(val_error)

        predict = pd.Series(predict, name='predict')
        val_id = val_id.reset_index(drop=True)
        predict = pd.concat([val_id, predict], axis=1)
        predicts.append(predict)
    return predicts, val_errors


# Output folder for the ensembled predictions and the configuration in use.
ENSEMBLE_FOLDER = 'ensemble/'
config = config_map['xgboost_config']

# 'item_id' is appended to the configured feature list for this run.
X, y = prepare_data(
    config['features'] + ['item_id'],
    config['image_feature_folders'],
    test=False,
)
predicts, val_errors = ensemble_for_lgb(config, X, y)

print('Avg validation error: %f' % np.mean(val_errors))
for predict in predicts:
    print(predict.shape)

# Stack the per-fold frames; the result must have one row per training row
# and exactly two columns.
ensembled = pd.concat(predicts)
print(ensembled.shape)
assert ensembled.shape == (len(X), 2)

if not os.path.exists(ENSEMBLE_FOLDER):
    os.makedirs(ENSEMBLE_FOLDER)
ensembled.to_csv(os.path.join(ENSEMBLE_FOLDER, 'xgboost.csv'), index=False)