Example #1
def create_lang_dataset(out_data_path, min_samples=0):
    """
    create the files holding the data for the language prediction model
    :param out_data_path: where to save the files
    :param min_samples: number of samples to keep from each language (<= 0 keeps everything)
    """
    if out_data_path[-1] != '/':
        out_data_path = out_data_path + '/'
    if min_samples <= 0:
        min_samples = 2 ** 20

    en_input, en_output = get_data("accent", english_dataset_path, file_list[5], out_accent_file)
    fr_input, fr_output = get_data("accent", french_dataset_path, file_list[5], out_accent_file)
    de_input, de_output = get_data("accent", german_dataset_path, file_list[5], out_accent_file)
    en_input = en_input[:min_samples]
    fr_input = fr_input[:min_samples]
    de_input = de_input[:min_samples]
    en_output = ["english" for _ in range(len(en_input))]
    fr_output = ["french" for _ in range(len(fr_input))]
    de_output = ["german" for _ in range(len(de_input))]
    inputs = en_input + fr_input + de_input
    outputs = en_output + fr_output + de_output

    print(len(inputs))
    print(len(outputs))
    print(get_count(outputs))

    get_features(out_data_path + "lang_", inputs, ['delta', 'delta2', 'sdc'])
    write_to_file_labels(out_data_path + "lang_out", outputs)
    in_files = ["lang_input" + str(i + 1) for i in range(6)]
    concat_files(out_data_path, in_files, "lang_in")
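A minimal usage sketch, assuming the helper functions (get_data, get_features, write_to_file_labels, concat_files) and the dataset-path globals referenced above are defined in the surrounding module; the output directory and sample cap are placeholders:

# Hypothetical call: build the language-ID dataset under ./data/lang/,
# keeping at most 5000 samples per language.
create_lang_dataset("./data/lang", min_samples=5000)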
Example #2
def create_gender_dataset(out_data_path, min_samples=0):
    """
    create the files holding the data for the gender prediction model
    :param out_data_path: where to save the files
    :param min_samples: minimum number of samples
    """
    if out_data_path[-1] != '/':
        out_data_path = out_data_path + '/'
    if min_samples <= 0:
        min_samples = 2 ** 20

    en_input, en_output = get_data(["gender", "age"], english_dataset_path,
                                   file_list[5], out_gender_file)

    inputs, outputs = clean_gender_dataset(en_input, en_output)
    inputs, outputs = create_equal_dataset(inputs, outputs, min_samples)

    print(len(inputs))
    print(len(outputs))
    print(get_count(outputs))

    get_features(out_data_path + "gender_", inputs, ['delta', 'delta2', 'pitch'])
    write_to_file_labels(out_data_path + "gender_out", outputs)
    in_files = ["gender_input" + str(i + 1) for i in range(6)]
    concat_files(out_data_path, in_files, "gender_in")
Example #3
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--out", default=".")
    parser.add_argument("--modes", action='append', required=True)
    parser.add_argument("--sets", action='append', required=True)
    parser.add_argument("--normalize", default=True)
    parser.add_argument("model_path", help="Pylearn2 model")

    options = parser.parse_args()

    from extract_features import get_features
    from emotiw.bouthilx.datasets import FeaturesDataset

    out = options.out
    d_modes = options.modes
    sets = options.sets
    model_path = options.model_path
    normalize = options.normalize

    targets = os.path.join(base_path, "afew2_train_targets.npy")

    from theano import config
    from theano import function
    for s in sets:
        features = [
            os.path.join(base_path, modes[mode], base_name % s)
            for mode in d_modes
        ]
        fd = FeaturesDataset(features, targets, "", normalize, shuffle=False)
        data = np.cast[config.floatX](fd.get_design_matrix())
        preds = get_features(model_path, data, layer_idx=None)

        np.save(os.path.join(out, "_".join(d_modes) + "_" + s), preds)
Example #5
def create_age_dataset(out_data_path, min_samples=0):
    """
    create the files holding the data for the age prediction model
    :param out_data_path: where to save the files
    :param min_samples: minimum number of samples
    """
    if out_data_path[-1] != '/':
        out_data_path = out_data_path + '/'
    if min_samples <= 0:
        min_samples = 2 ** 20

    en_input, en_output = get_data("age", english_dataset_path, file_list[5], out_age_file)

    inputs, outputs = clean_age_dataset(en_input, en_output)
    inputs, outputs = create_equal_dataset(inputs, outputs, min_samples)

    print(len(inputs))
    print(len(outputs))
    print(get_count(outputs))

    get_features(out_data_path + "age_", inputs, ['delta', 'delta2', 'pitch'])
    write_to_file_labels(out_data_path + "age_out", outputs)
    in_files = ["age_input" + str(i + 1) for i in range(6)]
    concat_files(out_data_path, in_files, "age_in")
Example #6
def calculate_featuresX(filename, a, sw):
    # All samples for activity
    X = genfromtxt(filename, delimiter=' ')
    i = 0
    # Get functions for features
    features = extract_features.generate_features()
    # Calculated features matrix
    outf = None
    while i + sw < X.shape[0]:
        fx = extract_features.get_features(X[i:i+sw, 0], features)
        fy = extract_features.get_features(X[i:i+sw, 1], features)
        fz = extract_features.get_features(X[i:i+sw, 2], features)
        # Concatenate the per-axis feature vectors and append the activity label
        feat = np.concatenate((fx, fy, fz, [a]))
        if outf is None:
            outf = feat
        else:
            # Stack feature rows into a matrix
            outf = np.vstack((outf, feat))
        # Advance the window by half its length (integer step)
        i += sw // 2
    savetxt('../data/huawei-p7/' + filename.split('/')[-1].split('.')[0] + 'X.txt', outf, delimiter=',')
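A hedged usage sketch for calculate_featuresX; the input file, activity label a, and window size sw below are placeholders:

# Hypothetical call: window a 3-axis accelerometer recording into 128-sample
# frames, label each frame with activity id 1, and save the resulting
# feature matrix as ../data/huawei-p7/walkingX.txt.
calculate_featuresX('../data/raw/walking.txt', a=1, sw=128)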
Example #7
def engine_evaluate(position):
    '''
    The zero-search engine's evaluation of `position`; a higher number
    means that the engine evaluates that the position favors white.
    '''
    x_unscaled = np.array([extract_features.get_features(position)]).astype(float)
    x_scaled = train.scaler_X.transform(x_unscaled)
    y_scaled = model.predict(extract_features.split_features(x_scaled))
    y_unscaled = train.scaler_Y.inverse_transform(y_scaled)
    return y_unscaled[0][0]
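A short usage sketch, assuming position is a python-chess board (as in the dataset-creation example further below) and that the model and train objects used above are already loaded:

import chess

# Hypothetical usage: evaluate the starting position; a positive score
# means the engine thinks white is better.
print(engine_evaluate(chess.Board()))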
Example #8
def make_prediction(url):

    features = get_features(url)
    #print(features)
    features_extracted = convertEncodingToPositive(features)
    #print(features_extracted)

    # Load the fitted one-hot encoder and transform the feature vector
    with open("One_Hot_Encoder", "rb") as f:
        one_hot_enc = pickle.load(f)
    transformed_point = one_hot_enc.transform(
        np.array(features_extracted).reshape(1, -1))

    # Load the trained random-forest model and predict
    with open("RF_Final_Model.pkl", "rb") as f:
        model = pickle.load(f)
    prediction = model.predict(transformed_point)[0]

    return prediction
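A minimal calling sketch, assuming the pickled One_Hot_Encoder and RF_Final_Model.pkl files are in the working directory; the URL is a placeholder:

# Hypothetical usage: classify a single URL with the pickled random-forest model.
print(make_prediction("http://example.com/login"))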
Example #9
def predict(filename, le, model_file):

    model = load_model(model_file)
    prediction_feature = extract_features.get_features(filename)
    if model_file == "trained_mlp.h5":
        prediction_feature = np.array([prediction_feature])
    elif model_file == "trained_cnn.h5":
        prediction_feature = np.expand_dims(np.array([prediction_feature]),
                                            axis=2)

    predicted_vector = model.predict_classes(prediction_feature)
    predicted_class = le.inverse_transform(predicted_vector)
    print("Predicted class", predicted_class[0])
    predicted_proba_vector = model.predict_proba([prediction_feature])

    predicted_proba = predicted_proba_vector[0]
    for i in range(len(predicted_proba)):
        category = le.inverse_transform(np.array([i]))
        print(category[0], "\t\t : ", format(predicted_proba[i], '.32f'))
Example #10
def predict(filename, model_file):

    model = load_model(model_file)
    prediction_feature = extract_features.get_features(filename)
    if model_file == "trained_mlp.h5":
        prediction_feature = np.array([prediction_feature])
    elif model_file == "trained_cnn.h5":
        prediction_feature = np.expand_dims(np.array([prediction_feature]),
                                            axis=2)

    predicted_vector = model.predict_classes(prediction_feature)
    #predicted_class = le.inverse_transform(predicted_vector)
    classes = predicted_vector[0]
    #print("Predicted: ",classes)
    #print("Predicted class",predicted_class[0])
    predicted_proba_vector = model.predict_proba([prediction_feature])

    predicted_proba = predicted_proba_vector[0]
    #print("Prob:" ,predicted_proba[1])
    return predicted_proba[1] * 100
Example #11
def main(_):
    trn_snt_files = [
        # '../datasets/training/as_simplified_training.utf8',
        '../datasets/training/cityu_simplified_training.utf8',
        '../datasets/training/msr_training.utf8',
        '../datasets/training/pku_training.utf8'
    ]
    trn_lbl_files = [splitext(f)[0] + '.bies' for f in trn_snt_files]

    tf.logging.info('Loading training data...')
    trn_examples = read_examples(trn_snt_files)
    trn_features = np.asarray(get_features(trn_examples, ''))
    y_trn = read_labels(trn_lbl_files)

    tf.logging.info('Creating model...')
    model = create_model()
    model.summary()

    tf.logging.info('Training model...')
    epochs = 1
    batch_size = 32
    steps = int(len(trn_features) / batch_size)
    for epoch in range(epochs):
        print('Epoch', epoch + 1)
        for uni_b, lbl_b in tqdm(train_data_generator([trn_features, y_trn],
                                                      batch_size,
                                                      shuffle=True),
                                 desc='Training Loop',
                                 total=steps):
            try:
                loss, acc = model.train_on_batch(uni_b, lbl_b)
                # print('Loss:', loss, 'Acc:', acc)
            except Exception as e:
                print(e)

    model.save('combined_bert_model.h5')
Example #12


def random_guess():
    data_random = {}

    data_random['domain'] = random.sample(domain_value_list, 1)[0]
    data_random['intent'] = random.sample(intent_value_list, 1)[0]
    slot = {}
    slot[random.sample(slots_key_list, 1)[0]] = \
        random.sample(slots_value_list, 1)[0]
    data_random['slots'] = slot

    return data_random


if __name__ == '__main__':
    import json
    dev_dct = json.load(open(sys.argv[1], encoding='utf8'))

    domain_value_list, intent_value_list, slots_key_list, slots_value_list = get_features(
        sys.argv[1])

    rguess_dct = []
    for dev_data in dev_dct:
        text_dic = {"text": dev_data['text']}
        rguess_dct.append(dict(text_dic, **random_guess()))
    with open(sys.argv[2], 'w') as out_file:
        json.dump(rguess_dct, out_file)
Example #13
def create_data(n_samples, verbose=False):
    file_game_pgns, file_stockfish_evals = (
        open(
            '/Users/colinni/evAl-chess/game_database.pgn',
            encoding='utf-8-sig',
            errors='surrogateescape'
        ),
        open('/Users/colinni/evAl-chess/stockfish_evaluations.csv')
    )
    # Discard the first line; it contains headers.
    file_stockfish_evals.readline()

    # The accumulated data samples.
    data_X, data_Y = [], []

    # Iterate through every game in the archive.
    curr_game = chess.pgn.read_game(file_game_pgns)
    n_curr_sample = 0
    # (`chess.pgn.read_game()` returns None when it reaches the EOF.)
    while curr_game is not None and n_curr_sample < n_samples:

        print('\rcurr game |', n_curr_sample, end='')

        # The evaluations of each position of each game.
        stockfish_evals = (
            # The evaluations are given in centi-pawns. Convert to the
            # more standard pawn scale.
            float(stockfish_eval) / 100.0
            # Stockfish gives 'NA' for forced mates.
            if stockfish_eval != 'NA'
            else None
            # The lines each begin with a number and comma
            # (e.g., '451,') which aren't part of the evaluations.
            # Discard by splitting the string by the comma, taking the
            # second part, and splitting once again to get the
            # individual numbers.
            for stockfish_eval in (
                file_stockfish_evals.readline()
                .split(',')[1]
                .split()
            )
        )

        # Iterate through every move played using the `chess.Game`
        # class.
        curr_game_node = (
            curr_game.root().variation(0)
            if not curr_game.root().variation(0).is_end()
            else None
        )
        # Setting `curr_game_node` to `None` as a flag is sloppy, but
        # the `chess.Game` class doesn't have a better way of detecting
        # 0-move games, which the database does contain.
        while curr_game_node is not None and n_curr_sample < n_samples:
            features, stockfish_eval = (
                extract_features.get_features(
                    curr_game_node.board(),
                    verbose=verbose
                ),
                next(stockfish_evals)
            )
            # Stockfish gives 'NA' for forced mates, which we earlier
            # set to `None`.
            if stockfish_eval is not None:
                data_X.append(features)
                data_Y.append(stockfish_eval)
                n_curr_sample += 1

            # Set curr_game_node to the next position in the game. If
            # it's the end of the game, set it to None as a flag.
            curr_game_node = (
                curr_game_node.variation(0)
                if not curr_game_node.is_end()
                else None
            )

        # Get the next game in the pgn file.
        curr_game = chess.pgn.read_game(file_game_pgns)

    # Convert `data_X` and `data_Y` into numpy arrays and store them
    # in numpy's npy format. To load, `np.load(path)`.
    np.save('../evAl-chess/X.npy', np.array(data_X).astype(float))
    np.save('../evAl-chess/Y.npy', np.array(data_Y))
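A brief usage sketch; the sample count is a placeholder and the hard-coded input/output paths above are assumed to exist:

# Hypothetical call: extract features and Stockfish evaluations for 100000
# positions and write X.npy / Y.npy to the paths given above.
create_data(100000, verbose=False)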
Example #14
def train_imageAVmodel():

    training_file = sys.argv[1]
    data_directory = sys.argv[2]
    parameter_file = 'params.json'
    
    params = json.loads(open(parameter_file).read())

    if params['extract_features'] == 'true':
        x_raw, y_raw = get_features(training_file, data_directory, params['vgg_file'], params['gistFile'], params['semF_file'])
        with open('x_data.pickle', 'wb') as f:
            pickle.dump(x_raw, f, protocol=pickle.HIGHEST_PROTOCOL)
        with open('y_data.pickle', 'wb') as f:
            pickle.dump(y_raw, f, protocol=pickle.HIGHEST_PROTOCOL)
    else:
        with open(params['x_file'], 'rb') as f:
            x_raw = pickle.load(f)
        with open(params['y_file'], 'rb') as g:
            y_raw = pickle.load(g)

    x = np.array(x_raw)
    y = np.array(y_raw)


    """ randomly shuffle data """
    np.random.seed(10)
    shuffle_indices = np.random.permutation(np.arange(len(y)))
    x_shuffled = x[shuffle_indices]
    y_shuffled = y[shuffle_indices]

    """ split the original dataset into train_ and test sets"""
    x_, x_test, y_, y_test = train_test_split(x_shuffled, y_shuffled, test_size=0.1, random_state=42)


    """ shuffle the train_ set and split the train set into train and val sets"""
    shuffle_indices = np.random.permutation(np.arange(len(y_)))
    x_shuffled = x_[shuffle_indices]
    y_shuffled = y_[shuffle_indices]
    x_train, x_val, y_train, y_val = train_test_split(x_shuffled, y_shuffled, test_size=0.1)

    logging.info('x_train: {}, x_val: {}, x_test: {}'.format(len(x_train), len(x_val), len(x_test)))
    logging.info('y_train: {}, y_val: {}, y_test: {}'.format(len(y_train), len(y_val), len(y_test)))


    """ build a graph """
    graph = tf.Graph()
    with graph.as_default():
        session_conf = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            imageAV = ImageAVmodel(
                input_length = x_train.shape[1],
                num_neurons_in_layers = params['num_neurons_in_layers'] )

            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.MomentumOptimizer(params['learning_rate'], params['momentum'])
            # grads_and_vars = optimizer.compute_gradients(imageAV.loss)
            train_op = optimizer.minimize(imageAV.loss, global_step=global_step)

            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(os.path.join( os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "modelData/trained_model_" + timestamp))

            saved_model_dir = os.path.abspath(os.path.join(out_dir, "saved_model"))
            checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables())

            if params['warm_start'] == 'true':
                saver.restore(sess, params['save_path'])
                logging.info('Model loaded from {}'.format(params['save_path']))


            # One training step: train the model with one batch
            def train_step(x_batch, y_batch):
                y_batch = np.reshape(y_batch, (len(y_batch),1))
                feed_dict = {
                    imageAV.input_x: x_batch,
                    imageAV.input_y: y_batch}
                _, step, loss, loss_S = sess.run([train_op, global_step, imageAV.loss, imageAV.loss_summary], feed_dict)
                return loss, loss_S

            # One evaluation step: evaluate the model with one batch
            def val_step(x_batch, y_batch):
                y_batch = np.reshape(y_batch, (len(y_batch),1))
                feed_dict = {imageAV.input_x: x_batch, imageAV.input_y: y_batch}
                step, loss, loss_S = sess.run([global_step, imageAV.loss, imageAV.loss_summary], feed_dict)
                return loss, loss_S


            sess.run(tf.global_variables_initializer())
            writer = tf.summary.FileWriter(logs_path, graph=tf.get_default_graph())

            train_batches = batch_iter(list(zip(x_train, y_train)), params['batch_size'],params['num_epochs'])
            min_loss, min_at_step = float("inf"), 0

            logging.info('<--------------Training has begun--------------->')

            """ train the cnn model with x_train and y_train (batch by batch)"""
            for train_batch in train_batches:
                x_train_batch, y_train_batch = zip(*train_batch)
                train_loss, _train_loss_summary = train_step(x_train_batch, y_train_batch)
                current_step = tf.train.global_step(sess, global_step)
                logging.debug('Train Step {}, Loss: {}'.format(current_step,train_loss))
                writer.add_summary(_train_loss_summary, current_step)
                
                """ evaluate the model with x_val and y_val (batch by batch)"""
                if current_step % params['evaluate_every'] == 0:
                    val_batches = batch_iter(list(zip(x_val, y_val)), params['batch_size'], 1)
                    total_val_loss = 0.0
                    for val_batch in val_batches:
                        x_val_batch, y_val_batch = zip(*val_batch)
                        val_loss, _val_loss_summary = val_step(x_val_batch, y_val_batch)
                        total_val_loss += val_loss

                    writer.add_summary(_val_loss_summary, current_step)

                    # avg_val_loss = total_val_loss/len(y_val)
                    logging.info('At step {},  Total loss on val set: {}'.format(current_step, total_val_loss))
                    # logging.info('At step {}, Average loss on val set: {}'.format(current_step, avg_val_loss))

                    """ save the model if it is the best based on loss on the val set """
                    if total_val_loss <= min_loss:
                        min_loss, min_at_step = total_val_loss, current_step
                        path = saver.save(sess, checkpoint_prefix, global_step=current_step)
                        logging.debug('Saved model {} at step {}'.format(path, min_at_step))
                        logging.debug('Best val loss {} at step {}'.format(min_loss, min_at_step))

            """ predict x_test (batch by batch)"""
            test_batches = batch_iter(list(zip(x_test, y_test)), params['batch_size'], 1)
            total_test_loss = 0.0
            logging.info("Testing Now.")
            for test_batch in test_batches:
                x_test_batch, y_test_batch = zip(*test_batch)
                test_loss, _test_loss_summary = val_step(x_test_batch, y_test_batch)
                total_test_loss += test_loss

            # avg_test_loss = total_test_loss/len(y_test)
            logging.info('Total loss on the test set is {} based on the best model {}'.format(total_test_loss, path))
            # logging.critical('Average loss on test set is {} based on the best model {}'.format(avg_test_loss, path))
            logging.info('The training is complete.')

            """ saving the model """