Ejemplo n.º 1
0
def _reserve_temp_path():
    """Create an empty temporary file and return its path with no handle left open."""
    # Only the name is needed; closing right away avoids leaking the
    # descriptor and lets the path be re-opened on every platform.
    with tempfile.NamedTemporaryFile(delete=False) as handle:
        return handle.name


def build_train_dev_data(file_src, file_tgt, file_align, mode):
    """Split a parallel corpus into dev/train parts and run ``build_data`` on each.

    The three input files are read in lockstep. Every triple of lines is
    stripped and written as one tab-separated record; the first 1000
    records go to the dev split, all later records to the train split.
    Each split is then handed to ``build_data`` together with a freshly
    reserved temporary output path.

    Args:
        file_src: Path of the source-side text file.
        file_tgt: Path of the target-side text file.
        file_align: Path of the alignment file.
        mode: Value forwarded to ``build_data`` after the ``-mode`` flag.

    Returns:
        Tuple ``(train_data_path, dev_data_path)`` with the paths that were
        passed to ``build_data`` as ``-output``. These files are not
        deleted here.
    """
    dev_lines = 1000

    temp_train_path = _reserve_temp_path()
    temp_dev_path = _reserve_temp_path()
    try:
        with open(temp_train_path, 'w') as train_file, \
                open(temp_dev_path, 'w') as dev_file, \
                open(file_src) as srcfile, \
                open(file_tgt) as tgtfile, \
                open(file_align) as alignfile:
            for n, (x, y, a) in enumerate(zip(srcfile, tgtfile, alignfile)):
                # Records 0..dev_lines-1 form the dev split, the rest train.
                outfile = dev_file if n < dev_lines else train_file
                outfile.write("{0}\t{1}\t{2}\n".format(
                    x.strip(), y.strip(), a.strip()))

        temp_train_data_path = _reserve_temp_path()
        temp_dev_data_path = _reserve_temp_path()

        build_data(['', '-data', temp_train_path, '-mode', mode,
                    '-output', temp_train_data_path])
        build_data(['', '-data', temp_dev_path, '-mode', mode,
                    '-output', temp_dev_data_path])
    finally:
        # The intermediate tab-separated files are removed even when
        # reading the inputs or build_data fails.
        os.remove(temp_train_path)
        os.remove(temp_dev_path)
    return temp_train_data_path, temp_dev_data_path
Ejemplo n.º 2
0
    def train(self, subset=None):
        """Fit an MLP classifier on the stored images and labels.

        Features are computed with ``build_data``. Their mean and standard
        deviation over the first three axes are stored in ``self.mean`` and
        ``self.std``, and the features are passed through
        ``self.normalize``. The 4-D feature array is then flattened to one
        row per pixel and the labels are expanded to a 0/1 indicator matrix
        with ``self.nb_labels`` columns. The fitted classifier is stored in
        ``self.model``.

        Args:
            subset: Optional index (list, array or other NumPy-style index)
                selecting the samples of ``self.images`` / ``self.labels``
                to train on. ``None`` or an empty sequence selects
                everything.
        """
        print("\n### Train MLP - %s" % self.database)
        Y = self.labels
        X = self.images
        # ``None`` replaces the former mutable ``[]`` default. ``np.size``
        # also copes with NumPy index arrays, for which ``subset != []`` is
        # not a usable truth test.
        if subset is not None and np.size(subset) > 0:
            Y = Y[subset]
            X = X[subset]

        X = build_data(X)
        self.mean = np.mean(X, axis=(0, 1, 2))
        self.std = np.std(X, axis=(0, 1, 2))
        X = self.normalize(X)
        (n_samples, height, width, p) = X.shape
        # One row per pixel: p features in X, a single label in Y.
        X = X.reshape(n_samples * height * width, p)
        Y = Y.reshape(n_samples * height * width, 1)

        # Train
        # Broadcasting the (N, 1) labels against arange(nb_labels) yields an
        # (N, nb_labels) boolean matrix; ``* 1`` turns it into integers.
        Y_train_vector = (Y == np.arange(self.nb_labels)) * 1
        mlp = MLPClassifier(hidden_layer_sizes=params.hidden_layer_sizes,
                            max_iter=params.max_iter,
                            alpha=params.alpha,
                            solver=params.solver,
                            activation=params.activation,
                            verbose=params.verbose,
                            tol=params.tol,
                            random_state=params.random_state,
                            learning_rate_init=params.learning_rate_init)
        mlp.fit(X, Y_train_vector)
        print("Done.")
        self.model = mlp
Ejemplo n.º 3
0
    # Validation split: only the debug index file is active; the other
    # index files are left commented out.
    index_files_dict['valid'] = [
        PREFIX_INDEX_FOLDER + "debug_valid.txt",
        # PREFIX_INDEX_FOLDER + "bouliane_valid.txt",
        # PREFIX_INDEX_FOLDER + "hand_picked_Spotify_valid.txt",
        # PREFIX_INDEX_FOLDER + "liszt_classical_archives_valid.txt"
    ]
    # Test split: same debug-only selection as the validation split.
    index_files_dict['test'] = [
        PREFIX_INDEX_FOLDER + "debug_test.txt",
        # PREFIX_INDEX_FOLDER + "bouliane_test.txt",
        # PREFIX_INDEX_FOLDER + "hand_picked_Spotify_test.txt",
        # PREFIX_INDEX_FOLDER + "liszt_classical_archives_test.txt"
    ]

    # Call build_data with the index files and the quantization /
    # temporal-granularity settings from script_param. data_folder is passed
    # as store_folder and data_folder + '/temp.p' as meta_info_path.
    build_data(index_files_dict=index_files_dict,
               meta_info_path=data_folder + '/temp.p',
               quantization=script_param['quantization'],
               temporal_granularity=script_param['temporal_granularity'],
               store_folder=data_folder,
               logging=logging)

############################################################
# Hyper parameter space
############################################################
# Each component exposes its own hyper-parameter space via get_hp_space().
model_space = Model_class.get_hp_space()
optim_space = Optimization_method.get_hp_space()
# Single dictionary bundling both spaces with the training and script
# parameters defined earlier.
space = {'model': model_space, 'optim': optim_space, 'train': train_param, 'script': script_param}

############################################################
# MongoDB
############################################################
# Connection settings: local host, port 27017 (MongoDB's default port).
host = "localhost"
port = 27017
Ejemplo n.º 4
0
        raise ValueError(
            'The configuration file and the timestamp should be specified.')

    es_file = sys.argv[3] + "/es_" + sys.argv[2] + ".txt"
    es_epoch = sys.maxsize
    if os.path.isfile(es_file) == True:
        with open(es_file, 'r') as myfile:
            es_epoch = int(myfile.read())
            myfile.close()
    return es_epoch


if __name__ == "__main__":

    # checkInputs() returns the epoch read from the es_*.txt file, or
    # sys.maxsize when that file does not exist (presumably an
    # early-stopping epoch -- TODO confirm).
    es_epoch = checkInputs()
    # sys.argv[1] is handed to build_data, which returns the config object.
    config = build_data(sys.argv[1])
    # Fold the dev documents into the training documents (in-place extend).
    config.train_id_docs.extend(config.dev_id_docs)
    train_data = utils.HeadData(config.train_id_docs, np.arange(len(config.train_id_docs)))
    test_data = utils.HeadData(config.test_id_docs, np.arange(len(config.test_id_docs)))
    # Start from an empty default graph with a fixed graph-level seed.
    tf.reset_default_graph()
    tf.set_random_seed(1)
    utils.printParameters(config)

    # ---- Training ----
    # Session options: limit this process to 85% of the GPU memory.
    config1 = tf.ConfigProto()
    config1.gpu_options.per_process_gpu_memory_fraction = 0.85
    with tf.Session(config=config1) as sess:
        # saver = tf.train.import_meta_graph('model.ckpt.meta')
        # saver.restore(sess, 'model.ckpt')
        # Non-trainable variable shaped like config.wordvectors.
        # NOTE(review): the name is bound to the result of .assign(...),
        # not to the variable itself -- confirm this is intended.
        embedding_matrix = tf.get_variable('embedding_matrix', shape=config.wordvectors.shape, dtype=tf.float32,
                                           trainable=False).assign(config.wordvectors)
Ejemplo n.º 5
0
    if (len(sys.argv) <= 3) or os.path.isfile(sys.argv[0]) == False:
        raise ValueError(
            'The configuration file and the timestamp should be specified.')


import os
# Expose only GPU 0 to this process.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Session options: allow ops to fall back to another device when the
# requested one is unavailable, and grow GPU memory on demand.
gpuConfig = tf.ConfigProto(allow_soft_placement=True)
gpuConfig.gpu_options.allow_growth = True

if __name__ == "__main__":

    # checkInputs()

    # The configuration path is hard-coded; the checkInputs() call above is
    # disabled.
    config = build_data("./configs/CoNLL04/bio_config")

    # Wrap each document split together with an index array 0..len-1.
    train_data = utils.HeadData(config.train_id_docs,
                                np.arange(len(
                                    config.train_id_docs)))  ## build data
    dev_data = utils.HeadData(config.dev_id_docs,
                              np.arange(len(config.dev_id_docs)))
    test_data = utils.HeadData(config.test_id_docs,
                               np.arange(len(config.test_id_docs)))

    # Start from an empty default graph with a fixed graph-level seed.
    tf.reset_default_graph()
    tf.set_random_seed(1)

    utils.printParameters(config)

    with tf.Session(config=gpuConfig) as sess:
Ejemplo n.º 6
0
    if (len(sys.argv) <= 3) or os.path.isfile(sys.argv[0]) == False:
        raise ValueError(
            'The configuration file and the timestamp should be specified.')


import os
# Expose only GPU 0 to this process.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Session options: allow ops to fall back to another device when the
# requested one is unavailable, and grow GPU memory on demand.
gpuConfig = tf.ConfigProto(allow_soft_placement=True)
gpuConfig.gpu_options.allow_growth = True

if __name__ == "__main__":

    # checkInputs()

    # The configuration path is hard-coded; the checkInputs() call above is
    # disabled.
    config = build_data("./configs/ADE/bio_config")

    # Wrap the training documents together with an index array 0..len-1.
    train_data = utils.HeadData(config.train_id_docs,
                                np.arange(len(
                                    config.train_id_docs)))  ## build data
    # dev_data = utils.HeadData(config.dev_id_docs, np.arange(len(config.dev_id_docs)))
    # test_data = utils.HeadData(config.test_id_docs, np.arange(len(config.test_id_docs)))
    # Dev and test sets are derived from the training documents instead of
    # config.dev_id_docs / config.test_id_docs: split once with 0.2, then
    # split the held-out part again with 0.5.
    # NOTE(review): presumably the argument is the held-out fraction --
    # confirm against HeadData.split.
    train_data, dev_data = train_data.split(0.2)
    dev_data, test_data = dev_data.split(0.5)

    # Start from an empty default graph with a fixed graph-level seed.
    tf.reset_default_graph()
    tf.set_random_seed(1)

    utils.printParameters(config)

    with tf.Session(config=gpuConfig) as sess:
Ejemplo n.º 7
0
from build_data import build_data

import numpy as np
from sklearn.neural_network import MLPClassifier

from skimage import color
from matplotlib import pyplot as plt

# Load the pre-computed image and label arrays from the dataset folder.
folder = '../Corel_Dataset/'
images = np.load(folder + 'images_lab.npy')
labels = np.load(folder + 'labels.npy')

# images is 4-D; labels are flattened to one row of width * height entries
# per image.
(n_samples, height, width, p) = images.shape
Y = labels.reshape(n_samples, width * height)

# Compute the feature array; n_samples, height and width are rebound from
# its shape, and size_input is the length of its last axis.
X = build_data(images, labels)
(n_samples, height, width, size_input) = np.shape(X)


# Shuffle the images
# NOTE: a seeded permutation is computed here, but the two lines that would
# apply it to X and Y are commented out, so the data order is unchanged.
np.random.seed(3)
order = np.arange(n_samples)
np.random.shuffle(order)
# X = X[order]
# Y = Y[order]



# Build training data
# nb_labels is the number of distinct values found in Y.
nb_labels = len(np.unique(Y))
# presumably the number of images used for training -- TODO confirm
train_size = 60
Ejemplo n.º 8
0
            PREFIX_INDEX_FOLDER + "hand_picked_Spotify_valid.txt",
            PREFIX_INDEX_FOLDER + "liszt_classical_archives_valid.txt"
        ]
        index_files_dict['test'] = [
            # PREFIX_INDEX_FOLDER + "debug_test.txt",
            PREFIX_INDEX_FOLDER + "bouliane_test.txt",
            PREFIX_INDEX_FOLDER + "hand_picked_Spotify_test.txt",
            PREFIX_INDEX_FOLDER + "liszt_classical_archives_test.txt"
        ]

        # Create data_folder when it does not exist yet (single level only:
        # os.mkdir does not create missing parent directories).
        if not os.path.isdir(data_folder):
            os.mkdir(data_folder)

        # NOTE(review): the directory created above is data_folder, while
        # build_data is pointed at LOCAL_SCRATCH + '/Data' -- confirm both
        # refer to the same location.
        build_data(index_files_dict=index_files_dict,
                   meta_info_path=LOCAL_SCRATCH + '/Data/temp.p',
                   quantization=quantization,
                   temporal_granularity=temporal_granularity,
                   store_folder=LOCAL_SCRATCH + '/Data',
                   logging=logging)
    else:
        logging.info('# ** Database NOT rebuilt ** ')
    ######################################
    ######################################

    ###### HOPT function
    if not ONLY_BUILD_DB:
        best = train_hopt(max_evals, result_file)
        logging.info(best)
    ######################################

    ######################################
    ###### Or directly call the train function for one set of HPARAMS
Ejemplo n.º 9
0
    nb_labels = len(np.unique(labels))
    precision_acc = 3
    np.random.seed(3)

    (n_samples, height, width, p) = images.shape
    n_steps = width * height

    # Raw image ###############################
    X = images.reshape(n_samples, height, width, 3)
    X = X[image_num].reshape(1, height, width, 3)
    image = color.lab2rgb(X.reshape(height, width, 3))
    final_plot(image, 'original', 1)

    # Feature image ###########################
    print "Compute Features..."
    X = build_data(X)
    (_, _, _, size_input) = X.shape
    X = normalize(X, mean, std)
    X_test = X[0].reshape(width * height, size_input)

    # Ground truth labels #####################
    Y = labels.reshape(n_samples, height * width)
    Y_test = Y[image_num].reshape(width * height)
    final_plot(colorize(Y_test.reshape(height, width)), 'ground truth', 2)

    # Initial labeling (MLP) ##################
    Y_proba = mlp_model.predict_proba(X_test)
    Y_init = np.argmax(Y_proba, axis=1)
    initial_accuracy = round(
        np.sum(Y_init == Y_test) / float(width * height), precision_acc)
    final_plot(colorize(Y_init.reshape(height, width)),
Ejemplo n.º 10
0
def config_from_args(args):
    """Build a ``Config`` from parsed arguments and create its logger.

    Every attribute found on ``args`` is copied unchanged into the config's
    instance dictionary. ``auto_config()`` is then run on the config, and a
    logger is obtained from ``config.log_path``.

    Args:
        args: Object whose attributes (as returned by ``vars``) become
            configuration entries.

    Returns:
        Tuple ``(config, logger)``.
    """
    cfg = Config()
    # Bulk-copy the argument namespace straight into the instance dict.
    cfg.__dict__.update(vars(args))
    cfg.auto_config()
    return cfg, get_logger(cfg.log_path)


if __name__ == "__main__":
    # Parse the command line and derive the config object and logger from it.
    args = parse_parameters()
    config, logger = config_from_args(args)

    # Preprocessing is optional and runs only when do_preprocess is set.
    if args.do_preprocess:
        build_data(config, logger)

    # load vocabs
    # One vocabulary per filename stored on the config.
    vocab_words = load_vocab(config.words_filename)
    vocab_tags = load_vocab(config.tags_filename)
    vocab_chars = load_vocab(config.chars_filename)
    vocab_relations = load_vocab(config.relations_filename)

    # get processing functions
    # Words use both the word and character vocabularies, with the
    # lowercase / chars options taken from the config.
    processing_word = get_processing_word(vocab_words,
                                          vocab_chars,
                                          lowercase=config.lowercase,
                                          chars=config.chars)
    # Tags reuse get_processing_word with only the tag vocabulary and
    # lowercasing disabled.
    processing_tag = get_processing_word(vocab_tags, lowercase=False)
    processing_relation = get_processing_relation(vocab_relations)