def build_train_dev_data(file_src, file_tgt, file_align, mode, dev_lines=1000):
    """Split a parallel corpus into dev/train parts and run ``build_data`` on each.

    The three input files are read in lockstep (iteration stops at the
    shortest one).  Each triple of lines is stripped and written as one
    tab-separated ``src<TAB>tgt<TAB>align`` line into an intermediate
    temporary file: the first ``dev_lines`` triples go to the dev file, every
    following triple goes to the train file.  ``build_data`` is then invoked
    once per intermediate file with a freshly reserved temporary path passed
    as its ``-output`` argument.

    Args:
        file_src: Path of the source-side text file.
        file_tgt: Path of the target-side text file.
        file_align: Path of the alignment file.
        mode: Value forwarded to ``build_data`` after the ``-mode`` flag.
        dev_lines: Number of leading line triples routed to the dev split
            (defaults to 1000, the previously hard-coded value).

    Returns:
        A ``(train_output_path, dev_output_path)`` tuple holding the paths
        that were handed to ``build_data`` as ``-output``.  These files are
        not deleted here; the caller owns them.
    """

    def _reserve_temp_path():
        # Create an empty temp file and release our handle immediately: only
        # the name is needed, and keeping the handle open leaks a descriptor
        # (and blocks re-opening the file by name on some platforms).
        handle = tempfile.NamedTemporaryFile(delete=False)
        handle.close()
        return handle.name

    train_path = _reserve_temp_path()
    dev_path = _reserve_temp_path()
    try:
        with open(train_path, 'w') as train_file, \
                open(dev_path, 'w') as dev_file, \
                open(file_src) as srcfile, \
                open(file_tgt) as tgtfile, \
                open(file_align) as alignfile:
            for n, (x, y, a) in enumerate(zip(srcfile, tgtfile, alignfile)):
                # The first ``dev_lines`` triples form the dev set.
                outfile = dev_file if n < dev_lines else train_file
                outfile.write(
                    "{0}\t{1}\t{2}\n".format(x.strip(), y.strip(), a.strip()))

        train_data_path = _reserve_temp_path()
        dev_data_path = _reserve_temp_path()
        # The leading '' fills the argv[0] slot of the argument list.
        build_data(['', '-data', train_path, '-mode', mode,
                    '-output', train_data_path])
        build_data(['', '-data', dev_path, '-mode', mode,
                    '-output', dev_data_path])
    finally:
        # The intermediate TSV files are only inputs to build_data; remove
        # them even when reading the corpus or build_data fails.
        os.remove(train_path)
        os.remove(dev_path)
    return train_data_path, dev_data_path
def train(self, subset=None):
    """Fit an MLP on per-pixel features and store it on the instance.

    Reads ``self.images`` and ``self.labels``, optionally restricted to
    ``subset``, turns the images into features with ``build_data``, records
    the feature mean/std over the first three axes in ``self.mean`` and
    ``self.std``, normalizes with ``self.normalize``, flattens everything to
    one row per pixel and fits an ``MLPClassifier`` configured from
    ``params``.  The fitted classifier is stored in ``self.model``.

    Args:
        subset: Optional index (list, array, ...) applied to the first axis
            of both the images and the labels.  ``None`` or an empty
            sequence (the historical default ``[]``) means "use everything".

    Returns:
        None.
    """
    print("\n### Train MLP - %s" % self.database)
    Y = self.labels
    X = self.images

    # ``subset != []`` is ambiguous (or raises) when ``subset`` is a NumPy
    # array, and ``[]`` as a default is a shared mutable object.  Test for
    # "nothing selected" explicitly instead; objects without a length
    # (e.g. slices) are still applied as an index.
    no_selection = subset is None or (
        hasattr(subset, '__len__') and len(subset) == 0)
    if not no_selection:
        Y = Y[subset]
        X = X[subset]

    X = build_data(X)
    # Statistics are taken over axes 0, 1 and 2, leaving one value per entry
    # of the last axis; they are kept on the instance for later reuse.
    self.mean = np.mean(X, axis=(0, 1, 2))
    self.std = np.std(X, axis=(0, 1, 2))
    X = self.normalize(X)

    # One row per pixel for the features, one column vector for the labels.
    (n_samples, height, width, p) = X.shape
    X = X.reshape(n_samples * height * width, p)
    Y = Y.reshape(n_samples * height * width, 1)

    # Train
    # Broadcasting the (N, 1) labels against arange(nb_labels) yields an
    # (N, nb_labels) 0/1 indicator matrix.
    Y_train_vector = (Y == np.arange(self.nb_labels)) * 1
    mlp = MLPClassifier(hidden_layer_sizes=params.hidden_layer_sizes,
                        max_iter=params.max_iter,
                        alpha=params.alpha,
                        solver=params.solver,
                        activation=params.activation,
                        verbose=params.verbose,
                        tol=params.tol,
                        random_state=params.random_state,
                        learning_rate_init=params.learning_rate_init)
    mlp.fit(X, Y_train_vector)
    print("Done.")
    self.model = mlp
# Index files for the validation split. Only the debug index is active; the
# other entries are kept commented out.
index_files_dict['valid'] = [
    PREFIX_INDEX_FOLDER + "debug_valid.txt",
    # PREFIX_INDEX_FOLDER + "bouliane_valid.txt",
    # PREFIX_INDEX_FOLDER + "hand_picked_Spotify_valid.txt",
    # PREFIX_INDEX_FOLDER + "liszt_classical_archives_valid.txt"
]
# Index files for the test split, mirroring the validation selection above.
index_files_dict['test'] = [
    PREFIX_INDEX_FOLDER + "debug_test.txt",
    # PREFIX_INDEX_FOLDER + "bouliane_test.txt",
    # PREFIX_INDEX_FOLDER + "hand_picked_Spotify_test.txt",
    # PREFIX_INDEX_FOLDER + "liszt_classical_archives_test.txt"
]

# Run build_data on the selected index files. data_folder is used both as
# the store folder and as the location of the 'temp.p' meta-info file;
# quantization and temporal granularity come from script_param.
build_data(index_files_dict=index_files_dict,
           meta_info_path=data_folder + '/temp.p',
           quantization=script_param['quantization'],
           temporal_granularity=script_param['temporal_granularity'],
           store_folder=data_folder,
           logging=logging)

############################################################
# Hyper parameter space
############################################################
# The model and the optimization method each provide their own space via
# get_hp_space(); train_param and script_param are bundled alongside them.
model_space = Model_class.get_hp_space()
optim_space = Optimization_method.get_hp_space()
space = {'model': model_space, 'optim': optim_space, 'train': train_param, 'script': script_param}

############################################################
# MongoDB
############################################################
# Host and port of the MongoDB server (presumably consumed by code that
# follows this section - not visible here).
host = "localhost"
port = 27017
raise ValueError( 'The configuration file and the timestamp should be specified.') es_file = sys.argv[3] + "/es_" + sys.argv[2] + ".txt" es_epoch = sys.maxsize if os.path.isfile(es_file) == True: with open(es_file, 'r') as myfile: es_epoch = int(myfile.read()) myfile.close() return es_epoch if __name__ == "__main__": es_epoch = checkInputs() config = build_data(sys.argv[1]) config.train_id_docs.extend(config.dev_id_docs) train_data = utils.HeadData(config.train_id_docs, np.arange(len(config.train_id_docs))) test_data = utils.HeadData(config.test_id_docs, np.arange(len(config.test_id_docs))) tf.reset_default_graph() tf.set_random_seed(1) utils.printParameters(config) # ---- Training ---- config1 = tf.ConfigProto() config1.gpu_options.per_process_gpu_memory_fraction = 0.85 with tf.Session(config=config1) as sess: # saver = tf.train.import_meta_graph('model.ckpt.meta') # saver.restore(sess, 'model.ckpt') embedding_matrix = tf.get_variable('embedding_matrix', shape=config.wordvectors.shape, dtype=tf.float32, trainable=False).assign(config.wordvectors)
if (len(sys.argv) <= 3) or os.path.isfile(sys.argv[0]) == False: raise ValueError( 'The configuration file and the timestamp should be specified.') import os os.environ["CUDA_VISIBLE_DEVICES"] = "0" gpuConfig = tf.ConfigProto(allow_soft_placement=True) gpuConfig.gpu_options.allow_growth = True if __name__ == "__main__": # checkInputs() config = build_data("./configs/CoNLL04/bio_config") train_data = utils.HeadData(config.train_id_docs, np.arange(len( config.train_id_docs))) ## build data dev_data = utils.HeadData(config.dev_id_docs, np.arange(len(config.dev_id_docs))) test_data = utils.HeadData(config.test_id_docs, np.arange(len(config.test_id_docs))) tf.reset_default_graph() tf.set_random_seed(1) utils.printParameters(config) with tf.Session(config=gpuConfig) as sess:
if (len(sys.argv) <= 3) or os.path.isfile(sys.argv[0]) == False: raise ValueError( 'The configuration file and the timestamp should be specified.') import os os.environ["CUDA_VISIBLE_DEVICES"] = "0" gpuConfig = tf.ConfigProto(allow_soft_placement=True) gpuConfig.gpu_options.allow_growth = True if __name__ == "__main__": # checkInputs() config = build_data("./configs/ADE/bio_config") train_data = utils.HeadData(config.train_id_docs, np.arange(len( config.train_id_docs))) ## build data # dev_data = utils.HeadData(config.dev_id_docs, np.arange(len(config.dev_id_docs))) # test_data = utils.HeadData(config.test_id_docs, np.arange(len(config.test_id_docs))) train_data, dev_data = train_data.split(0.2) dev_data, test_data = dev_data.split(0.5) tf.reset_default_graph() tf.set_random_seed(1) utils.printParameters(config) with tf.Session(config=gpuConfig) as sess:
from build_data import build_data
import numpy as np
from sklearn.neural_network import MLPClassifier
from skimage import color
from matplotlib import pyplot as plt

# Load the image and label arrays stored as .npy files in the dataset folder.
folder = '../Corel_Dataset/'
images = np.load(folder + 'images_lab.npy')
labels = np.load(folder + 'labels.npy')

# images is unpacked as a 4-D array; labels are flattened to one row per
# image with width * height entries.
(n_samples, height, width, p) = images.shape
Y = labels.reshape(n_samples, width * height)

# build_data returns a 4-D array as well; its last axis length is kept as
# size_input (n_samples, height and width are rebound from the result).
X = build_data(images, labels)
(n_samples, height, width, size_input) = np.shape(X)

# Shuffle the images
# NOTE: the permutation is computed with a fixed seed, but it is not applied
# to X or Y in this section because the two indexing lines are commented out.
np.random.seed(3)
order = np.arange(n_samples)
np.random.shuffle(order)
# X = X[order]
# Y = Y[order]

# Build training data
# nb_labels is the number of distinct values found in Y.
nb_labels = len(np.unique(Y))
train_size = 60
PREFIX_INDEX_FOLDER + "hand_picked_Spotify_valid.txt", PREFIX_INDEX_FOLDER + "liszt_classical_archives_valid.txt" ] index_files_dict['test'] = [ # PREFIX_INDEX_FOLDER + "debug_test.txt", PREFIX_INDEX_FOLDER + "bouliane_test.txt", PREFIX_INDEX_FOLDER + "hand_picked_Spotify_test.txt", PREFIX_INDEX_FOLDER + "liszt_classical_archives_test.txt" ] if not os.path.isdir(data_folder): os.mkdir(data_folder) build_data(index_files_dict=index_files_dict, meta_info_path=LOCAL_SCRATCH + '/Data/temp.p', quantization=quantization, temporal_granularity=temporal_granularity, store_folder=LOCAL_SCRATCH + '/Data', logging=logging) else: logging.info('# ** Database NOT rebuilt ** ') ###################################### ###################################### ###### HOPT function if not ONLY_BUILD_DB: best = train_hopt(max_evals, result_file) logging.info(best) ###################################### ###################################### ###### Or directly call the train function for one set of HPARAMS
nb_labels = len(np.unique(labels)) precision_acc = 3 np.random.seed(3) (n_samples, height, width, p) = images.shape n_steps = width * height # Raw image ############################### X = images.reshape(n_samples, height, width, 3) X = X[image_num].reshape(1, height, width, 3) image = color.lab2rgb(X.reshape(height, width, 3)) final_plot(image, 'original', 1) # Feautre image ########################### print "Compute Features..." X = build_data(X) (_, _, _, size_input) = X.shape X = normalize(X, mean, std) X_test = X[0].reshape(width * height, size_input) # Ground truth labels ##################### Y = labels.reshape(n_samples, height * width) Y_test = Y[image_num].reshape(width * height) final_plot(colorize(Y_test.reshape(height, width)), 'ground truth', 2) # Initial labeling (MLP) ################## Y_proba = mlp_model.predict_proba(X_test) Y_init = np.argmax(Y_proba, axis=1) initial_accuracy = round( np.sum(Y_init == Y_test) / float(width * height), precision_acc) final_plot(colorize(Y_init.reshape(height, width)),
def config_from_args(args):
    """Create a ``Config`` from parsed arguments and return it with a logger.

    Every attribute of ``args`` is copied straight into the instance
    dictionary of a new ``Config``; ``auto_config()`` is then called on it,
    and a logger is obtained from ``get_logger`` using ``config.log_path``.

    Returns:
        A ``(config, logger)`` tuple.
    """
    config = Config()
    # Bulk copy into __dict__, same effect as assigning key by key.
    config.__dict__.update(vars(args))
    config.auto_config()
    return config, get_logger(config.log_path)


if __name__ == "__main__":
    args = parse_parameters()
    config, logger = config_from_args(args)

    # Preprocessing is optional and only runs when requested on the CLI.
    if args.do_preprocess:
        build_data(config, logger)

    # Vocabularies: words, tags, characters and relations.
    vocab_words = load_vocab(config.words_filename)
    vocab_tags = load_vocab(config.tags_filename)
    vocab_chars = load_vocab(config.chars_filename)
    vocab_relations = load_vocab(config.relations_filename)

    # Processing functions built from those vocabularies.
    processing_word = get_processing_word(
        vocab_words,
        vocab_chars,
        lowercase=config.lowercase,
        chars=config.chars,
    )
    processing_tag = get_processing_word(vocab_tags, lowercase=False)
    processing_relation = get_processing_relation(vocab_relations)