def compress(saved_model_path, tflite_model_path, img_size, quantize=None, device=None):
    """Convert a TF SavedModel into a TFLite flatbuffer and write it to disk.

    Parameters
    ----------
    saved_model_path: directory containing the SavedModel to convert.
    tflite_model_path: output path for the .tflite file.
    img_size: image size forwarded to DataGenerator for calibration samples.
    quantize: if truthy, enable post-training quantization using a
        representative dataset and float16 supported types.
    device: unused here; kept so existing callers keep working.
    """
    converter = lite.TFLiteConverter.from_saved_model(saved_model_path)
    if quantize:
        sample_dataset = DataGenerator(get_train_data(), 10, img_size).sample()
        sample_images = sample_dataset[0]

        def representative_dataset_gen():
            # Yield every calibration image. The original iterated
            # range(shape[0] - 1), silently dropping the last sample.
            for index in range(sample_images.shape[0]):
                yield [sample_images[index:index + 1]]

        converter.representative_dataset = tf.lite.RepresentativeDataset(
            representative_dataset_gen)
        converter.optimizations = [tf.lite.Optimize.DEFAULT]
        converter.target_spec.supported_types = [tf.float16]

    tflite_model = converter.convert()
    # Context manager guarantees the output file is flushed and closed.
    with open(tflite_model_path, "wb") as model_file:
        bytes_written = model_file.write(tflite_model)
    print(bytes_written)
# Editor      : VIM
# File name   : test.py
# Author      : YunYang1994
# Created date: 2019-10-23 23:14:38
# Description : Run the trained FCN8s model over the test set and save the
#               visualized predictions as jpg files.
#
#================================================================

import numpy as np
import tensorflow as tf

from fcn8s import FCN8s
from utils import visual_result, DataGenerator

model = FCN8s(n_class=21)
TestSet = DataGenerator("./data/test_image.txt", "./data/test_labels", 1)

## load weights and test your model after training
## if you want to test model, first you need to initialize your model
## with "model(data)", and then load model weights
# BUG FIX: `np.float` (an alias of builtin float) was deprecated in NumPy
# 1.20 and removed in 1.24; use the concrete float32 dtype that the model
# layers operate on.
data = np.ones(shape=[1, 224, 224, 3], dtype=np.float32)
model(data)
model.load_weights("FCN8s.h5")

for idx, (x, y) in enumerate(TestSet):
    result = model(x)
    # Per-pixel class = argmax over the class dimension.
    pred_label = tf.argmax(result, axis=-1)
    result = visual_result(x[0], pred_label[0].numpy())
    save_file = "./data/prediction/%d.jpg" % idx
    print("=> saving prediction result into ", save_file)
    result.save(save_file)
args.train_size, args.aux_inputs, seed=args.seed, drop_lowq=args.drop_lowq) X_train, X_test, y_train, y_test = data if args.scale_targets: # scale targets to std == 1 scales = y_train.std(axis=0) y_train /= scales y_test /= scales # create data generators for on the fly augmentations dg_train = DataGenerator(X_train, y_train, batch_size=args.batch_size, seed=args.seed, augment=args.augment_train, im_size=args.im_size, n_channels=len(args.colors), y_shape=(len(args.targets), )) dg_test = DataGenerator(X_test, y_test, batch_size=args.batch_size, im_size=args.im_size, y_shape=(len(args.targets), ), shuffle=False, n_channels=len(args.colors)) ############################################################################### # train and save model n_steps = args.steps_per_epoch if args.steps_per_epoch else dg_train.n_steps sdecay = partial(step_decay,
def model(batch_size=128, nb_epoch=100):
    """Load the dataset, build data generators, restore the best saved
    model and evaluate it on the test split.

    NOTE(review): the `fit_generator` call is commented out, so despite the
    name this function only evaluates the checkpoint at `model_path`; the
    `batch_size`/`nb_epoch` parameters affect generators/commented code only.
    """
    # set parameters:
    nb_classes = len(functions)
    start_time = time.time()
    logging.info("Loading Data")
    train, val, test, train_df, valid_df, test_df = load_data()
    # Fold the validation frame into the training frame (bookkeeping only;
    # the generators below still use the separate train/val arrays).
    train_df = pd.concat([train_df, valid_df])
    test_gos = test_df['gos'].values
    train_data, train_labels = train
    val_data, val_labels = val
    test_data, test_labels = test
    logging.info("Data loaded in %d sec" % (time.time() - start_time))
    logging.info("Training data size: %d" % len(train_data[0]))
    logging.info("Validation data size: %d" % len(val_data[0]))
    logging.info("Test data size: %d" % len(test_data[0]))
    # pre_model_path = DATA_ROOT + 'pre_model_weights_' + FUNCTION + '.pkl'
    model_path = DATA_ROOT + 'model_' + FUNCTION + '.h5'
    # Keep only the best (lowest val_loss) weights; stop after 10 stale epochs.
    checkpointer = ModelCheckpoint(filepath=model_path, verbose=1, save_best_only=True)
    earlystopper = EarlyStopping(monitor='val_loss', patience=10, verbose=1)
    logging.info('Starting training the model')
    train_generator = DataGenerator(batch_size, nb_classes)
    train_generator.fit(train_data, train_labels)
    valid_generator = DataGenerator(batch_size, nb_classes)
    valid_generator.fit(val_data, val_labels)
    test_generator = DataGenerator(batch_size, nb_classes)
    test_generator.fit(test_data, test_labels)
    # model = get_model()
    # model.fit_generator(
    #     train_generator,
    #     samples_per_epoch=len(train_data[0]),
    #     nb_epoch=nb_epoch,
    #     validation_data=valid_generator,
    #     nb_val_samples=len(val_data[0]),
    #     max_q_size=batch_size,
    #     callbacks=[checkpointer, earlystopper])
    logging.info('Loading best model')
    model = load_model(model_path)
    logging.info('Predicting')
    # `val_samples` is the Keras 1.x argument name (Keras 2 renamed it to
    # `steps`) — this file targets the old API.
    preds = model.predict_generator(test_generator, val_samples=len(test_data[0]))
    # incon = 0
    # for i in range(len(test_data)):
    #     for j in range(len(functions)):
    #         childs = set(go[functions[j]]['children']).intersection(func_set)
    #         ok = True
    #         for n_id in childs:
    #             if preds[i, j] < preds[i, go_indexes[n_id]]:
    #                 preds[i, j] = preds[i, go_indexes[n_id]]
    #                 ok = False
    #         if not ok:
    #             incon += 1
    logging.info('Computing performance')
    f, p, r, t, preds_max = compute_performance(preds, test_labels, test_gos)
    roc_auc = compute_roc(preds, test_labels)
    logging.info('Fmax measure: \t %f %f %f %f' % (f, p, r, t))
    logging.info('ROC AUC: \t %f ' % (roc_auc, ))
# Fuse the sentence-level label embedding, the mention embedding and the
# pooled sentence-mention representation, then score the candidate with a
# single sigmoid unit.
sent_mention = Concatenate()([sentence_label_emb, mention_emb, sent_mention])
sent_mention = Dense(dim, activation='relu')(sent_mention)
pt = Dense(1, activation='sigmoid')(sent_mention)

train_model = Model([sentence_in, mention_in, left_in, right_in, y_in, t_in],
                    [left_p, right_p, pt])

# Masked binary cross-entropy on the left/right boundary predictions: only
# positions where sentence_mask is nonzero contribute, normalised by the
# mask sum.
left_in = K.expand_dims(left_in, 2)
right_in = K.expand_dims(right_in, 2)
left_loss = K.binary_crossentropy(left_in, left_p)
left_loss = K.sum(left_loss * sentence_mask) / K.sum(sentence_mask)
right_loss = K.binary_crossentropy(right_in, right_p)
right_loss = K.sum(right_loss * sentence_mask) / K.sum(sentence_mask)
pt_loss = K.mean(K.binary_crossentropy(t_in, pt))
loss = left_loss + right_loss + pt_loss

# The loss is attached via add_loss, so compile() takes no loss argument.
train_model.add_loss(loss)
train_model.compile(optimizer=Adam(1e-3))
train_model.summary()

train_D = DataGenerator(train_data, char2id, kb2id, id2kb)
train_model.fit_generator(train_D.__iter__(), steps_per_epoch=len(train_D), epochs=40)
def main(args):
    """Entry point: normalise FLAGS for the requested mode, build the
    vocab/data/model combination selected by FLAGS.model, then train,
    test or evaluate per FLAGS.mode.

    Raises
    ------
    Exception: if unexpected positional flags remain after parsing.
    ValueError: for an unknown FLAGS.model or FLAGS.mode.
    """
    main_start = time.time()
    # Fix all RNG seeds for reproducibility across tf/random/numpy.
    tf.set_random_seed(2019)
    random.seed(2019)
    np.random.seed(2019)

    if len(args) != 1:
        raise Exception('Problem with flags: %s' % args)

    # Correcting a few flags for test/eval mode.
    if FLAGS.mode != 'train':
        # Decoding uses beam search: one beam per batch slot.
        FLAGS.batch_size = FLAGS.beam_size
        FLAGS.bs_dec_steps = FLAGS.dec_steps
        if FLAGS.model.lower() != "tx":
            FLAGS.dec_steps = 1

    assert FLAGS.mode == 'train' or FLAGS.batch_size == FLAGS.beam_size, \
        "In test mode, batch size should be equal to beam size."
    assert FLAGS.mode == 'train' or FLAGS.dec_steps == 1 or FLAGS.model.lower() == "tx", \
        "In test mode, no. of decoder steps should be one."

    os.environ['TF_CUDNN_USE_AUTOTUNE'] = '0'
    # Restrict TF to the GPUs listed in FLAGS.GPUs.
    os.environ['CUDA_VISIBLE_DEVICES'] = ",".join(
        str(gpu_id) for gpu_id in FLAGS.GPUs)

    if not os.path.exists(FLAGS.PathToCheckpoint):
        os.makedirs(FLAGS.PathToCheckpoint)
    if FLAGS.mode == "test" and not os.path.exists(FLAGS.PathToResults):
        os.makedirs(FLAGS.PathToResults)
        os.makedirs(FLAGS.PathToResults + 'predictions')
        os.makedirs(FLAGS.PathToResults + 'groundtruths')

    if FLAGS.mode == 'eval':
        eval_model(FLAGS.PathToResults)
    else:
        start = time.time()
        vocab = Vocab(max_vocab_size=FLAGS.vocab_size, emb_dim=FLAGS.dim,
                      dataset_path=FLAGS.PathToDataset, glove_path=FLAGS.PathToGlove,
                      vocab_path=FLAGS.PathToVocab, lookup_path=FLAGS.PathToLookups)
        # Model selection: flat ("plain"), hierarchical ("hier") or
        # hierarchical with self-critical RL ("rlhier").
        if FLAGS.model.lower() == "plain":
            print("Setting up the plain model.\n")
            data = DataGenerator(path_to_dataset=FLAGS.PathToDataset,
                                 max_inp_seq_len=FLAGS.enc_steps,
                                 max_out_seq_len=FLAGS.dec_steps, vocab=vocab,
                                 use_pgen=FLAGS.use_pgen, use_sample=FLAGS.sample)
            summarizer = SummarizationModel(vocab, data)
        elif FLAGS.model.lower() == "hier":
            print("Setting up the hier model.\n")
            data = DataGeneratorHier(
                path_to_dataset=FLAGS.PathToDataset,
                max_inp_sent=FLAGS.max_enc_sent,
                max_inp_tok_per_sent=FLAGS.max_enc_steps_per_sent,
                max_out_tok=FLAGS.dec_steps, vocab=vocab,
                use_pgen=FLAGS.use_pgen, use_sample=FLAGS.sample)
            summarizer = SummarizationModelHier(vocab, data)
        elif FLAGS.model.lower() == "rlhier":
            print("Setting up the Hier RL model.\n")
            data = DataGeneratorHier(
                path_to_dataset=FLAGS.PathToDataset,
                max_inp_sent=FLAGS.max_enc_sent,
                max_inp_tok_per_sent=FLAGS.max_enc_steps_per_sent,
                max_out_tok=FLAGS.dec_steps, vocab=vocab,
                use_pgen=FLAGS.use_pgen, use_sample=FLAGS.sample)
            summarizer = SummarizationModelHierSC(vocab, data)
        else:
            raise ValueError(
                "model flag should be either of plain/hier/bayesian/shared!! \n"
            )

        end = time.time()
        print(
            "Setting up vocab, data and model took {:.2f} sec.".format(end - start))

        summarizer.build_graph()

        if FLAGS.mode == 'train':
            summarizer.train()
        elif FLAGS.mode == "test":
            summarizer.test()
        else:
            raise ValueError("mode should be either train/test!! \n")

    main_end = time.time()
    print("Total time elapsed: %.2f \n" % (main_end - main_start))
def model(params, batch_size=128, nb_epoch=6, is_train=True):
    """Optionally train, then evaluate the model for FUNCTION.

    Parameters
    ----------
    params: hyper-parameter mapping forwarded to get_model().
    batch_size: batch size used by the data generators.
    nb_epoch: number of training epochs.
    is_train: when False, skip training and only evaluate the checkpoint
        saved at `model_path`.
    """
    # set parameters:
    nb_classes = len(functions)
    start_time = time.time()
    logging.info("Loading Data")
    train, val, test, train_df, valid_df, test_df = load_data()
    # Fold validation frame into training frame (bookkeeping only).
    train_df = pd.concat([train_df, valid_df])
    test_gos = test_df['gos'].values
    train_data, train_labels = train
    val_data, val_labels = val
    test_data, test_labels = test
    logging.info("Data loaded in %d sec" % (time.time() - start_time))
    logging.info("Training data size: %d" % len(train_data[0]))
    logging.info("Validation data size: %d" % len(val_data[0]))
    logging.info("Test data size: %d" % len(test_data[0]))
    model_path = (DATA_ROOT + 'models/model_' + FUNCTION + '.h5')
    # '-' + str(params['embedding_dims']) +
    # '-' + str(params['nb_filter']) +
    # '-' + str(params['nb_conv']) +
    # '-' + str(params['nb_dense']) + '.h5')
    # Keep only the best (lowest val_loss) model; stop after 10 stale epochs.
    checkpointer = ModelCheckpoint(
        filepath=model_path, verbose=1, save_best_only=True)
    earlystopper = EarlyStopping(monitor='val_loss', patience=10, verbose=1)
    logging.info('Starting training the model')
    train_generator = DataGenerator(batch_size, nb_classes)
    train_generator.fit(train_data, train_labels)
    valid_generator = DataGenerator(batch_size, nb_classes)
    valid_generator.fit(val_data, val_labels)
    test_generator = DataGenerator(batch_size, nb_classes)
    test_generator.fit(test_data, test_labels)
    if is_train:
        model = get_model(params)
        # Keras 1.x fit_generator signature (samples_per_epoch / nb_epoch /
        # nb_val_samples / max_q_size).
        model.fit_generator(
            train_generator,
            samples_per_epoch=len(train_data[0]),
            nb_epoch=nb_epoch,
            validation_data=valid_generator,
            nb_val_samples=len(val_data[0]),
            max_q_size=batch_size,
            callbacks=[checkpointer, earlystopper])
    logging.info('Loading best model')
    start_time = time.time()
    model = load_model(model_path)
    logging.info('Loading time: %d' % (time.time() - start_time))
    # orgs = ['9606', '10090', '10116', '7227', '7955',
    #         '559292', '3702', '284812', '6239',
    #         '83333', '83332', '224308', '208964']
    # for org in orgs:
    #     logging.info('Predicting for %s' % (org,))
    #     train, val, test, train_df, valid_df, test_df = load_data(org=org)
    #     test_data, test_labels = test
    #     test_gos = test_df['gos'].values
    #     test_generator = DataGenerator(batch_size, nb_classes)
    #     test_generator.fit(test_data, test_labels)
    start_time = time.time()
    # `val_samples` is the Keras 1.x argument name (Keras 2 uses `steps`).
    preds = model.predict_generator(
        test_generator, val_samples=len(test_data[0]))
    running_time = time.time() - start_time
    logging.info('Running time: %d %d' % (running_time, len(test_data[0])))
    logging.info('Computing performance')
    f, p, r, t, preds_max = compute_performance(preds, test_labels, test_gos)
    roc_auc = compute_roc(preds, test_labels)
    mcc = compute_mcc(preds_max, test_labels)
    logging.info('Fmax measure: \t %f %f %f %f' % (f, p, r, t))
    logging.info('ROC AUC: \t %f ' % (roc_auc, ))
    logging.info('MCC: \t %f ' % (mcc, ))
    print(('%.3f & %.3f & %.3f & %.3f & %.3f' % (
        f, p, r, roc_auc, mcc)))
    # return f
    # logging.info('Inconsistent predictions: %d' % incon)
    # logging.info('Saving the predictions')
    proteins = test_df['proteins']
    predictions = list()
    for i in range(preds_max.shape[0]):
        predictions.append(preds_max[i])
    # Persist per-protein thresholded predictions alongside ground truth.
    df = pd.DataFrame(
        {
            'proteins': proteins, 'predictions': predictions,
            'gos': test_df['gos'], 'labels': test_df['labels']})
    df.to_pickle(DATA_ROOT + 'test-' + FUNCTION + '-preds.pkl')
def handler(context):
    """ABEJA platform training entry point.

    Builds train/test id splits from `context['datasets']`, trains a CNN
    classifier with early stopping, and saves it as model.h5 in the
    training-result directory.

    Raises
    ------
    Exception: if `context` is not a dict (unsupported runtime image), if
        the dataset is too small to carve out a test split, or if anything
        fails during training (re-raised after logging the traceback).
    """
    print('Start train handler.')
    if not isinstance(context, dict):
        message = 'Error: Support only "abeja/all-cpu:19.04" or "abeja/all-gpu:19.04".'
        print(message)
        raise Exception(message)

    try:
        dataset_alias = context['datasets']
        id2index, _ = set_categories(dataset_alias.values())
        num_classes = len(id2index)
        dataset_item_ids = get_dataset_item_ids(dataset_alias.values())
        # Shuffle before splitting so the held-out set is a random sample.
        random.shuffle(dataset_item_ids)
        test_size = int(len(dataset_item_ids) * EARLY_STOPPING_TEST_SIZE)
        if test_size:
            train_ids, test_ids = dataset_item_ids[
                test_size:], dataset_item_ids[:test_size]
        else:
            raise Exception(
                "Dataset size is too small. Please add more dataset.")
        input_shape = (IMG_ROWS, IMG_COLS, NB_CHANNELS)

        print('num classes:', num_classes)
        print('input shape:', input_shape)
        print(len(train_ids), 'train samples')
        print(len(test_ids), 'test samples')
        print('parameters:', utils.parameters)

        model = create_model(num_classes, input_shape)

        tensorboard = TensorBoard(log_dir=log_path, histogram_freq=0,
                                  write_graph=True, write_images=False)
        statistics = Statistics()
        # Stop when validation accuracy stalls for EARLY_STOPPING_PATIENCE epochs.
        early = EarlyStopping(monitor='val_acc',
                              min_delta=0, patience=EARLY_STOPPING_PATIENCE,
                              verbose=1, mode='auto')
        # Do you want to add `checkpoint` to callback as well?
        model.compile(loss=keras.losses.categorical_crossentropy,
                      optimizer=Adam(lr=LEARNING_RATE, beta_1=ADAM_BETA_1,
                                     beta_2=ADAM_BETA_2, epsilon=ADAM_EPSILON,
                                     decay=ADAM_DECAY),
                      metrics=['accuracy'])

        # fit_generator
        train_gen = DataGenerator(train_ids, id2index, is_train=True)
        test_gen = DataGenerator(test_ids, id2index, is_train=False)

        # fit_generator
        model.fit_generator(train_gen, epochs=EPOCHS, verbose=1,
                            validation_data=test_gen,
                            callbacks=[tensorboard, statistics, early])
        score = model.evaluate_generator(test_gen)

        model.save(os.path.join(ABEJA_TRAINING_RESULT_DIR, 'model.h5'))

        print('Test loss:', score[0])
        print('Test accuracy:', score[1])
    except Exception as e:
        # Log message + full traceback before re-raising so the platform
        # records the failure cause.
        print(str(e))
        print(traceback.format_exc())
        raise e
def main(feature_type: str, language: str, domain: str, main_dir: str, seq_len: int,
         batch_size: int, lstm_dim: int, character_level: bool = False):
    """Extract hidden states, seeds and predictions from a trained LSTM.

    Parameters
    ----------
    feature_type: the name of the feature
    language: language of the text.
    domain: optional domain suffix used in model/output file names.
    main_dir: base directory
    seq_len: sequence length
    batch_size: batch size
    lstm_dim: lstm hidden dimension
    character_level: whether tokenizer should be on character level.

    Side effects: pickles hidden states, seeds and predictions per book
    under data/hidden_states, data/seeds and data/predictions.
    """
    texts = get_texts(main_dir, language, feature_type, character_level, domain)
    tokenizer = Tokenizer(texts.values(), character_level=character_level)

    # Draw `batch_size` random windows of length seq_len from every book.
    samples = {}
    for book in texts:
        print(len(texts[book]))
        len_text = len(texts[book]) if character_level else len(texts[book].split())
        if len_text < seq_len:
            # NOTE(review): logger.warn is a deprecated alias of logger.warning.
            logger.warn(f"Requested seq_len larger than text length: {len_text} / {seq_len} "
                        f"for {book} and feature type {feature_type}.")
            continue
        rand_idx = np.random.randint(0, len_text - seq_len, batch_size)
        if character_level:
            samples[book] = tokenizer.encode([texts[book][i: i + seq_len] for i in rand_idx])
        else:
            split_text = texts[book].split()
            samples[book] = tokenizer.encode(
                [" ".join(split_text[i: i + seq_len]) for i in rand_idx]
            )

    # Sanity-check generator output shapes and decoding.
    test_generator = DataGenerator(tokenizer, tokenizer.full_text, seq_len=seq_len,
                                   batch_size=batch_size, with_embedding=True, train=False)
    sample_batch = next(iter(test_generator))
    logger.info(f"X batch shape: {sample_batch[0].shape}, y batch shape: {sample_batch[1].shape}")
    logger.info(f"Sample batch text: {tokenizer.decode(sample_batch[0][0])}")

    # Weights file name encodes feature type, language, lstm size and flags.
    file_path = os.path.join(main_dir, 'models', f'{feature_type}_{language}_lstm_{lstm_dim}')
    if domain:
        file_path += '_' + domain
    if character_level:
        file_path += '_character_level'
    file_path += '.h5'
    logger.info(f"Loading {file_path}")

    # Stateful model with seq_len=1: hidden state persists across steps so
    # states can be read out token by token.
    prediction_model = lstm_model(num_words=tokenizer.num_words, lstm_dim=lstm_dim, seq_len=1,
                                  batch_size=batch_size, stateful=True, return_state=True)
    prediction_model.load_weights(file_path)

    hiddens = {}
    seeds = {}
    predictions = {}
    for book in samples:
        seed = np.stack(samples[book])
        print(seed.shape)
        hf, preds = generate_text(prediction_model, tokenizer, seed, get_hidden=True)
        print(hf.shape)
        hiddens[book] = hf
        seeds[book] = seed
        preds = [tokenizer.ix_to_word[pred] for pred in preds]
        predictions[book] = preds

    file_name = f'{feature_type}_{language}_lstm_{lstm_dim}_seq_len_{seq_len}'
    if domain:
        file_name += '_' + domain
    if character_level:
        file_name += '_character-level'
    file_name += '.pkl'

    path_out = os.path.join('data', 'hidden_states', file_name)
    with open(path_out, 'wb') as f:
        pickle.dump(hiddens, f)
    logger.info(f"Succesfully saved hidden dimensions to {path_out}")

    path_out = os.path.join('data', 'seeds', file_name)
    with open(path_out, 'wb') as f:
        pickle.dump(seeds, f)
    logger.info(f"Succesfully saved seeds to {path_out}")

    path_out = os.path.join('data', 'predictions', file_name)
    with open(path_out, 'wb') as f:
        pickle.dump(predictions, f)
    logger.info(f"Succesfully saved predictions to {path_out}")
# CLI flags for the pointer-model training driver.
parser.add_argument('-hps_path', default='./hps/giga.json')
parser.add_argument('-dataset_path',
                    default='/home/jjery2243542/datasets/summary/structured/26693_50_30/giga_40_10.h5')
parser.add_argument('--pretrain_wordvec', action='store_true')
parser.add_argument('-npy_path',
                    default='/home/jjery2243542/datasets/summary/structured/26693_50_30/glove.npy')
parser.add_argument('-log_file_path', default='./log.txt')
parser.add_argument('-write_model_path', default='./model/model.ckpt')
parser.add_argument('--load_model')
parser.add_argument('-read_model_path', default='./model/model.ckpt')
parser.add_argument('-vocab_path',
                    default='/home/jjery2243542/datasets/summary/structured/26693_50_30/vocab.pkl')
args = parser.parse_args()

# get hps
hps = Hps()
hps.load(args.hps_path)
hps_tuple = hps.get_tuple()
print(hps_tuple)

vocab = Vocab(args.vocab_path, args.dataset_path + '.unk.json')
data_generator = DataGenerator(args.dataset_path)
model = PointerModel(hps_tuple, vocab)

# Initialise either from pretrained GloVe vectors or randomly.
if args.pretrain_wordvec:
    model.init(npy_path=args.npy_path)
else:
    model.init()
if args.load_model:
    # BUG FIX: was `args.read_load_model`, an attribute the parser never
    # defines (the flag above is `-read_model_path`), which raised
    # AttributeError whenever --load_model was passed.
    model.load_model(args.read_model_path)

train(
    model=model,
    data_generator=data_generator,
    log_file_path=args.log_file_path,
    model_path=args.write_model_path,
)
def model():
    """Build, save and evaluate the sequence model for FUNCTION.

    NOTE(review): this file targets Python 2 / Keras 1.x (`xrange`,
    functional `merge`, `input=`/`output=` Model kwargs); the actual
    `fit_generator` call is commented out, so only previously trained
    weights at `model_path` are evaluated.
    """
    # set parameters:
    batch_size = 128
    nb_epoch = 100
    nb_classes = len(functions)
    start_time = time.time()
    logging.info("Loading Data")
    train, val, test, test_df = load_data()
    test_gos = test_df['gos'].values
    train_data, train_labels = train
    val_data, val_labels = val
    test_data, test_labels = test
    logging.info("Data loaded in %d sec" % (time.time() - start_time))
    logging.info("Training data size: %d" % len(train_data))
    logging.info("Validation data size: %d" % len(val_data))
    logging.info("Test data size: %d" % len(test_data))
    logging.info("Building the model")
    inputs = Input(shape=(MAXLEN, ), dtype='int32', name='input1')
    feature_model = get_feature_model()(inputs)
    layers = get_layers(feature_model)
    # One output head per GO function, concatenated into a single vector.
    output_models = []
    for i in range(len(functions)):
        output_models.append(layers[functions[i]]['output'])
    net = merge(output_models, mode='concat', concat_axis=1)
    # net = Dense(nb_classes * 2, activation='relu')(feature_model)
    # net = Dense(nb_classes, activation='sigmoid')(net)
    # net = Activation('sigmoid')(net)
    model = Model(input=inputs, output=net)
    logging.info('Model built in %d sec' % (time.time() - start_time))
    logging.info('Saving the model')
    model_json = model.to_json()
    with open(DATA_ROOT + 'model_seq_' + FUNCTION + '.json', 'w') as f:
        f.write(model_json)
    logging.info('Compiling the model')
    optimizer = RMSprop()
    model.compile(optimizer=optimizer, loss='binary_crossentropy')
    pre_model_path = DATA_ROOT + 'pre_model_seq_weights_' + FUNCTION + '.pkl'
    model_path = DATA_ROOT + 'model_seq_weights_' + FUNCTION + '.pkl'
    # Keep only the best (lowest val_loss) weights; stop after 10 stale epochs.
    checkpointer = MyCheckpoint(filepath=model_path, verbose=1, save_best_only=True,
                                save_weights_only=True)
    earlystopper = EarlyStopping(monitor='val_loss', patience=10, verbose=1)
    logging.info('Compilation finished in %d sec' % (time.time() - start_time))
    # logging.info('Loading pretrained weights')
    # load_model_weights(model, pre_model_path)
    logging.info('Starting training the model')
    train_generator = DataGenerator(batch_size, nb_classes)
    train_generator.fit(train_data, train_labels)
    valid_generator = DataGenerator(batch_size, nb_classes)
    valid_generator.fit(val_data, val_labels)
    test_generator = DataGenerator(batch_size, nb_classes)
    test_generator.fit(test_data, test_labels)
    # model.fit_generator(
    #     train_generator,
    #     samples_per_epoch=len(train_data),
    #     nb_epoch=nb_epoch,
    #     validation_data=valid_generator,
    #     nb_val_samples=len(val_data),
    #     max_q_size=batch_size,
    #     callbacks=[checkpointer, earlystopper])
    logging.info('Loading weights')
    load_model_weights(model, model_path)
    # model.save(DATA_ROOT + 'model_%s.h5' % FUNCTION)
    preds = model.predict_generator(test_generator, val_samples=len(test_data))
    logging.info(preds.shape)
    incon = 0
    # for i in xrange(len(test_data)):
    #     for j in xrange(len(functions)):
    #         childs = set(go[functions[j]]['children']).intersection(func_set)
    #         ok = True
    #         for n_id in childs:
    #             if preds[i, j] < preds[i, go_indexes[n_id]]:
    #                 preds[i, j] = preds[i, go_indexes[n_id]]
    #                 ok = False
    #         if not ok:
    #             incon += 1
    f, p, r, preds_max = compute_performance(preds, test_labels, test_gos)
    roc_auc = compute_roc(preds, test_labels)
    logging.info('Fmax measure: \t %f %f %f' % (f, p, r))
    logging.info('ROC AUC: \t %f ' % (roc_auc, ))
    logging.info('Inconsistent predictions: %d' % incon)
    logging.info('Saving the predictions')
    proteins = test_df['proteins']
    predictions = list()
    for i in xrange(preds_max.shape[0]):
        predictions.append(preds_max[i])
    df = pd.DataFrame({
        'proteins': proteins,
        'predictions': predictions,
        'gos': test_df['gos'],
        'labels': test_df['labels']
    })
    df.to_pickle(DATA_ROOT + 'test-' + FUNCTION + '-preds-seq.pkl')
    logging.info('Done in %d sec' % (time.time() - start_time))
    function_centric_performance(functions, preds.T, test_labels.T)
# Scrape face images via Selenium + OpenCV, then set up PyTorch imports
# for training a CNN classifier on the collected faces.
import os
import time
import numpy as np
from tqdm.auto import tqdm
import cv2
from PIL import Image
from matplotlib import pyplot as plt
from utils import DataGenerator, face_plot

root_path = os.getcwd()
# Haar cascade used to detect/crop faces from downloaded images.
face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_alt2.xml')
driver_path = 'chromedriver.exe'
data_generator = DataGenerator(root_path, face_cascade, driver_path)
# Download and crop face images for each query term.
data_generator.get_idol_faces('鬼娃恰吉')
data_generator.get_idol_faces('王世堅')
print('OK')

from dataset import ImageFolder
from model import CNN_MODEL
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
import torchvision.transforms as transforms

# Fraction of the dataset used for training (remainder for validation).
TRAIN_SIZE = 0.8
# ### Create the data generator to load batches of data # In[20]: import utils; reload(utils) from utils import DataGenerator NUM_TRAIN_PAIRS = 150000 NUM_VAL_PAIRS = 10000 BATCH_SIZE = 128 datagen = DataGenerator(X_train, y_train, num_train_pairs = NUM_TRAIN_PAIRS, num_val_pairs = NUM_VAL_PAIRS, X_val = X_val[val_train], train_alphabet_to_index = train_alphabet_to_index, val_alphabet_to_index = val_train_index, y_val = y_val[val_train], batch_sz = BATCH_SIZE, verbose = True) datagen.create_data_transformer(rotation_range=10, width_shift_range=0.01, height_shift_range=0.01, shear_range=0.01) STEPS_PER_EPOCH = NUM_TRAIN_PAIRS // BATCH_SIZE VALIDATION_STEPS = NUM_VAL_PAIRS // BATCH_SIZE from keras.optimizers import Adam learning_rate = 5e-5 adam = Adam(learning_rate) scheduler = LearningRateScheduler(lambda epoch : learning_rate * pow(0.985, epoch)) siamese_net.compile(loss='binary_crossentropy', optimizer=adam, metrics=['accuracy']) siamese_net.load_weights(INIT_WEIGHTS)
for result in all_result: for word_idx in result: f_out.write('{} '.format(word_idx)) f_out.write('\n') if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('-hps_path', default='./hps/cd_v3.json') parser.add_argument('-vocab_path', default='/home/jjery2243542/datasets/summary/structured/26693_50_30/vocab.pkl') parser.add_argument('-model_path', default='./model/model.ckpt-2999') parser.add_argument('-dataset_path', default='/home/jjery2243542/datasets/summary/structured/26693_50_30/giga_40_10.h5') parser.add_argument('-dataset_type', default='valid') parser.add_argument('-output_path', default='result.txt') args = parser.parse_args() hps = Hps() hps.load(args.hps_path) hps_tuple = hps.get_tuple() print(hps_tuple) vocab = Vocab(args.vocab_path, args.dataset_path + '.unk.json') data_generator = DataGenerator(args.dataset_path) model = PointerModel(hps_tuple, vocab) model.load_model(args.model_path) dg = DataGenerator(args.dataset_path) iterator = dg.iterator( batch_size=hps_tuple.batch_size, dataset_type=args.dataset_type, infinite=False, shuffle=False ) predict(model, iterator, args.output_path)
def main(feature_type: str, language: str, domain: str, main_dir: str, seq_len: int,
         batch_size: int, test_batch_size: int, lstm_dim: int, character_level: bool = False):
    """Train an LSTM language model on the given corpus and checkpoint the
    best weights.

    Parameters
    ----------
    feature_type: the name of the feature
    main_dir: base directory
    language: language of corpus
    domain: optional domain suffix used in the weights file name
    seq_len: sequence length
    batch_size: batch size
    test_batch_size: test batch size
    lstm_dim: lstm hidden dimension
    character_level: whether tokenizer should be on character level.
    """
    texts = get_texts(main_dir, language, feature_type, character_level, domain)
    tokenizer = Tokenizer(texts.values(), character_level=character_level)

    train_generator = DataGenerator(tokenizer, tokenizer.full_text, seq_len=seq_len,
                                    batch_size=batch_size, with_embedding=True, train=True)
    test_generator = DataGenerator(tokenizer, tokenizer.full_text, seq_len=seq_len,
                                   batch_size=test_batch_size, with_embedding=True, train=False)

    # Log one sample batch as a sanity check of shapes and decoding.
    sample_batch = next(iter(train_generator))
    logger.info(
        f"X batch shape: {sample_batch[0].shape}, y batch shape: {sample_batch[1].shape}"
    )
    logger.info(f"Sample batch text: {tokenizer.decode(sample_batch[0][0])}")

    training_model = lstm_model(num_words=tokenizer.num_words, seq_len=seq_len,
                                lstm_dim=lstm_dim, stateful=False)

    # Weights file name encodes feature type, language, lstm size and flags.
    file_path = os.path.join(main_dir, 'models', f'{feature_type}_{language}_lstm_{lstm_dim}')
    if domain:
        file_path += '_' + domain
    if character_level:
        file_path += '_character_level'
    file_path += '.h5'

    # Initial (untrained) weights are written up-front; the checkpoint below
    # overwrites this file with the best val_loss weights during training.
    # NOTE(review): confirm the pre-training save is intentional.
    training_model.save_weights(file_path)

    checkpoint = tf.keras.callbacks.ModelCheckpoint(file_path, monitor='val_loss',
                                                    save_best_only=True)
    early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=2)
    # Callback that generates sample text from the checkpoint after epochs.
    generate_text = GenerateText(test_generator, tokenizer, file_path, lstm_dim)
    callbacks_list = [checkpoint, early_stopping, generate_text]

    training_model.fit_generator(train_generator, validation_data=test_generator,
                                 callbacks=callbacks_list, epochs=256)
import os from builtins import enumerate import pandas as pd import numpy as np from utils import DataGenerator import pickle cwd = "/home/go96bix/projects/epitop_pred" directory = os.path.join(cwd, "data_generator") classes = 0 num_samples = [] all_samples = [] classic = False embedding = False elmo_embedder = DataGenerator.Elmo_embedder() slicesize = 49 use_old_test_set = True if use_old_test_set: test_df_old = pd.DataFrame.from_csv( "/home/le86qiz/Documents/Konrad/general_epitope_analyses/bepipred_evaluation/deepipred_results/test_samples.csv", sep=",", header=None, index_col=None) test_df_old_y = test_df_old[2].values test_df_old = test_df_old[1].values else: test_df_old = [] for root, dirs, files in os.walk(directory): for file in files: if file.endswith(".csv"):
with tables.open_file(os.path.join('data', dataset_name + '.h5'), 'r') as dataset: # split into train and test sets total_imgs = dataset.root.imgs.shape[0] sample_weights = sample_weights[0:total_imgs] if use_sample_weights else [] all_inds = list(range(0, total_imgs)) np.random.shuffle(all_inds) train_inds = all_inds[0:int(total_imgs * (1 - test_set_portion))] test_inds = all_inds[int(total_imgs * (1 - test_set_portion)):] # create model and data generators train_generator = DataGenerator( train_inds, dataset, batch_size=batch_size, shuffle=True, sample_weights=sample_weights, num_loss_fcns=2 if network_structure == 'stacked_hourglass' else 1) test_generator = DataGenerator( test_inds, dataset, batch_size=batch_size, shuffle=False, sample_weights=sample_weights, num_loss_fcns=2 if network_structure == 'stacked_hourglass' else 1) model = models_dict(network_structure)( (train_generator.img_dims[0], train_generator.img_dims[1], 1), train_generator.channels, first_layer_filters,
def fTrainInner(cnn, modelName, X_train=None, y_train=None, Y_segMasks_train=None,
                X_valid=None, y_valid=None, Y_segMasks_valid=None,
                X_test=None, y_test=None, Y_segMasks_test=None,
                sOutPath=None, patchSize=0, batchSize=None, learningRate=None,
                iEpochs=None, usingClassification=False, dlnetwork=None, data=None):
    """Compile and train a segmentation CNN (optionally with an extra
    classification head), evaluate it on the test data, and persist the
    architecture (json), weights (h5) and all metrics (MATLAB .mat).

    Parameters
    ----------
    cnn : the Keras model to train
    modelName : unused in this function; kept for interface compatibility
    X_* / y_* / Y_segMasks_* : arrays (trainMode 'ARRAY') or file paths
        (trainMode 'GENERATOR') for data / class labels / segmentation masks
    sOutPath : output path stem from which all artifact names are derived
    patchSize : patch dimensions, forwarded to DataGenerator
    batchSize, learningRate, iEpochs : training hyperparameters
    usingClassification : True when the net has a second 'classification_output'
    dlnetwork : configuration object (optimizer choice, trainMode, decay, ...)
    data : dataset/splitting configuration (generator mode only)
    """
    print('Training CNN')
    print('with lr = ' + str(learningRate) + ' , batchSize = ' + str(batchSize))

    # sio.savemat('D:med_data/' + 'checkdata_voxel_and_mask.mat',
    #             {'mask_train': Y_segMasks_train,
    #              'voxel_train': X_train,
    #              'mask_test': Y_segMasks_test,
    #              'voxel_test': X_test})

    # save names: derive every output artifact name from sOutPath
    _, sPath = os.path.splitdrive(sOutPath)
    sPath, sFilename = os.path.split(sPath)
    sFilename, sExt = os.path.splitext(sFilename)
    model_name = sOutPath + os.sep + sFilename
    weight_name = model_name + '_weights.h5'
    model_json = model_name + '.json'
    model_all = model_name + '_model.h5'
    model_mat = model_name + '.mat'

    if (os.path.isfile(model_mat)):  # no training if output file exists
        print('------- already trained -> go to next')
        return

    # create optimizer from the configuration; fall back to Adam defaults
    if dlnetwork != None:
        if dlnetwork.optimizer == 'SGD':
            opti = keras.optimizers.SGD(lr=learningRate,
                                        momentum=dlnetwork.momentum,
                                        decay=dlnetwork.weightdecay,
                                        nesterov=dlnetwork.nesterov)
        elif dlnetwork.optimizer == 'RMSPROP':
            opti = keras.optimizers.RMSprop(lr=learningRate,
                                            decay=dlnetwork.weightdecay)
        elif dlnetwork.optimizer == 'ADAGRAD':
            opti = keras.optimizers.Adagrad(lr=learningRate,
                                            epsilon=None,
                                            decay=dlnetwork.weightdecay)
        elif dlnetwork.optimizer == 'ADADELTA':
            opti = keras.optimizers.Adadelta(lr=learningRate,
                                             rho=0.95,
                                             epsilon=None,
                                             decay=dlnetwork.weightdecay)
        elif dlnetwork.optimizer == 'ADAM':
            opti = keras.optimizers.Adam(lr=learningRate,
                                         beta_1=0.9,
                                         beta_2=0.999,
                                         epsilon=None,
                                         decay=dlnetwork.weightdecay)
        else:
            raise ValueError("Unknown Optimizer!")
    else:
        # opti = SGD(lr=learningRate, momentum=1e-8, decay=0.1, nesterov=True);#Adag(lr=0.01, epsilon=1e-06)
        opti = keras.optimizers.Adam(lr=learningRate, beta_1=0.9,
                                     beta_2=0.999, epsilon=1e-08, decay=0.0)

    cnn.summary()

    # compile model: dual loss/metrics when a classification head is present
    if usingClassification:
        cnn.compile(loss={
            'segmentation_output': dice_coef_loss,
            'classification_output': 'categorical_crossentropy'
        },
                    optimizer=opti,
                    metrics={
                        'segmentation_output': dice_coef,
                        'classification_output': 'accuracy'
                    })
    else:
        cnn.compile(loss=dice_coef_loss, optimizer=opti, metrics=[dice_coef])

    # callbacks (alternatives kept for reference)
    #callback_earlyStopping = EarlyStopping(monitor='val_loss', patience=12, verbose=1)
    # callback_tensorBoard = keras.callbacks.TensorBoard(log_dir=dlart_handle.getLearningOutputPath() + '/logs',
    #                                                    histogram_freq=2,
    #                                                    batch_size=batchSize,
    #                                                    write_graph=True,
    #                                                    write_grads=True,
    #                                                    write_images=True,
    #                                                    embeddings_freq=0,
    #                                                    embeddings_layer_names=None,
    #                                                    embeddings_metadata=None)
    #callbacks = [callback_earlyStopping]
    callbacks = []
    #callbacks.append(
    #    ModelCheckpoint(sOutPath + os.sep + 'checkpoints' + os.sep + 'checker.hdf5', monitor='val_acc', verbose=0,
    #                    period=1, save_best_only=True))  # overrides the last checkpoint, its just for security
    # callbacks.append(ReduceLROnPlateau(monitor='loss', factor=0.1, patience=5, min_lr=1e-4, verbose=1))
    callbacks.append(LearningRateScheduler(schedule=step_decay, verbose=1))
    #callbacks.append(LivePlotCallback(dlart_handle))

    print('Start training')
    # TODO: add here data augmentation via ImageDataGenerator from utils/image_preprocessing

    if dlnetwork.trainMode == 'GENERATOR':
        # prepare data generators
        if os.path.exists(X_train):
            # splitting was already done: X_train/X_valid/X_test are paths
            train_gen = DataGenerator(X_train, batch_size=batchSize,
                                      dim=patchSize,
                                      usingClassification=usingClassification)
            val_gen = DataGenerator(X_valid, batch_size=batchSize,
                                    dim=patchSize,
                                    usingClassification=usingClassification)
            test_gen = DataGenerator(X_test, batch_size=batchSize,
                                     dim=patchSize,
                                     usingClassification=usingClassification)
        else:
            # splitting needs to be done: scan the directory for .hdf5 patches
            datapath = os.path.dirname(X_train)
            datafiles = [
                f for f in os.listdir(datapath)
                if (os.path.isfile(os.path.join(datapath, f))
                    and f.endswith('.hdf5'))
            ]
            train_files, val_files, test_files = fSplitSegmentationDataset_generator(
                datafiles,
                data.allPats,
                data.allTestPats,
                data.splittingMode,
                testTrainingDatasetRatio=data.trainTestDatasetRatio,
                validationTrainRatio=data.trainValidationRatio,
                nfolds=data.nfolds,
                isRandomShuffle=data.isRandomShuffle)
            train_gen = DataGenerator(datapath, batch_size=batchSize,
                                      dim=patchSize,
                                      usingClassification=usingClassification,
                                      list_IDs=train_files)
            val_gen = DataGenerator(datapath, batch_size=batchSize,
                                    dim=patchSize,
                                    usingClassification=usingClassification,
                                    list_IDs=val_files)
            test_gen = DataGenerator(datapath, batch_size=batchSize,
                                     dim=patchSize,
                                     usingClassification=usingClassification,
                                     list_IDs=test_files)
        existing_validation = True if len(val_gen.list_IDs) > 0 else False
    else:  # ARRAY
        # NOTE(review): if X_valid is a numpy array, `X_valid != 0` is
        # element-wise and truth-testing it raises — presumably X_valid is 0
        # (scalar placeholder) when absent; confirm with the caller.
        existing_validation = (X_valid != 0 and X_valid is not None)

    # NOTE(review): Keras fit_generator/evaluate_generator do not accept a
    # batch_size= keyword — the GENERATOR branches below likely raise a
    # TypeError; confirm against the pinned Keras version.
    if existing_validation:  # a validation split exists -> validate on it
        if usingClassification:
            if dlnetwork.trainMode == 'ARRAY':
                result = cnn.fit(X_train, {
                    'segmentation_output': Y_segMasks_train,
                    'classification_output': y_train
                },
                                 validation_data=(X_valid, {
                                     'segmentation_output': Y_segMasks_valid,
                                     'classification_output': y_valid
                                 }),
                                 epochs=iEpochs,
                                 batch_size=batchSize,
                                 callbacks=callbacks,
                                 verbose=1)
            else:
                result = cnn.fit_generator(train_gen,
                                           validation_data=val_gen,
                                           epochs=iEpochs,
                                           batch_size=batchSize,
                                           callbacks=callbacks,
                                           use_multiprocessing=True,
                                           workers=8,
                                           max_queue_size=32,
                                           verbose=1)
        else:
            if dlnetwork.trainMode == 'ARRAY':
                result = cnn.fit(X_train,
                                 Y_segMasks_train,
                                 validation_data=(X_valid, Y_segMasks_valid),
                                 epochs=iEpochs,
                                 batch_size=batchSize,
                                 callbacks=callbacks,
                                 verbose=1)
            else:
                result = cnn.fit_generator(train_gen,
                                           validation_data=val_gen,
                                           epochs=iEpochs,
                                           batch_size=batchSize,
                                           callbacks=callbacks,
                                           use_multiprocessing=True,
                                           workers=8,
                                           max_queue_size=32,
                                           verbose=1)
    else:  # no validation split -> fall back to the test set for validation
        if usingClassification:
            if dlnetwork.trainMode == 'ARRAY':
                result = cnn.fit(X_train, {
                    'segmentation_output': Y_segMasks_train,
                    'classification_output': y_train
                },
                                 validation_data=(X_test, {
                                     'segmentation_output': Y_segMasks_test,
                                     'classification_output': y_test
                                 }),
                                 epochs=iEpochs,
                                 batch_size=batchSize,
                                 callbacks=callbacks,
                                 verbose=1)
            else:
                result = cnn.fit_generator(train_gen,
                                           validation_data=test_gen,
                                           epochs=iEpochs,
                                           batch_size=batchSize,
                                           callbacks=callbacks,
                                           use_multiprocessing=True,
                                           workers=8,
                                           max_queue_size=32,
                                           verbose=1)
        else:
            if dlnetwork.trainMode == 'ARRAY':
                result = cnn.fit(X_train,
                                 Y_segMasks_train,
                                 validation_data=(X_test, Y_segMasks_test),
                                 epochs=iEpochs,
                                 batch_size=batchSize,
                                 callbacks=callbacks,
                                 verbose=1)
            else:
                result = cnn.fit_generator(train_gen,
                                           validation_data=test_gen,
                                           epochs=iEpochs,
                                           batch_size=batchSize,
                                           callbacks=callbacks,
                                           use_multiprocessing=True,
                                           workers=8,
                                           max_queue_size=32,
                                           verbose=1)

    # return the loss value and metrics values for the model in test mode
    if dlnetwork.trainMode == 'ARRAY':
        if usingClassification:
            model_metrics = cnn.metrics_names
            loss_test, segmentation_output_loss_test, classification_output_loss_test, segmentation_output_dice_coef_test, classification_output_acc_test \
                = cnn.evaluate(X_test,
                               {'segmentation_output': Y_segMasks_test,
                                'classification_output': y_test},
                               batch_size=batchSize,
                               verbose=1)
        else:
            score_test, dice_coef_test = cnn.evaluate(X_test,
                                                      Y_segMasks_test,
                                                      batch_size=batchSize,
                                                      verbose=1)
        # predictions on the test set (positional args: batch_size, verbose)
        prob_test = cnn.predict(X_test, batchSize, 0)
    else:
        if usingClassification:
            model_metrics = cnn.metrics_names
            loss_test, segmentation_output_loss_test, classification_output_loss_test, segmentation_output_dice_coef_test, classification_output_acc_test \
                = cnn.evaluate_generator(test_gen, batch_size=batchSize, verbose=1)
        else:
            score_test, dice_coef_test = cnn.evaluate_generator(
                test_gen, batch_size=batchSize, verbose=1)
        prob_test = cnn.predict_generator(test_gen, batchSize, 0)

    # save model architecture as json, weights as h5
    json_string = cnn.to_json()
    with open(model_json, 'w') as jsonFile:
        jsonFile.write(json_string)

    # wei = cnn.get_weights()
    cnn.save_weights(weight_name, overwrite=True)
    # cnn.save(model_all)  # keras > v0.7

    if not usingClassification:
        # matlab: collect training curves and test scores into one .mat file
        dice_coef_training = result.history['dice_coef']
        training_loss = result.history['loss']
        if X_valid != 0:
            val_dice_coef = result.history['val_dice_coef']
            val_loss = result.history['val_loss']
        else:
            # no validation split -> store zeros as placeholders
            val_dice_coef = 0
            val_loss = 0

        print('Saving results: ' + model_name)
        sio.savemat(
            model_name, {
                'model_settings': model_json,
                'model': model_all,
                'weights': weight_name,
                'dice_coef': dice_coef_training,
                'training_loss': training_loss,
                'val_dice_coef': val_dice_coef,
                'val_loss': val_loss,
                'score_test': score_test,
                'dice_coef_test': dice_coef_test,
                'prob_test': prob_test
            })
    else:
        # matlab: same, but with separate segmentation/classification metrics
        segmentation_output_loss_training = result.history[
            'segmentation_output_loss']
        classification_output_loss_training = result.history[
            'classification_output_loss']
        segmentation_output_dice_coef_training = result.history[
            'segmentation_output_dice_coef']
        classification_output_acc_training = result.history[
            'classification_output_acc']
        if X_valid != 0:
            val_segmentation_output_loss = result.history[
                'val_segmentation_output_loss']
            val_classification_output_loss = result.history[
                'val_classification_output_loss']
            val_segmentation_output_dice_coef = result.history[
                'val_segmentation_output_dice_coef']
            val_classification_output_acc = result.history[
                'val_classification_output_acc']
        else:
            val_segmentation_output_loss = 0
            val_classification_output_loss = 0
            val_segmentation_output_dice_coef = 0
            val_classification_output_acc = 0

        print('Saving results: ' + model_name)
        sio.savemat(
            model_name, {
                'model_settings': model_json,
                'model': model_all,
                'weights': weight_name,
                'segmentation_output_loss_training':
                segmentation_output_loss_training,
                'classification_output_loss_training':
                classification_output_loss_training,
                'segmentation_output_dice_coef_training':
                segmentation_output_dice_coef_training,
                'classification_output_acc_training':
                classification_output_acc_training,
                'segmentation_output_loss_val': val_segmentation_output_loss,
                'classification_output_loss_val':
                val_classification_output_loss,
                'segmentation_output_dice_coef_val':
                val_segmentation_output_dice_coef,
                'classification_output_acc_val': val_classification_output_acc,
                'loss_test': loss_test,
                'segmentation_output_loss_test': segmentation_output_loss_test,
                'classification_output_loss_test':
                classification_output_loss_test,
                'segmentation_output_dice_coef_test':
                segmentation_output_dice_coef_test,
                'classification_output_acc_test': classification_output_acc_test,
                'segmentation_predictions': prob_test[0],
                'classification_predictions': prob_test[1]
            })
def model(params, batch_size=b_size, nb_epoch=n_epoch, is_train=True):
    """Train a GO-function prediction model (Python 2 / old Keras API),
    evaluate it on the test set and pickle per-protein predictions.

    params    -- hyperparameter dict forwarded to get_model()
    batch_size, nb_epoch -- training configuration
    is_train  -- NOTE(review): shadowed by `is_train = True` below, so the
                 argument is effectively ignored — presumably leftover
                 debugging; confirm before relying on is_train=False.
    """
    # set parameters:
    nb_classes = len(functions)
    start_time = time.time()
    logging.info("Loading Data")
    train, val, test, train_df, valid_df, test_df = load_data()
    print len(test_df)
    # validation proteins are folded into the training dataframe for bookkeeping
    train_df = pd.concat([train_df, valid_df])
    test_gos = test_df['gos'].values
    train_data, train_labels = train
    val_data, val_labels = val
    test_data, test_labels = test
    print len(test_labels)
    logging.info("Data loaded in %d sec" % (time.time() - start_time))
    logging.info("Training data size: %d" % len(train_data[0]))
    logging.info("Validation data size: %d" % len(val_data[0]))
    logging.info("Test data size: %d" % len(test_data[0]))

    model_path = (DATA_ROOT + 'models/model_' + FUNCTION + '.h5')
    checkpointer = ModelCheckpoint(filepath=model_path,
                                   verbose=1,
                                   save_best_only=True)
    earlystopper = EarlyStopping(monitor='val_loss', patience=10, verbose=1)

    logging.info('Starting training the model')
    print train_data

    # DataGenerator batches (data, labels) pairs for the old fit_generator API.
    train_generator = DataGenerator(batch_size, nb_classes)
    train_generator.fit(train_data, train_labels)
    valid_generator = DataGenerator(batch_size, nb_classes)
    valid_generator.fit(val_data, val_labels)
    test_generator = DataGenerator(batch_size, nb_classes)
    test_generator.fit(test_data, test_labels)

    # NOTE(review): this overrides the is_train parameter (see docstring).
    is_train = True
    if is_train:
        model = get_model(params)
        model.fit_generator(train_generator,
                            samples_per_epoch=len(train_data[0]),
                            nb_epoch=nb_epoch,
                            validation_data=valid_generator,
                            nb_val_samples=len(val_data[0]),
                            max_q_size=batch_size,
                            callbacks=[checkpointer, earlystopper])

    # Reload the best checkpoint written by ModelCheckpoint.
    logging.info('Loading best model')
    start_time = time.time()
    model = load_model(model_path)
    logging.info('Loading time: %d' % (time.time() - start_time))

    start_time = time.time()
    preds = model.predict_generator(test_generator,
                                    val_samples=len(test_data[0]))
    running_time = time.time() - start_time
    logging.info('Running time: %d %d' % (running_time, len(test_data[0])))
    logging.info('Computing performance')

    # pred_file="pred"+FUNCTION+".txt"
    # test_file ="test"+FUNCTION+".txt"
    # gos_file = "test"+FUNCTION+"_goc.txt"
    # write_file(pred_file,preds)
    # write_file(test_file,test_labels)
    # write_file(gos_file,test_gos)

    f, p, r, t, preds_max = compute_performance(preds, test_labels, test_gos)
    roc_auc = compute_roc(preds, test_labels)
    mcc = compute_mcc(preds_max, test_labels)
    logging.info('Fmax measure: \t %f %f %f %f' % (f, p, r, t))
    logging.info('ROC AUC: \t %f ' % (roc_auc, ))
    logging.info('MCC: \t %f ' % (mcc, ))
    print('f :%.3f & p: %.3f & r: %.3f & roc_auc: %.3f & mcc: %.3f' %
          (f, p, r, roc_auc, mcc))
    write_results([f, p, r, roc_auc, mcc])

    # Pickle per-protein thresholded predictions alongside the ground truth.
    proteins = test_df['proteins']
    predictions = list()
    for i in xrange(preds_max.shape[0]):
        predictions.append(preds_max[i])
    df = pd.DataFrame({
        'proteins': proteins,
        'predictions': predictions,
        'gos': test_df['gos'],
        'labels': test_df['labels']
    })
    print df
    df.to_pickle('test' + FUNCTION + 'preds.pkl')
# Tail of the train/test split plus YOLOv3 model construction
# (notebook-exported script; the split head is outside this chunk).
test_list_IDs = list_IDs[int(train_test_ratio*length_data):]
test_list_xmls = list_xmls[int(train_test_ratio*length_data):]


# In[3]:


def yolo_loss_(y_true, y_pred):
    """Identity 'loss' for Keras: the model graph already emits the loss
    value as its output, so we simply pass y_pred through."""
    return y_pred


# In[4]:


train_generator = DataGenerator(train_list_IDs, train_list_xmls, num_class,
                                cls2id, anchors, batch_size, image_size,
                                max_boxes=max_boxes, is_training=True)
val_generator = DataGenerator(test_list_IDs, test_list_xmls, num_class,
                              cls2id, anchors, batch_size, image_size,
                              max_boxes=max_boxes, is_training=False)
model = yolov3_model(num_class, anchors, max_boxes, image_size, batch_size,
                     is_training=True)
# model = yolov3_model(num_class, anchors, max_boxes, image_size, is_training=True)

if finetune:
    # Halve the learning rate and start from pretrained darknet weights
    # (by_name=True so only matching layers are loaded).
    lr = lr*0.5
    yolov3_filepath = './models/yolov3_weights.h5'
    model.load_weights(yolov3_filepath, by_name=True)

# myyolo_loss = partial(yolo_loss, anchors=anchors, num_chasses=num_class, image_size=(416,416), ignore_thresh=0.5)
# myyolo_loss.__name__ = 'myyolo_loss'
def model():
    """Load pre-trained weights for the GO-function model, run test-set
    predictions and pickle them (Python 2 / old Keras API).

    NOTE(review): the fit_generator call is commented out, so this variant
    only loads previously saved weights and predicts — it does not train.
    """
    # set parameters:
    batch_size = 128
    nb_epoch = 100
    nb_classes = len(functions)
    start_time = time.time()
    logging.info("Loading Data")
    train, val, test, train_df, valid_df, test_df = load_data()
    # validation proteins are folded into the training dataframe for bookkeeping
    train_df = pd.concat([train_df, valid_df])
    test_gos = test_df['gos'].values
    train_data, train_labels = train
    val_data, val_labels = val
    test_data, test_labels = test
    logging.info("Data loaded in %d sec" % (time.time() - start_time))
    logging.info("Training data size: %d" % len(train_data[0]))
    logging.info("Validation data size: %d" % len(val_data[0]))
    logging.info("Test data size: %d" % len(test_data[0]))

    pre_model_path = DATA_ROOT + 'pre_model_weights_' + FUNCTION + '.pkl'
    model_path = DATA_ROOT + 'model_weights_' + FUNCTION + '.pkl'
    last_model_path = DATA_ROOT + 'model_weights_' + FUNCTION + '.last.pkl'
    checkpointer = MyCheckpoint(filepath=model_path,
                                verbose=1,
                                save_best_only=True,
                                save_weights_only=True)
    earlystopper = EarlyStopping(monitor='val_loss', patience=10, verbose=1)

    model = get_model()
    # logging.info('Loading pretrained weights')
    # load_model_weights(model, pre_model_path)

    logging.info('Starting training the model')
    # Generators batch (data, labels) pairs for the old *_generator API.
    train_generator = DataGenerator(batch_size, nb_classes)
    train_generator.fit(train_data, train_labels)
    valid_generator = DataGenerator(batch_size, nb_classes)
    valid_generator.fit(val_data, val_labels)
    test_generator = DataGenerator(batch_size, nb_classes)
    test_generator.fit(test_data, test_labels)

    # model.fit_generator(
    #     train_generator,
    #     samples_per_epoch=len(train_data[0]),
    #     nb_epoch=nb_epoch,
    #     validation_data=valid_generator,
    #     nb_val_samples=len(val_data[0]),
    #     max_q_size=batch_size,
    #     callbacks=[checkpointer, earlystopper])

    logging.info('Loading weights')
    load_model_weights(model, model_path)
    # Persist the full model (architecture + weights) next to the checkpoints.
    model.save(DATA_ROOT + 'model_%s.h5' % FUNCTION)

    logging.info('Predicting')
    preds = model.predict_generator(test_generator,
                                    val_samples=len(test_data[0]))
    # incon = 0
    # for i in xrange(len(test_data)):
    #     for j in xrange(len(functions)):
    #         childs = set(go[functions[j]]['children']).intersection(func_set)
    #         ok = True
    #         for n_id in childs:
    #             if preds[i, j] < preds[i, go_indexes[n_id]]:
    #                 preds[i, j] = preds[i, go_indexes[n_id]]
    #                 ok = False
    #         if not ok:
    #             incon += 1
    logging.info('Computing performance')
    f, p, r, preds_max = compute_performance(preds, test_labels, test_gos)
    # roc_auc = compute_roc(preds, test_labels)
    # logging.info('Fmax measure: \t %f %f %f' % (f, p, r))
    # logging.info('ROC AUC: \t %f ' % (roc_auc, ))
    # logging.info('Inconsistent predictions: %d' % incon)
    logging.info('Saving the predictions')
    # Pickle per-protein thresholded predictions alongside the ground truth.
    proteins = test_df['proteins']
    predictions = list()
    for i in xrange(preds_max.shape[0]):
        predictions.append(preds_max[i])
    df = pd.DataFrame({
        'proteins': proteins,
        'predictions': predictions,
        'gos': test_df['gos'],
        'labels': test_df['labels']
    })
    df.to_pickle(DATA_ROOT + 'test-' + FUNCTION + '-predictions.pkl')
    logging.info('Done in %d sec' % (time.time() - start_time))
def main(args):
    """Generate and save egocentric and allocentric image datasets for the
    content task selected by args.content (1-based index)."""
    content_classes = [
        PointToTargetContent,
        ChangeDetectionContent,
        OddOneOutContent,
        VisualSearchContent,
        MultipleObjectTrackingContent,
        RandomDotMotionDiscriminationContent,
    ]
    # args.content is 1-based on the command line.
    content = content_classes[args.content - 1]()
    generator = DataGenerator(content, retina=args.retina)

    print('egocentric images: {} episode, {} length'.format(
        args.episode, args.length))
    print('allocentric images: {} scene'.format(args.scene))
    print('image shape: {} height, {} width, {} channel'.format(128, 128, 3))

    # Egocentric pass: generate, persist, then free the in-memory buffer.
    print('collecting egocentric images...')
    generator.generate_egocentric_images(episode=args.episode,
                                         length=args.length,
                                         inplace=True)
    ego_path = generator.save_egocentric_images(dirname='images',
                                                prefix='egocentric_images')
    generator.reset_egocentric_images()
    print('save {}'.format(str(ego_path)))

    # Allocentric pass: same generate/save/reset cycle.
    print('collecting allocentric images...')
    generator.generate_allocentric_images(scene=args.scene, inplace=True)
    allo_path = generator.save_allocentric_images(dirname='images',
                                                  prefix='allocentric_images')
    generator.reset_allocentric_images()
    print('save {}'.format(str(allo_path)))
output_dim = 185 # Display frequency (print/#batch) display_step = 400 writer_path = "visualization" checkpoint_path = "checkpoints" if os.path.exists(writer_path): shutil.rmtree(writer_path) os.makedirs(writer_path) '''main part of training''' # Place data loading and pre-processing on cpu with tf.device('/cpu:0'): train_data = DataGenerator(train_list, image_size[0], output_dim, mode='training', batch_size=batch_size, shuffle=True) val_data = DataGenerator(val_list, image_size[0], output_dim, mode='inference', batch_size=batch_size, shuffle=False) # Create an reinitializable iterator given the data structure iterator = Iterator.from_structure(train_data.data.output_types, train_data.data.output_shapes) next_batch = iterator.get_next() # Ops for initializing the two different iterators
# Pre-training phase plus training-loop setup.
# NOTE(review): this chunk is truncated — the epoch loop body continues
# beyond the visible source.
if not use_loaded_model:
    print("Pre-Training")
    if pred_weight != 0:
        # Pre-train K on F's current representations of the training split.
        H = F.predict([x1[train_idx], x2[train_idx]], batch_size=batch_size)
        set_trainability(K, True)  # unfreeze K before fitting it
        K.fit(H,
              leaky_features[train_idx],
              epochs=10,
              batch_size=batch_size,
              sample_weight=sample_weight[train_idx])

# Batches the paired inputs, labels, leaky features and sample weights.
data_generator = DataGenerator(x1[train_idx],
                               x2[train_idx],
                               y[train_idx],
                               leaky_features[train_idx],
                               leaky_features_adv[train_idx],
                               sample_weight[train_idx],
                               batch_size=batch_size,
                               shuffle=True,
                               data_gen_mode=data_gen_mode)

# Per-epoch loss histories and early-stopping bookkeeping.
y_loss = []
l_loss = []
l_adv_loss = []
lr_adn = lr   # learning rate for the adversarial network
lr_k = lr     # learning rate for K
val_best = -1
num_no_improv = 0
best_epoch = -1
for epoch in range(num_epochs):
    y_loss_batch = []
tensorboard_cb = TensorBoard(log_dir=log_dir)
callbacks_list = [
    checkpoint_cb,
    # lr_cb,
    #earlystopping_cb,
    tensorboard_cb
]

# Generate data: cubic volumes of side args.image_size.
image_shape = (args.image_size, ) * 3
#FAIL: (144,144,144) #(160,160,144) #(192,192,144) #(208,208,144) #(240,240,144)
gen_factor = 1  # augmentation multiplier on the number of samples per split

train_gen = DataGenerator(train_ids,
                          src_dir,
                          n_samples=n_train * gen_factor,
                          rotation_range=0.4,
                          batch_size=args.batch_size,
                          image_shape=image_shape)
valid_gen = DataGenerator(valid_ids,
                          src_dir,
                          n_samples=n_val * gen_factor,
                          rotation_range=0.4,
                          batch_size=args.batch_size,
                          image_shape=image_shape)
test_gen = DataGenerator(test_ids,
                         src_dir,
                         n_samples=n_test * gen_factor,
                         rotation_range=0.4,
                         batch_size=args.batch_size,
                         image_shape=image_shape)

# NOTE(review): `len(train_ids * gen_factor)` repeats the list gen_factor
# times before taking len() — numerically equal to
# len(train_ids) * gen_factor for a list, but presumably the latter was
# intended; confirm train_ids is a list (this breaks for numpy arrays).
train_steps = len(train_ids * gen_factor) // args.batch_size
def train_model(batch_size=128, epochs=100, is_train=True,
                model_path='data/model.h5'):
    """Train the model (optionally), reload the best checkpoint, and report
    test-set metrics (Fmax, ROC AUC, MCC).

    Parameters
    ----------
    batch_size: mini-batch size used by all three data generators
    epochs: maximum number of training epochs
    is_train: when False, skip training and only evaluate the saved model
    model_path: where the best checkpoint is written to / read from
    """
    start_time = time.time()
    logging.info("Loading Data")
    train, valid, test = load_data()
    train_data, train_labels = train
    valid_data, valid_labels = valid
    test_data, test_labels = test
    logging.info("Data loaded in %d sec" % (time.time() - start_time))
    logging.info("Training data size: %d" % train_data.shape[0])
    logging.info("Validation data size: %d" % valid_data.shape[0])
    logging.info("Test data size: %d" % test_data.shape[0])

    # BUG FIX: the original re-assigned model_path = 'data/model.h5' here,
    # silently discarding the caller-supplied argument. The default value is
    # unchanged, so default calls behave identically.
    checkpointer = ModelCheckpoint(filepath=model_path,
                                   verbose=1,
                                   save_best_only=True)
    earlystopper = EarlyStopping(monitor='val_loss', patience=10, verbose=1)
    logging.info('Starting training the model')

    # Generators batch (data, labels) pairs for the *_generator API.
    train_generator = DataGenerator(batch_size)
    train_generator.fit(train_data, train_labels)
    valid_generator = DataGenerator(batch_size)
    valid_generator.fit(valid_data, valid_labels)
    test_generator = DataGenerator(batch_size)
    test_generator.fit(test_data, test_labels)

    if is_train:
        valid_steps = int(math.ceil(valid_data.shape[0] / batch_size))
        train_steps = int(math.ceil(train_data.shape[0] / batch_size))
        model = get_model()
        model.fit_generator(train_generator,
                            steps_per_epoch=train_steps,
                            epochs=epochs,
                            validation_data=valid_generator,
                            validation_steps=valid_steps,
                            max_queue_size=batch_size,
                            workers=12,
                            callbacks=[checkpointer, earlystopper])

    # Always evaluate the best checkpoint, not the last epoch's weights.
    logging.info('Loading best model')
    model = load_model(model_path)
    logging.info('Predicting')
    test_steps = int(math.ceil(test_data.shape[0] / batch_size))
    preds = model.predict_generator(test_generator,
                                    steps=test_steps,
                                    verbose=1)
    logging.info('Computing performance')
    # Labels are stored as a scipy sparse matrix; densify for the metrics.
    test_labels = test_labels.toarray()
    f, p, r, t, preds_max = compute_performance(preds, test_labels)
    roc_auc = compute_roc(preds, test_labels)
    mcc = compute_mcc(preds_max, test_labels)
    logging.info('Fmax measure: \t %f %f %f %f' % (f, p, r, t))
    logging.info('ROC AUC: \t %f ' % (roc_auc, ))
    logging.info('MCC: \t %f ' % (mcc, ))
    print('%.3f & %.3f & %.3f & %.3f & %.3f' % (f, p, r, roc_auc, mcc))
# Build a TF1-style graph: data placeholders, PCA transform and GP
# hyperparameters driven by a bunch-style config.
# NOTE(review): this chunk is truncated — the graph continues beyond the
# visible source.
import bunch

from transform import PCA
from utils import DataGenerator

config = bunch.Bunch({
    'amplitude': 1.0,        # initial kernel amplitude (sigma)
    'length_scale': 10.0,    # initial kernel length scale
    'n_features': 500,       # input feature dimension
    'n_components': 10,      # PCA components kept
    'n_iterations': 1000,    # optimization steps
    'learning_rate': 0.01
})

train_file = '../data/train.npy'
observed_data = DataGenerator(train_file, config.n_features)

with tf.name_scope('data'):
    X = tf.placeholder(dtype=tf.float64,
                       shape=[None, config.n_features],
                       name='features')
    y = tf.placeholder(dtype=tf.float64, shape=[None], name='targets')

with tf.name_scope('PCA'):
    # Dimensionality reduction applied inside the graph.
    pca = PCA(config.n_components)
    Xt = pca.fit_transform(X)

with tf.name_scope('hyperparameters'):
    # Trainable kernel amplitude, initialized from the config.
    sigma = tf.Variable(initial_value=config.amplitude,
                        name='sigma',
                        dtype=np.float64)
# Model hyperparameters, test-set generator and training callbacks.
# NOTE(review): this chunk is truncated — the EvaluateCodesCallBack call
# continues beyond the visible source.
test_historical = historical_hot(test_codes_x, len(code_map))

visit_rnn_dims = [200]
hyper_params = {
    'code_dims': [32, 32, 32, 32],
    'patient_dim': 16,
    'word_dim': 16,
    'patient_hidden_dims': [32],
    'code_hidden_dims': [64, 128],
    'visit_rnn_dims': visit_rnn_dims,
    'visit_attention_dim': 32,
    # note attention matches the last visit-RNN layer width
    'note_attention_dim': visit_rnn_dims[-1]
}

# Test batches keep their original order (shuffle=False) for evaluation.
test_codes_gen = DataGenerator(
    [test_codes_x, test_visit_lens, test_note_x, test_note_lens],
    shuffle=False)


def lr_schedule_fn(epoch, lr):
    """Step-decay learning-rate schedule for Keras' LearningRateScheduler.

    The incoming `lr` is ignored; the rate is fully determined by the epoch:
    0.01 (<20), 0.001 (<100), 0.0001 (<200), else 0.00001.
    """
    if epoch < 20:
        lr = 0.01
    elif epoch < 100:
        lr = 0.001
    elif epoch < 200:
        lr = 0.0001
    else:
        lr = 0.00001
    return lr


lr_scheduler = LearningRateScheduler(lr_schedule_fn)

test_callback = EvaluateCodesCallBack(test_codes_gen,
def model(batch_size=128, nb_epoch=100, is_train=True):
    """Train (optionally) the sequence model, then run its embedding
    sub-model over the test set and print the output shape.

    NOTE(review): the unconditional `return` after printing `output.shape`
    makes everything below it dead code — presumably the prediction and
    performance sections were deliberately disabled while debugging the
    feature-extraction path; confirm before re-enabling.
    """
    # set parameters:
    nb_classes = len(functions)
    start_time = time.time()
    logging.info("Loading Data")
    train, val, test, train_df, valid_df, test_df = load_data()
    # validation proteins are folded into the training dataframe for bookkeeping
    train_df = pd.concat([train_df, valid_df])
    test_gos = test_df['gos'].values
    train_data, train_labels = train
    val_data, val_labels = val
    test_data, test_labels = test
    logging.info("Data loaded in %d sec" % (time.time() - start_time))
    logging.info("Training data size: %d" % len(train_data))
    logging.info("Validation data size: %d" % len(val_data))
    logging.info("Test data size: %d" % len(test_data))

    model_path = DATA_ROOT + 'models/model_seq_' + FUNCTION + '.h5'
    checkpointer = ModelCheckpoint(filepath=model_path,
                                   verbose=1,
                                   save_best_only=True)
    earlystopper = EarlyStopping(monitor='val_loss', patience=10, verbose=1)

    logging.info('Starting training the model')
    # Generators batch (data, labels) pairs for the old *_generator API.
    train_generator = DataGenerator(batch_size, nb_classes)
    train_generator.fit(train_data, train_labels)
    valid_generator = DataGenerator(batch_size, nb_classes)
    valid_generator.fit(val_data, val_labels)
    test_generator = DataGenerator(batch_size, nb_classes)
    test_generator.fit(test_data, test_labels)

    if is_train:
        model = get_model()
        model.fit_generator(train_generator,
                            samples_per_epoch=len(train_data),
                            nb_epoch=nb_epoch,
                            validation_data=valid_generator,
                            nb_val_samples=len(val_data),
                            max_q_size=batch_size,
                            callbacks=[checkpointer, earlystopper])

    logging.info('Loading best model')
    model = load_model(model_path)
    # Grab the second layer — presumably the embedding/feature sub-model;
    # confirm against get_model()'s architecture.
    model = model.layers[1]
    output = model.predict_generator(test_generator,
                                     val_samples=len(test_data))
    print((output.shape))
    return  # everything below is unreachable (see docstring)

    logging.info('Predicting')
    preds = model.predict_generator(test_generator,
                                    val_samples=len(test_data))
    # incon = 0
    # for i in range(len(test_data)):
    #     for j in range(len(functions)):
    #         childs = set(go[functions[j]]['children']).intersection(func_set)
    #         ok = True
    #         for n_id in childs:
    #             if preds[i, j] < preds[i, go_indexes[n_id]]:
    #                 preds[i, j] = preds[i, go_indexes[n_id]]
    #                 ok = False
    #         if not ok:
    #             incon += 1
    logging.info('Computing performance')
    f, p, r, t, preds_max = compute_performance(preds, test_labels, test_gos)
    roc_auc = compute_roc(preds, test_labels)
    mcc = compute_mcc(preds_max, test_labels)
    logging.info('Fmax measure: \t %f %f %f %f' % (f, p, r, t))
    logging.info('ROC AUC: \t %f ' % (roc_auc, ))
    logging.info('MCC: \t %f ' % (mcc, ))
    print(('%.3f & %.3f & %.3f & %.3f & %.3f' % (f, p, r, roc_auc, mcc)))
    # logging.info('Inconsistent predictions: %d' % incon)
    # logging.info('Saving the predictions')
    # proteins = test_df['proteins']
    # predictions = list()
    # for i in range(preds_max.shape[0]):
    #     predictions.append(preds_max[i])
    # df = pd.DataFrame(
    #     {
    #         'proteins': proteins, 'predictions': predictions,
    #         'gos': test_df['gos'], 'labels': test_df['labels']})
    # df.to_pickle(DATA_ROOT + 'test-' + FUNCTION + '-predictions.pkl')
    # logging.info('Done in %d sec' % (time.time() - start_time))
    function_centric_performance(functions, preds.T, test_labels.T)
# Script entry point: sample a noisy sine dataset, wrap it in a DataLoader
# and build the LSTM trainer (training/evaluation lines are commented out).
if __name__ == "__main__":
    import numpy as np
    import torch.optim as optim
    from torch.utils.data import Dataset, DataLoader
    from utils import sampling, DataGenerator

    args = parameters.get_config()
    device = torch.device("cuda:0" if args.cuda else "cpu")

    #data = sampling(50, 100, "sin")
    #data2 = sampling(50, 100, "cos", phase=0.5*np.pi)
    #data = np.concatenate([data, data2], 0)
    # noise=True yields (clean, noisy) pairs: train on noised_data, target data.
    data, noised_data = sampling(100, 100, "sin", noise=True)
    generator = DataGenerator(noised_data, data)
    inputs = DataLoader(generator, batch_size=args.batch_size)

    trainer = Trainer(args)
    #trainer.build_model(RNN)
    trainer.build_model(LSTM)
    #trainer.train(inputs)

    #rnn = trainer.model
    #rnn.eval()
    #output_log = []
    #for i in range(99):
    #    if i == 0:
    #        #cur_input = torch.zeros([1,2])
    #        cur_input = torch.zeros([1, 2])
    #        cur_input.data.numpy()[0, 0] = -0.75
    #        state = None