def load_word_embeddings(emb_path, emb_dim, data_file, min_freq=1, verbose=True):
    ## pre-load emb words
    from deepats import ets_reader
    from deepats.w2vEmbReader import W2VEmbReader as EmbReader

    emb_reader = EmbReader(emb_path, emb_dim)
    emb_words = emb_reader.load_words()

    text = U.read_col(data_file, col=-1, type='string')
    vocab = ets_reader.create_vocab(text,
                                    tokenize_text=True,
                                    to_lower=True,
                                    min_word_freq=min_freq,
                                    emb_words=emb_words)
    # vocab = {'<pad>':0, '<unk>':1, '<num>':2, .....}
    #######################################################
    pad = '<pad>'
    unk = '<unk>'
    num = '<num>'

    words = set(vocab)
    words.discard(pad)
    words.discard(unk)
    words.discard(num)

    emb_file = emb_path.format(emb_dim)
    word2emb = load_embeddings(emb_file, filter_words=words, verbose=verbose)

    n = len(word2emb) + 3
    d = word2emb[next(iter(word2emb))].size
    E = np.zeros([n, d], dtype=np.float32)  # rows 0..2 (<pad>, <unk>, <num>) keep the all-zero embedding

    word_vocab = Vocab(unk_index=1)
    word_vocab.feed(pad)  # <pad> is at index 0 in word vocab
    word_vocab.feed(unk)  # <unk> is at index 1 in word vocab --> so idx=1 returned for unknown toks
    word_vocab.feed(num)  # <num> is at index 2 in word vocab

    for word in list(word2emb):
        idx = word_vocab.feed(word)
        E[idx, :] = word2emb[word]

    return E, word_vocab
def load_word_embeddings_NEW(emb_path, emb_dim, data_file, min_freq=1, unk='<unk>', eos='+', verbose=True):
    ## pre-load emb words
    from deepats import ets_reader
    from deepats.w2vEmbReader import W2VEmbReader as EmbReader

    emb_reader = EmbReader(emb_path, emb_dim)
    emb_words = emb_reader.load_words()

    text = U.read_col(data_file, col=-1, type='string')
    vocab = ets_reader.create_vocab(text,
                                    tokenize_text=True,
                                    to_lower=True,
                                    min_word_freq=min_freq,
                                    emb_words=emb_words)
    #######################################################
    words = set(vocab)
    words.discard(unk)

    emb_file = emb_path.format(emb_dim)
    word2emb = load_embeddings(emb_file, filter_words=words, verbose=verbose)

    n = len(word2emb) + 3
    d = word2emb[next(iter(word2emb))].size
    E = np.zeros([n, d], dtype=np.float32)  # <unk> is given all-zero embedding... at E[0,:]

    word_vocab = Vocab()
    word_vocab.feed(unk)
    if eos:
        word_vocab.feed(eos)

    for word in list(word2emb):
        idx = word_vocab.feed(word)
        E[idx, :] = word2emb[word]

    return E, word_vocab
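# Usage sketch (not part of the original module): how the (E, word_vocab) pair returned
# above could seed a Keras Embedding layer. Paths are illustrative; the
# 'embed/glove.6B.{}d.txt' template mirrors the one used by the hyperas model() below,
# and 'data/train.tsv' is a hypothetical data file.
def _embedding_layer_from_pretrained(emb_path='embed/glove.6B.{}d.txt',
                                     emb_dim=50,
                                     data_file='data/train.tsv'):
    from keras.layers import Embedding
    E, word_vocab = load_word_embeddings(emb_path, emb_dim, data_file, min_freq=2)
    # E rows are indexed by word_vocab; mask_zero=False matches the models defined below
    layer = Embedding(input_dim=E.shape[0], output_dim=E.shape[1],
                      weights=[E], mask_zero=False)
    return layer, word_vocab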
def data():
    from keras.utils import np_utils
    from keras.preprocessing import sequence
    import keras.backend as K
    import numpy as np
    import pickle as pk
    import os

    from deepats.w2vEmbReader import W2VEmbReader as EmbReader
    import deepats.ets_reader as dataset
    from deepats.ets_config import get_args

    args = get_args()

    emb_reader = EmbReader(args.emb_path, args.emb_dim)
    emb_words = emb_reader.load_words()

    train_df, dev_df, test_df, vocab, overal_maxlen, qwks = dataset.get_data(
        args.data_path, emb_words=emb_words, seed=args.seed)

    train_x = train_df['text'].values
    train_y = train_df['y'].values
    dev_x = dev_df['text'].values
    dev_y = dev_df['y'].values
    test_x = test_df['text'].values
    test_y = test_df['y'].values

    abs_vocab_file = os.path.join(args.abs_out, 'vocab.pkl')
    with open(abs_vocab_file, 'wb') as vocab_file:
        pk.dump(vocab, vocab_file)

    train_x = sequence.pad_sequences(train_x, maxlen=overal_maxlen)
    dev_x = sequence.pad_sequences(dev_x, maxlen=overal_maxlen)
    test_x = sequence.pad_sequences(test_x, maxlen=overal_maxlen)

    return train_x, train_y, dev_x, dev_y, test_x, test_y, overal_maxlen, qwks
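# Note on the padding step above (illustrative, not part of the original module):
# keras.preprocessing.sequence.pad_sequences left-pads with 0 and truncates from the
# front by default, so every text becomes a fixed-length int32 row of length overal_maxlen:
#
#     >>> from keras.preprocessing import sequence
#     >>> sequence.pad_sequences([[5, 7], [1, 2, 3, 4]], maxlen=3)
#     array([[0, 5, 7],
#            [2, 3, 4]], dtype=int32)
#
# Index 0 is therefore reserved for padding, which is why the vocabularies built above
# keep the special tokens at the lowest indices.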
def run(argv=None): parser = argparse.ArgumentParser() parser.add_argument("-o", "--out-dir", dest="out_dir_path", type=str, metavar='<str>', required=True, help="The path to the output directory") parser.add_argument( "-p", "--prompt", dest="prompt_id", type=int, metavar='<int>', required=False, help="Promp ID for ASAP dataset. '0' means all prompts.") parser.add_argument("-t", "--type", dest="model_type", type=str, metavar='<str>', default='regp', help="Model type (reg|regp|breg|bregp) (default=regp)") parser.add_argument( "-u", "--rec-unit", dest="recurrent_unit", type=str, metavar='<str>', default='lstm', help="Recurrent unit type (lstm|gru|simple) (default=lstm)") parser.add_argument( "-a", "--algorithm", dest="algorithm", type=str, metavar='<str>', default='rmsprop', help= "Optimization algorithm (rmsprop|sgd|adagrad|adadelta|adam|adamax) (default=rmsprop)" ) parser.add_argument("-l", "--loss", dest="loss", type=str, metavar='<str>', default='mse', help="Loss function (mse|mae) (default=mse)") parser.add_argument("-e", "--embdim", dest="emb_dim", type=int, metavar='<int>', default=50, help="Embeddings dimension (default=50)") parser.add_argument( "-c", "--cnndim", dest="cnn_dim", type=int, metavar='<int>', default=0, help="CNN output dimension. '0' means no CNN layer (default=0)") parser.add_argument("-w", "--cnnwin", dest="cnn_window_size", type=int, metavar='<int>', default=3, help="CNN window size. (default=3)") parser.add_argument( "-r", "--rnndim", dest="rnn_dim", type=int, metavar='<int>', default=300, help="RNN dimension. '0' means no RNN layer (default=300)") parser.add_argument("-b", "--batch-size", dest="batch_size", type=int, metavar='<int>', default=32, help="Batch size (default=32)") parser.add_argument("-v", "--vocab-size", dest="vocab_size", type=int, metavar='<int>', default=4000, help="Vocab size (default=4000)") parser.add_argument( "--aggregation", dest="aggregation", type=str, metavar='<str>', default='mot', help= "The aggregation method for regp and bregp types (mot|attsum|attmean) (default=mot)" ) parser.add_argument( "--dropout", dest="dropout_prob", type=float, metavar='<float>', default=0.5, help= "The dropout probability. To disable, give a negative number (default=0.5)" ) parser.add_argument( "--vocab-path", dest="vocab_path", type=str, metavar='<str>', help="(Optional) The path to the existing vocab file (*.pkl)") parser.add_argument("--skip-init-bias", dest="skip_init_bias", action='store_true', help="Skip initialization of the last layer bias") parser.add_argument( "--emb", dest="emb_path", type=str, metavar='<str>', help="The path to the word embeddings file (Word2Vec format)") parser.add_argument("--epochs", dest="epochs", type=int, metavar='<int>', default=100, help="Number of epochs (default=50)") parser.add_argument( "--maxlen", dest="maxlen", type=int, metavar='<int>', default=0, help= "Maximum allowed number of words during training. 
'0' means no limit (default=0)" ) parser.add_argument("--seed", dest="seed", type=int, metavar='<int>', default=1234, help="Random seed (default=1234)") ## dsv parser.add_argument("--min-word-freq", dest="min_word_freq", type=int, metavar='<int>', default=2, help="Min word frequency") parser.add_argument("--stack", dest="stack", type=int, metavar='<int>', default=1, help="how deep to stack core RNN") parser.add_argument("--skip-emb-preload", dest="skip_emb_preload", action='store_true', help="Skip preloading embeddings") parser.add_argument("--tokenize-old", dest="tokenize_old", action='store_true', help="use old tokenizer") parser.add_argument("-ar", "--abs-root", dest="abs_root", type=str, metavar='<str>', required=False, help="Abs path to root directory") parser.add_argument("-ad", "--abs-data", dest="abs_data", type=str, metavar='<str>', required=False, help="Abs path to data directory") parser.add_argument("-ao", "--abs-out", dest="abs_out", type=str, metavar='<str>', required=False, help="Abs path to output directory") parser.add_argument("-dp", "--data-path", dest="data_path", type=str, metavar='<str>', required=False, help="Abs path to output directory") ## if argv is None: args = parser.parse_args() else: args = parser.parse_args(argv) out_dir = args.abs_out U.mkdir_p(os.path.join(out_dir, 'preds')) U.set_logger(out_dir) U.print_args(args) assert args.model_type in {'reg', 'regp', 'breg', 'bregp', 'rwa'} assert args.algorithm in { 'rmsprop', 'sgd', 'adagrad', 'adadelta', 'adam', 'adamax' } assert args.loss in {'mse', 'mae', 'kappa', 'soft_kappa'} assert args.recurrent_unit in {'lstm', 'gru', 'simple', 'rwa'} assert args.aggregation in {'mot', 'attsum', 'attmean'} if args.seed > 0: RANDSEED = args.seed else: RANDSEED = np.random.randint(10000) np.random.seed(RANDSEED) ####################### #from deepats.util import GPUtils as GPU import GPUtil as GPU mem = GPU.avail_mem() logger.info('AVAIL GPU MEM == %.4f' % mem) # if mem < 0.05: # return None ############################################################################################################################### ## Prepare data # emb_words = None if not args.skip_emb_preload: #if args.emb_path: from deepats.w2vEmbReader import W2VEmbReader as EmbReader logger.info('Loading embedding vocabulary...') emb_reader = EmbReader(args.emb_path, emb_dim=args.emb_dim) emb_words = emb_reader.load_words() train_df, dev_df, test_df, vocab, overal_maxlen, qwks = dataset.get_data( args.data_path, emb_words=emb_words, seed=RANDSEED) vocab_size = len(vocab) train_x = train_df['text'].values train_y = train_df['y'].values dev_x = dev_df['text'].values dev_y = dev_df['y'].values test_x = test_df['text'].values test_y = test_df['y'].values # Dump vocab abs_vocab_file = os.path.join(out_dir, 'vocab.pkl') with open(os.path.join(out_dir, 'vocab.pkl'), 'wb') as vocab_file: pk.dump(vocab, vocab_file) if args.recurrent_unit == 'rwa': setattr(args, 'model_type', 'rwa') # Pad sequences for mini-batch processing from keras.preprocessing import sequence if args.model_type in {'breg', 'bregp', 'rwa'}: assert args.rnn_dim > 0 train_x = sequence.pad_sequences(train_x, maxlen=overal_maxlen) dev_x = sequence.pad_sequences(dev_x, maxlen=overal_maxlen) test_x = sequence.pad_sequences(test_x, maxlen=overal_maxlen) else: train_x = sequence.pad_sequences(train_x) dev_x = sequence.pad_sequences(dev_x) test_x = sequence.pad_sequences(test_x) 
############################################################################################################################### ## Some statistics # train_y = np.array(train_y, dtype=K.floatx()) # dev_y = np.array(dev_y, dtype=K.floatx()) # test_y = np.array(test_y, dtype=K.floatx()) bincounts, mfs_list = U.bincounts(train_y) with open(os.path.join(out_dir, 'bincounts.txt'), 'w') as output_file: for bincount in bincounts: output_file.write(str(bincount) + '\n') train_mean = train_y.mean(axis=0) train_std = train_y.std(axis=0) dev_mean = dev_y.mean(axis=0) dev_std = dev_y.std(axis=0) test_mean = test_y.mean(axis=0) test_std = test_y.std(axis=0) logger.info('Statistics:') logger.info(' TEST KAPPAS (float, int)= \033[92m%.4f (%.4f)\033[0m ' % (qwks[1], qwks[0])) logger.info(' RANDSEED = ' + str(RANDSEED)) logger.info(' train_x shape: ' + str(np.array(train_x).shape)) logger.info(' dev_x shape: ' + str(np.array(dev_x).shape)) logger.info(' test_x shape: ' + str(np.array(test_x).shape)) logger.info(' train_y shape: ' + str(train_y.shape)) logger.info(' dev_y shape: ' + str(dev_y.shape)) logger.info(' test_y shape: ' + str(test_y.shape)) logger.info(' train_y mean: %s, stdev: %s, MFC: %s' % (str(train_mean), str(train_std), str(mfs_list))) logger.info(' overal_maxlen: ' + str(overal_maxlen)) ############################################################################################################################### ## Optimizaer algorithm # from deepats.optimizers import get_optimizer #optimizer = get_optimizer(args) from keras import optimizers ## RMS-PROP #optimizer = optimizers.RMSprop(lr=0.00075, rho=0.9, clipnorm=1) #optimizer = optimizers.RMSprop(lr=0.001, rho=0.9, clipnorm=1) optimizer = optimizers.RMSprop(lr=0.001, rho=0.9, epsilon=1e-6, clipnorm=10) #optimizer = optimizers.RMSprop(lr=0.0018, rho=0.88, epsilon=1e-6, clipnorm=10) #optimizer = optimizers.RMSprop(lr=0.001, rho=0.9, epsilon=1e-8, clipnorm=10) #optimizer = optimizers.RMSprop(lr=0.004, rho=0.85, epsilon=1e-6, clipnorm=10)# best 2.1 (RWA) #optimizer = optimizers.RMSprop(lr=0.0025, rho=0.8, epsilon=1e-8, clipnorm=10) # best 2.1 (RWA) #optimizer = optimizers.RMSprop(lr=0.001, rho=0.9, epsilon=1e-8, clipnorm=10) # best 2.3 (RWA) #optimizer = optimizers.RMSprop(lr=0.0025, rho=0.88, epsilon=1e-8, clipnorm=10) # best 2.3 (RWA) #optimizer = optimizers.RMSprop(lr=0.004, rho=0.85, epsilon=1e-8, clipnorm=10) # best 2.10 (RWA) ## OTHER METHODS #optimizer = optimizers.Adam(lr=0.0018, clipnorm=5) #optimizer = optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, clipnorm=1) #optimizer = optimizers.Adam(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=1e-06, clipnorm=10) #optimizer = optimizers.Adamax(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=1e-08, clipnorm=10) #optimizer = optimizers.SGD(lr=0.05, momentum=0, decay=0.0, nesterov=False, clipnorm=10) #optimizer = optimizers.Adagrad(lr=0.03, epsilon=1e-08, clipnorm=10) #optimizer = optimizers.Adadelta(lr=1.0, rho=0.95, epsilon=1e-06, clipnorm=10) ############################################################################################################################### ## Building model # if args.loss == 'mse': loss = 'mean_squared_error' metric = kappa_metric metric_name = 'kappa_metric' elif args.loss == 'mae': loss = 'mean_absolute_error' metric = kappa_metric metric_name = 'kappa_metric' elif args.loss == 'kappa': loss = kappa_loss metric = kappa_metric metric_name = 'kappa_metric' ######################################################## from deepats.models import 
create_model model = create_model(args, train_y.mean(axis=0), overal_maxlen, vocab) ############################################ ''' # test yaml serialization/de-serialization yaml = model.to_yaml() print yaml from deepats.my_layers import MeanOverTime from deepats.rwa import RWA model = model_from_yaml(yaml, custom_objects={'MeanOverTime': MeanOverTime, 'RWA':RWA}) ''' ############################################ model.compile(loss=loss, optimizer=optimizer, metrics=[metric]) print(model.summary()) ############################################################################################################################### ## Plotting model # # from keras.utils.visualize_util import plot # plot(model, to_file = os.path.join(out_dir,'model.png')) ############################################################################################################################### ## Save model architecture # logger.info('Saving model architecture') with open(os.path.join(out_dir, 'model_arch.json'), 'w') as arch: arch.write(model.to_json(indent=2)) logger.info(' Done') ############################################################################################################################### ## Evaluator # evl = Evaluator(dataset, args.prompt_id, out_dir, dev_x, test_x, dev_df, test_df) ############################################################################################################################### ## Training # logger.info( '----------------------------------------------------------------') logger.info('Initial Evaluation:') evl.evaluate(model, -1, print_info=True) total_train_time = 0 total_eval_time = 0 for ii in range(args.epochs): # Training t0 = time() train_history = model.fit(train_x, train_y, batch_size=args.batch_size, epochs=1, verbose=0) tr_time = time() - t0 total_train_time += tr_time # Evaluate t0 = time() evl.evaluate(model, ii) evl_time = time() - t0 total_eval_time += evl_time # Print information train_loss = train_history.history['loss'][0] train_metric = train_history.history[metric_name][0] logger.info('Epoch %d, train: %is, evaluation: %is' % (ii, tr_time, evl_time)) logger.info('[Train] loss: %.4f, metric: %.4f' % (train_loss, train_metric)) evl.print_info() ############################################################################################################################### ## Summary of the results # logger.info('Training: %i seconds in total' % total_train_time) logger.info('Evaluation: %i seconds in total' % total_eval_time) evl.print_final_info()
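# Background for the kappa_loss / kappa_metric pair selected above (their Keras
# definitions appear in the hyperas model() functions below; the imports used by run()
# are not shown in this excerpt). Both are a differentiable relaxation of quadratic
# weighted kappa: with u = sum((x - t)^2) and v = x . (t - mean(t)),
# kappa_metric = v / (v + u/2) and kappa_loss = u / (2v + u), hence
# kappa_loss == 1 - kappa_metric. A small numpy check (illustrative only):
import numpy as np

def _check_kappa_relation(n=100, seed=0):
    rng = np.random.RandomState(seed)
    t = rng.rand(n)                  # targets rescaled to [0, 1]
    x = t + 0.1 * rng.randn(n)       # noisy predictions
    u = np.sum((x - t) ** 2)
    v = np.dot(x, t - t.mean())
    metric = v / (v + 0.5 * u)
    loss = u / (2 * v + u)
    return np.isclose(loss, 1.0 - metric)  # True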
def data():
    from keras.utils import np_utils
    from keras.preprocessing import sequence
    import keras.backend as K
    import numpy as np
    import pickle as pk

    import deepats.asap_reader as dataset
    from deepats.w2vEmbReader import W2VEmbReader as EmbReader
    from deepats.config import get_args
    import logging

    logger = logging.getLogger(__name__)

    args = get_args()
    if args.seed > 0:
        np.random.seed(args.seed)

    emb_reader = EmbReader(args.emb_path, emb_dim=args.emb_dim)
    emb_words = emb_reader.load_words()

    dataset.set_score_range(args.data_set)
    ((train_x, train_y, train_pmt),
     (dev_x, dev_y, dev_pmt),
     (test_x, test_y, test_pmt),
     vocab, vocab_size, overal_maxlen, num_outputs) = dataset.get_data(
        (args.train_path, args.dev_path, args.test_path),
        args.prompt_id,
        args.vocab_size,
        args.maxlen,
        tokenize_text=True,
        to_lower=True,
        sort_by_len=False,
        vocab_path=args.vocab_path,
        min_word_freq=args.min_word_freq,
        emb_words=emb_words)

    abs_vocab_file = args.abs_out_path + '/vocab.pkl'
    with open(abs_vocab_file, 'wb') as vocab_file:
        pk.dump(vocab, vocab_file)

    train_x = sequence.pad_sequences(train_x, maxlen=overal_maxlen)
    dev_x = sequence.pad_sequences(dev_x, maxlen=overal_maxlen)
    test_x = sequence.pad_sequences(test_x, maxlen=overal_maxlen)

    train_y = np.array(train_y, dtype=K.floatx())
    dev_y = np.array(dev_y, dtype=K.floatx())
    test_y = np.array(test_y, dtype=K.floatx())

    if args.prompt_id:
        train_pmt = np.array(train_pmt, dtype='int32')
        dev_pmt = np.array(dev_pmt, dtype='int32')
        test_pmt = np.array(test_pmt, dtype='int32')

    # keep the original (integer) scores for kappa evaluation
    dev_y_org = dev_y.astype(dataset.get_ref_dtype())
    test_y_org = test_y.astype(dataset.get_ref_dtype())

    # rescale scores to the model-friendly range
    train_y = dataset.get_model_friendly_scores(train_y, train_pmt)
    dev_y = dataset.get_model_friendly_scores(dev_y, dev_pmt)
    test_y = dataset.get_model_friendly_scores(test_y, test_pmt)

    return train_x, train_y, dev_x, dev_y, test_x, test_y, dev_y_org, test_y_org, overal_maxlen
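# The dev_y_org / test_y_org copies above keep the original integer scores for kappa
# evaluation, while get_model_friendly_scores gives the network targets in [0, 1].
# A minimal sketch of that convention (the real implementation lives in
# deepats.asap_reader and may differ in detail, e.g. per-prompt score ranges):
import numpy as np

def _model_friendly_scores(scores, low, high):
    # linearly rescale raw prompt scores in [low, high] to [0, 1]
    return (np.asarray(scores, dtype='float32') - low) / (high - low)

def _original_scores(scaled, low, high):
    # invert the rescaling before computing quadratic weighted kappa on integers
    return np.rint(np.asarray(scaled) * (high - low) + low).astype('int32')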
def model(train_x, train_y, dev_x, dev_y, test_x, test_y, dev_y_org, test_y_org, overal_maxlen):
    from keras.models import Sequential
    from keras.layers import Dense, Dropout, Activation, GlobalAveragePooling1D
    from keras.layers.embeddings import Embedding
    from keras.layers.recurrent import LSTM
    from keras import optimizers
    import keras.backend as K
    import pickle as pk
    import numpy as np

    from deepats.optimizers import get_optimizer
    from deepats.asap_evaluator import Evaluator
    import deepats.asap_reader as dataset
    from deepats.config import get_args
    from deepats.my_layers import MeanOverTime
    from deepats.rwa import RWA

    import string
    import random

    def random_id(size=6, chars=string.ascii_uppercase + string.digits):
        return ''.join(random.choice(chars) for _ in range(size))

    import time
    ms = int(round(time.time() * 1000))
    rand_seed = ms % (2**32 - 1)
    random.seed(rand_seed)

    args = get_args()
    model_id = random_id()

    # differentiable approximations of quadratic weighted kappa
    def kappa_metric(t, x):
        u = 0.5 * K.sum(K.square(x - t))
        v = K.dot(K.transpose(x), t - K.mean(t))
        return v / (v + u)

    def kappa_loss(t, x):
        u = K.sum(K.square(x - t))
        v = K.dot(K.squeeze(x, 1), K.squeeze(t - K.mean(t), 1))
        return u / (2 * v + u)

    # {{...}} fields are hyperas search-space templates
    lr = {{lognormal(-3 * 2.3, .8)}}
    lr = lr * 2
    rho = {{normal(.875, .04)}}
    clipnorm = {{uniform(1, 15)}}
    eps = 1e-6
    opt = optimizers.RMSprop(lr=lr, rho=rho, clipnorm=clipnorm, epsilon=eps)
    loss = kappa_loss
    metric = kappa_metric

    dataset.set_score_range(args.data_set)
    evl = Evaluator(dataset, args.prompt_id, args.abs_out_path, dev_x, test_x,
                    dev_y, test_y, dev_y_org, test_y_org, model_id=model_id)

    abs_vocab_file = args.abs_out_path + '/vocab.pkl'
    with open(abs_vocab_file, 'rb') as vocab_file:
        vocab = pk.load(vocab_file)

    train_y_mean = train_y.mean(axis=0)
    if train_y_mean.ndim == 0:
        train_y_mean = np.expand_dims(train_y_mean, axis=1)
    num_outputs = len(train_y_mean)

    mask_zero = False
    emb_dim = {{choice([50, 100, 200, 300])}}
    rnn_dim = {{uniform(50, 300)}}
    rnn_dim = int(rnn_dim)

    model = Sequential()
    model.add(Embedding(args.vocab_size, emb_dim, mask_zero=mask_zero))
    model.add(RWA(rnn_dim))
    model.add(Dense(num_outputs))
    if not args.skip_init_bias:
        # initialize the output bias to the logit of the mean training score
        bias_value = (np.log(train_y_mean) - np.log(1 - train_y_mean)).astype(K.floatx())
        model.layers[-1].bias.set_value(bias_value)
    model.add(Activation('tanh'))
    model.emb_index = 0

    # load pre-trained GloVe vectors into the embedding layer
    emb_path = 'embed/glove.6B.{}d.txt'.format(emb_dim)
    abs_emb_path = args.abs_root + emb_path
    from deepats.w2vEmbReader import W2VEmbReader as EmbReader
    emb_reader = EmbReader(abs_emb_path, emb_dim=emb_dim)
    emb_reader.load_embeddings(vocab)
    emb_wts = emb_reader.get_emb_matrix_given_vocab(
        vocab, model.layers[model.emb_index].get_weights()[0])
    wts = model.layers[model.emb_index].get_weights()
    wts[0] = emb_wts
    model.layers[model.emb_index].set_weights(wts)

    model.compile(loss=loss, optimizer=opt, metrics=[metric])
    model_yaml = model.to_yaml()

    print('model_id: %s' % (model_id))
    print(model_yaml)
    print('optimizer: lr= %.4f, rho= %.4f, clipnorm= %.4f, epsilon= %.4f'
          % (lr, rho, clipnorm, eps))
    print('PARAMS\t%s\tlr= %.4f\trho= %.4f\tclip= %.4f\temb= %d\trnn= %d'
          % (model_id, lr, rho, clipnorm, emb_dim, rnn_dim))

    for i in range(args.epochs):
        train_history = model.fit(train_x, train_y,
                                  batch_size=args.batch_size,
                                  epochs=1,
                                  verbose=0)
        evl.evaluate(model, i)
        evl.output_info()
        # stop early when dev kappa stays weak
        if i > 5 and evl.dev_metric < 0.4:
            break
        if i > 10 and evl.dev_metric < 0.5:
            break
        if i > 15 and evl.dev_metric < 0.6:
            break

    best_dev_kappa = evl.best_dev
    best_test_kappa = evl.best_test
    print('Best dev kappa:', best_dev_kappa)

    return {
        'loss': 1 - best_dev_kappa,
        'status': STATUS_OK,
        'model': model.to_yaml(),
        'weights': pk.dumps(model.get_weights())
    }
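# Why the bias initialization above uses log(p) - log(1 - p): it is the logit of the mean
# (rescaled) training score, so a zero-weight output layer followed by a sigmoid would
# already predict that mean. With the tanh activation actually used here the match is only
# approximate; the sketch below (illustrative, not part of the original code) checks the
# sigmoid case.
import numpy as np

def _check_mean_score_bias(p=0.62):
    logit = np.log(p) - np.log(1.0 - p)
    sigmoid = 1.0 / (1.0 + np.exp(-logit))
    return np.isclose(sigmoid, p)  # True: the untrained net outputs the training mean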
def model(train_x, train_y, dev_x, dev_y, test_x, test_y, overal_maxlen, qwks):
    from keras.models import Sequential
    from keras.layers import Dense, Dropout, Activation, GlobalAveragePooling1D
    from keras.layers.embeddings import Embedding
    from keras.layers.recurrent import LSTM
    from keras.initializers import Constant
    from keras import optimizers
    import keras.backend as K

    from deepats.my_layers import MeanOverTime
    from deepats.rwa import RWA

    import pickle as pk
    import numpy as np
    import string
    import random
    import os

    from deepats.optimizers import get_optimizer
    from deepats.ets_evaluator import Evaluator
    import deepats.ets_reader as dataset
    from deepats.ets_config import get_args
    import GPUtil

    def random_id(size=6, chars=string.ascii_uppercase + string.digits):
        return ''.join(random.choice(chars) for _ in range(size))

    # differentiable approximations of quadratic weighted kappa
    def kappa_metric(t, x):
        u = 0.5 * K.sum(K.square(x - t))
        v = K.dot(K.transpose(x), t - K.mean(t))
        return v / (v + u)

    def kappa_loss(t, x):
        u = K.sum(K.square(x - t))
        v = K.dot(K.squeeze(x, 1), K.squeeze(t - K.mean(t), 1))
        return u / (2 * v + u)

    import time
    ms = int(round(time.time() * 1000))
    rand_seed = ms % (2**32 - 1)
    random.seed(rand_seed)

    args = get_args()
    model_id = random_id()

    abs_vocab_file = os.path.join(args.abs_out, 'vocab.pkl')
    with open(abs_vocab_file, 'rb') as vocab_file:
        vocab = pk.load(vocab_file)
    vocab_size = len(vocab)

    # {{...}} fields are hyperas search-space templates
    acts = ['tanh', 'relu', 'hard_sigmoid']
    emb_dim = {{choice([50, 100, 200, 300])}}
    rnn_dim = {{uniform(50, 500)}}
    rnn_dim = int(rnn_dim)
    rec_act = {{choice([0, 1, 2])}}
    rec_act = acts[rec_act]
    dropout = {{uniform(0.2, 0.95)}}
    epochs = args.epochs

    # rough parameter counts for the sampled configuration
    n_emb = vocab_size * emb_dim
    n_rwa = (903 + 2 * rnn_dim) * rnn_dim
    n_tot = n_emb + n_rwa + rnn_dim + 1

    lr = {{lognormal(-3 * 2.3, .8)}}
    lr = 1.5 * lr
    rho = {{normal(.875, .04)}}
    clipnorm = {{uniform(1, 15)}}
    eps = {{loguniform(-8 * 2.3, -5 * 2.3)}}
    opt = optimizers.RMSprop(lr=lr, rho=rho, clipnorm=clipnorm, epsilon=eps)
    loss = kappa_loss
    metric = kappa_metric

    evl = Evaluator(dataset, args.prompt_id, args.abs_out, dev_x, test_x,
                    dev_df, test_df, model_id=model_id)

    train_y_mean = train_y.mean(axis=0)
    if train_y_mean.ndim == 0:
        train_y_mean = np.expand_dims(train_y_mean, axis=1)
    num_outputs = len(train_y_mean)

    mask_zero = False
    model = Sequential()
    model.add(Embedding(vocab_size, emb_dim, mask_zero=mask_zero))
    model.add(RWA(rnn_dim, recurrent_activation=rec_act))
    model.add(Dropout(dropout))
    # initialize the output bias to the logit of the mean training score
    bias_value = (np.log(train_y_mean) - np.log(1 - train_y_mean)).astype(K.floatx())
    model.add(Dense(num_outputs, bias_initializer=Constant(value=bias_value)))
    model.add(Activation('tanh'))
    model.emb_index = 0

    # load pre-trained vectors into the embedding layer
    from deepats.w2vEmbReader import W2VEmbReader as EmbReader
    emb_reader = EmbReader(args.emb_path, emb_dim)
    emb_reader.load_embeddings(vocab)
    emb_wts = emb_reader.get_emb_matrix_given_vocab(
        vocab, model.layers[model.emb_index].get_weights()[0])
    wts = model.layers[model.emb_index].get_weights()
    wts[0] = emb_wts
    model.layers[model.emb_index].set_weights(wts)

    model.compile(loss=loss, optimizer=opt, metrics=[metric])
    model_yaml = model.to_yaml()

    # abandon the trial if GPU memory is nearly exhausted
    if GPUtil.avail_mem() < 0.1:
        return {'loss': 1, 'status': STATUS_OK, 'model': '', 'weights': None}

    print('model_id: %s' % (model_id))
    print(model_yaml)
    print('PARAMS\t%s\tlr= %.4f\trho= %.4f\tclip= %.4f\teps= %.4f\tembDim= %d\trnnDim= %d\tdrop= %.4f\trecAct= %s'
          % (model_id, lr, rho, clipnorm, np.log(eps) / 2.3, emb_dim, rnn_dim, dropout, rec_act))

    for i in range(epochs):
        train_history = model.fit(train_x, train_y,
                                  batch_size=args.batch_size,
                                  epochs=1,
                                  verbose=0)
        evl.evaluate(model, i)
        evl.output_info()
        # stop early if dev kappa falls well short of the reference QWK
        p = evl.stats[3] / qwks[0]
        if i > 10 and p < 0.9:
            break

    i = evl.comp_idx
    j = i + 2
    best_dev_kappa = evl.best_dev[i]
    best_test_kappa = evl.best_dev[j]
    print('Best dev kappa:', best_dev_kappa)

    return {
        'loss': 1 - best_dev_kappa,
        'status': STATUS_OK,
        'model': model.to_yaml(),
        'weights': pk.dumps(model.get_weights())
    }
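# The data()/model() pairs in this file follow the hyperas convention: {{choice([...])}},
# {{uniform(a, b)}}, {{normal(mu, sigma)}} and {{lognormal(mu, sigma)}} are template slots
# that hyperas fills in before each trial, model() must return a dict with 'loss' and
# 'status', and STATUS_OK comes from hyperopt (its import is not shown in this excerpt).
# A minimal driver (illustrative only; the actual entry point is not part of this excerpt):
from hyperopt import Trials, tpe
from hyperas import optim

def _tune(max_evals=50):
    best_run, best_model = optim.minimize(model=model,
                                          data=data,
                                          algo=tpe.suggest,
                                          max_evals=max_evals,
                                          trials=Trials())
    return best_run, best_model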
def run(argv=None): parser = argparse.ArgumentParser() parser.add_argument("-o", "--out-dir", dest="out_dir_path", type=str, metavar='<str>', required=True, help="The path to the output directory") parser.add_argument( "-p", "--prompt", dest="prompt_id", type=int, metavar='<int>', required=False, help="Promp ID for ASAP dataset. '0' means all prompts.") parser.add_argument("-t", "--type", dest="model_type", type=str, metavar='<str>', default='regp', help="Model type (reg|regp|breg|bregp) (default=regp)") parser.add_argument( "-u", "--rec-unit", dest="recurrent_unit", type=str, metavar='<str>', default='lstm', help="Recurrent unit type (lstm|gru|simple) (default=lstm)") parser.add_argument( "-a", "--algorithm", dest="algorithm", type=str, metavar='<str>', default='rmsprop', help= "Optimization algorithm (rmsprop|sgd|adagrad|adadelta|adam|adamax) (default=rmsprop)" ) parser.add_argument("-l", "--loss", dest="loss", type=str, metavar='<str>', default='mse', help="Loss function (mse|mae) (default=mse)") parser.add_argument("-e", "--embdim", dest="emb_dim", type=int, metavar='<int>', default=50, help="Embeddings dimension (default=50)") parser.add_argument( "-c", "--cnndim", dest="cnn_dim", type=int, metavar='<int>', default=0, help="CNN output dimension. '0' means no CNN layer (default=0)") parser.add_argument("-w", "--cnnwin", dest="cnn_window_size", type=int, metavar='<int>', default=3, help="CNN window size. (default=3)") parser.add_argument( "-r", "--rnndim", dest="rnn_dim", type=int, metavar='<int>', default=300, help="RNN dimension. '0' means no RNN layer (default=300)") parser.add_argument("-b", "--batch-size", dest="batch_size", type=int, metavar='<int>', default=32, help="Batch size (default=32)") parser.add_argument("-v", "--vocab-size", dest="vocab_size", type=int, metavar='<int>', default=4000, help="Vocab size (default=4000)") parser.add_argument( "--aggregation", dest="aggregation", type=str, metavar='<str>', default='mot', help= "The aggregation method for regp and bregp types (mot|attsum|attmean) (default=mot)" ) parser.add_argument( "--dropout", dest="dropout_prob", type=float, metavar='<float>', default=0.5, help= "The dropout probability. To disable, give a negative number (default=0.5)" ) parser.add_argument( "--vocab-path", dest="vocab_path", type=str, metavar='<str>', help="(Optional) The path to the existing vocab file (*.pkl)") parser.add_argument("--skip-init-bias", dest="skip_init_bias", action='store_true', help="Skip initialization of the last layer bias") parser.add_argument( "--emb", dest="emb_path", type=str, metavar='<str>', help="The path to the word embeddings file (Word2Vec format)") parser.add_argument("--epochs", dest="epochs", type=int, metavar='<int>', default=100, help="Number of epochs (default=50)") parser.add_argument( "--maxlen", dest="maxlen", type=int, metavar='<int>', default=0, help= "Maximum allowed number of words during training. 
'0' means no limit (default=0)" ) parser.add_argument("--seed", dest="seed", type=int, metavar='<int>', default=1234, help="Random seed (default=1234)") ## dsv parser.add_argument("--min-word-freq", dest="min_word_freq", type=int, metavar='<int>', default=2, help="Min word frequency") parser.add_argument("--stack", dest="stack", type=int, metavar='<int>', default=1, help="how deep to stack core RNN") parser.add_argument("--skip-emb-preload", dest="skip_emb_preload", action='store_true', help="Skip preloading embeddings") parser.add_argument("--tokenize-old", dest="tokenize_old", action='store_true', help="use old tokenizer") parser.add_argument("-ar", "--abs-root", dest="abs_root", type=str, metavar='<str>', required=False, help="Abs path to root directory") parser.add_argument("-ad", "--abs-data", dest="abs_data", type=str, metavar='<str>', required=False, help="Abs path to data directory") parser.add_argument("-ao", "--abs-out", dest="abs_out", type=str, metavar='<str>', required=False, help="Abs path to output directory") parser.add_argument("-dp", "--data-path", dest="data_path", type=str, metavar='<str>', required=False, help="Abs path to output directory") ## if argv is None: args = parser.parse_args() else: args = parser.parse_args(argv) out_dir = args.abs_out U.mkdir_p(os.path.join(out_dir, 'preds')) U.set_logger(out_dir) U.print_args(args) assert args.model_type in {'reg', 'regp', 'breg', 'bregp', 'rwa'} assert args.algorithm in { 'rmsprop', 'sgd', 'adagrad', 'adadelta', 'adam', 'adamax' } assert args.loss in {'mse', 'mae', 'kappa', 'soft_kappa'} assert args.recurrent_unit in {'lstm', 'gru', 'simple', 'rwa'} assert args.aggregation in {'mot', 'attsum', 'attmean'} if args.seed > 0: RANDSEED = args.seed else: RANDSEED = np.random.randint(10000) np.random.seed(RANDSEED) ####################### #from deepats.util import GPUtils as GPU import GPUtil as GPU mem = GPU.avail_mem() logger.info('AVAIL GPU MEM == %.4f' % mem) # if mem < 0.05: # return None ############################################################################################################################### ## Prepare data # emb_words = None if not args.skip_emb_preload: #if args.emb_path: from deepats.w2vEmbReader import W2VEmbReader as EmbReader logger.info('Loading embedding vocabulary...') emb_reader = EmbReader(args.emb_path, emb_dim=args.emb_dim) emb_words = emb_reader.load_words() train_df, dev_df, test_df, vocab, overal_maxlen, qwks = dataset.get_data( args.data_path, emb_words=emb_words, seed=RANDSEED) vocab_size = len(vocab) train_x = train_df['text'].values train_y = train_df['y'].values dev_x = dev_df['text'].values dev_y = dev_df['y'].values test_x = test_df['text'].values test_y = test_df['y'].values # Dump vocab abs_vocab_file = os.path.join(out_dir, 'vocab.pkl') with open(os.path.join(out_dir, 'vocab.pkl'), 'wb') as vocab_file: pk.dump(vocab, vocab_file) if args.recurrent_unit == 'rwa': setattr(args, 'model_type', 'rwa') # Pad sequences for mini-batch processing from keras.preprocessing import sequence train_x = sequence.pad_sequences(train_x, maxlen=overal_maxlen) dev_x = sequence.pad_sequences(dev_x, maxlen=overal_maxlen) test_x = sequence.pad_sequences(test_x, maxlen=overal_maxlen) ############################################################################################################################### ## Some statistics bincounts, mfs_list = U.bincounts(train_y) with open(os.path.join(out_dir, 'bincounts.txt'), 'w') as output_file: for bincount in bincounts: output_file.write(str(bincount) + 
'\n') train_mean = train_y.mean(axis=0) train_std = train_y.std(axis=0) dev_mean = dev_y.mean(axis=0) dev_std = dev_y.std(axis=0) test_mean = test_y.mean(axis=0) test_std = test_y.std(axis=0) logger.info('Statistics:') logger.info(' PROMPT_ID\t= ' + U.b_green(args.prompt_id)) logger.info( ' TEST KAPPAS\t= {} (float, int)'.format(U.b_green('%.4f (%.4f)')) % (qwks[1], qwks[0])) logger.info(' RANDSEED\t= ' + U.b_green(str(RANDSEED))) logger.info(' train_x shape: ' + str(np.array(train_x).shape)) logger.info(' dev_x shape: ' + str(np.array(dev_x).shape)) logger.info(' test_x shape: ' + str(np.array(test_x).shape)) logger.info(' train_y shape: ' + str(train_y.shape)) logger.info(' dev_y shape: ' + str(dev_y.shape)) logger.info(' test_y shape: ' + str(test_y.shape)) logger.info(' train_y mean: %s, stdev: %s, MFC: %s' % (str(train_mean), str(train_std), str(mfs_list))) logger.info(' overal_maxlen: ' + str(overal_maxlen)) ############################################################################################################################### ## Optimizaer algorithm from keras import optimizers from deepats.optimizers import get_optimizer #optimizer = get_optimizer(args) ## RMS-PROP #optimizer = optimizers.RMSprop(lr=0.00075, rho=0.9, clipnorm=1) #optimizer = optimizers.RMSprop(lr=0.001, rho=0.9, clipnorm=1) optimizer = optimizers.RMSprop(lr=0.003, rho=0.88, epsilon=1e-6, clipnorm=10) #optimizer = optimizers.RMSprop(lr=0.0018, rho=0.88, epsilon=1e-6, clipnorm=10) #optimizer = optimizers.RMSprop(lr=0.001, rho=0.9, epsilon=1e-8, clipnorm=10) #optimizer = optimizers.RMSprop(lr=0.004, rho=0.85, epsilon=1e-6, clipnorm=10)# best 2.1 (RWA) #optimizer = optimizers.RMSprop(lr=0.0025, rho=0.8, epsilon=1e-8, clipnorm=10) # best 2.1 (RWA) #optimizer = optimizers.RMSprop(lr=0.001, rho=0.9, epsilon=1e-8, clipnorm=10) # best 2.3 (RWA) #optimizer = optimizers.RMSprop(lr=0.0025, rho=0.88, epsilon=1e-8, clipnorm=10) # best 2.3 (RWA) #optimizer = optimizers.RMSprop(lr=0.004, rho=0.85, epsilon=1e-8, clipnorm=10) # best 2.10 (RWA) ## OTHER METHODS #optimizer = optimizers.Adam(lr=0.0018, clipnorm=5) #optimizer = optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, clipnorm=1) #optimizer = optimizers.Adam(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=1e-06, clipnorm=10) #optimizer = optimizers.Nadam(lr=0.002) #optimizer = optimizers.Adamax(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=1e-08, clipnorm=10) #optimizer = optimizers.SGD(lr=0.05, momentum=0, decay=0.0, nesterov=False, clipnorm=10) #optimizer = optimizers.Adagrad(lr=0.03, epsilon=1e-08, clipnorm=10) #optimizer = optimizers.Adadelta(lr=1.0, rho=0.95, epsilon=1e-06, clipnorm=10) ############################################################################################################################### ## Building model # loss = kappa_loss metric_name = 'kappa' ######################################################## from deepats.models import create_model model = create_model(args, train_y.mean(axis=0), overal_maxlen, vocab) ############################################ ''' # test yaml serialization/de-serialization yaml = model.to_yaml() print yaml from deepats.my_layers import MeanOverTime from deepats.rwa import RWA model = model_from_yaml(yaml, custom_objects={'MeanOverTime': MeanOverTime, 'RWA':RWA}) ''' ############################################ model.compile(loss=loss, optimizer=optimizer, metrics=[kappa]) print(model.summary()) 
############################################################################################################################### ## Callbacks callbacks = [] ############################## ''' Evaluate test_kappa ''' class Eval(Callback): def __init__(self, x, y, funcs, prefix='test', batch_size=128): super(Eval, self).__init__() self.x = x self.y = y self.funcs = funcs self.prefix = prefix self.batch_size = batch_size self.epoch = 0 def on_epoch_end(self, batch, logs={}): self.epoch += 1 p = np.asarray( self.model.predict(self.x, batch_size=self.batch_size).squeeze()) for func in self.funcs: f = func(self.y, p) name = '{}_{}'.format(self.prefix, func.__name__) logs[name] = f print(' - {0}: {1:0.4f}'.format(name, f)) eval = Eval(test_x, test_df['y'].values, [nkappa], 'test') callbacks.append(eval) ############################## ''' ModelCheckpoint ''' wt_path = os.path.join(out_dir, 'weights.{}.hdf5'.format('rwa')) checkpt = ModelCheckpoint(wt_path, monitor='val_kappa', verbose=1, save_best_only=True, mode='max') callbacks.append(checkpt) ############################## ''' LRplateau ''' class LRplateau(ReduceLROnPlateau): def __init__(self, monitor='val_loss', factor=0.1, patience=10, verbose=0, mode='auto', epsilon=1e-4, cooldown=0, min_lr=0, checkpoint=None): super(LRplateau, self).__init__(monitor, factor, patience, verbose, mode, epsilon, cooldown, min_lr) self.checkpoint = checkpoint def on_lr_reduce(self, epoch): if self.checkpoint: if self.verbose > 0: print('Epoch {}: loading wts from {}.\n'.format( epoch, self.checkpoint.filepath)) self.model.load_weights(self.checkpoint.filepath) def on_epoch_end(self, epoch, logs=None): logs = logs or {} logs['lr'] = K.get_value(self.model.optimizer.lr) current = logs.get(self.monitor) if current is None: warnings.warn( 'Learning Rate Plateau Reducing requires %s available!' % self.monitor, RuntimeWarning) else: if self.in_cooldown(): self.cooldown_counter -= 1 self.wait = 0 if self.monitor_op(current, self.best): self.best = current self.wait = 0 elif not self.in_cooldown(): if self.wait >= self.patience: old_lr = float(K.get_value(self.model.optimizer.lr)) if old_lr > self.min_lr + self.lr_epsilon: new_lr = old_lr * self.factor new_lr = max(new_lr, self.min_lr) K.set_value(self.model.optimizer.lr, new_lr) if self.verbose > 0: print( '\nEpoch {0}: reducing learning rate to {1:0.4g}.' .format(epoch, new_lr)) self.cooldown_counter = self.cooldown self.wait = 0 self.on_lr_reduce(epoch) self.wait += 1 reduce_lr = LRplateau(monitor='val_kappa', mode='max', patience=3, factor=0.33, min_lr=0.00001, verbose=1, checkpoint=checkpt) callbacks.append(reduce_lr) ############################################################################################################################### ## Training model.fit(train_x, train_y, validation_data=(dev_x, dev_df['y'].values), batch_size=args.batch_size, epochs=args.epochs, callbacks=callbacks, verbose=1)
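# nkappa, used by the Eval callback above, is not defined in this excerpt. For reference,
# a standard quadratic weighted kappa on integer score vectors can be written as below
# (a sketch; the project's own nkappa may normalize or round differently):
import numpy as np

def _quadratic_weighted_kappa(y_true, y_pred):
    y_true = np.asarray(y_true, dtype=int)
    y_pred = np.rint(np.asarray(y_pred)).astype(int)
    lo = min(y_true.min(), y_pred.min())
    hi = max(y_true.max(), y_pred.max())
    n = hi - lo + 1
    # observed rating matrix O and expected matrix E under independence
    O = np.zeros((n, n))
    for a, b in zip(y_true, y_pred):
        O[a - lo, b - lo] += 1
    hist_true = O.sum(axis=1)
    hist_pred = O.sum(axis=0)
    E = np.outer(hist_true, hist_pred) / len(y_true)
    # quadratic disagreement weights
    W = np.array([[(i - j) ** 2 for j in range(n)] for i in range(n)], dtype=float)
    if n > 1:
        W /= (n - 1) ** 2
    return 1.0 - (W * O).sum() / (W * E).sum()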
def run(argv=None): parser = argparse.ArgumentParser() parser.add_argument("-o", "--out-dir", dest="out_dir_path", type=str, metavar='<str>', required=True, help="The path to the output directory") parser.add_argument( "-p", "--prompt", dest="prompt_id", type=str, metavar='<str>', required=False, help="Promp ID for ASAP dataset. '0' means all prompts.") parser.add_argument("-t", "--type", dest="model_type", type=str, metavar='<str>', default='regp', help="Model type (reg|regp|breg|bregp) (default=regp)") parser.add_argument( "-u", "--rec-unit", dest="recurrent_unit", type=str, metavar='<str>', default='lstm', help="Recurrent unit type (lstm|gru|simple) (default=lstm)") parser.add_argument( "-a", "--algorithm", dest="algorithm", type=str, metavar='<str>', default='rmsprop', help= "Optimization algorithm (rmsprop|sgd|adagrad|adadelta|adam|adamax) (default=rmsprop)" ) parser.add_argument("-l", "--loss", dest="loss", type=str, metavar='<str>', default='mse', help="Loss function (mse|mae) (default=mse)") parser.add_argument("-e", "--embdim", dest="emb_dim", type=int, metavar='<int>', default=50, help="Embeddings dimension (default=50)") parser.add_argument( "-c", "--cnndim", dest="cnn_dim", type=int, metavar='<int>', default=0, help="CNN output dimension. '0' means no CNN layer (default=0)") parser.add_argument("-w", "--cnnwin", dest="cnn_window_size", type=int, metavar='<int>', default=3, help="CNN window size. (default=3)") parser.add_argument( "-r", "--rnndim", dest="rnn_dim", type=int, metavar='<int>', default=300, help="RNN dimension. '0' means no RNN layer (default=300)") parser.add_argument("-b", "--batch-size", dest="batch_size", type=int, metavar='<int>', default=32, help="Batch size (default=32)") parser.add_argument("-v", "--vocab-size", dest="vocab_size", type=int, metavar='<int>', default=4000, help="Vocab size (default=4000)") parser.add_argument( "--aggregation", dest="aggregation", type=str, metavar='<str>', default='mot', help= "The aggregation method for regp and bregp types (mot|attsum|attmean) (default=mot)" ) parser.add_argument( "--dropout", dest="dropout_prob", type=float, metavar='<float>', default=0.5, help= "The dropout probability. To disable, give a negative number (default=0.5)" ) parser.add_argument( "--vocab-path", dest="vocab_path", type=str, metavar='<str>', help="(Optional) The path to the existing vocab file (*.pkl)") parser.add_argument("--skip-init-bias", dest="skip_init_bias", action='store_true', help="Skip initialization of the last layer bias") parser.add_argument( "--emb", dest="emb_path", type=str, metavar='<str>', help="The path to the word embeddings file (Word2Vec format)") parser.add_argument("--epochs", dest="epochs", type=int, metavar='<int>', default=100, help="Number of epochs (default=50)") parser.add_argument( "--maxlen", dest="maxlen", type=int, metavar='<int>', default=0, help= "Maximum allowed number of words during training. 
'0' means no limit (default=0)" ) parser.add_argument("--seed", dest="seed", type=int, metavar='<int>', default=0, help="Random seed (default=1234)") parser.add_argument("--mode", dest="run_mode", type=str, metavar='<str>', default='train', help="run mode") ## dsv parser.add_argument("--min-word-freq", dest="min_word_freq", type=int, metavar='<int>', default=2, help="Min word frequency") parser.add_argument("--stack", dest="stack", type=int, metavar='<int>', default=1, help="how deep to stack core RNN") parser.add_argument("--skip-emb-preload", dest="skip_emb_preload", action='store_true', help="Skip preloading embeddings") parser.add_argument("--tokenize-old", dest="tokenize_old", action='store_true', help="use old tokenizer") parser.add_argument("-ar", "--abs-root", dest="abs_root", type=str, metavar='<str>', required=False, help="Abs path to root directory") parser.add_argument("-ad", "--abs-data", dest="abs_data", type=str, metavar='<str>', required=False, help="Abs path to data directory") parser.add_argument("-ao", "--abs-out", dest="abs_out", type=str, metavar='<str>', required=False, help="Abs path to output directory") parser.add_argument("-dp", "--data-path", dest="data_path", type=str, metavar='<str>', required=False, help="Abs path to output directory") ## if argv is None: args = parser.parse_args() else: args = parser.parse_args(argv) out_dir = args.abs_out U.mkdir_p(os.path.join(out_dir, 'preds')) U.set_logger(out_dir) U.print_args(args) assert args.model_type in {'reg', 'regp', 'breg', 'bregp', 'rwa'} assert args.algorithm in { 'rmsprop', 'sgd', 'adagrad', 'adadelta', 'adam', 'adamax' } assert args.loss in {'mse', 'mae', 'kappa', 'soft_kappa'} assert args.recurrent_unit in {'lstm', 'gru', 'simple', 'rwa'} assert args.aggregation in {'mot', 'attsum', 'attmean'} if args.seed > 0: RANDSEED = args.seed else: RANDSEED = np.random.randint(10000) np.random.seed(RANDSEED) pid = args.prompt_id mode = args.run_mode ####################### #from deepats.util import GPUtils as GPU # import GPUtil as GPU # mem = GPU.avail_mem() # logger.info('AVAIL GPU MEM == %.4f' % mem) # if mem < 0.05: # return None ############################################################################################################################### ## Prepare data # emb_words = None if not args.skip_emb_preload: #if args.emb_path: from deepats.w2vEmbReader import W2VEmbReader as EmbReader logger.info('Loading embedding vocabulary...') emb_reader = EmbReader(args.emb_path, emb_dim=args.emb_dim) emb_words = emb_reader.load_words() vocab_path = None abs_vocab_file = os.path.join(out_dir, 'vocab.pkl') if mode == 'test': vocab_path = abs_vocab_file train_df, dev_df, test_df, vocab, overal_maxlen = ets_reader.get_mode_data( args.data_path, dev_split=0.1, emb_words=emb_words, vocab_path=vocab_path, seed=RANDSEED) train_x = train_df['text'].values train_y = train_df['yint'].values.astype('float32') dev_x = dev_df['text'].values dev_y = dev_df['yint'].values.astype('float32') test_x = test_df['text'].values test_y = test_df['yint'].values.astype('float32') # Dump vocab if mode == 'train': with open(os.path.join(out_dir, 'vocab.pkl'), 'wb') as vocab_file: pk.dump(vocab, vocab_file) if args.recurrent_unit == 'rwa': setattr(args, 'model_type', 'rwa') if args.recurrent_unit == 'lstm': setattr(args, 'model_type', 'regp') # Pad sequences for mini-batch processing from keras.preprocessing import sequence train_x = sequence.pad_sequences(train_x, maxlen=overal_maxlen) dev_x = sequence.pad_sequences(dev_x, 
maxlen=overal_maxlen) test_x = sequence.pad_sequences(test_x, maxlen=overal_maxlen) ############################################################################################################################### ## Some statistics bincounts, mfs_list = U.bincounts(train_y) with open(os.path.join(out_dir, 'bincounts.txt'), 'w') as output_file: for bincount in bincounts: output_file.write(str(bincount) + '\n') train_mean = train_y.mean(axis=0) train_std = train_y.std(axis=0) dev_mean = dev_y.mean(axis=0) dev_std = dev_y.std(axis=0) test_mean = test_y.mean(axis=0) test_std = test_y.std(axis=0) logger.info('Statistics:') logger.info(' PROMPT_ID\t= ' + U.b_green(args.prompt_id)) logger.info(' RANDSEED\t= ' + U.b_green(str(RANDSEED))) logger.info(' train_x shape: ' + str(np.array(train_x).shape)) logger.info(' dev_x shape: ' + str(np.array(dev_x).shape)) logger.info(' test_x shape: ' + str(np.array(test_x).shape)) logger.info(' train_y shape: ' + str(train_y.shape)) logger.info(' dev_y shape: ' + str(dev_y.shape)) logger.info(' test_y shape: ' + str(test_y.shape)) logger.info(' train_y mean: %s, stdev: %s, MFC: %s' % (str(train_mean), str(train_std), str(mfs_list))) logger.info(' overal_maxlen: ' + str(overal_maxlen)) ############################################################################################################################### ## Optimizaer algorithm from keras import optimizers from deepats.optimizers import get_optimizer #optimizer = get_optimizer(args) # optimizer = optimizers.Adam(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=1e-08, clipnorm=10)#***RWA*** optimizer = optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, clipnorm=1) # optimizer = optimizers.Nadam(lr=0.001, clipnorm=10) # optimizer = optimizers.Nadam(lr=0.002, clipnorm=1) # optimizer = optimizers.RMSprop(lr=0.0015, rho=0.9, epsilon=1e-8, clipnorm=10) # optimizer = optimizers.RMSprop(lr=0.003, rho=0.88, epsilon=1e-6, clipnorm=10) # optimizer = optimizers.RMSprop(lr=0.0025, rho=0.8, epsilon=1e-8, clipnorm=10) # best 2.1 (RWA) ## OTHER METHODS #optimizer = optimizers.Adam(lr=0.0018, clipnorm=5) #optimizer = optimizers.Nadam(lr=0.002) #optimizer = optimizers.Adamax(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=1e-08, clipnorm=10) #optimizer = optimizers.SGD(lr=0.05, momentum=0, decay=0.0, nesterov=False, clipnorm=10) #optimizer = optimizers.Adagrad(lr=0.03, epsilon=1e-08, clipnorm=10) ############################################################################################################################### ## Building model from deepats.models import create_model #loss = kappa_loss #metrics = [kappa,'mean_squared_error'] if args.loss == 'mse': loss = 'mean_squared_error' metrics = ['acc'] # metrics = [kappa] monitor = 'val_kappa' elif args.loss == 'kappa': loss = kappa_loss metrics = [kappa] monitor = 'val_kappa' model = create_model(args, train_y.mean(axis=0), overal_maxlen, vocab) model.compile(loss=loss, optimizer=optimizer, metrics=metrics) print(model.summary()) ############################################################################################################################### ## Callbacks callbacks = [] ############################## ''' Evaluate test_kappa ''' from sklearn.metrics import roc_auc_score as auc, average_precision_score def map(y_true, y_prob): return average_precision_score(y_true, y_prob) class Eval(Callback): def __init__(self, x, y, funcs, prefix='test', batch_size=128): super(Eval, self).__init__() self.x = x self.y = y self.funcs = funcs self.prefix = prefix 
self.batch_size = batch_size self.epoch = 0 def on_epoch_end(self, epoch, logs={}): self.epoch += 1 p = np.asarray( self.model.predict(self.x, batch_size=self.batch_size).squeeze()) for func in self.funcs: f = func(self.y, p) name = '{}_{}'.format(self.prefix, func.__name__) logs[name] = f print(' - {0}: {1:0.4f}'.format(name, f)) #sys.stdout.write(' - {0}: {1:0.4f} '.format(name,f)) eval = Eval(dev_x, dev_df['yint'].values, [map], 'val') callbacks.append(eval) monitor = 'val_map' # eval = Eval(test_x, test_df['yint'].values, [qwk,auc], 'test'); callbacks.append(eval) eval = Eval(test_x, test_df['yint'].values, [map, qwk], 'test') callbacks.append(eval) # monitor = 'test_map' ############################## ''' ModelCheckpoint ''' wt_path = os.path.join(out_dir, 'weights.{}.hdf5'.format(pid)) checkpt = ModelCheckpoint(wt_path, monitor=monitor, verbose=1, save_best_only=True, mode='max') callbacks.append(checkpt) ############################## ''' PR Curve ''' from sklearn.metrics import precision_recall_curve import matplotlib.pyplot as plt class PR(object): def __init__(self, model, checkpoint, x, y, prefix='test', batch_size=128): self.model = model self.checkpoint = checkpoint self.x = x self.y = y self.prefix = prefix self.batch_size = batch_size def predict(self): self.model.load_weights(self.checkpoint.filepath) self.p = np.asarray( self.model.predict(self.x, batch_size=self.batch_size).squeeze()) def pr_curve(self, y, p, s=''): aps = average_precision_score(y, p) precision, recall, _ = precision_recall_curve(y, p) name = '{}_{}'.format(self.prefix, 'pr_curve') plt.figure() plt.step(recall, precision, color='b', alpha=0.2, where='post') plt.fill_between(recall, precision, step='post', alpha=0.2, color='b') plt.xlabel('Recall') plt.ylabel('Precision') plt.ylim([0.0, 1.05]) plt.xlim([0.0, 1.0]) plt.title('PR curve (mode={1}): {2}, AUC={0:0.4f}'.format( aps, pid, s)) def run_sample(self, q, n=1000): (y, p) = down_sample_bootstrap(self.y, self.p, q, n) ## draw curve self.pr_curve(y, p, s='{0}% off-mode'.format(int(q * 100))) ## make table print('\nMode={2}, {0}% off-mode (#samples={1}):'.format( int(q * 100), n, pid)) print tabulate(stats((y, p), n), headers="firstrow", floatfmt='.3f') def run(self, Q=[0.1, 0.01]): self.predict() for q in Q: self.run_sample(q) return self.y, self.p pr = PR(model, checkpt, test_x, test_df['yint'].values, 'test') ############################## ''' LRplateau ''' class LRplateau(ReduceLROnPlateau): def __init__(self, monitor='val_loss', factor=0.1, patience=10, verbose=0, mode='auto', epsilon=1e-4, cooldown=0, min_lr=0, checkpoint=None): super(LRplateau, self).__init__(monitor, factor, patience, verbose, mode, epsilon, cooldown, min_lr) self.checkpoint = checkpoint def on_lr_reduce(self, epoch): if self.checkpoint: if self.verbose > 0: print('Epoch {}: loading wts from {}.\n'.format( epoch, self.checkpoint.filepath)) self.model.load_weights(self.checkpoint.filepath) def on_epoch_end(self, epoch, logs=None): logs = logs or {} logs['lr'] = K.get_value(self.model.optimizer.lr) current = logs.get(self.monitor) if current is None: warnings.warn( 'Learning Rate Plateau Reducing requires %s available!' 
% self.monitor, RuntimeWarning) else: if self.in_cooldown(): self.cooldown_counter -= 1 self.wait = 0 if self.monitor_op(current, self.best): self.best = current self.wait = 0 elif not self.in_cooldown(): if self.wait >= self.patience: old_lr = float(K.get_value(self.model.optimizer.lr)) if old_lr > self.min_lr + self.lr_epsilon: new_lr = old_lr * self.factor new_lr = max(new_lr, self.min_lr) K.set_value(self.model.optimizer.lr, new_lr) if self.verbose > 0: print( '\nEpoch {0}: reducing learning rate to {1:0.4g}.' .format(epoch, new_lr)) self.cooldown_counter = self.cooldown self.wait = 0 self.on_lr_reduce(epoch) self.wait += 1 reduce_lr = LRplateau(monitor=monitor, mode='max', patience=3, factor=0.33, min_lr=0.00001, verbose=1, checkpoint=checkpt) callbacks.append(reduce_lr) ############################################################################################################################### ## Training if mode == 'train': model.fit(train_x, train_y, validation_data=(dev_x, dev_df['yint'].values), batch_size=args.batch_size, epochs=args.epochs, callbacks=callbacks, verbose=1) ## Evaluate ############################################### y, p = pr.run(Q=[0.2, 0.1, 0.05]) return y, p
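# down_sample_bootstrap, used by PR.run_sample above, is not defined in this excerpt.
# A plausible sketch of its contract, assuming it bootstraps n examples in which the
# positive (off-mode) class makes up roughly a fraction q of the sample:
import numpy as np

def _down_sample_bootstrap(y, p, q, n=1000, seed=None):
    rng = np.random.RandomState(seed)
    y = np.asarray(y)
    p = np.asarray(p)
    pos = np.flatnonzero(y == 1)
    neg = np.flatnonzero(y == 0)
    n_pos = int(round(q * n))
    idx = np.concatenate([rng.choice(pos, n_pos, replace=True),
                          rng.choice(neg, n - n_pos, replace=True)])
    rng.shuffle(idx)
    return y[idx], p[idx]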