Example no. 1
def model(train_x, train_y, dev_x, dev_y, test_x, test_y, overal_maxlen, qwks):
    from keras.models import Sequential
    from keras.layers import Dense, Dropout, Activation, GlobalAveragePooling1D
    from keras.layers.embeddings import Embedding
    from keras.layers.recurrent import LSTM
    from keras.initializers import Constant
    from keras import optimizers
    import keras.backend as K
    from deepats.my_layers import MeanOverTime
    from deepats.rwa import RWA
    import pickle as pk
    import numpy as np
    import string
    import random
    import os
    from deepats.optimizers import get_optimizer
    from hyperopt import STATUS_OK  # status flag returned to hyperopt in the result dicts below

    from deepats.ets_evaluator import Evaluator
    import deepats.ets_reader as dataset
    from deepats.ets_config import get_args
    import GPUtil

    def random_id(size=6, chars=string.ascii_uppercase + string.digits):
        return ''.join(random.choice(chars) for _ in range(size))

    def kappa_metric(t, x):
        u = 0.5 * K.sum(K.square(x - t))
        v = K.dot(K.transpose(x), t - K.mean(t))
        return v / (v + u)

    def kappa_loss(t, x):
        u = K.sum(K.square(x - t))
        v = K.dot(K.squeeze(x, 1), K.squeeze(t - K.mean(t), 1))
        return u / (2 * v + u)
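    # Interpretation (hedged): these two functions read as a differentiable
    # surrogate for quadratic weighted kappa. With u = sum((x - t)^2) and
    # v = x . (t - mean(t)), kappa_loss = u / (2v + u), and kappa_metric works
    # out to 1 - kappa_loss, so minimizing the loss maximizes the metric.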

    import time
    ms = int(round(time.time() * 1000))
    rand_seed = ms % (2**32 - 1)
    random.seed(rand_seed)

    args = get_args()
    model_id = random_id()

    abs_vocab_file = os.path.join(args.abs_out, 'vocab.pkl')
    with open(abs_vocab_file, 'rb') as vocab_file:
        vocab = pk.load(vocab_file)
    vocab_size = len(vocab)

    acts = ['tanh', 'relu', 'hard_sigmoid']
    emb_dim = {{choice([50, 100, 200, 300])}}
    rnn_dim = {{uniform(50, 500)}}
    rnn_dim = int(rnn_dim)
    rec_act = {{choice([0, 1, 2])}}
    rec_act = acts[rec_act]
    dropout = {{uniform(0.2, 0.95)}}

    epochs = args.epochs
    n_emb = vocab_size * emb_dim
    n_rwa = (903 + 2 * rnn_dim) * rnn_dim
    n_tot = n_emb + n_rwa + rnn_dim + 1
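    # n_emb / n_rwa / n_tot appear to be rough parameter-count estimates for the
    # embedding, RWA and output layers; they are computed but not used below.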

    lr = {{lognormal(-3 * 2.3, .8)}}
    lr = 1.5 * lr
    rho = {{normal(.875, .04)}}
    clipnorm = {{uniform(1, 15)}}
    eps = {{loguniform(-8 * 2.3, -5 * 2.3)}}

    opt = optimizers.RMSprop(lr=lr, rho=rho, clipnorm=clipnorm, epsilon=eps)
    loss = kappa_loss
    metric = kappa_metric

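    # Note: dev_df and test_df are not parameters of this function; they are
    # assumed to come from the enclosing scope (e.g. module-level globals created
    # by the hyperas data() function).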
    evl = Evaluator(dataset,
                    args.prompt_id,
                    args.abs_out,
                    dev_x,
                    test_x,
                    dev_df,
                    test_df,
                    model_id=model_id)

    train_y_mean = train_y.mean(axis=0)
    if train_y_mean.ndim == 0:
        # a 0-d mean means a single output; make it a length-1 array so len() works
        train_y_mean = np.expand_dims(train_y_mean, axis=0)
    num_outputs = len(train_y_mean)

    mask_zero = False

    model = Sequential()
    model.add(Embedding(vocab_size, emb_dim, mask_zero=mask_zero))
    model.add(RWA(rnn_dim, recurrent_activation=rec_act))
    model.add(Dropout(dropout))
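    # Initialize the output-layer bias to the log-odds (logit) of the mean training
    # score, so the untrained model starts out predicting near the empirical mean.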
    bias_value = (np.log(train_y_mean) - np.log(1 - train_y_mean)).astype(
        K.floatx())
    model.add(Dense(num_outputs, bias_initializer=Constant(value=bias_value)))
    model.add(Activation('tanh'))
    model.emb_index = 0

    from deepats.w2vEmbReader import W2VEmbReader as EmbReader
    emb_reader = EmbReader(args.emb_path, emb_dim)
    emb_reader.load_embeddings(vocab)
    emb_wts = emb_reader.get_emb_matrix_given_vocab(
        vocab, model.layers[model.emb_index].get_weights()[0])
    wts = model.layers[model.emb_index].get_weights()
    wts[0] = emb_wts
    model.layers[model.emb_index].set_weights(wts)

    model.compile(loss=loss, optimizer=opt, metrics=[metric])
    model_yaml = model.to_yaml()

    # GPUtil is already imported above; bail out early if GPU memory is nearly exhausted
    if GPUtil.avail_mem() < 0.1:
        return {'loss': 1, 'status': STATUS_OK, 'model': '', 'weights': None}

    print('model_id: %s' % (model_id))
    print(model_yaml)
    print('PARAMS\t%s\t'
          'lr= %.4f\t'
          'rho= %.4f\t'
          'clip= %.4f\t'
          'eps= %.4f\t'
          'embDim= %d\t'
          'rnnDim= %d\t'
          'drop= %.4f\t'
          'recAct= %s' % (model_id, lr, rho, clipnorm, np.log(eps) / 2.3, emb_dim,
                          rnn_dim, dropout, rec_act))

    for i in range(epochs):
        train_history = model.fit(train_x,
                                  train_y,
                                  batch_size=args.batch_size,
                                  epochs=1,
                                  verbose=0)
        evl.evaluate(model, i)
        evl.output_info()

        p = evl.stats[3] / qwks[0]
        if i > 10 and p < 0.9:
            break

    i = evl.comp_idx
    j = i + 2
    best_dev_kappa = evl.best_dev[i]
    best_test_kappa = evl.best_dev[j]

    print('Best dev kappa: %.4f, test kappa: %.4f' %
          (best_dev_kappa, best_test_kappa))
    return {
        'loss': 1 - best_dev_kappa,
        'status': STATUS_OK,
        'model': model.to_yaml(),
        'weights': pk.dumps(model.get_weights())
    }
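
Usage sketch: a model function templated with {{...}} placeholders like the one above is normally driven by hyperas rather than called directly. The snippet below is a minimal sketch under that assumption; the data() function and the max_evals value are illustrative placeholders, not part of the original code.

from hyperopt import Trials, tpe
from hyperas import optim

best_run, best_model = optim.minimize(model=model,
                                      data=data,
                                      algo=tpe.suggest,
                                      max_evals=50,
                                      trials=Trials())
print(best_run)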
Example no. 2
def run(argv=None):

    parser = argparse.ArgumentParser()
    parser.add_argument("-o",
                        "--out-dir",
                        dest="out_dir_path",
                        type=str,
                        metavar='<str>',
                        required=True,
                        help="The path to the output directory")
    parser.add_argument(
        "-p",
        "--prompt",
        dest="prompt_id",
        type=int,
        metavar='<int>',
        required=False,
        help="Promp ID for ASAP dataset. '0' means all prompts.")
    parser.add_argument("-t",
                        "--type",
                        dest="model_type",
                        type=str,
                        metavar='<str>',
                        default='regp',
                        help="Model type (reg|regp|breg|bregp) (default=regp)")
    parser.add_argument(
        "-u",
        "--rec-unit",
        dest="recurrent_unit",
        type=str,
        metavar='<str>',
        default='lstm',
        help="Recurrent unit type (lstm|gru|simple) (default=lstm)")
    parser.add_argument(
        "-a",
        "--algorithm",
        dest="algorithm",
        type=str,
        metavar='<str>',
        default='rmsprop',
        help=
        "Optimization algorithm (rmsprop|sgd|adagrad|adadelta|adam|adamax) (default=rmsprop)"
    )
    parser.add_argument("-l",
                        "--loss",
                        dest="loss",
                        type=str,
                        metavar='<str>',
                        default='mse',
                        help="Loss function (mse|mae) (default=mse)")
    parser.add_argument("-e",
                        "--embdim",
                        dest="emb_dim",
                        type=int,
                        metavar='<int>',
                        default=50,
                        help="Embeddings dimension (default=50)")
    parser.add_argument(
        "-c",
        "--cnndim",
        dest="cnn_dim",
        type=int,
        metavar='<int>',
        default=0,
        help="CNN output dimension. '0' means no CNN layer (default=0)")
    parser.add_argument("-w",
                        "--cnnwin",
                        dest="cnn_window_size",
                        type=int,
                        metavar='<int>',
                        default=3,
                        help="CNN window size. (default=3)")
    parser.add_argument(
        "-r",
        "--rnndim",
        dest="rnn_dim",
        type=int,
        metavar='<int>',
        default=300,
        help="RNN dimension. '0' means no RNN layer (default=300)")
    parser.add_argument("-b",
                        "--batch-size",
                        dest="batch_size",
                        type=int,
                        metavar='<int>',
                        default=32,
                        help="Batch size (default=32)")
    parser.add_argument("-v",
                        "--vocab-size",
                        dest="vocab_size",
                        type=int,
                        metavar='<int>',
                        default=4000,
                        help="Vocab size (default=4000)")
    parser.add_argument(
        "--aggregation",
        dest="aggregation",
        type=str,
        metavar='<str>',
        default='mot',
        help=
        "The aggregation method for regp and bregp types (mot|attsum|attmean) (default=mot)"
    )
    parser.add_argument(
        "--dropout",
        dest="dropout_prob",
        type=float,
        metavar='<float>',
        default=0.5,
        help=
        "The dropout probability. To disable, give a negative number (default=0.5)"
    )
    parser.add_argument(
        "--vocab-path",
        dest="vocab_path",
        type=str,
        metavar='<str>',
        help="(Optional) The path to the existing vocab file (*.pkl)")
    parser.add_argument("--skip-init-bias",
                        dest="skip_init_bias",
                        action='store_true',
                        help="Skip initialization of the last layer bias")
    parser.add_argument(
        "--emb",
        dest="emb_path",
        type=str,
        metavar='<str>',
        help="The path to the word embeddings file (Word2Vec format)")
    parser.add_argument("--epochs",
                        dest="epochs",
                        type=int,
                        metavar='<int>',
                        default=100,
                        help="Number of epochs (default=50)")
    parser.add_argument(
        "--maxlen",
        dest="maxlen",
        type=int,
        metavar='<int>',
        default=0,
        help=
        "Maximum allowed number of words during training. '0' means no limit (default=0)"
    )
    parser.add_argument("--seed",
                        dest="seed",
                        type=int,
                        metavar='<int>',
                        default=1234,
                        help="Random seed (default=1234)")
    ## dsv
    parser.add_argument("--min-word-freq",
                        dest="min_word_freq",
                        type=int,
                        metavar='<int>',
                        default=2,
                        help="Min word frequency")
    parser.add_argument("--stack",
                        dest="stack",
                        type=int,
                        metavar='<int>',
                        default=1,
                        help="how deep to stack core RNN")
    parser.add_argument("--skip-emb-preload",
                        dest="skip_emb_preload",
                        action='store_true',
                        help="Skip preloading embeddings")
    parser.add_argument("--tokenize-old",
                        dest="tokenize_old",
                        action='store_true',
                        help="use old tokenizer")

    parser.add_argument("-ar",
                        "--abs-root",
                        dest="abs_root",
                        type=str,
                        metavar='<str>',
                        required=False,
                        help="Abs path to root directory")
    parser.add_argument("-ad",
                        "--abs-data",
                        dest="abs_data",
                        type=str,
                        metavar='<str>',
                        required=False,
                        help="Abs path to data directory")
    parser.add_argument("-ao",
                        "--abs-out",
                        dest="abs_out",
                        type=str,
                        metavar='<str>',
                        required=False,
                        help="Abs path to output directory")
    parser.add_argument("-dp",
                        "--data-path",
                        dest="data_path",
                        type=str,
                        metavar='<str>',
                        required=False,
                        help="Abs path to output directory")
    ##

    if argv is None:
        args = parser.parse_args()
    else:
        args = parser.parse_args(argv)

    out_dir = args.abs_out
    U.mkdir_p(os.path.join(out_dir, 'preds'))
    U.set_logger(out_dir)
    U.print_args(args)

    assert args.model_type in {'reg', 'regp', 'breg', 'bregp', 'rwa'}
    assert args.algorithm in {
        'rmsprop', 'sgd', 'adagrad', 'adadelta', 'adam', 'adamax'
    }
    assert args.loss in {'mse', 'mae', 'kappa', 'soft_kappa'}
    assert args.recurrent_unit in {'lstm', 'gru', 'simple', 'rwa'}
    assert args.aggregation in {'mot', 'attsum', 'attmean'}

    if args.seed > 0:
        RANDSEED = args.seed
    else:
        RANDSEED = np.random.randint(10000)
    np.random.seed(RANDSEED)

    #######################

    #from deepats.util import GPUtils as GPU
    import GPUtil as GPU
    mem = GPU.avail_mem()
    logger.info('AVAIL GPU MEM == %.4f' % mem)
    # 	if mem < 0.05:
    # 		return None
    ###############################################################################################################################
    ## Prepare data
    #

    emb_words = None
    if not args.skip_emb_preload:  #if args.emb_path:
        from deepats.w2vEmbReader import W2VEmbReader as EmbReader
        logger.info('Loading embedding vocabulary...')
        emb_reader = EmbReader(args.emb_path, emb_dim=args.emb_dim)
        emb_words = emb_reader.load_words()

    train_df, dev_df, test_df, vocab, overal_maxlen, qwks = dataset.get_data(
        args.data_path, emb_words=emb_words, seed=RANDSEED)
    vocab_size = len(vocab)

    train_x = train_df['text'].values
    train_y = train_df['y'].values
    dev_x = dev_df['text'].values
    dev_y = dev_df['y'].values
    test_x = test_df['text'].values
    test_y = test_df['y'].values

    # Dump vocab

    abs_vocab_file = os.path.join(out_dir, 'vocab.pkl')
    with open(abs_vocab_file, 'wb') as vocab_file:
        pk.dump(vocab, vocab_file)

    if args.recurrent_unit == 'rwa':
        setattr(args, 'model_type', 'rwa')

    # Pad sequences for mini-batch processing
    from keras.preprocessing import sequence

    if args.model_type in {'breg', 'bregp', 'rwa'}:
        assert args.rnn_dim > 0
        train_x = sequence.pad_sequences(train_x, maxlen=overal_maxlen)
        dev_x = sequence.pad_sequences(dev_x, maxlen=overal_maxlen)
        test_x = sequence.pad_sequences(test_x, maxlen=overal_maxlen)
    else:
        train_x = sequence.pad_sequences(train_x)
        dev_x = sequence.pad_sequences(dev_x)
        test_x = sequence.pad_sequences(test_x)

    ###############################################################################################################################
    ## Some statistics
    #
    # train_y = np.array(train_y, dtype=K.floatx())
    # dev_y = np.array(dev_y, dtype=K.floatx())
    # test_y = np.array(test_y, dtype=K.floatx())

    bincounts, mfs_list = U.bincounts(train_y)
    with open(os.path.join(out_dir, 'bincounts.txt'), 'w') as output_file:
        for bincount in bincounts:
            output_file.write(str(bincount) + '\n')

    train_mean = train_y.mean(axis=0)
    train_std = train_y.std(axis=0)
    dev_mean = dev_y.mean(axis=0)
    dev_std = dev_y.std(axis=0)
    test_mean = test_y.mean(axis=0)
    test_std = test_y.std(axis=0)

    logger.info('Statistics:')
    logger.info('  TEST KAPPAS (float, int)= \033[92m%.4f (%.4f)\033[0m ' %
                (qwks[1], qwks[0]))
    logger.info('  RANDSEED =   ' + str(RANDSEED))
    logger.info('  train_x shape: ' + str(np.array(train_x).shape))
    logger.info('  dev_x shape:   ' + str(np.array(dev_x).shape))
    logger.info('  test_x shape:  ' + str(np.array(test_x).shape))
    logger.info('  train_y shape: ' + str(train_y.shape))
    logger.info('  dev_y shape:   ' + str(dev_y.shape))
    logger.info('  test_y shape:  ' + str(test_y.shape))
    logger.info('  train_y mean: %s, stdev: %s, MFC: %s' %
                (str(train_mean), str(train_std), str(mfs_list)))
    logger.info('  overal_maxlen:  ' + str(overal_maxlen))

    ###############################################################################################################################
    ## Optimizer algorithm
    #

    from deepats.optimizers import get_optimizer
    #optimizer = get_optimizer(args)

    from keras import optimizers

    ## RMS-PROP

    #optimizer = optimizers.RMSprop(lr=0.00075, rho=0.9, clipnorm=1)
    #optimizer = optimizers.RMSprop(lr=0.001, rho=0.9, clipnorm=1)
    optimizer = optimizers.RMSprop(lr=0.001,
                                   rho=0.9,
                                   epsilon=1e-6,
                                   clipnorm=10)
    #optimizer = optimizers.RMSprop(lr=0.0018, rho=0.88, epsilon=1e-6, clipnorm=10)

    #optimizer = optimizers.RMSprop(lr=0.001, rho=0.9, epsilon=1e-8, clipnorm=10)
    #optimizer = optimizers.RMSprop(lr=0.004, rho=0.85, epsilon=1e-6, clipnorm=10)# best 2.1 (RWA)
    #optimizer = optimizers.RMSprop(lr=0.0025, rho=0.8, epsilon=1e-8, clipnorm=10) # best 2.1 (RWA)
    #optimizer = optimizers.RMSprop(lr=0.001, rho=0.9, epsilon=1e-8, clipnorm=10) # best 2.3 (RWA)
    #optimizer = optimizers.RMSprop(lr=0.0025, rho=0.88, epsilon=1e-8, clipnorm=10) # best 2.3 (RWA)
    #optimizer = optimizers.RMSprop(lr=0.004, rho=0.85, epsilon=1e-8, clipnorm=10) # best 2.10 (RWA)

    ## OTHER METHODS
    #optimizer = optimizers.Adam(lr=0.0018, clipnorm=5)
    #optimizer = optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, clipnorm=1)
    #optimizer = optimizers.Adam(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=1e-06, clipnorm=10)

    #optimizer = optimizers.Adamax(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=1e-08, clipnorm=10)
    #optimizer = optimizers.SGD(lr=0.05, momentum=0, decay=0.0, nesterov=False, clipnorm=10)
    #optimizer = optimizers.Adagrad(lr=0.03, epsilon=1e-08, clipnorm=10)
    #optimizer = optimizers.Adadelta(lr=1.0, rho=0.95, epsilon=1e-06, clipnorm=10)

    ###############################################################################################################################
    ## Building model
    #
    if args.loss == 'mse':
        loss = 'mean_squared_error'
        metric = kappa_metric
        metric_name = 'kappa_metric'
    elif args.loss == 'mae':
        loss = 'mean_absolute_error'
        metric = kappa_metric
        metric_name = 'kappa_metric'
    elif args.loss == 'kappa':
        loss = kappa_loss
        metric = kappa_metric
        metric_name = 'kappa_metric'
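    # Note: 'soft_kappa' passes the assert above but has no branch here, so loss
    # would be left undefined if that option were selected.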

    ########################################################

    from deepats.models import create_model
    model = create_model(args, train_y.mean(axis=0), overal_maxlen, vocab)

    ############################################
    '''
    # test yaml serialization/de-serialization
    yaml = model.to_yaml()
    print(yaml)
    from deepats.my_layers import MeanOverTime
    from deepats.rwa import RWA
    model = model_from_yaml(yaml, custom_objects={'MeanOverTime': MeanOverTime, 'RWA': RWA})
    '''
    ############################################

    model.compile(loss=loss, optimizer=optimizer, metrics=[metric])

    print(model.summary())

    ###############################################################################################################################
    ## Plotting model
    #
    # 	from keras.utils.visualize_util import plot
    # 	plot(model, to_file = os.path.join(out_dir,'model.png'))

    ###############################################################################################################################
    ## Save model architecture
    #

    logger.info('Saving model architecture')
    with open(os.path.join(out_dir, 'model_arch.json'), 'w') as arch:
        arch.write(model.to_json(indent=2))
    logger.info('  Done')

    ###############################################################################################################################
    ## Evaluator
    #
    evl = Evaluator(dataset, args.prompt_id, out_dir, dev_x, test_x, dev_df,
                    test_df)

    ###############################################################################################################################
    ## Training
    #

    logger.info(
        '----------------------------------------------------------------')
    logger.info('Initial Evaluation:')
    evl.evaluate(model, -1, print_info=True)

    total_train_time = 0
    total_eval_time = 0

    for ii in range(args.epochs):
        # Training
        t0 = time()
        train_history = model.fit(train_x,
                                  train_y,
                                  batch_size=args.batch_size,
                                  epochs=1,
                                  verbose=0)
        tr_time = time() - t0
        total_train_time += tr_time

        # Evaluate
        t0 = time()
        evl.evaluate(model, ii)
        evl_time = time() - t0
        total_eval_time += evl_time

        # Print information
        train_loss = train_history.history['loss'][0]
        train_metric = train_history.history[metric_name][0]
        logger.info('Epoch %d, train: %is, evaluation: %is' %
                    (ii, tr_time, evl_time))
        logger.info('[Train] loss: %.4f, metric: %.4f' %
                    (train_loss, train_metric))
        evl.print_info()

    ###############################################################################################################################
    ## Summary of the results
    #

    logger.info('Training:   %i seconds in total' % total_train_time)
    logger.info('Evaluation: %i seconds in total' % total_eval_time)

    evl.print_final_info()
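
Invocation sketch for the run() entry point above (hedged): the paths below are hypothetical placeholders, and the surrounding module is assumed to provide the imports that run() relies on (argparse, os, np, pk, U, dataset, logger, time).

if __name__ == '__main__':
    run(['-o', 'out',
         '-p', '1',
         '--emb', '/abs/path/to/vectors.w2v.txt',
         '-ao', '/abs/path/to/out',
         '-dp', '/abs/path/to/data.tsv'])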
Example no. 3
def run(argv=None):

    parser = argparse.ArgumentParser()
    parser.add_argument("-o",
                        "--out-dir",
                        dest="out_dir_path",
                        type=str,
                        metavar='<str>',
                        required=True,
                        help="The path to the output directory")
    parser.add_argument(
        "-p",
        "--prompt",
        dest="prompt_id",
        type=int,
        metavar='<int>',
        required=False,
        help="Promp ID for ASAP dataset. '0' means all prompts.")
    parser.add_argument("-t",
                        "--type",
                        dest="model_type",
                        type=str,
                        metavar='<str>',
                        default='regp',
                        help="Model type (reg|regp|breg|bregp) (default=regp)")
    parser.add_argument(
        "-u",
        "--rec-unit",
        dest="recurrent_unit",
        type=str,
        metavar='<str>',
        default='lstm',
        help="Recurrent unit type (lstm|gru|simple) (default=lstm)")
    parser.add_argument(
        "-a",
        "--algorithm",
        dest="algorithm",
        type=str,
        metavar='<str>',
        default='rmsprop',
        help=
        "Optimization algorithm (rmsprop|sgd|adagrad|adadelta|adam|adamax) (default=rmsprop)"
    )
    parser.add_argument("-l",
                        "--loss",
                        dest="loss",
                        type=str,
                        metavar='<str>',
                        default='mse',
                        help="Loss function (mse|mae) (default=mse)")
    parser.add_argument("-e",
                        "--embdim",
                        dest="emb_dim",
                        type=int,
                        metavar='<int>',
                        default=50,
                        help="Embeddings dimension (default=50)")
    parser.add_argument(
        "-c",
        "--cnndim",
        dest="cnn_dim",
        type=int,
        metavar='<int>',
        default=0,
        help="CNN output dimension. '0' means no CNN layer (default=0)")
    parser.add_argument("-w",
                        "--cnnwin",
                        dest="cnn_window_size",
                        type=int,
                        metavar='<int>',
                        default=3,
                        help="CNN window size. (default=3)")
    parser.add_argument(
        "-r",
        "--rnndim",
        dest="rnn_dim",
        type=int,
        metavar='<int>',
        default=300,
        help="RNN dimension. '0' means no RNN layer (default=300)")
    parser.add_argument("-b",
                        "--batch-size",
                        dest="batch_size",
                        type=int,
                        metavar='<int>',
                        default=32,
                        help="Batch size (default=32)")
    parser.add_argument("-v",
                        "--vocab-size",
                        dest="vocab_size",
                        type=int,
                        metavar='<int>',
                        default=4000,
                        help="Vocab size (default=4000)")
    parser.add_argument(
        "--aggregation",
        dest="aggregation",
        type=str,
        metavar='<str>',
        default='mot',
        help=
        "The aggregation method for regp and bregp types (mot|attsum|attmean) (default=mot)"
    )
    parser.add_argument(
        "--dropout",
        dest="dropout_prob",
        type=float,
        metavar='<float>',
        default=0.5,
        help=
        "The dropout probability. To disable, give a negative number (default=0.5)"
    )
    parser.add_argument(
        "--vocab-path",
        dest="vocab_path",
        type=str,
        metavar='<str>',
        help="(Optional) The path to the existing vocab file (*.pkl)")
    parser.add_argument("--skip-init-bias",
                        dest="skip_init_bias",
                        action='store_true',
                        help="Skip initialization of the last layer bias")
    parser.add_argument(
        "--emb",
        dest="emb_path",
        type=str,
        metavar='<str>',
        help="The path to the word embeddings file (Word2Vec format)")
    parser.add_argument("--epochs",
                        dest="epochs",
                        type=int,
                        metavar='<int>',
                        default=100,
                        help="Number of epochs (default=50)")
    parser.add_argument(
        "--maxlen",
        dest="maxlen",
        type=int,
        metavar='<int>',
        default=0,
        help=
        "Maximum allowed number of words during training. '0' means no limit (default=0)"
    )
    parser.add_argument("--seed",
                        dest="seed",
                        type=int,
                        metavar='<int>',
                        default=1234,
                        help="Random seed (default=1234)")
    ## dsv
    parser.add_argument("--min-word-freq",
                        dest="min_word_freq",
                        type=int,
                        metavar='<int>',
                        default=2,
                        help="Min word frequency")
    parser.add_argument("--stack",
                        dest="stack",
                        type=int,
                        metavar='<int>',
                        default=1,
                        help="how deep to stack core RNN")
    parser.add_argument("--skip-emb-preload",
                        dest="skip_emb_preload",
                        action='store_true',
                        help="Skip preloading embeddings")
    parser.add_argument("--tokenize-old",
                        dest="tokenize_old",
                        action='store_true',
                        help="use old tokenizer")

    parser.add_argument("-ar",
                        "--abs-root",
                        dest="abs_root",
                        type=str,
                        metavar='<str>',
                        required=False,
                        help="Abs path to root directory")
    parser.add_argument("-ad",
                        "--abs-data",
                        dest="abs_data",
                        type=str,
                        metavar='<str>',
                        required=False,
                        help="Abs path to data directory")
    parser.add_argument("-ao",
                        "--abs-out",
                        dest="abs_out",
                        type=str,
                        metavar='<str>',
                        required=False,
                        help="Abs path to output directory")
    parser.add_argument("-dp",
                        "--data-path",
                        dest="data_path",
                        type=str,
                        metavar='<str>',
                        required=False,
                        help="Abs path to output directory")
    ##

    if argv is None:
        args = parser.parse_args()
    else:
        args = parser.parse_args(argv)

    out_dir = args.abs_out
    U.mkdir_p(os.path.join(out_dir, 'preds'))
    U.set_logger(out_dir)
    U.print_args(args)

    assert args.model_type in {'reg', 'regp', 'breg', 'bregp', 'rwa'}
    assert args.algorithm in {
        'rmsprop', 'sgd', 'adagrad', 'adadelta', 'adam', 'adamax'
    }
    assert args.loss in {'mse', 'mae', 'kappa', 'soft_kappa'}
    assert args.recurrent_unit in {'lstm', 'gru', 'simple', 'rwa'}
    assert args.aggregation in {'mot', 'attsum', 'attmean'}

    if args.seed > 0:
        RANDSEED = args.seed
    else:
        RANDSEED = np.random.randint(10000)
    np.random.seed(RANDSEED)

    #######################

    #from deepats.util import GPUtils as GPU
    import GPUtil as GPU
    mem = GPU.avail_mem()
    logger.info('AVAIL GPU MEM == %.4f' % mem)
    # 	if mem < 0.05:
    # 		return None
    ###############################################################################################################################
    ## Prepare data
    #

    emb_words = None
    if not args.skip_emb_preload:  #if args.emb_path:
        from deepats.w2vEmbReader import W2VEmbReader as EmbReader
        logger.info('Loading embedding vocabulary...')
        emb_reader = EmbReader(args.emb_path, emb_dim=args.emb_dim)
        emb_words = emb_reader.load_words()

    train_df, dev_df, test_df, vocab, overal_maxlen, qwks = dataset.get_data(
        args.data_path, emb_words=emb_words, seed=RANDSEED)
    vocab_size = len(vocab)

    train_x = train_df['text'].values
    train_y = train_df['y'].values
    dev_x = dev_df['text'].values
    dev_y = dev_df['y'].values
    test_x = test_df['text'].values
    test_y = test_df['y'].values

    # Dump vocab

    abs_vocab_file = os.path.join(out_dir, 'vocab.pkl')
    with open(abs_vocab_file, 'wb') as vocab_file:
        pk.dump(vocab, vocab_file)

    if args.recurrent_unit == 'rwa':
        setattr(args, 'model_type', 'rwa')

    # Pad sequences for mini-batch processing
    from keras.preprocessing import sequence

    train_x = sequence.pad_sequences(train_x, maxlen=overal_maxlen)
    dev_x = sequence.pad_sequences(dev_x, maxlen=overal_maxlen)
    test_x = sequence.pad_sequences(test_x, maxlen=overal_maxlen)

    ###############################################################################################################################
    ## Some statistics

    bincounts, mfs_list = U.bincounts(train_y)
    with open(os.path.join(out_dir, 'bincounts.txt'), 'w') as output_file:
        for bincount in bincounts:
            output_file.write(str(bincount) + '\n')

    train_mean = train_y.mean(axis=0)
    train_std = train_y.std(axis=0)
    dev_mean = dev_y.mean(axis=0)
    dev_std = dev_y.std(axis=0)
    test_mean = test_y.mean(axis=0)
    test_std = test_y.std(axis=0)

    logger.info('Statistics:')
    logger.info('  PROMPT_ID\t= ' + U.b_green(args.prompt_id))
    logger.info(
        '  TEST KAPPAS\t= {} (float, int)'.format(U.b_green('%.4f (%.4f)')) %
        (qwks[1], qwks[0]))
    logger.info('  RANDSEED\t= ' + U.b_green(str(RANDSEED)))
    logger.info('  train_x shape: ' + str(np.array(train_x).shape))
    logger.info('  dev_x shape:   ' + str(np.array(dev_x).shape))
    logger.info('  test_x shape:  ' + str(np.array(test_x).shape))
    logger.info('  train_y shape: ' + str(train_y.shape))
    logger.info('  dev_y shape:   ' + str(dev_y.shape))
    logger.info('  test_y shape:  ' + str(test_y.shape))
    logger.info('  train_y mean: %s, stdev: %s, MFC: %s' %
                (str(train_mean), str(train_std), str(mfs_list)))
    logger.info('  overal_maxlen:  ' + str(overal_maxlen))

    ###############################################################################################################################
    ## Optimizer algorithm

    from keras import optimizers
    from deepats.optimizers import get_optimizer
    #optimizer = get_optimizer(args)

    ## RMS-PROP

    #optimizer = optimizers.RMSprop(lr=0.00075, rho=0.9, clipnorm=1)
    #optimizer = optimizers.RMSprop(lr=0.001, rho=0.9, clipnorm=1)
    optimizer = optimizers.RMSprop(lr=0.003,
                                   rho=0.88,
                                   epsilon=1e-6,
                                   clipnorm=10)
    #optimizer = optimizers.RMSprop(lr=0.0018, rho=0.88, epsilon=1e-6, clipnorm=10)

    #optimizer = optimizers.RMSprop(lr=0.001, rho=0.9, epsilon=1e-8, clipnorm=10)
    #optimizer = optimizers.RMSprop(lr=0.004, rho=0.85, epsilon=1e-6, clipnorm=10)# best 2.1 (RWA)
    #optimizer = optimizers.RMSprop(lr=0.0025, rho=0.8, epsilon=1e-8, clipnorm=10) # best 2.1 (RWA)
    #optimizer = optimizers.RMSprop(lr=0.001, rho=0.9, epsilon=1e-8, clipnorm=10) # best 2.3 (RWA)
    #optimizer = optimizers.RMSprop(lr=0.0025, rho=0.88, epsilon=1e-8, clipnorm=10) # best 2.3 (RWA)
    #optimizer = optimizers.RMSprop(lr=0.004, rho=0.85, epsilon=1e-8, clipnorm=10) # best 2.10 (RWA)

    ## OTHER METHODS
    #optimizer = optimizers.Adam(lr=0.0018, clipnorm=5)
    #optimizer = optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, clipnorm=1)
    #optimizer = optimizers.Adam(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=1e-06, clipnorm=10)

    #optimizer = optimizers.Nadam(lr=0.002)

    #optimizer = optimizers.Adamax(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=1e-08, clipnorm=10)
    #optimizer = optimizers.SGD(lr=0.05, momentum=0, decay=0.0, nesterov=False, clipnorm=10)
    #optimizer = optimizers.Adagrad(lr=0.03, epsilon=1e-08, clipnorm=10)
    #optimizer = optimizers.Adadelta(lr=1.0, rho=0.95, epsilon=1e-06, clipnorm=10)

    ###############################################################################################################################
    ## Building model
    #
    loss = kappa_loss
    metric_name = 'kappa'

    ########################################################

    from deepats.models import create_model
    model = create_model(args, train_y.mean(axis=0), overal_maxlen, vocab)

    ############################################
    '''
    # test yaml serialization/de-serialization
    yaml = model.to_yaml()
    print(yaml)
    from deepats.my_layers import MeanOverTime
    from deepats.rwa import RWA
    model = model_from_yaml(yaml, custom_objects={'MeanOverTime': MeanOverTime, 'RWA': RWA})
    '''
    ############################################

    model.compile(loss=loss, optimizer=optimizer, metrics=[kappa])
    print(model.summary())

    ###############################################################################################################################
    ## Callbacks
    callbacks = []

    ##############################
    ''' Evaluate test_kappa '''
    class Eval(Callback):
        def __init__(self, x, y, funcs, prefix='test', batch_size=128):
            super(Eval, self).__init__()
            self.x = x
            self.y = y
            self.funcs = funcs
            self.prefix = prefix
            self.batch_size = batch_size
            self.epoch = 0

        def on_epoch_end(self, epoch, logs=None):
            logs = logs or {}
            self.epoch += 1
            p = np.asarray(
                self.model.predict(self.x,
                                   batch_size=self.batch_size).squeeze())
            for func in self.funcs:
                f = func(self.y, p)
                name = '{}_{}'.format(self.prefix, func.__name__)
                logs[name] = f
                print(' - {0}: {1:0.4f}'.format(name, f))

    test_eval = Eval(test_x, test_df['y'].values, [nkappa], 'test')
    callbacks.append(test_eval)

    ##############################
    ''' ModelCheckpoint '''

    wt_path = os.path.join(out_dir, 'weights.{}.hdf5'.format('rwa'))
    checkpt = ModelCheckpoint(wt_path,
                              monitor='val_kappa',
                              verbose=1,
                              save_best_only=True,
                              mode='max')
    callbacks.append(checkpt)

    ##############################
    ''' LRplateau '''

    class LRplateau(ReduceLROnPlateau):
        def __init__(self,
                     monitor='val_loss',
                     factor=0.1,
                     patience=10,
                     verbose=0,
                     mode='auto',
                     epsilon=1e-4,
                     cooldown=0,
                     min_lr=0,
                     checkpoint=None):
            super(LRplateau, self).__init__(monitor, factor, patience, verbose,
                                            mode, epsilon, cooldown, min_lr)
            self.checkpoint = checkpoint

        def on_lr_reduce(self, epoch):
            if self.checkpoint:
                if self.verbose > 0:
                    print('Epoch {}: loading wts from {}.\n'.format(
                        epoch, self.checkpoint.filepath))
                self.model.load_weights(self.checkpoint.filepath)

        def on_epoch_end(self, epoch, logs=None):
            logs = logs or {}
            logs['lr'] = K.get_value(self.model.optimizer.lr)
            current = logs.get(self.monitor)
            if current is None:
                warnings.warn(
                    'Learning Rate Plateau Reducing requires %s available!' %
                    self.monitor, RuntimeWarning)
            else:
                if self.in_cooldown():
                    self.cooldown_counter -= 1
                    self.wait = 0

                if self.monitor_op(current, self.best):
                    self.best = current
                    self.wait = 0
                elif not self.in_cooldown():
                    if self.wait >= self.patience:
                        old_lr = float(K.get_value(self.model.optimizer.lr))
                        if old_lr > self.min_lr + self.lr_epsilon:
                            new_lr = old_lr * self.factor
                            new_lr = max(new_lr, self.min_lr)
                            K.set_value(self.model.optimizer.lr, new_lr)
                            if self.verbose > 0:
                                print(
                                    '\nEpoch {0}: reducing learning rate to {1:0.4g}.'
                                    .format(epoch, new_lr))
                            self.cooldown_counter = self.cooldown
                            self.wait = 0
                            self.on_lr_reduce(epoch)
                    self.wait += 1

    reduce_lr = LRplateau(monitor='val_kappa',
                          mode='max',
                          patience=3,
                          factor=0.33,
                          min_lr=0.00001,
                          verbose=1,
                          checkpoint=checkpt)

    callbacks.append(reduce_lr)
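    # Together these callbacks implement a plateau schedule on val_kappa:
    # ModelCheckpoint keeps the best weights seen so far, and when LRplateau
    # lowers the learning rate it first reloads those best weights before
    # training continues at the reduced rate.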

    ###############################################################################################################################
    ## Training

    model.fit(train_x,
              train_y,
              validation_data=(dev_x, dev_df['y'].values),
              batch_size=args.batch_size,
              epochs=args.epochs,
              callbacks=callbacks,
              verbose=1)
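
    # Post-training sketch (an assumption, not in the original script): restore the
    # best weights saved by the ModelCheckpoint callback above and report a final
    # test kappa, reusing the module-level nkappa function already passed to Eval.
    model.load_weights(wt_path)
    test_preds = model.predict(test_x, batch_size=args.batch_size).squeeze()
    print('final test kappa: %.4f' % nkappa(test_df['y'].values, test_preds))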