Example #1
import time
import cPickle as pk

import numpy as np
import theano.tensor as T
from theano import config

floatX = config.floatX
# VAE and load_dataset are defined elsewhere in this project

def main():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('-batsize', default=100, type=int)  ##100
    parser.add_argument('-nlayers', default=1, type=int, help='num_hid_layers before output')
    parser.add_argument('-hdim', default=200, type=int) ##200 for freyfaces
    parser.add_argument('-zdim', default=2, type=int)  ##2
    parser.add_argument('-lmbda', default=0., type=float, help='weight decay coeff') ##0.001
    parser.add_argument('-lr', default=0.01, type=float, help='learning rate')  ##0.01
    parser.add_argument('-epochs', default=100, type=int)  ##1000
    parser.add_argument('-print_every', default=5, type=int)  ##100
    parser.add_argument('-save_every', default=50, type=int)  ##1
    parser.add_argument('-outfile', default='vae_model.pk')
    parser.add_argument('-dset', default='mnist') ##mnist freyfaces
    parser.add_argument('-COV', action='store_true')  ##type=bool would treat any non-empty string as True
    parser.add_argument('-decM', default='gaussian', help='bernoulli | gaussian')
    args = parser.parse_args()

    batsize = args.batsize
    dset = args.dset
    data = load_dataset(dset)
    valid_fg = 0
    dec_nonlin = T.nnet.relu  ##T.nnet.softplus
    if dset=='mnist':
        train_x, train_y = data['train']  ##mnist: (N,784)
        valid_x, valid_y = data['valid']
        num_valid_bats = valid_x.shape[0] // batsize
        print "valid data shape: ", valid_x.shape
        valid_fg = 1
    elif dset=='freyfaces':
        train_x = data
    print "training data shape: ", train_x.shape

    model = VAE(train_x.shape[1], args, dec_nonlin=dec_nonlin)

    num_train_bats = train_x.shape[0] // batsize  ##discard last <batsize

    begin = time.time()
    for i in xrange(args.epochs):
        for k in xrange(num_train_bats):
            x = train_x[k*batsize : (k+1)*batsize, :]
            eps = np.random.randn(x.shape[0], args.zdim).astype(floatX)
            cost = model.train(x, eps, i)  ##update_times=epochs*num_train_bats
        j = i+1
        if j % args.print_every == 0:  ##(b+1)
            end = time.time()
            print('epoch %d, cost %.2f, time %.2fs' % (j, cost, end-begin))
            begin = end
            if valid_fg == 1:
                valid_cost = 0
                for l in xrange(num_valid_bats):
                    x_val = valid_x[l*batsize:(l+1)*batsize, :]
                    eps_val = np.zeros((x_val.shape[0], args.zdim), dtype=floatX)
                    valid_cost = valid_cost + model.test(x_val, eps_val)
                valid_cost = valid_cost / num_valid_bats
                print('valid cost: %f' % valid_cost)
        if j % args.save_every == 0:  ##
            with open(args.outfile, 'wb') as f:
                pk.dump(model, f, protocol=pk.HIGHEST_PROTOCOL)
            print('model saved')
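# The script above assumes a project-local load_dataset; here is a minimal
# sketch of such a loader (an assumption, not the project's actual code),
# using the classic mnist.pkl.gz layout of (train, valid, test) tuples:
def load_dataset(dset):
    import gzip
    if dset == 'mnist':
        with gzip.open('mnist.pkl.gz', 'rb') as f:
            train, valid, test = pk.load(f)
        return {'train': train, 'valid': valid, 'test': test}
    elif dset == 'freyfaces':
        with open('freyfaces.pkl', 'rb') as f:
            return pk.load(f)  # a single (N, 560) array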
Example #2
def main():
    args = utils.get_args()
    dataset = utils.load_dataset(os.path.join(args.data_path, DATASET_FILE))
    index2word, word2index = utils.load_dicts(os.path.join(args.data_path, VOCABULARY_FILE))
    
    print("Use dataset with {} sentences".format(dataset.shape[0]))
    
    batch_size = args.batch_size
    noise_size = args.noise_size
    with tf.Graph().as_default(), tf.Session() as session:   
        lstm_gan = LSTMGAN(
            SENTENCE_SIZE,
            VOCABULARY_SIZE,
            word2index[SENTENCE_START_TOKEN],
            hidden_size_gen = args.hid_gen,
            hidden_size_disc = args.hid_disc,
            input_noise_size = noise_size,
            batch_size = batch_size,
            dropout = args.dropout,
            lr = args.lr,
            grad_cap = args.grad_clip
        )
        
        session.run(tf.initialize_all_variables())

        if args.save_model or args.load_model:
            saver = tf.train.Saver()

        if args.load_model:
            try:
                saver.restore(session, utils.SAVER_FILE)
            except ValueError:
                print("Cant find model file")
                sys.exit(1)
        while True:
            offset = 0.
            for dataset_part in utils.iterate_over_dataset(dataset, batch_size*args.disc_count):
                print("Start train discriminator wih offset {}...".format(offset))
                for ind, batch in enumerate(utils.iterate_over_dataset(dataset_part, batch_size)):
                    noise = np.random.random(size=(batch_size, noise_size))
                    cost = lstm_gan.train_disc_on_batch(session, noise, batch)
                    print("Processed {} sentences with train cost = {}".format((ind+1)*batch_size, cost))

                print("Start train generator...")
                for ind in range(args.gen_count):
                    noise = np.random.random(size=(batch_size, noise_size))
                    cost = lstm_gan.train_gen_on_batch(session, noise)
                    if args.gen_sent:
                        sent = lstm_gan.generate_sent(session, np.random.random(size=(noise_size, )))
                        print(' '.join(index2word[i] for i in sent))
                    print("Processed {} noise inputs with train cost {}".format((ind+1)*batch_size, cost))
                
                offset += batch_size*args.disc_count
                if args.save_model:
                    saver.save(session, utils.SAVER_FILE)  # was `sess`, which is undefined here
                    print("Model saved")
Example #3
def trainer(model_params):
    """Train a sketch-rnn model."""
    np.set_printoptions(precision=8, edgeitems=6, linewidth=200, suppress=True)

    print('Loading data files.')
    train_set, model_params = utils.load_dataset(FLAGS.root_dir, FLAGS.dataset, model_params)

    reset_graph()
    model = sketch_rnn_model.Model(model_params)

    sess = tf.Session()
    sess.run(tf.global_variables_initializer())

    if FLAGS.load_pretrain:
        load_pretrain(sess, FLAGS.vae_type, FLAGS.enc_type, FLAGS.dataset, FLAGS.basenet, FLAGS.log_root)

    if FLAGS.resume_training:
        resume_train(sess, FLAGS.load_dir, FLAGS.dataset, FLAGS.enc_type, FLAGS.basenet, FLAGS.feat_type, FLAGS.log_root)

    train(sess, model, train_set)
Example #4
def main():
    args = parse_arguments()
    hidden_size = 512
    embed_size = 256
    assert torch.cuda.is_available()

    print("[!] preparing dataset...")
    train_iter, val_iter, test_iter, DE, EN = load_dataset(args.batch_size)
    de_size, en_size = len(DE.vocab), len(EN.vocab)
    print("[TRAIN]:%d (dataset:%d)\t[TEST]:%d (dataset:%d)"
          % (len(train_iter), len(train_iter.dataset),
             len(test_iter), len(test_iter.dataset)))
    print("[DE_vocab]:%d [en_vocab]:%d" % (de_size, en_size))

    print("[!] Instantiating models...")
    encoder = Encoder(de_size, embed_size, hidden_size,
                      n_layers=2, dropout=0.5)
    decoder = Decoder(embed_size, hidden_size, en_size,
                      n_layers=1, dropout=0.5)
    seq2seq = Seq2Seq(encoder, decoder).cuda()
    optimizer = optim.Adam(seq2seq.parameters(), lr=args.lr)
    print(seq2seq)

    best_val_loss = None
    for e in range(1, args.epochs+1):
        train(e, seq2seq, optimizer, train_iter,
              en_size, args.grad_clip, DE, EN)
        val_loss = evaluate(seq2seq, val_iter, en_size, DE, EN)
        print("[Epoch:%d] val_loss:%5.3f | val_pp:%5.2fS"
              % (e, val_loss, math.exp(val_loss)))

        # Save the model if the validation loss is the best we've seen so far.
        if not best_val_loss or val_loss < best_val_loss:
            print("[!] saving model...")
            if not os.path.isdir(".save"):
                os.makedirs(".save")
            torch.save(seq2seq.state_dict(), './.save/seq2seq_%d.pt' % (e))
            best_val_loss = val_loss
    test_loss = evaluate(seq2seq, test_iter, en_size, DE, EN)
    print("[TEST] loss:%5.2f" % test_loss)
Example #5
def tester(model_params):
    """Test model."""
    np.set_printoptions(precision=8, edgeitems=6, linewidth=200, suppress=True)

    print('Hyperparams:')
    for key, val in model_params.values().items():
        print('%s = %s' % (key, str(val)))
    print('Loading data files.')
    test_set, sample_model_params, gen_model_params = utils.load_dataset(FLAGS.root_dir, FLAGS.dataset, model_params, inference_mode=True)

    reset_graph()
    sample_model = sketch_rnn_model.Model(sample_model_params)
    gen_model = sketch_rnn_model.Model(gen_model_params, reuse=True)

    sess = tf.Session()
    sess.run(tf.global_variables_initializer())

    if FLAGS.dataset in ['shoesv2f_sup', 'shoesv2f_train']:
        dataset = 'shoesv2'
    else:
        dataset = FLAGS.dataset

    if FLAGS.resume_training:
        if FLAGS.load_dir == '':
            FLAGS.load_dir = FLAGS.log_root.split('runs')[0] + 'model_to_test/%s/' % dataset
            # set dir to load the model for testing
            FLAGS.load_dir = os.path.join(FLAGS.load_dir, FLAGS.basenet)
        load_checkpoint(sess, FLAGS.load_dir)

    # Write config file to json file.
    tf.gfile.MakeDirs(FLAGS.log_root)
    with tf.gfile.Open(
            os.path.join(FLAGS.log_root, 'model_config.json'), 'w') as f:
        json.dump(model_params.values(), f, indent=True)

    sample_test(sess, sample_model, gen_model, test_set, model_params.max_seq_len)
Example #6
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.optimizers import Adam
from utils import load_json, load_dataset

asd_data_path = '/home/elliot/PycharmProjects/abide/processed_data_files/asd_raw/debug_raw_img_asd.json'
ctl_data_path = '/home/elliot/PycharmProjects/abide/processed_data_files/control_raw/debug_raw_img_ctl.json'

lr = 0.0001

if __name__ == '__main__':
    x_train, y_train, x_valid, y_valid = load_dataset(asd_data_path,
                                                      ctl_data_path)
    assert tf.keras.backend.image_data_format() == 'channels_last'
    x_train = np.expand_dims(x_train, axis=-1)
    x_valid = np.expand_dims(x_valid, axis=-1)
    input_shape = (61, 73, 61, 1)
    model = tf.keras.models.Sequential()
    model.add(
        layers.Conv3D(32,
                      kernel_size=(3, 3, 3),
                      activation='relu',
                      input_shape=input_shape))
    model.add(layers.Conv3D(32, kernel_size=(3, 3, 3), activation='relu'))
    model.add(layers.MaxPooling3D(pool_size=(2, 2, 2)))

    # model.add(layers.Dropout(0.25))
    # model.add(layers.Conv3D(64, kernel_size=(3, 3, 3), activation='relu'))
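    # A plausible completion of the truncated head (an assumption, not the
    # original code): flatten, classify, and train with the Adam lr above.
    model.add(layers.Flatten())
    model.add(layers.Dense(128, activation='relu'))
    model.add(layers.Dropout(0.5))
    model.add(layers.Dense(1, activation='sigmoid'))  # ASD vs. control
    model.compile(optimizer=Adam(learning_rate=lr),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    model.fit(x_train, y_train, batch_size=8, epochs=20,
              validation_data=(x_valid, y_valid))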
Example #7

import argparse
import time
from importlib import import_module

import tensorflow as tf
from tensorflow import keras
# load_dataset and CustomizedSchedule come from this project's own modules

parser = argparse.ArgumentParser(description='MachineTranslation')
parser.add_argument('--model', default="MyTransformer", type=str, help='choose a model: Transformer')
args = parser.parse_args()


if __name__ == '__main__':
    dataset = 'Data'  # dataset directory
    model_name = args.model  # MyTransformer

    x = import_module('models.' + model_name)  # dynamically import the module matching the chosen model
    config = x.Config(dataset)  # the chosen model's Config.__init__ sets up its hyperparameters
    start_time = time.time()
    print("Loading data...")
    train_dataset, valid_dataset, en_tokenizer, zh_tokenizer = load_dataset(config.dataset_path, config, config.num_samples)

    config.input_vocab_size = en_tokenizer.vocab_size + 2
    config.target_vocab_size = zh_tokenizer.vocab_size + 2

    model = x.MyModel(config)
    transformer = model.createModel()

    learning_rate = CustomizedSchedule(config.d_model)
    optimizer = keras.optimizers.Adam(learning_rate,
                                      beta_1=0.9,
                                      beta_2=0.98,
                                      epsilon=1e-9)

    checkpoint = tf.train.Checkpoint(optimizer=optimizer,
                                     model=transformer)
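    # A plausible continuation (assumption: config defines checkpoint_path):
    # keep the most recent checkpoints and restore the latest one if present.
    ckpt_manager = tf.train.CheckpointManager(checkpoint,
                                              config.checkpoint_path,
                                              max_to_keep=5)
    if ckpt_manager.latest_checkpoint:
        checkpoint.restore(ckpt_manager.latest_checkpoint)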
Example #8
def main(args):
    """ set up params, log dir, splits, encode the data, and run the training """

    logger.info("software version {}".format(utils.__version__))

    # set up log directory & save the args file to it
    log_dir = log_dir_name(args)
    logger.info("log directory is {}".format(log_dir))
    utils.mkdir(log_dir)
    save_args(vars(args), join(log_dir, "args.txt"))

    # load the dataset
    if args.dataset_name != "":
        dataset_file = constants.DATASETS[args.dataset_name]["ds_fn"]
    else:
        dataset_file = args.dataset_file
    logger.info("loading dataset from {}".format(dataset_file))
    ds = utils.load_dataset(ds_fn=dataset_file)

    # load the dataset split or create one
    if args.split_dir != "":
        if isdir(args.split_dir):
            logger.info("loading split from {}".format(args.split_dir))
            split = sd.load_split_dir(args.split_dir)
            if isinstance(split, list):
                raise ValueError(
                    "this script doesn't support multiple reduced train size replicates in a single run. "
                    "run each one individually by specifying the split dir of the replicate. "
                )
        else:
            raise FileNotFoundError(
                "specified split dir doesn't exist: {}".format(args.split_dir))
    else:
        # create a classic train-tune-test split based on the specified args
        logger.info(
            "creating a train/test split with tr={}, tu={}, and te={}, seed={}"
            .format(args.train_size, args.tune_size, args.test_size,
                    args.split_rseed))
        split, _ = sd.train_tune_test(ds,
                                      train_size=args.train_size,
                                      tune_size=args.tune_size,
                                      test_size=args.test_size,
                                      rseed=args.split_rseed)

    # error checking for split -- make sure we have a train set
    if "train" not in split:
        raise ValueError(
            "no train set in dataset split. specify a split with a train set to proceed."
        )
    if "tune" not in split:
        raise ValueError(
            "no tune set in dataset split. specify a split with a tune set to proceed. "
            "the tune set is used for early stopping and logging statistics to tensorboard. "
            "if you dont want a tune set, and instead just prefer to have a train and test set, "
            "just name your test set as the tune set so it is compatible with the script. "
        )

    # save the split indices that are going to be used for this model to the log directory for the model
    # this isn't as good as explicitly saving a split using split_dataset.py because the directory name will
    # not be informative. todo if loading a split_dir, it would be good to copy over the directory name
    logger.info("backing up split to log dir {}".format(join(log_dir,
                                                             "split")))
    sd.save_split(split, join(log_dir, "split"))

    # figure out the wt_aa and wt_offset for encoding data
    if args.dataset_name != "":
        wt_aa = constants.DATASETS[args.dataset_name]["wt_aa"]
        wt_ofs = constants.DATASETS[args.dataset_name]["wt_ofs"]
    else:
        wt_aa = args.wt_aa
        wt_ofs = args.wt_ofs

    # create the dataset dictionary, containing encoded data, scores, etc, based on the splits
    data = collections.defaultdict(dict)
    data["ds"] = ds
    for set_name, idxs in split.items():
        data["idxs"][set_name] = idxs
        data["variants"][set_name] = ds.iloc[idxs]["variant"].tolist()
        # we are using "score" as the default target, but support for multiple scores could be added here
        data["scores"][set_name] = ds.iloc[idxs]["score"].to_numpy()
        # encode the data
        logger.info("encoding {} set variants using {} encoding".format(
            set_name, args.encoding))
        data["encoded_data"][set_name] = enc.encode(
            encoding=args.encoding,
            variants=data["variants"][set_name],
            wt_aa=wt_aa,
            wt_offset=wt_ofs)

    evaluations = run_training(data, log_dir, args)
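# For reference, the split consumed above is a plain dict mapping set names to
# index arrays (inferred from how main() uses it; the exact sd API is an
# assumption), e.g.:
#     split = {"train": np.arange(0, 800),
#              "tune": np.arange(800, 900),
#              "test": np.arange(900, 1000)}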
Example #9
def train_seq_malGAN():
    """
    main training function: first train subD, then alternately train boxD and malG
    :return: None
    """

    max_seq_len = 1024
    # make workspace directory for current mission and copy code
    timeTag = datetime.now().strftime('%Y-%m-%d')
    #timeTag = '2017-11-19'
    dir_path = '../tensorflow_result/'
    if not os.path.exists(dir_path):
        os.mkdir(dir_path)
    dir_path = '../tensorflow_result/' + timeTag
    if not os.path.exists(dir_path):
        os.mkdir(dir_path)
    if os.path.exists(os.path.join(dir_path, 'code')):
        shutil.rmtree(os.path.join(dir_path, 'code'))
    shutil.copytree('.', os.path.join(dir_path, 'code'))
    log_path = dir_path + '/log.txt'
    score_template = 'TPR %(TPR)f\tFPR %(FPR)f\tAccuracy %(Accuracy)f\tAUC %(AUC)f'
    print((str(datetime.now()) + '\tStart training seq_malGAN.'))

    # define substituteD as subD, black box D as boxD and malware Generator as G
    boxD = blackboxDiscriminator(cell_type='LSTM',
                                 rnn_layers=[128],
                                 is_bidirectional=True,
                                 attention_layers=[128],
                                 ff_layers=[128],
                                 batch_size=64,
                                 num_token=161,
                                 max_seq_len=max_seq_len * 2,
                                 num_class=2,
                                 learning_rate=0.001,
                                 scope='black_box_D',
                                 model_path=dir_path + '/black_box_D_model')
    # boxD_params = {'vocab_num': 160, 'embedding_dim': 160, 'hidden_dim': 128, 'is_bidirectional': False,
    #                'max_seq_len': 1024, 'attention_layers': None, 'ff_layers': [512], 'class_num': 2}
    # G_params = {}
    print((str(datetime.now()) + '\tFinish defining subD, boxD and G.'))

    # load data
    X_malware, seqLen_malware, X_benigh, seqLen_benigh = \
        load_dataset('../data/API_rand_trainval_len_2048.txt', max_seq_len, 0)
    X = np.vstack((X_malware, X_benigh))
    seqLen = np.hstack((seqLen_malware, seqLen_benigh))
    Y = np.array([1] * len(X_malware) + [0] * len(X_benigh))
    X_malware_test, seqLen_malware_test, X_benigh_test, seqLen_benigh_test = \
        load_dataset('../data/API_rand_test_len_2048.txt', max_seq_len, 0)
    X_test = np.vstack((X_malware_test, X_benigh_test))
    seqLen_test = np.hstack((seqLen_malware_test, seqLen_benigh_test))
    Y_test = np.array([1] * len(X_malware_test) + [0] * len(X_benigh_test))
    print((str(datetime.now()) + '\tFinish loading data.'))
    print((str(datetime.now()) +
           '\tlen(X)=%d\tlen(X_malware)=%d\tlen(X_benigh)=%d\t' %
           (len(X), len(X_malware), len(X_benigh))))
    print((str(datetime.now()) +
           '\tlen(X_test)=%d\tlen(X_malware_test)=%d\tlen(X_benigh_test)=%d' %
           (len(X_test), len(X_malware_test), len(X_benigh_test))))

    # train substitute Discriminator first
    print((str(datetime.now()) + '\tStart training black box Discriminator.'))
    boxD.train(np.hstack((X, np.zeros_like(X))),
               seqLen,
               Y,
               max_epochs=50,
               max_epochs_val=5)
    print((str(datetime.now()) + '\tFinish training subD.'))
    print((str(datetime.now()) + '\tTraining set result:'))
    print((score_template % evaluate(boxD, np.hstack(
        (X, np.zeros_like(X))), seqLen, Y)))
    print((str(datetime.now()) + '\tTest set result:'))
    print((score_template %
           evaluate(boxD, np.hstack(
               (X_test, np.zeros_like(X_test))), seqLen_test, Y_test)))
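# evaluate() is project-local; a sketch of one consistent with score_template's
# keys (assumptions: a predict_proba-style model API and sklearn available):
def evaluate(model, X, seqLen, Y):
    from sklearn.metrics import roc_auc_score, accuracy_score, confusion_matrix
    probs = model.predict_proba(X, seqLen)  # assumed model API
    preds = (probs >= 0.5).astype(int)
    tn, fp, fn, tp = confusion_matrix(Y, preds).ravel()
    return {'TPR': tp / float(tp + fn), 'FPR': fp / float(fp + tn),
            'Accuracy': accuracy_score(Y, preds), 'AUC': roc_auc_score(Y, probs)}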
Example #10
import os
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from utils import CHART_DIR, DATA_DIR, load_dataset

features, labels = load_dataset('seeds.tsv')  # custom parser needed since the file mixes float and string data

# initialize a classifier instance
knn = KNeighborsClassifier(n_neighbors=5, weights='uniform',
                           algorithm='auto',
                           leaf_size=30,
                           p=2,
                           metric='minkowski',
                           metric_params=None)

# normalize all features to the same scale before fitting
classifier = Pipeline([('norm', StandardScaler()), ('knn', knn)])

# compute 10-fold cross-validation for each odd k
means = []
for k in range(1, 20, 2):
    classifier.set_params(knn__n_neighbors=k)

    fold_accuracies = []
    # shuffle before creating folds since the labels are stored contiguously
    for training, testing in KFold(n_splits=10, shuffle=True).split(features):
        classifier.fit(features[training], labels[training])
        predictions = classifier.predict(features[testing])
        fold_accuracies.append(np.mean(predictions == labels[testing]))
    means.append(np.mean(fold_accuracies))
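# The same mean-accuracy estimate is available in one call (for reference):
#     scores = cross_val_score(classifier, features, labels,
#                              cv=KFold(n_splits=10, shuffle=True))
#     means.append(scores.mean())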
Example #11
        Q, mask, A = get_batch(begin,end,q_test,a_test,batch_size,max_q,Na)
        a_pred = sess.run(model_outputs['answer_pred'], 
                          feed_dict={model_outputs['question']:Q,
                                     model_outputs['mask']:mask, 
                                     model_outputs['answer']:A})
        equals = 1*np.equal(A.argmax(axis=1),a_pred)
        equals = list(equals[:end-begin])
        acc += equals
    acc = tf.reduce_mean(tf.to_float(acc))
    acc_s = tf.scalar_summary("acc_tf",acc,name="acc_tf")
    acc,acc_s = sess.run([acc,acc_s])
    writer.add_summary(acc_s,step)
    return acc

if __name__=="__main__":
    q_train = load_dataset('datasets/coco/train/questions.idxs')
    q_test = load_dataset('datasets/coco/test/questions.idxs')
    a_train = load_dataset('datasets/coco/train/answers.idxs')
    a_test = load_dataset('datasets/coco/test/answers.idxs')

    q_i2w, q_w2i = load_vocab('datasets/coco/train/questions.vocab')
    a_i2w, a_w2i = load_vocab('datasets/coco/train/answers.vocab')

    max_q = len(max(q_train, key=lambda x:len(x)))+1
    Nq = len(q_i2w)
    Na = len(a_i2w)

    dh = 50 #LSTM hidden state dimension
    dq = 75 #Question embedding dimension
    da = 50 #Answer embedding dimension
    batch_size = 64
Example #12
from compute_cost import compute_cost
from gradient_descent import gradient_descent
from predict import predict
from utils import load_dataset, add_x0, feature_normalize
import numpy as np
import matplotlib.pyplot as plt

data = load_dataset("data.txt")

X_Original = data[:, 0:2]
y = data[:, 2:3]

plt.scatter(X_Original[:, 0],
            X_Original[:, 1],
            c=y,
            s=50,
            cmap=plt.cm.Spectral)

X, mu, sigma = feature_normalize(X_Original)

plt.show()

X = add_x0(X)
m = X.shape[0]
n = X.shape[1]
learning_rate = .3
theta = np.zeros((n, 1))
max_iter = 400

his = np.zeros((max_iter, 1))
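# A plausible continuation of the truncated script (gradient_descent's exact
# signature is assumed): run the solver and plot the cost history.
theta, his = gradient_descent(X, y, theta, learning_rate, max_iter)
plt.plot(range(max_iter), his)
plt.xlabel('iteration')
plt.ylabel('cost')
plt.show()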
Example #13
                           betas=(opt.beta1, 0.999))

# --------- loss functions ------------------------------------
mse_criterion = nn.MSELoss()
bce_criterion = nn.BCELoss()

# --------- transfer to gpu ------------------------------------
netEP.cuda()
netEC.cuda()
netD.cuda()
netC.cuda()
mse_criterion.cuda()
bce_criterion.cuda()

# --------- load a dataset ------------------------------------
train_data, test_data = utils.load_dataset(opt)

train_loader = DataLoader(train_data,
                          num_workers=opt.data_threads,
                          batch_size=opt.batch_size,
                          shuffle=True,
                          drop_last=True,
                          pin_memory=True)
test_loader = DataLoader(test_data,
                         num_workers=opt.data_threads,
                         batch_size=opt.batch_size,
                         shuffle=True,
                         drop_last=True,
                         pin_memory=True)

Example #14
    def set_dataset(self, data_path, labels_path=''):
        self.dataset = utils.load_dataset(data_path, self._graph, labels_path)
        return
Example #15
if __name__ == "__main__":
    argv = sys.argv[1:]

    parser = argparse.ArgumentParser()
    parser.add_argument('-q','--question', required=True, 
        choices=["1.1", "1.2", "2.1", "2.2", "3.1", "4.3"])

    io_args = parser.parse_args()
    question = io_args.question

    if question == "1.1":
        # Q1.1 - This should print the answers to Q 1.1

        # Load the fluTrends dataset
        X, names = utils.load_dataset("fluTrends")

        # part 1: min, max, mean, median and mode
        print "Min = %.3f" % np.amin(X)
        print "Max = %.3f" % np.amax(X)
        print "Mean = %.3f" % np.mean(X)
        print "Median = %.3f" % np.median(X)
        print "Mode = %.3f" % utils.mode(X)

        # part 2: quantiles
        print "10th quantile = %.3f" % np.percentile(X, 10)
        print "25th quantile = %.3f" % np.percentile(X, 25)
        print "50th quantile = %.3f" % np.percentile(X, 50)
        print "75th quantile = %.3f" % np.percentile(X, 75)
        print "90th quantile = %.3f" % np.percentile(X, 90)
Example #16
def main():
    args = parse_arguments()
    hidden_size = 512
    embed_size = 256
    assert torch.cuda.is_available()

    # visdom for plotting
    vis_g = VisdomWriter("Generator Loss",
                         xlabel='Iteration', ylabel='Loss')
    vis_d = VisdomWriter("Negative Discriminator Loss",
                         xlabel='Iteration', ylabel='Loss')

    print("[!] preparing dataset...")
    train_iter, val_iter, test_iter, DE, EN = load_dataset(args.batch_size)
    de_size, en_size = len(DE.vocab), len(EN.vocab)
    print("de_vocab_size: %d en_vocab_size: %d" % (de_size, en_size))

    print("[!] Instantiating models...")
    encoder = Encoder(de_size, embed_size, hidden_size,
                      n_layers=2, dropout=0.5)
    decoder = Decoder(embed_size, hidden_size, en_size,
                      n_layers=1, dropout=0.5)
    G = Seq2Seq(encoder, decoder).cuda()
    D = Discriminator(en_size, embed_size, hidden_size).cuda()
    optimizer_D = optim.Adam(D.parameters(), lr=2e-4, betas=(0.5, 0.9))
    optimizer_G = optim.Adam(G.parameters(), lr=1e-4, betas=(0.5, 0.9))
    # TTUR paper https://arxiv.org/abs/1706.08500

    # pretrained
    # G.load_state_dict(torch.load("./.tmp/21.pt"))

    curriculum = 1
    dis_loss = []
    gen_loss = []
    for e in range(1, args.epochs+1):
        # Training
        for b, batch in enumerate(train_iter):
            src, len_src = batch.src
            trg, len_trg = batch.trg
            src, trg = src.cuda(), trg.cuda()
            # (1) Update D network
            enable_gradients(D)
            disable_gradients(G)
            G.eval()
            D.train()
            # clamp parameters to a cube
            for p in D.parameters():
                p.data.clamp_(-0.01, 0.01)
            D.zero_grad()
            loss_d = D_loss(D, G, src, trg, args.lamb, curriculum)
            loss_d.backward()
            optimizer_D.step()
            dis_loss.append(loss_d.data[0])
            # (2) Update G network
            if b % 10 == 0:
                enable_gradients(G)
                disable_gradients(D)
                D.eval()
                G.train()
                G.zero_grad()
                loss_g = G_loss(D, G, src, trg, curriculum)
                loss_g.backward()
                optimizer_G.step()
                gen_loss.append(loss_g.data[0])
            # plot losses
            if b % 10 == 0 and b > 1:
                vis_d.update(-loss_d.data[0])
                vis_g.update(loss_g.data[0])
        if e % 10 == 0 and e > 1:
            ce_loss = evaluate(e, G, val_iter, en_size, DE, EN, curriculum)
            print(ce_loss)
        if e % 100 == 0 and e > 1:
            curriculum += 1
Example #17
    parser.add_argument("--verbose", default=1, type=int)
    parser.add_argument("--evaluate", default=1, type=int)
    parser.add_argument("--glovefile", default="data/glove.6B.300d.txt", type=str)
    args = parser.parse_args()

    w2v = args.vectorization_method
    PoS = args.PoS_method
    NER = args.NER_method
    regressor = args.regressor

    if w2v == "glove":
        _define_global(args.glovefile)

    if args.evaluate:

        X, y = load_dataset(args.training_set, args.verbose)

        distance_estimator = _build_distance_estimator(X, y, w2v, PoS, NER, regressor, verbose=1)

        pickle.dump(distance_estimator, open("traning_distance_model.pickle", "wb"), protocol=pickle.HIGHEST_PROTOCOL)

        score = dict()
        X_test, y_test = load_dataset(args.test_set_headlines, verbose=1)
        score["headlines_score"] = sts_score(distance_estimator, X_test, y_test)
        X_test, y_test = load_dataset(args.test_set_images, verbose=1)
        score["images_score"] = sts_score(distance_estimator, X_test, y_test)
        X_test, y_test = load_dataset(args.test_set_answers_students, verbose=1)
        score["answers_students_score"] = sts_score(distance_estimator, X_test, y_test)

        if args.verbose == 1:
            print(score)
Example #18
def train(data_path, *, 
    base_output_path="models",
    run_name=None, 
    data_name=None,
    net_name="leap_cnn",
    clean=False, 
    box_dset="box", 
    confmap_dset="confmaps", 
    val_size=0.15, 
    preshuffle=True,
    filters=64, 
    rotate_angle=15, 
    epochs=100, 
    batch_size=32, 
    batches_per_epoch=50, 
    val_batches_per_epoch=10, 
    viz_idx=0, 
    reduce_lr_factor=0.1, 
    reduce_lr_patience=3, 
    reduce_lr_min_delta=1e-5, 
    reduce_lr_cooldown=0, 
    reduce_lr_min_lr=1e-10):
    """
    Trains the network and saves the intermediate results to an output directory.

    :param data_path: Path to an HDF5 file with box and confmaps datasets
    :param base_output_path: Path to folder in which the run data folder will be saved
    :param run_name: Name of the training run. If not specified, will be formatted according to other parameters.
    :param data_name: Name of the dataset for use in formatting run_name
    :param net_name: Name of the network for use in formatting run_name
    :param clean: If True, deletes the contents of the run output path
    :param box_dset: Name of the box dataset in the HDF5 data file
    :param confmap_dset: Name of the confidence maps dataset in the HDF5 data file
    :param preshuffle: If True, shuffle prior to splitting the dataset, otherwise validation set will be the last frames
    :param val_size: Fraction of dataset to use as validation
    :param filters: Number of filters to use as baseline (see create_model)
    :param rotate_angle: Images will be augmented by rotating by +-rotate_angle
    :param epochs: Number of epochs to train for
    :param batch_size: Number of samples per batch
    :param batches_per_epoch: Number of batches per epoch (validation is evaluated at the end of the epoch)
    :param val_batches_per_epoch: Number of batches for validation
    :param viz_idx: Index of the sample image to use for visualization
    :param reduce_lr_factor: Factor to reduce the learning rate by (see ReduceLROnPlateau)
    :param reduce_lr_patience: How many epochs to wait before reduction (see ReduceLROnPlateau)
    :param reduce_lr_min_delta: Minimum change in error required before reducing LR (see ReduceLROnPlateau)
    :param reduce_lr_cooldown: How many epochs to wait after reduction before LR can be reduced again (see ReduceLROnPlateau)
    :param reduce_lr_min_lr: Minimum that the LR can be reduced down to (see ReduceLROnPlateau)
    """

    # Load
    box, confmap = load_dataset(data_path, X_dset=box_dset, Y_dset=confmap_dset)
    viz_sample = (box[viz_idx], confmap[viz_idx])
    box, confmap, val_box, val_confmap, train_idx, val_idx = train_val_split(box, confmap, val_size=val_size, shuffle=preshuffle)

    # Pull out metadata
    img_size = box.shape[1:]
    num_output_channels = confmap.shape[-1]
    print("img_size:", img_size)
    print("num_output_channels:", num_output_channels)

    # Build run name if needed
    if data_name is None:
        data_name = os.path.splitext(os.path.basename(data_path))[0]
    if run_name is None:
        # Ex: "WangMice-DiegoCNN_v1.0_filters=64_rot=15_lrfactor=0.1_lrmindelta=1e-05"
        run_name = "%s-%s_filters=%d_rot=%d_lrfactor=%.1f_lrmindelta=%g" % (data_name, net_name, filters, rotate_angle, reduce_lr_factor, reduce_lr_min_delta)
    print("data_name:", data_name)
    print("run_name:", run_name)

    # Create network
    model = create_model(net_name, img_size, num_output_channels, filters=filters, summary=True)
    if model is None:
        print("Could not find model:", net_name)
        return

    # Initialize run directories
    run_path = create_run_folders(run_name, base_path=base_output_path, clean=clean)
    savemat(os.path.join(run_path, "training_info.mat"), 
            {"data_path": data_path, "val_idx": val_idx, "train_idx": train_idx,
             "base_output_path": base_output_path, "run_name": run_name, "data_name": data_name,
             "net_name": net_name, "clean": clean, "box_dset": box_dset, "confmap_dset": confmap_dset,
             "preshuffle": preshuffle, "val_size": val_size, "filters": filters, "rotate_angle": rotate_angle,
             "epochs": epochs, "batch_size": batch_size, "batches_per_epoch": batches_per_epoch,
             "val_batches_per_epoch": val_batches_per_epoch, "viz_idx": viz_idx, "reduce_lr_factor": reduce_lr_factor,
             "reduce_lr_patience": reduce_lr_patience, "reduce_lr_min_delta": reduce_lr_min_delta,
             "reduce_lr_cooldown": reduce_lr_cooldown, "reduce_lr_min_lr": reduce_lr_min_lr})

    # Save initial network
    model.save(os.path.join(run_path, "initial_model.h5"))
    input_layers = [x.name for x in model.input_layers]
    output_layers = [x.name for x in model.output_layers]

    # Data augmentation
    if len(input_layers) > 1 or len(output_layers) > 1:
        train_datagen = MultiInputOutputPairedImageAugmenter(input_layers, output_layers, box, confmap, batch_size=batch_size, shuffle=True, theta=(-rotate_angle, rotate_angle))
        val_datagen = MultiInputOutputPairedImageAugmenter(input_layers, output_layers, val_box, val_confmap, batch_size=batch_size, shuffle=True, theta=(-rotate_angle, rotate_angle))
    else:
        train_datagen = PairedImageAugmenter(box, confmap, batch_size=batch_size, shuffle=True, theta=(-rotate_angle, rotate_angle))
        val_datagen = PairedImageAugmenter(val_box, val_confmap, batch_size=batch_size, shuffle=True, theta=(-rotate_angle, rotate_angle))

    # Initialize training callbacks
    history_callback = LossHistory(run_path=run_path)
    reduce_lr_callback = ReduceLROnPlateau(monitor="val_loss", factor=reduce_lr_factor, 
                                          patience=reduce_lr_patience, verbose=1, mode="auto",
                                          epsilon=reduce_lr_min_delta, cooldown=reduce_lr_cooldown,
                                          min_lr=reduce_lr_min_lr)
    checkpointer = ModelCheckpoint(filepath=os.path.join(run_path, "weights/weights.{epoch:03d}-{val_loss:.9f}.h5"), verbose=1, save_best_only=False)
    viz_grid_callback = LambdaCallback(on_epoch_end=lambda epoch, logs: show_confmap_grid(model, *viz_sample, plot=True, save_path=os.path.join(run_path, "viz_confmaps/confmaps_%03d.png" % epoch), show_figure=False))
    viz_pred_callback = LambdaCallback(on_epoch_end=lambda epoch, logs: show_pred(model, *viz_sample, save_path=os.path.join(run_path, "viz_pred/pred_%03d.png" % epoch), show_figure=False))
    
    # Train!
    epoch0 = 0
    t0_train = time()
    training = model.fit_generator(
            train_datagen,
            initial_epoch=epoch0,
            epochs=epochs,
            verbose=1,
    #         use_multiprocessing=True,
    #         workers=8,
            steps_per_epoch=batches_per_epoch,
            max_queue_size=512,
            shuffle=False,
            validation_data=val_datagen,
            validation_steps=val_batches_per_epoch,
            callbacks = [
                reduce_lr_callback,
                checkpointer,
                history_callback,
                viz_pred_callback,
                viz_grid_callback
            ]
        )

    # Compute total elapsed time for training
    elapsed_train = time() - t0_train
    print("Total runtime: %.1f mins" % (elapsed_train / 60))
        
    # Save final model
    model.history = history_callback.history
    model.save(os.path.join(run_path, "final_model.h5"))
Example #19
def f(l):
    def g(w):
        return 1 / 2 * w**2 - 2 * w + 5 / 2 + l * abs(w)**(1 / 2)

    return g


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('-q', '--question', required=True)
    io_args = parser.parse_args()
    question = io_args.question

    if question == "2":
        data = utils.load_dataset("logisticData")
        XBin, yBin = data['X'], data['y']
        XBinValid, yBinValid = data['Xvalid'], data['yvalid']

        model = linear_model.logReg(maxEvals=400)
        model.fit(XBin, yBin)

        print("\nlogReg Training error %.3f" %
              utils.classification_error(model.predict(XBin), yBin))
        print("logReg Validation error %.3f" %
              utils.classification_error(model.predict(XBinValid), yBinValid))
        print("# nonZeros: %d" % (model.w != 0).sum())

    elif question == "2.1":
        data = utils.load_dataset("logisticData")
        XBin, yBin = data['X'], data['y']
Example #20
import cPickle as pickle

import numpy as np
from theano import config

floatX = config.floatX
# VAE and load_dataset come from this project's own modules

def main():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--batch_size', default=100, type=int)
    # XXX using sample size of one
    parser.add_argument('--nlayers', default=1, type=int, help='number of hidden layers in MLP before output layers')
    parser.add_argument('--hdim', default=500, type=int, help='dimension of hidden layer')
    parser.add_argument('--zdim', default=2, type=int, help='dimension of continuous latent variable')
    parser.add_argument('--lmbda', default=0.001, type=float, help='weight decay coefficient')
    parser.add_argument('--lr', default=0.01, type=float, help='learning rate')
    parser.add_argument('--epochs', default=1000, type=int, help='number of passes over dataset')
    parser.add_argument('--print_every', default=100, type=int, help='how often to print cost')
    parser.add_argument('--save_every', default=1, type=int, help='how often to save model (in terms of epochs)')
    parser.add_argument('--outfile', default='vae_model.pk', help='output file to save model to')
    parser.add_argument('--dset', default='mnist', choices=['mnist'],
            help='dataset to use')
    args = parser.parse_args()
    print(args)

    # run SGVB algorithm

    # N x d
    data = load_dataset(dset=args.dset)
    train_x, train_y = data['train']
    #print(train_x[0, :])  # values in [0, 1]
    #print(train_y[0:10])  # seems to already be shuffled
    valid_x, valid_y = data['valid']

    decs = {'mnist': 'bernoulli'}
    model = VAE(train_x.shape[1], args, dec=decs[args.dset])

    expcost = None
    num_train_batches = train_x.shape[0] // args.batch_size
    num_valid_batches = valid_x.shape[0] // args.batch_size
    valid_freq = num_train_batches

    for b in xrange(args.epochs * num_train_batches):
        k = b % num_train_batches
        x = train_x[k * args.batch_size:(k + 1) * args.batch_size, :]
        eps = np.random.randn(x.shape[0], args.zdim).astype(floatX)
        cost = model.train(x, eps)
        if not expcost:
            expcost = cost
        else:
            expcost = 0.01 * cost + 0.99 * expcost
        if (b + 1) % args.print_every == 0:
            print('iter %d, cost %f, expcost %f' % (b + 1, cost, expcost))
        if (b + 1) % valid_freq == 0:
            valid_cost = 0
            for l in xrange(num_valid_batches):
                x_val = valid_x[l * args.batch_size:(l + 1) * args.batch_size, :]
                eps_val = np.zeros((x_val.shape[0], args.zdim), dtype=floatX)
                valid_cost = valid_cost + model.test(x_val, eps_val)
            valid_cost = valid_cost / num_valid_batches
            print('valid cost: %f' % valid_cost)
        if (b + 1) % (num_train_batches * args.save_every) == 0:
            print('saving model')
            with open(args.outfile, 'wb') as f:
                pickle.dump(model, f, protocol=pickle.HIGHEST_PROTOCOL)

    # XXX just pickling the entire model for now
    print('saving final model')
    with open(args.outfile, 'wb') as f:
        pickle.dump(model, f, protocol=pickle.HIGHEST_PROTOCOL)
Example #21
def main(argv):
    del argv  # unused arg
    tf.enable_v2_behavior()

    dataset_train, ds_info = utils.load_dataset(tfds.Split.TRAIN,
                                                with_info=True)
    dataset_test = utils.load_dataset(tfds.Split.TEST)
    dataset_train = dataset_train.batch(FLAGS.batch_size)
    dataset_test = dataset_test.batch(FLAGS.batch_size)

    model = deterministic.resnet_v1(
        input_shape=ds_info.features['image'].shape,
        depth=20,
        num_classes=ds_info.features['label'].num_classes,
        l2=0.)
    logging.info('Model input shape: %s', model.input_shape)
    logging.info('Model output shape: %s', model.output_shape)
    logging.info('Model number of weights: %s', model.count_params())

    # Search for checkpoints from their index file; then remove the index suffix.
    ensemble_filenames = tf.io.gfile.glob(
        os.path.join(FLAGS.output_dir, '**/*.ckpt.index'))
    ensemble_filenames = [filename[:-6] for filename in ensemble_filenames]
    ensemble_size = len(ensemble_filenames)
    logging.info('Ensemble size: %s', ensemble_size)
    logging.info('Ensemble number of weights: %s',
                 ensemble_size * model.count_params())
    logging.info('Ensemble filenames: %s', str(ensemble_filenames))

    # Collect the logits output for each ensemble member and train/test data
    # point. We also collect the labels.
    # TODO(trandustin): Refactor data loader so you can get the full dataset in
    # memory without looping.
    logits_train = []
    logits_test = []
    labels_train = []
    labels_test = []
    for m, ensemble_filename in enumerate(ensemble_filenames):
        model.load_weights(ensemble_filename)
        logits = []
        for features, labels in dataset_train:
            logits.append(model(features, training=False))
            if m == 0:
                labels_train.append(labels)

        logits = tf.concat(logits, axis=0)
        logits_train.append(logits)
        if m == 0:
            labels_train = tf.concat(labels_train, axis=0)

        logits = []
        for features, labels in dataset_test:
            logits.append(model(features, training=False))
            if m == 0:
                labels_test.append(labels)

        logits = tf.concat(logits, axis=0)
        logits_test.append(logits)
        if m == 0:
            labels_test = tf.concat(labels_test, axis=0)
        logging.info('Predictions completed for checkpoint %s',
                     ensemble_filename)

    metrics = {}

    # Compute the ensemble's NLL and Gibbs cross entropy for each data point.
    # Then average over the dataset.
    nll_train = ensemble_negative_log_likelihood(labels_train, logits_train)
    nll_test = ensemble_negative_log_likelihood(labels_test, logits_test)
    gibbs_ce_train = gibbs_cross_entropy(labels_train, logits_train)
    gibbs_ce_test = gibbs_cross_entropy(labels_test, logits_test)
    metrics['train_nll'] = tf.reduce_mean(nll_train)
    metrics['test_nll'] = tf.reduce_mean(nll_test)
    metrics['train_gibbs_cross_entropy'] = tf.reduce_mean(gibbs_ce_train)
    metrics['test_gibbs_cross_entropy'] = tf.reduce_mean(gibbs_ce_test)

    # Given the per-element logits tensor of shape [ensemble_size, dataset_size,
    # num_classes], average over the ensemble members' probabilities. Then
    # compute accuracy and average over the dataset.
    probs_train = tf.reduce_mean(tf.nn.softmax(logits_train), axis=0)
    probs_test = tf.reduce_mean(tf.nn.softmax(logits_test), axis=0)
    accuracy_train = tf.keras.metrics.sparse_categorical_accuracy(
        labels_train, probs_train)
    accuracy_test = tf.keras.metrics.sparse_categorical_accuracy(
        labels_test, probs_test)
    metrics['train_accuracy'] = tf.reduce_mean(accuracy_train)
    metrics['test_accuracy'] = tf.reduce_mean(accuracy_test)
    logging.info('Metrics: %s', metrics)
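# ensemble_negative_log_likelihood is project-local; a sketch consistent with
# the comments above (assumption: sparse integer labels, logits stacked to
# shape [ensemble_size, num_examples, num_classes]):
def ensemble_negative_log_likelihood(labels, logits):
    logits = tf.convert_to_tensor(logits)
    labels = tf.broadcast_to(labels[tf.newaxis, :], logits.shape[:2])
    log_likelihoods = -tf.keras.losses.sparse_categorical_crossentropy(
        labels, logits, from_logits=True)
    # -log(1/M * sum_m p_m(y|x)) = -logsumexp_m(log p_m) + log(M)
    return -tf.reduce_logsumexp(log_likelihoods, axis=0) + tf.math.log(
        float(logits.shape[0]))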
Example #22
import os
import sys
import argparse

import utils         # project-local helpers
import linear_model  # project-local models

if __name__ == "__main__":
    argv = sys.argv[1:]

    parser = argparse.ArgumentParser()
    parser.add_argument('-q',
                        '--question',
                        required=True,
                        choices=["2.1", "2.2", "3.1", "4.1", "4.3"])
    io_args = parser.parse_args()
    question = io_args.question

    if question == "2.1":
        # Load the data in the form of dictionary
        data = utils.load_dataset("basisData")
        X = data['X']
        y = data['y']
        Xtest = data['Xtest']
        ytest = data['ytest']

        # get the number of rows(n) and columns(d)
        n, d = X.shape
        t = Xtest.shape[0]

        # Fit least-squares model
        model = linear_model.LeastSquares()
        model.fit(X, y)

        # Compute training error
        yhat = model.predict(X)
Example #23
File: term.py Project: sangyf/cute
    if len(args) == 0:
        print_usage()
        sys.exit(-1)

    separator = "|"
    length_threshold = 4
    payload_max_length = 50
    prune = False
    count = False

    for opt, value in opts:
        if opt == "-t":
            separator = value
        elif opt == "-l":
            length_threshold = int(value)
        elif opt == "-p":
            payload_max_length = int(value)
        elif opt == "-x":
            prune = True
        elif opt == "-c":
            count = True

    if prune:
        TermFrequencyUtils.prune_terms(args[0])
    elif count:
        TermFrequencyUtils.multiply_by_count(args[0], args[1])
    else:
        dataset = utils.load_dataset(args[0])
        tf = TermFrequencyUtils.find_common_term_frequencies(dataset, payload_max_length, length_threshold)
        TermFrequencyUtils.serialize_term_frequencies(tf)
Example #24
def main():
    # Set hyper-parameters.
    batch_size = 32
    epochs = 100
    model_path = 'atmodel.h5'
    enc_arch = 'encoder.json'
    dec_arch = 'decoder.json'
    data_path = '../data/w16to19abeconv.txt'
    num_words = 10000
    num_data = 12755

    # Data loading.
    en_texts, ja_texts = load_dataset(data_path)
    en_texts, ja_texts = en_texts[:num_data], ja_texts[:num_data]

    # Preprocessing.
    #ja_texts = preprocess_ja(ja_texts)
    ja_texts = preprocess_dataset(ja_texts)
    en_texts = preprocess_dataset(en_texts)
    x_train, x_test, y_train, y_test = train_test_split(en_texts,
                                                        ja_texts,
                                                        test_size=0.2,
                                                        random_state=42)

    en_vocab = build_vocabulary(x_train, num_words)
    ja_vocab = build_vocabulary(y_train, num_words)
    print(x_train[:3])
    print(y_train[:3])
    x_train, y_train = create_dataset(x_train, y_train, en_vocab, ja_vocab)

    print(en_vocab.word_index)
    print(ja_vocab.word_index)

    # Build a simple model.
    encoder = Encoder(num_words)
    decoder = Decoder(num_words)
    # Build an attention model.
    #encoder = Encoder(num_words, return_sequences=True)
    #decoder = AttentionDecoder(num_words)
    seq2seq = Seq2seq(encoder, decoder)
    model = seq2seq.build()
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')

    # Train the model.
    callbacks = [
        EarlyStopping(patience=10),
        ModelCheckpoint(model_path,
                        save_best_only=True,
                        save_weights_only=True)
    ]
    """
    model.fit(x=x_train,
              y=y_train,
              batch_size=batch_size,
              epochs=epochs,
              callbacks=callbacks,
              validation_split=0.1)"""
    encoder.save_as_json(enc_arch)
    decoder.save_as_json(dec_arch)

    # Inference.
    encoder = Encoder.load(enc_arch, model_path)
    decoder = Decoder.load(dec_arch, model_path)
    api = InferenceAPI(encoder, decoder, en_vocab, ja_vocab)
    #api = InferenceAPIforAttention(encoder, decoder, en_vocab, ja_vocab)
    #texts = sorted(set(en_texts[:50]), key=len)  # overridden by the hand-picked utterances below
    texts = ["お聞きしたいと思います", "さっき の 答弁 全く 納得 できません", "全く 納得 い き ません", "ありがとうございました", "おはようございます",\
            "よろしいでしょうか", "是非 よろしくお願いいたします", "もう少し 具体的に 教えて いただける と 助 か る んですけれども", "ちょっと 待 って", "質問 主 意 書 では 当然 混 同 は しておりません",\
            "正 式 な 要求 でいい んですか", "時間ですので まとめて ください", "ちょっと 静粛に お願いします", "よろしいですか", "静粛に お願いします",\
            "答弁 を まとめて ください", "時間 ですから", "驚 き の答弁 ですね", "それは いつ ごろ でしょうか", "そのとおり です"
    ]
    for text in texts:
        decoded = api.predict(text=text)
        print('入力: {}'.format(text))      # 入力 = "input"
        print('応答: {}'.format(decoded))   # 応答 = "response"

    y_test = [y.split(' ')[1:-1] for y in y_test]
    bleu_score = evaluate_bleu(x_test, y_test, api)
    print('BLEU: {}'.format(bleu_score))
Example #25
def main():
    # Get arguments
    args = parse_args()

    # Set random seed
    torch.manual_seed(args.seed)

    # Cuda
    use_cuda = False
    if torch.cuda.is_available():
        if not args.cuda:
            print("WARNING: You have a CUDA device, "
                  "so you should probably run with --cuda")
        else:
            use_cuda = True
            torch.cuda.manual_seed(args.seed)

    # Load data + text fields
    print('=' * 89)
    train_iter, val_iter, test_iter, SRC, TRG = utils.load_dataset(
        batch_size=args.batch_size,
        use_pretrained_emb=args.pretrained_emb,
        save_dir=SAVE_DIR
    )
    print('=' * 89)

    # Initialize model
    enc = models.EncoderRNN(
        input_size=len(SRC.vocab),
        emb_size=(SRC.vocab.vectors.size(1)
                  if args.pretrained_emb == 'fastText'
                  else args.emb_size),
        embeddings=(SRC.vocab.vectors
                    if args.pretrained_emb == 'fastText'
                    else None),
        max_norm=args.emb_maxnorm,
        padding_idx=SRC.vocab.stoi['<pad>'],
        hidden_size=args.hidden_size,
        num_layers=args.num_layers,
        dropout=args.dropout,
        bidirectional=args.bidirectional
    )
    decoder = models.AttnDecoderRNN if args.attention else models.DecoderRNN
    dec = decoder(
        enc_num_directions=enc.num_directions,
        enc_hidden_size=args.hidden_size,
        use_context=args.use_context,
        input_size=len(TRG.vocab),
        emb_size=(TRG.vocab.vectors.size(1)
                  if args.pretrained_emb
                  else args.emb_size),
        embeddings=(TRG.vocab.vectors
                    if args.pretrained_emb
                    else None),
        max_norm=args.emb_maxnorm,
        padding_idx=TRG.vocab.stoi['<pad>'],
        hidden_size=args.hidden_size,
        num_layers=args.num_layers,
        dropout=args.dropout,
        bidirectional=False # args.bidirectional
    )
    model = models.Seq2Seq(enc, dec, use_cuda=use_cuda)
    if use_cuda:
        model.cuda()
    print(model)

    # Initialize loss
    criterion = torch.nn.CrossEntropyLoss(
        ignore_index=TRG.vocab.stoi["<pad>"])

    # Create optimizer
    if args.optimizer == 'Adam':
        optim = torch.optim.Adam
    elif args.optimizer == 'Adadelta':
        optim = torch.optim.Adadelta
    elif args.optimizer == 'Adagrad':
        optim = torch.optim.Adagrad
    else:
        optim = torch.optim.SGD
    optimizer = optim(model.parameters(), lr=args.lr)

    # Create scheduler
    lambda_lr = lambda epoch: 0.5 if epoch > 8 else 1
    scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lambda_lr)

    # Train
    best_val_loss = None
    fname = './{}/{}.pt'.format(SAVE_DIR, args.save)

    print('=' * 89)
    try:
        for epoch in range(1, args.epochs+1):
            epoch_start_time = time.time()

            attns = train(epoch, model, train_iter, criterion, optimizer,
                  use_cuda, args, SRC, TRG)
            val_loss = evaluate(model, val_iter, criterion, use_cuda)

            # Log results
            print('-' * 89)
            print('| end of epoch {:3d} | time: {:5.2f}s '
                  '| valid loss {:5.2f} | valid ppl {:8.2f}'.format(
                      epoch, (time.time() - epoch_start_time),
                      val_loss, math.exp(val_loss)))
            print('-' * 89)

            # Save the model if validation loss is best we've seen so far
            if not best_val_loss or val_loss < best_val_loss:
                if not os.path.isdir(SAVE_DIR):
                    os.makedirs(SAVE_DIR)
                torch.save(model, fname)
                best_val_loss = val_loss

            # Anneal learning rate
            scheduler.step()
    except KeyboardInterrupt:
        print('-' * 89)
        print('Exiting from training early')

    # Load the best saved model
    with open(fname, 'rb') as f:
        model = torch.load(f)

    # Run on test data
    test_loss = evaluate(model, test_iter, criterion, use_cuda)

    # Log results
    print('=' * 89)
    print('| End of training | test loss {:5.2f} | test ppl {:8.2f}'.format(
        test_loss, math.exp(test_loss)))
    print('=' * 89)
Example #26
    parser.add_argument("--glovefile",
                        default='data/glove.6B.300d.txt',
                        type=str)
    args = parser.parse_args()

    w2v = args.vectorization_method
    PoS = args.PoS_method
    NER = args.NER_method
    regressor = args.regressor

    if w2v == 'glove':
        _load_glove(args.glovefile, verbose=args.verbose)

    if args.evaluate:
        if args.training_estimator is None:
            X, y = load_dataset(args.training_set, args.verbose)

            distance_estimator = _build_distance_estimator(X,
                                                           y,
                                                           w2v,
                                                           PoS,
                                                           NER,
                                                           regressor,
                                                           verbose=1)

            pickle.dump(distance_estimator,
                        open("traning_distance_model" + regressor + ".pickle",
                             "wb"),
                        protocol=pickle.HIGHEST_PROTOCOL)
        else:
            distance_estimator = pickle.load(
Example #27
    def __init__(self, path):
        self.meta, self.elems = load_dataset(path)
        self.samples = self._create_samples()
Example #28
def main():
    parser = argparse.ArgumentParser(description='Graphs')
    parser.add_argument(
        '-p', dest='pickle_folder',
        default='./out_pickles')
    parser.add_argument('-d', dest='dataset', required=True,
                        choices=['adult', 'recidivism', 'lending'],
                        help='dataset to use')
    parser.add_argument('-m', dest='model', required=True,
                        choices=['xgboost', 'logistic', 'nn'],
                        help='model: xgboost, logistic or nn')
    parser.add_argument(
        '-o', dest='output_folder',
        default='./results')

    args = parser.parse_args()
    dataset = utils.load_dataset(args.dataset, balance=True)
    dataset_name = args.dataset
    algorithm = args.model
    z_anchor = pickle.load(
        open(os.path.join(args.pickle_folder, '%s-anchor-%s' % (
            dataset_name, algorithm)), 'rb'))
    z_lime = pickle.load(
        open(os.path.join(args.pickle_folder, '%s-lime-%s' % (
            dataset_name, algorithm)), 'rb'))
    preds_validation = z_anchor['model'].predict(
        z_anchor['encoder'].transform(
            dataset.data[z_anchor['validation_idx']]))
    preds_test = z_anchor['model'].predict(
        z_anchor['encoder'].transform(
            dataset.data[z_anchor['test_idx']]))
    ret = {}
    ret['accuracy'] = sklearn.metrics.accuracy_score(
        dataset.labels[z_anchor['test_idx']], preds_test)
    print('accuracy', ret['accuracy'])

    print('Lime weights')
    val_weights, val_vals = utils.compute_lime_weight_vals(
        z_lime['exps'], dataset.data[z_lime['validation_idx']],
        dataset.data[z_lime['validation_idx']])

    print('Submodular anchor')
    picked, precs, recs = submodular_anchor_precrecall(
        z_anchor, dataset, preds_validation, preds_test, 10)
    ret['anchor_submodular'] = (picked, precs, recs)
    anchor_prec = precs[-1]

    print('Submodular lime pred')
    picked, precs, recs, t1, t2 = submodular_lime_precrecall(
        z_lime, dataset, preds_validation, preds_test, 10, val_weights,
        val_vals, desired_precision=anchor_prec, to_change='pred',
        verbose=True)

    ret['lime_pred_submodular'] = (picked, precs, recs)
    ret['lime_pred_submodular_threshold'] = t2

    print('Random anchor')
    (prec, cov, prec_std, cov_std) = random_anchor_precrecall(
        z_anchor, dataset, preds_validation, preds_test, 1, do_all=True)
    ret['anchor_1'] = (prec, cov, prec_std, cov_std)

    print('Random lime')
    (prec, cov, prec_std, cov_std, _, _) = random_lime_precrecall(
        z_lime, dataset, preds_validation, preds_test, k=1,
        desired_precision=0.0, to_change='distance', verbose=True,
        do_all=True)
    ret['lime_naive_1'] = (prec, cov, prec_std, cov_std)

    # print('Distance random lime')
    # (prec, cov, prec_std, cov_std, t1, t2) = random_lime_precrecall(
    #     z_lime, dataset, preds_validation, preds_test, k=1,
    #     desired_precision=0.0, to_change='distance', verbose=True,
    #     do_all=True, threshold=ret['lime_distance_submodular_threshold'])
    # ret['lime_distance_1'] = (prec, cov, prec_std, cov_std)
    # ret['lime_distance_1_threshold'] = t1

    print('Pred random lime')
    (prec, cov, prec_std, cov_std, t1, t2) = random_lime_precrecall(
        z_lime, dataset, preds_validation, preds_test, k=1,
        desired_precision=0.0, to_change='pred', verbose=True,
        do_all=True, pred_threshold=ret['lime_pred_submodular_threshold'])
    ret['lime_pred_1'] = (prec, cov, prec_std, cov_std)
    ret['lime_pred_1_threshold'] = t2

    def random_fn_lime(k):
        return random_lime_precrecall(
            z_lime, dataset, preds_validation, preds_test, k=k,
            desired_precision=0.0, to_change='pred', verbose=True,
            do_all=False,
            pred_threshold=ret['lime_pred_submodular_threshold'])[:4]

    def random_fn_anchor(k):
        return random_anchor_precrecall(
            z_anchor, dataset, preds_validation, preds_test, k, do_all=False)

    ret['anchor_random'] = random_until_k(random_fn_anchor, 10)
    ret['lime_pred_random'] = random_until_k(random_fn_lime, 10)

    path = os.path.join(args.output_folder, '%s-%s.pickle' % (
        dataset_name, algorithm))

    pickle.dump(ret, open(path, 'wb'))
Example #29
0
import numpy as np
import torch
import matplotlib.pyplot as plt

from utils import load_dataset, im_convert

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# load model
path = 'D:/model'
model = torch.load(path)

# classes
classes = ('ant', 'bee')

# load data
data_path = 'ants_and_bees'
_, validation_loader = load_dataset(data_path)

dataiter = iter(validation_loader)
images, labels = next(dataiter)
images = images.to(device)
labels = labels.to(device)
output = model(images)
_, preds = torch.max(output, 1)

fig = plt.figure(figsize=(25, 4))

for idx in np.arange(20):
    ax = fig.add_subplot(2, 10, idx + 1, xticks=[], yticks=[])
    plt.imshow(im_convert(images[idx]))
    ax.set_title("{} ({})".format(str(classes[preds[idx].item()]),
                                  str(classes[labels[idx].item()])),
Example #30
0
    args = parser.parse_args()

    logging.basicConfig(level=logging.INFO)

    with open(os.path.join(args.dsave, 'config.json')) as f:
        j = json.load(f)
        args_save = Namespace(**j)
        print('args_save', type(args_save))
        args_save.gpu = args.gpu
        args_save.forward_pass_time = args.forward_pass_time
        args_save.batch_size = args.batch_size
        args_save.old_encoder = args.old_encoder
    pprint(args_save)

    dataset, ontology, vocab, Eword = load_dataset(args.dataset)

    model = load_model(args_save.model, args_save, ontology, vocab)
    model.load_best_save(directory=args.dsave)
    if args.gpu is not None:
        model.cuda(args.gpu)

    print(dataset.keys())
    if args.split not in dataset.keys():
        print(args.split + ' file not found')

    #dataset[args.split].dialogues = dataset[args.split].dialogues[:1117]
    logging.info('Making predictions for {} dialogues and {} turns'.format(
        len(dataset[args.split]), len(list(dataset[args.split].iter_turns()))))
    start = time.time()
    preds, attention_best_pass, most_attentive_arc_weights, all_attention_arcs, padded_confnet_words = model.run_pred(
Example #31
0
        '''
        # TODO: Update with actual prediction logic
        N = user_id_N.size
        yhat_N = ag_np.ones(N)
        return yhat_N

    def calc_loss_wrt_parameter_dict(self, param_dict, data_tuple):
        ''' Compute loss at given parameters

        Args
        ----
        param_dict : dict
            Keys are string names of parameters
            Values are *numpy arrays* of parameter values
        data_tuple : tuple
            Arrays for one data split; the first two entries are the inputs
            to predict() and the third is the target array y_N

        Returns
        -------
        loss : float scalar
        '''
        # TODO compute loss
        y_N = data_tuple[2]
        yhat_N = self.predict(data_tuple[0], data_tuple[1], **param_dict)
        loss_total = 0.0
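        # (A plausible completion, not from the original: a squared-error
        # loss such as loss_total = ag_np.sum(ag_np.square(y_N - yhat_N)).)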
        return loss_total


if __name__ == '__main__':
    train_tuple, valid_tuple, test_tuple, n_users, n_items = load_dataset()
    model = CollabFilterMeanOnly(n_epochs=50)
    model.init_parameter_dict(n_users, n_items, train_tuple)
    model.fit(train_tuple, valid_tuple)
Example #32
0
def main(dataset_name, disease_label, evaluated_dataset):
    """Calculate the performance of the classifier in each iteration of the bootstrap method."""
    # ----------------------------------------------------------------------------
    n_bootstrap = 1000

    participants_path = PROJECT_ROOT / 'data' / evaluated_dataset / 'participants.tsv'
    freesurfer_path = PROJECT_ROOT / 'data' / evaluated_dataset / 'freesurferData.csv'

    outputs_dir = PROJECT_ROOT / 'outputs'
    ids_path = outputs_dir / (evaluated_dataset + '_homogeneous_ids.csv')

    hc_label = 1

    # ----------------------------------------------------------------------------
    # Set random seed
    random_seed = 42
    np.random.seed(random_seed)
    rn.seed(random_seed)

    classifier_dir = PROJECT_ROOT / 'outputs' / 'classifier_analysis'
    classifier_dataset_dir = classifier_dir / dataset_name
    classifier_dataset_analysis_dir = classifier_dataset_dir / '{:02d}_vs_{:02d}'.format(
        hc_label, disease_label)

    classifier_storage_dir = classifier_dataset_analysis_dir / 'models'
    generalization_dir = classifier_dataset_analysis_dir / 'generalization'
    generalization_dir.mkdir(exist_ok=True)

    evaluated_dataset_df = load_dataset(participants_path, ids_path,
                                        freesurfer_path)

    aucs_test = []
    # ----------------------------------------------------------------------------
    for i_bootstrap in tqdm(range(n_bootstrap)):
        rvm = load(classifier_storage_dir /
                   '{:03d}_rvr.joblib'.format(i_bootstrap))
        scaler = load(classifier_storage_dir /
                      '{:03d}_scaler.joblib'.format(i_bootstrap))

        x_data = evaluated_dataset_df[COLUMNS_NAME].values

        tiv = evaluated_dataset_df['EstimatedTotalIntraCranialVol'].values
        tiv = tiv[:, np.newaxis]

        x_data = (np.true_divide(x_data, tiv)).astype('float32')

        x_data = np.concatenate(
            (x_data[evaluated_dataset_df['Diagn'] == hc_label],
             x_data[evaluated_dataset_df['Diagn'] == disease_label]),
            axis=0)

        y_data = np.concatenate(
            (np.zeros(sum(evaluated_dataset_df['Diagn'] == hc_label)),
             np.ones(sum(evaluated_dataset_df['Diagn'] == disease_label))))

        # Scale the features using the inter-quartile range
        x_data = scaler.transform(x_data)

        pred = rvm.predict(x_data)
        predictions_proba = rvm.predict_proba(x_data)

        auc = roc_auc_score(y_data, predictions_proba[:, 1])

        aucs_test.append(auc)

    aucs_df = pd.DataFrame(columns=['AUCs'], data=aucs_test)
    aucs_df.to_csv(generalization_dir /
                   '{:}_aucs.csv'.format(evaluated_dataset),
                   index=False)

    results = pd.DataFrame(columns=['Measure', 'Value'])
    results = results.append({
        'Measure': 'mean',
        'Value': np.mean(aucs_test)
    },
                             ignore_index=True)
    results = results.append(
        {
            'Measure': 'upper_limit',
            'Value': np.percentile(aucs_test, 97.5)
        },
        ignore_index=True)
    results = results.append(
        {
            'Measure': 'lower_limit',
            'Value': np.percentile(aucs_test, 2.5)
        },
        ignore_index=True)
    results.to_csv(generalization_dir /
                   '{:}_aucs_summary.csv'.format(evaluated_dataset),
                   index=False)
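    # Equivalent summary without the since-deprecated DataFrame.append, as a
    # sketch (not from the original):
    #     results = pd.DataFrame({
    #         'Measure': ['mean', 'upper_limit', 'lower_limit'],
    #         'Value': [np.mean(aucs_test),
    #                   np.percentile(aucs_test, 97.5),
    #                   np.percentile(aucs_test, 2.5)]})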
Example #33
0
import torch
import numpy as np
import os
import json
from utils import load_dataset, count_parameters
from config import Config
from tqdm import tqdm
from model import Model

config = Config()
dataset, ontology, vocab = load_dataset()

print('Slots: ', ontology.slots)
slot_dict = {s: {'slot_id': idx} for idx, s in enumerate(ontology.slots)}
for s in ontology.slots:
    if s != 'request':
        slot_dict[s]['values'] = {
            value: {
                'value_id': idx,
                'num': [vocab.word2index(w) for w in value.split()]
            }
            for idx, value in enumerate([config.NONE_TOKEN] +
                                        ontology.values[s])
        }
    else:
        slot_dict[s]['values'] = {
            value: {
                'value_id': idx,
                'num': [vocab.word2index(w) for w in value.split()]
            }
            for idx, value in enumerate(ontology.values[s])
Example #34
0
                            activation=None,
                            kernel_regularizer=l2(1e-5))

    def call(self, x):
        out = self.dense1(x)
        out = self.odeblock(out)
        out = self.dense2(out)
        return out

    def compute_output_shape(self, input_shape):
        return tf.TensorShape([input_shape[0], self.output_dim])


if not os.path.isfile('experiments/datasets/single_pendulum_x_train.npy'):
    x_train, y_train, x_val, y_val = create_dataset()
x_train, y_train, x_val, y_val = load_dataset()
if args.synthetic_derivative:
    y_train = np.gradient(x_train)[1] / 0.01

x_train = np.reshape(x_train, (-1, 2))
y_train = np.reshape(y_train, (-1, 2))
x_val = np.reshape(x_val, (-1, 2))
y_val = np.reshape(y_val, (-1, 2))

c = np.arange(len(x_train))
np.random.shuffle(c)
x_train = x_train[c[::int(100 / args.dataset_size)]]
y_train = y_train[c[::int(100 / args.dataset_size)]]
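# (Subsampling note: the stride int(100 / args.dataset_size) keeps roughly
#  args.dataset_size percent of the shuffled samples, assuming the flag is
#  given as a percentage; e.g. dataset_size=10 keeps every 10th point.)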

model = ODENet(hidden_dim=8, output_dim=y_train.shape[-1])
Example #35
0
    parser = ArgumentParser()
    parser.add_argument('dsave', help='save location of model')
    parser.add_argument('--split', help='split to evaluate on', default='test')
    parser.add_argument('--gpu', type=int, help='gpu to use', default=0)
    parser.add_argument('--fout',
                        help='optional save file to store the predictions')
    args = parser.parse_args()

    logging.basicConfig(level=logging.INFO)

    with open(os.path.join(args.dsave, 'config.json')) as f:
        args_save = Namespace(**json.load(f))
        args_save.gpu = args.gpu
    pprint(args_save)

    dataset, ontology, vocab, Eword = load_dataset()

    model = Tracker(args_save.model, args_save, ontology, vocab)
    model.load_best_save(directory=args.dsave)
    if args.gpu is not None:
        model.cuda(args.gpu)

    logging.info('Making predictions for {} dialogues and {} turns'.format(
        len(dataset[args.split]), len(list(dataset[args.split].iter_turns()))))
    preds = model.run_pred(dataset[args.split], args_save)
    pprint(dataset[args.split].evaluate_preds(preds))

    if args.fout:
        with open(args.fout, 'wt') as f:
            # predictions is a list of sets, need to convert to list of lists to make it JSON serializable
            json.dump([list(p) for p in preds], f, indent=2)
Example #36
0
import os
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
# (sklearn.cross_validation was removed; these now live in model_selection)
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from utils import CHART_DIR, DATA_DIR, load_dataset

features, labels = load_dataset(
    'seeds.tsv'
)  # had to write a custom parser since the file mixes float and string data

# initialize a classifier instance
classifier = KNeighborsClassifier(n_neighbors=5,
                                  weights='uniform',
                                  algorithm='auto',
                                  leaf_size=30,
                                  p=2,
                                  metric='minkowski',
                                  metric_params=None)

# compute 10-fold cross-validation
means = []
k = 1
for k in range(1, 20, 2):
    classifier.n_neighbors = k

    # normalize all features to same scale
    classifier = Pipeline([('norm', StandardScaler()), ('knn', classifier)])
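    # A sketch, not from the original, of how the truncated loop plausibly
    # continues: score the pipeline with k-fold cross-validation (the fold
    # count is an assumption) and record the mean accuracy.
    crossed = cross_val_score(classifier, features, labels,
                              cv=KFold(n_splits=10, shuffle=True))
    means.append(crossed.mean())
    # (Caveat: `classifier` was just rebound to a Pipeline, so the
    # `classifier.n_neighbors = k` assignment above would fail on the next
    # iteration; the original presumably rebuilt the classifier each pass.)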
Example #37
0
if args.adjoint:
    from tfdiffeq import odeint_adjoint as odeint
else:
    from tfdiffeq import odeint

PLOT_DIR = 'plots/mass_spring_damper/learnedode/'
TIME_OF_RUN = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
device = 'gpu:' + str(args.gpu) if len(gpus) else 'cpu:0'

t = tf.linspace(0., 10., args.data_size)
if args.dtype == 'float64':
    t = tf.cast(t, tf.float64)

if not os.path.isfile('experiments/datasets/mass_spring_damper_x_train.npy'):
    x_train, _, x_val, _ = create_dataset()
x_train, _, x_val, _ = load_dataset()
x_train = x_train.astype(args.dtype)
x_val = x_val.astype(args.dtype)

x_val_extrap = tf.convert_to_tensor(x_val[0].reshape(-1, 1, 2))
x_val_interp = tf.convert_to_tensor(x_val[1].reshape(-1, 1, 2))

makedirs(PLOT_DIR)


def get_batch():
    # pick random data series
    n = np.random.choice(np.arange(x_train.shape[0], dtype=np.int64),
                         args.batch_size,
                         replace=True)
    # pick random starting time
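    # A sketch, not from the original, of how this truncated body might
    # continue; batch_time is a hypothetical window length.
    batch_time = 16
    t0 = np.random.randint(0, x_train.shape[1] - batch_time)
    batch_x0 = tf.convert_to_tensor(x_train[n, t0])  # initial states, shape (batch, 2)
    batch_t = t[t0:t0 + batch_time]  # matching slice of the time grid
    batch_x = tf.convert_to_tensor(x_train[n, t0:t0 + batch_time])  # target states
    return batch_x0, batch_t, batch_x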
Example #38
0
def mnist(args):
    data = load_dataset(dset='mnist')
    train_x, train_y = balanced_subset(data, 'train', args.data_frac)
    valid_x, valid_y = balanced_subset(data, 'valid', args.data_frac)
    test_x, test_y = data['test']

    num_train_batches = train_x.shape[0] / args.batch_size
    num_valid_batches = valid_x.shape[0] / args.batch_size
    valid_freq = num_train_batches

    model = SoftMax(train_x.shape[1], NUM_CLASSES, args.optimizer)

    expcost = None

    vcosts = []
    perfs = []
    model_best = None
    prev_perf = 0.0
    perf = 0.0
    for b in xrange(args.epochs * num_train_batches):
        k = b % num_train_batches
        x = train_x[k * args.batch_size:(k + 1) * args.batch_size, :]
        y = train_y[k * args.batch_size:(k + 1) * args.batch_size]
        cost = model.train(x, y, args.lr)
        if not expcost:
            expcost = cost
        else:
            expcost = 0.01 * cost + 0.99 * expcost
        if (b + 1) % args.print_every == 0:
            print('iter %d, cost %f, expcost %f' % (b + 1, cost, expcost))
        if (b + 1) % valid_freq == 0:
            perf, _ = measure_perf(valid_x, valid_y, model, args)
            perfs.append(perf)
            print('correct/total: %f' % (perf))
            if len(perfs) > 32:
                old_perf = perfs.pop(0)
                max_perf = max(perfs)
                if old_perf >= max_perf:
                    print('Peak perf: %f (data_frac=%f, lr=%f)' % (old_perf, args.data_frac, args.lr))
                    test_perf, _ = measure_perf(test_x, test_y, model, args)
                    print('Test perf: %f (data_frac=%f, lr=%f)' % (test_perf, args.data_frac, args.lr))
                    return test_perf
#            perf, vcost = measure_perf(valid_x, valid_y, model, args)
#            vcosts.append(vcost)
#            print('validation perf, cost: %f, %f' % (perf, vcost))
#            if len(vcosts) > 32:
#                old_vcost = vcosts.pop(0)
#                low_vcost = min(vcosts)
#                if old_vcost <= low_vcost:
#                    print('Low vcost: %f (hdim=%d, nlayers=%d, data_frac=%f, lr=%f)' % (old_vcost, args.hdim, args.nlayers, args.data_frac, args.lr))
#                    test_perf, _ = measure_perf(test_x, test_y, model, args)
#                    print('Test perf: %f (hdim=%d, nlayers=%d, data_frac=%f, lr=%f)' % (test_perf, args.hdim, args.nlayers, args.data_frac, args.lr))
#                    return test_perf
#                    break
        if (b + 1) % (num_train_batches * args.save_every) == 0:
            if perf > prev_perf:
                prev_perf = perf
                print('saving model')
                with open(pjoin(args.expdir, 'model.pk'), 'wb') as f:
                    pickle.dump(model, f, protocol=pickle.HIGHEST_PROTOCOL)

    # XXX just pickling the entire model for now
    if perf > prev_perf:
        print('saving final model')
        with open(pjoin(args.expdir, 'model.pk'), 'wb') as f:
            pickle.dump(model, f, protocol=pickle.HIGHEST_PROTOCOL)
Example #39
0
 def set_dataset(self,batch_size,num_samples,noise,random_state):
     self.train_epoch, self.data, self.test_epoch = utils.load_dataset(batch_size, self.load_func,False, num_samples, noise, random_state)
     self.X_train, self.y_train, self.X_test, self.y_test = self.data
     self.save_training_pts()
Example #40
0
                        default=10,
                        dest='critic_iters',
                        help='The number of discriminator weight updates per generator update (default: 10)')
    
    parser.add_argument('--lambda', '-p',
                        type=int,
                        default=10,
                        dest='lamb',
                        help='The gradient penalty lambda hyperparameter (default: 10)')
    
    return parser.parse_args()

args = parse_args()

lines, charmap, inv_charmap = utils.load_dataset(
    path=args.training_data,
    max_length=args.seq_length
)

if not os.path.isdir(args.output_dir):
    os.makedirs(args.output_dir)

if not os.path.isdir(os.path.join(args.output_dir, 'checkpoints')):
    os.makedirs(os.path.join(args.output_dir, 'checkpoints'))

if not os.path.isdir(os.path.join(args.output_dir, 'samples')):
    os.makedirs(os.path.join(args.output_dir, 'samples'))

# pickle to avoid encoding errors with json
with open(os.path.join(args.output_dir, 'charmap.pickle'), 'wb') as f:
    pickle.dump(charmap, f)
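
# Read-side sketch (same path as the dump above; not from the original):
#     with open(os.path.join(args.output_dir, 'charmap.pickle'), 'rb') as f:
#         charmap = pickle.load(f)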
Example #41
0
def question3(dataset_id, train_test, save_plots = False, no_outputs = False):

	x, y = utils.load_dataset(dataset_id, 'train')

	x0 = x[np.where(y[:, 0] == 0)]
	x1 = x[np.where(y[:, 0] == 1)]

	x_bias = np.hstack((np.ones((x.shape[0], 1)), x))

	inner_prod_inv = np.linalg.inv(x_bias.transpose().dot(x_bias))

	# Normal equation solution
	w = (inner_prod_inv.dot(x_bias.transpose())).dot(y)


	feat1 = np.linspace(np.min(x[:, 0]), np.max(x[:, 0]), 20)
	feat2 = (-w[1]*feat1 - w[0] + 0.5)/w[2]


	if (not no_outputs):
		# Outputs
		print('Parameter vector for dataset',dataset_id,train_test,':',w)
		plt.scatter(x0[:, 0], x0[:, 1], c ='r', marker = 'x', label = 'Class 0')
		plt.scatter(x1[:, 0], x1[:, 1], c ='b', marker = 'x', label = 'Class 1')	
	
		plt.plot(feat1, feat2, c = 'g', label = 'Decision Boundary')	

		plt.legend(loc='lower left', scatterpoints = 1)
		plt.xlabel('Feature 1')
		plt.ylabel('Feature 2')

		if (save_plots):
			name = 'Q3_' + dataset_id + '_' + train_test + '.png'
			plt.savefig(name)

		plt.show()	
		plt.clf()


	#--------- Question 4 ----------------

	if (train_test == 'test'):

		x, y = utils.load_dataset(dataset_id, 'test')
		x_bias = np.hstack((np.ones((x.shape[0], 1)), x))

	correct = 0
	
	for idx in range(x.shape[0]):
		
		sample = x_bias[idx, :]
		label = y[idx, 0]
		
		if (w.transpose().dot(sample) >= 0.5):
			y_tilde = 1.0
		else:
			y_tilde = 0.0

		correct += (label == y_tilde)

	misclassif_error = 1 - correct/x.shape[0]
	
	return misclassif_error
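
# For reference, not in the original: the same least-squares fit can be
# computed more stably without forming the explicit inverse:
#     w, residuals, rank, sv = np.linalg.lstsq(x_bias, y, rcond=None)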
Example #42
0
import tensorflow as tf
from keras.utils import plot_model
from sklearn.model_selection import train_test_split
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau

import utils

n_signals = 12
win_size = 128

experiment_name = "2020-07-31_8-23-46_device1"
win_data_file = f"data/win_data_{experiment_name}.txt"
win_label_file = f"data/win_label_{experiment_name}.txt"

# Load data: X has the form of [n_wins, win_size, n_signals]
X, y = utils.load_dataset(win_data_file, win_label_file, win_size, n_signals)
""" ************************************* HYPER-PARAMETERS *************************************"""
MODEL_NAME = "model_stacked_LSTM"  # Ex: model_LSTM, model_stacked_LSTM, model_CNN1D_LSTM_v1 (model zoo from utils)
n_hiddens = 128  # for LSTM layers
n_frames = 4  # for TimeDistributed layer-based models (win_size must be divisible by n_frames)
verbose, epochs, batch_size = 2, 100, 128
"""******************************** CHECKPOINT ********************************"""
# The callback takes a couple of arguments to configure checkpointing.
checkpoint_path = f"pretrained_models/ckp_{MODEL_NAME}"
bool_save_ckp = True

# Create checkpoint callback
cp_callback = ModelCheckpoint(filepath=checkpoint_path,
                              monitor='val_loss',
                              save_best_only=False,
                              save_weights_only=False,
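# (The ModelCheckpoint call is cut off above; a plausible completion and
#  usage sketch with hypothetical names, not from the original:)
#                               verbose=1)
#     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
#     model = utils.get_model(MODEL_NAME, n_hiddens, n_frames)  # hypothetical helper
#     model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size,
#               validation_data=(X_test, y_test), verbose=verbose,
#               callbacks=[cp_callback] if bool_save_ckp else [])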
Example #43
0
def main(argv):
    del argv  # unused arg
    tf.io.gfile.makedirs(FLAGS.output_dir)
    logging.info('Saving checkpoints at %s', FLAGS.output_dir)
    tf.random.set_seed(FLAGS.seed)

    if FLAGS.use_gpu:
        logging.info('Use GPU')
        strategy = tf.distribute.MirroredStrategy()
    else:
        logging.info('Use TPU at %s',
                     FLAGS.tpu if FLAGS.tpu is not None else 'local')
        resolver = tf.distribute.cluster_resolver.TPUClusterResolver(
            tpu=FLAGS.tpu)
        tf.config.experimental_connect_to_cluster(resolver)
        tf.tpu.experimental.initialize_tpu_system(resolver)
        strategy = tf.distribute.TPUStrategy(resolver)

    per_core_batch_size = FLAGS.per_core_batch_size // FLAGS.ensemble_size
    batch_size = per_core_batch_size * FLAGS.num_cores
    check_bool = FLAGS.train_proportion > 0 and FLAGS.train_proportion <= 1
    assert check_bool, 'Proportion of train set has to meet 0 < prop <= 1.'

    drop_remainder_validation = True
    if not FLAGS.use_gpu:
        # This has to be True for TPU training; otherwise the batch size of
        # images in the validation set can't be determined by the TPU compiler.
        assert drop_remainder_validation, 'drop_remainder must be True in TPU mode.'

    train_dataset = utils.load_dataset(split=tfds.Split.TRAIN,
                                       name=FLAGS.dataset,
                                       batch_size=batch_size,
                                       use_bfloat16=FLAGS.use_bfloat16,
                                       repeat=True,
                                       proportion=FLAGS.train_proportion)
    validation_proportion = 1 - FLAGS.train_proportion
    validation_dataset = utils.load_dataset(
        split=tfds.Split.VALIDATION,
        name=FLAGS.dataset,
        batch_size=batch_size,
        use_bfloat16=FLAGS.use_bfloat16,
        repeat=True,
        proportion=validation_proportion,
        drop_remainder=drop_remainder_validation)
    clean_test_dataset = utils.load_dataset(split=tfds.Split.TEST,
                                            name=FLAGS.dataset,
                                            batch_size=batch_size,
                                            use_bfloat16=FLAGS.use_bfloat16)
    train_dataset = strategy.experimental_distribute_dataset(train_dataset)
    validation_dataset = strategy.experimental_distribute_dataset(
        validation_dataset)
    test_datasets = {
        'clean': strategy.experimental_distribute_dataset(clean_test_dataset),
    }
    if FLAGS.corruptions_interval > 0:
        if FLAGS.dataset == 'cifar10':
            load_c_dataset = utils.load_cifar10_c
        else:
            load_c_dataset = functools.partial(utils.load_cifar100_c,
                                               path=FLAGS.cifar100_c_path)
        corruption_types, max_intensity = utils.load_corrupted_test_info(
            FLAGS.dataset)
        for corruption in corruption_types:
            for intensity in range(1, max_intensity + 1):
                dataset = load_c_dataset(corruption_name=corruption,
                                         corruption_intensity=intensity,
                                         batch_size=batch_size,
                                         use_bfloat16=FLAGS.use_bfloat16)
                test_datasets['{0}_{1}'.format(corruption, intensity)] = (
                    strategy.experimental_distribute_dataset(dataset))

    ds_info = tfds.builder(FLAGS.dataset).info
    train_sample_size = ds_info.splits[
        'train'].num_examples * FLAGS.train_proportion
    steps_per_epoch = int(train_sample_size / batch_size)
    train_sample_size = int(train_sample_size)

    steps_per_eval = ds_info.splits['test'].num_examples // batch_size
    num_classes = ds_info.features['label'].num_classes

    if FLAGS.use_bfloat16:
        policy = tf.keras.mixed_precision.experimental.Policy('mixed_bfloat16')
        tf.keras.mixed_precision.experimental.set_policy(policy)

    summary_writer = tf.summary.create_file_writer(
        os.path.join(FLAGS.output_dir, 'summaries'))

    logging.info('Building Keras model.')
    depth = 28
    width = 10

    dict_ranges = {'min': FLAGS.min_l2_range, 'max': FLAGS.max_l2_range}
    ranges = [dict_ranges for _ in range(6)]  # 6 independent l2 parameters
    model_config = {
        'key_to_index': {
            'input_conv_l2_kernel': 0,
            'group_l2_kernel': 1,
            'group_1_l2_kernel': 2,
            'group_2_l2_kernel': 3,
            'dense_l2_kernel': 4,
            'dense_l2_bias': 5,
        },
        'ranges': ranges,
        'test': None
    }
    lambdas_config = LambdaConfig(model_config['ranges'],
                                  model_config['key_to_index'])

    if FLAGS.e_body_hidden_units > 0:
        e_body_arch = '({},)'.format(FLAGS.e_body_hidden_units)
    else:
        e_body_arch = '()'
    e_shared_arch = '()'
    e_activation = 'tanh'
    filters_resnet = [16]
    for i in range(0, 3):  # 3 groups of blocks
        filters_resnet.extend([16 * width * 2**i] *
                              9)  # 9 layers in each block
    # e_head dim for conv2d is just the number of filters (only
    # kernel) and twice num of classes for the last dense layer (kernel + bias)
    e_head_dims = [x for x in filters_resnet] + [2 * num_classes]

    with strategy.scope():
        e_models = e_factory(
            lambdas_config.input_shape,
            e_head_dims=e_head_dims,
            e_body_arch=eval(e_body_arch),  # pylint: disable=eval-used
            e_shared_arch=eval(e_shared_arch),  # pylint: disable=eval-used
            activation=e_activation,
            use_bias=FLAGS.e_model_use_bias,
            e_head_init=FLAGS.init_emodels_stddev)

        model = wide_resnet_hyperbatchensemble(
            input_shape=ds_info.features['image'].shape,
            depth=depth,
            width_multiplier=width,
            num_classes=num_classes,
            ensemble_size=FLAGS.ensemble_size,
            random_sign_init=FLAGS.random_sign_init,
            config=lambdas_config,
            e_models=e_models,
            l2_batchnorm_layer=FLAGS.l2_batchnorm,
            regularize_fast_weights=FLAGS.regularize_fast_weights,
            fast_weights_eq_contraint=FLAGS.fast_weights_eq_contraint,
            version=2)

        logging.info('Model input shape: %s', model.input_shape)
        logging.info('Model output shape: %s', model.output_shape)
        logging.info('Model number of weights: %s', model.count_params())
        # build hyper-batchensemble complete -------------------------

        # Initialize Lambda distributions for tuning
        lambdas_mean = tf.reduce_mean(
            log_uniform_mean([lambdas_config.log_min, lambdas_config.log_max]))
        lambdas0 = tf.random.normal((FLAGS.ensemble_size, lambdas_config.dim),
                                    lambdas_mean,
                                    0.1 * FLAGS.ens_init_delta_bounds)
        lower0 = lambdas0 - tf.constant(FLAGS.ens_init_delta_bounds)
        lower0 = tf.maximum(lower0, 1e-8)
        upper0 = lambdas0 + tf.constant(FLAGS.ens_init_delta_bounds)

        log_lower = tf.Variable(tf.math.log(lower0))
        log_upper = tf.Variable(tf.math.log(upper0))
        lambda_parameters = [log_lower, log_upper]  # these variables are tuned
        clip_lambda_parameters(lambda_parameters, lambdas_config)

        # Optimizer settings to train model weights
        # Linearly scale learning rate and the decay epochs by vanilla settings.
        # Note: Here, we don't divide the epochs by 200 as for the other uncertainty
        # baselines.
        base_lr = FLAGS.base_learning_rate * batch_size / 128
        lr_decay_epochs = [int(l) for l in FLAGS.lr_decay_epochs]

        lr_schedule = utils.LearningRateSchedule(
            steps_per_epoch,
            base_lr,
            decay_ratio=FLAGS.lr_decay_ratio,
            decay_epochs=lr_decay_epochs,
            warmup_epochs=FLAGS.lr_warmup_epochs)
        optimizer = tf.keras.optimizers.SGD(lr_schedule,
                                            momentum=0.9,
                                            nesterov=True)

        # tuner used for optimizing lambda_parameters
        tuner = tf.keras.optimizers.Adam(FLAGS.lr_tuning)

        metrics = {
            'train/negative_log_likelihood': tf.keras.metrics.Mean(),
            'train/accuracy': tf.keras.metrics.SparseCategoricalAccuracy(),
            'train/loss': tf.keras.metrics.Mean(),
            'train/ece': um.ExpectedCalibrationError(num_bins=FLAGS.num_bins),
            'train/disagreement': tf.keras.metrics.Mean(),
            'train/average_kl': tf.keras.metrics.Mean(),
            'train/cosine_similarity': tf.keras.metrics.Mean(),
            'test/negative_log_likelihood': tf.keras.metrics.Mean(),
            'test/accuracy': tf.keras.metrics.SparseCategoricalAccuracy(),
            'test/ece': um.ExpectedCalibrationError(num_bins=FLAGS.num_bins),
            'test/gibbs_nll': tf.keras.metrics.Mean(),
            'test/gibbs_accuracy':
            tf.keras.metrics.SparseCategoricalAccuracy(),
            'test/disagreement': tf.keras.metrics.Mean(),
            'test/average_kl': tf.keras.metrics.Mean(),
            'test/cosine_similarity': tf.keras.metrics.Mean(),
            'validation/loss': tf.keras.metrics.Mean(),
            'validation/loss_entropy': tf.keras.metrics.Mean(),
            'validation/loss_ce': tf.keras.metrics.Mean()
        }
        corrupt_metrics = {}

        for i in range(FLAGS.ensemble_size):
            metrics['test/nll_member_{}'.format(i)] = tf.keras.metrics.Mean()
            metrics['test/accuracy_member_{}'.format(i)] = (
                tf.keras.metrics.SparseCategoricalAccuracy())
        if FLAGS.corruptions_interval > 0:
            for intensity in range(1, max_intensity + 1):
                for corruption in corruption_types:
                    dataset_name = '{0}_{1}'.format(corruption, intensity)
                    corrupt_metrics['test/nll_{}'.format(dataset_name)] = (
                        tf.keras.metrics.Mean())
                    corrupt_metrics['test/accuracy_{}'.format(
                        dataset_name)] = (
                            tf.keras.metrics.SparseCategoricalAccuracy())
                    corrupt_metrics['test/ece_{}'.format(dataset_name)] = (
                        um.ExpectedCalibrationError(num_bins=FLAGS.num_bins))

        checkpoint = tf.train.Checkpoint(model=model,
                                         lambda_parameters=lambda_parameters,
                                         optimizer=optimizer)

        latest_checkpoint = tf.train.latest_checkpoint(FLAGS.output_dir)
        initial_epoch = 0
        if latest_checkpoint and FLAGS.restore_checkpoint:
            # checkpoint.restore must be within a strategy.scope() so that optimizer
            # slot variables are mirrored.
            checkpoint.restore(latest_checkpoint)
            logging.info('Loaded checkpoint %s', latest_checkpoint)
            initial_epoch = optimizer.iterations.numpy() // steps_per_epoch

    @tf.function
    def train_step(iterator):
        """Training StepFn."""
        def step_fn(inputs):
            """Per-Replica StepFn."""
            images, labels = inputs
            images = tf.tile(images, [FLAGS.ensemble_size, 1, 1, 1])

            # generate lambdas
            lambdas = log_uniform_sample(per_core_batch_size,
                                         lambda_parameters)
            lambdas = tf.reshape(lambdas,
                                 (FLAGS.ensemble_size * per_core_batch_size,
                                  lambdas_config.dim))

            with tf.GradientTape() as tape:
                logits = model([images, lambdas], training=True)
                if FLAGS.use_bfloat16:
                    logits = tf.cast(logits, tf.float32)

                if FLAGS.use_gibbs_ce:
                    # Average of single-model CEs; labels should be tiled
                    # only for the Gibbs CE loss
                    labels = tf.tile(labels, [FLAGS.ensemble_size])
                    negative_log_likelihood = tf.reduce_mean(
                        tf.keras.losses.sparse_categorical_crossentropy(
                            labels, logits, from_logits=True))
                else:
                    # Ensemble CE uses no tiling of the labels
                    negative_log_likelihood = ensemble_crossentropy(
                        labels, logits, FLAGS.ensemble_size)
                # Note: Divide l2_loss by sample_size (this differs from the
                # uncertainty_baselines implementation).
                l2_loss = sum(model.losses) / train_sample_size
                loss = negative_log_likelihood + l2_loss
                # Scale the loss given the TPUStrategy will reduce sum all gradients.
                scaled_loss = loss / strategy.num_replicas_in_sync

            grads = tape.gradient(scaled_loss, model.trainable_variables)

            # Separate learning rate for fast weights.
            grads_and_vars = []
            for grad, var in zip(grads, model.trainable_variables):
                if (('alpha' in var.name or 'gamma' in var.name)
                        and 'batch_norm' not in var.name):
                    grads_and_vars.append(
                        (grad * FLAGS.fast_weight_lr_multiplier, var))
                else:
                    grads_and_vars.append((grad, var))
            optimizer.apply_gradients(grads_and_vars)

            probs = tf.nn.softmax(logits)
            per_probs = tf.split(probs,
                                 num_or_size_splits=FLAGS.ensemble_size,
                                 axis=0)
            per_probs_stacked = tf.stack(per_probs, axis=0)
            metrics['train/ece'].update_state(labels, probs)
            metrics['train/loss'].update_state(loss)
            metrics['train/negative_log_likelihood'].update_state(
                negative_log_likelihood)
            metrics['train/accuracy'].update_state(labels, logits)
            diversity_results = um.average_pairwise_diversity(
                per_probs_stacked, FLAGS.ensemble_size)
            for k, v in diversity_results.items():
                metrics['train/' + k].update_state(v)

            if grads_and_vars:
                grads, _ = zip(*grads_and_vars)

        strategy.run(step_fn, args=(next(iterator), ))

    @tf.function
    def tuning_step(iterator):
        """Tuning StepFn."""
        def step_fn(inputs):
            """Per-Replica StepFn."""
            images, labels = inputs
            images = tf.tile(images, [FLAGS.ensemble_size, 1, 1, 1])

            with tf.GradientTape(watch_accessed_variables=False) as tape:
                tape.watch(lambda_parameters)

                # sample lambdas
                if FLAGS.sample_and_tune:
                    lambdas = log_uniform_sample(per_core_batch_size,
                                                 lambda_parameters)
                else:
                    lambdas = log_uniform_mean(lambda_parameters)
                    lambdas = tf.repeat(lambdas, per_core_batch_size, axis=0)
                lambdas = tf.reshape(lambdas,
                                     (FLAGS.ensemble_size *
                                      per_core_batch_size, lambdas_config.dim))
                # ensemble CE
                logits = model([images, lambdas], training=False)
                ce = ensemble_crossentropy(labels, logits, FLAGS.ensemble_size)
                # entropy penalty for lambda distribution
                entropy = FLAGS.tau * log_uniform_entropy(lambda_parameters)
                loss = ce - entropy
                scaled_loss = loss / strategy.num_replicas_in_sync

            gradients = tape.gradient(loss, lambda_parameters)
            tuner.apply_gradients(zip(gradients, lambda_parameters))

            metrics['validation/loss_ce'].update_state(
                ce / strategy.num_replicas_in_sync)
            metrics['validation/loss_entropy'].update_state(
                entropy / strategy.num_replicas_in_sync)
            metrics['validation/loss'].update_state(scaled_loss)

        strategy.run(step_fn, args=(next(iterator), ))

    @tf.function
    def test_step(iterator, dataset_name, num_eval_samples=0):
        """Evaluation StepFn."""

        n_samples = num_eval_samples if num_eval_samples >= 0 else -num_eval_samples
        if num_eval_samples >= 0:
            # the +1 accounts for the fact that we add the mean of lambdas
            ensemble_size = FLAGS.ensemble_size * (1 + n_samples)
        else:
            ensemble_size = FLAGS.ensemble_size * n_samples

        def step_fn(inputs):
            """Per-Replica StepFn."""
            # Note that we don't use tf.tile for labels here
            images, labels = inputs
            images = tf.tile(images, [ensemble_size, 1, 1, 1])

            # get lambdas
            samples = log_uniform_sample(n_samples, lambda_parameters)
            if num_eval_samples >= 0:
                lambdas = log_uniform_mean(lambda_parameters)
                lambdas = tf.expand_dims(lambdas, 1)
                lambdas = tf.concat((lambdas, samples), 1)
            else:
                lambdas = samples

            # lambdas with shape (ens size, samples, dim of lambdas)
            rep_lambdas = tf.repeat(lambdas, per_core_batch_size, axis=1)
            rep_lambdas = tf.reshape(rep_lambdas,
                                     (ensemble_size * per_core_batch_size, -1))

            # eval on testsets
            logits = model([images, rep_lambdas], training=False)
            if FLAGS.use_bfloat16:
                logits = tf.cast(logits, tf.float32)
            probs = tf.nn.softmax(logits)
            per_probs = tf.split(probs,
                                 num_or_size_splits=ensemble_size,
                                 axis=0)

            # per member performance and gibbs performance (average per member perf)
            if dataset_name == 'clean':
                for i in range(FLAGS.ensemble_size):
                    # we record the first sample of lambdas per batch-ens member
                    first_member_index = i * (ensemble_size //
                                              FLAGS.ensemble_size)
                    member_probs = per_probs[first_member_index]
                    member_loss = tf.keras.losses.sparse_categorical_crossentropy(
                        labels, member_probs)
                    metrics['test/nll_member_{}'.format(i)].update_state(
                        member_loss)
                    metrics['test/accuracy_member_{}'.format(i)].update_state(
                        labels, member_probs)

                labels_tile = tf.tile(labels, [ensemble_size])
                metrics['test/gibbs_nll'].update_state(
                    tf.reduce_mean(
                        tf.keras.losses.sparse_categorical_crossentropy(
                            labels_tile, logits, from_logits=True)))
                metrics['test/gibbs_accuracy'].update_state(labels_tile, probs)

            # ensemble performance
            negative_log_likelihood = ensemble_crossentropy(
                labels, logits, ensemble_size)
            probs = tf.reduce_mean(per_probs, axis=0)
            if dataset_name == 'clean':
                metrics['test/negative_log_likelihood'].update_state(
                    negative_log_likelihood)
                metrics['test/accuracy'].update_state(labels, probs)
                metrics['test/ece'].update_state(labels, probs)
            else:
                corrupt_metrics['test/nll_{}'.format(
                    dataset_name)].update_state(negative_log_likelihood)
                corrupt_metrics['test/accuracy_{}'.format(
                    dataset_name)].update_state(labels, probs)
                corrupt_metrics['test/ece_{}'.format(
                    dataset_name)].update_state(labels, probs)

            if dataset_name == 'clean':
                per_probs_stacked = tf.stack(per_probs, axis=0)
                diversity_results = um.average_pairwise_diversity(
                    per_probs_stacked, ensemble_size)
                for k, v in diversity_results.items():
                    metrics['test/' + k].update_state(v)

        strategy.run(step_fn, args=(next(iterator), ))

    logging.info('--- Starting training using %d examples. ---',
                 train_sample_size)
    train_iterator = iter(train_dataset)
    validation_iterator = iter(validation_dataset)
    start_time = time.time()
    for epoch in range(initial_epoch, FLAGS.train_epochs):
        logging.info('Starting to run epoch: %s', epoch)
        for step in range(steps_per_epoch):
            train_step(train_iterator)
            do_tuning = (epoch >= FLAGS.tuning_warmup_epochs)
            if do_tuning and ((step + 1) % FLAGS.tuning_every_x_step == 0):
                tuning_step(validation_iterator)
                # clip lambda parameters if outside of range
                clip_lambda_parameters(lambda_parameters, lambdas_config)

            current_step = epoch * steps_per_epoch + (step + 1)
            max_steps = steps_per_epoch * FLAGS.train_epochs
            time_elapsed = time.time() - start_time
            steps_per_sec = float(current_step) / time_elapsed
            eta_seconds = (max_steps - current_step) / steps_per_sec
            message = ('{:.1%} completion: epoch {:d}/{:d}. {:.1f} steps/s. '
                       'ETA: {:.0f} min. Time elapsed: {:.0f} min'.format(
                           current_step / max_steps, epoch + 1,
                           FLAGS.train_epochs, steps_per_sec, eta_seconds / 60,
                           time_elapsed / 60))
            if step % 20 == 0:
                logging.info(message)

        # evaluate on test data
        datasets_to_evaluate = {'clean': test_datasets['clean']}
        if (FLAGS.corruptions_interval > 0
                and (epoch + 1) % FLAGS.corruptions_interval == 0):
            datasets_to_evaluate = test_datasets
        for dataset_name, test_dataset in datasets_to_evaluate.items():
            test_iterator = iter(test_dataset)
            logging.info('Testing on dataset %s', dataset_name)
            for step in range(steps_per_eval):
                if step % 20 == 0:
                    logging.info('Starting to run eval step %s of epoch: %s',
                                 step, epoch)
                test_step(test_iterator, dataset_name, FLAGS.num_eval_samples)
            logging.info('Done with testing on %s', dataset_name)

        corrupt_results = {}
        if (FLAGS.corruptions_interval > 0
                and (epoch + 1) % FLAGS.corruptions_interval == 0):
            corrupt_results = utils.aggregate_corrupt_metrics(
                corrupt_metrics, corruption_types, max_intensity)
        logging.info('Train Loss: %.4f, Accuracy: %.2f%%',
                     metrics['train/loss'].result(),
                     metrics['train/accuracy'].result() * 100)
        logging.info('Validation Loss: %.4f, CE: %.4f, Entropy: %.4f',
                     metrics['validation/loss'].result(),
                     metrics['validation/loss_ce'].result(),
                     metrics['validation/loss_entropy'].result())
        logging.info('Test NLL: %.4f, Accuracy: %.2f%%',
                     metrics['test/negative_log_likelihood'].result(),
                     metrics['test/accuracy'].result() * 100)
        for i in range(FLAGS.ensemble_size):
            logging.info(
                'Member %d Test Loss: %.4f, Accuracy: %.2f%%', i,
                metrics['test/nll_member_{}'.format(i)].result(),
                metrics['test/accuracy_member_{}'.format(i)].result() * 100)

        total_results = {
            name: metric.result()
            for name, metric in metrics.items()
        }
        total_results.update({
            name: metric.result()
            for name, metric in corrupt_metrics.items()
        })
        total_results.update(corrupt_results)
        with summary_writer.as_default():
            for name, result in total_results.items():
                tf.summary.scalar(name, result, step=epoch + 1)

        for metric in metrics.values():
            metric.reset_states()

        # save checkpoint and lambdas config
        if (FLAGS.checkpoint_interval > 0
                and (epoch + 1) % FLAGS.checkpoint_interval == 0):
            checkpoint_name = checkpoint.save(
                os.path.join(FLAGS.output_dir, 'checkpoint'))
            lambdas_cf = lambdas_config.get_config()
            filepath = os.path.join(FLAGS.output_dir, 'lambdas_config.p')
            with tf.io.gfile.GFile(filepath, 'wb') as fp:
                pickle.dump(lambdas_cf, fp, protocol=pickle.HIGHEST_PROTOCOL)
            logging.info('Saved checkpoint to %s', checkpoint_name)
Example #44
0
parser.add_argument('--content_dim', type=int, default=128, help='size of the content vector')
parser.add_argument('--pose_dim', type=int, default=10, help='size of the pose vector')
parser.add_argument('--image_width', type=int, default=128, help='the height / width of the input image to network')
parser.add_argument('--channels', default=3, type=int)
parser.add_argument('--dataset', default='kth', help='dataset to train with')
parser.add_argument('--max_step', type=int, default=20, help='maximum distance between frames')
parser.add_argument('--sd_weight', type=float, default=0.0001, help='weight on adversarial loss')
parser.add_argument('--sd_nf', type=int, default=100, help='number of layers')
parser.add_argument('--content_model', default='dcgan_unet', help='model type (dcgan | dcgan_unet | vgg_unet)')
parser.add_argument('--pose_model', default='dcgan', help='model type (dcgan | unet | resnet)')
parser.add_argument('--data_threads', type=int, default=24, help='number of parallel data loading threads')
parser.add_argument('--normalize', action='store_true', help='if true, normalize pose vector')
parser.add_argument('--data_type', default='drnet', help='speed up data loading for drnet training')
opt = parser.parse_args()

train_data, test_data = utils.load_dataset(opt)

test_loader = DataLoader(test_data,
                         num_workers=opt.data_threads,
                         batch_size=opt.batch_size,
                         shuffle=True,
                         drop_last=True,
                         pin_memory=True)


def get_testing_batch():
    while True:
        for sequence in test_loader:
            batch = utils.normalize_data(opt, dtype, sequence)
            yield batch
testing_batch_generator = get_testing_batch()
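# Usage sketch (assumption about the downstream training loop, not from the
# original): each call draws one normalized test batch.
#     x = next(testing_batch_generator)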
Example #45
0
def getLSTM(params, filename):
    '''
    This function uses the parameters to fetch an LSTM network and then
    trains it if the user wants to. After training it saves the model in the
    ./models folder under the name str(params) so that the model can be
    easily recognized and reloaded. Otherwise it can load a previously
    trained model and return the predicting function.

    Parameters
    ----------
    params : The parameters needed by the model in the form of a dictionary

    filename : An optional parameter which is required when we want to load
        a previously built model

    Returns
    -------
    predictor : A predictor function using which we can get the labels for
        the test data


    References
    ----------
    http://colinraffel.com/talks/hammer2015recurrent.pdf
    '''

    input_var = T.ftensor3('input_var')
    l_out = lstm(input_var, params)

    target_values = T.fmatrix('target_output')

    network_output = lasagne.layers.get_output(l_out)
    cost = T.mean((network_output - target_values)**2)

    all_params = lasagne.layers.get_all_params(l_out)
    updates = lasagne.updates.adagrad(
        cost, all_params, params['LEARNING_RATE'])

    pred = theano.function([input_var], network_output,
                           allow_input_downcast=True)

    if filename:
        print "Loading a previously saved " + params['NAME']
        all_param_values = np.load("./models/" + filename + '.npy')

        for i in range(len(all_param_values)):
            all_param_values[i] = all_param_values[i].astype('float32')

        all_params = lasagne.layers.get_all_params(l_out)
        for p, v in zip(all_params, all_param_values):
            p.set_value(v)

    else:
        print('loading data for ' + params['NAME'])
        X_train1, y_train1, X_val1, y_val1 = load_dataset(X1, y1, params['NUM_FEATURES'], params['SEQ_LENGTH'])
        X_train2, y_train2, X_val2, y_val2 = load_dataset(X2, y2, params['NUM_FEATURES'], params['SEQ_LENGTH'])

        print('compiling the ' + params['NAME'])

        train = theano.function([input_var, target_values],
                                cost, updates=updates, allow_input_downcast=True)
        validate = theano.function(
            [input_var, target_values], cost, allow_input_downcast=True)

        old_valerr = [10, 10]

        for epoch in range(params['NUM_EPOCHS']):
            print "Training the network..."
            train_err = 0
            train_batches = 0
            old_netout = l_out
            start_time = time.time()

            for batch in iterate_minibatches(X_train1, y_train1, params['BATCH_SIZE'],
                                             params['SEQ_LENGTH'], shuffle=False):
                # if train_batches % 50 == 0:
                #     print "batch number " + str(train_batches)
                inputs, targets = batch
                train_err += train(inputs, targets)
                train_batches += 1

            for batch in iterate_minibatches(X_train2, y_train2, params['BATCH_SIZE'],
                                             params['SEQ_LENGTH'], shuffle=False):
                # if train_batches % 50 == 0:
                #     print "batch number " + str(train_batches)
                inputs, targets = batch
                train_err += train(inputs, targets)
                train_batches += 1

            # And a full pass over the validation data:
            val_err = 0
            val_acc = 0
            val_batches = 0

            for batch in iterate_minibatches(X_val1, y_val1, params['BATCH_SIZE'],
                                             params['SEQ_LENGTH'], shuffle=False):
                inputs, targets = batch
                err = validate(inputs, targets)
                val_err += err
                # val_acc += acc
                val_batches += 1

            # Then we print the results for this epoch:
            print("Epoch {} of {} (Composite addresses) took {:.3f}s".format(
                epoch + 1, params['NUM_EPOCHS'], time.time() - start_time))
            print("  training loss:\t\t{:.6f}".format(
                train_err / train_batches))
            print("  validation loss:\t\t{:.6f}".format(val_err / val_batches))
            # print("  validation accuracy:\t\t{:.2f} %".format(val_acc/val_batches * 100))

            # to prevent overfitting
            # or val_err - old_valerr[0] < 0.001:
            # or old_valerr[0] - val_err < 0.001:
            if val_err - old_valerr[0] > 0.03:
                print "overfitting or model reached saturation...\n"
                print old_valerr
                l_out = old_netout
                break

            old_netout = l_out
            old_valerr[0] = val_err

            val_err = 0
            val_acc = 0
            val_batches = 0

            for batch in iterate_minibatches(X_val2, y_val2, params['BATCH_SIZE'],
                                             params['SEQ_LENGTH'], shuffle=False):
                inputs, targets = batch
                err = validate(inputs, targets)
                val_err += err
                # val_acc += acc
                val_batches += 1

            # Then we print the results for this epoch:
            print("Epoch {} of {} (OneLine addresses) took {:.3f}s".format(
                epoch + 1, params['NUM_EPOCHS'], time.time() - start_time))
            print("  training loss:\t\t{:.6f}".format(
                train_err / train_batches))
            print("  validation loss:\t\t{:.6f}".format(val_err / val_batches))
            # print("  validation accuracy:\t\t{:.2f} %\n".format(val_acc/val_batches * 100))
            old_valerr[1] = val_err

        print "saving the parameters..."
        all_param_values = [p.get_value() for p in all_params]
        np.save("./models/" + str(params), all_param_values)

    return pred
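
# Usage sketch (hypothetical parameter values; the keys are the ones this
# function reads):
#     params = {'NAME': 'lstm', 'NUM_FEATURES': 40, 'SEQ_LENGTH': 25,
#               'BATCH_SIZE': 128, 'NUM_EPOCHS': 20, 'LEARNING_RATE': 0.01}
#     predict = getLSTM(params, None)            # train from scratch
#     predict = getLSTM(params, 'saved_model')   # load ./models/saved_model.npy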