Example #1
def init_params(options):
    feadim = options['featureMaps']
    ctxdim = 512
    hdim = options['hdim']
    actionNum = options['actions']
    fdim = 64

    params = OrderedDict()
    #params=Saliency_init(params,ctxdim,prefix="recog");
    #params=ff_init(params,ctxdim,512,prefix="recog",name='ctx_pre');
    params = Linger_init(params, feadim, ctxdim, prefix='recog', name='linger')
    # FNN channel
    params = ff_init(params, ctxdim, fdim, prefix="recog", name='highway')
    # LSTM
    params = LSTM_init(params, ctxdim, hdim, prefix="recog", name='lstm')

    params = ff_init(params, hdim, fdim, prefix="recog", name='fullconn')
    params = ff_init(params, fdim, actionNum, prefix="recog", name='output')

    tparams = share_params(params)
    # load params if needed
    loadfrom = options['loadfrom']
    if options['load'] and os.path.exists(loadfrom):
        print "loading model parameters from ", loadfrom
        tparams = load_params(loadfrom, tparams, strict=False)

    return tparams
def init_params(options):
    ctxdim = options['featureMaps']
    hdim = options['hdim']
    actionNum = options['actions']
    fdim = options['fdim']

    params = OrderedDict()

    params = Saliency_init(params, ctxdim, prefix="recog", name='saliency')

    # FNN channel
    params = ff_init(params, ctxdim, fdim, prefix="recog", name='fullconn')
    # LSTM channel
    params = LSTM_init(params, ctxdim, hdim, prefix="recog", name='lstm')
    params = ff_init(params, hdim, fdim, prefix="recog", name='fullconn_lstm')

    # output
    params = ff_init(params, fdim, actionNum, prefix="recog", name='output')

    tparams = share_params(params)
    # load params if needed
    loadfrom = options['loadfrom']
    if options['load'] and os.path.exists(loadfrom):
        print "loading model parameters from ", loadfrom
        tparams = load_params(loadfrom, tparams, strict=False)

    return tparams
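
For context, a minimal sketch of the options dictionary these init_params variants read; the key names come from the lookups above, but the values are placeholders rather than the authors' settings.

# Hypothetical configuration for init_params; values are illustrative only.
options = {
    'featureMaps': 1024,      # dimensionality of the input feature maps
    'hdim': 512,              # LSTM hidden size
    'actions': 101,           # number of output action classes
    'fdim': 64,               # width of the fully connected layers
    'load': False,            # whether to reload saved parameters
    'loadfrom': 'model.npz',  # checkpoint path used when 'load' is True
}
tparams = init_params(options)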
Example #3
def init_params(options):
    ctxdim = options['featureMaps']
    hdim = options['hdim']
    actionNum = options['actions']
    fdim = options['fdim']

    # print actionNum;
    params = OrderedDict()

    params = SaliencyLSTM_init(params, ctxdim, prefix="recog", name='saliencyLSTM')

    params = ff_init(params, ctxdim, fdim, prefix="recog", name='channel0')

    params = LSTM_init(params, ctxdim, hdim, prefix="recog", name='lstm')
    params = ff_init(params, hdim, fdim, prefix="recog", name='channel1')

    # output
    params = ff_init(params, fdim, actionNum, prefix="recog", name='output')

    tparams = share_params(params)
    # load params if needed
    loadfrom = options['loadfrom']
    if options['load'] and os.path.exists(loadfrom):
        print "loading model parameters from ", loadfrom
        tparams = load_params(loadfrom, tparams, strict=False)

    return tparams
Example #4
  def __init__(self, load=None, **kwargs):
    if load is None:
      args = {}
    else:
      args = util.load_params(load, 'train')
      
    util.update(args, mode=RL.Mode.TRAIN, **kwargs)
    print(args)
    Default.__init__(self, **args)
    
    if self.init:
      self.model.init()
      self.model.save()
    else:
      self.model.restore()

    context = zmq.Context.instance()

    self.experience_socket = context.socket(zmq.PULL)
    experience_addr = "tcp://%s:%d" % (self.dump, util.port(self.model.name + "/experience"))
    self.experience_socket.bind(experience_addr)
    
    self.params_socket = context.socket(zmq.PUB)
    params_addr = "tcp://%s:%d" % (self.dump, util.port(self.model.name + "/params"))
    print("Binding params socket to", params_addr)
    self.params_socket.bind(params_addr)

    self.sweep_size = self.batches * self.batch_size
    print("Sweep size", self.sweep_size)
    
    self.buffer = util.CircularQueue(self.sweep_size)
    
    self.last_save = time.time()
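
For orientation, a rough sketch of what the worker side of this trainer might look like, the PUSH/SUB counterpart to the PULL/PUB sockets bound above; the host name and port numbers are placeholders, not values from the original project.

import zmq

context = zmq.Context.instance()

# Assumed worker side of the trainer above: PUSH experience to its PULL socket
# and SUBSCRIBE to parameter broadcasts from its PUB socket.  "trainer-host" and
# the literal ports stand in for self.dump and util.port(...) in the real code.
experience_socket = context.socket(zmq.PUSH)
experience_socket.connect("tcp://trainer-host:7000")

params_socket = context.socket(zmq.SUB)
params_socket.setsockopt(zmq.SUBSCRIBE, b"")
params_socket.connect("tcp://trainer-host:7001")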
Example #5
def init_params(options):
    ctxdim = options['featureMaps']
    hdim = options['hdim']
    actionNum = options['actions']
    fdim = options['fdim']

    params = OrderedDict()

    params = Saliency_init(params, ctxdim, prefix="recog", name='saliency')

    # FNN channel
    params = ff_init(params, ctxdim, fdim, prefix="recog", name='fullconn')
    # LSTM channel
    params = LSTM_init(params, ctxdim, hdim, prefix="recog", name='lstm')
    params = ff_init(params, hdim, fdim, prefix="recog", name='fullconn_lstm')

    #output
    params = ff_init(params, fdim, actionNum, prefix="recog", name='output')

    tparams = share_params(params)
    # load params if needed
    loadfrom = options['loadfrom']
    if options['load'] and os.path.exists(loadfrom):
        print "loading model parameters from ", loadfrom
        tparams = load_params(loadfrom, tparams, strict=False)

    return tparams
Example #6
def gen_model(idx, context, model, options, k, normalize, word_idict, sampling):
    import theano
    from theano import tensor
    from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams

    trng = RandomStreams(1234)
    # this is zero to indicate that we are not using dropout in the graph
    use_noise = theano.shared(numpy.float32(0.), name='use_noise')

    # get the parameters
    params = init_params(options)
    params = load_params(model, params)
    tparams = init_tparams(params)

    # build the sampling computational graph
    # see capgen.py for more detailed explanations
    f_init, f_next = build_sampler(tparams, options, use_noise, trng, sampling=sampling)

    def _gencap(cc0):
        sample, score = gen_sample(tparams, f_init, f_next, cc0, options,
                                   trng=trng, k=k, maxlen=200, stochastic=False)
        # adjust for length bias
        if normalize:
            lengths = numpy.array([len(s) for s in sample])
            score = score / lengths
        sidx = numpy.argmin(score)
        return sample[sidx]

    seq = _gencap(context)

    return (idx, seq)
def init_params(options):
    ctxdim = options['featureMaps']
    hdim = options['hdim']
    actionNum = options['actions']
    fdim = options['fdim']

    # print actionNum;
    params = OrderedDict()

    params = SaliencyFgbg_init(params, options['locations'], options['featureMaps'],
                               prefix="recog", name='saliencyFgbg')

    params = ff_init(params, ctxdim, fdim, prefix="recog", name='channel0')

    params = LSTM_init(params, ctxdim, hdim, prefix="recog", name='lstm1')
    params = ff_init(params, hdim, fdim, prefix="recog", name='channel1')

    # output
    params = ff_init(params, fdim, actionNum, prefix="recog", name='output')

    # params['recog/saliencyFgbg_w']=theano.gradient.grad_clip(params['recog/saliencyFgbg_w'],-0.1,0.1);
    tparams = share_params(params)
    # load params if needed
    loadfrom = options['loadfrom']
    if options['load']:
        if os.path.exists(loadfrom):
            print "loading model parameters from ", loadfrom
            tparams = load_params(loadfrom, tparams, strict=False)
        else:
            print "Not exist ", loadfrom

    return tparams
def init_params(options):
    ctxdim = options['featureMaps']
    hdim = options['hdim']
    actionNum = options['actions']
    fdim = options['fdim']

    # print actionNum;
    params = OrderedDict()

    params = SaliencyFgbg_init(params,
                               options['locations'],
                               options['featureMaps'],
                               prefix="recog",
                               name='saliencyFgbg')

    params = ff_init(params, ctxdim, fdim, prefix="recog", name='channel0')

    params = LSTM_init(params, ctxdim, hdim, prefix="recog", name='lstm1')
    params = ff_init(params, hdim, fdim, prefix="recog", name='channel1')

    #output
    params = ff_init(params, fdim, actionNum, prefix="recog", name='output')

    # params['recog/saliencyFgbg_w']=theano.gradient.grad_clip(params['recog/saliencyFgbg_w'],-0.1,0.1);
    tparams = share_params(params)
    # load params if needed
    loadfrom = options['loadfrom']
    if options['load']:
        if os.path.exists(loadfrom):
            print "loading model parameters from ", loadfrom
            tparams = load_params(loadfrom, tparams, strict=False)
        else:
            print "Not exist ", loadfrom

    return tparams
Example #9
def get_model():
    model_params = load_params()["model"]

    if model_params["name"].lower() == "mlp":
        p = model_params["mlp"]
        model = mlp(p["units"], p["activation"])
    elif model_params["name"].lower() == "cnn":
        p = model_params["cnn"]
        model = cnn(dense_units=p["dense_units"],
                    conv_kernel=(p["conv_kernel_size"], p["conv_kernel_size"]),
                    conv_units=p["conv_units"],
                    dropout=p["dropout"],
                    activation=p["activation"])
    else:
        raise Exception(
            f"No Model with the name {model_params['name']} is defined")

    if model_params["optimizer"].lower() == "adam":
        optimizer = tf.keras.optimizers.Adam()
    elif model_params["optimizer"].lower() == "sgd":
        optimizer = tf.keras.optimizers.SGD()
    elif model_params["optimizer"].lower() == "rmsprop":
        optimizer = tf.keras.optimizers.RMSprop()
    elif model_params["optimizer"].lower() == "adadelta":
        optimizer = tf.keras.optimizers.Adadelta()
    elif model_params["optimizer"].lower() == "adagrad":
        optimizer = tf.keras.optimizers.Adagrad()
    elif model_params["optimizer"].lower() == "adamax":
        optimizer = tf.keras.optimizers.Adamax()
    elif model_params["optimizer"].lower() == "nadam":
        optimizer = tf.keras.optimizers.Nadam()
    elif model_params["optimizer"].lower() == "ftrl":
        optimizer = tf.keras.optimizers.Ftrl()
    else:
        raise Exception(
            f"No optimizer with the name {model_params['optimizer']} is defined"
        )

    loss = tf.keras.losses.CategoricalCrossentropy(from_logits=True)

    metrics = [
        tf.keras.metrics.CategoricalAccuracy(),
        tf.keras.metrics.Precision(),
        tf.keras.metrics.Recall(),
        tf.keras.metrics.AUC(curve="ROC", name="ROC", multi_label=True),
        tf.keras.metrics.AUC(curve="PR", name="PR", multi_label=True),
        tf.keras.metrics.TruePositives(),
        tf.keras.metrics.TrueNegatives(),
        tf.keras.metrics.FalsePositives(),
        tf.keras.metrics.FalseNegatives()
    ]

    model.compile(
        optimizer=optimizer,
        loss=loss,
        metrics=metrics,
    )

    return model
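
get_model above only works if load_params() returns a mapping with the keys it indexes; a minimal sketch of that structure, with placeholder values, might be:

# Hypothetical shape of load_params()["model"]; the keys mirror the lookups in get_model.
example_model_params = {
    "name": "cnn",           # "mlp" or "cnn"
    "optimizer": "adam",     # any of the optimizer names handled above
    "mlp": {"units": 128, "activation": "relu"},
    "cnn": {
        "dense_units": 128,
        "conv_kernel_size": 3,
        "conv_units": 32,
        "dropout": 0.5,
        "activation": "relu",
    },
}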
Example #10
def gen_model(queue, rqueue, pid, model, options, k, normalize, word_idict,
              sampling):
    import theano
    from theano import tensor
    from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams

    trng = RandomStreams(1234)
    # this is zero to indicate that we are not using dropout in the graph
    use_noise = theano.shared(numpy.float32(0.), name='use_noise')

    # get the parameters
    params = init_params(options)
    params = load_params(model, params)
    tparams = init_tparams(params)

    # build the sampling computational graph
    # see capgen.py for more detailed explanations
    f_init, f_next = build_sampler(tparams,
                                   options,
                                   use_noise,
                                   trng,
                                   sampling=sampling)

    def _gencap(cc0):
        sample, score = gen_sample(tparams,
                                   f_init,
                                   f_next,
                                   cc0,
                                   options,
                                   trng=trng,
                                   k=k,
                                   maxlen=200,
                                   stochastic=False)
        # adjust for length bias
        if normalize:
            lengths = numpy.array([len(s) for s in sample])
            score = score / lengths
        sidx = numpy.argmin(score)
        return sample[sidx]

    while True:
        req = queue.get()
        # exit signal
        if req is None:
            break

        idx, context = req[0], req[1]

        print "Processing example %d in process # %d" % (idx, pid)
        seq = _gencap(context)
        print seq
        rqueue.put((idx, seq))
        print "Added example %d to the result queue" % idx

    print "gen_model process w/ pid %d has returned..." % pid
    return
Example #11
def gen_model(model, options):

    # this is zero to indicate that we are not using dropout in the graph
    use_noise = theano.shared(numpy.float32(0.), name='use_noise')

    # get the parameters
    params = init_params(options)
    params = load_params(model, params)
    tparams = init_tparams(params)

    # build the sampling computational graph
    # see capgen.py for more detailed explanations
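    # NOTE: trng is not defined in this snippet; it is presumably a module-level
    # RandomStreams instance in the original file.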
    f_init, f_next = build_sampler(tparams, options, use_noise, trng)

    return f_init, f_next
Example #12
def main():
    params = load_params()

    test_img, test_labels = load_npz_data(
        "data/fashion-mnist/preprocessed/mnist-test.npz")
    model = tf.keras.models.load_model("models/fashion-mnist/model.h5")

    metrics_dict = model.evaluate(
        test_img,
        test_labels,
        batch_size=params["train"]["batch_size"],
        return_dict=True,
    )
    metrics_file = "metrics.json"

    with open(metrics_file, "w") as f:
        f.write(json.dumps(metrics_dict))
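
load_npz_data is defined outside these snippets; given that the preparation scripts below write their arrays with np.savez(images=..., labels=...), a plausible sketch (an assumption, not the project's actual helper) is:

import numpy as np

def load_npz_data(path):
    # Assumed helper: read an .npz archive written with np.savez(images=..., labels=...)
    # and return the two arrays in the order the callers above unpack them.
    with np.load(path) as data:
        return data["images"], data["labels"]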
Example #13
    def __init__(self,
                 param_folder,
                 data_folder,
                 overlap_thresh=0.5,
                 n=100,
                 use_cuda=False):
        """
        data_folder: the folder of a sequence that has ground-truth (can be BU-RU dataset, look into this)
        param_folder: folder in which all params files are populated already using hog_multi_trainer
        """
        self.imgs, self.pos_rects = util.read_imgs(data_folder)
        # self.params contains all the combinations of [BB,bsize,csize,nbins,weights]
        # list in list structure basically
        self.sign, self.params = util.load_params(param_folder)

        self.overlap_thresh = overlap_thresh
        self.n = n
        self.use_cuda = use_cuda
Example #14
def gen_model(queue, rqueue, pid, model, options, k, normalize, word_idict, sampling):
    import theano
    from theano import tensor
    from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams

    trng = RandomStreams(1234)
    # this is zero to indicate that we are not using dropout in the graph
    use_noise = theano.shared(numpy.float32(0.), name='use_noise')

    # get the parameters
    params = init_params(options)
    params = load_params(model, params)
    tparams = init_tparams(params)

    # build the sampling computational graph
    # see capgen.py for more detailed explanations
    f_init, f_next = build_sampler(tparams, options, use_noise, trng, sampling=sampling)

    def _gencap(cc0):
        sample, score = gen_sample(tparams, f_init, f_next, cc0, options,
                                   trng=trng, k=k, maxlen=200, stochastic=False)
        # adjust for length bias
        if normalize:
            lengths = numpy.array([len(s) for s in sample])
            score = score / lengths
        sidx = numpy.argmin(score)
        return sample[sidx]

    while True:
        req = queue.get()
        # exit signal
        if req is None:
            break

        idx, context = req[0], req[1]

        print "Processing example %d in process # %d" % (idx, pid)
        seq = _gencap(context)
        print seq
        rqueue.put((idx, seq))
        print "Added example %d to the result queue" % idx

    print "gen_model process w/ pid %d has returned..." % pid
    return
Example #15
def main():
    params = load_params()["train"]
    if params["resume"] and os.path.exists(MODEL_FILE):
        m = tf.keras.models.load_model(MODEL_FILE)
    else:
        m = models.get_model()
    m.summary()

    whole_train_img, whole_train_labels = load_npz_data(
        "data/preprocessed/mnist-train.npz")
    test_img, test_labels = load_npz_data("data/preprocessed/mnist-test.npz")
    validation_split_index = int(
        (1 - params["validation_split"]) * whole_train_img.shape[0])
    if validation_split_index == whole_train_img.shape[0]:
        x_train = whole_train_img
        x_valid = test_img
        y_train = whole_train_labels
        y_valid = test_labels
    else:
        x_train = whole_train_img[:validation_split_index]
        x_valid = whole_train_img[validation_split_index:]
        y_train = whole_train_labels[:validation_split_index]
        y_valid = whole_train_labels[validation_split_index:]

    print(f"x_train: {x_train.shape}")
    print(f"x_valid: {x_valid.shape}")
    print(f"y_train: {y_train.shape}")
    print(f"y_valid: {y_valid.shape}")

    dvclive.init("training_metrics")

    history = m.fit(x_train,
                    y_train,
                    batch_size=params["batch_size"],
                    epochs=params["epochs"],
                    verbose=1,
                    validation_data=(x_valid, y_valid),
                    callbacks=[DVCLiveCallback()])

    with open("logs.csv", "w") as f:
        f.write(history_to_csv(history))

    m.save(MODEL_FILE)
Example #16
def main():
    params = load_params()["train"]
    m = models.get_model()
    m.summary()

    whole_train_img, whole_train_labels = load_npz_data(
        "data/fashion-mnist/preprocessed/mnist-train.npz"
    )
    test_img, test_labels = load_npz_data(
        "data/fashion-mnist/preprocessed/mnist-test.npz"
    )
    validation_split_index = int(
        (1 - params["validation_split"]) * whole_train_img.shape[0]
    )
    if validation_split_index == whole_train_img.shape[0]:
        x_train = whole_train_img
        x_valid = test_img
        y_train = whole_train_labels
        y_valid = test_labels
    else:
        x_train = whole_train_img[:validation_split_index]
        x_valid = whole_train_img[validation_split_index:]
        y_train = whole_train_labels[:validation_split_index]
        y_valid = whole_train_labels[validation_split_index:]

    print(f"x_train: {x_train.shape}")
    print(f"x_valid: {x_valid.shape}")
    print(f"y_train: {y_train.shape}")
    print(f"y_valid: {y_valid.shape}")

    history = m.fit(
        x_train,
        y_train,
        batch_size=params["batch_size"],
        epochs=params["epochs"],
        verbose=1,
        validation_data=(x_valid, y_valid),
    )

    with open("logs.csv", "w") as f:
        f.write(history_to_csv(history))

    m.save("models/fashion-mnist/model.h5")
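
history_to_csv is also defined elsewhere in the project; a minimal sketch consistent with how it is used here (serializing a Keras History object to CSV text) could be:

def history_to_csv(history):
    # Assumed helper: history.history maps each metric name to a list of per-epoch values.
    keys = list(history.history.keys())
    rows = [",".join(["epoch"] + keys)]
    for epoch, values in enumerate(zip(*(history.history[k] for k in keys))):
        rows.append(",".join([str(epoch)] + [str(v) for v in values]))
    return "\n".join(rows) + "\n"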
Example #17
def gen_model(idx, context, model, options, k, normalize, word_idict,
              sampling):
    import theano
    from theano import tensor
    from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams

    trng = RandomStreams(1234)
    # this is zero to indicate that we are not using dropout in the graph
    use_noise = theano.shared(numpy.float32(0.), name='use_noise')

    # get the parameters
    params = init_params(options)
    params = load_params(model, params)
    tparams = init_tparams(params)

    # build the sampling computational graph
    # see capgen.py for more detailed explanations
    f_init, f_next = build_sampler(tparams,
                                   options,
                                   use_noise,
                                   trng,
                                   sampling=sampling)

    def _gencap(cc0):
        sample, score = gen_sample(tparams,
                                   f_init,
                                   f_next,
                                   cc0,
                                   options,
                                   trng=trng,
                                   k=k,
                                   maxlen=200,
                                   stochastic=False)
        # adjust for length bias
        if normalize:
            lengths = numpy.array([len(s) for s in sample])
            score = score / lengths
        sidx = numpy.argmin(score)
        return sample[sidx]

    seq = _gencap(context)

    return (idx, seq)
Example #18
def main():
    params = load_params()["prepare"]
    print(params)

    training_images = mnist_images_idx_to_array(
        "data/fashion-mnist/raw/train-images-idx3-ubyte.gz")
    # print(f"Read training data: {training_images}")
    training_labels = mnist_labels_idx_to_array(
        "data/fashion-mnist/raw/train-labels-idx1-ubyte.gz")
    # print(f"Read training labels: {training_labels}")
    testing_images = mnist_images_idx_to_array(
        "data/fashion-mnist/raw/t10k-images-idx3-ubyte.gz")
    # print(f"Read testing data: {testing_images}")
    testing_labels = mnist_labels_idx_to_array(
        "data/fashion-mnist/raw/t10k-labels-idx1-ubyte.gz")
    # print(f"Read testing labels: {testing_labels}")

    if params["remix"]:
        training_images, testing_images, training_labels, testing_labels = remix(
            images1=training_images,
            images2=testing_images,
            labels1=training_labels,
            labels2=testing_labels,
            seed=params["seed"],
            split=params["remix_split"])

        assert training_images.shape[0] + testing_images.shape[0] == 70000
        assert training_labels.shape[0] + testing_labels.shape[0] == 70000

    print(f"Training Dataset Shape: {training_images.shape}")
    print(f"Testing Dataset Shape: {testing_images.shape}")
    print(f"Training Labels: {training_labels}")
    print(f"Testing Labels: {testing_labels}")

    os.makedirs("data/fashion-mnist/prepared")

    np.savez("data/fashion-mnist/prepared/mnist-train.npz",
             images=training_images,
             labels=training_labels)
    np.savez("data/fashion-mnist/prepared/mnist-test.npz",
             images=testing_images,
             labels=testing_labels)
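
remix is not shown in these examples; judging from its signature and the assertions above (the pooled size stays at 70000), it pools both splits and re-divides them. A sketch under that assumption:

import numpy as np

def remix(images1, images2, labels1, labels2, seed, split):
    # Assumed behaviour: pool both splits, shuffle with a fixed seed, and
    # cut again so that `split` is the fraction assigned to training.
    images = np.concatenate([images1, images2])
    labels = np.concatenate([labels1, labels2])
    order = np.random.RandomState(seed).permutation(len(images))
    cut = int(split * len(images))
    train_idx, test_idx = order[:cut], order[cut:]
    return images[train_idx], images[test_idx], labels[train_idx], labels[test_idx]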
Example #19
def main():
    params = load_params()["preprocess"]
    print(params)

    training_images, training_labels = load_npz_data(
        "data/fashion-mnist/prepared/mnist-train.npz")
    testing_images, testing_labels = load_npz_data(
        "data/fashion-mnist/prepared/mnist-test.npz")

    seed = params["seed"]

    if params["normalize"]:
        training_images = normalize(training_images)
        testing_images = normalize(testing_images)

    if params["shuffle"]:
        training_images, training_labels = shuffle_in_parallel(
            seed, training_images, training_labels)
        testing_images, testing_labels = shuffle_in_parallel(
            seed, testing_images, testing_labels)

    training_labels = tf.keras.utils.to_categorical(training_labels,
                                                    num_classes=10,
                                                    dtype="float32")
    testing_labels = tf.keras.utils.to_categorical(testing_labels,
                                                   num_classes=10,
                                                   dtype="float32")

    print(
        f"Training Images: {training_images.shape} - {training_images.dtype}")
    print(f"Testing Images: {testing_images.shape} - {testing_images.dtype}")

    if not os.path.exists("data/fashion-mnist/preprocessed"):
        os.makedirs("data/fashion-mnist/preprocessed")
    np.savez("data/fashion-mnist/preprocessed/mnist-train.npz",
             images=training_images,
             labels=training_labels)
    np.savez("data/fashion-mnist/preprocessed/mnist-test.npz",
             images=testing_images,
             labels=testing_labels)
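
normalize and shuffle_in_parallel are project helpers that are not included here; minimal sketches consistent with how they are called above (the real implementations may differ):

import numpy as np

def normalize(images):
    # Assumed: scale 8-bit pixel values into [0, 1].
    return images.astype("float32") / 255.0

def shuffle_in_parallel(seed, images, labels):
    # Assumed: apply the same seeded permutation to both arrays so that
    # images and labels stay aligned.
    order = np.random.RandomState(seed).permutation(images.shape[0])
    return images[order], labels[order]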
def init_params(options):
    ctxdim = 256
    hdim = options['hdim']
    actionNum = options['actions']
    fdim = options['fdim']

    params = OrderedDict()

    # params=Saliency_init(params,ctxdim,prefix="recog",name='saliency');
    # params=Linger_init(params,ctxdim,ctxdim,prefix='recog',name='linger');

    # LSTM
    # params=LSTM_init(params,ctxdim,hdim,prefix="recog",name='lstm1');
    # params=LSTM_init(params,hdim,512,prefix="recog",name='lstm2');
    # params=LSTM_init(params,512,256,prefix="recog",name='lstm3');
    # params=LSTM_init(params,256,128,prefix="recog",name='lstm4');

    # multiChannel
    params = ff_init(params, ctxdim, fdim, prefix="recog", name='channel0')
    # params=ff_init(params,hdim,fdim,prefix="recog",name='channel1');
    # params=ff_init(params,512,fdim,prefix="recog",name='channel2');
    # params=ff_init(params,256,fdim,prefix="recog",name='channel3');
    # params=ff_init(params,128,fdim,prefix="recog",name='channel4');

    # output
    params = ff_init(params, fdim, actionNum, prefix="recog", name='output')

    tparams = share_params(params)
    tparams = cnn_init(tparams, cnn_net, prefix='recog', name='cnn')

    # load params if needed
    loadfrom = options['loadfrom']
    if options['load'] and os.path.exists(loadfrom):
        print "loading model parameters from ", loadfrom
        tparams = load_params(loadfrom, tparams, strict=False)

    return tparams
def init_params(options):
    ctxdim = 256
    hdim = options['hdim']
    actionNum = options['actions']
    fdim = options['fdim']

    params = OrderedDict()

    # params=Saliency_init(params,ctxdim,prefix="recog",name='saliency');
    # params=Linger_init(params,ctxdim,ctxdim,prefix='recog',name='linger');

    # LSTM
    # params=LSTM_init(params,ctxdim,hdim,prefix="recog",name='lstm1');
    # params=LSTM_init(params,hdim,512,prefix="recog",name='lstm2');
    # params=LSTM_init(params,512,256,prefix="recog",name='lstm3');
    # params=LSTM_init(params,256,128,prefix="recog",name='lstm4');

    # multiChannel
    params = ff_init(params, ctxdim, fdim, prefix="recog", name='channel0')
    # params=ff_init(params,hdim,fdim,prefix="recog",name='channel1');
    # params=ff_init(params,512,fdim,prefix="recog",name='channel2');
    # params=ff_init(params,256,fdim,prefix="recog",name='channel3');
    # params=ff_init(params,128,fdim,prefix="recog",name='channel4');

    #output
    params = ff_init(params, fdim, actionNum, prefix="recog", name='output')

    tparams = share_params(params)
    tparams = cnn_init(tparams, cnn_net, prefix='recog', name='cnn')

    # load params if needed
    loadfrom = options['loadfrom']
    if options['load'] and os.path.exists(loadfrom):
        print "loading model parameters from ", loadfrom
        tparams = load_params(loadfrom, tparams, strict=False)

    return tparams
def init_params(options):
    featureMaps = options['featureMaps']
    actionNum = options['actions']
    fdim = options['fdim']

    params = OrderedDict()

    params = ff_init(params, featureMaps, fdim, prefix="recog", name='fc0')
    params = ff_init(params, fdim, fdim, prefix="recog", name='fc1')

    # output
    params = ff_init(params, fdim, actionNum, prefix="recog", name='output')

    tparams = share_params(params)
    tparams = lasagne_net_init(tparams, CNN_NET, CNN_outputLayerName,
                               prefix='recog', name='cnn')

    # load params if needed
    loadfrom = options['loadfrom']
    if options['load'] and os.path.exists(loadfrom):
        print "loading model parameters from ", loadfrom
        tparams = load_params(loadfrom, tparams, strict=False)

    return tparams
def train(args, model_args):

    model_id = '/data/lisatmp4/anirudhg/spiral_walk_back/walkback_'
    model_dir = create_log_dir(args, model_id)
    model_id2 = 'logs/walkback_'
    model_dir2 = create_log_dir(args, model_id2)
    print model_dir
    print model_dir2 + '/' + 'log.jsonl.gz'
    logger = mimir.Logger(filename=model_dir2 + '/log.jsonl.gz',
                          formatter=None)

    # TODO batches_per_epoch should not be hard coded
    lrate = args.lr
    import sys
    sys.setrecursionlimit(10000000)
    args, model_args = parse_args()

    #trng = RandomStreams(1234)

    if args.resume_file is not None:
        print "Resuming training from " + args.resume_file
        from blocks.scripts import continue_training
        continue_training(args.resume_file)

    ## load the training data
    if args.dataset == 'MNIST':
        print 'loading MNIST'
        from fuel.datasets import MNIST
        dataset_train = MNIST(['train'], sources=('features', ))
        dataset_test = MNIST(['test'], sources=('features', ))
        n_colors = 1
        spatial_width = 28

    elif args.dataset == 'CIFAR10':
        from fuel.datasets import CIFAR10
        dataset_train = CIFAR10(['train'], sources=('features', ))
        dataset_test = CIFAR10(['test'], sources=('features', ))
        n_colors = 3
        spatial_width = 32

    elif args.dataset == "lsun" or args.dataset == "lsunsmall":

        print "loading lsun class!"

        from load_lsun import load_lsun

        print "loading lsun data!"

        if args.dataset == "lsunsmall":
            dataset_train, dataset_test = load_lsun(args.batch_size,
                                                    downsample=True)
            spatial_width = 32
        else:
            dataset_train, dataset_test = load_lsun(args.batch_size,
                                                    downsample=False)
            spatial_width = 64

        n_colors = 3

    elif args.dataset == "celeba":

        print "loading celeba data"

        from fuel.datasets.celeba import CelebA

        dataset_train = CelebA(which_sets=['train'],
                               which_format="64",
                               sources=('features', ),
                               load_in_memory=False)
        dataset_test = CelebA(which_sets=['test'],
                              which_format="64",
                              sources=('features', ),
                              load_in_memory=False)

        spatial_width = 64
        n_colors = 3

        tr_scheme = SequentialScheme(examples=dataset_train.num_examples,
                                     batch_size=args.batch_size)
        ts_scheme = SequentialScheme(examples=dataset_test.num_examples,
                                     batch_size=args.batch_size)

        train_stream = DataStream.default_stream(dataset_train,
                                                 iteration_scheme=tr_scheme)
        test_stream = DataStream.default_stream(dataset_test,
                                                iteration_scheme=ts_scheme)

        dataset_train = train_stream
        dataset_test = test_stream

        #epoch_it = train_stream.get_epoch_iterator()

    elif args.dataset == 'Spiral':
        print 'loading SPIRAL'
        train_set = Spiral(num_examples=20000,
                           classes=1,
                           cycles=1.,
                           noise=0.01,
                           sources=('features', ))
        dataset_train = DataStream.default_stream(
            train_set,
            iteration_scheme=ShuffledScheme(train_set.num_examples,
                                            args.batch_size))
    elif args.dataset == 'Circle':
        print 'loading Circle'
        train_set = Circle(num_examples=20000,
                           classes=1,
                           cycles=1.,
                           noise=0.0,
                           sources=('features', ))
        dataset_train = DataStream.default_stream(
            train_set,
            iteration_scheme=ShuffledScheme(train_set.num_examples,
                                            args.batch_size))
        iter_per_epoch = train_set.num_examples
    else:
        raise ValueError("Unknown dataset %s." % args.dataset)

    model_options = locals().copy()

    train_stream = dataset_train

    shp = next(train_stream.get_epoch_iterator())[0].shape

    print "got epoch iterator"

    # make the training data 0 mean and variance 1
    # TODO compute mean and variance on full dataset, not minibatch
    Xbatch = next(train_stream.get_epoch_iterator())[0]
    scl = 1. / np.sqrt(np.mean((Xbatch - np.mean(Xbatch))**2))
    shft = -np.mean(Xbatch * scl)
    # scale is applied before shift
    #train_stream = ScaleAndShift(train_stream, scl, shft)
    #test_stream = ScaleAndShift(test_stream, scl, shft)

    print 'Building model'
    params = init_params(model_options)
    if args.reload_:
        print "Trying to reload parameters"
        if os.path.exists(args.saveto_filename):
            print 'Reloading Parameters'
            print args.saveto_filename
            params = load_params(args.saveto_filename, params)
    tparams = init_tparams(params)
    print tparams
    x, cost, start_temperature = build_model(tparams, model_options)
    inps = [x, start_temperature]

    x_Data = T.matrix('x_Data', dtype='float32')
    temperature = T.scalar('temperature', dtype='float32')
    forward_diffusion = one_step_diffusion(x_Data, model_options, tparams,
                                           temperature)

    #print 'Building f_cost...',
    #f_cost = theano.function(inps, cost)
    #print 'Done'
    print tparams
    grads = T.grad(cost, wrt=itemlist(tparams))

    #get_grads = theano.function(inps, grads)

    for j in range(0, len(grads)):
        grads[j] = T.switch(T.isnan(grads[j]), T.zeros_like(grads[j]),
                            grads[j])

    # compile the optimizer, the actual computational graph is compiled here
    lr = T.scalar(name='lr')
    print 'Building optimizers...',
    optimizer = args.optimizer

    f_grad_shared, f_update = getattr(optimizers, optimizer)(lr, tparams,
                                                             grads, inps, cost)
    print 'Done'

    print 'Building Sampler....'
    f_sample = sample(tparams, model_options)
    print 'Done'
    uidx = 0
    estop = False
    bad_counter = 0
    max_epochs = 4000
    batch_index = 0
    print 'Number of steps....', args.num_steps
    print 'Done'
    count_sample = 1
    batch_index = 0
    for eidx in xrange(max_epochs):
        if eidx % 20 == 0:
            params = unzip(tparams)
            save_params(params,
                        model_dir + '/' + 'params_' + str(eidx) + '.npz')
            if eidx == 30:
                ipdb.set_trace()
        n_samples = 0
        print 'Starting Next Epoch ', eidx

        for data in train_stream.get_epoch_iterator():
            batch_index += 1
            n_samples += len(data[0])
            uidx += 1
            if data[0] is None:
                print 'No data '
                uidx -= 1
                continue
            data_run = data[0]
            temperature_forward = args.temperature
            meta_cost = []
            for meta_step in range(0, args.meta_steps):
                meta_cost.append(f_grad_shared(data_run, temperature_forward))
                f_update(lrate)
                if args.meta_steps > 1:
                    data_run, sigma, _, _ = forward_diffusion(
                        data_run, temperature_forward)
                    temperature_forward *= args.temperature_factor
            cost = sum(meta_cost) / len(meta_cost)
            if np.isnan(cost) or np.isinf(cost):
                print 'NaN detected'
                return 1.
            logger.log({
                'epoch': eidx,
                'batch_index': batch_index,
                'uidx': uidx,
                'training_error': cost
            })
            empty = []
            spiral_x = [empty for i in range(args.num_steps)]
            spiral_corrupted = []
            spiral_sampled = []
            grad_forward = []
            grad_back = []
            x_data_time = []
            x_tilt_time = []
            if batch_index % 8 == 0:
                count_sample += 1
                temperature = args.temperature * (args.temperature_factor
                                                  **(args.num_steps - 1))
                temperature_forward = args.temperature
                for num_step in range(args.num_steps):
                    if num_step == 0:
                        x_data_time.append(data[0])
                        plot_images(
                            data[0], model_dir + '/' + 'orig_' + 'epoch_' +
                            str(count_sample) + '_batch_' + str(batch_index))
                        x_data, mu_data, _, _ = forward_diffusion(
                            data[0], temperature_forward)

                        plot_images(
                            x_data, model_dir + '/' + 'corrupted_' + 'epoch_' +
                            str(count_sample) + '_batch_' + str(batch_index) +
                            '_time_step_' + str(num_step))
                        x_data_time.append(x_data)
                        temp_grad = np.concatenate(
                            (x_data_time[-2], x_data_time[-1]), axis=1)
                        grad_forward.append(temp_grad)

                        x_data = np.asarray(x_data).astype('float32').reshape(
                            args.batch_size, INPUT_SIZE)
                        spiral_corrupted.append(x_data)
                        mu_data = np.asarray(mu_data).astype(
                            'float32').reshape(args.batch_size, INPUT_SIZE)
                        mu_data = mu_data.reshape(args.batch_size, 2)
                    else:
                        x_data_time.append(x_data)
                        x_data, mu_data, _, _ = forward_diffusion(
                            x_data, temperature_forward)
                        plot_images(
                            x_data, model_dir + '/' + 'corrupted_' + 'epoch_' +
                            str(count_sample) + '_batch_' + str(batch_index) +
                            '_time_step_' + str(num_step))
                        x_data = np.asarray(x_data).astype('float32').reshape(
                            args.batch_size, INPUT_SIZE)
                        spiral_corrupted.append(x_data)

                        mu_data = np.asarray(mu_data).astype(
                            'float32').reshape(args.batch_size, INPUT_SIZE)
                        mu_data = mu_data.reshape(args.batch_size, 2)
                        x_data_time.append(x_data)
                        temp_grad = np.concatenate(
                            (x_data_time[-2], x_data_time[-1]), axis=1)
                        grad_forward.append(temp_grad)
                    temperature_forward = temperature_forward * args.temperature_factor

                mean_sampled = x_data.mean()
                var_sampled = x_data.var()

                x_temp2 = data[0].reshape(args.batch_size, 2)
                plot_2D(
                    spiral_corrupted, args.num_steps,
                    model_dir + '/' + 'corrupted_' + 'epoch_' +
                    str(count_sample) + '_batch_' + str(batch_index))
                plot_2D(
                    x_temp2, 1, model_dir + '/' + 'orig_' + 'epoch_' +
                    str(count_sample) + '_batch_index_' + str(batch_index))
                plot_grad(
                    grad_forward,
                    model_dir + '/' + 'grad_forward_' + 'epoch_' +
                    str(count_sample) + '_batch_' + str(batch_index))
                for i in range(args.num_steps + args.extra_steps):
                    x_tilt_time.append(x_data)
                    x_data, sampled_mean = f_sample(x_data, temperature)
                    plot_images(
                        x_data, model_dir + '/' + 'sampled_' + 'epoch_' +
                        str(count_sample) + '_batch_' + str(batch_index) +
                        '_time_step_' + str(i))
                    x_tilt_time.append(x_data)
                    temp_grad = np.concatenate(
                        (x_tilt_time[-2], x_tilt_time[-1]), axis=1)
                    grad_back.append(temp_grad)

                    ###print 'Recons, On step number, using temperature', i, temperature
                    x_data = np.asarray(x_data).astype('float32')
                    x_data = x_data.reshape(args.batch_size, INPUT_SIZE)
                    if temperature == args.temperature:
                        temperature = temperature
                    else:
                        temperature /= args.temperature_factor

                plot_grad(
                    grad_back, model_dir + '/' + 'grad_back_' + 'epoch_' +
                    str(count_sample) + '_batch_' + str(batch_index))
                plot_2D(
                    x_tilt_time, args.num_steps,
                    model_dir + '/' + 'sampled_' + 'epoch_' +
                    str(count_sample) + '_batch_' + str(batch_index))

                s = np.random.normal(mean_sampled, var_sampled,
                                     [args.batch_size, 2])
                x_sampled = s

                temperature = args.temperature * (args.temperature_factor
                                                  **(args.num_steps - 1))
                x_data = np.asarray(x_sampled).astype('float32')
                for i in range(args.num_steps + args.extra_steps):
                    x_data, sampled_mean = f_sample(x_data, temperature)
                    spiral_sampled.append(x_data)
                    x_data = np.asarray(x_data).astype('float32')
                    x_data = x_data.reshape(args.batch_size, INPUT_SIZE)
                    if temperature == args.temperature:
                        temperature = temperature
                    else:
                        temperature /= args.temperature_factor
                plot_2D(
                    spiral_sampled, args.num_steps,
                    model_dir + '/' + 'inference_' + 'epoch_' +
                    str(count_sample) + '_batch_' + str(batch_index))
    ipdb.set_trace()
Example #24
def train(dim_word=100,  # word vector dimensionality
          ctx_dim=512,  # context vector dimensionality
          dim=1000,  # the number of LSTM units
          attn_type='stochastic',  # [see section 4 from paper]
          n_layers_att=1,  # number of layers used to compute the attention weights
          n_layers_out=1,  # number of layers used to compute logit
          n_layers_lstm=1,  # number of lstm layers
          n_layers_init=1,  # number of layers to initialize LSTM at time 0
          lstm_encoder=False,  # if True, run bidirectional LSTM on input units
          prev2out=False,  # Feed previous word into logit
          ctx2out=False,  # Feed attention weighted ctx into logit
          alpha_entropy_c=0.002,  # hard attn param
          RL_sumCost=True,  # hard attn param
          semi_sampling_p=0.5,  # hard attn param
          temperature=1.,  # hard attn param
          patience=10,
          max_epochs=5000,
          dispFreq=100,
          decay_c=0.,  # weight decay coeff
          alpha_c=0.,  # doubly stochastic coeff
          lrate=0.01,  # used only for SGD
          selector=False,  # selector (see paper)
          n_words=10000,  # vocab size
          maxlen=100,  # maximum length of the description
          optimizer='rmsprop',
          batch_size = 16,
          valid_batch_size = 16,
          saveto='model.npz',  # relative path of saved model file
          validFreq=1000,
          saveFreq=1000,  # save the parameters after every saveFreq updates
          sampleFreq=100,  # generate some samples after every sampleFreq updates
          data_path='./data',  # path to find data
          dataset='flickr8k',
          dictionary=None,  # word dictionary
          use_dropout=False,  # setting this true turns on dropout at various points
          use_dropout_lstm=False,  # dropout on lstm gates
          reload_=False,
          save_per_epoch=False): # this saves down the model every epoch

    # hyperparam dict
    model_options = locals().copy()
    model_options = validate_options(model_options)

    # reload options
    if reload_ and os.path.exists(saveto):
        print "Reloading options"
        with open('%s.pkl'%saveto, 'rb') as f:
            model_options = pkl.load(f)

    print "Using the following parameters:"
    print  model_options

    print 'Loading data'
    load_data, prepare_data = get_dataset(dataset)
    train, valid, test, worddict = load_data(path=data_path)
    if dataset == 'coco':
        valid, _ = valid # the second one contains all the validation data

    # index 0 and 1 always code for the end of sentence and unknown token
    word_idict = dict()
    for kk, vv in worddict.iteritems():
        word_idict[vv] = kk
    word_idict[0] = '<eos>'
    word_idict[1] = 'UNK'

    # Initialize (or reload) the parameters using 'model_options'
    # then build the Theano graph
    print 'Building model'
    params = init_params(model_options)
    if reload_ and os.path.exists(saveto):
        print "Reloading model"
        params = load_params(saveto, params)

    # numpy arrays -> theano shared variables
    tparams = init_tparams(params)

    # In order, we get:
    #   1) trng - theano random number generator
    #   2) use_noise - flag that turns on dropout
    #   3) inps - inputs for f_grad_shared
    #   4) cost - log likelihood for each sentence
    #   5) opt_outs - optional outputs (e.g. selector)
    trng, use_noise, \
          inps, alphas, alphas_sample,\
          cost, \
          opt_outs = \
          build_model(tparams, model_options)


    # To sample, we use beam search: 1) f_init is a function that initializes
    # the LSTM at time 0 [see top right of page 4], 2) f_next returns the distribution over
    # words and also the new "initial state/memory" see equation
    print 'Building sampler'
    f_init, f_next = build_sampler(tparams, model_options, use_noise, trng)

    # we want the cost without any of the regularizers
    # define the log probability
    f_log_probs = theano.function(inps, -cost, profile=False,
                                        updates=opt_outs['attn_updates']
                                        if model_options['attn_type']=='stochastic'
                                        else None, allow_input_downcast=True)

    # Define the cost function + Regularization
    cost = cost.mean()
    # add L2 regularization costs
    if decay_c > 0.:
        decay_c = theano.shared(numpy.float32(decay_c), name='decay_c')
        weight_decay = 0.
        for kk, vv in tparams.iteritems():
            weight_decay += (vv ** 2).sum()
        weight_decay *= decay_c
        cost += weight_decay

    # Doubly stochastic regularization
    if alpha_c > 0.:
        alpha_c = theano.shared(numpy.float32(alpha_c), name='alpha_c')
        alpha_reg = alpha_c * ((1.-alphas.sum(0))**2).sum(0).mean()
        cost += alpha_reg

    hard_attn_updates = []
    # Backprop!
    if model_options['attn_type'] == 'deterministic':
        grads = tensor.grad(cost, wrt=itemlist(tparams))
    else:
        # shared variables for hard attention
        baseline_time = theano.shared(numpy.float32(0.), name='baseline_time')
        opt_outs['baseline_time'] = baseline_time
        alpha_entropy_c = theano.shared(numpy.float32(alpha_entropy_c), name='alpha_entropy_c')
        alpha_entropy_reg = alpha_entropy_c * (alphas*tensor.log(alphas)).mean()
        # [see Section 4.1: Stochastic "Hard" Attention for derivation of this learning rule]
        if model_options['RL_sumCost']:
            grads = tensor.grad(cost, wrt=itemlist(tparams),
                                disconnected_inputs='raise',
                                known_grads={alphas:(baseline_time-opt_outs['masked_cost'].mean(0))[None,:,None]/10.*
                                            (-alphas_sample/alphas) + alpha_entropy_c*(tensor.log(alphas) + 1)})
        else:
            grads = tensor.grad(cost, wrt=itemlist(tparams),
                            disconnected_inputs='raise',
                            known_grads={alphas:opt_outs['masked_cost'][:,:,None]/10.*
                            (alphas_sample/alphas) + alpha_entropy_c*(tensor.log(alphas) + 1)})
        # [equation on bottom left of page 5]
        hard_attn_updates += [(baseline_time, baseline_time * 0.9 + 0.1 * opt_outs['masked_cost'].mean())]
        # updates from scan
        hard_attn_updates += opt_outs['attn_updates']

    # to get the cost after regularization or the gradients, use this
    # f_cost = theano.function([x, mask, ctx], cost, profile=False)
    # f_grad = theano.function([x, mask, ctx], grads, profile=False)

    # f_grad_shared computes the cost and updates adaptive learning rate variables
    # f_update updates the weights of the model
    lr = tensor.scalar(name='lr')
    f_grad_shared, f_update = eval(optimizer)(lr, tparams, grads, inps, cost, hard_attn_updates)

    print 'Optimization'

    # [See note in section 4.3 of paper]
    train_iter = HomogeneousData(train, batch_size=batch_size, maxlen=maxlen)

    if valid:
        kf_valid = KFold(len(valid[0]), n_folds=len(valid[0])/valid_batch_size, shuffle=False)
    if test:
        kf_test = KFold(len(test[0]), n_folds=len(test[0])/valid_batch_size, shuffle=False)

    # history_errs is a bare-bones training log that holds the validation and test error
    history_errs = []
    # reload history
    if reload_ and os.path.exists(saveto):
        history_errs = numpy.load(saveto)['history_errs'].tolist()
    best_p = None
    bad_counter = 0

    if validFreq == -1:
        validFreq = len(train[0])/batch_size
    if saveFreq == -1:
        saveFreq = len(train[0])/batch_size
    if sampleFreq == -1:
        sampleFreq = len(train[0])/batch_size

    uidx = 0
    estop = False
    for eidx in xrange(max_epochs):
        n_samples = 0

        print 'Epoch ', eidx

        for caps in train_iter:
            n_samples += len(caps)
            uidx += 1
            # turn on dropout
            use_noise.set_value(1.)

            # preprocess the caption, recording the
            # time spent to help detect bottlenecks
            pd_start = time.time()
            x, mask, ctx = prepare_data(caps,
                                        train[1],
                                        worddict,
                                        maxlen=maxlen,
                                        n_words=n_words)
            pd_duration = time.time() - pd_start

            if x is None:
                print 'Minibatch with zero sample under length ', maxlen
                continue

            # get the cost for the minibatch, and update the weights
            ud_start = time.time()
            cost = f_grad_shared(x, mask, ctx)
            f_update(lrate)
            ud_duration = time.time() - ud_start # some monitoring for each mini-batch

            # Numerical stability check
            if numpy.isnan(cost) or numpy.isinf(cost):
                print 'NaN detected'
                return 1., 1., 1.

            if numpy.mod(uidx, dispFreq) == 0:
                print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'PD ', pd_duration, 'UD ', ud_duration

            # Checkpoint
            if numpy.mod(uidx, saveFreq) == 0:
                print 'Saving...',

                if best_p is not None:
                    params = copy.copy(best_p)
                else:
                    params = unzip(tparams)
                numpy.savez(saveto, history_errs=history_errs, **params)
                pkl.dump(model_options, open('%s.pkl'%saveto, 'wb'))
                print 'Done'

            # Print a generated sample as a sanity check
            if numpy.mod(uidx, sampleFreq) == 0:
                # turn off dropout first
                use_noise.set_value(0.)
                x_s = x
                mask_s = mask
                ctx_s = ctx
                # generate and decode a subset of the current training batch
                for jj in xrange(numpy.minimum(10, len(caps))):
                    sample, score = gen_sample(tparams, f_init, f_next, ctx_s[jj], model_options,
                                               trng=trng, k=5, maxlen=30, stochastic=False)
                    # Decode the sample from encoding back to words
                    print 'Truth ',jj,': ',
                    for vv in x_s[:,jj]:
                        if vv == 0:
                            break
                        if vv in word_idict:
                            print word_idict[vv],
                        else:
                            print 'UNK',
                    print
                    for kk, ss in enumerate([sample[0]]):
                        print 'Sample (', kk,') ', jj, ': ',
                        for vv in ss:
                            if vv == 0:
                                break
                            if vv in word_idict:
                                print word_idict[vv],
                            else:
                                print 'UNK',
                    print

            # Log validation loss + checkpoint the model with the best validation log likelihood
            if numpy.mod(uidx, validFreq) == 0:
                use_noise.set_value(0.)
                train_err = 0
                valid_err = 0
                test_err = 0

                if valid:
                    valid_err = -pred_probs(f_log_probs, model_options, worddict, prepare_data, valid, kf_valid).mean()
                if test:
                    test_err = -pred_probs(f_log_probs, model_options, worddict, prepare_data, test, kf_test).mean()

                history_errs.append([valid_err, test_err])

                # the model with the best validation log likelihood is saved separately with a different name
                if uidx == 0 or valid_err <= numpy.array(history_errs)[:,0].min():
                    best_p = unzip(tparams)
                    print 'Saving model with best validation ll'
                    params = copy.copy(best_p)
                    params = unzip(tparams)
                    numpy.savez(saveto+'_bestll', history_errs=history_errs, **params)
                    bad_counter = 0

                # abort training if perplexity has been increasing for too long
                if eidx > patience and len(history_errs) > patience and valid_err >= numpy.array(history_errs)[:-patience,0].min():
                    bad_counter += 1
                    if bad_counter > patience:
                        print 'Early Stop!'
                        estop = True
                        break

                print 'Train ', train_err, 'Valid ', valid_err, 'Test ', test_err

        print 'Seen %d samples' % n_samples

        if estop:
            break

        if save_per_epoch:
            numpy.savez(saveto + '_epoch_' + str(eidx + 1), history_errs=history_errs, **unzip(tparams))

    # use the best nll parameters for final checkpoint (if they exist)
    if best_p is not None:
        zipp(best_p, tparams)

    use_noise.set_value(0.)
    train_err = 0
    valid_err = 0
    test_err = 0
    if valid:
        valid_err = -pred_probs(f_log_probs, model_options, worddict, prepare_data, valid, kf_valid)
    if test:
        test_err = -pred_probs(f_log_probs, model_options, worddict, prepare_data, test, kf_test)

    print 'Train ', train_err, 'Valid ', valid_err, 'Test ', test_err

    params = copy.copy(best_p)
    numpy.savez(saveto, zipped_params=best_p, train_err=train_err,
                valid_err=valid_err, test_err=test_err, history_errs=history_errs,
                **params)

    return train_err, valid_err, test_err
Example #25
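# Note: this snippet begins partway through a hyperparameter block; batch_size
# (used below for valid_size) is defined earlier in the original script.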
decoding_embedding_size = 64
# RNN Size
rnn_size = 64
# Number of Layers
num_layers = 2
# Learning Rate
learning_rate = 0.0001
# Dropout Keep Probability
keep_probability = 0.8
display_step = 10

save_path = 'checkpoints/sup/actor/dev'
save_path_critic = 'checkpoints/sup/critic/dev'

(source_int_text, target_int_text), (source_vocab_to_int, target_vocab_to_int), (source_int_to_vocab, target_int_to_vocab) = util.load_preprocess('preprocess.p')
load_path_actor = util.load_params('params_actor_sup.p')

source_train = util.read_list('source_train.npy')
target_train = util.read_list('target_train.npy')
valid_size = batch_size * 10
train_source = source_train[valid_size:]
train_target = target_train[valid_size:]
valid_source = source_train[:valid_size]
valid_target = target_train[:valid_size]

test_acc_list = []

train_graph = tf.Graph()
critic_graph = tf.Graph()
actor_graph = tf.Graph()
Example #26
def train(args, model_args, lrate):

    model_id = '/data/lisatmp4/anirudhg/minst_walk_back/walkback_'
    model_dir = create_log_dir(args, model_id)
    model_id2 = 'walkback_'
    model_dir2 = create_log_dir(args, model_id2)
    print model_dir
    logger = mimir.Logger(filename=model_dir2 + '/' + model_id2 +
                          'log.jsonl.gz',
                          formatter=None)

    # TODO batches_per_epoch should not be hard coded
    lrate = args.lr
    import sys
    sys.setrecursionlimit(10000000)
    args, model_args = parse_args()

    #trng = RandomStreams(1234)

    if args.resume_file is not None:
        print "Resuming training from " + args.resume_file
        from blocks.scripts import continue_training
        continue_training(args.resume_file)

    ## load the training data
    if args.dataset == 'MNIST':
        print 'loading MNIST'
        from fuel.datasets import MNIST
        dataset_train = MNIST(['train'], sources=('features', ))
        dataset_test = MNIST(['test'], sources=('features', ))
        n_colors = 1
        spatial_width = 28

    elif args.dataset == 'CIFAR10':
        from fuel.datasets import CIFAR10
        dataset_train = CIFAR10(['train'], sources=('features', ))
        dataset_test = CIFAR10(['test'], sources=('features', ))
        n_colors = 3
        spatial_width = 32

    elif args.dataset == 'Spiral':
        print 'loading SPIRAL'
        train_set = Spiral(num_examples=100000,
                           classes=1,
                           cycles=2.,
                           noise=0.01,
                           sources=('features', ))
        dataset_train = DataStream.default_stream(
            train_set,
            iteration_scheme=ShuffledScheme(train_set.num_examples,
                                            args.batch_size))

    else:
        raise ValueError("Unknown dataset %s." % args.dataset)

    model_options = locals().copy()

    train_stream = Flatten(
        DataStream.default_stream(dataset_train,
                                  iteration_scheme=ShuffledScheme(
                                      examples=dataset_train.num_examples,
                                      batch_size=args.batch_size)))

    shp = next(train_stream.get_epoch_iterator())[0].shape
    # make the training data 0 mean and variance 1
    # TODO compute mean and variance on full dataset, not minibatch
    Xbatch = next(train_stream.get_epoch_iterator())[0]
    scl = 1. / np.sqrt(np.mean((Xbatch - np.mean(Xbatch))**2))
    shft = -np.mean(Xbatch * scl)
    # scale is applied before shift
    #train_stream = ScaleAndShift(train_stream, scl, shft)
    #test_stream = ScaleAndShift(test_stream, scl, shft)

    print 'Building model'
    params = init_params(model_options)
    if args.reload_ and os.path.exists(args.saveto_filename):
        print 'Reloading Parameters'
        print args.saveto_filename
        params = load_params(args.saveto_filename, params)
    tparams = init_tparams(params)
    '''
    x = T.matrix('x', dtype='float32')
    f=transition_operator(tparams, model_options, x, 1)

    for data in train_stream.get_epoch_iterator():
        print data[0]
        a = f(data[0])
        print a
        ipdb.set_trace()
    '''
    x, cost = build_model(tparams, model_options)
    inps = [x]

    x_Data = T.matrix('x_Data', dtype='float32')
    temperature = T.scalar('temperature', dtype='float32')
    forward_diffusion = one_step_diffusion(x_Data, model_options, tparams,
                                           temperature)

    print 'Building f_cost...',
    f_cost = theano.function(inps, cost)
    print 'Done'
    print tparams
    grads = T.grad(cost, wrt=itemlist(tparams))

    get_grads = theano.function(inps, grads)

    for j in range(0, len(grads)):
        grads[j] = T.switch(T.isnan(grads[j]), T.zeros_like(grads[j]),
                            grads[j])

    # compile the optimizer, the actual computational graph is compiled here
    lr = T.scalar(name='lr')
    print 'Building optimizers...',
    optimizer = args.optimizer

    f_grad_shared, f_update = getattr(optimizers, optimizer)(lr, tparams,
                                                             grads, inps, cost)
    print 'Done'

    print 'Building Sampler....'
    f_sample = sample(tparams, model_options)
    print 'Done'

    uidx = 0
    estop = False
    bad_counter = 0
    max_epochs = 4000
    batch_index = 0
    print 'Number of steps....'
    print args.num_steps
    print 'Done'
    count_sample = 1
    for eidx in xrange(max_epochs):
        n_samples = 0
        print 'Starting Next Epoch ', eidx
        for data in train_stream.get_epoch_iterator():
            batch_index += 1
            n_samples += len(data[0])
            uidx += 1
            if data[0] is None:
                print 'No data '
                uidx -= 1
                continue
            ud_start = time.time()
            cost = f_grad_shared(data[0])
            f_update(lrate)
            ud = time.time() - ud_start

            if batch_index % 1 == 0:
                print 'Cost:', cost
                count_sample += 1

                from impainting import change_image, inpainting
                train_temp = data[0]
                print data[0].shape
                change_image(train_temp.reshape(args.batch_size, 1, 28, 28), 3)
                train_temp = train_temp.reshape(args.batch_size, 784)
                output = inpainting(train_temp)
                change_image(output.reshape(args.batch_size, 1, 28, 28), 1)

                reverse_time(
                    scl, shft, output,
                    model_dir + '/' + 'impainting_orig_' + 'epoch_' +
                    str(count_sample) + '_batch_index_' + str(batch_index))
                x_data = np.asarray(output).astype('float32')
                temperature = args.temperature  # annealed variant (disabled): args.temperature * (args.temperature_factor ** (args.num_steps - 1))
                orig_impainted_data = np.asarray(data[0]).astype('float32')

                for i in range(args.num_steps + args.extra_steps + 5):
                    x_data, sampled, sampled_activation, sampled_preactivation = f_sample(
                        x_data, temperature)
                    print 'Inpainting step', i, 'using temperature', temperature
                    x_data = do_half_image(x_data, orig_impainted_data)
                    reverse_time(
                        scl, shft, x_data, model_dir + '/' +
                        'impainting_orig_' + 'epoch_' + str(count_sample) +
                        '_batch_index_' + str(batch_index) + 'step_' + str(i))
                    x_data = np.asarray(x_data).astype('float32')
                    x_data = x_data.reshape(args.batch_size, INPUT_SIZE)
                    # temperature is held fixed here; the annealing step is left disabled
                    #if temperature != args.temperature:
                    #    temperature /= args.temperature_factor
    ipdb.set_trace()
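
# --- Illustrative sketch (not part of the example above) ---
# The loop over `grads` above uses T.switch(T.isnan(g), 0, g) so that a single
# NaN gradient entry does not poison the parameter update. A NumPy-only sketch
# of the same masking, on made-up gradient arrays:
import numpy as np

grads_demo = [np.array([0.1, np.nan, -0.3]),
              np.array([[1.0, np.nan], [2.0, 0.5]])]
cleaned = [np.where(np.isnan(g), np.zeros_like(g), g) for g in grads_demo]
print(cleaned)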
Ejemplo n.º 27
0
opts = parser.parse_args()
print(opts, file=sys.stderr)

assert opts.model_type in ['single', 'multi-res']

FITNESS_EVAL_BATCHES = 10
if opts.popn_size % FITNESS_EVAL_BATCHES != 0:
    raise Exception("only support population size that's"
                    " a multiple of %d" % FITNESS_EVAL_BATCHES)

dataset = data.dataset(split=opts.split, batch_size=opts.num_examples)
for x, y_true in dataset:
    break

params = u.load_params(opts.params)


@jit
def inv_mean_loss(member):
    if opts.model_type == 'single':
        # member denotes a channel mask we want to apply to
        # entire x batch
        model = models.construct_single_trunk_model()
        mask_tile_shape = list(x.shape)
        mask_tile_shape[-1] = 1
        mask = jnp.tile(member, mask_tile_shape)
        logits = model.apply(params, x * mask)
    else:  # multi-res
        # member denotes channel selection handled in model
        model = models.construct_multires_model()
Ejemplo n.º 28
0
    # Read the command line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--mode', choices=['train', 'test', 'render'], default='train')
    parser.add_argument(
        '--params', type=str, default='params.yaml',
        help='Source for experiment parameters (will be copied to log directory).')
    parser.add_argument(
        '--episodes', type=int, default=1,
        help='Number of episodes during testing.')
    args = parser.parse_args()

    # Set the experiment parameters
    PARAMS_FILE = str(Path(os.path.join(PARAMS_DIR, args.params)))
    params = load_params(PARAMS_FILE)
    params['mode'] = args.mode
    params['test_episodes'] = args.episodes
    params['random_seed'] = seed_generator(params['random_seed'], params['runs'])
    params['start_time'] = start_time

    # Set up directory structure if training
    #if params['mode'] == 'train':
    # Create experiment dir
    params['exp_dir'] = create_dir(
        Path(os.path.join(
            LOG_DIR, 
            params['env_type'], 
            params['env_name'],
            str(time.strftime("%Y-%m-%d_%H-%M")))))
    # Save experiment parameters to log dir
Ejemplo n.º 29
0
def main():
    params = load_params()
    m = get_model(conv_units=params['model']['conv_units'])
    m.summary()

    training_images, training_labels, testing_images, testing_labels = read_dataset(
        DATASET_FILE)

    assert training_images.shape[0] + testing_images.shape[0] == 70000
    assert training_labels.shape[0] + testing_labels.shape[0] == 70000

    training_images = normalize(training_images)
    testing_images = normalize(testing_images)

    training_labels = tf.keras.utils.to_categorical(training_labels,
                                                    num_classes=10,
                                                    dtype="float32")
    testing_labels = tf.keras.utils.to_categorical(testing_labels,
                                                   num_classes=10,
                                                   dtype="float32")

    # We use the test set as validation for simplicity
    x_train = training_images
    x_valid = testing_images
    y_train = training_labels
    y_valid = testing_labels

    history = m.fit(
        x_train,
        y_train,
        batch_size=BATCH_SIZE,
        epochs=params["train"]["epochs"],
        verbose=1,
        validation_data=(x_valid, y_valid),
        callbacks=[DvcLiveCallback(model_file=f"{OUTPUT_DIR}/model.h5")],
    )

    metrics_dict = m.evaluate(
        testing_images,
        testing_labels,
        batch_size=BATCH_SIZE,
        return_dict=True,
    )

    with open(METRICS_FILE, "w") as f:
        f.write(json.dumps(metrics_dict))

    misclassified = {}

    # predictions for the confusion matrix
    y_prob = m.predict(x_valid)
    y_pred = y_prob.argmax(axis=-1)
    os.makedirs("plots")
    with open("plots/confusion.csv", "w") as f:
        f.write("actual,predicted\n")
        sx = y_valid.shape[0]
        for i in range(sx):
            actual = y_valid[i].argmax()
            predicted = y_pred[i]
            f.write(f"{actual},{predicted}\n")
            misclassified[(actual, predicted)] = x_valid[i]

    # find misclassified examples and generate a confusion table image
    confusion_out = create_image_matrix(misclassified)
    imageio.imwrite("plots/confusion.png", confusion_out)
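
# --- Illustrative sketch (not part of the example above) ---
# The loop above writes one (actual, predicted) pair per CSV row, which is the
# flat form of a confusion matrix. A NumPy-only sketch of aggregating such
# pairs into a count matrix, using made-up labels for three classes:
import numpy as np

actual = np.array([0, 1, 2, 2, 1])
predicted = np.array([0, 2, 2, 2, 1])
confusion = np.zeros((3, 3), dtype=int)
for a, p in zip(actual, predicted):
    confusion[a, p] += 1   # rows = actual class, columns = predicted class
print(confusion)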
Ejemplo n.º 30
0
import tensorflow as tf
import numpy as np
import util

_, vocab_to_int, int_to_vocab, token_dict = util.load_preprocess()
seq_length, load_dir = util.load_params()


def get_tensors(loaded_graph):
    """
    Get input, initial state, final state, and probabilities tensor from <loaded_graph>
    :param loaded_graph: TensorFlow graph loaded from file
    :return: Tuple (InputTensor, InitialStateTensor, FinalStateTensor, ProbsTensor)
    """
    with loaded_graph.as_default():
        inputs = tf.get_default_graph().get_tensor_by_name('input:0')
        initial_state = tf.get_default_graph().get_tensor_by_name(
            'initial_state:0')
        final_state = tf.get_default_graph().get_tensor_by_name(
            'final_state:0')
        prob = tf.get_default_graph().get_tensor_by_name('probs:0')
    return inputs, initial_state, final_state, prob


def pick_word(probabilities, int_to_vocab):
    """
    Pick the next word in the generated text
    :param probabilities: Probabilities of the next word
    :param int_to_vocab: Dictionary of word ids as the keys and words as the values
    :return: String of the predicted word
    """
Ejemplo n.º 31
0
import tensorflow as tf
import numpy as np
import util

# Batch Size
batch_size = 128

_, (source_vocab_to_int, target_vocab_to_int), (
    source_int_to_vocab,
    target_int_to_vocab) = util.load_preprocess('preprocess.p')
load_path_sup = util.load_params('params_actor_sup.p')
load_path_actor = util.load_params('params_actor_reinforce.p')

source_test = util.read_list('source_test.npy')
target_test = util.read_list('target_test.npy')

test_acc_list = []

loaded_graph_sup = tf.Graph()
loaded_graph_actor = tf.Graph()
sup_sess = tf.Session(graph=loaded_graph_sup)
actor_sess = tf.Session(graph=loaded_graph_actor)
with sup_sess.as_default():
    with loaded_graph_sup.as_default():
        # Load saved model and restore the saved variables
        loader = tf.train.import_meta_graph(load_path_sup + '.meta')
        loader.restore(sup_sess, load_path_sup)

        input_data = loaded_graph_sup.get_tensor_by_name('input:0')
        logits = loaded_graph_sup.get_tensor_by_name('predictions:0')
        target_sequence_length = loaded_graph_sup.get_tensor_by_name(
Ejemplo n.º 32
0
def train(dim_word=100,  # word vector dimensionality
          ctx_dim=512,  # context vector dimensionality
          dim=1000,  # the number of LSTM units
          attn_type='deterministic',  # [see section 4 from paper]
          n_layers_att=1,  # number of layers used to compute the attention weights
          n_layers_out=1,  # number of layers used to compute logit
          n_layers_lstm=1,  # number of lstm layers
          n_layers_init=1,  # number of layers to initialize LSTM at time 0
          lstm_encoder=False,  # if True, run bidirectional LSTM on input units
          prev2out=False,  # Feed previous word into logit
          ctx2out=False,  # Feed attention weighted ctx into logit
          alpha_entropy_c=0.002,  # hard attn param
          RL_sumCost=False,  # hard attn param
          semi_sampling_p=0.5,  # hard attn param
          temperature=1.,  # hard attn param
          patience=10,
          max_epochs=5000,
          dispFreq=100,
          decay_c=0.,  # weight decay coeff
          alpha_c=0.,  # doubly stochastic coeff
          lrate=0.01,  # used only for SGD
          selector=False,  # selector (see paper)
          n_words=10000,  # vocab size
          maxlen=100,  # maximum length of the description
          optimizer='rmsprop',
          batch_size=16,
          valid_batch_size=2,  # changed from 16
          saveto='model.npz',  # relative path of saved model file
          validFreq=1000,
          saveFreq=1000,  # save the parameters after every saveFreq updates
          sampleFreq=5,  # generate some samples after every sampleFreq updates
          data_path='./data',  # path to find data
          dataset='flickr30k',
          dictionary=None,  # word dictionary
          use_dropout=False,  # setting this true turns on dropout at various points
          use_dropout_lstm=False,  # dropout on lstm gates
          reload_=False,
          save_per_epoch=False):  # this saves the model after every epoch

    # hyperparam dict
    model_options = locals().copy()
    model_options = validate_options(model_options)

    # reload options
    if reload_ and os.path.exists(saveto):
        print "Reloading options"
        with open('%s.pkl'%saveto, 'rb') as f:
            model_options = pkl.load(f)

    print "Using the following parameters:"
    print  model_options

    print 'Loading data'
    load_data, prepare_data = get_dataset(dataset)
    train, valid, test, worddict = load_data(path=data_path)

    # indices 0 and 1 always code for the end-of-sentence and unknown tokens
    word_idict = dict()
    for kk, vv in worddict.iteritems():
        word_idict[vv] = kk
    word_idict[0] = '<eos>'
    word_idict[1] = 'UNK'

    # Initialize (or reload) the parameters using 'model_options'
    # then build the Theano graph
    print 'Building model'
    params = init_params(model_options)
    if reload_ and os.path.exists(saveto):
        print "Reloading model"
        params = load_params(saveto, params)

    # numpy arrays -> theano shared variables
    tparams = init_tparams(params)

    # In order, we get:
    #   1) trng - theano random number generator
    #   2) use_noise - flag that turns on dropout
    #   3) inps - inputs for f_grad_shared
    #   4) cost - log likelihood for each sentence
    #   5) opt_outs - optional outputs (e.g. selector)
    trng, use_noise, \
          inps, alphas, alphas_sample,\
          cost, \
          opt_outs = \
          build_model(tparams, model_options)


    # To sample, we use beam search: 1) f_init is a function that initializes
    # the LSTM at time 0 [see top right of page 4], 2) f_next returns the distribution over
    # words and also the new "initial state/memory" see equation
    print 'Building sampler'
    f_init, f_next = build_sampler(tparams, model_options, use_noise, trng)

    # we want the cost without any of the regularizers
    # define the log probability
    f_log_probs = theano.function(inps, -cost, profile=False,
                                        updates=opt_outs['attn_updates']
                                        if model_options['attn_type']=='stochastic'
                                        else None, allow_input_downcast=True)

    # Define the cost function + Regularization
    cost = cost.mean()
    # add L2 regularization costs
    if decay_c > 0.:
        decay_c = theano.shared(numpy.float32(decay_c), name='decay_c')
        weight_decay = 0.
        for kk, vv in tparams.iteritems():
            weight_decay += (vv ** 2).sum()
        weight_decay *= decay_c
        cost += weight_decay

    # Doubly stochastic regularization
    if alpha_c > 0.:
        alpha_c = theano.shared(numpy.float32(alpha_c), name='alpha_c')
        alpha_reg = alpha_c * ((1.-alphas.sum(0))**2).sum(0).mean()
        cost += alpha_reg

    hard_attn_updates = []
    # Backprop!
    if model_options['attn_type'] == 'deterministic':
        grads = tensor.grad(cost, wrt=itemlist(tparams))
    else:
        # shared variables for hard attention
        baseline_time = theano.shared(numpy.float32(0.), name='baseline_time')
        opt_outs['baseline_time'] = baseline_time
        alpha_entropy_c = theano.shared(numpy.float32(alpha_entropy_c), name='alpha_entropy_c')
        alpha_entropy_reg = alpha_entropy_c * (alphas*tensor.log(alphas)).mean()
        # [see Section 4.1: Stochastic "Hard" Attention for derivation of this learning rule]
        if model_options['RL_sumCost']:
            grads = tensor.grad(cost, wrt=itemlist(tparams),
                                disconnected_inputs='raise',
                                known_grads={alphas:(baseline_time-opt_outs['masked_cost'].mean(0))[None,:,None]/10.*
                                            (-alphas_sample/alphas) + alpha_entropy_c*(tensor.log(alphas) + 1)})
        else:
            grads = tensor.grad(cost, wrt=itemlist(tparams),
                            disconnected_inputs='raise',
                            known_grads={alphas:opt_outs['masked_cost'][:,:,None]/10.*
                            (alphas_sample/alphas) + alpha_entropy_c*(tensor.log(alphas) + 1)})
        # [equation on bottom left of page 5]
        hard_attn_updates += [(baseline_time, baseline_time * 0.9 + 0.1 * opt_outs['masked_cost'].mean())]
        # updates from scan
        hard_attn_updates += opt_outs['attn_updates']

    # to get the cost after regularization or the gradients, use this
    # f_cost = theano.function([x, mask, ctx], cost, profile=False)
    # f_grad = theano.function([x, mask, ctx], grads, profile=False)

    # f_grad_shared computes the cost and updates adaptive learning rate variables
    # f_update updates the weights of the model
    lr = tensor.scalar(name='lr')
    f_grad_shared, f_update = eval(optimizer)(lr, tparams, grads, inps, cost, hard_attn_updates)

    print 'Optimization'

    # [See note in section 4.3 of paper]
    train_iter = HomogeneousData(train, batch_size=batch_size, maxlen=maxlen)

    if valid:
        kf_valid = KFold(len(valid[0]), n_folds=len(valid[0])/valid_batch_size, shuffle=False)
    if test:
        kf_test = KFold(len(test[0]), n_folds=len(test[0])/valid_batch_size, shuffle=False)

    # history_errs is a bare-bones training log that holds the validation and test error
    history_errs = []
    # reload history
    if reload_ and os.path.exists(saveto):
        history_errs = numpy.load(saveto)['history_errs'].tolist()
    best_p = None
    bad_counter = 0

    if validFreq == -1:
        validFreq = len(train[0])/batch_size
    if saveFreq == -1:
        saveFreq = len(train[0])/batch_size
    if sampleFreq == -1:
        sampleFreq = len(train[0])/batch_size

    uidx = 0
    estop = False
    for eidx in xrange(max_epochs):
        n_samples = 0

        print 'Epoch ', eidx

        for caps in train_iter:
            n_samples += len(caps)
            uidx += 1
            # turn on dropout
            use_noise.set_value(1.)

            # preprocess the caption, recording the
            # time spent to help detect bottlenecks
            pd_start = time.time()
            x, mask, ctx = prepare_data(caps,
                                        train[1],
                                        worddict,
                                        maxlen=maxlen,
                                        n_words=n_words)
            pd_duration = time.time() - pd_start

            if x is None:
                print 'Minibatch with zero sample under length ', maxlen
                continue

            # get the cost for the minibatch, and update the weights
            ud_start = time.time()
            cost = f_grad_shared(x, mask, ctx)
            f_update(lrate)
            ud_duration = time.time() - ud_start # some monitoring for each mini-batch

            # Numerical stability check
            if numpy.isnan(cost) or numpy.isinf(cost):
                print 'NaN detected'
                return 1., 1., 1.

            if numpy.mod(uidx, dispFreq) == 0:
                print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'PD ', pd_duration, 'UD ', ud_duration

            # Checkpoint
            if numpy.mod(uidx, saveFreq) == 0:
                print 'Saving...',

                if best_p is not None:
                    params = copy.copy(best_p)
                else:
                    params = unzip(tparams)
                numpy.savez(saveto, history_errs=history_errs, **params)
                pkl.dump(model_options, open('%s.pkl'%saveto, 'wb'))
                print 'Done'

            # Print a generated sample as a sanity check
            if numpy.mod(uidx, sampleFreq) == 0:
                # turn off dropout first
                use_noise.set_value(0.)
                x_s = x
                mask_s = mask
                ctx_s = ctx
                # generate and decode a subset of the current training batch
                for jj in xrange(numpy.minimum(10, len(caps))):
                    sample, score = gen_sample(tparams, f_init, f_next, ctx_s[jj], model_options,
                                               trng=trng, k=5, maxlen=30, stochastic=False)
                    # Decode the sample from encoding back to words
                    print 'Truth ',jj,': ',
                    for vv in x_s[:,jj]:
                        if vv == 0:
                            break
                        if vv in word_idict:
                            print word_idict[vv],
                        else:
                            print 'UNK',
                    print
                    for kk, ss in enumerate([sample[0]]):
                        print 'Sample (', kk,') ', jj, ': ',
                        for vv in ss:
                            if vv == 0:
                                break
                            if vv in word_idict:
                                print word_idict[vv],
                            else:
                                print 'UNK',
                    print

            # Log validation loss + checkpoint the model with the best validation log likelihood
            if numpy.mod(uidx, validFreq) == 0:
                use_noise.set_value(0.)
                train_err = 0
                valid_err = 0
                test_err = 0

                if valid:
                    valid_err = -pred_probs(f_log_probs, model_options, worddict, prepare_data, valid, kf_valid).mean()
                if test:
                    test_err = -pred_probs(f_log_probs, model_options, worddict, prepare_data, test, kf_test).mean()

                history_errs.append([valid_err, test_err])

                # the model with the best validation log likelihood is saved separately under a different name
                if uidx == 0 or valid_err <= numpy.array(history_errs)[:,0].min():
                    best_p = unzip(tparams)
                    print 'Saving model with best validation ll'
                    params = copy.copy(best_p)
                    numpy.savez(saveto+'_bestll', history_errs=history_errs, **params)
                    bad_counter = 0

                # abort training if perplexity has been increasing for too long
                if eidx > patience and len(history_errs) > patience and valid_err >= numpy.array(history_errs)[:-patience,0].min():
                    bad_counter += 1
                    if bad_counter > patience:
                        print 'Early Stop!'
                        estop = True
                        break

                print 'Train ', train_err, 'Valid ', valid_err, 'Test ', test_err

        print 'Seen %d samples' % n_samples

        if estop:
            break

        if save_per_epoch:
            numpy.savez(saveto + '_epoch_' + str(eidx + 1), history_errs=history_errs, **unzip(tparams))

    # use the best nll parameters for final checkpoint (if they exist)
    if best_p is not None:
        zipp(best_p, tparams)

    use_noise.set_value(0.)
    train_err = 0
    valid_err = 0
    test_err = 0
    if valid:
        valid_err = -pred_probs(f_log_probs, model_options, worddict, prepare_data, valid, kf_valid)
    if test:
        test_err = -pred_probs(f_log_probs, model_options, worddict, prepare_data, test, kf_test)

    print 'Train ', train_err, 'Valid ', valid_err, 'Test ', test_err

    params = copy.copy(best_p)
    numpy.savez(saveto, zipped_params=best_p, train_err=train_err,
                valid_err=valid_err, test_err=test_err, history_errs=history_errs,
                **params)

    return train_err, valid_err, test_err
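
# --- Illustrative sketch (not part of the example above) ---
# The doubly stochastic regularizer above is alpha_c * ((1 - alphas.sum(0))**2).sum(0).mean(),
# which penalizes attention weights whose totals over time drift away from 1 at
# each annotation position. A NumPy-only evaluation on a made-up
# (timesteps, batch, annotations) tensor:
import numpy as np

alpha_c_demo = 1.0
alphas_demo = np.random.dirichlet(np.ones(4), size=(3, 2))  # (T=3, batch=2, 4 annotations)
alpha_reg_demo = alpha_c_demo * ((1. - alphas_demo.sum(0)) ** 2).sum(0).mean()
print(alpha_reg_demo)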
Ejemplo n.º 33
0
import tensorflow as tf
import numpy as np

import util

_, (source_vocab_to_int,
    target_vocab_to_int), (source_int_to_vocab,
                           target_int_to_vocab) = util.load_preprocess()
load_path = util.load_params()


# To feed a sentence into the model for translation, you first need to preprocess it
def sentence_to_seq(sentence, vocab_to_int):
    """
    Convert a sentence to a sequence of ids
    :param sentence: String
    :param vocab_to_int: Dictionary to go from the words to an id
    :return: List of word ids
    """
    sentence = sentence.lower()
    sentence_list = sentence.split()
    word_ids = []
    for val in sentence_list:
        word_ids.append(vocab_to_int.get(val, vocab_to_int['<UNK>']))
    return word_ids


# Translate
translate_sentence = 'he saw a old yellow truck .'

translate_sentence = sentence_to_seq(translate_sentence, source_vocab_to_int)
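
# --- Illustrative check (not part of the example above) ---
# sentence_to_seq lower-cases the sentence, splits on whitespace and maps any
# out-of-vocabulary word to the '<UNK>' id. A quick check with a made-up toy
# vocabulary:
toy_vocab_to_int = {'<UNK>': 0, 'he': 1, 'saw': 2, 'a': 3, 'truck': 4, '.': 5}
print(sentence_to_seq('He saw a SHINY truck .', toy_vocab_to_int))
# expected output: [1, 2, 3, 0, 4, 5]  ('shiny' is unknown -> <UNK>)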
Ejemplo n.º 34
0
def train(
    dim_word=300,  # word vector dimensionality
    ctx_dim=300,  # context vector dimensionality
    semantic_dim=300,
    dim=1000,  # the number of LSTM units
    cnn_dim=4096,  # CNN feature dimension
    n_layers_att=1,  # number of layers used to compute the attention weights
    n_layers_out=1,  # number of layers used to compute logit
    n_layers_lstm=1,  # number of lstm layers
    n_layers_init=1,  # number of layers to initialize LSTM at time 0
    lstm_encoder=True,  # if True, run bidirectional LSTM on input units
    prev2out=False,  # Feed previous word into logit
    ctx2out=False,  # Feed attention weighted ctx into logit
    cutoff=10,
    patience=5,
    max_epochs=30,
    dispFreq=500,
    decay_c=0.,  # weight decay coeff
    alpha_c=0.,  # doubly stochastic coeff
    lrate=1e-4,  # used only for SGD
    selector=False,  # selector (see paper)
    maxlen=30,  # maximum length of the description
    optimizer='rmsprop',
    pretrained='',
    batch_size=256,
    saveto='model',  # relative path of saved model file
    saveFreq=1000,  # save the parameters after every saveFreq updates
    sampleFreq=100,  # generate some samples after every sampleFreq updates
    embedding='../Data/GloVe/vocab_glove.pkl',
    cnn_type='vgg',
    prefix='../Data',  # path to find data
    dataset='coco',
    criterion='Bleu_4',
    switch_test_val=False,
    use_cnninit=True,
    use_dropout=True,  # setting this true turns on dropout at various points
    use_dropout_lstm=False,  # dropout on lstm gates
    save_per_epoch=False):  # this saves the model after every epoch

    # hyperparam dict
    model_options = locals().copy()
    model_options = validate_options(model_options)

    # reload options
    if os.path.exists('%s.pkl' % saveto):
        print "Reloading options"
        with open('%s.pkl' % saveto, 'rb') as f:
            model_options = pkl.load(f)

    print "Using the following parameters:"
    print model_options

    print 'Loading data'
    load_data, prepare_data = get_dataset(model_options['dataset'])

    # Load data from data path
    if 'switch_test_val' in model_options and model_options['switch_test_val']:
        train, valid, worddict = load_data(path=osp.join(
            model_options['prefix'], model_options['dataset']),
                                           options=model_options,
                                           load_train=True,
                                           load_test=True)
    else:
        train, valid, worddict = load_data(path=osp.join(
            model_options['prefix'], model_options['dataset']),
                                           options=model_options,
                                           load_train=True,
                                           load_val=True)

    # Automatically calculate the update frequency
    validFreq = len(train[0]) / model_options['batch_size']
    print "Validation frequency is %d" % validFreq

    word_idict = {vv: kk for kk, vv in worddict.iteritems()}
    model_options['n_words'] = len(worddict)

    # Initialize (or reload) the parameters using 'model_options'
    # then build the Theano graph
    print 'Building model'
    params = init_params(model_options)
    # Initialize the word embeddings with GloVe
    if 'VCemb' in params:
        params['VCemb'] = read_pkl(
            model_options['embedding']).astype('float32')

    # If the same experiment already exists, don't use the pretrained weights
    if os.path.exists('%s.npz' % saveto):
        print "Reloading model"
        params = load_params('%s.npz' % saveto, params)
    elif pretrained != '':
        params = load_params(pretrained, params,
                             False)  # Only pretrain the Language model

    # numpy arrays -> theano shared variables
    tparams = init_tparams(params)

    # In order, we get:
    #   1) trng - theano random number generator
    #   2) use_noise - flag that turns on dropout
    #   3) inps - inputs for f_grad_shared
    #   4) cost - log likelihood for each sentence
    #   5) opt_outs - optional outputs (e.g. selector)
    trng, use_noise, \
          inps, alphas,\
          cost, \
          opt_outs = \
          build_model(tparams, model_options)

    # Load evaluator to calculate bleu score
    evaluator = cocoEvaluation(model_options['dataset'])

    # To sample, we use beam search: 1) f_init is a function that initializes
    # the LSTM at time 0 [see top right of page 4], 2) f_next returns the distribution over
    # words and also the new "initial state/memory" see equation
    print 'Building sampler'
    f_init, f_next = build_sampler(tparams, model_options, use_noise, trng)

    # we want the cost without any of the regularizers
    # define the log probability
    f_log_probs = theano.function(inps,
                                  -cost,
                                  profile=False,
                                  updates=None,
                                  allow_input_downcast=True)

    # Define the cost function + Regularization
    cost = cost.mean()
    # add L2 regularization costs
    if decay_c > 0.:
        decay_c = theano.shared(numpy.float32(decay_c), name='decay_c')
        weight_decay = 0.
        for kk, vv in tparams.iteritems():
            weight_decay += (vv**2).sum()
        weight_decay *= decay_c
        cost += weight_decay

    # Doubly stochastic regularization
    if alpha_c > 0.:
        alpha_c = theano.shared(numpy.float32(alpha_c), name='alpha_c')
        alpha_reg = sum([
            alpha_c * ((1. - alpha.sum(0))**2).sum(0).mean()
            for alpha in alphas
        ])
        cost += alpha_reg

    # Backprop!
    grads = tensor.grad(cost, wrt=itemlist(tparams))
    # to get the cost after regularization or the gradients, use this

    # f_grad_shared computes the cost and updates adaptive learning rate variables
    # f_update updates the weights of the model
    lr = tensor.scalar(name='lr')
    f_grad_shared, f_update = eval(model_options['optimizer'])(lr, tparams,
                                                               grads, inps,
                                                               cost)

    print 'Optimization'
    train_iter = HomogeneousData(train,
                                 batch_size=batch_size,
                                 maxlen=model_options['maxlen'])

    # history_bleu is a bare-bones training log, reload history
    history_bleu = []
    if os.path.exists('%s.npz' % saveto):
        history_bleu = numpy.load('%s.npz' % saveto)['history_bleu'].tolist()
    start_epochs = len(history_bleu)
    best_p = None
    bad_counter = 0

    if validFreq == -1:
        validFreq = len(train[0]) / batch_size
    if saveFreq == -1:
        saveFreq = len(train[0]) / batch_size
    if sampleFreq == -1:
        sampleFreq = len(train[0]) / batch_size

    uidx = 0
    estop = False
    for eidx in xrange(start_epochs, model_options['max_epochs']):
        n_samples = 0

        print 'Epoch ', eidx

        for caps in train_iter:
            n_samples += len(caps)
            uidx += 1
            # turn on dropout
            use_noise.set_value(1.)

            # preprocess the caption, recording the
            # time spent to help detect bottlenecks
            pd_start = time.time()
            x, mask, ctx, cnn_feats = prepare_data(caps, train[1], train[2],
                                                   worddict, model_options)
            pd_duration = time.time() - pd_start

            if x is None:
                print 'Minibatch with zero sample under length ', model_options[
                    'maxlen']
                continue

            # get the cost for the minibatch, and update the weights
            ud_start = time.time()
            cost = f_grad_shared(x, mask, ctx, cnn_feats)

            print "Epoch %d, Updates: %d, Cost is: %f" % (eidx, uidx, cost)

            f_update(model_options['lrate'])
            ud_duration = time.time(
            ) - ud_start  # some monitoring for each mini-batch

            # Numerical stability check
            if numpy.isnan(cost) or numpy.isinf(cost):
                print 'NaN detected'
                return 1., 1., 1.

            if numpy.mod(uidx, dispFreq) == 0:
                print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'PD ', pd_duration, 'UD ', ud_duration

            # Print a generated sample as a sanity check
            if numpy.mod(uidx, model_options['sampleFreq']) == 0:
                # turn off dropout first
                use_noise.set_value(0.)
                x_s = x
                mask_s = mask
                ctx_s = ctx
                # generate and decode a subset of the current training batch
                for jj in xrange(numpy.minimum(10, len(caps))):
                    sample, score, alphas = gen_sample(
                        f_init,
                        f_next,
                        ctx_s[jj],
                        cnn_feats[jj],
                        model_options,
                        trng=trng,
                        maxlen=model_options['maxlen'])
                    # Decode the sample from encoding back to words
                    print 'Truth ', jj, ': ',
                    print seqs2words(x_s[:, jj], word_idict)
                    for kk, ss in enumerate([sample[0]]):
                        print 'Sample (', kk, ') ', jj, ': ',
                        print seqs2words(ss, word_idict)

            # Run validation and checkpoint the model with the best validation score
            if numpy.mod(uidx, validFreq) == 0:
                use_noise.set_value(0.)

                # Do evaluation on validation set
                imgid = collapse([elem[-1] for elem in valid[0]])
                caps = process_examples([f_init], [f_next], imgid, valid[1],
                                        valid[2], word_idict, model_options)
                folder = osp.join('../output', '%s_%s' % (saveto, 'val'))
                if not osp.exists(folder):
                    os.mkdir(folder)
                with open(osp.join(folder, 'captions_val2014_results.json'),
                          'w') as f:
                    json.dump(caps, f)
                eva_result = evaluator.evaluate(folder, False)
                if model_options['criterion'] == 'combine':
                    history_bleu.append(eva_result['Bleu_4'] +
                                        eva_result['CIDEr'])
                else:
                    history_bleu.append(eva_result[model_options['criterion']])

                # the model with the best validation score is saved separately under a different name
                if uidx == 0 or history_bleu[-1] == max(history_bleu):
                    best_p = unzip(tparams)
                    print 'Saving model with best validation score'
                    params = copy.copy(best_p)
                    numpy.savez(saveto + '_bestll',
                                history_bleu=history_bleu,
                                **params)
                    bad_counter = 0

                # abort training if the validation score has not improved for too long
                if len(history_bleu) > model_options[
                        'patience'] and history_bleu[-1] <= max(
                            history_bleu[:-model_options['patience']]):
                    bad_counter += 1
                    if bad_counter > model_options['patience']:
                        print 'Early Stop!'
                        estop = True
                        break

                print ' BLEU-4 score ', history_bleu[-1]

            # Checkpoint
            if numpy.mod(uidx, model_options['saveFreq']) == 0:
                print 'Saving...',

                if best_p is not None:
                    params = copy.copy(best_p)
                else:
                    params = unzip(tparams)
                numpy.savez(saveto, history_bleu=history_bleu, **params)
                pkl.dump(model_options, open('%s.pkl' % saveto, 'wb'))
                print 'Done'

        print 'Seen %d samples' % n_samples

        if estop:
            break

        if model_options['save_per_epoch']:
            numpy.savez(saveto + '_epoch_' + str(eidx + 1),
                        history_bleu=history_bleu,
                        **unzip(tparams))

    # use the best nll parameters for final checkpoint (if they exist)
    if best_p is not None:
        zipp(best_p, tparams)
    params = copy.copy(best_p)
    numpy.savez(saveto,
                zipped_params=best_p,
                history_bleu=history_bleu,
                **params)
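
# --- Illustrative sketch (not part of the example above) ---
# The L2 weight decay term above is just decay_c times the sum of squared
# entries of every parameter tensor. A NumPy-only sketch over made-up
# parameters:
import numpy as np

decay_c_demo = 1e-4
toy_params = {'W': np.random.randn(3, 3), 'b': np.random.randn(3)}
weight_decay_demo = decay_c_demo * sum((v ** 2).sum() for v in toy_params.values())
print(weight_decay_demo)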
Ejemplo n.º 35
0
def main(model, saveto, k=5, normalize=False, zero_pad=False, datasets='dev,test', sampling=False, pkl_name=None):
    # load model options
    if pkl_name is None:
        pkl_name = model
    with open('%s.pkl'% pkl_name, 'rb') as f:
        options = pkl.load(f)

    # fetch data, skip ones we aren't using to save time
    load_data, prepare_data = get_dataset(options['dataset'])
    train, valid, test, worddict = load_data(path='./data/coco/', load_train=True if 'train' in datasets else False,
                                             load_dev=True if 'dev' in datasets else False,
                                             load_test=True if 'test' in datasets else False)
    
    # import pdb; pdb.set_trace()
    # <eos> means end of sequence (aka periods), UNK means unknown
    word_idict = dict()
    for kk, vv in worddict.iteritems():
        word_idict[vv] = kk
    word_idict[0] = '<eos>'
    word_idict[1] = 'UNK'

    # get the parameters
    params = init_params(options)
    params = load_params(model, params)
    tparams = init_tparams(params)

    # index -> words
    def _seqs2words(caps):
        capsw = []
        for cc in caps:
            ww = []
            for w in cc:
                if w == 0:
                    break
                ww.append(word_idict[w])
            capsw.append(' '.join(ww))
        return capsw

    # process all dev examples
    def _process_examples(contexts, params, tparams):
        caps = [None] * contexts.shape[0]
        for idx, ctx in enumerate(contexts):
            cc = ctx.todense().reshape([5,4122])
            #cc = ctx.todense().reshape([14*14,512])
            if zero_pad:
                cc0 = numpy.zeros((cc.shape[0]+1, cc.shape[1])).astype('float32')
                cc0[:-1,:] = cc
            else:
                cc0 = cc
            resp = gen_model(idx, cc0, model, options, k, normalize, word_idict, sampling, params, tparams)
            caps[resp[0]] = resp[1]
            print 'Sample ', (idx+1), '/', contexts.shape[0], ' Done'
            print resp[1]
            sys.stdout.flush()
        return caps

    ds = datasets.strip().split(',')

    # send all the features for the various datasets
    for dd in ds:
        if dd == 'train':
            print 'Training Set...',
            new_train = train[1][:2000]
            caps = _seqs2words(_process_examples(new_train, params, tparams))
            # import pdb; pdb.set_trace()
            with open(saveto+'.train.txt', 'w') as f:
                print >>f, '\n'.join(caps)
            print 'Done'
        if dd == 'dev':
            print 'Development Set...',
            caps = _seqs2words(_process_examples(valid[1], params, tparams))
            # import pdb; pdb.set_trace()
            with open(saveto+'.dev.txt', 'w') as f:
                print >>f, '\n'.join(caps)
            print 'Done'
        if dd == 'test':
            print 'Test Set...',
            caps = _seqs2words(_process_examples(test[1][:1000], params, tparams)) # subset
            with open(saveto+'.test.txt', 'w') as f:
                print >>f, '\n'.join(caps)
            print 'Done'
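
# --- Illustrative check (not part of the example above) ---
# _seqs2words above converts each id sequence back to text, stopping at id 0,
# which codes for <eos>. A standalone rendition on a made-up vocabulary:
def seqs2words_demo(caps, word_idict):
    capsw = []
    for cc in caps:
        ww = []
        for w in cc:
            if w == 0:        # 0 codes for <eos>
                break
            ww.append(word_idict[w])
        capsw.append(' '.join(ww))
    return capsw

print(seqs2words_demo([[1, 2, 3, 0, 2], [2, 3, 0]], {1: 'a', 2: 'dog', 3: 'runs'}))
# expected output: ['a dog runs', 'dog runs']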
Ejemplo n.º 36
0
def train(args, model_args):

    #model_id = '/data/lisatmp4/lambalex/lsun_walkback/walkback_'

    model_id = '/data/lisatmp4/anirudhg/cifar_walk_back/walkback_'
    model_dir = create_log_dir(args, model_id)
    model_id2 = 'logs/walkback_'
    model_dir2 = create_log_dir(args, model_id2)
    print model_dir
    print model_dir2 + '/' + 'log.jsonl.gz'
    logger = mimir.Logger(filename=model_dir2 + '/log.jsonl.gz',
                          formatter=None)

    # TODO batches_per_epoch should not be hard coded
    lrate = args.lr
    import sys
    sys.setrecursionlimit(10000000)
    args, model_args = parse_args()

    #trng = RandomStreams(1234)

    if args.resume_file is not None:
        print "Resuming training from " + args.resume_file
        from blocks.scripts import continue_training
        continue_training(args.resume_file)

    ## load the training data
    if args.dataset == 'MNIST':
        print 'loading MNIST'
        from fuel.datasets import MNIST
        dataset_train = MNIST(['train'], sources=('features', ))
        dataset_test = MNIST(['test'], sources=('features', ))
        n_colors = 1
        spatial_width = 28

    elif args.dataset == 'CIFAR10':
        from fuel.datasets import CIFAR10
        dataset_train = CIFAR10(['train'], sources=('features', ))
        dataset_test = CIFAR10(['test'], sources=('features', ))
        n_colors = 3
        spatial_width = 32

    elif args.dataset == "lsun" or args.dataset == "lsunsmall":

        print "loading lsun class!"

        from load_lsun import load_lsun

        print "loading lsun data!"

        if args.dataset == "lsunsmall":
            dataset_train, dataset_test = load_lsun(args.batch_size,
                                                    downsample=True)
            spatial_width = 32
        else:
            dataset_train, dataset_test = load_lsun(args.batch_size,
                                                    downsample=False)
            spatial_width = 64

        n_colors = 3

    elif args.dataset == "celeba":

        print "loading celeba data"

        from fuel.datasets.celeba import CelebA

        dataset_train = CelebA(which_sets=['train'],
                               which_format="64",
                               sources=('features', ),
                               load_in_memory=False)
        dataset_test = CelebA(which_sets=['test'],
                              which_format="64",
                              sources=('features', ),
                              load_in_memory=False)

        spatial_width = 64
        n_colors = 3

        tr_scheme = SequentialScheme(examples=dataset_train.num_examples,
                                     batch_size=args.batch_size)
        ts_scheme = SequentialScheme(examples=dataset_test.num_examples,
                                     batch_size=args.batch_size)

        train_stream = DataStream.default_stream(dataset_train,
                                                 iteration_scheme=tr_scheme)
        test_stream = DataStream.default_stream(dataset_test,
                                                iteration_scheme=ts_scheme)

        dataset_train = train_stream
        dataset_test = test_stream

        #epoch_it = train_stream.get_epoch_iterator()

    elif args.dataset == 'Spiral':
        print 'loading SPIRAL'
        train_set = Spiral(num_examples=100000,
                           classes=1,
                           cycles=2.,
                           noise=0.01,
                           sources=('features', ))
        dataset_train = DataStream.default_stream(
            train_set,
            iteration_scheme=ShuffledScheme(train_set.num_examples,
                                            args.batch_size))

    else:
        raise ValueError("Unknown dataset %s." % args.dataset)

    model_options = locals().copy()

    if args.dataset != 'lsun' and args.dataset != 'celeba':
        train_stream = Flatten(
            DataStream.default_stream(
                dataset_train,
                iteration_scheme=ShuffledScheme(
                    examples=dataset_train.num_examples -
                    (dataset_train.num_examples % args.batch_size),
                    batch_size=args.batch_size)))
    else:
        train_stream = dataset_train
        test_stream = dataset_test

    print "Width", WIDTH, spatial_width

    shp = next(train_stream.get_epoch_iterator())[0].shape

    print "got epoch iterator"

    # make the training data 0 mean and variance 1
    # TODO compute mean and variance on full dataset, not minibatch
    Xbatch = next(train_stream.get_epoch_iterator())[0]
    scl = 1. / np.sqrt(np.mean((Xbatch - np.mean(Xbatch))**2))
    shft = -np.mean(Xbatch * scl)
    # scale is applied before shift
    #train_stream = ScaleAndShift(train_stream, scl, shft)
    #test_stream = ScaleAndShift(test_stream, scl, shft)

    print 'Building model'
    params = init_params(model_options)
    if args.reload_:
        print "Trying to reload parameters"
        if os.path.exists(args.saveto_filename):
            print 'Reloading Parameters'
            print args.saveto_filename
            params = load_params(args.saveto_filename, params)
    tparams = init_tparams(params)
    print tparams
    '''
    x = T.matrix('x', dtype='float32')
    temp  = T.scalar('temp', dtype='float32')
    f=transition_operator(tparams, model_options, x, temp)

    for data in train_stream.get_epoch_iterator():
        print data[0]
        a = f([data[0], 1.0, 1])
        #ipdb.set_trace()
    '''
    x, cost, start_temperature = build_model(tparams, model_options)
    inps = [x, start_temperature]

    x_Data = T.matrix('x_Data', dtype='float32')
    temperature = T.scalar('temperature', dtype='float32')
    forward_diffusion = one_step_diffusion(x_Data, model_options, tparams,
                                           temperature)

    #print 'Building f_cost...',
    #f_cost = theano.function(inps, cost)
    #print 'Done'
    print tparams
    grads = T.grad(cost, wrt=itemlist(tparams))

    #get_grads = theano.function(inps, grads)

    for j in range(0, len(grads)):
        grads[j] = T.switch(T.isnan(grads[j]), T.zeros_like(grads[j]),
                            grads[j])

    # compile the optimizer, the actual computational graph is compiled here
    lr = T.scalar(name='lr')
    print 'Building optimizers...',
    optimizer = args.optimizer

    f_grad_shared, f_update = getattr(optimizers, optimizer)(lr, tparams,
                                                             grads, inps, cost)
    print 'Done'

    for param in tparams:
        print param
        print tparams[param].get_value().shape

    print 'Building Sampler....'
    f_sample = sample(tparams, model_options)
    print 'Done'

    uidx = 0
    estop = False
    bad_counter = 0
    max_epochs = 4000
    batch_index = 1
    print 'Number of steps....'
    print args.num_steps
    print "Number of metasteps...."
    print args.meta_steps
    print 'Done'
    count_sample = 1
    for eidx in xrange(max_epochs):
        if eidx % 20 == 0:
            params = unzip(tparams)
            save_params(params,
                        model_dir + '/' + 'params_' + str(eidx) + '.npz')
        n_samples = 0
        print 'Starting Next Epoch ', eidx
        for data in train_stream.get_epoch_iterator():

            if args.dataset == 'CIFAR10':
                if data[0].shape[0] == args.batch_size:
                    data_use = (data[0].reshape(args.batch_size,
                                                3 * 32 * 32), )
                else:
                    continue
            else:
                # assume other datasets already arrive flattened from the stream;
                # without this branch data_use would be undefined below
                data_use = (data[0], )
            t0 = time.time()
            batch_index += 1
            n_samples += len(data_use[0])
            uidx += 1
            if data_use[0] is None:
                print 'No data '
                uidx -= 1
                continue
            ud_start = time.time()

            t1 = time.time()

            data_run = data_use[0]
            temperature_forward = args.temperature
            meta_cost = []
            for meta_step in range(0, args.meta_steps):
                meta_cost.append(f_grad_shared(data_run, temperature_forward))
                f_update(lrate)
                if args.meta_steps > 1:
                    data_run, sigma, _, _ = forward_diffusion(
                        [data_run, temperature_forward, 1])
                    temperature_forward *= args.temperature_factor
            cost = sum(meta_cost) / len(meta_cost)

            ud = time.time() - ud_start

            #gradient_updates_ = get_grads(data_use[0],args.temperature)

            if np.isnan(cost) or np.isinf(cost):
                print 'NaN detected'
                return 1.
            t1 = time.time()
            #print time.time() - t1, "time to get grads"
            t1 = time.time()
            logger.log({
                'epoch': eidx,
                'batch_index': batch_index,
                'uidx': uidx,
                'training_error': cost
            })
            #'Norm_1': np.linalg.norm(gradient_updates_[0]),
            #'Norm_2': np.linalg.norm(gradient_updates_[1]),
            #'Norm_3': np.linalg.norm(gradient_updates_[2]),
            #'Norm_4': np.linalg.norm(gradient_updates_[3])})
            #print time.time() - t1, "time to log"

            #print time.time() - t0, "total time in batch"
            t5 = time.time()

            if batch_index % 20 == 0:
                print batch_index, "cost", cost

            if batch_index % 200 == 0:
                count_sample += 1
                temperature = args.temperature * (args.temperature_factor**(
                    args.num_steps * args.meta_steps - 1))
                temperature_forward = args.temperature

                for num_step in range(args.num_steps * args.meta_steps):
                    print "Forward temperature", temperature_forward
                    if num_step == 0:
                        x_data, sampled, sampled_activation, sampled_preactivation = forward_diffusion(
                            [data_use[0], temperature_forward, 1])
                        x_data = np.asarray(x_data).astype('float32').reshape(
                            args.batch_size, INPUT_SIZE)
                        x_temp = x_data.reshape(args.batch_size, n_colors,
                                                WIDTH, WIDTH)
                        plot_images(
                            x_temp, model_dir + '/' + "batch_" +
                            str(batch_index) + '_corrupted' + 'epoch_' +
                            str(count_sample) + '_time_step_' + str(num_step))
                    else:
                        x_data, sampled, sampled_activation, sampled_preactivation = forward_diffusion(
                            [x_data, temperature_forward, 1])
                        x_data = np.asarray(x_data).astype('float32').reshape(
                            args.batch_size, INPUT_SIZE)
                        x_temp = x_data.reshape(args.batch_size, n_colors,
                                                WIDTH, WIDTH)
                        plot_images(
                            x_temp, model_dir + '/batch_' + str(batch_index) +
                            '_corrupted' + '_epoch_' + str(count_sample) +
                            '_time_step_' + str(num_step))

                    temperature_forward = temperature_forward * args.temperature_factor

                x_temp2 = data_use[0].reshape(args.batch_size, n_colors, WIDTH,
                                              WIDTH)
                plot_images(
                    x_temp2, model_dir + '/' + 'orig_' + 'epoch_' + str(eidx) +
                    '_batch_index_' + str(batch_index))

                temperature = args.temperature * (args.temperature_factor**(
                    args.num_steps * args.meta_steps - 1))

                for i in range(args.num_steps * args.meta_steps +
                               args.extra_steps):
                    x_data, sampled, sampled_activation, sampled_preactivation = f_sample(
                        [x_data, temperature, 0])
                    print 'Backward step', i, 'using temperature', temperature
                    reverse_time(
                        scl, shft, x_data, model_dir + '/' + "batch_" +
                        str(batch_index) + '_samples_backward_' + 'epoch_' +
                        str(count_sample) + '_time_step_' + str(i))
                    x_data = np.asarray(x_data).astype('float32')
                    x_data = x_data.reshape(args.batch_size, INPUT_SIZE)
                    if temperature != args.temperature:
                        temperature /= args.temperature_factor

                if args.noise == "gaussian":
                    x_sampled = np.random.normal(
                        0.5, 2.0,
                        size=(args.batch_size, INPUT_SIZE)).clip(0.0, 1.0)
                else:
                    # assumed fix: draw binary noise of the full batch shape so that
                    # x_sampled is defined below (the original assigned an unused `s`)
                    x_sampled = np.random.binomial(
                        1, 0.5, size=(args.batch_size, INPUT_SIZE))

                temperature = args.temperature * (args.temperature_factor**(
                    args.num_steps * args.meta_steps - 1))

                x_data = np.asarray(x_sampled).astype('float32')
                for i in range(args.num_steps * args.meta_steps +
                               args.extra_steps):
                    x_data, sampled, sampled_activation, sampled_preactivation = f_sample(
                        [x_data, temperature, 0])
                    print 'Sampling step', i, 'using temperature', temperature
                    reverse_time(
                        scl, shft, x_data, model_dir + '/batch_index_' +
                        str(batch_index) + '_inference_' + 'epoch_' +
                        str(count_sample) + '_step_' + str(i))
                    x_data = np.asarray(x_data).astype('float32')
                    x_data = x_data.reshape(args.batch_size, INPUT_SIZE)
                    if temperature != args.temperature:
                        temperature /= args.temperature_factor

    ipdb.set_trace()
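
# --- Illustrative sketch (not part of the example above) ---
# The loops above heat the chain geometrically on the forward diffusion pass
# (multiply by temperature_factor each step) and cool it again on the reverse
# pass (divide until the base temperature is reached). A standalone sketch of
# that schedule with made-up settings:
base_temperature = 1.0
temperature_factor = 1.1
num_steps = 5

forward = [base_temperature * temperature_factor ** k for k in range(num_steps)]
backward = []
temperature_demo = forward[-1]
for _ in range(num_steps):
    backward.append(temperature_demo)
    if temperature_demo != base_temperature:
        temperature_demo /= temperature_factor
print(forward)
print(backward)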