예제 #1
0
def search_model_adam(state, channel, reload_model=False):
    """Build an ``NTMModel`` for one FB bAbI task and train it with Adam.

    Hyper-parameters are read from ``state``.  Values fetched through
    ``state.get`` fall back to the defaults written below; values read as
    attributes (``state.lr``, ``state.mem_size``, ...) are mandatory.

    Args:
        state: Experiment configuration.  Must support attribute access
            and ``get(key, default)``.
        channel: Job channel object, or ``None`` when there is none.
        reload_model: Forwarded unchanged to ``FBaBIMainLoop``.

    Returns:
        ``None`` when ``channel`` is ``None``, otherwise
        ``channel.COMPLETE``.
    """
    import sys

    pp.pprint(state)

    def get_inps(use_mask=True,
                 vgen=None,
                 use_bow_out=False,
                 debug=False,
                 output_map=None):
        """Create the symbolic Theano inputs of the model.

        When ``debug`` is true, test values taken from the next batch of
        ``vgen`` are attached to the variables.  With ``use_mask`` the
        result is a dict (``output_map`` truthy) or a list; without it
        the result is always ``[X, y]``.
        """
        if not use_mask:
            X, y = TT.itensor3("X"), TT.itensor3("y")
            if debug:
                theano.config.compute_test_value = "warn"
                batch = vgen.next()
                X.tag.test_value = batch['x']
                y.tag.test_value = batch['y']
            return [X, y]

        X = TT.itensor3("X")
        y = TT.imatrix("y")
        mask = TT.fmatrix("mask")
        cmask = TT.fmatrix("cost_mask")
        qmask = TT.fmatrix("qmask")
        bow_out = TT.ftensor3("bow_out")

        if debug:
            theano.config.compute_test_value = "warn"
            batch = vgen.next()
            X.tag.test_value = batch['x'].astype("int32")
            y.tag.test_value = batch['y'].astype("int32")
            mask.tag.test_value = batch['mask'].astype("float32")
            cmask.tag.test_value = batch['cmask'].astype("float32")
            qmask.tag.test_value = batch["qmask"].astype("float32")
            if use_bow_out:
                bow_out.tag.test_value = batch['bow_out'].astype("float32")

        if output_map:
            outs = {"X": X, "y": y, "mask": mask, "cmask": cmask}
            if use_bow_out:
                outs["bow_out"] = bow_out
            outs["qmask"] = qmask
            return outs

        # NOTE(review): the list form does not carry qmask, unlike the
        # dict form above -- confirm that list consumers do not need it.
        outs = [X, y, mask, cmask]
        if use_bow_out:
            outs.append(bow_out)
        return outs

    lr = state.lr
    batch_size = state.batch_size

    # The seed is written to a file under save_path before SEEDSetter
    # is pointed at that same file.
    seed = state.get("seed", 3)
    seed_path = "{0}/seed_{1}.txt".format(state.save_path, str(seed))
    replace_seed(seed_path, seed)
    seed_setter = SEEDSetter(seed_path)
    print("seed is %s" % (seed_setter,))

    # Number of elements in the columns of the memory content.
    mem_size = state.mem_size

    # Number of rows in the memory M.
    mem_nel = state.mem_nel
    std = state.std
    renormalization_scale = state.renormalization_scale
    sub_mb_size = state.sub_mb_size
    smoothed_diff_weights = state.get('smoothed_diff_weights', True)

    # Number of hidden units of the controller.
    n_hids = state.n_hids

    # Deep output is disabled below (use_deepout=False); the size is
    # still passed to the model.
    deep_out_size = 100

    # Size of the bag-of-words embeddings.
    bow_size = state.get('bow_size', 80)

    # Feed-forward controller switch.
    use_ff_controller = state.use_ff_controller

    # Settings for the RNN controller.
    learn_h0 = state.get('learn_h0', False)
    use_nogru_mem2q = False

    # Location based addressing switch.
    use_loc_based_addressing = state.get('use_loc_based_addressing', False)
    bowout = state.get('bowout', True)
    use_reinforce = state.get('use_reinforce', False)

    max_seq_len = state.max_seq_len
    max_fact_len = state.max_fact_len

    n_read_heads = state.n_read_heads
    n_write_heads = 1
    n_reading_steps = state.n_reading_steps

    lambda1_rein = state.get('lambda1_rein', 4e-5)
    lambda2_rein = state.get('lambda2_rein', 1e-5)
    base_reg = 2e-5

    # Size of an address in the memory.
    address_size = state.address_size
    w2v_embed_scale = 0.05
    use_layer_norm = state.get('use_layer_norm', False)

    rng = np.random.RandomState(int(seed))
    use_noise = False
    emb_scale = state.get('emb_scale', 0.32)

    use_quad_interactions = state.get('use_quad_interactions', True)

    mode = state.get('theano_function_mode', None)
    sys.setrecursionlimit(50000)

    learning_rule = Adam(gradient_clipping=state.get('gradient_clip', 10))
    task_id = state.task_id
    print("Task id is,  %s" % (task_id,))

    cont_act = Tanh
    mem_gater_activ = Sigmoid
    erase_activ = Sigmoid
    content_activ = Tanh
    use_gru_inp = state.get('use_gru_inp', True)
    use_bow_inp = state.get('use_bow_inp', False)

    w2v_embed_path = None
    use_reinforce_baseline = state.use_reinforce_baseline
    l1_pen = state.get('l1_pen', 1e-4)
    hybrid_att = state.get('hybrid_att', False)
    debug = state.get('debug', False)
    correlation_ws = state.get('correlation_ws', 6e-4)
    anticorr = state.get('anticorr', None)
    path = state.path

    # Explicit mapping instead of locals(): the checkpoint prefix then
    # depends only on the fields named here.
    prfx_fields = {
        "task_id": task_id,
        "seed": seed,
        "n_hids": n_hids,
        "batch_size": batch_size,
        "std": std,
        "mem_nel": mem_nel,
        "mem_size": mem_size,
        "lr": lr,
    }
    prfx = (
        "ntm_on_fb_BABI_task_%(task_id)d_seed_%(seed)s_learn_h0_l1_no_n_hids_%(n_hids)s_bsize_%(batch_size)d"
        "_std_%(std)f_mem_nel_%(mem_nel)d_mem_size_%(mem_size)f_lr_%(lr)f"
    ) % prfx_fields

    # NOTE(review): plain concatenation -- save_path presumably ends
    # with a path separator; confirm against the callers.
    prfx = state.save_path + prfx

    fact_vocab = "../all_tasks_test_ngram_False_dict.pkl"

    tdata_gen = FBbABIDataIteratorSingleQ(
        task_file='all_tasks_train_ngram_False.pkl',
        randomize=True,
        max_seq_len=max_seq_len,
        max_fact_len=max_fact_len,
        task_id=task_id,
        task_path=path,
        mode='train',
        fact_vocab=fact_vocab,
        batch_size=batch_size)

    def make_eval_iter(task_file):
        """Build a non-randomized iterator over ``task_file``."""
        # NOTE(review): the test iterator is created with mode="valid"
        # as well -- confirm this is intended.
        return FBbABIDataIteratorSingleQ(
            task_file=task_file,
            max_fact_len=tdata_gen.max_fact_len,
            max_seq_len=max_seq_len,
            randomize=False,
            task_id=task_id,
            mode="valid",
            task_path=path,
            fact_vocab=fact_vocab,
            batch_size=batch_size)

    vdata_gen = make_eval_iter('all_tasks_valid_ngram_False.pkl')
    tst_data_gen = make_eval_iter('../all_tasks_test_ngram_False.pkl')

    use_mask = True
    n_layers = state.get('n_layers', 1)
    inps = get_inps(vgen=vdata_gen,
                    debug=debug,
                    use_bow_out=bowout,
                    output_map=True)

    wi = WeightInitializer(sparsity=-1,
                           scale=std,
                           rng=rng,
                           init_method=InitMethods.Adaptive,
                           center=0.0)

    bi = BiasInitializer(sparsity=-1,
                         scale=std,
                         rng=rng,
                         init_method=BiasInitMethods.Constant,
                         center=0.0)

    # The vocabulary size is both the input and the output dimension.
    vocab_size = len(tdata_gen.vocab.items())
    print("Length of the vocabulary,  %s" % (vocab_size,))

    ntm = NTMModel(n_in=vocab_size,
                   n_hids=n_hids,
                   bow_size=bow_size,
                   n_out=vocab_size,
                   predict_bow_out=bowout,
                   mem_size=mem_size,
                   mem_nel=mem_nel,
                   use_ff_controller=use_ff_controller,
                   sub_mb_size=sub_mb_size,
                   deep_out_size=deep_out_size,
                   inps=inps,
                   n_layers=n_layers,
                   hybrid_att=hybrid_att,
                   smoothed_diff_weights=smoothed_diff_weights,
                   baseline_reg=base_reg,
                   w2v_embed_path=w2v_embed_path,
                   renormalization_scale=renormalization_scale,
                   w2v_embed_scale=w2v_embed_scale,
                   emb_scale=emb_scale,
                   n_read_heads=n_read_heads,
                   n_write_heads=n_write_heads,
                   use_layer_norm=use_layer_norm,
                   use_last_hidden_state=False,
                   use_loc_based_addressing=use_loc_based_addressing,
                   use_simple_rnn_inp_rep=False,
                   use_gru_inp_rep=use_gru_inp,
                   use_bow_input=use_bow_inp,
                   anticorr=anticorr,
                   erase_activ=erase_activ,
                   use_gate_quad_interactions=use_quad_interactions,
                   content_activ=content_activ,
                   use_multiscale_shifts=True,
                   correlation_ws=correlation_ws,
                   learning_rule=learning_rule,
                   lambda1_rein=lambda1_rein,
                   lambda2_rein=lambda2_rein,
                   n_reading_steps=n_reading_steps,
                   use_deepout=False,
                   use_reinforce=use_reinforce,
                   use_nogru_mem2q=use_nogru_mem2q,
                   use_reinforce_baseline=use_reinforce_baseline,
                   controller_activ=cont_act,
                   use_adv_indexing=False,
                   use_out_mem=False,
                   unroll_recurrence=False,
                   address_size=address_size,
                   reinforce_decay=0.9,
                   learn_h0=learn_h0,
                   theano_function_mode=mode,
                   l1_pen=l1_pen,
                   debug=debug,
                   mem_gater_activ=mem_gater_activ,
                   tie_read_write_gates=False,
                   weight_initializer=wi,
                   bias_initializer=bi,
                   use_cost_mask=True,
                   use_noise=use_noise,
                   max_fact_len=max_fact_len,
                   softmax=True,
                   use_mask=use_mask,
                   batch_size=batch_size)

    bow_weight_stop = state.get('bow_weight_stop', 1.2 * 1e-1)
    bow_weight_anneal_start = state.get('bow_weight_anneal_start', 320)
    bow_weight_start = state.get("bow_weight_start", 0.74)
    bow_out_anneal_rate = state.get("bow_out_anneal_rate", 2 * 1e-4)
    save_freq = state.get("save_freq", 1000)

    main_loop = FBaBIMainLoop(ntm,
                              print_every=50,
                              checkpoint_every=save_freq,
                              validate_every=500,
                              bow_out_anneal_rate=bow_out_anneal_rate,
                              bow_weight_start=bow_weight_start,
                              bow_weight_stop=bow_weight_stop,
                              bow_weight_anneal_start=bow_weight_anneal_start,
                              train_data_gen=tdata_gen,
                              valid_data_gen=vdata_gen,
                              test_data_gen=tst_data_gen,
                              learning_rate=lr,
                              reload_model=reload_model,
                              valid_iters=None,
                              linear_start=False,
                              use_qmask=True,
                              max_iters=state.max_iters,
                              state=state,
                              prefix=prfx)
    main_loop.run()

    return None if channel is None else channel.COMPLETE
예제 #2
0
def search_model_adam(state, channel, reload_model=False):
    """Build an ``NTMModel`` and train it with ``SeqMNISTMainLoop``.

    The model is configured for inputs of size 1, sequences of length
    784 and 10 output classes.  Hyper-parameters come from ``state``:
    entries read with ``state[...]`` are mandatory, entries read with
    ``state.get`` fall back to the defaults written below.

    Args:
        state: Mapping-like experiment configuration.
        channel: Unused by this function.
        reload_model: Forwarded unchanged to ``SeqMNISTMainLoop``.

    Returns:
        ``None`` (the function ends after ``main_loop.run()``).
    """
    import sys

    pp.pprint(state)

    def get_inps(vgen=None, debug=False, output_map=None):
        """Create the symbolic inputs ``[X, y]``.

        ``output_map`` is accepted but not used.  When ``debug`` is true
        the first batch of ``vgen`` provides the Theano test values.
        """
        X = TT.fmatrix("X")
        y = TT.vector("y", dtype="uint8")

        if debug:
            theano.config.compute_test_value = "warn"
            batch = vgen.get_epoch_iterator().next()
            # Each example is flattened to one row, labels to a vector.
            X.tag.test_value = batch[0].reshape((batch[0].shape[0], -1))
            y.tag.test_value = batch[1].flatten()
        return [X, y]

    lr = state['lr']
    batch_size = state['batch_size']

    # Number of elements in the columns of the memory content.
    mem_size = state['mem_size']

    # Number of rows in the memory M.
    mem_nel = state['mem_nel']
    std = state['std']
    renormalization_scale = state['renormalization_scale']
    sub_mb_size = state['sub_mb_size']
    smoothed_diff_weights = state.get('smoothed_diff_weights', False)

    max_len = 784
    inp_size = 1

    # Number of hidden units of the controller.
    n_hids = state['n_hids']

    # Deep output is disabled below (use_deepout=False); the size is
    # still passed to the model.
    deep_out_size = 100

    # Size of the bag-of-words embeddings.
    bow_size = state.get('bow_size', 80)

    # Feed-forward controller switch.
    use_ff_controller = state['use_ff_controller']

    # Settings for the RNN controller.
    learn_h0 = state.get('learn_h0', False)
    use_nogru_mem2q = False

    # Location based addressing switch.
    use_loc_based_addressing = state.get('use_loc_based_addressing', False)
    bowout = state.get('bowout', False)
    use_reinforce = state.get('use_reinforce', False)
    permute_order = state.get('permute_order', True)

    seed = 7
    n_read_heads = state['n_read_heads']
    n_write_heads = 1
    n_reading_steps = state['n_reading_steps']

    lambda1_rein = state.get('lambda1_rein', 4e-5)
    lambda2_rein = state.get('lambda2_rein', 1e-5)
    base_reg = 2e-5

    # Size of an address in the memory.
    address_size = state["address_size"]
    w2v_embed_scale = 0.05
    n_out = 10

    rng = np.random.RandomState(seed)
    use_noise = False

    use_quad_interactions = state.get('use_quad_interactions', True)

    mode = state.get('theano_function_mode', None)
    sys.setrecursionlimit(50000)

    learning_rule = Adam(gradient_clipping=state.get('gradient_clip', 10))

    cont_act = Tanh
    mem_gater_activ = Sigmoid
    erase_activ = Sigmoid
    content_activ = Tanh
    use_gru_inp = state.get('use_gru_inp', False)
    use_bow_inp = state.get('use_bow_inp', False)

    w2v_embed_path = None
    use_reinforce_baseline = state['use_reinforce_baseline']

    l1_pen = state.get('l1_pen', 1e-4)
    hybrid_att = state.get('hybrid_att', False)
    debug = state.get('debug', False)
    correlation_ws = state.get('correlation_ws', False)

    # Random index order handed to the model as rnd_indxs.
    # NOTE(review): this shuffle uses the global numpy RNG, not the
    # seeded ``rng`` above, so the order can differ between runs (and
    # between a run and a later reload) unless the caller seeds
    # np.random -- confirm whether that is intended.
    idxs = np.arange(max_len)
    np.random.shuffle(idxs)

    use_batch_norm = state.get("use_batch_norm", False)
    anticorr = state.get('anticorr', None)

    # Explicit mapping instead of locals(): the checkpoint prefix then
    # depends only on the fields named here.
    prfx_fields = {
        "n_hids": n_hids,
        "batch_size": batch_size,
        "std": std,
        "mem_nel": mem_nel,
        "mem_size": mem_size,
        "lr": lr,
        "use_batch_norm": use_batch_norm,
    }
    prfx = (
        "ntm_on_fb_copy_task_all_learn_h0_l1_no_n_hids_%(n_hids)s_bsize_%(batch_size)d"
        "_std_%(std)f_mem_nel_%(mem_nel)d_mem_size_%(mem_size)f_lr_%(lr)f_use_bn_%(use_batch_norm)d_hard2"
    ) % prfx_fields

    tdata_gen = get_stream(which_set="train", batch_size=batch_size)
    vdata_gen = get_stream(which_set="valid", batch_size=batch_size)
    tst_data_gen = get_stream(which_set="test", batch_size=batch_size)

    n_layers = state.get('n_layers', 1)
    inps = get_inps(vgen=vdata_gen, debug=debug, output_map=True)

    wi = WeightInitializer(sparsity=-1,
                           scale=std,
                           rng=rng,
                           init_method=InitMethods.Adaptive,
                           center=0.0)

    bi = BiasInitializer(sparsity=-1,
                         scale=1e-3,
                         rng=rng,
                         init_method=BiasInitMethods.Random,
                         center=0.0)

    ntm = NTMModel(n_in=inp_size,
                   n_hids=n_hids,
                   bow_size=bow_size,
                   n_out=n_out,
                   predict_bow_out=bowout,
                   mem_size=mem_size,
                   mem_nel=mem_nel,
                   use_ff_controller=use_ff_controller,
                   sub_mb_size=sub_mb_size,
                   deep_out_size=deep_out_size,
                   inps=inps,
                   n_layers=n_layers,
                   hybrid_att=hybrid_att,
                   smoothed_diff_weights=smoothed_diff_weights,
                   baseline_reg=base_reg,
                   w2v_embed_path=w2v_embed_path,
                   renormalization_scale=renormalization_scale,
                   use_batch_norm=use_batch_norm,
                   w2v_embed_scale=w2v_embed_scale,
                   n_read_heads=n_read_heads,
                   n_write_heads=n_write_heads,
                   use_last_hidden_state=True,
                   use_loc_based_addressing=use_loc_based_addressing,
                   use_simple_rnn_inp_rep=False,
                   use_gru_inp_rep=use_gru_inp,
                   use_bow_input=use_bow_inp,
                   use_inp_content=False,
                   anticorr=anticorr,
                   erase_activ=erase_activ,
                   use_gate_quad_interactions=use_quad_interactions,
                   content_activ=content_activ,
                   use_multiscale_shifts=True,
                   correlation_ws=correlation_ws,
                   learning_rule=learning_rule,
                   lambda1_rein=lambda1_rein,
                   lambda2_rein=lambda2_rein,
                   n_reading_steps=n_reading_steps,
                   use_deepout=False,
                   use_reinforce=use_reinforce,
                   use_nogru_mem2q=use_nogru_mem2q,
                   use_reinforce_baseline=use_reinforce_baseline,
                   controller_activ=cont_act,
                   use_adv_indexing=False,
                   use_out_mem=False,
                   unroll_recurrence=False,
                   address_size=address_size,
                   reinforce_decay=0.9,
                   learn_h0=learn_h0,
                   theano_function_mode=mode,
                   l1_pen=l1_pen,
                   debug=debug,
                   mem_gater_activ=mem_gater_activ,
                   tie_read_write_gates=False,
                   weight_initializer=wi,
                   bias_initializer=bi,
                   use_cost_mask=False,
                   use_noise=use_noise,
                   rnd_indxs=idxs,
                   permute_order=permute_order,
                   max_fact_len=max_len,
                   softmax=True,
                   batch_size=None)

    save_freq = state.get("save_freq", 1000)

    main_loop = SeqMNISTMainLoop(ntm,
                                 print_every=50,
                                 checkpoint_every=save_freq,
                                 validate_every=500,
                                 train_data_gen=tdata_gen,
                                 valid_data_gen=vdata_gen,
                                 test_data_gen=tst_data_gen,
                                 learning_rate=lr,
                                 reload_model=reload_model,
                                 num_epochs=250,
                                 state=state,
                                 prefix=prfx)
    main_loop.run()
예제 #3
0
def search_model_adam(state, channel, reload_model=False):
    """Build an ``NTMModel`` for the copy task and train it with Adam.

    Data comes from ``CopyDataGen`` (sequence length 10, input size 10)
    and training is driven by ``NTMToyMainLoop``.  Hyper-parameters come
    from ``state``: entries read with ``state[...]`` are mandatory,
    entries read with ``state.get`` fall back to the defaults below.

    Args:
        state: Mapping-like experiment configuration.
        channel: Unused by this function.
        reload_model: Forwarded unchanged to ``NTMToyMainLoop``.

    Returns:
        ``None`` (the function ends after ``main_loop.run()``).
    """
    import sys

    pp.pprint(state)

    def get_inps(use_mask=True, vgen=None, debug=False, output_map=None):
        """Create the symbolic Theano inputs of the model.

        When ``debug`` is true, test values taken from the next batch of
        ``vgen`` are attached to the variables.  With ``use_mask`` the
        result is a dict (``output_map`` truthy) or a list; without it
        the result is always ``[X, y]``.
        """
        if not use_mask:
            X, y = TT.tensor3("X"), TT.tensor3("y")

            if debug:
                theano.config.compute_test_value = "warn"
                batch = vgen.next()
                X.tag.test_value = batch['x']
                y.tag.test_value = batch['y']
            return [X, y]

        X = TT.ftensor3("X")
        y = TT.ftensor3("y")
        mask = TT.fmatrix("mask")
        cmask = TT.ftensor3("cost_mask")

        if debug:
            theano.config.compute_test_value = "warn"
            batch = vgen.next()
            X.tag.test_value = batch['x'].astype("float32")
            y.tag.test_value = batch['y'].astype("float32")
            mask.tag.test_value = batch['mask'].astype("float32")
            cmask.tag.test_value = batch['cmask'].astype("float32")

        if output_map:
            return {"X": X, "y": y, "mask": mask, "cmask": cmask}
        return [X, y, mask, cmask]

    lr = state['lr']
    batch_size = state['batch_size']

    # Number of elements in the columns of the memory content.
    mem_size = state['mem_size']

    # Number of rows in the memory M.
    mem_nel = state['mem_nel']
    std = state['std']
    renormalization_scale = state['renormalization_scale']
    sub_mb_size = state['sub_mb_size']
    smoothed_diff_weights = state.get('smoothed_diff_weights', True)

    max_len = 10
    inp_size = 10

    # Number of hidden units of the controller.
    n_hids = state['n_hids']

    # Deep output is disabled below (use_deepout=False); the size is
    # still passed to the model.
    deep_out_size = 100

    # Size of the bag-of-words embeddings.
    bow_size = state.get('bow_size', 80)

    # Feed-forward controller switch.
    use_ff_controller = state['use_ff_controller']

    # Settings for the RNN controller.
    learn_h0 = state.get('learn_h0', False)
    use_nogru_mem2q = False

    # Location based addressing switch.
    use_loc_based_addressing = state.get('use_loc_based_addressing', False)
    bowout = state.get('bowout', False)
    use_reinforce = state.get('use_reinforce', False)

    seed = 7

    n_read_heads = state['n_read_heads']
    n_write_heads = 1
    n_reading_steps = state['n_reading_steps']

    lambda1_rein = state.get('lambda1_rein', 4e-5)
    lambda2_rein = state.get('lambda2_rein', 1e-5)
    base_reg = 2e-5

    # Size of an address in the memory.
    address_size = state['address_size']
    w2v_embed_scale = 0.05

    rng = np.random.RandomState(seed)
    use_noise = False

    use_quad_interactions = state.get('use_quad_interactions', True)
    mode = state.get('theano_function_mode', None)

    sys.setrecursionlimit(50000)

    learning_rule = Adam(gradient_clipping=state.get('gradient_clip', 10))

    cont_act = Tanh
    mem_gater_activ = Sigmoid
    erase_activ = Sigmoid
    content_activ = Tanh
    use_gru_inp = state.get('use_gru_inp', False)
    use_bow_inp = state.get('use_bow_inp', False)

    w2v_embed_path = None
    use_reinforce_baseline = state['use_reinforce_baseline']
    l1_pen = state.get('l1_pen', 1e-4)
    hybrid_att = state.get('hybrid_att', False)
    debug = state.get('debug', False)
    correlation_ws = state.get('correlation_ws', False)

    anticorr = state.get('anticorr', None)

    # Explicit mapping instead of locals(): the checkpoint prefix then
    # depends only on the fields named here.
    prfx_fields = {
        "n_hids": n_hids,
        "batch_size": batch_size,
        "renormalization_scale": renormalization_scale,
        "std": std,
        "mem_nel": mem_nel,
        "mem_size": mem_size,
        "lr": lr,
        "address_size": address_size,
    }
    prfx = ("ntm_copy_learn_h0_l1_no_n_hids_%(n_hids)s_bsize_%(batch_size)d_rs_%(renormalization_scale)f"
            "_std_%(std)f_mem_nel_%(mem_nel)d_mem_size_%(mem_size)d_lr_%(lr)f_%(address_size)d") % prfx_fields

    # NOTE(review): plain concatenation without a path separator -- with
    # the default "." the prefix becomes ".ntm_copy_..."; confirm that
    # save_path is expected to end with a separator.
    save_path = state.get("save_path", ".")
    prfx = save_path + prfx

    # All three generators share the same ``rng`` object; only the
    # training one uses rnd_len=True.
    tdata_gen = CopyDataGen(batch_size,
                            max_len,
                            inp_size,
                            rng=rng,
                            seed=seed,
                            rnd_len=True)

    vdata_gen = CopyDataGen(batch_size,
                            max_len,
                            inp_size,
                            rng=rng,
                            seed=2,
                            rnd_len=False)

    tst_data_gen = CopyDataGen(batch_size,
                               max_len,
                               inp_size,
                               rng=rng,
                               seed=3,
                               rnd_len=False)

    n_layers = state.get('n_layers', 1)
    inps = get_inps(vgen=vdata_gen, debug=debug, output_map=True)

    wi = WeightInitializer(sparsity=-1,
                           scale=std,
                           rng=rng,
                           init_method=InitMethods.Adaptive,
                           center=0.0)

    bi = BiasInitializer(sparsity=-1,
                         scale=std,
                         rng=rng,
                         init_method=BiasInitMethods.Constant,
                         center=0.0)

    ntm = NTMModel(n_in=inp_size,
                   n_hids=n_hids,
                   bow_size=bow_size,
                   n_out=inp_size,
                   predict_bow_out=bowout,
                   mem_size=mem_size,
                   mem_nel=mem_nel,
                   use_ff_controller=use_ff_controller,
                   sub_mb_size=sub_mb_size,
                   deep_out_size=deep_out_size,
                   inps=inps,
                   n_layers=n_layers,
                   hybrid_att=hybrid_att,
                   smoothed_diff_weights=smoothed_diff_weights,
                   baseline_reg=base_reg,
                   w2v_embed_path=w2v_embed_path,
                   renormalization_scale=renormalization_scale,
                   w2v_embed_scale=w2v_embed_scale,
                   n_read_heads=n_read_heads,
                   n_write_heads=n_write_heads,
                   use_last_hidden_state=False,
                   use_loc_based_addressing=use_loc_based_addressing,
                   use_simple_rnn_inp_rep=False,
                   use_gru_inp_rep=use_gru_inp,
                   use_bow_input=use_bow_inp,
                   anticorr=anticorr,
                   erase_activ=erase_activ,
                   use_gate_quad_interactions=use_quad_interactions,
                   content_activ=content_activ,
                   use_multiscale_shifts=True,
                   correlation_ws=correlation_ws,
                   learning_rule=learning_rule,
                   lambda1_rein=lambda1_rein,
                   lambda2_rein=lambda2_rein,
                   n_reading_steps=n_reading_steps,
                   use_deepout=False,
                   use_reinforce=use_reinforce,
                   use_nogru_mem2q=use_nogru_mem2q,
                   use_reinforce_baseline=use_reinforce_baseline,
                   controller_activ=cont_act,
                   use_adv_indexing=False,
                   use_out_mem=False,
                   unroll_recurrence=False,
                   address_size=address_size,
                   reinforce_decay=0.9,
                   learn_h0=learn_h0,
                   theano_function_mode=mode,
                   l1_pen=l1_pen,
                   debug=debug,
                   mem_gater_activ=mem_gater_activ,
                   tie_read_write_gates=False,
                   weight_initializer=wi,
                   bias_initializer=bi,
                   use_cost_mask=True,
                   use_noise=use_noise,
                   max_fact_len=max_len,
                   softmax=False,
                   batch_size=batch_size)

    save_freq = state.get("save_freq", 1000)

    main_loop = NTMToyMainLoop(ntm,
                               print_every=50,
                               checkpoint_every=save_freq,
                               validate_every=500,
                               train_data_gen=tdata_gen,
                               valid_data_gen=vdata_gen,
                               test_data_gen=tst_data_gen,
                               learning_rate=lr,
                               reload_model=reload_model,
                               valid_iters=200,
                               max_iters=state['max_iters'],
                               state=state,
                               prefix=prfx)
    main_loop.run()
예제 #4
0
def search_model_adam_gru_soft(state, channel):
    """Build an ``NTMModel`` on the FB bAbI data and train it with Adam.

    Only a few hyper-parameters are read from ``state`` (learning rate,
    batch size, memory sizes, ``std``, ``renormalization_scale``,
    ``sub_mb_size``, ``n_hids``, ``task_id`` and ``max_iters``); the
    rest are fixed in the body.

    Args:
        state: Experiment configuration with attribute access.
        channel: Job channel object, or ``None`` when there is none.

    Returns:
        ``None`` when ``channel`` is ``None``, otherwise
        ``channel.COMPLETE``.
    """
    import sys

    def get_inps(use_mask=True, vgen=None, debug=False):
        """Create the symbolic Theano inputs of the model.

        Returns ``[X, y, mask, cmask]`` with ``use_mask`` and ``[X, y]``
        without it.  When ``debug`` is true, test values taken from the
        next batch of ``vgen`` are attached to the variables.
        """
        if not use_mask:
            X, y = TT.itensor3("X"), TT.itensor3("y")
            if debug:
                theano.config.compute_test_value = "warn"
                batch = vgen.next()
                X.tag.test_value = batch['x']
                y.tag.test_value = batch['y']
            return [X, y]

        X = TT.itensor3("X")
        y = TT.imatrix("y")
        mask = TT.fmatrix("mask")
        cmask = TT.fmatrix("cost_mask")
        if debug:
            theano.config.compute_test_value = "warn"
            batch = vgen.next()
            X.tag.test_value = batch['x'].astype("int32")
            y.tag.test_value = batch['y'].astype("int32")
            mask.tag.test_value = batch['mask'].astype("float32")
            cmask.tag.test_value = batch['cmask'].astype("float32")
        return [X, y, mask, cmask]

    lr = state.lr
    batch_size = state.batch_size

    # Number of elements in the columns of the memory content.
    mem_size = state.mem_size

    # Number of rows in the memory M.
    mem_nel = state.mem_nel
    std = state.std
    renormalization_scale = state.renormalization_scale
    sub_mb_size = state.sub_mb_size

    # Number of hidden units of the controller.
    n_hids = state.n_hids

    # Deep output is disabled below (use_deepout=False); the size is
    # still passed to the model.
    deep_out_size = 100

    # The bag-of-words embeddings have the same size as the controller.
    bow_size = state.n_hids

    # Feed-forward controller switch.
    use_ff_controller = True

    # Settings for the RNN controller.
    learn_h0 = True
    use_nogru_mem2q = False

    # Location based addressing switch.
    use_loc_based_addressing = False

    # NOTE(review): the seed is fixed here; a "seed" entry in ``state``
    # has no effect on this function -- confirm that is intended.
    seed = 7

    max_seq_len = 100
    max_fact_len = 12

    n_read_heads = 1
    n_write_heads = 1
    n_reading_steps = 1

    lambda1_rein = 4e-5
    lambda2_rein = 1e-5
    base_reg = 3e-5

    # Size of an address in the memory.
    address_size = 20
    w2v_embed_scale = 0.05

    rng = np.random.RandomState(seed)
    use_noise = False

    use_quad_interactions = True
    mode = None
    sys.setrecursionlimit(50000)

    learning_rule = Adam(gradient_clipping=10)
    task_id = state.task_id

    cont_act = Tanh
    mem_gater_activ = Sigmoid
    erase_activ = Sigmoid
    content_activ = Tanh
    w2v_embed_path = None

    use_reinforce_baseline = False

    l1_pen = 7e-4
    debug = False

    path = "/data/lisatmp3/gulcehrc/data/tasks_1-20_v1-2/en-10k/splitted_trainval/"

    # Explicit mapping instead of locals(): the checkpoint prefix then
    # depends only on the fields named here.
    prfx_fields = {
        "n_hids": n_hids,
        "batch_size": batch_size,
        "std": std,
        "mem_nel": mem_nel,
        "mem_size": mem_size,
        "lr": lr,
    }
    prfx = (
        "ntm_on_fb_BABI_task_all__learn_h0_l1_no_n_hids_%(n_hids)s_bsize_%(batch_size)d"
        "_std_%(std)f_mem_nel_%(mem_nel)d_mem_size_%(mem_size)f_lr_%(lr)f"
    ) % prfx_fields

    fact_vocab = "all_tasks_train_ngram_False_dict.pkl"

    tdata_gen = FBbABIDataIteratorSingleQ(
        task_file='all_tasks_train_ngram_False.pkl',
        randomize=True,
        max_seq_len=max_seq_len,
        max_fact_len=max_fact_len,
        task_id=task_id,
        task_path=path,
        mode='train',
        fact_vocab=fact_vocab,
        batch_size=batch_size)

    # The validation iterator takes its max_fact_len from the training
    # iterator rather than from the constant above.
    vdata_gen = FBbABIDataIteratorSingleQ(
        task_file='all_tasks_valid_ngram_False.pkl',
        max_fact_len=tdata_gen.max_fact_len,
        max_seq_len=max_seq_len,
        randomize=False,
        task_id=task_id,
        mode="valid",
        task_path=path,
        fact_vocab=fact_vocab,
        batch_size=batch_size)

    inps = get_inps(vgen=vdata_gen, debug=debug)

    wi = WeightInitializer(sparsity=-1,
                           scale=std,
                           rng=rng,
                           init_method=InitMethods.Adaptive,
                           center=0.0)

    bi = BiasInitializer(sparsity=-1,
                         scale=std,
                         rng=rng,
                         init_method=BiasInitMethods.Constant,
                         center=0.0)

    # The vocabulary size is both the input and the output dimension.
    vocab_size = len(tdata_gen.vocab.items())
    print("Length of the vocabulary,  %s" % (vocab_size,))

    ntm = NTMModel(n_in=vocab_size,
                   n_hids=n_hids,
                   bow_size=bow_size,
                   n_out=vocab_size,
                   mem_size=mem_size,
                   mem_nel=mem_nel,
                   use_ff_controller=use_ff_controller,
                   sub_mb_size=sub_mb_size,
                   deep_out_size=deep_out_size,
                   inps=inps,
                   baseline_reg=base_reg,
                   w2v_embed_path=w2v_embed_path,
                   renormalization_scale=renormalization_scale,
                   w2v_embed_scale=w2v_embed_scale,
                   n_read_heads=n_read_heads,
                   n_write_heads=n_write_heads,
                   use_last_hidden_state=False,
                   use_loc_based_addressing=use_loc_based_addressing,
                   use_gru_inp_rep=False,
                   use_bow_input=True,
                   erase_activ=erase_activ,
                   use_gate_quad_interactions=use_quad_interactions,
                   content_activ=content_activ,
                   use_multiscale_shifts=True,
                   learning_rule=learning_rule,
                   lambda1_rein=lambda1_rein,
                   lambda2_rein=lambda2_rein,
                   n_reading_steps=n_reading_steps,
                   use_deepout=False,
                   use_reinforce=False,
                   use_nogru_mem2q=use_nogru_mem2q,
                   use_reinforce_baseline=use_reinforce_baseline,
                   controller_activ=cont_act,
                   use_adv_indexing=False,
                   use_out_mem=False,
                   unroll_recurrence=False,
                   address_size=address_size,
                   reinforce_decay=0.9,
                   learn_h0=learn_h0,
                   theano_function_mode=mode,
                   l1_pen=l1_pen,
                   mem_gater_activ=mem_gater_activ,
                   tie_read_write_gates=False,
                   weight_initializer=wi,
                   bias_initializer=bi,
                   use_cost_mask=True,
                   use_noise=use_noise,
                   max_fact_len=max_fact_len,
                   softmax=True,
                   batch_size=batch_size)

    main_loop = FBaBIMainLoop(ntm,
                              print_every=40,
                              checkpoint_every=400,
                              validate_every=100,
                              train_data_gen=tdata_gen,
                              valid_data_gen=vdata_gen,
                              learning_rate=lr,
                              reload_model=False,
                              valid_iters=None,
                              linear_start=False,
                              max_iters=state.max_iters,
                              prefix=prfx)
    main_loop.run()

    # Guard against a missing channel so a finished run does not end in
    # an AttributeError.
    if channel is None:
        return None
    return channel.COMPLETE