Example #1
def nalu(mode='NALU'):
    # Two stacked NALU layers map a 100-dim input to a single output.
    x = Input((100,))
    y = NALU(2, mode=mode,
             MW_initializer=RandomNormal(stddev=1),
             G_initializer=Constant(10))(x)
    y = NALU(1, mode=mode,
             MW_initializer=RandomNormal(stddev=1),
             G_initializer=Constant(10))(y)
    return Model(x, y)
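The factory above only builds the graph. Below is a minimal usage sketch, not part of the original snippet: it assumes the nalu() builder defined here, the standalone Keras API used elsewhere in these examples, and a synthetic addition task whose data shapes are illustrative.

import numpy as np
from keras.optimizers import RMSprop

# Assumes the nalu() factory defined above is in scope.
model = nalu()
model.compile(optimizer=RMSprop(0.1), loss='mse')

# Synthetic addition task: the target is the sum of each 100-dim input row.
X = np.random.uniform(0.0, 1.0, size=(1000, 100))
y = X.sum(axis=1, keepdims=True)

model.fit(X, y, batch_size=64, epochs=10, verbose=1)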
Example #2
def reconstruction_experiment():

    op = 'div'
    train_dataset, valid_dataset_inter, valid_dataset_extra, test_dataset_inter, test_dataset_extra = make_dataset(
        op)  #torch.Dataset class
    train_dataloader = DataLoader(train_dataset, batch_size=100)

    IN_DIM = 2
    if op in ['ide', 'pow', 'sqrt']:
        IN_DIM = 1
    NUM_LAYERS = 2
    HIDDEN_DIM = 2
    models = [

        #MLP(num_layers=1,in_dim=IN_DIM,hidden_dim=1,out_dim=1,activation='none',),
        # NAC(num_layers=1,in_dim=IN_DIM,hidden_dim=HIDDEN_DIM,out_dim=1,),
        NALU(num_layers=1, in_dim=IN_DIM, hidden_dim=HIDDEN_DIM, out_dim=1),

        # MLP(num_layers=2,in_dim=IN_DIM,hidden_dim=HIDDEN_DIM,out_dim=1,activation='relu6',),
        # MLP(num_layers=2,in_dim=IN_DIM,hidden_dim=HIDDEN_DIM,out_dim=1,activation='none',),
        # NAC(num_layers=2,in_dim=IN_DIM,hidden_dim=HIDDEN_DIM,out_dim=1,),
        #NALU(num_layers=2,in_dim=IN_DIM,hidden_dim=HIDDEN_DIM,out_dim=1),
        # Sender_Receiver(rnn='rnn').cuda(),
        # Sender_Receiver(rnn='gru').cuda(),
        # Sender_Receiver(rnn='nac').cuda(),
        # Sender_Receiver(rnn='nalu').cuda() ,
        # Gumbel_Agent().cuda(),
        #Multi_Gumbel_Agent(in_dim=IN_DIM).cuda()
        #Mod_Agent(in_dim=IN_DIM, order=20).cuda()
        #Numeral_Machine().cuda()
    ]
    for m in models:
        m.cuda()
    criterion = nn.MSELoss()

    optimizer = optim.RMSprop
    #optimizer = optim.Adam

    for model in models:
        #print("model rnn name: ", model.rnn)

        train_model(model,
                    train_dataloader,
                    valid_dataset_inter,
                    valid_dataset_extra,
                    criterion,
                    optimizer,
                    patience=500)
        loss_inter = eval_model(model, test_dataset_inter, criterion)
        loss_extra = eval_model(model, test_dataset_extra, criterion)
        #print("test loss", loss.item())
        with open("results.txt", 'a') as f:
            f.write("\n{:.3f}, {:.3f}".format(loss_inter.item(),
                                              loss_extra.item()))
        print("evaluation loss: {:.3f}, {:.3f}".format(loss_inter.item(),
                                                       loss_extra.item()))
Example #3
    def get_head(self, hidden_dims, num_classes, head_type):
        dims = hidden_dims + [num_classes]
        head = None
        if head_type == 'NALU':
            head = NALU(*dims)
        elif head_type == 'NAC':
            head = NAC(*dims)
        else:
            raise ValueError(f'{head_type} not supported')
        return head
Example #4
    def __init__(self, hidden_size=32, vocab_size=10):
        super(BaselineReceiver, self).__init__()

        self.input_size = vocab_size + 3

        self.hidden_size = hidden_size

        self.lstm = nn.LSTM(self.input_size,
                            self.hidden_size,
                            batch_first=True)
        self.final = NALU(1, self.hidden_size, 1, 1)
Example #5
    def __init__(self,
                 input_dim,
                 output_dim,
                 hidden_dim=[2],
                 model_type='nalu',
                 hyper={}):
        self.x = tf.placeholder(tf.float32, [None, input_dim], name='input')
        self.y = tf.placeholder(tf.float32, [None, output_dim], name='output')

        if model_type == 'nalu':
            _model = NALU(input_dim, output_dim, hidden_dim)
        elif model_type == 'nac':
            _model = NAC(input_dim, output_dim)
        else:
            _model = MLP(input_dim,
                         output_dim,
                         hidden_dim,
                         act_func=model_type)

        self.model = _model

        self.global_step = tf.Variable(0, trainable=False)
        optim = hyper.get('optim', 'rms')
        decay = hyper.get('decay', None)
        start_lr = hyper.get('lr', 1e-2)

        if decay is not None:
            self.lr = tf.train.exponential_decay(start_lr,
                                                 self.global_step,
                                                 decay_steps=1000,
                                                 decay_rate=decay,
                                                 staircase=True)
        else:
            self.lr = start_lr

        if optim.lower() == 'adam':
            self.optim = tf.train.AdamOptimizer(self.lr)
        elif optim.lower() == 'gd':
            self.optim = tf.train.GradientDescentOptimizer(self.lr)
        elif optim.lower() == 'rms':
            self.optim = tf.train.RMSPropOptimizer(self.lr)
        else:
            raise NotImplementedError("Learning algorithm not recognised")

        self.y_hat = self.model(self.x)
        # Mean absolute error between prediction and target.
        self.error = tf.reduce_mean(tf.abs(self.y_hat - self.y),
                                    name='mean_abs_error')
        self.square = tf.square(tf.subtract(self.y_hat, self.y),
                                name='square_diffs')
        self.loss = tf.reduce_mean(self.square, name='loss')
        self.optimise = self.optim.minimize(self.loss)
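The constructor above only defines the TF1 graph; training runs the optimise and loss ops in a session while feeding the two placeholders. The sketch below is not from the original: the wrapper class name Regressor is hypothetical (the excerpt does not show it), and the data and batch settings are illustrative.

import numpy as np
import tensorflow as tf

# Hypothetical wrapper name; the excerpt above does not show the class name.
net = Regressor(input_dim=2, output_dim=1, model_type='nalu',
                hyper={'optim': 'rms', 'lr': 1e-2})

X = np.random.uniform(1.0, 10.0, size=(1024, 2)).astype(np.float32)
Y = (X[:, 0] / X[:, 1]).reshape(-1, 1)  # e.g. a division task

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for step in range(1000):
        idx = np.random.randint(0, len(X), size=64)
        _, loss = sess.run([net.optimise, net.loss],
                           feed_dict={net.x: X[idx], net.y: Y[idx]})
        if step % 100 == 0:
            print(step, loss)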
Example #6
    def _build_model(self):

        input = Input(shape=(self.input_memory_size, self.state_size))

        nalu = NALU(max(int(self.state_size * 10), 10), mode='NALU')(input)
        nalu = NALU(max(int(self.state_size * 5), 10), mode='NALU')(nalu)
        nalu = NALU(max(int(self.state_size * 3), 10), mode='NALU')(nalu)
        nalu = GRU(max(int(self.state_size * 5), 10),
                   activation='tanh',
                   return_sequences=False)(nalu)

        rnn = Dense(max(int(self.state_size * 10), 10),
                    activation='tanh')(input)
        rnn = Dense(max(int(self.state_size * 5), 10), activation='tanh')(rnn)
        rnn = Dense(max(int(self.state_size * 3), 10), activation='tanh')(rnn)
        rnn = GRU(max(int(self.state_size * 5), 10),
                  activation='tanh',
                  return_sequences=False)(rnn)

        c = Concatenate()([nalu, rnn])

        densenet = Dense(max(int(self.state_size * 10), 10),
                         activation='tanh')(c)
        # densenet = Dense(max(int(self.state_size * 5), 10), activation='tanh')(densenet)
        # densenet = Dropout(rate=0.05)(densenet)
        densenet = Dense(max(int(self.state_size * 5), 10),
                         activation='tanh')(densenet)
        densenet = Dense(max(int(self.state_size * 1), 10),
                         activation='tanh')(densenet)

        # output = NoisyDense(self.action_size, activation='linear', sigma_init=0.02, name='fuzzyout')(densenet)
        output = Dense(self.action_size, activation='linear')(densenet)
        brain = Model(inputs=input, outputs=output)
        self.compile_model(brain, lera)
        print(brain.summary())
        return brain
Example #7
    def __init__(self, hidden_dim, vocab_size, max_len):
        super(NumToLang_Extra, self).__init__()

        self.vocab_size = vocab_size
        self.hidden_dim = 1
        self.gen_dim = 100
        self.embedding_dim = vocab_size
        self.max_len = max_len

        self.ref_size = 1000
        self.ref1 = torch.Tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 10, 100])
        self.nalu1 = NALU(1, len(self.ref1), 1, self.ref_size)
        self.nalu2 = NAC(1, self.hidden_dim + self.ref_size, 1, 1)
        self.gen = nn.Sequential(nn.Linear(1, self.vocab_size),
                                 nn.LogSoftmax(dim=1))
Example #8
def testAll():
    results_dir = "results/"
    filename = "static_arithmetic_test.txt"

    for _op, op_func in operations.items():
        x, y, x_test, y_test = create_data(50000, 100, 0, 1000, 1000, 10000,
                                           op_func)
        print("In operation {}".format(_op))
        print("NAC")
        model = NAC(100, 2, 1)
        nac_err = model.train(x, y, x_test, y_test)
        tf.reset_default_graph()

        counter = 0
        nalu_err = np.nan
        while (np.isnan(nalu_err) and counter < 10):
            # NALU can often become NaN
            counter += 1
            print("NALU")
            model = NALU(100, 2, 1)
            nalu_err = model.train(x, y, x_test, y_test)
            tf.reset_default_graph()
        print("MLP")
        model = MLP(100, 2, 1)
        random_err, _ = model.validate(x_test, y_test)
        mlp_err = model.train(x, y, x_test, y_test)
        tf.reset_default_graph()

        max_score = np.nanmax([nac_err, nalu_err, random_err, mlp_err])

        with open(results_dir + filename, "a") as f:
            f.write("\n{}\n".format(_op))
            f.write("NAC err: {} | {}\n".format(nac_err, nac_err / max_score))
            f.write("NALU err: {} | {}\n".format(nalu_err,
                                                 nalu_err / max_score))
            f.write("MLP err: {} | {}\n".format(mlp_err, mlp_err / max_score))
            f.write("Random err: {} | {}\n".format(random_err,
                                                   random_err / max_score))
Example #9
import os

from experiments.data_utils import generate_static_dataset

if not os.path.exists('weights'):
    os.makedirs('weights')

# hyper parameters
units = 2
num_samples = 1000

# task
task_name = 'square'
task_fn = lambda x, y: x * x

# generate the model
ip = Input(shape=(100, ))
x = NALU(units)(ip)
x = NALU(1)(x)

model = Model(ip, x)
model.summary()

optimizer = RMSprop(0.1)
model.compile(optimizer, 'mse')

# Generate the datasets
X_train, y_train = generate_static_dataset(task_fn,
                                           num_samples,
                                           mode='interpolation')
X_test, y_test = generate_static_dataset(task_fn,
                                         num_samples,
                                         mode='extrapolation')
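The excerpt stops after generating the datasets. A plausible continuation, not part of the original script, would fit on the interpolation split, evaluate extrapolation, and persist weights to the directory created at the top; the weights filename below is illustrative.

# Hypothetical continuation: train on interpolation data, check extrapolation.
model.fit(X_train, y_train, batch_size=64, epochs=100, verbose=0)

interp_mse = model.evaluate(X_train, y_train, verbose=0)
extrap_mse = model.evaluate(X_test, y_test, verbose=0)
print("interpolation mse:", interp_mse, "extrapolation mse:", extrap_mse)

# Illustrative filename under the 'weights' directory created above.
model.save_weights('weights/%s_nalu.h5' % task_name)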
Example #10
    def __init__(self, n_in=2, n_out=1, return_prediction=False):
        super(NaluLayer, self).__init__()
        with self.init_scope():
            self.nalu = NALU(n_in, n_out)
            self.return_prediction = return_prediction
Example #11
    def __init__(self, args):
        super(Legacy_Sender_Receiver, self).__init__()

        #hyperparameters
        self.sender_hidden_size = args.hidden
        self.receiver_hidden_size = args.hidden
        self.vocab_size = args.vocab + 1
        self.max_seq_len = args.seq
        self.hard = True
        self.tau = args.tau
        self.rnn = 'rnn' if args.rnn is None else args.rnn
        self.reverse = args.reverse

        if args.input == 'continuous' or args.input is None:
            self.input_size = 1
        elif args.input == 'discrete':
            self.input_size = args.max
        elif args.input == 'combined':
            self.input_size = args.max + 1
        self.input_type = args.input

        #layers
        self.linear1 = nn.Linear(self.input_size, self.sender_hidden_size)
        self.linear2 = nn.Linear(self.sender_hidden_size, self.vocab_size)

        #alternative design option: task-specific layers for multitask learning
        # 1) (default) share the rnn receiver and have separate FF task specific layers
        # 2) have separate rnn receivers for each task

        if self.rnn == "gru":
            self.rnncell1 = nn.GRUCell(self.vocab_size,
                                       self.sender_hidden_size)
            self.rnncell2 = nn.GRUCell(self.vocab_size,
                                       self.receiver_hidden_size)
        elif self.rnn == "rnn":
            self.rnncell1 = nn.RNNCell(self.vocab_size,
                                       self.sender_hidden_size,
                                       nonlinearity='relu')
            self.rnncell2 = nn.RNNCell(self.vocab_size,
                                       self.receiver_hidden_size,
                                       nonlinearity='relu')
        elif self.rnn == "nalu":
            self.rnncell1 = NALU(1, self.vocab_size + self.sender_hidden_size,
                                 0, self.sender_hidden_size)
            self.rnncell2 = NALU(1,
                                 self.vocab_size + self.receiver_hidden_size,
                                 0, self.receiver_hidden_size)
        elif self.rnn == 'nac':
            self.rnncell1 = NAC(1, self.vocab_size + self.sender_hidden_size,
                                0, self.sender_hidden_size)
            self.rnncell2 = NAC(1, self.vocab_size + self.receiver_hidden_size,
                                0, self.receiver_hidden_size)
        else:
            raise ValueError("unsupported rnn type: {}".format(self.rnn))
        #one hidden layer for processing output
        self.linear5 = nn.Linear(self.receiver_hidden_size,
                                 self.receiver_hidden_size)

        #layers for regression
        self.linear3 = nn.Linear(self.receiver_hidden_size, 1)

        #layers for classification
        self.linear4 = nn.Linear(self.receiver_hidden_size, args.max)
Example #12
    for op in operators:
        x_train, y_train, x_test_i, y_test_i, x_test_e, y_test_e, random_rmse_i, random_rmse_e = \
            load_data(op, scale, train_size, test_size)
        for net_type in networks:
            mx.random.seed(0)

            net = nn.Sequential()
            with net.name_scope():
                if net_type in ['ReLU', 'Sigmoid']:
                    net.add(nn.Dense(in_units=IN_DIM, units=HIDDEN_DIM))
                    net.add(activations[net_type])
                    net.add(nn.Dense(units=1))
                    net.collect_params().initialize(mx.init.Uniform(1),
                                                    ctx=ctx)
                elif net_type == 'NALU':
                    net.add(NALU(in_units=IN_DIM, units=HIDDEN_DIM))
                    net.add(NALU(in_units=HIDDEN_DIM, units=1))
                    net.collect_params().initialize(mx.init.Uniform(
                        args.init_scale),
                                                    ctx=ctx)
                elif net_type == 'NAC':
                    net.add(NAC(in_units=IN_DIM, units=HIDDEN_DIM))
                    net.add(NAC(in_units=HIDDEN_DIM, units=1))
                    net.collect_params().initialize(mx.init.Uniform(
                        args.init_scale),
                                                    ctx=ctx)
                else:
                    raise ValueError("Invalid Network: " + net_type)

            logging.info("Learn %s with %s" % (op, net_type))
            i_rmse, e_rmse = train_static(op, net_type, net, x_train, y_train,