Example No. 1
def main():
    train, test = chainer.datasets.get_mnist()
    def forward(x, t, model):
        y, l = model(x)
        if model.c:
            y, l = Lmt(t)(y, l)
        t = np.eye(10)[t].astype(np.float32)
        loss = mse(y, t)
        return loss

    model = MLP(c=0.05)
    optimizer = Opt()
    optimizer.setup(model)

    for epoch in range(5):
        for batch in SerialIterator(train, 60, repeat=False):
            x, t = format(batch)
            optimizer.update(forward, x, t, model)
        tx, tt = format(test)
        print("epoch {}: accuracy: {:.3f}".format(epoch + 1, model.accuracy(tx, tt)))

    fgsm = FGSM(model)
    for eta in [0.01, 0.02, 0.05, 0.1]:
        cnt = 0
        fail = 0
        for i in np.random.randint(0, 10000, 100):
            res = fgsm.attack(test[i][0], test[i][1], eta=eta)
            if res != -1:
                cnt += 1
                if not res: fail += 1
        print("c: {:.3f}, eta: {:.3f}, attacked: {:.3f}".format(model.c, eta, fail / cnt))
Example No. 2
class Train:
    def __init__(self):
        with open("data.pickle", "rb") as f:
            self.data = pickle.load(f)
        self.model = Model()
        self.model.to_gpu()
        self.optimizer = Adam()
        self.optimizer.setup(self.model)

        self.executor = ThreadPoolExecutor(8)

        # Prefetch the first batch so load() always has a pending Future to resolve.
        self.hoge = self.executor.submit(self.data.next, 2, 2)

    def load(self):
        d = self.hoge.result()
        self.hoge = self.executor.submit(self.data.next, 2, 2)
        return d

    def training(self):
        for i in range(1000000000000000):   # effectively run until interrupted
            a = self.batch()
            if i % 100 == 0:
                print(f"{i} loss:{a}")

    def batch(self):
        a, b = self.load()
        self.model.cleargrads()
        y = tuple(self.executor.map(self.model, a + b))
        loss = F.contrastive(y[0], y[1], [1]) +\
               F.contrastive(y[2], y[3], [1]) +\
               F.contrastive(y[0], y[2], [0]) +\
               F.contrastive(y[1], y[3], [0])
        loss.backward()
        self.optimizer.update()
        return loss.data.get()
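The load()/submit() pair above is a one-step prefetch: while the current batch is being trained on, the next one is already being prepared on a worker thread. The same pattern in isolation (load_next_batch and train_step are hypothetical stand-ins):

from concurrent.futures import ThreadPoolExecutor

executor = ThreadPoolExecutor(max_workers=1)
pending = executor.submit(load_next_batch)          # start loading before training begins
while True:
    batch = pending.result()                        # wait for the prefetched batch
    pending = executor.submit(load_next_batch)      # immediately queue the next load
    train_step(batch)                               # train while the next batch loads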
Example No. 3
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", default=None)
    parser.add_argument("--gpu", type=int, default=0)
    parser.add_argument("--batch_size", type=int, default=4)
    parser.add_argument("--data_dir", type=str, default="./datasets")
    parser.add_argument("--data_list", type=str, default="train.txt")
    parser.add_argument("--n_class", type=int, default=5)
    parser.add_argument("--n_steps", type=int, default=100)
    parser.add_argument("--snapshot_dir", type=str, default="./snapshots")
    parser.add_argument("--save_steps", type=int, default=50)
    args = parser.parse_args()
    print(args)

    if not os.path.exists(args.snapshot_dir):
        os.makedirs(args.snapshot_dir)

    model = RefineResNet(n_class=args.n_class)
    if args.model is not None:
        serializers.load_npz(args.model, model)

    if args.gpu >= 0:
        cuda.get_device(args.gpu).use()
        model.to_gpu()
        xp = cuda.cupy
    else:
        xp = np

    optimizer = Adam()
    #optimizer = MomentumSGD()
    optimizer.setup(model)
    optimizer.add_hook(WeightDecay(1e-5), "hook_wd")

    train_dataset = ImageDataset(args.data_dir,
                                 args.data_list,
                                 crop_size=(320, 320))
    train_iterator = MultiprocessIterator(train_dataset,
                                          batch_size=args.batch_size,
                                          repeat=True,
                                          shuffle=True)

    step = 0
    for zipped_batch in train_iterator:
        step += 1
        x = Variable(xp.array([zipped[0] for zipped in zipped_batch]))
        y = Variable(
            xp.array([zipped[1] for zipped in zipped_batch], dtype=xp.int32))
        # Keep the prediction as a Variable so the loss stays connected to the
        # model's computational graph and gradients reach its parameters.
        pred = model(x)
        loss = F.softmax_cross_entropy(pred, y)
        model.cleargrads()
        loss.backward()
        optimizer.update()

        print("Step: {}, Loss: {}".format(step, loss.data))
        if step % args.save_steps == 0:
            serializers.save_npz(
                os.path.join(args.snapshot_dir, "model_{}.npz".format(step)),
                model)

        if step >= args.n_steps:
            break
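As an aside, Chainer's Optimizer.update can also drive the whole step when given a loss function and its arguments (it clears the gradients, runs backward, and applies the update internally), so the forward/backward/update portion above could be written as:

def compute_loss(x, y):
    return F.softmax_cross_entropy(model(x), y)

optimizer.update(compute_loss, x, y)    # cleargrads + backward + update in one call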
Example No. 4
def main():

    parser = argparse.ArgumentParser()
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='Number of examples in each mini-batch')
    parser.add_argument('--bproplen', '-l', type=int, default=200,
                        help='Number of words in each mini-batch '
                             '(= length of truncated BPTT)')
    parser.add_argument('--epoch', '-e', type=int, default=40,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')

    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')

    parser.add_argument('--file', default="enwik8",
                        help='path to text file for training')
    parser.add_argument('--unit', '-u', type=int, default=2800,
                        help='Number of LSTM units')
    parser.add_argument('--embd', type=int, default=400,
                        help='Number of embedding units')
    parser.add_argument('--hdrop', type=float, default=0.2,
                        help='hidden state dropout (variational)')
    parser.add_argument('--edrop', type=float, default=0.5,
                        help='embedding dropout')

    args = parser.parse_args()

    nembd = args.embd
    # number of training iterations per model save, log write, and validation set evaluation
    interval = 100

    pdrop = args.hdrop

    pdrope = args.edrop

    #initial learning rate
    alpha0 = .001
    #inverse of linear decay rate towards 0
    dec_it = 12*9000
    #minimum learning rate
    alpha_min = .00007

    #first ntrain words of dataset will be used for training
    ntrain = 90000000


    seqlen = args.bproplen
    nbatch = args.batchsize

    filename = args.file

    text, mapping = get_char(filename)
    sequence = np.array(text).astype(np.int32)

    itrain = sequence[0:ntrain]
    ttrain = sequence[1:ntrain+1]
    fullseql = int(ntrain / nbatch)

    itrain = itrain.reshape(nbatch, fullseql)
    ttrain = ttrain.reshape(nbatch, fullseql)

    # does not use the full validation set
    nval = 500000
    ival = sequence[ntrain:ntrain+nval]
    tval = sequence[ntrain+1:ntrain+nval+1]

    ival = ival.reshape(ival.shape[0] // 1000, 1000)
    tval = tval.reshape(tval.shape[0] // 1000, 1000)
    #test = sequence[ntrain+nval:ntrain+nval+ntest]


    nvocab = max(sequence) + 1  # train is just an array of integers
    print('#vocab =', nvocab)

    # Prepare an RNNLM model
    rnn = RNNForLM(nvocab, args.unit,args.embd)
    model = L.Classifier(rnn)
    model.compute_accuracy = False  # we only want the perplexity
    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()  # make the GPU current
        model.to_gpu()

    # Set up an optimizer
    optimizer = Adam(alpha=alpha0)
    optimizer.setup(model)
    resultdir = args.out

    print('starting')
    nepoch = args.epoch

    start = 0
    loss_sum = 0

    if not os.path.isdir(resultdir):
        os.mkdir(resultdir)

    vloss = test(rnn, ival, tval)
    vloss = 1.4427 * vloss   # convert from nats to bits
    f = open(os.path.join(resultdir, 'log'), 'w')
    outstring = "Initial validation loss (bits/char): " + str(vloss) + '\n'
    f.write(outstring)
    f.close()

    i=0
    epoch_num = 0
    it_num = 0

    while True:
        # Get the result of the forward pass.
        fin = start+seqlen

        if fin > itrain.shape[1]:
            start = 0
            fin = start + seqlen
            epoch_num = epoch_num + 1
            if epoch_num == nepoch:
                break

        inputs = itrain[:,start:fin]
        targets = ttrain[:,start:fin]
        start = fin

        inputs = Variable(inputs)
        targets = Variable(targets)

        targets.to_gpu()
        inputs.to_gpu()
        it_num+=1
        loss = 0
        rnn.applyWN()

        #make hidden dropout mask
        mask = cp.zeros((inputs.shape[0],args.unit),dtype = cp.float32)
        ind = cp.nonzero(cp.random.rand(inputs.shape[0],args.unit)>pdrop)
        mask[ind] = 1/(1-pdrop)

        #make embedding dropout mask
        mask2 = cp.zeros((inputs.shape[0],nembd),dtype = cp.float32)
        ind = cp.nonzero(cp.random.rand(inputs.shape[0],nembd)>pdrope)
        mask2[ind] = 1/(1-pdrope)

        for j in range(seqlen):

            output = rnn(inputs[:, j], mask, mask2)
            loss = loss + F.softmax_cross_entropy(output, targets[:, j])

        loss = loss / seqlen

        # Clear all gradients before computing new ones.
        rnn.cleargrads()
        loss_sum += loss.data

        # Calculate gradients for all parameters.
        loss.backward()

        # Use the optimizer to move all parameters of the network
        # to values which will reduce the loss.
        optimizer.update()
        # Decay the learning rate linearly towards zero...
        optimizer.alpha = alpha0 * (dec_it - it_num) / float(dec_it)
        # ...but never let it drop below the minimum.
        if optimizer.alpha < alpha_min:
            optimizer.alpha = alpha_min

        loss.unchain_backward()

        if (i + 1) % interval == 0:
            rnn.reset_state()
            vloss = test(rnn, ival, tval)

            # convert from nats to bits
            vloss = 1.4427 * vloss
            loss_sum = 1.4427 * loss_sum / interval

            serializers.save_npz(os.path.join(resultdir, 'model'), rnn)

            outstring = "Training iteration: " + str(i+1) + " Training loss (bits/char): " + str(loss_sum) + " Validation loss (bits/char): " + str(vloss) + '\n'
            f = open(os.path.join(resultdir, 'log'), 'a')
            f.write(outstring)
            f.close()
            print("Training iteration: " + str(i+1))
            print('training loss: ' + str(loss_sum))
            print('validation loss: ' + str(vloss))
            loss_sum = 0

        i+=1
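A note on the recurring 1.4427 factor: softmax cross-entropy is measured in nats, and multiplying by 1/ln 2 ≈ 1.4427 converts it to bits, so the logged values are bits per character:

import math
print(1.0 / math.log(2))   # 1.4426950408889634, the nats-to-bits factor used above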
Example No. 5
for epoch in range(0, epochs):

    print('EPOCH: {}/{}'.format(epoch+1, epochs))
    perm = np.random.permutation(num_data)  # shuffle the sample indices

    # iterate over mini-batches (only the first 1000 permuted samples per epoch)
    for idx in range(0, 1000, batch_size):
        # slice out the input and target data
        batch_x = Variable(train_x[perm[idx: idx + batch_size]])
        batch_y = Variable(train_y[perm[idx: idx + batch_size]])

        # optimize the model
        model.cleargrads()
        loss, accuracy = model(batch_x, batch_y)  # forward pass through the network
        loss.backward()
        optimizer.update()
        now = time.time()
        print('{}/{}, train_loss = {}, accuracy = {}, time = {:.2f}'.format(
             idx, num_data, loss.data, accuracy.data, now-cur_at))
        average_loss.append(loss.data)
        accuracy_list.append(accuracy.data)
        cur_at = now

    # after each epoch, run a few arbitrary inputs through the model as a sanity check
    for tmp in perm[1: 10]:
        # print('input  -> {}'.format(''.join(x[tmp])))
        # print('output -> ', end='')

        test_x = Variable(train_x[tmp])

        for index in model.beam_search_predict(test_x):
Example No. 6
class QNeuralNetwork(QModel):
    def __init__(self,
                 model,
                 target,
                 device_id=-1,
                 learning_rate=0.00025,
                 momentum=.9,
                 minibatch_size=32,
                 update_interval=10000):

        assert isinstance(model, ChainerModel), \
            'model should inherit from ChainerModel'

        super(QNeuralNetwork, self).__init__(model.input_shape,
                                             model.output_shape)

        self._gpu_device = None
        self._loss_val = 0

        # Target model update method
        self._steps = 0
        self._target_update_interval = update_interval

        # Setup model and target network
        self._minibatch_size = minibatch_size
        self._model = model
        self._target = target
        self._target.copyparams(self._model)

        # If GPU move to GPU memory
        if device_id >= 0:
            with cuda.get_device(device_id) as device:
                self._gpu_device = device
                self._model.to_gpu(device)
                self._target.to_gpu(device)

        # Setup optimizer
        self._optimizer = Adam(learning_rate, momentum, 0.999)
        self._optimizer.setup(self._model)

    def evaluate(self, environment, model=QModel.ACTION_VALUE_NETWORK):
        if check_rank(environment.shape, get_rank(self._input_shape)):
            environment = environment.reshape((1, ) + environment.shape)

        # Move data if necessary
        if self._gpu_device is not None:
            environment = cuda.to_gpu(environment, self._gpu_device)

        if model == QModel.ACTION_VALUE_NETWORK:
            output = self._model(environment)
        else:
            output = self._target(environment)

        return cuda.to_cpu(output.data)

    def train(self, x, y, actions=None):
        actions = actions.astype(np.int32)
        batch_size = len(actions)

        if self._gpu_device is not None:
            x = cuda.to_gpu(x, self._gpu_device)
            y = cuda.to_gpu(y, self._gpu_device)
            actions = cuda.to_gpu(actions, self._gpu_device)

        q = self._model(x)
        q_subset = F.reshape(F.select_item(q, actions), (batch_size, 1))
        y = y.reshape(batch_size, 1)

        loss = F.sum(F.huber_loss(q_subset, y, 1.0))

        self._model.cleargrads()
        loss.backward()
        self._optimizer.update()

        self._loss_val = float(cuda.to_cpu(loss.data))

        # Keeps track of the number of train() calls
        self._steps += 1
        if self._steps % self._target_update_interval == 0:
            # copy weights
            self._target.copyparams(self._model)

    @property
    def loss_val(self):
        return self._loss_val  # / self._minibatch_size

    def save(self, output_file):
        save_npz(output_file, self._model)

    def load(self, input_file):
        load_npz(input_file, self._model)

        # Copy parameter from model to target
        self._target.copyparams(self._model)
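The F.select_item call in train() picks out, for each row, the Q-value of the action that was actually taken; a small self-contained check of that behaviour:

import numpy as np
import chainer.functions as F

q = np.array([[0.1, 0.9, 0.3],
              [0.4, 0.2, 0.7]], dtype=np.float32)
actions = np.array([1, 2], dtype=np.int32)
print(F.select_item(q, actions).array)   # [0.9 0.7], i.e. q[i, actions[i]]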
Example No. 7
        # Train Generator
        loss_gen = F.softmax_cross_entropy(
            y1, Variable(xp.zeros(batch_size, dtype=np.int32)))
        loss_dis = F.softmax_cross_entropy(
            y1, Variable(xp.ones(batch_size, dtype=np.int32)))

        # Train Discriminator
        batch_x = Variable(train_x[perm[idx:idx + batch_size]])
        y2 = discriminator(batch_x)
        loss_dis += F.softmax_cross_entropy(
            y2, Variable(xp.zeros(batch_size, dtype=np.int32)))

        # Optimize the generator
        generator.cleargrads()
        loss_gen.backward()
        opt_gen.update()
        # Optimize the discriminator
        discriminator.cleargrads()
        loss_dis.backward()
        opt_dis.update()

        now = time.time()
        print('{}/{}, Gen_loss = {}, Dis_loss = {}, time = {:.2f}'.format(
            idx, n_train_data, loss_gen.data, loss_dis.data, now - cur_at))
        gen_loss.append(loss_gen.data)
        dis_loss.append(loss_dis.data)
        cur_at = now

    pickle.dump(generator, open('generator_snapshot.model', 'wb'))
    pickle.dump(discriminator, open('discriminator_snapshot.model', 'wb'))
Example No. 8
def train(source_bpe, target_bpe, source_glove, target_glove, chunk_length,
          batch_size, warmup_steps, save_decimation, num_steps, gpu_id, out,
          log_level):
    if not os.path.exists(out):
        os.makedirs(out)

    ll = getattr(logging, log_level)

    stream_handler = logging.StreamHandler(sys.stdout)
    stream_handler.setLevel(ll)
    stream_handler.setFormatter(logging.Formatter('%(message)s'))

    file_handler = logging.FileHandler(filename=os.path.join(
        out, 'training.log'),
                                       mode='a')
    file_handler.setLevel(ll)
    file_handler.setFormatter(logging.Formatter('%(message)s'))

    logger.addHandler(stream_handler)
    logger.addHandler(file_handler)
    logger.setLevel(ll)

    gpu_id = gpu_id if gpu_id is not None else -1

    device_name = '@intel64'
    if gpu_id >= 0:
        device_name = f'@cupy:{gpu_id}'

    with chainer.using_device(device_name):
        source_vocab = make_vocab(source_glove)
        target_vocab = make_vocab(target_glove)
        output_model_dim = target_vocab.embedding_size
        dataset = make_dataset(source_bpe, target_bpe, source_vocab,
                               target_vocab, chunk_length)
        iterator = MultithreadIterator(dataset, batch_size)
        state = TrainingState()
        model = Transformer(source_vocab, target_vocab)
        model.to_gpu(gpu_id)
        optimizer = Adam(beta1=0.99, beta2=0.98, eps=1e-9).setup(model)

        load_training(out, model, optimizer, state)

        try:
            for n, batch in enumerate(iterator):
                if n >= num_steps:
                    break

                if (n + 1) % save_decimation == 0:
                    save_training(out, model, optimizer, state)

                model.cleargrads()
                gc.collect()

                source, target = stack_nested(batch)

                source.token_ids.to_gpu(gpu_id)
                source.masks.to_gpu(gpu_id)
                target.token_ids.to_gpu(gpu_id)
                target.masks.to_gpu(gpu_id)

                output_probs = model.train_forward(source.token_ids,
                                                   target.token_ids,
                                                   input_masks=source.masks,
                                                   output_masks=target.masks)

                unnormalized_loss = F.softmax_cross_entropy(
                    F.reshape(output_probs,
                              (output_probs.shape[0] * output_probs.shape[1],
                               output_probs.shape[2])),
                    F.reshape(target.token_ids, (target.token_ids.shape[0] *
                                                 target.token_ids.shape[1], )),
                    reduce='no')
                loss_mask = xp.reshape(
                    xp.logical_not(target.masks.array).astype(xp.float32),
                    (target.masks.shape[0] * target.masks.shape[1], ))
                loss = F.sum(unnormalized_loss * loss_mask) / F.sum(loss_mask)
                loss.backward()

                learning_rate = (output_model_dim**-0.5) * min(
                    (state.step**-0.5), state.step * (warmup_steps**-1.5))
                optimizer.alpha = learning_rate
                optimizer.update()

                logger.info(
                    f'time = {int(time.time())} | step = {state.step} | loss = {float(loss.array)} | lr = {learning_rate}'
                )

                state.step += 1
        finally:
            save_training(out, model, optimizer, state)
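The learning-rate expression in the loop is the warm-up schedule from "Attention Is All You Need": lr(step) = d_model**-0.5 * min(step**-0.5, step * warmup_steps**-1.5), here with d_model taken from the target embedding size. Pulled out as a function for reference:

def noam_lr(step, d_model, warmup_steps):
    # Linear warm-up for the first warmup_steps steps, then decay ~ step**-0.5.
    return (d_model ** -0.5) * min(step ** -0.5, step * (warmup_steps ** -1.5))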
Example No. 9
def main():
    parser = argparse.ArgumentParser(description="LapSRN")
    parser.add_argument("--dataset", type=str)
    parser.add_argument("--outdirname", type=str, default="./models")
    parser.add_argument("--scale", type=int, default=4)
    parser.add_argument("--batchsize", type=int, default=64)
    parser.add_argument("--epoch", type=int, default=100)
    parser.add_argument("--steps_per_epoch", type=int, default=128)
    parser.add_argument("--model", default=None)
    parser.add_argument("--gpu", type=int, default=-1)
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# dataset: {}'.format(args.dataset))
    print('# outdirname: {}'.format(args.outdirname))
    print('# scale: {}'.format(args.scale))
    print('# batchsize: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('# steps_per_epoch: {}'.format(args.steps_per_epoch))
    print('# model: {}'.format(args.model))
    print('')

    OUTPUT_DIRECTORY = args.outdirname
    if not os.path.exists(OUTPUT_DIRECTORY):
        os.makedirs(OUTPUT_DIRECTORY)

    model = LapSRN()
    if args.model is not None:
        print("Loading model...")
        serializers.load_npz(args.model, model)

    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()
        xp = cuda.cupy
    else:
        xp = np

    optimizer = Adam()
    optimizer.setup(model)

    print("loading dataset...")
    paths = glob.glob(args.dataset)
    train_dataset = ImageDataset(scale=args.scale,
                                 paths=paths,
                                 dtype=xp.float32,
                                 cropsize=96)

    iterator = MultiprocessIterator(train_dataset,
                                    batch_size=args.batchsize,
                                    repeat=True,
                                    shuffle=True)

    step = 0
    epoch = 0
    loss = 0
    print("training...")
    for zipped_batch in iterator:
        lr = chainer.Variable(xp.array([zipped[0] for zipped in zipped_batch]))
        hr = chainer.Variable(xp.array([zipped[1] for zipped in zipped_batch]))

        sr = model(lr)
        loss += l1_charbonnier(sr, hr, model).data
        optimizer.update(l1_charbonnier, sr, hr, model)

        step += 1
        if step % args.steps_per_epoch == 0:
            loss /= args.steps_per_epoch
            print("Epoch: {}, Loss: {}, PSNR: {}".format(
                epoch, loss, PSNR(sr.data[0], hr.data[0])))
            chainer.serializers.save_npz(
                os.path.join(OUTPUT_DIRECTORY, "model_{}.npz".format(epoch)),
                model)
            epoch += 1
            loss = 0

        if epoch > args.epoch:
            break

    print("Done")
Example No. 10
def main():
    model = Siamese()
    print('model params: ', model.count_params())
    optimizer = Adam(alpha=0.0002, beta1=0.5, beta2=0.999, eps=10e-8)
    optimizer.setup(model)

    epochs = 1000
    batch_size = 64

    data_batch = np.zeros((batch_size, 2), dtype=np.float32)

    labels = np.zeros((batch_size, 1), dtype=np.int32)  # sigmoid_cross_entropy expects int32 labels

    loss_list = []

    for e in range(epochs):
        for b in range(batch_size):
            x1 = randint(0, 9)
            x2 = randint(0, 9)
            if x1 == x2:
                lr = randint(0, 1)  # decide which will be bigger
                if x1 == 0:
                    if lr == 1:  # left
                        x1 += 1
                    else:
                        x2 += 1
                elif x1 == 9:
                    if lr == 1:
                        x2 -= 1
                    else:
                        x1 -= 1
                else:
                    if lr == 1:
                        x1 += 1
                    else:
                        x2 += 1

            data_batch[b] = [np.float32(x1), x2]
            # (1, 0) = left  (0, 1) = right
            if x1 > x2:  # left
                # labels[b] = [np.float32(1), np.float32(0)]
                labels[b] = 1
            else:  # right or equal
                # labels[b] = [np.float32(0), np.float32(1)]
                labels[b] = 0

        with chainer.using_config('train', True):
            model.cleargrads()
            d1 = np.expand_dims(data_batch[:, 0], -1)
            d2 = np.expand_dims(data_batch[:, 1], -1)
            # prediction = model(np.expand_dims(d1, 0), np.expand_dims(d2, 0))
            prediction = model(d1, d2)
            loss = sigmoid_cross_entropy(prediction, labels)
            # loss = mean_squared_error(prediction, labels)
            loss.backward()
            optimizer.update()

        loss_list.append(float(loss.data))

        # print(e, float(loss.data))

        if (e + 1) % 100 == 0:
            cm = make_confusion_matrix(prediction, labels)
            print(e, cm, loss, 'W: ', model.fc1.W, 'b: ', model.fc1.b)
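For reference, chainer.functions.sigmoid_cross_entropy expects int32 labels with the same shape as the logits, which is why labels above is kept as a (batch_size, 1) integer array; a quick shape check:

import numpy as np
import chainer.functions as F

logits = np.zeros((4, 1), dtype=np.float32)
labels = np.ones((4, 1), dtype=np.int32)
print(F.sigmoid_cross_entropy(logits, labels).array)   # scalar mean loss over the batch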
Example No. 12
class AdamSet:
    def __init__(self, alpha, beta1, beta2, conditional=False):
        self.conditional = conditional
        self.mapper_optimizer = Adam(alpha / 100, beta1, beta2, eps=1e-08)
        self.synthesizer_optimizer = Adam(alpha, beta1, beta2, eps=1e-08)
        self.discriminator_optimizer = Adam(alpha, beta1, beta2, eps=1e-08)
        if conditional:
            self.generator_embedder_optimizer = Adam(alpha,
                                                     beta1,
                                                     beta2,
                                                     eps=1e-08)
            self.discriminator_embedder_optimizer = Adam(alpha,
                                                         beta1,
                                                         beta2,
                                                         eps=1e-08)
            self.condition_mapper_optimizer = Adam(alpha / 100,
                                                   beta1,
                                                   beta2,
                                                   eps=1e-08)

    def __iter__(self):
        yield "mapper", self.mapper_optimizer
        yield "synthesizer", self.synthesizer_optimizer
        yield "discriminator", self.discriminator_optimizer
        if self.conditional:
            yield "generator_embedder", self.generator_embedder_optimizer
            yield "discriminator_embedder", self.discriminator_embedder_optimizer
            yield "condition_mapper", self.condition_mapper_optimizer

    def setup(self, generator, discriminator):
        self.mapper_optimizer.setup(generator.mapper)
        self.synthesizer_optimizer.setup(generator.synthesizer)
        self.discriminator_optimizer.setup(discriminator.main)
        if self.conditional:
            self.generator_embedder_optimizer.setup(generator.embedder)
            self.discriminator_embedder_optimizer.setup(discriminator.embedder)
            self.condition_mapper_optimizer.setup(
                discriminator.condition_mapper)

    def update_generator(self):
        self.mapper_optimizer.update()
        self.synthesizer_optimizer.update()
        if self.conditional:
            self.generator_embedder_optimizer.update()

    def update_discriminator(self):
        self.discriminator_optimizer.update()
        if self.conditional:
            self.discriminator_embedder_optimizer.update()
            self.condition_mapper_optimizer.update()
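A hypothetical usage sketch of AdamSet; the generator and discriminator objects (and the attribute names setup() expects on them) are assumptions:

optimizers = AdamSet(alpha=0.002, beta1=0.0, beta2=0.99)
optimizers.setup(generator, discriminator)

# ... backpropagate the generator loss, then:
optimizers.update_generator()
# ... backpropagate the discriminator loss, then:
optimizers.update_discriminator()

for name, optimizer in optimizers:   # __iter__ yields (name, optimizer) pairs
    print(name, optimizer.alpha)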
Example No. 13
class RNN(object):
    def __init__(self, n_words, emb_size, n_hidden, n_classes, classes):
        self.model = chainer.FunctionSet(
            Emb=F.EmbedID(n_words, emb_size),
            W=F.Linear(emb_size, n_hidden),
            U=F.Linear(n_hidden, n_hidden),
            O=F.Linear(n_hidden, n_classes)
        )

        self.n_hidden = n_hidden
        self.n_classes = n_classes
        self.emb_size = emb_size

        self.classes = classes
        self.classes_rev = {v: k for k, v in classes.iteritems()}

        for param in self.model.parameters:
            param[:] = np.random.randn(*param.shape) * 0.1

        self.optimizer = Adam(alpha=0.001, beta1=0.9, beta2=0.999, eps=1e-8)
        self.optimizer.setup(self.model)

    def forward_loss(self, mb_x, mb_y, train=True):
        mb_size = mb_x.shape[0]
        n_steps = mb_x.shape[1]

        loss = 0.0
        h = chainer.Variable(np.zeros((mb_size, self.n_hidden), dtype='float32'), volatile=not train)
        y_hat = []
        for i in range(n_steps):
            x_i = chainer.Variable(mb_x[:, i], volatile=not train)
            y_i = chainer.Variable(mb_y[:, i], volatile=not train)
            h = self.model.W(self.model.Emb(x_i)) + self.model.U(h)
            out = self.model.O(h)

            curr_loss = F.softmax_cross_entropy(out, y_i)
            y_hat.append(curr_loss.creator.y)

            loss += curr_loss * 1.0 / (n_steps * mb_size)

        y_hat = np.array(y_hat).swapaxes(0, 1)

        return loss, y_hat

    def learn(self, x, y):
        self.optimizer.zero_grads()

        loss, y_hat = self.forward_loss(x, y, train=True)

        loss.backward()

        self.optimizer.update()

        return loss.data

    def predict(self, x):
        _, y_hat = self.forward_loss(x, np.zeros(x.shape, dtype='int32'))

        return np.argmax(y_hat, axis=2)

    def predictions_to_text(self, y):
        return [self.classes_rev.get(i, '#EOS') for i in y]

    def eval(self, mb_x, mb_y):
        mb_y_hat = self.predict(mb_x)

        t = self.predictions_to_text

        acc =  sklearn.metrics.accuracy_score(mb_y.flat[mb_y.flat != -1], mb_y_hat.flat[mb_y.flat != -1])
        prec = sklearn.metrics.precision_score(mb_y.flat[mb_y.flat != -1], mb_y_hat.flat[mb_y.flat != -1])
        recall = sklearn.metrics.recall_score(mb_y.flat[mb_y.flat != -1], mb_y_hat.flat[mb_y.flat != -1])
        report = sklearn.metrics.classification_report(t(mb_y.flat[mb_y.flat != -1]), t(mb_y_hat.flat[mb_y.flat != -1]))

        return acc, prec, recall, report, mb_y_hat
Example No. 14
    img1 = Variable(img1)
    img1.to_device(device)

    img2 = L.Parameter(np.random.rand(*img1.shape).astype(np.float32))

    img2.to_device(device)
    optimizer = Adam(0.1)
    optimizer.setup(img2)
    device.use()

    print(type(img1), type(img2()))
    ssim_value = ssim_loss(img1, img2(), 11, 11)
    print("Initial ssim:", ssim_value)

    step = 1
    while ssim_value.data < 0.95:
        # `loss` (defined elsewhere) is presumably the negative SSIM, so each
        # update step on img2 increases its similarity to img1.
        optimizer.update(loss, img1, img2())
        ssim_value = -loss(img1, img2())

        ssim_value_s = "ssim: {}".format(ssim_value.array)
        print("ssim:", ssim_value)

        if args.is_plot:
            im = (img2.W.array[0].transpose(1, 2, 0).clip(0, 1) * 255).astype(np.uint8)
            plt.imshow(im)
            plt.text(0, -5, ssim_value_s)
            plt.show()

        step += 1
Example No. 15
        x = generator(z)
        y1 = discriminator(x)

        # Train Generator
        loss_gen = F.softmax_cross_entropy(y1, Variable(xp.zeros(batch_size, dtype=np.int32)))
        loss_dis = F.softmax_cross_entropy(y1, Variable(xp.ones(batch_size, dtype=np.int32)))

        # Train Discriminator
        batch_x = Variable(train_x[perm[idx: idx + batch_size]])
        y2 = discriminator(batch_x)
        loss_dis += F.softmax_cross_entropy(y2, Variable(xp.zeros(batch_size, dtype=np.int32)))

        # Optimize the generator
        generator.cleargrads()
        loss_gen.backward()
        opt_gen.update()
        # Optimize the discriminator
        discriminator.cleargrads()
        loss_dis.backward()
        opt_dis.update()

        now = time.time()
        print('{}/{}, Gen_loss = {}, Dis_loss = {}, time = {:.2f}'.format(
            idx, n_train_data, loss_gen.data, loss_dis.data, now-cur_at))
        gen_loss.append(loss_gen.data)
        dis_loss.append(loss_dis.data)
        cur_at = now

    pickle.dump(generator, open('generator_snapshot.model', 'wb'))
    pickle.dump(discriminator, open('discriminator_snapshot.model', 'wb'))