Example #1
    def check_forward(self, x1_data, x2_data, x3_data):
        xp = self.link.xp
        x1 = chainer.Variable(x1_data) if self.input_variable else x1_data
        h1 = self.link(x1)
        with cuda.get_device(x1_data):
            c0 = chainer.Variable(xp.zeros((len(self.x1), self.out_size),
                                           dtype=self.x1.dtype))
            c1_expect, h1_expect = functions.lstm(c0, self.link.upward(x1))
        testing.assert_allclose(h1.data, h1_expect.data)
        testing.assert_allclose(self.link.h.data, h1_expect.data)
        testing.assert_allclose(self.link.c.data, c1_expect.data)

        batch = len(x2_data)
        x2 = chainer.Variable(x2_data) if self.input_variable else x2_data
        h1_in, h1_rest = functions.split_axis(
            self.link.h.data, [batch], axis=0)
        y2 = self.link(x2)
        with cuda.get_device(x2_data):
            c2_expect, y2_expect = \
                functions.lstm(c1_expect,
                               self.link.upward(x2) + self.link.lateral(h1_in))
        testing.assert_allclose(y2.data, y2_expect.data)
        testing.assert_allclose(self.link.h.data[:batch], y2_expect.data)
        testing.assert_allclose(self.link.h.data[batch:], h1_rest.data)

        x3 = chainer.Variable(x3_data) if self.input_variable else x3_data
        h2_rest = self.link.h
        y3 = self.link(x3)
        c3_expect, y3_expect = \
            functions.lstm(c2_expect, self.link.upward(x3))
        testing.assert_allclose(y3.data, y3_expect.data)
        testing.assert_allclose(self.link.h.data, h2_rest.data)
Example #2
    def _finetune(self, X, y):
        utils.disp('*** finetune ***', self.verbose)
        # construct model and setup optimizer
        params = {'l{}'.format(layer + 1): dA.encoder for layer, dA in enumerate(self.dAs)}
        params.update({'l{}'.format(len(self.dAs) + 1): F.Linear(self.dAs[-1].n_hidden, self.n_output)})
        self.model = FunctionSet(**params)
        self.optimizer.setup(self.model)
        if self.gpu >= 0:
            cuda.get_device(self.gpu).use()
            self.model.to_gpu()
        xp = cuda.cupy if self.gpu >= 0 else np

        n = len(X)
        for epoch in range(self.n_epoch_finetune):
            utils.disp('epoch: {}'.format(epoch + 1), self.verbose)

            perm = np.random.permutation(n)
            sum_loss = 0
            for i in range(0, n, self.batch_size):
                X_batch = xp.asarray(X[perm[i: i + self.batch_size]])
                y_batch = xp.asarray(y[perm[i: i + self.batch_size]])

                self.optimizer.zero_grads()
                y_var = self._forward(X_batch)
                loss = self._loss_func(y_var, Variable(y_batch))
                loss.backward()
                self.optimizer.update()

                sum_loss += float(loss.data) * len(X_batch)

            utils.disp('fine tune mean loss={}'.format(sum_loss / n), self.verbose)
Example #3
def get_model_optimizer(result_dir, args):
    model_fn = os.path.basename(args.model)
    model_name = model_fn.split('.')[0]
    module = imp.load_source(model_name, args.model)
    Net = getattr(module, model_name)

    dst = '%s/%s' % (result_dir, model_fn)
    if not os.path.exists(dst):
        shutil.copy(args.model, dst)

    dst = '%s/%s' % (result_dir, os.path.basename(__file__))
    if not os.path.exists(dst):
        shutil.copy(__file__, dst)

    # prepare model
    model = Net()
    if args.gpu >= 0:
        cuda.get_device(args.gpu).use()
    if args.restart_from is not None:
        model = pickle.load(open(args.restart_from, 'rb'))
    if args.gpu >= 0:
        model.to_gpu()

    # prepare optimizer
    if args.opt == 'AdaGrad':
        optimizer = optimizers.AdaGrad(lr=0.0005)
    elif args.opt == 'MomentumSGD':
        optimizer = optimizers.MomentumSGD(lr=0.0005, momentum=0.9)
    elif args.opt == 'Adam':
        optimizer = optimizers.Adam()
    else:
        raise Exception('No optimizer is selected')
    optimizer.setup(model)

    return model, optimizer
Example #4
def inspect(image_path, mean, model_path, label, network, gpu=-1):
    network = network.split(os.sep)[-1]
    model_name = re.sub(r"\.py$", "", network)
    model_module = load_module(os.path.dirname(model_path), model_name)
    mean_image = pickle.load(open(mean, 'rb'))
    model = model_module.Network()
    serializers.load_hdf5(model_path, model)
    if gpu >= 0:
        cuda.check_cuda_available()
        cuda.get_device(gpu).use()
        model.to_gpu()
    cropwidth = 256 - model.insize
    img = read_image(image_path, model, mean_image, cropwidth)
    x = np.ndarray((1, 3, model.insize, model.insize), dtype=np.float32)
    x[0] = img
    if gpu >= 0:
        x = cuda.to_gpu(x)
    score = model.predict(x)
    score = cuda.to_cpu(score.data)
    categories = np.loadtxt(label, str, delimiter="\t")
    top_k = 20
    prediction = sorted(zip(score[0].tolist(), categories),
                        key=lambda pair: pair[0], reverse=True)
    ret = []
    for rank, (score, name) in enumerate(prediction[:top_k], start=1):
        ret.append({"rank": rank, "name": name, "score": "{0:4.1f}%".format(score*100)})
    return ret
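A call sketch for the function above, with placeholder file names (the paths and the model module name are illustrative, not taken from the original project):

results = inspect('cat.jpg', 'mean.pkl', 'model.h5',
                  'labels.txt', 'models/alexnet.py', gpu=-1)
for entry in results[:5]:
    print('{rank}: {name} {score}'.format(**entry))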
Example #5
    def __init__(self, gpu, model, model_type, out_dim):
        self.gpu = gpu
        self.model = 'bvlc_alexnet.caffemodel'
        self.model_type = 'alexnet'
        self.batchsize = 1
        self.out_dim = out_dim

        if self.gpu >= 0:
            cuda.check_cuda_available()

        print('Loading Caffe model file %s...' % self.model, file = sys.stderr)
        self.func = caffe.CaffeFunction(self.model)
        print('Loaded', file=sys.stderr)
        if self.gpu >= 0:
            cuda.get_device(self.gpu).use()
            self.func.to_gpu()

        if self.model_type == 'alexnet':
            self.in_size = 227
            mean_image = np.load('ilsvrc_2012_mean.npy')
            del self.func.layers[15:23]
            self.outname = 'pool5'
            #del self.func.layers[13:23]
            #self.outname = 'conv5'

            
        cropwidth = 256 - self.in_size
        start = cropwidth // 2
        stop = start + self.in_size
        self.mean_image = mean_image[:, start:stop, start:stop].copy()
Example #6
def main(args):
    if args.gpu is not None:
        cuda.get_device(args.gpu).use()

    # load model
    vae = Vae.load(args.model)
    label_dim = vae.encoder.rnn.feature_dim

    # load vocabulary
    model_base_path = os.path.dirname(args.model)
    vocab = vocabulary.Vocab.load(os.path.join(model_base_path, 'vocab'))

    while True:
        try:
            # process input
            in_str = raw_input('> ').decode('utf-8')
            es = in_str.split()
            label_id = int(es[0])
            ids = vocab.convert_ids(es[1:])

            # create input
            xs = map(lambda d: Variable(np.asarray([d], dtype=np.int32), volatile='on'), ids)
            label_in = _create_label_var([label_id], label_dim)
            label_out = _create_label_var(range(label_dim), label_dim)

            score_ids, debug_info = vae.generate(xs, label_in, label_out, no_var=args.no_var, sample=args.sample, temp=args.temp, max_len=args.max_len)
            mu = debug_info['mu']
            ln_var = debug_info['ln_var']
            z = debug_info['z']
            for ids, score in score_ids:
                print u'{}\t{}'.format(score, u' '.join(vocab.convert_words(ids))).encode('utf-8')

        except Exception as ex:
            print 'Usage: <label ID> <space-separated tokens>'
Example #7
def generate(img):
    parser = argparse.ArgumentParser(description='Real-time style transfer image generator')
    #parser.add_argument('input')

    args = parser.parse_args()

    model = FastStyleNet()
    serializers.load_npz('models/seurat.model', model)
    gpu = -1  # CPU only; set to a device ID >= 0 to run on the GPU
    if gpu >= 0:
        cuda.get_device(gpu).use()
        model.to_gpu()
    xp = np if gpu < 0 else cuda.cupy

    start = time.time()
    image = xp.asarray(Image.open(img).convert('RGB'), dtype=xp.float32).transpose(2, 0, 1)
    image = image.reshape((1,) + image.shape)
    x = Variable(image)

    y = model(x)
    result = cuda.to_cpu(y.data)

    result = result.transpose(0, 2, 3, 1)
    result = result.reshape((result.shape[1:]))
    result = np.uint8(result)
    print(time.time() - start, 'sec')

    # Image.fromarray(result).save(args.out)
    return Image.fromarray(result)
Example #8
 def set_library(args):
   if args.gpu >= 0:
     XP.__lib = cuda.cupy
     cuda.get_device(args.gpu).use()
   else:
     XP.__lib = numpy
   XP.train = args.mode == 'train'
Example #9
    def __call__(self, x, test=False, finetune=False):
        """Invokes the forward propagation of BatchNormalization.

        BatchNormalization accepts additional arguments, which control three
        different running modes.

        Args:
            x (Variable): Input variable.
            test (bool): If ``True``, BatchNormalization runs in testing mode;
                it normalizes the input using pre-computed statistics.
            finetune (bool): If ``finetune`` is ``True`` and ``test`` is
                ``False``, BatchNormalization runs in fine-tuning mode; it
                accumulates the input array to compute population statistics
                for normalization, and normalizes the input using batch
                statistics.

        If ``test`` is ``False``, then BatchNormalization runs in training
        mode; it computes moving averages of mean and variance for evaluation
        during training, and normalizes the input using batch statistics.

        """
        if hasattr(self, 'gamma'):
            gamma = self.gamma
        else:
            with cuda.get_device(self._device_id):
                gamma = variable.Variable(self.xp.ones(
                    self.avg_mean.shape, dtype=x.dtype), volatile='auto')
        if hasattr(self, 'beta'):
            beta = self.beta
        else:
            with cuda.get_device(self._device_id):
                beta = variable.Variable(self.xp.zeros(
                    self.avg_mean.shape, dtype=x.dtype), volatile='auto')

        # Var is always ones
        with cuda.get_device(self._device_id):
            self.one_var = self.xp.ones(self.avg_mean.shape, dtype=x.dtype)

        if not test:
            if finetune:
                self.N += 1
                decay = 1. - 1. / self.N
            else:
                decay = self.decay

            func = batch_normalization.BatchNormalizationFunction(
                self.eps, self.avg_mean, self.avg_var, True, decay,
                self.use_cudnn)
            ret = func(x, gamma, beta)

            self.avg_mean[:] = func.running_mean
            self.avg_var[:] = func.running_var
        else:
            # Use running average statistics or fine-tuned statistics.
            mean = variable.Variable(self.avg_mean, volatile='auto')
            #var = variable.Variable(self.avg_var, volatile='auto')
            var = variable.Variable(self.one_var, volatile='auto')
            ret = batch_normalization.fixed_batch_normalization(
                x, gamma, beta, mean, var, self.eps, self.use_cudnn)
        return ret
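A minimal usage sketch of the three modes described in the docstring above. It assumes the pre-v2 Chainer API used throughout this snippet and, for brevity, uses the stock chainer.links.BatchNormalization, which shares the same (x, test, finetune) call signature as the modified link shown here:

import numpy as np
import chainer
import chainer.links as L

bn = L.BatchNormalization(16)
x = chainer.Variable(np.random.randn(8, 16).astype(np.float32))

y_train = bn(x)                    # training mode: batch statistics, moving averages updated
y_finetune = bn(x, finetune=True)  # fine-tuning mode: accumulates population statistics
y_test = bn(x, test=True)          # test mode: normalizes with the stored statistics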
Example #10
    def __init__(self, vocab_size, embed_size, hidden_size, choose_model,
                 use_gpu, gpu_id):
        # gpu Setting
        model = Alex_RNNLM(hidden_size)
        if choose_model == "Alex_RNNLM":
            model = Alex_RNNLM(hidden_size)
        if choose_model == "AlexBn_RNNLM":
            model = AlexBn_RNNLM(hidden_size)

        if use_gpu:
            cuda.get_device(gpu_id).use()
            model.to_gpu()
        # Setting Model
        super(EncoderDecoderAttention, self).__init__(
            enc=model,
            im1 = links.Linear(IM_SIZE, RESIZE_IM_SIZE),
            im2 = links.Linear(IM_SIZE, RESIZE_IM_SIZE),
            im3 = links.Linear(IM_SIZE, RESIZE_IM_SIZE),
            att=Attention(hidden_size, RESIZE_IM_SIZE),
            outay = links.Linear(hidden_size, hidden_size),
            dec=Decoder(vocab_size, embed_size, hidden_size),
        )
        self.vocab_size = vocab_size
        self.embed_size = embed_size
        self.hidden_size = hidden_size
        self.common_function = CommonFunction()
        self.use_gpu = use_gpu
        self.gpu_id = gpu_id
        self.choose_model = choose_model
        self.__set_gpu()
Example #11
def main(description, gpu, output):
	logging.basicConfig(level=logging.INFO)

	logging.info('fetch MNIST dataset')
	mnist = fetch_mldata(description)
	mnist.data = mnist.data.astype(numpy.float32)
	mnist.data /= 255
	mnist.target = mnist.target.astype(numpy.int32)

	data_train, data_test, target_train, target_test = train_test_split(mnist.data, mnist.target)

	data = data_train, data_test
	target = target_train, target_test

	start_time = time.time()

	if gpu >= 0:
		cuda.check_cuda_available()
		cuda.get_device(gpu).use()
		logging.info("Using gpu device {}".format(gpu))
	else:
		logging.info("Not using gpu device")

	mlp = MLP(data=data, target=target, gpu=gpu)
	mlp.train_and_test(n_epoch=1)

	end_time = time.time()

	logging.info("time = {} min".format((end_time - start_time) / 60.0))
	logging.info('saving trained mlp into {}'.format(output))
	with open(output, 'wb') as fp:
		pickle.dump(mlp, fp)
Example #12
    def __init__(self, vocab_size, embed_size, hidden_size, choose_model,
                 use_gpu, gpu_id):
        # gpu Setting
        model = Alex_RNNLM(hidden_size)
        if choose_model == "Alex_RNNLM":
            model = Alex_RNNLM(hidden_size)
        if choose_model == "AlexBn_RNNLM":
            model = AlexBn_RNNLM(hidden_size)

        if use_gpu:
            cuda.get_device(gpu_id).use()
            model.to_gpu()
        # Setting Model
        super(EncoderDecoder, self).__init__(
            enc=model,
            dec=Decoder(vocab_size, embed_size, hidden_size),
        )
        self.vocab_size = vocab_size
        self.embed_size = embed_size
        self.hidden_size = hidden_size
        self.common_function = CommonFunction()
        self.use_gpu = use_gpu
        self.gpu_id = gpu_id
        self.choose_model = choose_model
        self.__set_gpu()
Example #13
	def __init__(self, model, GPU_ID=0, cont=False):
		self.model = model
		self.cont = cont

		# moving model to default GPU
		cuda.get_device(GPU_ID).use()
		model.to_gpu()
Example #14
 def set_library(args):
   if args.use_gpu:
     XP.__lib = cuda.cupy
     cuda.get_device(args.gpu_device).use()
   else:
     XP.__lib = numpy
   XP.__train = args.mode == 'train'
Example #15
def main(args):
    # load model
    encdec = util.load_model(args.model)

    if args.gpu is not None:
        cuda.get_device(args.gpu).use()
        encdec.to_gpu()

    # load data
    model_base_path = os.path.dirname(args.model)

    if os.path.exists(os.path.join(model_base_path, 'vocab_src')):
        vocab_src_path = os.path.join(model_base_path, 'vocab_src')
        vocab_trg_path = os.path.join(model_base_path, 'vocab_trg')
        vocab_src = Vocab.load(vocab_src_path)
        vocab_trg = Vocab.load(vocab_trg_path)
    else:
        vocab_path = os.path.join(model_base_path, 'vocab')
        vocab = Vocab.load(vocab_path)
        vocab_src = vocab
        vocab_trg = vocab
    data = util.load_sentences(args.data, vocab_src)

    is_beam_search = args.beam > 1

    # create batches
    if is_beam_search:
        batch_size = 1
    else:
        batch_size = args.batch
    batches = util.create_batches_src(data, batch_size, args.bucket_step)

    # generate
    res = defaultdict(list)
    for idx_lst, xs_data in batches:
        if args.gpu is not None:
            xs_data = cuda.to_gpu(xs_data)
        xs = procedure.create_variables(xs_data, volatile='on')

        if is_beam_search:
            decoded = encdec.generate_beam(xs, beam_size=args.beam, max_len=args.max_len)
            if not args.n_best:
                # only show best result
                decoded = decoded[:1]
            assert len(idx_lst) == 1
            idx = idx_lst[0]
            for ids in decoded:
                words = map(vocab_trg.get_word, ids)
                res[idx].append(words)
        else:
            ids_batch, ys, ws = encdec.generate(xs, max_len=args.max_len, sample=args.sample, temp=args.temp)
            assert len(idx_lst) == len(ids_batch)
            for idx, ids in zip(idx_lst, ids_batch):
                words = map(vocab_trg.get_word, ids)
                res[idx].append(words)

    for idx, (idx2, cands) in enumerate(sorted(res.items(), key=lambda k_v: k_v[0])):
        for words in cands:
            assert idx == idx2
            print(' '.join(words))
Example #16
    def __call__(self, c, h, x):
        """Returns new cell state and updated output of LSTM.

        Args:
            c (~chainer.Variable): Cell states of LSTM units.
            h (~chainer.Variable): Output at the previous time step.
            x (~chainer.Variable): A new batch from the input sequence.

        Returns:
            tuple of ~chainer.Variable: Returns ``(c_new, h_new)``, where
                ``c_new`` represents the new cell state, and ``h_new`` is the
                updated output of LSTM units.

        """
        if self.upward.has_uninitialized_params:
            in_size = x.size // x.shape[0]
            with cuda.get_device(self._device_id):
                self.upward._initialize_params(in_size)
                self._initialize_params()

        lstm_in = self.upward(x)
        if h is not None:
            lstm_in += self.lateral(h)
        if c is None:
            xp = self.xp
            with cuda.get_device(self._device_id):
                c = variable.Variable(
                    xp.zeros((x.shape[0], self.state_size), dtype=x.dtype),
                    volatile='auto')
        return lstm.lstm(c, lstm_in)
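A minimal usage sketch of the stateless call convention documented above, assuming the link is chainer.links.StatelessLSTM from the same Chainer generation (sizes and batch shape are illustrative):

import numpy as np
import chainer
import chainer.links as L

lstm = L.StatelessLSTM(10, 20)  # in_size=10, out_size=20
c = h = None                    # a zero cell state is created internally when c is None
for _ in range(3):
    x = chainer.Variable(np.random.randn(4, 10).astype(np.float32))
    c, h = lstm(c, h, x)        # returns (c_new, h_new); feed them back at the next step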
Example #17
def load_states(stage_cnt, joint_idx):
    ''' Load model, optimizer, and losses '''
    _, model, optimizer, _, _ = setup_initial_states(stage_cnt)

    modif = create_modifier(stage_cnt, joint_idx)

    # Alexnet model
    filename = settings.RESUME_MODEL % modif
    logger.info('Load model from %s', filename)
    serializers.load_npz(filename, model)
    if settings.GPU >= 0:  # GPU setup
        cuda.get_device(settings.GPU).use()
        model.to_gpu()

    # Optimizer
    optimizer.setup(model)
    filename = settings.RESUME_OPTIMIZER % modif
    logger.info('Load optimizer from %s', filename)
    serializers.load_npz(filename, optimizer)

    # Losses
    filename = settings.RESUME_LOSS % modif
    logger.info('Load loss history from %s', filename)
    loss_data = np.load(filename)
    train_losses = loss_data['train'].tolist()
    test_losses = loss_data['test'].tolist()
    assert(len(train_losses) == len(test_losses))

    # Epoch count
    epoch_cnt = len(train_losses)
    logger.info('Resume from epoch %d', epoch_cnt)

    return epoch_cnt, model, optimizer, train_losses, test_losses
Example #18
 def test_forward_gpu_multi(self):
     with cuda.get_device(0):
         self.link.to_gpu()
         c = cuda.to_gpu(self.c)
         h = cuda.to_gpu(self.h)
         x = cuda.to_gpu(self.x)
     with cuda.get_device(1):
         self.check_forward(c, h, x)
Example #19
 def test_forward_gpu_multi(self):
     with cuda.get_device(0):
         self.link.to_gpu()
         x1 = cuda.to_gpu(self.x1)
         x2 = cuda.to_gpu(self.x2)
         x3 = cuda.to_gpu(self.x3)
     with cuda.get_device(1):
         self.check_forward(x1, x2, x3)
Example #20
 def test_addgrad_gpu_to_another_gpu(self):
     cp = cuda.cupy
     with cuda.get_device(1):
         a = cp.full(3, 10, dtype=np.float32)
     with cuda.get_device(0):
         b = cp.full(3, 20, dtype=np.float32)
         c = cp.full(3, 30, dtype=np.float32)
     self.check_addgrad(a, b, c)
Example #21
 def test_copydata_gpu_to_another_gpu(self):
     cp = cuda.cupy
     with cuda.get_device(0):
         data1 = cp.zeros(3, dtype=np.float32)
         expect = cp.ones(3, dtype=np.float32)
     with cuda.get_device(1):
         data2 = cp.ones(3, dtype=np.float32)
     self.check_copydata(data1, data2, expect)
Example #22
 def set_library(args):
     if args.gpu >= 0:
       XP.__lib = cuda.cupy
       cuda.get_device(args.gpu).use()
       XP.xp = XP.__lib
     else:
       XP.__lib = np
       XP.xp = XP.__lib
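The XP pattern above keeps one module-level handle to either numpy or cupy so the rest of the code stays backend-agnostic. A minimal self-contained sketch of the same idea (the XP holder class here is illustrative; only the dispatch pattern is taken from the snippet):

import numpy as np

class XP:
    xp = np  # default to CPU

    @staticmethod
    def set_library(gpu):
        if gpu >= 0:
            from chainer import cuda
            cuda.get_device(gpu).use()
            XP.xp = cuda.cupy  # all later array creation goes through cupy
        else:
            XP.xp = np

XP.set_library(-1)
zeros = XP.xp.zeros((2, 3), dtype=XP.xp.float32)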
Example #23
 def setup_gpu(self):
     if self.gpu >= 0:
         cuda.check_cuda_available()
         cuda.get_device(self.gpu).use()
         self.model.to_gpu()
         self.mod = cuda.cupy
     else:
         self.mod = np
Example #24
	def load_models(self, models):
		models = models.split(',')

		availiable_path = 'models_availiable'
		update_path = 'models_upload'
		all_models = 'models'

		models_availiable = os.listdir(availiable_path)
		models_upload = os.listdir(update_path)

		self.models = {m.split('.')[0].split('_')[0]:availiable_path +'/'+ m 
							for m in models_availiable}

		for m in models_upload:
			model_name = m.split('.')[0].split('_')[0]
			
			if not self.models.has_key(model_name):
				cmd = 'mv {update}/{update_name} {availiable}/'.format(
									update=update_path, update_name=m,
									availiable=availiable_path)
				(status, output) = commands.getstatusoutput(cmd)
				self.models[model_name] = availiable_path + '/' + m 
			else:
				cmd = 'mv {availiable} {all_models}'.format(
									availiable=self.models[model_name],
									all_models=all_models)
				(status, output) = commands.getstatusoutput(cmd)

				cmd = 'mv {update}/{update_name} {availiable}/'.format(
									update=update_path, update_name=m,
									availiable=availiable_path)
				(status, output) = commands.getstatusoutput(cmd)
				self.models[model_name] = availiable_path + '/' + m 

		# init denoise
		file_name = 'denoise.txt'
		if os.path.exists(file_name):
			with open(file_name, 'r') as pf:
				for line in pf.readlines():
					line = line.strip().split(' ')	
					self.denoise[line[0].strip()] = float(line[-1].strip())

		# init model
		for model_name,value in self.models.items():
			# model_name = m.split('/')[-1].split('.')[0]

			handle_model = FastStyleNet()
			serializers.load_npz(value, handle_model)

			if self.args.gpu >= 0:
				cuda.get_device(self.args.gpu).use()
				handle_model.to_gpu()

			self.models[model_name] = handle_model
		
		load_all_models = ', '.join(self.models.keys())
		logging.info('loading models : ' + load_all_models)
		logging.info('load success')
Example #25
 def __init__(self, discretize=10, gpu=-1):
     self.discretize = discretize
     self.gpu = gpu
     if self.gpu >= 0:
         from chainer import cuda
         self.xp = cuda.cupy
         cuda.get_device(self.gpu).use()
     else:
         self.xp = np
Example #26
 def __init__(self, meanpath, model, gpu=-1):
    self.mean = loader.load_mean(meanpath)
    self.model = model
    self.gpu = gpu
    self.insize = model.insize
    if gpu >= 0:
       cuda.check_cuda_available()
       cuda.get_device(gpu).use()
       self.model.to_gpu()
Example #27
 def test_addgrad_gpu_to_another_gpu_none_dst_dev1(self):
     cp = cuda.cupy
     with cuda.get_device(1):
         a = cp.full(3, 20, dtype=np.float32)
     with cuda.get_device(0):
         b = cp.full(3, 10, dtype=np.float32)
         c = cp.full(3, 20, dtype=np.float32)
     with cuda.get_device(1):
         self.check_addgrad(a, b, c, clear_dst_grad=True)
Example #28
 def __init__(self):
     super(MLP, self).__init__(
         l1=L.Linear(784, 100),
         l2=L.Linear(100, 100),
         l3=L.Linear(100, 10),
     )
     if cuda.available:
         cuda.get_device(0).use()
         self.to_gpu()
Example #29
 def set_net(self, net):
     self.source_net = deepcopy(net)
     self.target_net = deepcopy(net)
     if self.gpu:
         cuda.get_device(0).use()
         self.source_net.to_gpu()
         self.target_net.to_gpu()
     self.optimizer.setup(self.source_net)
     self.target_net.train = False
Example #30
 def test_zerograds_fill_multi_gpu(self):
     cupy = cuda.cupy
     with cuda.get_device(1):
         a = chainer.Variable(cupy.empty(3, dtype=np.float32))
         a.grad = cupy.empty_like(a.data)
     a.zerograd()
     self.assertEqual(int(a.grad.device), 1)
     with cuda.get_device(1):
         g_expect = cupy.zeros_like(a.data)
         cupy.testing.assert_array_equal(a.grad, g_expect)
Example #31
    def __init__(self, model, actor_optimizer, critic_optimizer, replay_buffer,
                 gamma, explorer,
                 gpu=None, replay_start_size=50000,
                 minibatch_size=32, update_interval=1,
                 target_update_interval=10000,
                 phi=lambda x: x,
                 target_update_method='hard',
                 soft_update_tau=1e-2,
                 n_times_update=1, average_q_decay=0.999,
                 average_loss_decay=0.99,
                 episodic_update=False,
                 episodic_update_len=None,
                 logger=getLogger(__name__),
                 batch_states=batch_states):

        self.model = model

        if gpu is not None and gpu >= 0:
            cuda.get_device(gpu).use()
            self.model.to_gpu(device=gpu)

        self.xp = self.model.xp
        self.replay_buffer = replay_buffer
        self.gamma = gamma
        self.explorer = explorer
        self.gpu = gpu
        self.target_update_interval = target_update_interval
        self.phi = phi
        self.target_update_method = target_update_method
        self.soft_update_tau = soft_update_tau
        self.logger = logger
        self.average_q_decay = average_q_decay
        self.average_loss_decay = average_loss_decay
        self.actor_optimizer = actor_optimizer
        self.critic_optimizer = critic_optimizer
        if episodic_update:
            update_func = self.update_from_episodes
        else:
            update_func = self.update
        self.replay_updater = ReplayUpdater(
            replay_buffer=replay_buffer,
            update_func=update_func,
            batchsize=minibatch_size,
            episodic_update=episodic_update,
            episodic_update_len=episodic_update_len,
            n_times_update=n_times_update,
            replay_start_size=replay_start_size,
            update_interval=update_interval,
        )
        self.batch_states = batch_states

        self.t = 0
        self.last_state = None
        self.last_action = None
        self.target_model = copy.deepcopy(self.model)
        disable_train(self.target_model['q_function'])
        disable_train(self.target_model['policy'])
        self.average_q = 0
        self.average_actor_loss = 0.0
        self.average_critic_loss = 0.0

        # Aliases for convenience
        self.q_function = self.model['q_function']
        self.policy = self.model['policy']
        self.target_q_function = self.target_model['q_function']
        self.target_policy = self.target_model['policy']

        self.sync_target_network()
Example #32
 def test_get_dummy_device_for_empty_array(self):
     x = cuda.cupy.array([]).reshape((0, 10))
     self.assertIs(cuda.get_device(x), cuda.DummyDevice)
Example #33
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--fcn16s', required=True)
    parser.add_argument('--gpu', type=int, default=0)
    parser.add_argument('--out', required=True)
    parser.add_argument('--dataset', default='v2', choices=['v1', 'v2'])
    args = parser.parse_args()

    fcn16s_path = args.fcn16s
    gpu = args.gpu
    out = args.out
    if args.dataset == 'v1':
        dataset_class = datasets.APC2016DatasetV1
    else:
        dataset_class = datasets.APC2016DatasetV2

    if not osp.exists(out):
        os.makedirs(out)

    # 1. dataset

    dataset_train = dataset_class('train')
    dataset_val = dataset_class('val')

    iter_train = chainer.iterators.SerialIterator(dataset_train, batch_size=1)
    iter_val = chainer.iterators.SerialIterator(dataset_val,
                                                batch_size=1,
                                                repeat=False,
                                                shuffle=False)

    # 2. model

    n_class = len(dataset_train.label_names)

    fcn16s = fcn.models.FCN16s(n_class=n_class)
    chainer.serializers.load_hdf5(fcn16s_path, fcn16s)

    model = fcn.models.FCN8s(n_class=n_class)
    model.train = True
    fcn.utils.copy_chainermodel(fcn16s, model)

    if gpu >= 0:
        cuda.get_device(gpu).use()
        model.to_gpu()

    # 3. optimizer

    optimizer = chainer.optimizers.Adam(alpha=1e-5)
    optimizer.setup(model)

    # training loop

    trainer = fcn.Trainer(
        device=gpu,
        model=model,
        optimizer=optimizer,
        iter_train=iter_train,
        iter_val=iter_val,
        out=out,
    )
    trainer.train(
        max_iter=150000,
        interval_eval=5000,
    )
Example #34
def model_to_gpu(model, gpu_num):
    cuda.get_device(gpu_num).use()
    model.to_gpu()
    model.init_state()
Example #35
def main():
    args = parse_args()
    args.seed = init_rand(seed=args.seed)

    _, log_file_exist = initialize_logging(
        logging_dir_path=args.save_dir,
        logging_file_name=args.logging_file_name,
        script_args=args,
        log_packages=args.log_packages,
        log_pip_packages=args.log_pip_packages)

    num_gpus = args.num_gpus
    if num_gpus > 0:
        cuda.get_device(0).use()
    batch_size = args.batch_size

    net = prepare_model(model_name=args.model,
                        use_pretrained=args.use_pretrained,
                        pretrained_model_file_path=args.resume.strip(),
                        num_gpus=num_gpus)
    num_classes = net.classes if hasattr(net, 'classes') else 1000
    input_image_size = net.in_size[0] if hasattr(
        net, 'in_size') else args.input_size

    train_iter, val_iter = get_data_iterators(
        data_dir=args.data_dir,
        batch_size=batch_size,
        num_workers=args.num_workers,
        num_classes=num_classes,
        input_image_size=input_image_size,
        resize_inv_factor=args.resize_inv_factor)

    trainer = prepare_trainer(net=net,
                              optimizer_name=args.optimizer_name,
                              lr=args.lr,
                              momentum=args.momentum,
                              num_epochs=args.num_epochs,
                              train_iter=train_iter,
                              val_iter=val_iter,
                              logging_dir_path=args.save_dir,
                              num_gpus=num_gpus)

    # if args.save_dir and args.save_interval:
    #     lp_saver = TrainLogParamSaver(
    #         checkpoint_file_name_prefix='imagenet_{}'.format(args.model),
    #         last_checkpoint_file_name_suffix="last",
    #         best_checkpoint_file_name_suffix=None,
    #         last_checkpoint_dir_path=args.save_dir,
    #         best_checkpoint_dir_path=None,
    #         last_checkpoint_file_count=2,
    #         best_checkpoint_file_count=2,
    #         checkpoint_file_save_callback=save_params,
    #         checkpoint_file_exts=['.npz', '.states'],
    #         save_interval=args.save_interval,
    #         num_epochs=args.num_epochs,
    #         param_names=['Val.Top1', 'Train.Top1', 'Val.Top5', 'Train.Loss', 'LR'],
    #         acc_ind=2,
    #         # bigger=[True],
    #         # mask=None,
    #         score_log_file_path=os.path.join(args.save_dir, 'score.log'),
    #         score_log_attempt_value=args.attempt,
    #         best_map_log_file_path=os.path.join(args.save_dir, 'best_map.log'))
    # else:
    #     lp_saver = None

    trainer.run()
Example #36
def main(args):
    source_dataset, target_dataset, vocab, vocab_inv = read_data_and_vocab(
        args.source_train,
        args.target_train,
        args.source_dev,
        args.target_dev,
        args.source_test,
        args.target_test,
        reverse_source=True)

    save_vocab(args.model_dir, vocab, vocab_inv)

    source_dataset_train, source_dataset_dev, source_dataset_test = source_dataset
    target_dataset_train, target_dataset_dev, target_dataset_test = target_dataset

    vocab_source, vocab_target = vocab
    vocab_inv_source, vocab_inv_target = vocab_inv

    # split into buckets
    source_buckets_train, target_buckets_train = make_buckets(
        source_dataset_train, target_dataset_train)
    if args.buckets_slice is not None:
        source_buckets_train = source_buckets_train[:args.buckets_slice + 1]
        target_buckets_train = target_buckets_train[:args.buckets_slice + 1]

    # development dataset
    source_buckets_dev = None
    if len(source_dataset_dev) > 0:
        source_buckets_dev, target_buckets_dev = make_buckets(
            source_dataset_dev, target_dataset_dev)
        if args.buckets_slice is not None:
            source_buckets_dev = source_buckets_dev[:args.buckets_slice + 1]
            target_buckets_dev = target_buckets_dev[:args.buckets_slice + 1]

    # test dataset
    source_buckets_test = None
    if len(source_dataset_test) > 0:
        source_buckets_test, target_buckets_test = make_buckets(
            source_dataset_test, target_dataset_test)
        if args.buckets_slice is not None:
            source_buckets_test = source_buckets_test[:args.buckets_slice + 1]
            target_buckets_test = target_buckets_test[:args.buckets_slice + 1]

    # show log
    dump_dataset(
        source_dataset, vocab,
        (source_buckets_train, source_buckets_dev, source_buckets_test))

    # sample buckets in proportion to their size so each bucket is
    # visited with the right frequency within an epoch
    required_iterations = []
    for data in source_buckets_train:
        itr = len(data) // args.batchsize + 1
        required_iterations.append(itr)
    total_iterations = sum(required_iterations)
    buckets_distribution = np.asarray(required_iterations,
                                      dtype=float) / total_iterations

    # init
    model = load_model(args.model_dir)
    if model is None:
        model = seq2seq(len(vocab_source),
                        len(vocab_target),
                        args.ndim_embedding,
                        args.ndim_h,
                        args.num_layers,
                        pooling=args.pooling,
                        dropout=args.dropout,
                        zoneout=args.zoneout,
                        weightnorm=args.weightnorm,
                        wgain=args.wgain,
                        densely_connected=args.densely_connected,
                        attention=args.attention)

    if args.gpu_device >= 0:
        cuda.get_device(args.gpu_device).use()
        model.to_gpu()

    # setup an optimizer
    optimizer = get_optimizer(args.optimizer, args.learning_rate,
                              args.momentum)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.GradientClipping(args.grad_clip))
    optimizer.add_hook(chainer.optimizer.WeightDecay(args.weight_decay))
    final_learning_rate = 1e-5
    total_time = 0

    indices_train = []
    for bucket_idx, bucket in enumerate(source_buckets_train):
        indices = np.arange(len(bucket))
        np.random.shuffle(indices)
        indices_train.append(indices)

    def mean(l):
        return sum(l) / len(l)

    # training
    for epoch in range(1, args.epoch + 1):
        print("Epoch", epoch)
        start_time = time.time()

        with chainer.using_config("train", True):

            for itr in range(total_iterations):
                bucket_idx = int(
                    np.random.choice(np.arange(len(source_buckets_train)),
                                     size=1,
                                     p=buckets_distribution))
                source_bucket = source_buckets_train[bucket_idx]
                target_bucket = target_buckets_train[bucket_idx]

                # sample minibatch
                source_batch = source_bucket[:args.batchsize]
                target_batch = target_bucket[:args.batchsize]
                skip_mask = source_batch != ID_PAD
                target_batch_input, target_batch_output = make_source_target_pair(
                    target_batch)

                # to gpu
                if args.gpu_device >= 0:
                    skip_mask = cuda.to_gpu(skip_mask)
                    source_batch = cuda.to_gpu(source_batch)
                    target_batch_input = cuda.to_gpu(target_batch_input)
                    target_batch_output = cuda.to_gpu(target_batch_output)

                # compute loss
                model.reset_state()
                if args.attention:
                    last_hidden_states, last_layer_outputs = model.encode(
                        source_batch, skip_mask)
                    y_batch = model.decode(target_batch_input,
                                           last_hidden_states,
                                           last_layer_outputs, skip_mask)
                else:
                    last_hidden_states = model.encode(source_batch, skip_mask)
                    y_batch = model.decode(target_batch_input,
                                           last_hidden_states)
                loss = softmax_cross_entropy(y_batch,
                                             target_batch_output,
                                             ignore_label=ID_PAD)

                # update parameters
                optimizer.update(lossfun=lambda: loss)

                # show log
                printr("iteration {}/{}".format(itr + 1, total_iterations))

                source_buckets_train[bucket_idx] = np.roll(source_bucket,
                                                           -args.batchsize,
                                                           axis=0)  # shift
                target_buckets_train[bucket_idx] = np.roll(target_bucket,
                                                           -args.batchsize,
                                                           axis=0)  # shift

            # shuffle
            for bucket_idx in range(len(source_buckets_train)):
                indices = indices_train[bucket_idx]
                np.random.shuffle(indices)
                source_buckets_train[bucket_idx] = source_buckets_train[
                    bucket_idx][indices]
                target_buckets_train[bucket_idx] = target_buckets_train[
                    bucket_idx][indices]

        # serialize
        save_model(args.model_dir, model)

        # clear console
        printr("")

        # show log
        with chainer.using_config("train", False):
            if epoch % args.interval == 0:
                printb("translate (train)")
                dump_random_source_target_translation(model,
                                                      source_buckets_train,
                                                      target_buckets_train,
                                                      vocab_inv_source,
                                                      vocab_inv_target,
                                                      num_translate=5,
                                                      beam_width=1)

                if source_buckets_dev is not None:
                    printb("translate (dev)")
                    dump_random_source_target_translation(model,
                                                          source_buckets_dev,
                                                          target_buckets_dev,
                                                          vocab_inv_source,
                                                          vocab_inv_target,
                                                          num_translate=5,
                                                          beam_width=1)

                if source_buckets_dev is not None:
                    printb("WER (dev)")
                    wer_dev = compute_error_rate_buckets(model,
                                                         source_buckets_dev,
                                                         target_buckets_dev,
                                                         len(vocab_inv_target),
                                                         beam_width=1)
                    print(mean(wer_dev), wer_dev)

        elapsed_time = (time.time() - start_time) / 60.
        total_time += elapsed_time
        print("done in {} min, lr = {:.4f}, total {} min".format(
            int(elapsed_time), get_current_learning_rate(optimizer),
            int(total_time)))

        # decay learning rate
        decay_learning_rate(optimizer, args.lr_decay_factor,
                            final_learning_rate)
Example #37
def main():

    logging.basicConfig(
        format='%(asctime)s : %(threadName)s : %(levelname)s : %(message)s',
        level=logging.INFO)

    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu',
                        '-g',
                        default=-1,
                        type=int,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--batchsize',
                        dest='batchsize',
                        type=int,
                        default=32,
                        help='learning minibatch size')
    parser.add_argument('--batchsize_semi',
                        dest='batchsize_semi',
                        type=int,
                        default=64,
                        help='learning minibatch size')
    parser.add_argument('--n_epoch',
                        dest='n_epoch',
                        type=int,
                        default=30,
                        help='n_epoch')
    parser.add_argument('--pretrained_model',
                        dest='pretrained_model',
                        type=str,
                        default='',
                        help='pretrained_model')
    parser.add_argument('--use_unlabled_to_vocab',
                        dest='use_unlabled_to_vocab',
                        type=int,
                        default=1,
                        help='use_unlabled_to_vocab')
    parser.add_argument('--use_rational',
                        dest='use_rational',
                        type=int,
                        default=0,
                        help='use_rational')
    parser.add_argument('--save_name',
                        dest='save_name',
                        type=str,
                        default='sentiment_model',
                        help='save_name')
    parser.add_argument('--n_layers',
                        dest='n_layers',
                        type=int,
                        default=1,
                        help='n_layers')
    parser.add_argument('--alpha',
                        dest='alpha',
                        type=float,
                        default=0.001,
                        help='alpha')
    parser.add_argument('--alpha_decay',
                        dest='alpha_decay',
                        type=float,
                        default=0.0,
                        help='alpha_decay')
    parser.add_argument('--clip',
                        dest='clip',
                        type=float,
                        default=5.0,
                        help='clip')
    parser.add_argument('--debug_mode',
                        dest='debug_mode',
                        type=int,
                        default=0,
                        help='debug_mode')
    parser.add_argument('--use_exp_decay',
                        dest='use_exp_decay',
                        type=int,
                        default=1,
                        help='use_exp_decay')
    parser.add_argument('--load_trained_lstm',
                        dest='load_trained_lstm',
                        type=str,
                        default='',
                        help='load_trained_lstm')
    parser.add_argument('--freeze_word_emb',
                        dest='freeze_word_emb',
                        type=int,
                        default=0,
                        help='freeze_word_emb')
    parser.add_argument('--dropout',
                        dest='dropout',
                        type=float,
                        default=0.50,
                        help='dropout')
    parser.add_argument('--use_adv',
                        dest='use_adv',
                        type=int,
                        default=0,
                        help='use_adv')
    parser.add_argument('--xi_var',
                        dest='xi_var',
                        type=float,
                        default=1.0,
                        help='xi_var')
    parser.add_argument('--xi_var_first',
                        dest='xi_var_first',
                        type=float,
                        default=1.0,
                        help='xi_var_first')
    parser.add_argument('--lower',
                        dest='lower',
                        type=int,
                        default=1,
                        help='lower')
    parser.add_argument('--nl_factor',
                        dest='nl_factor',
                        type=float,
                        default=1.0,
                        help='nl_factor')
    parser.add_argument('--min_count',
                        dest='min_count',
                        type=int,
                        default=1,
                        help='min_count')
    parser.add_argument('--ignore_unk',
                        dest='ignore_unk',
                        type=int,
                        default=0,
                        help='ignore_unk')
    parser.add_argument('--use_semi_data',
                        dest='use_semi_data',
                        type=int,
                        default=0,
                        help='use_semi_data')
    parser.add_argument('--add_labeld_to_unlabel',
                        dest='add_labeld_to_unlabel',
                        type=int,
                        default=1,
                        help='add_labeld_to_unlabel')
    parser.add_argument('--norm_sentence_level',
                        dest='norm_sentence_level',
                        type=int,
                        default=1,
                        help='norm_sentence_level')
    parser.add_argument('--dataset',
                        default='imdb',
                        choices=['imdb', 'elec', 'rotten', 'dbpedia', 'rcv1'])
    parser.add_argument('--eval',
                        dest='eval',
                        type=int,
                        default=0,
                        help='eval')
    parser.add_argument('--emb_dim',
                        dest='emb_dim',
                        type=int,
                        default=256,
                        help='emb_dim')
    parser.add_argument('--hidden_dim',
                        dest='hidden_dim',
                        type=int,
                        default=1024,
                        help='hidden_dim')
    parser.add_argument('--hidden_cls_dim',
                        dest='hidden_cls_dim',
                        type=int,
                        default=30,
                        help='hidden_cls_dim')
    parser.add_argument('--adaptive_softmax',
                        dest='adaptive_softmax',
                        type=int,
                        default=1,
                        help='adaptive_softmax')
    parser.add_argument('--random_seed',
                        dest='random_seed',
                        type=int,
                        default=1234,
                        help='random_seed')
    parser.add_argument('--n_class',
                        dest='n_class',
                        type=int,
                        default=2,
                        help='n_class')
    parser.add_argument('--word_only',
                        dest='word_only',
                        type=int,
                        default=0,
                        help='word_only')
    # iVAT
    parser.add_argument('--use_attn_d',
                        dest='use_attn_d',
                        type=int,
                        default=0,
                        help='use_attn_d')
    parser.add_argument('--nn_k',
                        dest='nn_k',
                        type=int,
                        default=10,
                        help='nn_k')
    parser.add_argument('--nn_k_offset',
                        dest='nn_k_offset',
                        type=int,
                        default=1,
                        help='nn_k_offset')
    parser.add_argument('--online_nn',
                        dest='online_nn',
                        type=int,
                        default=0,
                        help='online_nn')
    parser.add_argument('--use_limit_vocab',
                        dest='use_limit_vocab',
                        type=int,
                        default=1,
                        help='use_limit_vocab')
    parser.add_argument('--batchsize_nn',
                        dest='batchsize_nn',
                        type=int,
                        default=10,
                        help='batchsize_nn')
    parser.add_argument('--update_nearest_epoch',
                        dest='update_nearest_epoch',
                        type=int,
                        default=1,
                        help='update_nearest_epoch')

    args = parser.parse_args()
    batchsize = args.batchsize
    batchsize_semi = args.batchsize_semi
    print(args)

    random.seed(args.random_seed)
    np.random.seed(args.random_seed)
    os.environ["CHAINER_SEED"] = str(args.random_seed)
    os.makedirs("models", exist_ok=True)

    if args.debug_mode:
        chainer.set_debug(True)

    use_unlabled_to_vocab = args.use_unlabled_to_vocab
    lower = args.lower == 1
    n_char_vocab = 1
    n_class = 2
    if args.dataset == 'imdb':
        vocab_obj, dataset, lm_data, t_vocab = utils.load_dataset_imdb(
            include_pretrain=use_unlabled_to_vocab,
            lower=lower,
            min_count=args.min_count,
            ignore_unk=args.ignore_unk,
            use_semi_data=args.use_semi_data,
            add_labeld_to_unlabel=args.add_labeld_to_unlabel)
        (train_x, train_x_len, train_y, dev_x, dev_x_len, dev_y, test_x,
         test_x_len, test_y) = dataset
        vocab, vocab_count = vocab_obj
        n_class = 2
    # TODO: add other dataset code

    if args.use_semi_data:
        semi_train_x, semi_train_x_len = lm_data

    print('train_vocab_size:', t_vocab)

    vocab_inv = dict([(widx, w) for w, widx in vocab.items()])
    print('vocab_inv:', len(vocab_inv))

    xp = cuda.cupy if args.gpu >= 0 else np
    if args.gpu >= 0:
        cuda.get_device(args.gpu).use()
        xp.random.seed(args.random_seed)

    n_vocab = len(vocab)
    model = nets.uniLSTM_iVAT(n_vocab=n_vocab,
                              emb_dim=args.emb_dim,
                              hidden_dim=args.hidden_dim,
                              use_dropout=args.dropout,
                              n_layers=args.n_layers,
                              hidden_classifier=args.hidden_cls_dim,
                              use_adv=args.use_adv,
                              xi_var=args.xi_var,
                              n_class=n_class,
                              args=args)
    model.train_vocab_size = t_vocab
    model.vocab_size = n_vocab
    model.logging = logging

    if args.pretrained_model != '':
        # load pretrained LM model
        pretrain_model = lm_nets.RNNForLM(
            n_vocab,
            1024,
            args.n_layers,
            0.50,
            share_embedding=False,
            adaptive_softmax=args.adaptive_softmax)
        serializers.load_npz(args.pretrained_model, pretrain_model)
        pretrain_model.lstm = pretrain_model.rnn
        model.set_pretrained_lstm(pretrain_model, word_only=args.word_only)

    all_nn_flag = args.use_attn_d
    if all_nn_flag and args.online_nn == 0:
        word_embs = model.word_embed.W.data
        model.norm_word_embs = word_embs / np.linalg.norm(
            word_embs, axis=1).reshape(-1, 1)
        model.norm_word_embs = np.array(model.norm_word_embs, dtype=np.float32)

    if args.load_trained_lstm != '':
        serializers.load_hdf5(args.load_trained_lstm, model)

    if args.gpu >= 0:
        model.to_gpu()

    if all_nn_flag and args.online_nn == 0:
        model.compute_all_nearest_words(top_k=args.nn_k)

        # check nearest words
        def most_sims(word):
            if word not in vocab:
                logging.info('[not found]:{}'.format(word))
                return False
            idx = vocab[word]
            idx_gpu = xp.array([idx], dtype=xp.int32)
            top_idx = model.get_nearest_words(idx_gpu)
            sim_ids = top_idx[0]
            words = [vocab_inv[int(i)] for i in sim_ids]
            word_line = ','.join(words)
            logging.info('{}\t\t{}'.format(word, word_line))

        most_sims(u'good')
        most_sims(u'this')
        most_sims(u'that')
        most_sims(u'awesome')
        most_sims(u'bad')
        most_sims(u'wrong')

    def evaluate(x_set, x_length_set, y_set):
        chainer.config.train = False
        chainer.config.enable_backprop = False
        iteration_list = range(0, len(x_set), batchsize)
        correct_cnt = 0
        total_cnt = 0.0
        predicted_np = []

        for i_index, index in enumerate(iteration_list):
            x = [to_gpu(_x) for _x in x_set[index:index + batchsize]]
            x_length = x_length_set[index:index + batchsize]
            y = to_gpu(y_set[index:index + batchsize])
            output = model(x, x_length)

            predict = xp.argmax(output.data, axis=1)
            correct_cnt += xp.sum(predict == y)
            total_cnt += len(y)

        accuracy = (correct_cnt / total_cnt) * 100.0
        chainer.config.enable_backprop = True
        return accuracy

    def get_unlabled(perm_semi, i_index):
        index = i_index * batchsize_semi
        sample_idx = perm_semi[index:index + batchsize_semi]
        x = [to_gpu(semi_train_x[_i]) for _i in sample_idx]
        x_length = [semi_train_x_len[_i] for _i in sample_idx]
        return x, x_length

    base_alpha = args.alpha
    opt = optimizers.Adam(alpha=base_alpha)
    opt.setup(model)
    opt.add_hook(chainer.optimizer.GradientClipping(args.clip))

    if args.freeze_word_emb:
        model.freeze_word_emb()

    prev_dev_accuracy = 0.0
    global_step = 0.0
    adv_rep_num_statics = {}
    adv_rep_pos_statics = {}

    if args.eval:
        dev_accuracy = evaluate(dev_x, dev_x_len, dev_y)
        log_str = ' [dev] accuracy:{}'.format(dev_accuracy)
        logging.info(log_str)

        # test
        test_accuracy = evaluate(test_x, test_x_len, test_y)
        log_str = ' [test] accuracy:{}'.format(test_accuracy)
        logging.info(log_str)

    for epoch in range(args.n_epoch):
        logging.info('epoch:' + str(epoch))
        # train
        model.cleargrads()
        chainer.config.train = True
        iteration_list = range(0, len(train_x), batchsize)

        perm = np.random.permutation(len(train_x))
        if args.use_semi_data:
            perm_semi = [
                np.random.permutation(len(semi_train_x)) for _ in range(2)
            ]
            perm_semi = np.concatenate(perm_semi, axis=0)

        def idx_func(shape):
            return xp.arange(shape).astype(xp.int32)

        sum_loss = 0.0
        sum_loss_z = 0.0
        sum_loss_z_sparse = 0.0
        sum_loss_label = 0.0
        avg_rate = 0.0
        avg_rate_num = 0.0
        correct_cnt = 0
        total_cnt = 0.0
        N = len(iteration_list)
        is_adv_example_list = []
        is_adv_example_disc_list = []
        is_adv_example_disc_craft_list = []
        y_np = []
        predicted_np = []
        save_items = []
        for i_index, index in enumerate(iteration_list):
            global_step += 1.0
            model.set_train(True)
            sample_idx = perm[index:index + batchsize]
            x = [to_gpu(train_x[_i]) for _i in sample_idx]
            x_length = [train_x_len[_i] for _i in sample_idx]

            y = to_gpu(train_y[sample_idx])

            d = None

            # Classification loss
            output = model(x, x_length)
            output_original = output
            loss = F.softmax_cross_entropy(output, y, normalize=True)
            if args.use_adv or args.use_semi_data:
                # Adversarial Training
                if args.use_adv:
                    output = model(x, x_length, first_step=True, d=None)
                    # Adversarial loss (First step)
                    loss_adv_first = F.softmax_cross_entropy(output,
                                                             y,
                                                             normalize=True)
                    model.cleargrads()
                    loss_adv_first.backward()
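                    # The gradient from this first pass defines the adversarial direction d below.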

                    if args.use_attn_d:
                        # iAdv
                        attn_d_grad = model.attention_d_var.grad
                        attn_d_grad = F.normalize(attn_d_grad, axis=1)
                        # Get directional vector
                        dir_normed = model.dir_normed.data
                        attn_d = F.broadcast_to(attn_d_grad,
                                                dir_normed.shape).data
                        d = xp.sum(attn_d * dir_normed, axis=1)
                    else:
                        # Adv
                        d = model.d_var.grad
                    output = model(x, x_length, d=d)
                    # Adversarial loss
                    loss_adv = F.softmax_cross_entropy(output,
                                                       y,
                                                       normalize=True)
                    loss += loss_adv * args.nl_factor

                # Virtual Adversarial Training
                if args.use_semi_data:
                    x, length = get_unlabeled(perm_semi, i_index)
                    output_original = model(x, length)
                    output_vat = model(x, length, first_step=True, d=None)
                    loss_vat_first = nets.kl_loss(xp, output_original.data,
                                                  output_vat)
                    model.cleargrads()
                    loss_vat_first.backward()
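                    # As in the adversarial branch, the gradient of the KL term gives the virtual adversarial direction.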
                    if args.use_attn_d:
                        # iVAT (ours)
                        attn_d_grad = model.attention_d_var.grad
                        attn_d_grad = F.normalize(attn_d_grad, axis=1)
                        # Get directional vector
                        dir_normed = model.dir_normed.data
                        attn_d = F.broadcast_to(attn_d_grad,
                                                dir_normed.shape).data
                        d_vat = xp.sum(attn_d * dir_normed, axis=1)
                    else:
                        # VAT
                        d_vat = model.d_var.grad

                    output_vat = model(x, length, d=d_vat)
                    loss_vat = nets.kl_loss(xp, output_original.data,
                                            output_vat)
                    loss += loss_vat

            predict = xp.argmax(output.data, axis=1)
            correct_cnt += xp.sum(predict == y)
            total_cnt += len(y)

            # update
            model.cleargrads()
            loss.backward()
            opt.update()

            if args.alpha_decay > 0.0:
                if args.use_exp_decay:
                    opt.hyperparam.alpha = (base_alpha) * (args.alpha_decay**
                                                           global_step)
                else:
                    opt.hyperparam.alpha *= args.alpha_decay  # 0.9999

            sum_loss += loss.data

        accuracy = (correct_cnt / total_cnt) * 100.0

        logging.info(' [train] sum_loss: {}'.format(sum_loss / N))
        logging.info(' [train] alpha:{}, global_step:{}'.format(
            opt.hyperparam.alpha, global_step))
        logging.info(' [train] accuracy:{}'.format(accuracy))

        model.set_train(False)
        # dev
        dev_accuracy = evaluate(dev_x, dev_x_len, dev_y)
        log_str = ' [dev] accuracy:{}'.format(str(dev_accuracy))
        logging.info(log_str)

        # test
        test_accuracy = evaluate(test_x, test_x_len, test_y)
        log_str = ' [test] accuracy:{}'.format(str(test_accuracy))
        logging.info(log_str)

        last_epoch_flag = args.n_epoch - 1 == epoch
        if prev_dev_accuracy < dev_accuracy:

            logging.info(' => '.join(
                [str(prev_dev_accuracy),
                 str(dev_accuracy)]))
            result_str = 'dev_acc_' + str(dev_accuracy)
            result_str += '_test_acc_' + str(test_accuracy)
            model_filename = './models/' + '_'.join(
                [args.save_name, str(epoch), result_str])
            # if len(sentences_train_list) == 1:
            serializers.save_hdf5(model_filename + '.model', model)

            prev_dev_accuracy = dev_accuracy

        nn_update_flag = args.update_nearest_epoch > 0 and (
            epoch % args.update_nearest_epoch == 0)
        if all_nn_flag and nn_update_flag and args.online_nn == 0:
            model.cleargrads()
            x = None
            x_length = None
            y = None
            model.compute_all_nearest_words(top_k=args.nn_k)
Example #38
0
def _sqnorm(x):
    with cuda.get_device(x):
        x = x.ravel()
        return float(x.dot(x))
Example #39
0
import numpy as np
import chainer
from chainer import Chain, Variable, cuda, functions, links, optimizer, optimizers, serializers
import MeCab
import pickle
import os
from utils import *

# Add an attention mechanism
# Giving the batch size when initializing the state removes the need to adjust for the number of data samples

if FLAG_GPU:
    cuda.check_cuda_available()
    cuda.get_device(0).use()
    ARR = cuda.cupy
else:
    ARR = np


class LSTM_Encoder(Chain):
    def __init__(self, vocab_size, embed_size, hidden_size):
        super(LSTM_Encoder,
              self).__init__(xe=links.EmbedID(vocab_size,
                                              embed_size,
                                              ignore_label=-1),
                             eh=links.Linear(embed_size, 4 * hidden_size),
                             hh=links.Linear(hidden_size, 4 * hidden_size))

    def __call__(self, x, c, h):
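        # Embed the word ID, squash it with tanh, and feed the sum of the input and recurrent projections to the LSTM cell.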
        e = functions.tanh(self.xe(x))
        return functions.lstm(c, self.eh(e) + self.hh(h))
Example #40
0
    def train(self):
        params = self.params

        use_gpu = params['gpu'] >= 0
        if use_gpu:
            cuda.get_device(params['gpu']).use()
            self.dcgan.to_gpu()

        xp = cp if use_gpu else np

        nz = params['nz']
        train = params['train']
        batchsize = params['batchsize']
        output_interval = train // 2
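        # Fix 100 latent vectors for periodic visualization, then restore a non-deterministic seed for training noise.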
        xp.random.seed(0)
        z_vis = xp.random.uniform(-1, 1, (100, nz)).astype(np.float32)
        xp.random.seed()
        loader = ImageLoader(self._image_dir, batchsize)

        for epoch in range(params['current_epoch'] + 1, params['epoch']):
            start = time.time()
            perm = np.random.permutation(train)
            sum_loss_gen = 0
            sum_loss_dis = 0

            for i in range(train):
                x = np.zeros((batchsize, 3, DCGAN.SIZE[0], DCGAN.SIZE[1]),
                             np.float32)
                for j, image in loader:
                    x[j] = image
                if use_gpu:
                    x = cuda.to_gpu(x)

                x = Variable(x)
                z = Variable(
                    xp.random.uniform(-1, 1,
                                      (batchsize, nz)).astype(np.float32))

                loss_gen, loss_dis = self.dcgan(z, x)

                self.opt_gen.zero_grads()
                loss_gen.backward()
                self.opt_gen.update()

                self.opt_dis.zero_grads()
                loss_dis.backward()
                self.opt_dis.update()

                sum_loss_gen += loss_gen.data.get(
                ) if use_gpu else loss_gen.data
                sum_loss_dis += loss_dis.data.get(
                ) if use_gpu else loss_dis.data

                if i % output_interval == 0:
                    pylab.rcParams['figure.figsize'] = (16.0, 16.0)
                    pylab.clf()
                    z = z_vis
                    z[50:, :] = xp.random.uniform(-1, 1,
                                                  (50, nz)).astype(np.float32)
                    z = Variable(z)
                    x = self.dcgan.generate(z)
                    for j in range(x.shape[0]):
                        image = x[j]
                        pylab.subplot(10, 10, j + 1)
                        pylab.imshow(image)
                        pylab.axis('off')
                    image_path = os.path.join(
                        self._output_dir,
                        '{}_{}_{}.png'.format(self.name, epoch, i))
                    pylab.savefig(image_path)

            sum_loss_gen /= train
            sum_loss_dis /= train
            params['current_epoch'] = epoch
            params['losses'][epoch] = {
                'loss_gen': sum_loss_gen,
                'loss_dis': sum_loss_dis
            }
            self.save()
            elapsed = time.time() - start
            print('epoch {} ({:.2f} sec) / loss_gen: {:.8f}, loss_dis: {:.8f}'.
                  format(epoch, elapsed, sum_loss_gen, sum_loss_dis))
Example #41
0
def calculate_loss(model, dataset, position):
    # use random window size in the same way as the original word2vec
    # implementation.
    w = np.random.randint(args.window - 1) + 1
    # offset is [-w, ..., -1, 1, ..., w]
    offset = np.concatenate([np.arange(-w, 0), np.arange(1, w + 1)])
    pos = np.expand_dims(position, 0) + np.expand_dims(offset, 1)
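    # pos has shape (2 * w, len(position)): the context indices around each center position.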
    d = xp.asarray(dataset.take(pos))
    context = chainer.Variable(d)
    x_data = xp.asarray(dataset.take(position))
    x = chainer.Variable(x_data)
    return model(x, context)


if args.gpu >= 0:
    cuda.get_device(args.gpu).use()

train, _, _ = chainer.datasets.get_ptb_words()
if args.test:
    train = train[:100]

vocab = chainer.datasets.get_ptb_words_vocabulary()
index2word = {wid: word for word, wid in six.iteritems(vocab)}

counts = collections.Counter(train)
n_vocab = max(train) + 1

print('n_vocab: %d' % n_vocab)
print('data length: %d' % len(train))

if args.out_type == 'hsm':
Example #42
0
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--batchsize", "-b", type=int, default=64)
    parser.add_argument("--total-epochs", "-e", type=int, default=5000)
    parser.add_argument("--num-labeled-data", "-nl", type=int, default=100)
    parser.add_argument("--gpu-device", "-g", type=int, default=0)
    parser.add_argument("--grad-clip", "-gc", type=float, default=5)
    parser.add_argument("--seed", type=int, default=0)
    parser.add_argument("--model", "-m", type=str, default="model.hdf5")
    args = parser.parse_args()

    np.random.seed(args.seed)

    model = Model()
    model.load(args.model)

    mnist_train, mnist_test = chainer.datasets.get_mnist()
    images_train, labels_train = mnist_train._datasets
    images_test, labels_test = mnist_test._datasets

    # normalize
    images_train = (images_train - 0.5) * 2
    images_test = (images_test - 0.5) * 2

    dataset = Dataset(train=(images_train, labels_train),
                      test=(images_test, labels_test),
                      num_labeled_data=args.num_labeled_data,
                      num_classes=model.ndim_y)
    print("#labeled:	{}".format(dataset.get_num_labeled_data()))
    print("#unlabeled:	{}".format(dataset.get_num_unlabeled_data()))
    _, labels = dataset.get_labeled_data()
    print("labeled data:", labels)

    total_iterations_train = len(images_train) // args.batchsize

    # optimizers
    optimizer_encoder = Optimizer("msgd", 0.01, 0.9)
    optimizer_encoder.setup(model.encoder)
    if args.grad_clip > 0:
        optimizer_encoder.add_hook(GradientClipping(args.grad_clip))

    optimizer_semi_supervised = Optimizer("msgd", 0.1, 0.9)
    optimizer_semi_supervised.setup(model.encoder)
    if args.grad_clip > 0:
        optimizer_semi_supervised.add_hook(GradientClipping(args.grad_clip))

    optimizer_generator = Optimizer("msgd", 0.1, 0.1)
    optimizer_generator.setup(model.encoder)
    if args.grad_clip > 0:
        optimizer_generator.add_hook(GradientClipping(args.grad_clip))

    optimizer_decoder = Optimizer("msgd", 0.01, 0.9)
    optimizer_decoder.setup(model.decoder)
    if args.grad_clip > 0:
        optimizer_decoder.add_hook(GradientClipping(args.grad_clip))

    optimizer_discriminator_z = Optimizer("msgd", 0.1, 0.1)
    optimizer_discriminator_z.setup(model.discriminator_z)
    if args.grad_clip > 0:
        optimizer_discriminator_z.add_hook(GradientClipping(args.grad_clip))

    optimizer_discriminator_y = Optimizer("msgd", 0.1, 0.1)
    optimizer_discriminator_y.setup(model.discriminator_y)
    if args.grad_clip > 0:
        optimizer_discriminator_y.add_hook(GradientClipping(args.grad_clip))

    optimizer_linear_transformation = Optimizer("msgd", 0.01, 0.9)
    optimizer_linear_transformation.setup(model.linear_transformation)
    if args.grad_clip > 0:
        optimizer_linear_transformation.add_hook(
            GradientClipping(args.grad_clip))

    using_gpu = False
    if args.gpu_device >= 0:
        cuda.get_device(args.gpu_device).use()
        model.to_gpu()
        using_gpu = True
    xp = model.xp

    # 0 -> true sample
    # 1 -> generated sample
    class_true = np.zeros(args.batchsize, dtype=np.int32)
    class_fake = np.ones(args.batchsize, dtype=np.int32)
    if using_gpu:
        class_true = cuda.to_gpu(class_true)
        class_fake = cuda.to_gpu(class_fake)

    # 2D circle
    # we use a linear transformation to map the 10D representation to a 2D space such that
    # the cluster heads are mapped to the points that are uniformly placed on a 2D circle.
    rad = math.radians(360 / model.ndim_y)
    radius = 5
    mapped_cluster_head_2d_target = np.zeros((10, 2), dtype=np.float32)
    for n in range(model.ndim_y):
        x = math.cos(rad * n) * radius
        y = math.sin(rad * n) * radius
        mapped_cluster_head_2d_target[n] = (x, y)
    if using_gpu:
        mapped_cluster_head_2d_target = cuda.to_gpu(
            mapped_cluster_head_2d_target)

    # training loop
    training_start_time = time.time()
    for epoch in range(args.total_epochs):

        sum_loss_generator = 0
        sum_loss_discriminator = 0
        sum_loss_autoencoder = 0
        sum_loss_supervised = 0
        sum_loss_linear_transformation = 0
        sum_discriminator_z_confidence_true = 0
        sum_discriminator_z_confidence_fake = 0
        sum_discriminator_y_confidence_true = 0
        sum_discriminator_y_confidence_fake = 0

        epoch_start_time = time.time()
        dataset.shuffle()

        # training
        for itr in range(total_iterations_train):
            # update model parameters
            with chainer.using_config("train", True):
                # sample minibatch
                x_u = dataset.sample_unlabeled_minibatch(args.batchsize,
                                                         gpu=using_gpu)
                x_l, y_l, _ = dataset.sample_labeled_minibatch(args.batchsize,
                                                               gpu=using_gpu)

                ### reconstruction phase ###
                if True:
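                    # Encode x into (y, z), merge them into a single representation, and decode it back to x.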
                    y_onehot_u, z_u = model.encode_x_yz(x_u,
                                                        apply_softmax_y=True)
                    repr_u = model.encode_yz_representation(y_onehot_u, z_u)
                    x_reconstruction_u = model.decode_representation_x(repr_u)
                    loss_reconstruction_u = F.mean_squared_error(
                        x_u, x_reconstruction_u)

                    y_onehot_l, z_l = model.encode_x_yz(x_l,
                                                        apply_softmax_y=True)
                    repr_l = model.encode_yz_representation(y_onehot_l, z_l)
                    x_reconstruction_l = model.decode_representation_x(repr_l)
                    loss_reconstruction_l = F.mean_squared_error(
                        x_l, x_reconstruction_l)

                    loss_reconstruction = loss_reconstruction_u + loss_reconstruction_l

                    model.cleargrads()
                    loss_reconstruction.backward()
                    optimizer_encoder.update()
                    optimizer_decoder.update()

                    sum_loss_autoencoder += float(loss_reconstruction.data)

                ### adversarial phase ###
                if True:
                    y_onehot_fake_u, z_fake_u = model.encode_x_yz(
                        x_u, apply_softmax_y=True)

                    z_true = sampler.gaussian(args.batchsize,
                                              model.ndim_y,
                                              mean=0,
                                              var=1)
                    y_onehot_true = sampler.onehot_categorical(
                        args.batchsize, model.ndim_y)
                    if using_gpu:
                        z_true = cuda.to_gpu(z_true)
                        y_onehot_true = cuda.to_gpu(y_onehot_true)

                    dz_true = model.discriminate_z(z_true, apply_softmax=False)
                    dz_fake = model.discriminate_z(z_fake_u,
                                                   apply_softmax=False)
                    dy_true = model.discriminate_y(y_onehot_true,
                                                   apply_softmax=False)
                    dy_fake = model.discriminate_y(y_onehot_fake_u,
                                                   apply_softmax=False)
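                    # Discriminator logits: index 0 corresponds to true (sampled) inputs, index 1 to generated (encoded) ones.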

                    discriminator_z_confidence_true = float(
                        xp.mean(F.softmax(dz_true).data[:, 0]))
                    discriminator_z_confidence_fake = float(
                        xp.mean(F.softmax(dz_fake).data[:, 1]))
                    discriminator_y_confidence_true = float(
                        xp.mean(F.softmax(dy_true).data[:, 0]))
                    discriminator_y_confidence_fake = float(
                        xp.mean(F.softmax(dy_fake).data[:, 1]))

                    loss_discriminator_z = F.softmax_cross_entropy(
                        dz_true, class_true) + F.softmax_cross_entropy(
                            dz_fake, class_fake)
                    loss_discriminator_y = F.softmax_cross_entropy(
                        dy_true, class_true) + F.softmax_cross_entropy(
                            dy_fake, class_fake)
                    loss_discriminator = loss_discriminator_z + loss_discriminator_y

                    model.cleargrads()
                    loss_discriminator.backward()
                    optimizer_discriminator_z.update()
                    optimizer_discriminator_y.update()

                    sum_loss_discriminator += float(loss_discriminator.data)
                    sum_discriminator_z_confidence_true += discriminator_z_confidence_true
                    sum_discriminator_z_confidence_fake += discriminator_z_confidence_fake
                    sum_discriminator_y_confidence_true += discriminator_y_confidence_true
                    sum_discriminator_y_confidence_fake += discriminator_y_confidence_fake

                ### generator phase ###
                if True:
                    y_onehot_fake_u, z_fake_u = model.encode_x_yz(
                        x_u, apply_softmax_y=True)

                    dz_fake = model.discriminate_z(z_fake_u,
                                                   apply_softmax=False)
                    dy_fake = model.discriminate_y(y_onehot_fake_u,
                                                   apply_softmax=False)

                    loss_generator = F.softmax_cross_entropy(
                        dz_fake, class_true) + F.softmax_cross_entropy(
                            dy_fake, class_true)

                    model.cleargrads()
                    loss_generator.backward()
                    optimizer_generator.update()

                    sum_loss_generator += float(loss_generator.data)

                ### supervised phase ###
                if True:
                    logit_l, _ = model.encode_x_yz(x_l, apply_softmax_y=False)
                    loss_supervised = F.softmax_cross_entropy(logit_l, y_l)

                    model.cleargrads()
                    loss_supervised.backward()
                    optimizer_semi_supervised.update()

                    sum_loss_supervised += float(loss_supervised.data)

                ### additional cost ###
                if True:
                    identity = np.identity(model.ndim_y, dtype=np.float32)
                    if using_gpu:
                        identity = cuda.to_gpu(identity)
                    mapped_head = model.linear_transformation(identity)
                    loss_linear_transformation = F.mean_squared_error(
                        mapped_cluster_head_2d_target, mapped_head)

                    model.cleargrads()
                    loss_linear_transformation.backward()
                    optimizer_linear_transformation.update()

                    sum_loss_linear_transformation += float(
                        loss_linear_transformation.data)

            printr("Training ... {:3.0f}% ({}/{})".format(
                (itr + 1) / total_iterations_train * 100, itr + 1,
                total_iterations_train))

        model.save(args.model)

        labeled_iter_train = dataset.get_iterator(args.batchsize * 20,
                                                  train=True,
                                                  labeled=True,
                                                  gpu=using_gpu)
        unlabeled_iter_train = dataset.get_iterator(args.batchsize * 20,
                                                    train=True,
                                                    unlabeled=True,
                                                    gpu=using_gpu)
        average_accuracy_l = 0
        average_accuracy_u = 0
        for x_l, true_label in labeled_iter_train:
            with chainer.no_backprop_mode() and chainer.using_config(
                    "train", False):
                y_onehot_l, _ = model.encode_x_yz(x_l, apply_softmax_y=True)
                accuracy = F.accuracy(y_onehot_l, true_label)
                average_accuracy_l += float(accuracy.data)

        for x_u, true_label in unlabeled_iter_train:
            with chainer.no_backprop_mode() and chainer.using_config(
                    "train", False):
                y_onehot_u, _ = model.encode_x_yz(x_u, apply_softmax_y=True)
                accuracy = F.accuracy(y_onehot_u, true_label)
                average_accuracy_u += float(accuracy.data)

        average_accuracy_l /= labeled_iter_train.get_total_iterations()
        average_accuracy_u /= unlabeled_iter_train.get_total_iterations()

        clear_console()
        print(
            "Epoch {} done in {} sec - loss: g={:.5g}, d={:.5g}, a={:.5g}, s={:.5g}, l={:.5g} - disc_z: true={:.1f}%, fake={:.1f}% - disc_y: true={:.1f}%, fake={:.1f}% - acc: l={:.2f}%, u={:.2f}% - total {} min"
            .format(
                epoch + 1, int(time.time() - epoch_start_time),
                sum_loss_generator / total_iterations_train,
                sum_loss_discriminator / total_iterations_train,
                sum_loss_autoencoder / total_iterations_train,
                sum_loss_supervised / total_iterations_train,
                sum_loss_linear_transformation / total_iterations_train,
                sum_discriminator_z_confidence_true / total_iterations_train *
                100, sum_discriminator_z_confidence_fake /
                total_iterations_train * 100,
                sum_discriminator_y_confidence_true / total_iterations_train *
                100, sum_discriminator_y_confidence_fake /
                total_iterations_train * 100, average_accuracy_l * 100,
                average_accuracy_u * 100,
                int((time.time() - training_start_time) // 60)))

        if epoch == 50:
            optimizer_encoder.set_learning_rate(0.001)
            optimizer_decoder.set_learning_rate(0.001)
            optimizer_semi_supervised.set_learning_rate(0.01)
            optimizer_generator.set_learning_rate(0.01)
            optimizer_discriminator_y.set_learning_rate(0.01)
            optimizer_discriminator_z.set_learning_rate(0.01)

        if epoch == 1000:
            optimizer_encoder.set_learning_rate(0.0001)
            optimizer_decoder.set_learning_rate(0.0001)
            optimizer_semi_supervised.set_learning_rate(0.001)
            optimizer_generator.set_learning_rate(0.001)
            optimizer_discriminator_y.set_learning_rate(0.001)
            optimizer_discriminator_z.set_learning_rate(0.001)
Example #43
0
def main():
    current_id = datetime.datetime.today().isoformat(
        "-") + "-" + os.path.splitext(os.path.basename(__file__))[0]
    parser = argparse.ArgumentParser(description='I-Maze with Block obs')
    parser.add_argument(
        "-modelpath",
        type=str,
        help="modelpath without extension(eg .model, .optimizer)")
    parser.add_argument("-vertical",
                        type=int,
                        default=2,
                        help="vertical corridor length")
    parser.add_argument("-horizontal",
                        type=int,
                        default=0,
                        help="horizontal corridor length")
    parser.add_argument("-validation",
                        type=int,
                        default=0,
                        help="validation flag, default:0")
    parser.add_argument("-outdir",
                        type=str,
                        default="log",
                        help="output dir for loggin, default:'log'")
    parser.add_argument("-epsdelta",
                        type=float,
                        default=10**-6,
                        help="delta of epsilon, default:10**-6")
    parser.add_argument("-initexp",
                        type=int,
                        default=10**4,
                        help="initial exproration, default:10**4")
    parser.add_argument("-eps",
                        type=float,
                        default=1.0,
                        help="epsilon, default:1.0")
    parser.add_argument("-lr",
                        type=float,
                        default=k_default_lr,
                        help="epsilon, default:" + str(k_default_lr))
    parser.add_argument("-modeltype",
                        type=str,
                        default=k_default_modeltype,
                        help="ModelType, default:'" + k_default_modeltype +
                        "'")
    parser.add_argument("-batchsize",
                        type=int,
                        default=k_default_replay_batch_size,
                        help="replay batch size, default:" +
                        str(k_default_replay_batch_size))
    parser.add_argument("-updatefreq",
                        type=int,
                        default=k_default_update_freq,
                        help="update frequency, default:" +
                        str(k_default_update_freq))
    parser.add_argument("-gpu",
                        type=int,
                        default=0,
                        help="gpu id, default:0 (cpu is -1)")
    parser.add_argument("-testoutput",
                        type=int,
                        default=0,
                        help="output only at test, default:0")
    parser.add_argument("-y", type=int, default=0, help="OK?, default:0")
    parser.add_argument("-framehistnum",
                        type=int,
                        default=12,
                        help="frame history num, default:12")
    args = parser.parse_args()

    print(args)
    if args.y == 0:
        input("OK?")

    ## Make directory and write setting log
    if not os.path.exists(args.outdir):
        os.makedirs(args.outdir)
    with open(os.path.join(args.outdir, current_id + ".args"), "w") as argsf:
        argsf.write(str(args))

    env = I_MazeEnv(horizontal=args.horizontal,
                    vertical=args.vertical,
                    max_step=k_max_step)

    ## Init model
    input_dim = k_ob_shape[0]
    output_dim = len(env.action_set)
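    # Select the architecture by name; DQN consumes the stacked frame history directly,
    # while the memory-based models keep a buffer of the last framehistnum - 1 observations.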
    if args.modeltype == "DQN":
        model = DQN(input_dim * args.framehistnum, output_dim)
    elif args.modeltype == "DRQN":
        model = DRQN(input_dim, output_dim)
    elif args.modeltype == "MQN":
        model = MQN(input_dim,
                    output_dim,
                    max_buff_size=args.framehistnum - 1,
                    m=256,
                    e=256)
    elif args.modeltype == "RMQN":
        model = RMQN(input_dim,
                     output_dim,
                     max_buff_size=args.framehistnum - 1,
                     m=256,
                     e=256)
    elif args.modeltype == "FRMQN":
        model = FRMQN(input_dim,
                      output_dim,
                      max_buff_size=args.framehistnum - 1,
                      m=256,
                      e=256)
    else:
        print("not implemented", args.modeltype)
        exit(0)

    ## Use GPU
    if args.gpu >= 0:
        cuda.get_device(args.gpu).use()
        model.to_gpu()

    ## Init agent
    agent = Agent(k_ob_shape,
                  len(env.action_set),
                  args.framehistnum,
                  model,
                  lr=args.lr,
                  eps_delta=args.epsdelta,
                  eps=args.eps,
                  batch_size=args.batchsize)

    if args.modelpath:
        print("load model from ",
              args.modelpath + ".model and " + args.modelpath + ".optimizer")
        agent.load(os.path.expanduser(args.modelpath))

    train_total_step = 0
    if args.validation:
        ## Run validation
        mode = run_mode.validation
        for vertical in [4, 5, 6, 8, 10, 15, 20, 25, 30, 35, 40]:
            env.vertical = vertical
            for _ in range(1):
                run_episode(current_id, args, env, agent, mode, vertical,
                            train_total_step)
        exit(0)

    for episode_id in range(k_max_episode):
        try:
            if args.validation:
                assert (not "!!!")
            else:
                if episode_id % 100 == 0 and episode_id != 0:
                    ## Run test
                    mode = run_mode.test
                    for j in range(10):
                        run_episode(current_id, args, env, agent, mode,
                                    episode_id + j, train_total_step)

                    ## Save model
                    agent.save(
                        os.path.join(args.outdir, current_id + "_episode" +
                                     str(episode_id)))

                ## Run train
                mode = run_mode.train
                train_total_step \
                    = run_episode(current_id, args, env, agent, mode, episode_id, train_total_step)
        except:
            ark = {}
            ark["args"] = vars(args)
            ark["episode_id"] = episode_id
            ark["train_total_step"] = train_total_step
            ark["eps"] = current_eps
            with open(
                    os.path.join(
                        args.outdir, current_id + "_episode" +
                        str(episode_id) + "_ark.json"), "w") as arkf:
                ark_str = json.dumps(ark, indent=4, sort_keys=True)
                arkf.write(ark_str)
            with open(
                    os.path.join(
                        args.outdir, current_id + "_episode" +
                        str(episode_id) + "_dataset.pkl"), "wb") as datasetf:
                pickle.dump(agent.dqn.dataset, datasetf)
            exit(0)
Example #44
0
def main(args):
    start_time = datetime.now().strftime('%Y%m%d_%H_%M_%S')
    dest = "../result/" + start_time
    os.makedirs(dest)
    abs_dest = os.path.abspath(dest)
    with open(os.path.join(dest, "settings.json"), "w") as fo:
        fo.write(json.dumps(vars(args), sort_keys=True, indent=4))

    # load data
    data_processor = DataProcessor(args.data, args.vocab, args.test)
    data_processor.prepare_dataset()
    train_data = data_processor.train_data
    dev_data = data_processor.dev_data

    # create model
    vocab = data_processor.vocab
    embed_dim = args.dim
    cnn = BCNN(n_vocab=len(vocab),
               n_layer=args.layer,
               embed_dim=embed_dim,
               input_channel=1,
               output_channel=50)  # ABCNN apparently fixes the output channels at 50, though.
    if args.glove:
        cnn.load_glove_embeddings(args.glove_path, data_processor.vocab)
    if args.word2vec:
        cnn.load_word2vec_embeddings(args.word2vec_path, data_processor.vocab)
    model = Classifier(cnn,
                       lossfun=sigmoid_cross_entropy,
                       accfun=binary_accuracy)
    if args.gpu >= 0:
        cuda.get_device(args.gpu).use()
        model.to_gpu()

    # setup optimizer
    optimizer = O.AdaGrad(args.lr)
    optimizer.setup(model)
    # do not use weight decay for embeddings
    decay_params = {
        name: 1
        for name, variable in model.namedparams() if "embed" not in name
    }
    optimizer.add_hook(
        SelectiveWeightDecay(rate=args.decay, decay_params=decay_params))

    train_iter = chainer.iterators.SerialIterator(train_data, args.batchsize)
    dev_train_iter = chainer.iterators.SerialIterator(train_data,
                                                      args.batchsize,
                                                      repeat=False)
    dev_iter = DevIterator(dev_data, data_processor.n_dev)
    updater = training.StandardUpdater(train_iter,
                                       optimizer,
                                       converter=concat_examples,
                                       device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=abs_dest)

    # setup evaluation
    eval_predictor = model.copy().predictor
    eval_predictor.train = False
    iters = {"train": dev_train_iter, "dev": dev_iter}
    trainer.extend(
        WikiQAEvaluator(iters,
                        eval_predictor,
                        converter=concat_examples,
                        device=args.gpu))

    # extentions...
    trainer.extend(extensions.LogReport())
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss',
            'validation/main/map', 'validation/main/mrr',
            'validation/main/svm_map', 'validation/main/svm_mrr'
        ]))
    trainer.extend(extensions.ProgressBar(update_interval=10))
    # take a shapshot when the model achieves highest accuracy in dev set
    trainer.extend(
        extensions.snapshot_object(
            model,
            'model_epoch_{.updater.epoch}',
            trigger=chainer.training.triggers.MaxValueTrigger(
                'validation/main/map')))
    trainer.extend(extensions.ExponentialShift("lr", 0.5, optimizer=optimizer),
                   trigger=chainer.training.triggers.MaxValueTrigger(
                       "validation/main/map"))
    trainer.run()
Example #45
0
def main():
    parser = argparse.ArgumentParser(description='')
    parser.add_argument('--gpu',
                        '-g',
                        type=int,
                        default=-1,
                        help='GPU device ID')
    parser.add_argument('--epoch',
                        '-e',
                        type=int,
                        default=50,
                        help='# of epoch')
    parser.add_argument('--batch_size',
                        type=int,
                        default=128,
                        help='size of mini-batch')
    parser.add_argument('--density',
                        type=int,
                        default=1,
                        help='density of cnn kernel')
    parser.add_argument('--small',
                        dest='small',
                        action='store_true',
                        default=False)
    parser.add_argument('--no_bn',
                        dest='use_bn',
                        action='store_false',
                        default=True)
    parser.add_argument('--out', default='')
    parser.set_defaults(test=False)
    args = parser.parse_args()

    model = ValueNet(use_bn=args.use_bn)
    # model = RolloutValueNet(use_bn=args.use_bn, output=41)
    # log directory
    out = datetime.datetime.now().strftime('%m%d')
    if args.out:
        out = out + '_' + args.out
    out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs_value", out))
    os.makedirs(os.path.join(out_dir, 'models'), exist_ok=True)

    # gpu
    if args.gpu >= 0:
        cuda.get_device(args.gpu).use()
        model.to_gpu()

    # setting
    with open(os.path.join(out_dir, 'setting.txt'), 'w') as f:
        for k, v in args._get_kwargs():
            print('{} = {}'.format(k, v))
            f.write('{} = {}\n'.format(k, v))

    # prepare for dataset
    if args.small:
        train = PreprocessedDataset(train_small_path)
    else:
        train = PreprocessedDataset(train_path)
    test = PreprocessedDataset(test_path)
    train_iter = iterators.SerialIterator(train, args.batch_size)
    val_iter = iterators.SerialIterator(test, args.batch_size, repeat=False)

    # optimizer
    optimizer = chainer.optimizers.Adam(eps=1e-2)
    optimizer.setup(model)

    # start training
    start = time.time()
    train_count = 0
    for epoch in range(args.epoch):

        # train
        train_loss = []
        train_accuracy = []

        for i in range(len(train) // args.batch_size):
            batch = train_iter.next()
            x = chainer.Variable(
                model.xp.array([b[0] for b in batch], 'float32'))
            y = chainer.Variable(model.xp.array([b[1] for b in batch],
                                                'int32'))
            optimizer.update(model, x, y)
            train_count += 1

            progress_report(train_count, start, args.batch_size)

            train_loss.append(cuda.to_cpu(model.loss.data))
            train_accuracy.append(cuda.to_cpu(model.accuracy.data))

        # test
        test_loss = []
        test_accuracy = []

        it = copy.copy(val_iter)
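        # Iterate over a copy so val_iter itself is not consumed and can be reused next epoch.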
        for batch in it:
            x = chainer.Variable(model.xp.array([b[0] for b in batch],
                                                'float32'),
                                 volatile=True)
            y = chainer.Variable(model.xp.array([b[1] for b in batch],
                                                'int32'),
                                 volatile=True)
            model(x, y, train=False)

            test_loss.append(cuda.to_cpu(model.loss.data))
            test_accuracy.append(cuda.to_cpu(model.accuracy.data))

        print('\nepoch {}  train_loss {:.5f}  train_accuracy {:.3f} \n'
              '          test_loss {:.5f}  test_accuracy {:.3f}'.format(
                  epoch, np.mean(train_loss), np.mean(train_accuracy),
                  np.mean(test_loss), np.mean(test_accuracy)))
        with open(os.path.join(out_dir, "log"), 'a+') as f:
            f.write(
                'epoch {}  train_loss {:.5f}  train_accuracy {:.3f} \n'
                '          test_loss {:.5f}   test_accuracy {:.3f} \n'.format(
                    epoch, np.mean(train_loss), np.mean(train_accuracy),
                    np.mean(test_loss), np.mean(test_accuracy)))

        if epoch % 5 == 0:
            serializers.save_hdf5(
                os.path.join(out_dir, "models",
                             "value_net_{}.model".format(epoch)), model)
Example #46
0
def experiment():
    ite = 100
    pdata = 1000
    epoch = 100
    batchsize = 1000

    seed = 2018

    gpu = True

    loss_pu = np.zeros((ite, epoch))
    est_error_pu = np.zeros((ite, epoch))
    est_error_pubp = np.zeros((ite, epoch))
    est_precision_pu = np.zeros((ite, epoch))
    est_recall_pu = np.zeros((ite, epoch))
    est_precision_pubp = np.zeros((ite, epoch))
    est_recall_pubp = np.zeros((ite, epoch))

    for i in range(ite):
        np.random.seed(seed)
        #PN classification
        x_train, t_train, x_test, t_test = load_dataset("mnist")
        t_train[t_train == -1] = 0
        t_test[t_test == -1] = 0

        pi = np.mean(t_train)
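        # pi is the class prior: the fraction of positive examples in the training labels.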

        x = np.concatenate([x_train, x_test], axis=0)
        t = np.concatenate([t_train, t_test], axis=0)
        x = x.reshape(x.shape[0], x.shape[2]*x.shape[3])
        dim = x.shape[1]
        print(x.shape)

        model = MultiLayerPerceptron(dim)
        optimizer = optimizers.Adam(1e-5)
        optimizer.setup(model)

        if gpu:
            gpu_device = 0
            cuda.get_device(gpu_device).use()
            model.to_gpu(gpu_device)
            xp = cuda.cupy
        else:
            xp = np

        model, optimizer = train(x, t, epoch, model, optimizer, batchsize, xp)

        x_p = x_train[t_train==1]

        xp_prob = np.array([])
        for j in six.moves.range(0, len(x_p), batchsize):
            X = Variable(xp.array(x_p[j:j + batchsize], xp.float32))
            g = chainer.cuda.to_cpu(model(X).data).T[0]
            xp_prob = np.append(xp_prob, 1/(1+np.exp(-g)), axis=0)
        xp_prob /= np.mean(xp_prob)
        xp_prob /= np.max(xp_prob)
        print(xp_prob)
        rand = np.random.uniform(size=len(x_p))
        x_p = x_p[xp_prob > rand]
        perm = np.random.permutation(len(x_p))
        x_p = x_p[perm[:pdata]]

        tp = np.ones(len(x_p))
        tu = np.zeros(len(x_train))
        t_train = np.concatenate([tp, tu], axis=0)

        x_train = np.concatenate([x_p, x_train], axis=0)

        print(x_train.shape)
        print(t_train.shape)
        print(x_test.shape)
        print(t_test.shape)

        model = MultiLayerPerceptron(dim)
        optimizer = optimizers.Adam(alpha=1e-5)
        optimizer.setup(model)
        optimizer.add_hook(chainer.optimizer.WeightDecay(0.005))

        if gpu:
            gpu_device = 0
            cuda.get_device(gpu_device).use()
            model.to_gpu(gpu_device)
            xp = cuda.cupy
        else:
            xp = np

        model, optimizer, loss_list, acc1, acc2, pre1, rec1, pre2, rec2  = train_pu(x_train, t_train, x_test, t_test, pi, epoch, model, optimizer, batchsize, xp)

        loss_pu[i] = loss_list
        est_error_pu[i] = acc1
        est_error_pubp[i] = acc2
        est_precision_pu[i] = pre1
        est_recall_pu[i] = rec1
        est_precision_pubp[i] = pre2
        est_recall_pubp[i] = rec2

        print(acc1[-1])
        print(acc2[-1])

        seed += 1

        np.savetxt('loss_pu_mnist_%d.csv'%seed, loss_pu, delimiter=',')
        np.savetxt('est_error_pu_mnist_%d.csv'%seed, est_error_pu, delimiter=',')
        np.savetxt('est_error_pubp_mnist_%d.csv'%seed, est_error_pubp, delimiter=',')
        np.savetxt('est_precision_pu_mnist_%d.csv'%seed, est_precision_pu, delimiter=',')
        np.savetxt('est_recall_pu_mnist_%d.csv'%seed, est_recall_pu, delimiter=',')
        np.savetxt('est_precision_pubp_mnist_%d.csv'%seed, est_precision_pubp, delimiter=',')
        np.savetxt('est_recall_pubp_mnist_%d.csv'%seed, est_recall_pubp, delimiter=',')

    loss_pu_mean = np.mean(loss_pu, axis=1)
    est_error_pu_mean = np.mean(est_error_pu, axis=1)
    est_error_pubp_mean = np.mean(est_error_pubp, axis=1)
    est_error_pu_std = np.std(est_error_pu, axis=1)
    est_error_pubp_std = np.std(est_error_pubp, axis=1)
    return loss_pu_mean, est_error_pu_mean, est_error_pubp_mean, est_error_pu_std, est_error_pubp_std 
Example #47
0
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('model_file')
    parser.add_argument('-g',
                        '--gpu',
                        default=0,
                        type=int,
                        help='if -1, use cpu only (default: 0)')
    args = parser.parse_args()
    alpha = 0.3
    image_alpha = 1
    render_eps = 0.015

    data_path = '../../train_data/linemodSIXD2017'

    ## delta for visibility correspondance
    delta = 0.015  # [m]
    objs = np.arange(15) + 1
    n_class = len(objs) + 1
    distance_sanity = 0.02
    min_distance = 0.005
    output_scale = 0.14
    prob_eps = 0.4
    eps = 0.02
    im_size = (640, 480)
    interval = 15

    ## load object models
    obj_model_fpath_mask = os.path.join(data_path, 'models', 'obj_{0:0>2}.ply')
    obj_models = []
    for obj in objs:
        if obj != 'background':
            print 'Loading data: obj_{0}'.format(obj)
            obj_model_fpath = obj_model_fpath_mask.format(obj)
            obj_models.append(inout.load_ply(obj_model_fpath))

    ## load network model
    model = DualCenterProposalNetworkRes50_predict7(n_class=n_class)
    chainer.serializers.load_npz(args.model_file, model)

    if args.gpu >= 0:
        cuda.get_device(args.gpu).use()
        model.to_gpu()

    # pose estimator instance
    # pei = SimplePoseEstimationInterface(distance_sanity=distance_sanity,
    #                                     min_distance=min_distance, eps=prob_eps)
    pei = PoseEstimationInterface(objs=['obj_{0:0>2}'.format(i) for i in objs],
                                  base_path=data_path,
                                  n_ransac=500,
                                  distance_sanity=distance_sanity,
                                  model_scale=1000.0,
                                  model_partial=1,
                                  min_distance=min_distance,
                                  eps=prob_eps,
                                  im_size=im_size)
    scores_pos = []
    scores_rot = []
    ids_arr = []
    scores_5cm5deg = []
    scores_6dpose = []
    test_objs = np.delete(np.arange(15) + 1, [2, 6])
    for obj_id in test_objs:
        test = LinemodSIXDSingleInstanceDataset(data_path,
                                                obj_id,
                                                mode='test',
                                                interval=interval,
                                                metric_filter=output_scale +
                                                eps)
        im_ids = test.__len__()
        # im_ids = 50
        detect_cnt = 0
        cnt_5cm5deg = 0
        cnt_6dpose = 0
        sum_pos = 0
        sum_rot = 0
        for im_id in tqdm.trange(im_ids):
            # print "executing {0} / {1}".format(im_id, test.__len__())
            img_rgb, img_depth, pos, rot, K = test.get_example(im_id)
            x_data = np.expand_dims(img_rgb, axis=0)
            with chainer.no_backprop_mode():
                if args.gpu >= 0:
                    x_data = cuda.to_gpu(x_data)
                x = chainer.Variable(x_data)
                with chainer.using_config('train', False):
                    y_cls_d, y_cp_d, y_ocp_d = model(x)
                    y_cls = chainer.cuda.to_cpu(y_cls_d.data)[0]
                    y_cp = chainer.cuda.to_cpu(y_cp_d.data)[0]
                    y_ocp = chainer.cuda.to_cpu(y_ocp_d.data)[0]

            y_pos, y_rot = pei.execute(y_cls,
                                       y_cp * output_scale,
                                       y_ocp * output_scale,
                                       img_depth,
                                       K,
                                       estimate_idx=[obj_id - 1])
            for i in six.moves.range(n_class - 1):
                if np.sum(pos[i]) != 0 and np.sum(rot[i]) != 0:
                    # 5cm 5deg metric
                    pos_diff = np.linalg.norm(y_pos[i] - pos[i])
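                    # Rotation error: angle (deg) of the relative rotation between prediction and ground truth.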
                    quat = quaternion.from_rotation_matrix(
                        np.dot(y_rot[i].T, rot[i]))
                    quat_w = min(1, abs(quat.w))
                    diff_angle = np.rad2deg(np.arccos(quat_w)) * 2
                    # print "obj_{0:0>2} : position_diff = {1}, rotation_diff = {2}".format(i + 1, pos_diff, diff_angle)
                    if i + 1 == obj_id and pos_diff < 1.0:
                        sum_pos += pos_diff
                        sum_rot += diff_angle
                        detect_cnt += 1
                        if pos_diff < 0.05 and diff_angle < 5:
                            cnt_5cm5deg += 1
                    # 6d pose metric
                    model_pts = obj_models[i]['pts'].transpose(1, 0)
                    gt_proj = np.dot(rot[i].T, model_pts) + pos[i, :,
                                                                np.newaxis]
                    pred_proj = np.dot(y_rot[i].T, model_pts) + pos[i, :,
                                                                    np.newaxis]
                    diff_mean = np.mean(
                        np.linalg.norm((gt_proj - pred_proj), axis=0))
                    if i + 1 == obj_id and pos_diff < 1.0 and diff_mean < 0.1 * object_diameters[
                            i]:
                        cnt_6dpose += 1

        print "obj_{0:0>2} : detection_rate = {1},\n position_diff = {2}, rotation_diff = {3}, 5cm5deg = {4}, 6d pose = {5}" \
            .format(obj_id, detect_cnt / (1.0 * im_ids), sum_pos / detect_cnt, sum_rot / detect_cnt, cnt_5cm5deg / (1.0 * im_ids), cnt_6dpose / (1.0 * im_ids))
        scores_pos.append(sum_pos / im_ids)
        scores_rot.append(sum_rot / im_ids)
        scores_5cm5deg.append(cnt_5cm5deg)
        scores_6dpose.append(cnt_6dpose)
        ids_arr.append(im_ids)

    print "-- results --"
    print scores_pos
    print scores_rot
    print "-- total results --"
    ids_arr = np.asarray(ids_arr)
    scores_5cm5deg = np.asarray(scores_5cm5deg)
    ave_5cm5deg = np.sum(scores_5cm5deg) / (1.0 * np.sum(ids_arr))
    ave_6dpose = np.sum(scores_6dpose) / (1.0 * np.sum(ids_arr))
    print "5cm5deg metric : {}".format(ave_5cm5deg)
    print "6d pose metric : {}".format(ave_6dpose)

    # scores_pos = scores_pos * 1000 # m -> mm
    scores_5cm5deg = scores_5cm5deg / ids_arr
    scores_6dpose = scores_6dpose / ids_arr
    scores_ave = np.array([ave_5cm5deg, ave_6dpose])

    if not os.path.exists('eval_results'):
        os.makedirs('eval_results')
    np.save('eval_results/scores_pos.npy', scores_pos)
    np.save('eval_results/scores_rot.npy', scores_rot)
    np.save('eval_results/scores_5cm5deg.npy', scores_5cm5deg)
    np.save('eval_results/scores_6dpose.npy', scores_6dpose)
    np.save('eval_results/scores_ave.npy', scores_ave)
Example #48
0
    def test_forward_multi_gpu(self):
        with cuda.get_device(1):
            self.link.to_gpu()
            x = cuda.to_gpu(self.x)
        with cuda.get_device(0):
            self.check_forward(x)
Example #49
0
    def init_state(self, param, state):
        xp = cuda.get_array_module(param.data)
        with cuda.get_device(param.data):
            state['mem'] = xp.ones_like(param.data)
            state['g'] = xp.zeros_like(param.data)
            state['g2'] = xp.zeros_like(param.data)
Example #50
0
def sim_generator(params):

    global sim_params, cells, stims_ext, stims_mem
    global i_ion, phie, i_ext_e, i_ext_i, rhs_phie, rhs_vmem, vmem

    sim_params = params

    assert sim_params is not None

    print "elecpy simulation start!"

    cuda.get_device(0).use()

    # Constants
    Sv = 1400  # Surface-to-volume ratio (cm^-1)
    Cm = 1.0  # Membrane capacitance (uF/cm^2)
    sigma_l_i = 1.74  # (mS/cm)
    sigma_t_i = 0.19  # (mS/cm)
    sigma_l_e = 6.25  # (mS/cm)
    sigma_t_e = 2.36  # (mS/cm)

    # Geometory settings
    im_h = sim_params['geometory']['height']
    im_w = sim_params['geometory']['width']
    ds = sim_params['geometory']['ds']  # Spatial discretization step (cm)
    N = im_h * im_w

    # Time settings
    udt = sim_params['time']['udt']  # Universal time step (ms)
    time_end = sim_params['time']['end']

    # Logging settings
    cnt_log = sim_params['log']['cnt']  # num of udt for logging
    savepath = sim_params['log']['path']

    # Cell model settings
    if sim_params['cell_type'] == 'ohararudy':
        cells = cell_model_ohararudy((N))
    if sim_params['cell_type'] == 'luorudy':
        cells = cell_model_luorudy((N))
    if sim_params['cell_type'] == 'mahajan':
        cells = cell_model_mahajan((N))
    assert cells is not None

    print "Stimulation settings",
    stims_ext = []
    stims_mem = []
    if 'stimulation' in sim_params.keys():
        stim_param = sim_params['stimulation']
        if 'extracellular' in stim_param:
            for param in stim_param['extracellular']:
                stim = ExtracellularStimulator(**param)
                assert tuple(stim.shape) == (im_h, im_w)
                stims_ext.append(stim)
        if 'membrane' in stim_param:
            for param in stim_param['membrane']:
                stim = MembraneStimulator(**param)
                assert tuple(stim.shape) == (im_h, im_w)
                stims_mem.append(stim)
    print "...done"

    print "Allocating data...",
    cells.create()
    i_ion = np.zeros((N), dtype=np.float64)
    phie = np.zeros((N), dtype=np.float64)
    i_ext_e = np.zeros((N), dtype=np.float64)
    i_ext_i = np.zeros((N), dtype=np.float64)
    rhs_phie = np.zeros((N), dtype=np.float64)
    rhs_vmem = np.zeros((N), dtype=np.float64)
    tone = np.zeros((N), dtype=np.float64)
    vmem = np.copy(cells.get_param('v'))

    mask_ion = np.ones((im_h, im_w), dtype=np.float64)
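    # Zero out ionic currents inside a small disc around the centre of the tissue.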
    for h in range(im_h):
        for w in range(im_w):
            distance = (h - im_h // 2)**2 + (w - im_w // 2)**2
            if distance < ((5**2) * 2):
                mask_ion[h, w] = 0.
    mask_ion = mask_ion.flatten()

    print "...done"

    print "Initializing data...",
    if 'restart' in sim_params.keys():
        cnt_restart = sim_params['restart']['count']
        srcpath = sim_params['restart']['source']
        pfx = '_{0:0>4}'.format(cnt_restart)
        phie = np.load('{0}/phie{1}.npy'.format(srcpath, pfx)).flatten()
        vmem = np.load('{0}/vmem{1}.npy'.format(srcpath, pfx)).flatten()
        cells.load('{0}/cell{1}'.format(srcpath, pfx))
        cnt_udt = cnt_restart * cnt_log
    print "...done"

    print 'Building PDE system ...',
    sigma_l = sigma_l_e + sigma_l_i
    sigma_t = sigma_t_e + sigma_t_i
    pde_i = PDE(im_h, im_w, sigma_l_i, sigma_t_i, ds)
    pde_m = PDE(im_h, im_w, sigma_l, sigma_t, ds)
    print '...done'

    # Initialization
    t = 0.  # Time (ms)
    cnt_udt = 0  # Count of udt
    dstep = 1  # Time step (# of udt)
    cnt_save = -1

    run_udt = True  # Flag of running sim in udt
    flg_st = False  # Flaf of stimulation
    cnt_st_off = 0

    print 'Main loop start!'
    while t < time_end:

        t = conv_cntUdt2time(cnt_udt)
        dt = dstep * udt

        # Stimulation control
        i_ext_e[:] = 0.0
        flg_st_temp = False
        for s in stims_ext:
            i_ext_e += s.get_current(t) * Sv
            flg_st_temp = flg_st_temp or s.get_flag(t)
        for s in stims_mem:
            cells.set_param('st', s.get_current(t))

        # step.1 cell state transition
        cells.set_param('dt', dt)
        cells.set_param('v', cuda.to_gpu(vmem))
        cells.update()
        i_ion = cells.get_param('it')

        i_ion = i_ion * mask_ion

        # step.2 phie
        rhs_phie = i_ext_e - i_ext_i - pde_i.forward(vmem)
        pde_cnt, phie = pde_m.solve(phie, rhs_phie, tol=1e-2, maxcnt=1e5)
        phie -= phie[0]

        # step.3 vmem
        rhs_vmem = pde_i.forward(vmem)
        rhs_vmem += pde_i.forward(phie)
        tone = (rhs_vmem * dt) / (Cm * Sv)
        rhs_vmem -= i_ion * Sv
        rhs_vmem += i_ext_i
        rhs_vmem *= 1 / (Cm * Sv)
        vmem += dt * rhs_vmem

        # Logging & error check
        cnt_save_now = conv_time2cntSave(t)
        if cnt_save_now != cnt_save:
            cnt_save = cnt_save_now
            sys.stdout.write('\r------------------{0}/{1}ms'.format(
                t, time_end))
            sys.stdout.flush()
            np.save('{0}/phie_{1:0>4}'.format(savepath, cnt_save),
                    phie.reshape((im_h, im_w)))
            np.save('{0}/vmem_{1:0>4}'.format(savepath, cnt_save),
                    vmem.reshape((im_h, im_w)))
            np.save('{0}/tone_{1:0>4}'.format(savepath, cnt_save),
                    tone.reshape((im_h, im_w)))
            cells.save('{0}/cell_{1:0>4}'.format(savepath, cnt_save))
            yield vmem

            flg = False
            for i, v in enumerate(vmem):
                if v != v:
                    print "error : invalid value {1} @ {0} ms, index {2}".format(
                        t, v, i)
                    flg = True
                    break
            if flg is True:
                break

        # Stim off count
        if flg_st_temp is False:
            if flg_st is True:
                cnt_st_off = 0
            else:
                cnt_st_off += 1
            flg_st = flg_st_temp

        # Time step control
        if run_udt:
            if cnt_st_off >= 3 and cnt_udt % 10 == 0:
                dstep = 2
                run_udt = False
        else:
            if pde_cnt > 5:
                dstep = 1
                run_udt = True

        cnt_udt += dstep

    print "elecpy done"
    yield False
Example #51
0
    print("Train data loaded: %d" % len(train_vis))
    print("Test data loaded: %d" % len(test_vis))
    print("num of labels: %d" % num_labels)
    logging.info("Train data loaded: %d" % len(train_vis))
    logging.info("Test data loaded: %d" % len(test_vis))
    logging.info("num of labels: %d" % num_labels)

    # prepare model
    model = syuwa_cnn(num_labels)

    if args.restart_from is not None:
        model = pickle.load(open(args.restart_from, 'rb'))
    if args.gpu >= 0:
        import cupy
        cuda.check_cuda_available()
        cuda.get_device(args.device_num).use()
        model.to_gpu()

    def xparray(data):
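        # bring a (possibly CuPy) array back to the host as a NumPy array; on CPU it is returned as-is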
        if args.gpu >= 0:
            return cupy.asnumpy(data)
        else:
            return data
    
    opt = get_optimizer(args.opt)
    opt.setup(model)

    train_vis = [np.asarray(x).astype(np.float32) for x in train_vis]
    train_dep = [np.asarray(x).astype(np.float32) for x in train_dep]
    test_vis = [np.asarray(x).astype(np.float32) for x in test_vis]
    test_dep = [np.asarray(x).astype(np.float32) for x in test_dep] 
Example #52
0
def gan_test(args, model_path):
    # Prepare Flow and Texture GAN model, defined in net.py

    gen_flow = net.FlowGenerator()
    serializers.load_npz(model_path["gen_flow"], gen_flow)
    gen_tex = net.Generator(dimz=100)
    serializers.load_npz(model_path["gen_tex"], gen_tex)

    if args.gpu >= 0:
        cuda.get_device(args.gpu).use()
        gen_flow.to_gpu()
        gen_tex.to_gpu()
    xp = np if args.gpu < 0 else cuda.cupy

    rows = 5
    cols = 5

    ### generate videos from Z
    np.random.seed(0)
    for i in range(10):
        print(i)
        z_flow = Variable(xp.asarray(gen_flow.make_hidden(rows * cols)))
        z_tex = Variable(xp.asarray(gen_tex.make_hidden(rows * cols)))

        ### generate flow
        with chainer.using_config('train', False):
            flow_fake, _, _ = gen_flow(z_flow)
        flow_fake_tmp = chainer.cuda.to_cpu(flow_fake.data)

        ### generate video
        with chainer.using_config('train', False):
            y, fore_vid, back_img, h_mask = gen_tex(z_tex, flow_fake)
        y = chainer.cuda.to_cpu(y.data)
        fore_vid = chainer.cuda.to_cpu(fore_vid.data)
        back_img = chainer.cuda.to_cpu(back_img.data)
        y_mask = chainer.cuda.to_cpu(h_mask.data)
        flow = flow_fake_tmp

        preview_dir = '{}/{:03}/'.format(args.out, i)
        if not os.path.exists(preview_dir):
            os.makedirs(preview_dir)

        ## save video
        y = np.asarray(np.clip((y + 1.) * (255. / 2.), 0.0, 255.0),
                       dtype=np.uint8)
        B, CH, T, H, W = y.shape
        Y = y.reshape((rows, cols, CH, T, H, W))
        Y = Y.transpose(3, 0, 4, 1, 5, 2)  ### T, rows, H, cols, W, ch
        Y = Y.reshape((T, rows * H, cols * W, CH))  # T, H, W, ch
        for j in range(0, T):
            preview_path = preview_dir + 'img_{:03}.jpg'.format(j + 1)
            Image.fromarray(Y[j]).save(preview_path)

        ### save fore video
        y = np.asarray(np.clip((fore_vid + 1.) * (255. / 2.), 0.0, 255.0),
                       dtype=np.uint8)
        B, CH, T, H, W = y.shape
        Y = y.reshape((rows, cols, CH, T, H, W))
        Y = Y.transpose(3, 0, 4, 1, 5, 2)  ### T, rows, H, cols, W, ch
        Y = Y.reshape((T, rows * H, cols * W, CH))  # T, H, W, ch
        for j in range(0, T):
            preview_path = preview_dir + 'fore_{:03}.jpg'.format(j + 1)
            Image.fromarray(Y[j]).save(preview_path)

        ### save mask video
        y = np.asarray(np.clip(y_mask * 255., 0.0, 255.0), dtype=np.uint8)
        B, CH, T, H, W = y.shape
        Y = y.reshape((rows, cols, CH, T, H, W))
        Y = Y.transpose(3, 0, 4, 1, 5, 2)  ### T, rows, H, cols, W, ch
        Y = Y.reshape((T, rows * H, cols * W, CH))  # T, H, W, ch
        for j in range(0, T):
            preview_path = preview_dir + 'mask_{:03}.jpg'.format(j + 1)
            Image.fromarray(Y[j]).save(preview_path)

        ### save back img
        y = np.asarray(np.clip((back_img + 1.) * (255. / 2.), 0.0, 255.0),
                       dtype=np.uint8)
        B, CH, T, H, W = y.shape
        y = y[:, :, 0]
        Y = y.reshape((rows, cols, CH, H, W))
        Y = Y.transpose(0, 3, 1, 4, 2)  ### rows, H, cols, W, ch
        Y = Y.reshape((rows * H, cols * W, CH))  # H, W, ch
        preview_path = preview_dir + 'back.jpg'
        Image.fromarray(Y).save(preview_path)

        ### save flow
        y = np.asarray(np.clip((flow + 1.) * (255. / 2.), 0.0, 255.0),
                       dtype=np.uint8)
        B, CH, T, H, W = y.shape
        Y = y.reshape((rows, cols, CH, T, H, W))
        Y = Y.transpose(3, 0, 4, 1, 5, 2)  ### T, rows, H, cols, W, ch
        Y = Y.reshape((T, rows * H, cols * W, CH))  # T, H, W, ch

        for j in range(0, T):
            preview_path = preview_dir + 'flow_{:03}.jpg'.format(j + 1)
            flow_img = np.hstack((Y[j, :, :, 0], Y[j, :, :, 1]))
            Image.fromarray(flow_img).save(preview_path)
Example #53
0
    def roll_out(self, args):
        """
        compute expected rewards

        :param samples: generated_sample
        :param given: use x_0 ~ x_given as generated (state)
        :param dis: discriminator
        :param pool: multiprocess.Pool
        :param rollout_num: num of roll out

        :return: rewards (batch_size)
        """
        tag = None
        if len(args) == 4:
            samples, given, dis, rollout_num = args
        elif len(args) == 5:
            samples, given, dis, rollout_num, gpu = args
            cuda.get_device(gpu).use()
            dis.to_gpu()
            self.to_gpu()
        elif len(args) == 6:
            samples, given, dis, rollout_num, gpu, tag = args
        else:
            raise AssertionError('unexpected number of arguments: {}'.format(len(args)))

        batch_size = len(samples)
        self.reset_state()

        if tag is not None:
            self.lstm1.h = self.tag_embed(
                chainer.Variable(self.xp.array(tag, 'int32')))

        gen_x = np.zeros((batch_size, self.sequence_length), 'int32')

        x = chainer.Variable(self.xp.asanyarray([self.start_token] *
                                                batch_size, 'int32'),
                             volatile=True)
        self.decode_one_step(x, False)
        for i in range(given):
            gen_x[:, i] = samples[:, i]
            x = chainer.Variable(self.xp.asanyarray(samples[:, i], 'int32'),
                                 volatile=True)
            scores = self.decode_one_step(x, False)

        scores_ = scores
        self.save_state()

        rewards = []
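        # Monte Carlo roll-out: complete the sequence rollout_num times from the
        # saved state and average the discriminator rewards over the roll-outs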
        for _ in range(rollout_num):
            self.set_state()
            scores = chainer.Variable(scores_.data.copy(), volatile=True)
            for i in range(given, self.sequence_length):

                pred = F.softmax(scores)
                pred = cuda.to_cpu(pred.data)

                generated = [
                    np.random.choice(self.vocab_size, p=pred[j])
                    for j in range(batch_size)
                ]

                # pred = cuda.to_cpu(pred.data) - np.finfo(np.float32).epsneg
                # generated = []
                # for j in range(batch_size):
                #     histogram = np.random.multinomial(1, pred[j])
                #     generated.append(int(np.nonzero(histogram)[0]))

                gen_x[:, i] = generated
                x = chainer.Variable(self.xp.asanyarray(generated, 'int32'),
                                     volatile=True)
                scores = self.decode_one_step(x, False)

            rewards.append(dis.get_reward(gen_x))

        return np.mean(rewards, axis=0)
def main():
    parser = argparse.ArgumentParser()
    # logging
    parser.add_argument('--logfile',
                        '-l',
                        default='',
                        type=str,
                        help='write log data into a file')
    parser.add_argument('--debug',
                        '-d',
                        action='store_true',
                        help='run in debug mode')
    parser.add_argument('--silent',
                        '-s',
                        action='store_true',
                        help='run in silent mode')
    parser.add_argument('--no-progress-bar',
                        action='store_true',
                        help='hide progress bar')
    # train and validate data
    parser.add_argument('--train',
                        default='train.txt',
                        type=str,
                        help='set filename of training data')
    parser.add_argument('--validate',
                        default='dev.txt',
                        type=str,
                        help='set filename of validation data')
    parser.add_argument('--vocab-size',
                        '-V',
                        default=0,
                        type=int,
                        help='set vocabulary size (0 means no limitation)')
    parser.add_argument(
        '--target-speaker',
        '-T',
        default='S',
        help='set target speaker name to be learned for system output')
    # file settings
    parser.add_argument('--initial-model',
                        '-i',
                        help='start training from an initial model')
    parser.add_argument('--model',
                        '-m',
                        required=True,
                        help='set prefix of output model files')
    parser.add_argument(
        '--resume',
        action='store_true',
        help='resume training from a previously saved snapshot')
    parser.add_argument('--snapshot',
                        type=str,
                        help='dump a snapshot to a file after each epoch')
    # Model structure
    parser.add_argument('--enc-layer',
                        default=2,
                        type=int,
                        help='number of encoder layers')
    parser.add_argument('--enc-esize',
                        default=100,
                        type=int,
                        help='number of encoder input-embedding units')
    parser.add_argument('--enc-hsize',
                        default=512,
                        type=int,
                        help='number of encoder hidden units')

    parser.add_argument('--dec-layer',
                        default=2,
                        type=int,
                        help='number of decoder layers')
    parser.add_argument('--dec-esize',
                        default=100,
                        type=int,
                        help='number of decoder input-embedding units')
    parser.add_argument('--dec-hsize',
                        default=512,
                        type=int,
                        help='number of decoder hidden units')
    parser.add_argument('--dec-psize',
                        default=100,
                        type=int,
                        help='number of decoder pre-output projection units')
    # training conditions
    parser.add_argument(
        '--optimizer',
        default='Adam',
        type=str,
        help="set optimizer (SGD, Adam, AdaDelta, RMSprop, ...)")
    parser.add_argument('--L2-weight',
                        default=0.0,
                        type=float,
                        help="set weight for L2-regularization term")
    parser.add_argument('--clip-grads',
                        default=5.,
                        type=float,
                        help="set gradient clipping threshold")
    parser.add_argument('--dropout-rate',
                        default=0.5,
                        type=float,
                        help="set dropout rate in training")
    parser.add_argument('--num-epochs',
                        '-e',
                        default=20,
                        type=int,
                        help='number of epochs to be trained')
    parser.add_argument('--learn-rate',
                        '-R',
                        default=1.0,
                        type=float,
                        help='set initial learning rate for SGD')
    parser.add_argument('--learn-decay',
                        default=1.0,
                        type=float,
                        help='set decaying ratio of learning rate or epsilon')
    parser.add_argument(
        '--lower-bound',
        default=1e-16,
        type=float,
        help='set threshold of learning rate or epsilon for early stopping')
    parser.add_argument('--batch-size',
                        '-b',
                        default=50,
                        type=int,
                        help='set batch size for training and validation')
    parser.add_argument(
        '--max-batch-length',
        default=20,
        type=int,
        help='set maximum sequence length to control batch size')
    parser.add_argument('--seed',
                        default=99,
                        type=int,
                        help='set a seed for random numbers')
    # select a GPU device
    parser.add_argument('--gpu',
                        '-g',
                        default=0,
                        type=int,
                        help='GPU ID (negative value indicates CPU)')

    args = parser.parse_args()

    # flush stdout
    if six.PY2:
        sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0)
    # set up the logger
    tqdm_logging.config(logger,
                        args.logfile,
                        mode=('a' if args.resume else 'w'),
                        silent=args.silent,
                        debug=args.debug)
    # gpu setup
    if args.gpu >= 0:
        cuda.check_cuda_available()
        cuda.get_device(args.gpu).use()
        xp = cuda.cupy
        xp.random.seed(args.seed)
    else:
        xp = np

    # randomize
    np.random.seed(args.seed)
    random.seed(args.seed)

    logger.info('----------------------------------')
    logger.info('Train a neural conversation model')
    logger.info('----------------------------------')
    if args.resume:
        if not args.snapshot:
            logger.error('snapshot file is not specified.')
            sys.exit()

        with open(args.snapshot, 'rb') as f:
            vocab, optimizer, status, args = pickle.load(f)
        logger.info('Resume training from epoch %d' % status.epoch)
        logger.info('Args ' + str(args))
        model = optimizer.target
    else:
        logger.info('Args ' + str(args))
        # Prepare RNN model and load data
        if args.initial_model:
            logger.info('Loading a model from ' + args.initial_model)
            with open(args.initial_model, 'rb') as f:
                vocab, model, tmp_args = pickle.load(f)
            status.cur_at = time.time()
        else:
            logger.info('Making vocabulary from ' + args.train)
            vocab = dialog_corpus.get_vocabulary(args.train,
                                                 vocabsize=args.vocab_size)
            model = Sequence2SequenceModel(
                LSTMEncoder(args.enc_layer,
                            len(vocab),
                            args.enc_hsize,
                            args.enc_esize,
                            dropout=args.dropout_rate),
                LSTMDecoder(args.dec_layer,
                            len(vocab),
                            len(vocab),
                            args.dec_esize,
                            args.dec_hsize,
                            args.dec_psize,
                            dropout=args.dropout_rate))
        # Setup optimizer
        optimizer = vars(optimizers)[args.optimizer]()
        if args.optimizer == 'SGD':
            optimizer.lr = args.learn_rate
        optimizer.use_cleargrads()
        optimizer.setup(model)
        optimizer.add_hook(chainer.optimizer.GradientClipping(args.clip_grads))
        if args.L2_weight > 0.:
            optimizer.add_hook(chainer.optimizer.WeightDecay(args.L2_weight))
        status = None

    logger.info('Loading text data from ' + args.train)
    train_set = dialog_corpus.load(args.train, vocab, args.target_speaker)
    logger.info('Loading validation data from ' + args.validate)
    validate_set = dialog_corpus.load(args.validate, vocab,
                                      args.target_speaker)
    logger.info('Making mini batches')
    train_batchset = dialog_corpus.make_minibatches(
        train_set, batchsize=args.batch_size, max_length=args.max_batch_length)
    validate_batchset = dialog_corpus.make_minibatches(
        validate_set,
        batchsize=args.batch_size,
        max_length=args.max_batch_length)
    # report data summary
    logger.info('vocabulary size = %d' % len(vocab))
    logger.info('#train sample = %d  #mini-batch = %d' %
                (len(train_set), len(train_batchset)))
    logger.info('#validate sample = %d  #mini-batch = %d' %
                (len(validate_set), len(validate_batchset)))
    random.shuffle(train_batchset, random.random)

    # initialize status parameters
    if status is None:
        status = Status(max(round(len(train_batchset), -3) / 50, 500),
                        progress_bar=not args.no_progress_bar)
    else:
        status.progress_bar = not args.no_progress_bar

    # move model to gpu
    if args.gpu >= 0:
        model.to_gpu()

    while status.epoch <= args.num_epochs:
        logger.info('---------------------training--------------------------')
        if args.optimizer == 'SGD':
            logger.info('Epoch %d/%d : SGD learning rate = %g' %
                        (status.epoch, args.num_epochs, optimizer.lr))
        else:
            logger.info(
                'Epoch %d/%d : %s eps = %g' %
                (status.epoch, args.num_epochs, args.optimizer, optimizer.eps))
        train_ppl = train_step(model, optimizer, train_set, train_batchset,
                               status, xp)
        logger.info("epoch %d training perplexity: %f" %
                    (status.epoch, train_ppl))
        # write the model params
        modelfile = args.model + '.' + str(status.epoch)
        logger.info('writing model params to ' + modelfile)
        model.to_cpu()
        with open(modelfile, 'wb') as f:
            pickle.dump((vocab, model, args), f, -1)
        if args.gpu >= 0:
            model.to_gpu()

        # start validation step
        logger.info('---------------------validation------------------------')
        start_at = time.time()
        validate_ppl = validate_step(model, validate_set, validate_batchset,
                                     status, xp)
        logger.info('epoch %d validation perplexity: %.4f' %
                    (status.epoch, validate_ppl))
        # update best model with the minimum perplexity
        if status.min_validate_ppl >= validate_ppl:
            status.bestmodel_num = status.epoch
            logger.info('validation perplexity reduced: %.4f -> %.4f' %
                        (status.min_validate_ppl, validate_ppl))
            status.min_validate_ppl = validate_ppl

        elif args.optimizer == 'SGD':
            modelfile = args.model + '.' + str(status.bestmodel_num)
            logger.info('reloading model params from ' + modelfile)
            with open(modelfile, 'rb') as f:
                vocab, model, tmp_args = pickle.load(f)
            if args.gpu >= 0:
                model.to_gpu()
            optimizer.lr *= args.learn_decay
            if optimizer.lr < args.lower_bound:
                break
            optimizer.setup(model)
        else:
            optimizer.eps *= args.learn_decay
            if optimizer.eps < args.lower_bound:
                break

        status.new_epoch(validate_time=time.time() - start_at)
        # dump snapshot
        if args.snapshot:
            logger.info('writing snapshot to ' + args.snapshot)
            model.to_cpu()
            with open(args.snapshot, 'wb') as f:
                pickle.dump((vocab, optimizer, status, args), f, -1)
            if args.gpu >= 0:
                model.to_gpu()

    logger.info('----------------')
    # make a symbolic link to the best model
    logger.info('the best model is %s.%d.' %
                (args.model, status.bestmodel_num))
    logger.info('a symbolic link is made as ' + args.model + '.best')
    if os.path.exists(args.model + '.best'):
        os.remove(args.model + '.best')
    os.symlink(os.path.basename(args.model + '.' + str(status.bestmodel_num)),
               args.model + '.best')
    logger.info('done')
Example #55
0
def main():
	parser = argparse.ArgumentParser()
	parser.add_argument("--batchsize", "-b", type=int, default=64)
	parser.add_argument("--total-epochs", "-e", type=int, default=5000)
	parser.add_argument("--num-labeled-data", "-nl", type=int, default=10000)
	parser.add_argument("--gpu-device", "-g", type=int, default=0)
	parser.add_argument("--grad-clip", "-gc", type=float, default=5)
	parser.add_argument("--learning-rate", "-lr", type=float, default=0.0001)
	parser.add_argument("--momentum", "-mo", type=float, default=0.5)
	parser.add_argument("--optimizer", "-opt", type=str, default="adam")
	parser.add_argument("--seed", type=int, default=0)
	parser.add_argument("--model", "-m", type=str, default="model.hdf5")
	args = parser.parse_args()

	np.random.seed(args.seed)

	model = Model()
	model.load(args.model)

	mnist_train, mnist_test = chainer.datasets.get_mnist()
	images_train, labels_train = mnist_train._datasets
	images_test, labels_test = mnist_test._datasets

	# normalize
	images_train = (images_train - 0.5) * 2
	images_test = (images_test - 0.5) * 2

	dataset = Dataset(train=(images_train, labels_train), 
					  test=(images_test, labels_test), 
					  num_labeled_data=args.num_labeled_data, 
					  num_classes=model.ndim_y - 1,
					  num_extra_classes=1)
	print("#labeled:	{}".format(dataset.get_num_labeled_data()))
	print("#unlabeled:	{}".format(dataset.get_num_unlabeled_data()))
	_, labels = dataset.get_labeled_data()

	total_iterations_train = len(images_train) // args.batchsize

	# optimizers
	optimizer_encoder = Optimizer(args.optimizer, args.learning_rate, args.momentum)
	optimizer_encoder.setup(model.encoder)
	if args.grad_clip > 0:
		optimizer_encoder.add_hook(GradientClipping(args.grad_clip))

	optimizer_decoder = Optimizer(args.optimizer, args.learning_rate, args.momentum)
	optimizer_decoder.setup(model.decoder)
	if args.grad_clip > 0:
		optimizer_decoder.add_hook(GradientClipping(args.grad_clip))

	optimizer_discriminator = Optimizer(args.optimizer, args.learning_rate, args.momentum)
	optimizer_discriminator.setup(model.discriminator)
	if args.grad_clip > 0:
		optimizer_discriminator.add_hook(GradientClipping(args.grad_clip))

	using_gpu = False
	if args.gpu_device >= 0:
		cuda.get_device(args.gpu_device).use()
		model.to_gpu()
		using_gpu = True
	xp = model.xp

	# 0 -> true sample
	# 1 -> generated sample
	class_true = np.zeros(args.batchsize, dtype=np.int32)
	class_fake = np.ones(args.batchsize, dtype=np.int32)
	if using_gpu:
		class_true = cuda.to_gpu(class_true)
		class_fake = cuda.to_gpu(class_fake)

	y_onehot_u = xp.zeros((1, model.ndim_y), dtype=xp.float32)
	y_onehot_u[0, -1] = 1	# turn on the extra class
	y_onehot_u = xp.repeat(y_onehot_u, args.batchsize, axis=0)

	training_start_time = time.time()
	for epoch in range(args.total_epochs):

		sum_loss_generator 		= 0
		sum_loss_discriminator 	= 0
		sum_loss_autoencoder 	= 0
		sum_discriminator_confidence_true_l = 0
		sum_discriminator_confidence_fake_l = 0
		sum_discriminator_confidence_true_u = 0
		sum_discriminator_confidence_fake_u = 0

		epoch_start_time = time.time()
		dataset.shuffle()

		# training
		for itr in range(total_iterations_train):
			# update model parameters
			with chainer.using_config("train", True):
				# sample minibatch
				x_u = dataset.sample_unlabeled_minibatch(args.batchsize, gpu=using_gpu)
				x_l, y_l, y_onehot_l = dataset.sample_labeled_minibatch(args.batchsize, gpu=using_gpu)
				
				### reconstruction phase ###
				if True:
					z_u = model.encode_x_z(x_u)
					x_reconstruction_u = model.decode_z_x(z_u)
					loss_reconstruction_u = F.mean_squared_error(x_u, x_reconstruction_u)

					z_l = model.encode_x_z(x_l)
					x_reconstruction_l = model.decode_z_x(z_l)
					loss_reconstruction_l = F.mean_squared_error(x_l, x_reconstruction_l)

					loss_reconstruction = loss_reconstruction_u + loss_reconstruction_l

					model.cleargrads()
					loss_reconstruction.backward()
					optimizer_encoder.update()
					optimizer_decoder.update()

				### adversarial phase ###
				if True:
					z_fake_u = model.encode_x_z(x_u)
					z_fake_l = model.encode_x_z(x_l)

					if False:
						z_true_l = sampler.supervised_swiss_roll(args.batchsize, model.ndim_z, y_l, model.ndim_y - 1)
						z_true_u = sampler.swiss_roll(args.batchsize, model.ndim_z, model.ndim_y - 1)
					else:
						z_true_l = sampler.supervised_gaussian_mixture(args.batchsize, model.ndim_z, y_l, model.ndim_y - 1)
						z_true_u = sampler.gaussian_mixture(args.batchsize, model.ndim_z, model.ndim_y - 1)

					if using_gpu:
						z_true_u = cuda.to_gpu(z_true_u)
						z_true_l = cuda.to_gpu(z_true_l)

					dz_true_u = model.discriminate(y_onehot_u, z_true_u, apply_softmax=False)
					dz_fake_u = model.discriminate(y_onehot_u, z_fake_u, apply_softmax=False)
					dz_true_l = model.discriminate(y_onehot_l, z_true_l, apply_softmax=False)
					dz_fake_l = model.discriminate(y_onehot_l, z_fake_l, apply_softmax=False)

					discriminator_confidence_true_u = float(xp.mean(F.softmax(dz_true_u).data[:, 0]))
					discriminator_confidence_fake_u = float(xp.mean(F.softmax(dz_fake_u).data[:, 1]))
					discriminator_confidence_true_l = float(xp.mean(F.softmax(dz_true_l).data[:, 0]))
					discriminator_confidence_fake_l = float(xp.mean(F.softmax(dz_fake_l).data[:, 1]))

					loss_discriminator = (F.softmax_cross_entropy(dz_true_u, class_true)
											 + F.softmax_cross_entropy(dz_fake_u, class_fake) 
											 	+ F.softmax_cross_entropy(dz_true_l, class_true) 
											 		+ F.softmax_cross_entropy(dz_fake_l, class_fake))

					model.cleargrads()
					loss_discriminator.backward()
					optimizer_discriminator.update()

				### generator phase ###
				if True:
					z_fake_u = model.encode_x_z(x_u)
					z_fake_l = model.encode_x_z(x_l)
					dz_fake_u = model.discriminate(y_onehot_u, z_fake_u, apply_softmax=False)
					dz_fake_l = model.discriminate(y_onehot_l, z_fake_l, apply_softmax=False)

					loss_generator = F.softmax_cross_entropy(dz_fake_u, class_true) + F.softmax_cross_entropy(dz_fake_l, class_true)

					model.cleargrads()
					loss_generator.backward()
					optimizer_encoder.update()

				sum_loss_discriminator 	+= float(loss_discriminator.data)
				sum_loss_generator 		+= float(loss_generator.data)
				sum_loss_autoencoder 	+= float(loss_reconstruction.data)
				sum_discriminator_confidence_true_u += discriminator_confidence_true_u
				sum_discriminator_confidence_fake_u += discriminator_confidence_fake_u
				sum_discriminator_confidence_true_l += discriminator_confidence_true_l
				sum_discriminator_confidence_fake_l += discriminator_confidence_fake_l

			printr("Training ... {:3.0f}% ({}/{})".format((itr + 1) / total_iterations_train * 100, itr + 1, total_iterations_train))

		model.save(args.model)
			
		clear_console()
		print("Epoch {} done in {} sec - loss: g={:.5g}, d={:.5g}, a={:.5g} - disc_u: true={:.1f}%, fake={:.1f}% - disc_l: true={:.1f}%, fake={:.1f}% - total {} min".format(
			epoch + 1, int(time.time() - epoch_start_time), 
			sum_loss_generator / total_iterations_train, 
			sum_loss_discriminator / total_iterations_train, 
			sum_loss_autoencoder / total_iterations_train, 
			sum_discriminator_confidence_true_u / total_iterations_train * 100, 
			sum_discriminator_confidence_fake_u / total_iterations_train * 100, 
			sum_discriminator_confidence_true_l / total_iterations_train * 100, 
			sum_discriminator_confidence_fake_l / total_iterations_train * 100, 
			int((time.time() - training_start_time) // 60)))
Example #56
0
# and train an LDA-like model on it
import os.path
import pickle
import time

from chainer import cuda
from chainer import serializers
import chainer.optimizers as O
import numpy as np

from lda2vec import utils
from lda2vec import prepare_topics, print_top_words_per_topic
from lda2vec_model import LDA2Vec

gpu_id = int(os.getenv('CUDA_GPU', 0))
cuda.get_device(gpu_id).use()
print "Using GPU " + str(gpu_id)

# You must run preprocess.py before this data becomes available
vocab = pickle.load(open('vocab', 'r'))
corpus = pickle.load(open('corpus', 'r'))
data = np.load(open('data.npz', 'r'))
flattened = data['flattened']
story_id = data['story_id']
author_id = data['author_id']
time_id = data['time_id']
ranking = data['ranking'].astype('float32')
score = data['score'].astype('float32')

# Model Parameters
# Number of documents
                    default=0,
                    type=int,
                    help='whether to show the graph of loss values or not')
parser.add_argument(
    '--lang',
    '-l',
    default='ja',
    type=str,
    help='the choice of a language (Japanese "ja" or English "en" )')
args = parser.parse_args()

# GPU settings
gpu_device = args.gpu
if args.gpu >= 0:
    cuda.check_cuda_available()
    cuda.get_device(gpu_device).use()


def parse_ja_text(text):
    """
    Function to parse Japanese text.
    :param text: string: sentence written by Japanese
    :return: list: parsed text
    """
    import MeCab
    mecab = MeCab.Tagger("mecabrc")
    mecab.parse('')

    # list up noun
    mecab_result = mecab.parseToNode(text)
    parse_list = []
Example #58
0
def experiment():
    ite = 100
    pdata = 1000
    epoch = 100
    batchsize = 1000

    seed = 2018

    gpu = True

    loss_pu = np.zeros((ite, epoch))
    est_error_pu = np.zeros((ite, epoch))
    est_error_pubp = np.zeros((ite, epoch))
    est_precision_pu = np.zeros((ite, epoch))
    est_recall_pu = np.zeros((ite, epoch))
    est_precision1_pubp = np.zeros((ite, epoch))
    est_recall1_pubp = np.zeros((ite, epoch))
    est_precision2_pubp = np.zeros((ite, epoch))
    est_recall2_pubp = np.zeros((ite, epoch))
    est_precision3_pubp = np.zeros((ite, epoch))
    est_recall3_pubp = np.zeros((ite, epoch))
    est_precision4_pubp = np.zeros((ite, epoch))
    est_recall4_pubp = np.zeros((ite, epoch))
    est_precision5_pubp = np.zeros((ite, epoch))
    est_recall5_pubp = np.zeros((ite, epoch))

    xp = np.loadtxt('xp.csv')
    xq = np.loadtxt('xq.csv')
    xn = np.loadtxt('xn.csv')

    x_train = np.concatenate([xp, xq, xn], axis=0)
    t_train = np.concatenate([np.ones(len(xp)), np.zeros(len(xq)+len(xn))], axis=0)
    x_test = np.concatenate([xq, xn], axis=0)
    t_test = np.concatenate([np.ones(len(xq)), np.zeros(len(xn))], axis=0)
    for i in range(ite):
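        # one independent trial per iteration; per-epoch results are stored row-wise and dumped to CSV after every trial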
        np.random.seed(seed)
        #PN classification
        
        (KM1, KM2) = wrapper(x_train[t_train==0],x_train[t_train==1])
        pi = KM2
        #pi = np.mean(t_test)

        print(x_train.shape)
        print(t_train.shape)
        print(x_test.shape)
        print(t_test.shape)
        print(pi)

        dim = x_train.shape[1]        

        model = MultiLayerPerceptron(dim)
        optimizer = optimizers.Adam(alpha=1e-5)
        optimizer.setup(model)
        optimizer.add_hook(chainer.optimizer.WeightDecay(0.005))

        if gpu:
            gpu_device = 0
            cuda.get_device(gpu_device).use()
            model.to_gpu(gpu_device)
            xp = cuda.cupy
        else:
            xp = np

        model, optimizer, loss_list, acc1, acc2, pre0, rec0, pre1, rec1, pre2, rec2, pre3, rec3, pre4, rec4, pre5, rec5, pre, rec = train_pu(x_train, t_train, x_test, t_test, pi, epoch, model, optimizer, batchsize, xp)
       
        if i == 0:
            pres = pre
            recs = rec
        else:
            pres += pre
            recs += rec

        loss_pu[i] = loss_list
        est_error_pu[i] = acc1
        est_error_pubp[i] = acc2
        est_precision_pu[i] = pre0
        est_recall_pu[i] = rec0
        est_precision1_pubp[i] = pre1
        est_recall1_pubp[i] = rec1
        est_precision2_pubp[i] = pre2
        est_recall2_pubp[i] = rec2
        est_precision3_pubp[i] = pre3
        est_recall3_pubp[i] = rec3
        est_precision4_pubp[i] = pre4
        est_recall4_pubp[i] = rec4
        est_precision5_pubp[i] = pre5
        est_recall5_pubp[i] = rec5


        print(acc1[-1])
        print(acc2[-1])

        seed += 1
        
        np.savetxt('loss_pu_elkan_%d.csv'%seed, loss_pu, delimiter=',')
        np.savetxt('est_error_pu_elkan_%d.csv'%seed, est_error_pu, delimiter=',')
        np.savetxt('est_error_pubp_elkan_%d.csv'%seed, est_error_pubp, delimiter=',')
        np.savetxt('est_precision_pu_elkan_%d.csv'%seed, est_precision_pu, delimiter=',')
        np.savetxt('est_recall_pu_elkan_%d.csv'%seed, est_recall_pu, delimiter=',')
        np.savetxt('est_precision-1_pubp_elkan_%d.csv'%seed, est_precision1_pubp, delimiter=',')
        np.savetxt('est_recall-1_pubp_elkan_%d.csv'%seed, est_recall1_pubp, delimiter=',')
        np.savetxt('est_precision-2_pubp_elkan_%d.csv'%seed, est_precision2_pubp, delimiter=',')
        np.savetxt('est_recall-2_pubp_elkan_%d.csv'%seed, est_recall2_pubp, delimiter=',')
        np.savetxt('est_precision-3_pubp_elkan_%d.csv'%seed, est_precision3_pubp, delimiter=',')
        np.savetxt('est_recall-3_pubp_elkan_%d.csv'%seed, est_recall3_pubp, delimiter=',')
        np.savetxt('est_precision-4_pubp_elkan_%d.csv'%seed, est_precision4_pubp, delimiter=',')
        np.savetxt('est_recall-4_pubp_elkan_%d.csv'%seed, est_recall4_pubp, delimiter=',')
        np.savetxt('est_precision-5_pubp_elkan_%d.csv'%seed, est_precision5_pubp, delimiter=',')
        np.savetxt('est_recall-5_pubp_elkan_%d.csv'%seed, est_recall5_pubp, delimiter=',')
        np.savetxt('precisions_pubp_elkan_%d.csv'%seed, pres/(i+1), delimiter=',')
        np.savetxt('recalls_pubp_elkan_%d.csv'%seed, recs/(i+1), delimiter=',')
        
    loss_pu_mean = np.mean(loss_pu, axis=1)
    est_error_pu_mean = np.mean(est_error_pu, axis=1)
    est_error_pubp_mean = np.mean(est_error_pubp, axis=1)
    est_error_pu_std = np.std(est_error_pu, axis=1)
    est_error_pubp_std = np.std(est_error_pubp, axis=1)
    return loss_pu_mean, est_error_pu_mean, est_error_pubp_mean, est_error_pu_std, est_error_pubp_std 
Example #59
0
    def test_get_dummy_device(self):
        if not cuda.available:
            self.assertIs(cuda.get_device(), cuda.DummyDevice)
Example #60
0
def CifarAnalysis(folderName=None, n_epoch=1, batchsize=1000, **kwd):
    id_gpu = 0

    OutStr = ""
    OutStr += 'GPU: {}\n'.format(id_gpu)
    OutStr += 'Minibatch-size: {}\n'.format(batchsize)
    OutStr += 'epoch: {}\n'.format(n_epoch)
    OutStr += 'kwd: {}\n'.format(kwd)
    OutStr += ''

    print OutStr

    fOutput = None
    fInfo = None
    if folderName:
        if not os.path.exists(folderName):
            os.makedirs(folderName)
        fOutput = open(os.path.join(folderName, "output.dat"), "w")
        fInfo = open(os.path.join(folderName, "info.dat"), "w")
        shutil.copyfile(__file__,
                        os.path.join(folderName, os.path.basename(__file__)))

    if fInfo: fInfo.write(OutStr)

    # Prepare dataset
    InDataBatch = []

    data_tr = np.zeros((50000, 3 * 32 * 32), dtype=np.float32)
    data_ev = np.zeros((10000, 3 * 32 * 32), dtype=np.float32)
    label_tr = np.zeros((50000), dtype=np.int32)
    label_ev = np.zeros((10000), dtype=np.int32)

    for i in range(1, 5 + 1):
        with open("data_cifar10/data_batch_%d" % i, "r") as f:
            tmp = pickle.load(f)
            data_tr[(i - 1) * 10000:i * 10000] = tmp["data"]
            label_tr[(i - 1) * 10000:i * 10000] = tmp["labels"]
    with open("data_cifar10/test_batch", "r") as f:
        tmp = pickle.load(f)
        data_ev[:] = tmp["data"]
        label_ev[:] = tmp["labels"]

## Prep
    print "Normalizing data ..."

    def Normalize(x):
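        # center each image at zero mean and divide by a per-image scale factor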
        avg = np.average(x, axis=1).reshape((len(x), 1))
        std = np.sqrt(np.sum(x * x, axis=1) - np.sum(x, axis=1)).reshape(
            (len(x), 1))
        y = (x - avg) / std
        return y

    data_tr = Normalize(data_tr)
    data_ev = Normalize(data_ev)
    print "done"

    x_tr = data_tr.reshape((len(data_tr), 3, 32, 32))
    x_ev = data_ev.reshape((len(data_ev), 3, 32, 32))
    y_tr = label_tr
    y_ev = label_ev
    N_tr = len(data_tr)  # 50000
    N_ev = len(data_ev)  # 10000

    ## Define analysis
    Resume = None
    if "Resume" in kwd:
        Resume = kwd["Resume"]
        del kwd["Resume"]

    model = L.Classifier(
        ImageProcessNetwork(I_colors=3,
                            I_Xunit=32,
                            I_Yunit=32,
                            F_unit=10,
                            **kwd))
    if id_gpu >= 0:
        cuda.get_device(id_gpu).use()
        model.to_gpu()
    xp = np if id_gpu < 0 else cuda.cupy

    # Setup optimizer
    optimizer = optimizers.Adam()
    optimizer.setup(model)

    # Init/Resume
    if Resume:
        print('Load optimizer state from', Resume)
        serializers.load_hdf5(Resume + ".state", optimizer)
        serializers.load_hdf5(Resume + ".model", model)

# Learning loop
    if fOutput: fOutput.write("epoch,mode,loss,accuracy\n")
    for epoch in six.moves.range(1, n_epoch + 1):
        print 'epoch %d' % epoch

        # training
        perm = np.random.permutation(N_tr)
        sum_accuracy = 0
        sum_loss = 0
        start = time.time()
        for i in six.moves.range(0, N_tr, batchsize):
            x = chainer.Variable(xp.asarray(x_tr[perm[i:i + batchsize]]))
            t = chainer.Variable(xp.asarray(y_tr[perm[i:i + batchsize]]))

            # Pass the loss function (Classifier defines it) and its arguments
            model.predictor.setTrainMode(True)
            optimizer.update(model, x, t)

            if (epoch == 1 and i == 0) and folderName:
                with open(os.path.join(folderName, 'graph.dot'), 'w') as o:
                    g = computational_graph.build_computational_graph(
                        (model.loss, ))
                    o.write(g.dump())
                print 'graph generated'

            sum_loss += float(model.loss.data) * len(t.data)
            sum_accuracy += float(model.accuracy.data) * len(t.data)
        end = time.time()
        elapsed_time = end - start
        throughput = N_tr / elapsed_time
        print 'train mean loss=%.3f, accuracy=%.1f%%, throughput=%.0f images/sec' % (
            sum_loss / N_tr, sum_accuracy / N_tr * 100., throughput)
        if fOutput:
            fOutput.write("%d,Train,%e,%e\n" %
                          (epoch, sum_loss / N_tr, sum_accuracy / N_tr))

        # evaluation
        sum_accuracy = 0
        sum_loss = 0
        for i in six.moves.range(0, N_ev, batchsize):
            x = chainer.Variable(xp.asarray(x_ev[i:i + batchsize]),
                                 volatile='on')
            t = chainer.Variable(xp.asarray(y_ev[i:i + batchsize]),
                                 volatile='on')
            model.predictor.setTrainMode(False)
            loss = model(x, t)
            sum_loss += float(loss.data) * len(t.data)
            sum_accuracy += float(model.accuracy.data) * len(t.data)

        print 'test  mean loss=%.3f, accuracy=%.1f%%' % (
            sum_loss / N_ev,
            sum_accuracy / N_ev * 100,
        )
        if fOutput:
            fOutput.write("%d,Test,%e,%e\n" %
                          (epoch, sum_loss / N_ev, sum_accuracy / N_ev))

        if folderName and (epoch % 10 == 0 or epoch == n_epoch):
            # Save the model and the optimizer
            if epoch == n_epoch:
                myFname = os.path.join(folderName, 'mlp_final')
            else:
                myFname = os.path.join(folderName, 'mlp_%d' % epoch)

            #print 'save the model'
            serializers.save_hdf5(myFname + ".model", model)
            serializers.save_hdf5(myFname + ".state", optimizer)

    if fOutput: fOutput.close()
    if fInfo: fInfo.close()
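
The examples above share one device-selection idiom: cuda.get_device(id).use() picks the CUDA device, model.to_gpu() moves the parameters onto it, and xp is bound to cuda.cupy or numpy so the rest of the code is backend-agnostic. A minimal sketch of that pattern, assuming classic Chainer with CuPy available; setup_device is a hypothetical helper name, not taken from any example:

import numpy as np
from chainer import cuda

def setup_device(model, gpu_id):
    # Select the CUDA device, move the model parameters onto it and return
    # the array module to use afterwards (cuda.cupy on GPU, NumPy on CPU).
    if gpu_id >= 0:
        cuda.check_cuda_available()
        cuda.get_device(gpu_id).use()
        model.to_gpu()
        return cuda.cupy
    return np

# usage: xp = setup_device(model, args.gpu); minibatches are then wrapped with xp.asarray(...)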