class CnnMaxPool(object):
    """Text CNN for binary classification over sequences of 100-d word vectors.

    Three parallel 1-D convolution branches (filter widths 4, 8, 12, each
    producing 512 channels) are max-pooled over time, concatenated, and fed
    to a single linear output unit producing one logit.

    NOTE(review): ported from Python 2 (xrange / print statements) to
    Python 3 for consistency with the rest of the file; `7297 / 4` became
    `7297 // 4` to preserve the original integer-division semantics.
    """

    def __init__(self):
        self.model = dy.Model()
        # Output channel counts for the three convolution branches.
        self.options = {'channel_1': 512, 'channel_2': 512, 'channel_3': 512}
        self.params = self.init_params()
        self.trainer = dy.AdamTrainer(self.model, alpha=0.01)
        self.loader = Loader(sanity_check=True)

    def load(self, filename):
        """Load model weights from `filename`."""
        self.model.load(filename)

    def save(self, filename):
        """Save model weights to `filename`."""
        self.model.save(filename)

    def init_params(self):
        """Allocate all trainable parameters.

        Conv filters have shape (1, width, 100, channels): height 1,
        filter widths 4/8/12, input depth 100 (the word-vector size).
        The output layer is a single linear unit over the concatenated
        pooled features.

        Returns:
            dict mapping parameter names to dynet Parameters objects.
        """
        params = {}
        # Convolution-layer parameters.
        params['conv_W_1'] = self.model.add_parameters(
            (1, 4, 100, self.options['channel_1']))
        params['conv_b_1'] = self.model.add_parameters(
            self.options['channel_1'])
        params['conv_W_2'] = self.model.add_parameters(
            (1, 8, 100, self.options['channel_2']))
        params['conv_b_2'] = self.model.add_parameters(
            self.options['channel_2'])
        params['conv_W_3'] = self.model.add_parameters(
            (1, 12, 100, self.options['channel_3']))
        params['conv_b_3'] = self.model.add_parameters(
            self.options['channel_3'])
        # Output-layer parameters.
        params['W'] = self.model.add_parameters(
            self.options['channel_1'] + self.options['channel_2'] +
            self.options['channel_3'])
        params['b'] = self.model.add_parameters(1)
        return params

    def build_graph(self, x):
        """Build the computation graph for one input expression.

        Args:
            x: dynet expression of shape (n, 100) — n word vectors.

        Returns:
            Scalar dynet expression: the classification logit.
        """
        conv_W_1 = dy.parameter(self.params['conv_W_1'])
        conv_b_1 = dy.parameter(self.params['conv_b_1'])
        conv_W_2 = dy.parameter(self.params['conv_W_2'])
        conv_b_2 = dy.parameter(self.params['conv_b_2'])
        conv_W_3 = dy.parameter(self.params['conv_W_3'])
        conv_b_3 = dy.parameter(self.params['conv_b_3'])
        W = dy.parameter(self.params['W'])
        b = dy.parameter(self.params['b'])
        (n, d), _ = x.dim()
        # Add a height-1 leading dimension so conv2d sees a (1, n, d) image.
        x = dy.reshape(x, (1, n, d))
        # One-dimensional convolution branches ('same' padding via
        # is_valid=False, stride 1).
        conv_1 = dy.tanh(
            dy.conv2d_bias(x, conv_W_1, conv_b_1, (1, 1), is_valid=False))
        conv_2 = dy.tanh(
            dy.conv2d_bias(x, conv_W_2, conv_b_2, (1, 1), is_valid=False))
        conv_3 = dy.tanh(
            dy.conv2d_bias(x, conv_W_3, conv_b_3, (1, 1), is_valid=False))
        # Max-over-time pooling per branch.
        pool_1 = dy.max_dim(dy.reshape(conv_1, (n, self.options['channel_1'])))
        pool_2 = dy.max_dim(dy.reshape(conv_2, (n, self.options['channel_2'])))
        pool_3 = dy.max_dim(dy.reshape(conv_3, (n, self.options['channel_3'])))
        # Fully-connected classification over the concatenated pooled features.
        pool = dy.concatenate([pool_1, pool_2, pool_3], 0)
        logit = dy.dot_product(pool, W) + b
        return logit

    def backward(self, word_vectors, label):
        """Run one forward/backward pass for a single example.

        Args:
            word_vectors: (n, 100) array-like of word vectors.
            label: 0/1 target (array-like accepted by dy.inputTensor).

        Returns:
            float: the scalar loss value (gradients are accumulated in the
            model; the caller is responsible for trainer.update()).
        """
        dy.renew_cg()
        x = dy.inputTensor(word_vectors)
        y = dy.inputTensor(label)
        logit = self.build_graph(x)
        # q is the weight applied to positive examples; formula from
        # https://www.tensorflow.org/api_docs/python/tf/nn/weighted_cross_entropy_with_logits
        q = 15
        l = 1 + (q - 1) * y
        # Numerically-stable weighted sigmoid cross-entropy with logits.
        loss = (1 - y) * logit + l * (dy.log(1 + dy.exp(-dy.abs(logit))) +
                                      dy.rectify(-logit))
        res = loss.value()
        loss.backward()
        return res

    def train(self):
        """Train for a fixed number of epochs over the loader's batches.

        Aborts early if the loss becomes NaN. NOTE(review): 7297 appears to
        be the dataset size and 4 the batch size — TODO confirm against
        Loader.
        """
        epoch = 5
        for i in range(epoch):
            # Floor division preserves the original Python 2 semantics.
            for j in range(7297 // 4):
                for input, label in self.loader.next_batch():
                    loss = self.backward(input, label)
                    if np.isnan(loss):
                        print('something went wrong, loss is nan.')
                        return
                self.trainer.update()
                print(j, loss)
def main():
    """Entry point: dump run options, build the model, then either draw a
    test output (mode == 'test') or run the training loop with periodic
    validation, loss summaries, progress reports, sample images, and
    checkpoints.

    Relies on a module-level argparse namespace `a` and helpers
    `Loader`, `CNN`, `draw` defined elsewhere in this file.
    """
    # Echo and persist the run configuration for reproducibility.
    for k, v in a._get_kwargs():
        print(k, "=", v)
    with open(os.path.join(a.output_dir, "options.json"), "w") as f:
        f.write(json.dumps(vars(a), sort_keys=True, indent=4))

    loader = Loader(a.batch_size)
    # initialize models here
    model = CNN(a)
    if a.checkpoint is not None:
        print("loading model from checkpoint")
        model.load(a.checkpoint)

    if a.mode == 'test':
        if a.checkpoint is None:
            print('need checkpoint to continue')
            return
        draw(model, os.path.join(a.output_dir, 'test_output.jpg'))
    else:
        # training
        start = time.time()
        for epoch in range(a.max_epochs):
            def should(freq):
                # Fire every `freq` epochs and always on the final epoch;
                # a non-positive freq disables the action entirely.
                return freq > 0 and ((epoch + 1) % freq == 0 or
                                     epoch == a.max_epochs - 1)

            training_loss = 0
            for _ in range(loader.ntrain):
                X, y = loader.next_batch(0)
                model.step(X, y)
                training_loss += model.loss.data[0]
            training_loss /= loader.ntrain

            # BUG FIX: validation_loss was previously assigned only inside
            # the validation branch, so a summary epoch that was not also a
            # validation epoch raised NameError. Initialize it up front.
            validation_loss = 0
            if should(a.validation_freq):
                print('validating model')
                for _ in range(loader.nval):
                    X, y = loader.next_batch(1)
                    model.validate_step(X, y)
                    validation_loss += model.loss.data[0]
                validation_loss /= loader.nval

            if should(a.summary_freq):
                print("recording summary")
                with open(os.path.join(a.output_dir, 'loss_record.txt'),
                          "a") as loss_file:
                    loss_file.write("%s\t%s\t%s\n" %
                                    (epoch, training_loss, validation_loss))

            if should(a.progress_freq):
                rate = (epoch + 1) / (time.time() - start)
                remaining = (a.max_epochs - 1 - epoch) / rate
                print("progress epoch %d remaining %dh" %
                      (epoch, remaining / 3600))
                print("training loss", training_loss)

            if should(a.display_freq):
                draw(model, os.path.join(a.output_dir, '%s.jpg' % epoch))

            if should(a.save_freq):
                print("saving model")
                model.save(os.path.join(a.output_dir, '%s.pth' % epoch))