import gc
import random
import sys
import time

import torch
import torch.nn as nn
import torch.optim as optim

# SeqLabel, batchify_with_label, batchify_sequence_labeling_with_label,
# predict_check, evaluate and lr_decay are assumed to come from the
# surrounding NCRF++-style project; they are not defined in this snippet.


def train(data):
    save_data_name = data.model_dir + ".dset"
    data.save(save_data_name)
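    # persist the alphabets/vocabularies alongside the model so decoding can
    # later rebuild them from the .dset file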
    model = SeqLabel(data)
    # load pretrained weights; model_path and map_location are assumed to be
    # defined at module scope (map_location could be e.g. 'cpu' to load
    # GPU-trained weights on a CPU-only machine)
    print('loading model %s' % model_path)
    model.load_state_dict(torch.load(model_path, map_location=map_location))
    print('data.seg:', data.seg)
    if data.optimizer.lower() == "sgd":
        optimizer = optim.SGD(model.parameters(),
                              lr=data.HP_lr,
                              momentum=data.HP_momentum,
                              weight_decay=data.HP_l2)
    else:
        # fail fast instead of crashing later at optimizer.step()
        raise ValueError("Unsupported optimizer: %s" % data.optimizer)

    best_dev = -10
    print('data.HP_gpu:', data.HP_gpu)
    for idx in range(data.HP_iteration):
        epoch_start = time.time()
        temp_start = epoch_start
        print("Epoch: %s/%s" % (idx, data.HP_iteration))
        if data.optimizer.lower() == "sgd":
            optimizer = lr_decay(optimizer, idx, data.HP_lr_decay, data.HP_lr)

        instance_count = 0
        sample_id = 0
        sample_loss = 0  # reset at every 500-instance logging interval below
        total_loss = 0  # cumulative loss over the whole epoch
        right_token = 0
        whole_token = 0
        # print("Before Shuffle: first input word list:", data.train_Ids[0][0])
        random.shuffle(data.train_Ids)
        print("Shuffle: first input word list:", data.train_Ids[0][0])
        model.train()
        model.zero_grad()
        batch_size = data.HP_batch_size
        # batch_id = 0
        train_num = len(data.train_Ids)
        print('train_num:', train_num)  # number of training instances
        total_batch = train_num // batch_size + 1
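        # the "+ 1" covers a trailing partial batch; when train_num divides
        # evenly, the empty-slice guard below skips the extra iteration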
        print('total_batch:', total_batch)

        for batch_id in range(total_batch):
            start = batch_id * batch_size
            end = (batch_id + 1) * batch_size
            if end > train_num:
                end = train_num
            instance = data.train_Ids[start:end]

            if not instance:
                continue

            batch_word, batch_features, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, \
             batch_label, mask = batchify_with_label(instance, data.HP_gpu, True, data.sentence_classification)
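            # batchify_with_label is assumed to pad the batch, sort it by
            # length for packed RNN input, and return recover indices
            # (batch_wordrecover, batch_charrecover) to restore input order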
            instance_count += 1
            loss, tag_seq = model.calculate_loss(batch_word, batch_features,
                                                 batch_wordlen, batch_char,
                                                 batch_charlen,
                                                 batch_charrecover,
                                                 batch_label, mask)

            right, whole = predict_check(
                tag_seq, batch_label, mask,
                data.sentence_classification)  # compare predictions against gold labels
            right_token += right
            whole_token += whole
            sample_loss += loss.item()
            total_loss += loss.item()

            if end % 500 == 0:
                temp_time = time.time()
                temp_cost = temp_time - temp_start
                temp_start = temp_time
                print(
                    "     Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f"
                    % (end, temp_cost, sample_loss, right_token, whole_token,
                       (right_token + 0.) / whole_token))
                if sample_loss > 1e8 or str(sample_loss) == "nan":
                    print(
                        "ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT...."
                    )
                    exit(1)
                sys.stdout.flush()
                sample_loss = 0
            loss.backward()
            optimizer.step()
            model.zero_grad()

        temp_time = time.time()
        temp_cost = temp_time - temp_start
        print("     Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f" %
              (end, temp_cost, sample_loss, right_token, whole_token,
               (right_token + 0.) / whole_token))

        epoch_finish = time.time()
        epoch_cost = epoch_finish - epoch_start
        print(
            "Epoch: %s training finished. Time: %.2fs, speed: %.2fst/s,  total loss: %s"
            % (idx, epoch_cost, train_num / epoch_cost, total_loss))
        print("total_loss:", total_loss)
        if total_loss > 1e8 or str(total_loss) == "nan":
            print(
                "ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT...."
            )
            exit(1)

        speed, acc, p, r, f, _, _ = evaluate(data, model, "dev")
        dev_finish = time.time()
        dev_cost = dev_finish - epoch_finish

        current_score = f
        print(
            "Dev: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
            % (dev_cost, speed, acc, p, r, f))

        if current_score > best_dev:
            print("Exceed previous best f score:", best_dev)
            model_name = data.model_dir + '.' + str(idx) + ".model"
            print("Save current best torch_model in file:",
                  model_name)  # 保存当前epoch结束的模型
            torch.save(model.state_dict(), model_name)
            best_dev = current_score
        # save a checkpoint every 50 epochs
        if idx % 50 == 0:
            model_name = data.model_dir + '.' + str(idx) + ".model"
            print('Save every 50 epoch in file: %s' % model_name)
            torch.save(model.state_dict(), model_name)

        speed, acc, p, r, f, _, _ = evaluate(data, model, "test")
        test_finish = time.time()
        test_cost = test_finish - dev_finish
        print(
            "Test: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
            % (test_cost, speed, acc, p, r, f))

        # also evaluate on the samples we added ourselves (the "raw" set):
        speed, acc, p, r, f, _, _ = evaluate(data, model, "raw")
        raw_finish = time.time()
        raw_cost = raw_finish - test_finish
        print(
            "Raw: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
            % (raw_cost, speed, acc, p, r, f))

        gc.collect()
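
# lr_decay is referenced above but not defined in this snippet. A minimal
# sketch matching its call signature (an assumption, not code taken from this
# source): apply inverse-time decay to every parameter group.
def lr_decay(optimizer, epoch, decay_rate, init_lr):
    lr = init_lr / (1 + decay_rate * epoch)  # decay relative to the initial lr
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return optimizer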

# Example 2
def train(data):
    print("Training model...")
    data.show_data_summary()
    save_data_name = data.model_dir + ".dset"
    data.save(save_data_name)
    model = SeqLabel(data)
    loss_function = nn.NLLLoss()
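    # NOTE: loss_function is never used below; SeqLabel computes its own loss
    # via neg_log_likelihood_loss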
    if data.optimizer.lower() == "sgd":
        optimizer = optim.SGD(model.parameters(),
                              lr=data.HP_lr,
                              momentum=data.HP_momentum,
                              weight_decay=data.HP_l2)
    elif data.optimizer.lower() == "adagrad":
        optimizer = optim.Adagrad(model.parameters(),
                                  lr=data.HP_lr,
                                  weight_decay=data.HP_l2)
    elif data.optimizer.lower() == "adadelta":
        optimizer = optim.Adadelta(model.parameters(),
                                   lr=data.HP_lr,
                                   weight_decay=data.HP_l2)
    elif data.optimizer.lower() == "rmsprop":
        optimizer = optim.RMSprop(model.parameters(),
                                  lr=data.HP_lr,
                                  weight_decay=data.HP_l2)
    elif data.optimizer.lower() == "adam":
        optimizer = optim.Adam(model.parameters(),
                               lr=data.HP_lr,
                               weight_decay=data.HP_l2)
    else:
        print("Optimizer illegal: %s" % (data.optimizer))
        exit(0)
    best_dev = -10
    # data.HP_iteration = 1
    ## start training
    for idx in range(data.HP_iteration):
        epoch_start = time.time()
        temp_start = epoch_start
        print("Epoch: %s/%s" % (idx, data.HP_iteration))
        if data.optimizer.lower() == "sgd":
            optimizer = lr_decay(optimizer, idx, data.HP_lr_decay, data.HP_lr)
        instance_count = 0
        sample_id = 0
        sample_loss = 0
        total_loss = 0
        right_token = 0
        whole_token = 0
        random.shuffle(data.train_Ids)
        ## set model in train mode
        model.train()
        model.zero_grad()
        batch_size = data.HP_batch_size
        batch_id = 0
        train_num = len(data.train_Ids)
        total_batch = train_num // batch_size + 1
        for batch_id in range(total_batch):
            start = batch_id * batch_size
            end = (batch_id + 1) * batch_size
            if end > train_num:
                end = train_num
            instance = data.train_Ids[start:end]
            if not instance:
                continue
            batch_word, batch_features, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask = batchify_with_label(
                instance, data.HP_gpu)
            instance_count += 1
            loss, tag_seq = model.neg_log_likelihood_loss(
                batch_word, batch_features, batch_wordlen, batch_char,
                batch_charlen, batch_charrecover, batch_label, mask)
            right, whole = predict_check(tag_seq, batch_label, mask)
            right_token += right
            whole_token += whole
            sample_loss += loss.item()  # loss.data[0] is removed in PyTorch >= 0.5
            total_loss += loss.item()
            if end % 500 == 0:
                temp_time = time.time()
                temp_cost = temp_time - temp_start
                temp_start = temp_time
                print(
                    "     Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f"
                    % (end, temp_cost, sample_loss, right_token, whole_token,
                       (right_token + 0.) / whole_token))
                sys.stdout.flush()
                sample_loss = 0
            loss.backward()
            optimizer.step()
            model.zero_grad()
        temp_time = time.time()
        temp_cost = temp_time - temp_start
        print("     Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f" %
              (end, temp_cost, sample_loss, right_token, whole_token,
               (right_token + 0.) / whole_token))
        epoch_finish = time.time()
        epoch_cost = epoch_finish - epoch_start
        print(
            "Epoch: %s training finished. Time: %.2fs, speed: %.2fst/s,  total loss: %s"
            % (idx, epoch_cost, train_num / epoch_cost, total_loss))
        # continue
        speed, acc, p, r, f, _, _ = evaluate(data, model, "dev")
        dev_finish = time.time()
        dev_cost = dev_finish - epoch_finish

        if data.seg:
            current_score = f
            print(
                "Dev: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
                % (dev_cost, speed, acc, p, r, f))
        else:
            current_score = acc
            print("Dev: time: %.2fs speed: %.2fst/s; acc: %.4f" %
                  (dev_cost, speed, acc))

        if current_score > best_dev:
            if data.seg:
                print("Exceed previous best f score:", best_dev)
            else:
                print("Exceed previous best acc score:", best_dev)
            model_name = data.model_dir + '.' + str(idx) + ".model"
            print("Save current best model in file:", model_name)
            torch.save(model.state_dict(), model_name)
            best_dev = current_score
        ## decode test
        speed, acc, p, r, f, _, _ = evaluate(data, model, "test")
        test_finish = time.time()
        test_cost = test_finish - dev_finish
        if data.seg:
            print(
                "Test: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
                % (test_cost, speed, acc, p, r, f))
        else:
            print("Test: time: %.2fs, speed: %.2fst/s; acc: %.4f" %
                  (test_cost, speed, acc))
        gc.collect()
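
# predict_check is also external to this snippet. A minimal sketch of the
# masked token-accuracy check it is assumed to perform (count positions where
# prediction equals gold, ignoring padding):
import numpy as np

def predict_check(pred_variable, gold_variable, mask_variable,
                  sentence_classification=False):
    pred = pred_variable.cpu().data.numpy()
    gold = gold_variable.cpu().data.numpy()
    mask = mask_variable.cpu().data.numpy()
    overlap = (pred == gold)
    if sentence_classification:
        # sentence-level task: one prediction per sentence, no padding mask
        right_token = np.sum(overlap)
        whole_token = overlap.shape[0]
    else:
        right_token = np.sum(overlap * mask)
        whole_token = mask.sum()
    return right_token, whole_token


# Example 3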
def train():
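	# NOTE: unlike the variants above, this one takes no arguments: data,
	# config, gpu and logger are assumed to be module-level globals set up
	# before calling train()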
	total_batch = 0
	# model = CnnLstmCrf(config)
	model = SeqLabel(data)
	optimizer = optim.SGD(model.parameters(), lr=config.lr, momentum=config.momentum, weight_decay=config.l2)
	if gpu:
		model = model.cuda()
	best_dev = -10

	for idx in range(config.epoch):
		epoch_start = time.time()
		temp_start = epoch_start
		print("Epoch: %s/%s" % (idx, config.epoch))
		optimizer = lr_decay(optimizer, idx, config.lr_decay, config.lr)
		instance_count = 0
		sample_id = 0
		sample_loss = 0  # reset at each logging interval (every 6400 instances below)
		total_loss = 0  # cumulative loss over the whole epoch
		right_token = 0  # number of correctly predicted tokens this epoch
		whole_token = 0
		random.shuffle(data.train_ids)
		print("Shuffle: first input word list:", data.train_ids[0][1])

		model.train()
		model.zero_grad()
		batch_size = config.batch_size
		train_num = len(data.train_ids)
		print('batch_size:', batch_size, 'train_num:', train_num)
		total_batch = train_num // batch_size + 1

		for batch_id in range(total_batch):
			start = batch_id * batch_size
			end = (batch_id + 1) * batch_size
			if end > train_num:
				end = train_num
			instance = data.train_ids[start:end]  # [char,word,feat,label]
			if not instance:
				continue
			batch_word, batch_features, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, \
				batch_label, mask = batchify_sequence_labeling_with_label(instance, gpu, if_train=True)
			# loss, tag_seq = model(batch_char, batch_word, batch_features, mask, batch_charrecover, batch_wordlen, batch_label)
			loss, tag_seq = model.calculate_loss(
				batch_word, batch_features, batch_wordlen, batch_char, batch_charlen, batch_charrecover, batch_label, mask)
			right, whole = predict_check(tag_seq, batch_label, mask)
			right_token += right
			whole_token += whole
			# print('right_token/whole_token:', right_token/whole_token)
			sample_loss += loss.item()
			total_loss += loss.item()
			if end % 6400 == 0:
				temp_time = time.time()
				temp_cost = temp_time - temp_start
				temp_start = temp_time
				print("     Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f" % (
					end, temp_cost, sample_loss, right_token, whole_token, (right_token + 0.) / whole_token))
				if sample_loss > 1e8 or str(sample_loss) == "nan":
					print("ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT....")
					exit(1)
				sample_loss = 0
			loss.backward()
			optimizer.step()
			model.zero_grad()
		temp_time = time.time()
		temp_cost = temp_time - temp_start
		print("     Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f" % (
			end, temp_cost, sample_loss, right_token, whole_token, (right_token + 0.) / whole_token))

		epoch_finish = time.time()
		epoch_cost = epoch_finish - epoch_start
		print("Epoch: %s training finished. Time: %.2fs, speed: %.2fst/s,  total loss: %s" % (
			idx, epoch_cost, train_num / epoch_cost, total_loss))
		if total_loss > 1e8 or str(total_loss) == "nan":
			print("ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT....")
			exit(1)
		logger.info("Epoch: %s, Total loss: %s" % (idx, total_loss))
		speed, acc, p, r, f, _, _ = evaluate(data, model, "dev")
		dev_finish = time.time()
		dev_cost = dev_finish - epoch_finish

		current_score = f
		print("Dev: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f" % (dev_cost, speed, acc, p, r, f))
		logger.info(
			"Epoch: %s, Loss: %s, Dev: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f" % (
				idx, total_loss, dev_cost, speed, acc, p, r, f))
		if current_score > best_dev:
			model_name = config.model_path + '.' + str(idx) + '.model'
			torch.save(model.state_dict(), model_name)
			best_dev = current_score
			# logger.info("data:dev, epoch:%s, f1:%s, precision:%s, recall:%s" % (idx, current_score, p, r))

		speed, acc, p, r, f, _, _ = evaluate(data, model, "test")
		test_finish = time.time()
		test_cost = test_finish - dev_finish
		logger.info("Epoch: %s, Loss: %s, Test: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f" % (
			idx, total_loss, test_cost, speed, acc, p, r, f))