def test_fitlog_callback(self):
    import fitlog
    fitlog.set_log_dir(self.tempdir, new_log=True)
    data_set, model = prepare_env()
    from fastNLP import Tester
    tester = Tester(data=data_set, model=model, metrics=AccuracyMetric(pred="predict", target="y"))
    fitlog_callback = FitlogCallback(data_set, tester)

    trainer = Trainer(data_set, model, optimizer=SGD(lr=0.1), loss=BCELoss(pred="predict", target="y"),
                      batch_size=32, n_epochs=5, print_every=50, dev_data=data_set,
                      metrics=AccuracyMetric(pred="predict", target="y"), use_tqdm=True,
                      callbacks=fitlog_callback, check_code_level=2)
    trainer.train()
def __init__(self, save_path, delete_when_train_finish=True, recovery_fitlog=True):
    r"""
    Saves the current Trainer state at the end of every epoch so that an interrupted run can be
    resumed; training continues from the most recent epoch. Example code:

    Example1::

        >>> callback = CheckPointCallback('chkp.pt')
        >>> trainer = Trainer(xxx, callback=callback)
        >>> trainer.train()  # if training fails before finishing, simply run the script again (make sure the data and hyper-parameters are exactly the same as last time)

    Example2::

        >>> fitlog.set_log_dir('xxx')
        >>> callback = CheckPointCallback('chkp.pt')  # CheckPointCallback must come on the line right after set_log_dir
        >>> trainer = Trainer(xxx, callback=callback)
        >>> trainer.train()  # if training fails before finishing, simply run the script again (make sure the data and hyper-parameters are exactly the same as last time)

    :param str save_path: Where to save the state. A concrete path is required, e.g. 'checkpoints/chtp.pt'.
        If this file already exists, the Trainer will automatically resume from the checkpoint when
        training starts.
    :param bool delete_when_train_finish: Whether to delete the checkpoint automatically once training
        finishes normally. Deleting the file allows the path to be reused.
    :param bool recovery_fitlog: Whether to restore fitlog to the corresponding log. If True, initialize
        this Callback on the line right after fitlog.set_log_dir; if False, a new log folder is created
        instead of continuing the previous one.
    """
    super().__init__()
    self.save_path = os.path.abspath(os.path.expanduser(save_path))
    self.delete_when_train_finish = delete_when_train_finish
    self.recover_fitlog = recovery_fitlog
    try:
        import fitlog
    except:
        self.recover_fitlog = False
    if os.path.exists(os.path.expanduser(self.save_path)):
        logger.info("The train will start from the checkpoint saved in {}.".format(self.save_path))
        if self.recover_fitlog:
            states = torch.load(self.save_path)
            if 'fitlog_log_dir' in states:
                try:
                    import fitlog
                    log_dir = states['fitlog_log_dir']
                    if 'fitlog_save_log_dir' in states:
                        log_dir = states['fitlog_save_log_dir']
                    fitlog.set_log_dir(log_dir, new_log=True)
                except:
                    logger.error("Failed to recover the fitlog states.")
def after_parse_t2g(C, need_logger=False):
    # ----- make logger -----
    logger = Logger(C.log_file)
    logger.log = logger.log_print_w_time
    if C.no_log:
        logger.log = logger.nolog

    C.tmp_file_name = random_tmp_name()

    # ----- other stuff -----
    if C.auto_hyperparam:
        auto_hyperparam(C)
        logger.log("Hyper parameters autoset.")

    if C.no_fitlog:
        fitlog.debug()
    fitlog.set_log_dir("logs")
    fitlog.add_hyper(C)

    logger.log("------------------------------------------------------")
    logger.log(pformat(C.__dict__))
    logger.log("------------------------------------------------------")

    C.gpus = list(range(tc.cuda.device_count()))

    # ----- initialize -----
    if C.t2g_seed > 0:
        random.seed(C.t2g_seed)
        tc.manual_seed(C.t2g_seed)
        np.random.seed(C.t2g_seed)
        tc.cuda.manual_seed_all(C.t2g_seed)
        tc.backends.cudnn.deterministic = True
        tc.backends.cudnn.benchmark = False
        logger.log("Seed set. %d" % (C.t2g_seed))

    tc.cuda.set_device(C.gpus[0])
    C.device = C.gpus[0]

    if need_logger:
        return C, logger
    return C
def test_CheckPointCallback(self):
    from fastNLP import CheckPointCallback, Callback
    from fastNLP import Tester

    class RaiseCallback(Callback):
        def __init__(self, stop_step=10):
            super().__init__()
            self.stop_step = stop_step

        def on_backward_begin(self, loss):
            if self.step > self.stop_step:
                raise RuntimeError()

    data_set, model = prepare_env()
    tester = Tester(data=data_set, model=model, metrics=AccuracyMetric(pred="predict", target="y"))

    import fitlog
    fitlog.set_log_dir(self.tempdir, new_log=True)
    tempfile_path = os.path.join(self.tempdir, 'chkt.pt')
    callbacks = [CheckPointCallback(tempfile_path)]

    fitlog_callback = FitlogCallback(data_set, tester)
    callbacks.append(fitlog_callback)
    callbacks.append(RaiseCallback(100))
    try:
        trainer = Trainer(data_set, model, optimizer=SGD(lr=0.1), loss=BCELoss(pred="predict", target="y"),
                          batch_size=32, n_epochs=5, print_every=50, dev_data=data_set,
                          metrics=AccuracyMetric(pred="predict", target="y"), use_tqdm=True,
                          callbacks=callbacks, check_code_level=2)
        trainer.train()
    except:
        pass
    # simulate re-running the script with the code below
    data_set, model = prepare_env()
    callbacks = [CheckPointCallback(tempfile_path)]
    tester = Tester(data=data_set, model=model, metrics=AccuracyMetric(pred="predict", target="y"))
    fitlog_callback = FitlogCallback(data_set, tester)
    callbacks.append(fitlog_callback)
    trainer = Trainer(data_set, model, optimizer=SGD(lr=0.1), loss=BCELoss(pred="predict", target="y"),
                      batch_size=32, n_epochs=5, print_every=50, dev_data=data_set,
                      metrics=AccuracyMetric(pred="predict", target="y"), use_tqdm=True,
                      callbacks=callbacks, check_code_level=2)
    trainer.train()
def train():
    args = parse_args()
    if args.debug:
        fitlog.debug()
        args.save_model = False
    # ================= define =================
    tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
    word_mask_index = tokenizer.mask_token_id
    word_vocab_size = len(tokenizer)

    if get_local_rank() == 0:
        fitlog.set_log_dir(args.log_dir)
        fitlog.commit(__file__, fit_msg=args.name)
        fitlog.add_hyper_in_file(__file__)
        fitlog.add_hyper(args)

    # ================= load data =================
    dist.init_process_group('nccl')
    init_logger_dist()

    n_proc = dist.get_world_size()
    bsz = args.batch_size // args.grad_accumulation // n_proc
    args.local_rank = get_local_rank()
    args.save_dir = os.path.join(args.save_dir, args.name) if args.save_model else None
    if args.save_dir is not None and os.path.exists(args.save_dir):
        raise RuntimeError('save_dir already exists.')
    logger.info('save directory: {}'.format('None' if args.save_dir is None else args.save_dir))
    devices = list(range(torch.cuda.device_count()))
    NUM_WORKERS = 4

    ent_vocab, rel_vocab = load_ent_rel_vocabs()
    logger.info('# entities: {}'.format(len(ent_vocab)))
    logger.info('# relations: {}'.format(len(rel_vocab)))
    ent_freq = get_ent_freq()
    assert len(ent_vocab) == len(ent_freq), '{} {}'.format(len(ent_vocab), len(ent_freq))

    #####
    root = args.data_dir
    dirs = os.listdir(root)
    drop_files = []
    for dir in dirs:
        path = os.path.join(root, dir)
        max_idx = 0
        for file_name in os.listdir(path):
            if 'large' in file_name:
                continue
            max_idx = int(file_name) if int(file_name) > max_idx else max_idx
        drop_files.append(os.path.join(path, str(max_idx)))
    #####

    file_list = []
    for path, _, filenames in os.walk(args.data_dir):
        for filename in filenames:
            file = os.path.join(path, filename)
            if 'large' in file or file in drop_files:
                continue
            file_list.append(file)
    logger.info('used {} files in {}.'.format(len(file_list), args.data_dir))

    if args.data_prop > 1:
        used_files = file_list[:int(args.data_prop)]
    else:
        used_files = file_list[:round(args.data_prop * len(file_list))]

    data = GraphOTFDataSet(used_files, n_proc, args.local_rank, word_mask_index, word_vocab_size,
                           args.n_negs, ent_vocab, rel_vocab, ent_freq)
    dev_data = GraphDataSet(used_files[0], word_mask_index, word_vocab_size, args.n_negs,
                            ent_vocab, rel_vocab, ent_freq)

    sampler = OTFDistributedSampler(used_files, n_proc, get_local_rank())
    train_data_iter = TorchLoaderIter(dataset=data, batch_size=bsz, sampler=sampler,
                                      num_workers=NUM_WORKERS, collate_fn=data.collate_fn)
    dev_data_iter = TorchLoaderIter(dataset=dev_data, batch_size=bsz, sampler=RandomSampler(),
                                    num_workers=NUM_WORKERS, collate_fn=dev_data.collate_fn)
    if args.test_data is not None:
        test_data = FewRelDevDataSet(path=args.test_data, label_vocab=rel_vocab, ent_vocab=ent_vocab)
        test_data_iter = TorchLoaderIter(dataset=test_data, batch_size=32, sampler=RandomSampler(),
                                         num_workers=NUM_WORKERS, collate_fn=test_data.collate_fn)

    if args.local_rank == 0:
        print('full wiki files: {}'.format(len(file_list)))
        print('used wiki files: {}'.format(len(used_files)))
        print('# of trained samples: {}'.format(len(data) * n_proc))
        print('# of trained entities: {}'.format(len(ent_vocab)))
        print('# of trained relations: {}'.format(len(rel_vocab)))

    # ================= prepare model =================
    logger.info('model init')
    if args.rel_emb is not None:  # load pretrained relation embeddings
        rel_emb = np.load(args.rel_emb)
        # add_embs = np.random.randn(3, rel_emb.shape[1])  # add <pad>, <mask>, <unk>
        # rel_emb = np.r_[add_embs, rel_emb]
        rel_emb = torch.from_numpy(rel_emb).float()
        assert rel_emb.shape[0] == len(rel_vocab), '{} {}'.format(rel_emb.shape[0], len(rel_vocab))
        # assert rel_emb.shape[1] == args.rel_dim
        logger.info('loaded pretrained relation embeddings. dim: {}'.format(rel_emb.shape[1]))
    else:
        rel_emb = None

    if args.model_name is not None:
        logger.info('further pre-train.')
        config = RobertaConfig.from_pretrained('roberta-base', type_vocab_size=3)
        model = CoLAKE(config=config, num_ent=len(ent_vocab), num_rel=len(rel_vocab),
                       ent_dim=args.ent_dim, rel_dim=args.rel_dim, ent_lr=args.ent_lr,
                       ip_config=args.ip_config, rel_emb=None, emb_name=args.emb_name)
        states_dict = torch.load(args.model_name)
        model.load_state_dict(states_dict, strict=True)
    else:
        model = CoLAKE.from_pretrained('roberta-base', num_ent=len(ent_vocab), num_rel=len(rel_vocab),
                                       ent_lr=args.ent_lr, ip_config=args.ip_config, rel_emb=rel_emb,
                                       emb_name=args.emb_name,
                                       cache_dir=PYTORCH_PRETRAINED_BERT_CACHE / 'dist_{}'.format(args.local_rank))
        model.extend_type_embedding(token_type=3)
    # if args.local_rank == 0:
    #     for name, param in model.named_parameters():
    #         if param.requires_grad is True:
    #             print('{}: {}'.format(name, param.shape))

    # ================= train model =================
    # lr=1e-4 for peak value, lr=5e-5 for initial value
    logger.info('trainer init')
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight', 'layer_norm.bias', 'layer_norm.weight']
    param_optimizer = list(model.named_parameters())
    optimizer_grouped_parameters = [
        {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
         'weight_decay': 0.01},
        {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
         'weight_decay': 0.0}
    ]

    word_acc = WordMLMAccuracy(pred='word_pred', target='masked_lm_labels', seq_len='word_seq_len')
    ent_acc = EntityMLMAccuracy(pred='entity_pred', target='ent_masked_lm_labels', seq_len='ent_seq_len')
    rel_acc = RelationMLMAccuracy(pred='relation_pred', target='rel_masked_lm_labels', seq_len='rel_seq_len')
    metrics = [word_acc, ent_acc, rel_acc]

    if args.test_data is not None:
        test_metric = [rel_acc]
        tester = Tester(data=test_data_iter, model=model, metrics=test_metric,
                        device=list(range(torch.cuda.device_count())))
        # tester.test()
    else:
        tester = None

    optimizer = optim.AdamW(optimizer_grouped_parameters, lr=args.lr, betas=(0.9, args.beta), eps=1e-6)
    # warmup_callback = WarmupCallback(warmup=args.warm_up, schedule='linear')

    fitlog_callback = MyFitlogCallback(tester=tester, log_loss_every=100, verbose=1)
    gradient_clip_callback = GradientClipCallback(clip_value=1, clip_type='norm')
    emb_callback = EmbUpdateCallback(model.ent_embeddings)
    all_callbacks = [gradient_clip_callback, emb_callback]
    if args.save_dir is None:
        master_callbacks = [fitlog_callback]
    else:
        save_callback = SaveModelCallback(args.save_dir, model.ent_embeddings, only_params=True)
        master_callbacks = [fitlog_callback, save_callback]

    if args.do_test:
        states_dict = torch.load(os.path.join(args.save_dir, args.model_name)).state_dict()
        model.load_state_dict(states_dict)
        data_iter = TorchLoaderIter(dataset=data, batch_size=args.batch_size, sampler=RandomSampler(),
                                    num_workers=NUM_WORKERS, collate_fn=data.collate_fn)
        tester = Tester(data=data_iter, model=model, metrics=metrics, device=devices)
        tester.test()
    else:
        trainer = DistTrainer(train_data=train_data_iter, dev_data=dev_data_iter, model=model,
                              optimizer=optimizer, loss=LossInForward(), batch_size_per_gpu=bsz,
                              update_every=args.grad_accumulation, n_epochs=args.epoch, metrics=metrics,
                              callbacks_master=master_callbacks, callbacks_all=all_callbacks,
                              validate_every=5000, use_tqdm=True, fp16='O1' if args.fp16 else '')
        trainer.train(load_best_model=False)
# mode
config.mode = args.mode
config.setting = args.setting

# save model
if not os.path.exists(config.model_path):
    if config.model_path.__contains__("/"):
        os.makedirs(config.model_path, 0o777)
    else:
        os.mkdir(config.model_path)

# fitlog dir
logger.info(f"set fitlog dir to {args.fitlog_dir}")
if not os.path.exists(args.fitlog_dir):
    os.mkdir(args.fitlog_dir)
fitlog.set_log_dir(args.fitlog_dir)
fitlog.add_hyper(args)

if not os.path.exists(config.model_path):
    os.mkdir(config.model_path)

if args.visible_gpu != -1:
    config.use_gpu = True
    torch.cuda.set_device(args.visible_gpu)
    device = torch.device(args.visible_gpu)
else:
    config.use_gpu = False

mode = args.mode
logger.info("------start mode train------")
run_train()
parser.add_argument('--random-search', type=bool, default=False)
parser.add_argument('--verbose', type=int, default=1)
# bitempered loss
parser.add_argument('--t1', type=float, default=1.)
parser.add_argument('--t2', type=float, default=1.)
parser.add_argument('--smooth-ratio', type=float, default=0.2)
parser.add_argument('--gpu', type=str, default='0,1')
args = parser.parse_args()

os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
min_loss = 1e10
max_acc = 0.
save_dir = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M')
fitlog.set_log_dir('logs/', save_dir)
fitlog.add_hyper(args)

train = pd.read_csv('/media/limzero/qyl/leaf/dataset/train.csv')
submission = pd.read_csv('/media/limzero/qyl/leaf/dataset/sample_submission.csv')


def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True
import fitlog

fitlog.commit(__file__)             # auto commit your codes
fitlog.set_log_dir('logs/')         # set the logging directory
fitlog.add_hyper_in_file(__file__)  # record your hyper-parameters

"""
Your training code here, you may use these functions to log your result:
    fitlog.add_hyper()
    fitlog.add_loss()
    fitlog.add_metric()
    fitlog.add_best_metric()
    ......
"""

fitlog.finish()                     # finish the logging
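The template above only names fitlog's logging helpers; below is a minimal, self-contained sketch (not taken from any of the snippets here) of how they might be wired into a hand-written loop, with random numbers standing in for real losses and metrics.

# Hypothetical usage sketch: random values stand in for a real model's loss/metric.
import os
import random
import fitlog

os.makedirs('logs/', exist_ok=True)      # make sure the log directory exists
fitlog.set_log_dir('logs/')
fitlog.add_hyper(1e-3, name='lr')        # log a single hyper-parameter by name

best_acc, step = 0.0, 0
for epoch in range(3):
    for _ in range(10):                  # stands in for iterating a real DataLoader
        loss = random.random()           # stands in for a real training step
        fitlog.add_loss(loss, name='loss', step=step)
        step += 1
    acc = random.random()                # stands in for a real dev-set evaluation
    fitlog.add_metric(acc, name='dev_acc', step=step, epoch=epoch)
    if acc > best_acc:
        best_acc = acc
        fitlog.add_best_metric(best_acc, name='dev_acc')

fitlog.finish()                          # mark the run as finished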
def main():
    args = parse_args()
    if args.debug:
        fitlog.debug()
    fitlog.set_log_dir(args.log_dir)
    fitlog.commit(__file__)
    fitlog.add_hyper_in_file(__file__)
    fitlog.add_hyper(args)
    if args.gpu != 'all':
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    train_set, dev_set, test_set, temp_ent_vocab = load_fewrel_graph_data(data_dir=args.data_dir)

    print('data directory: {}'.format(args.data_dir))
    print('# of train samples: {}'.format(len(train_set)))
    print('# of dev samples: {}'.format(len(dev_set)))
    print('# of test samples: {}'.format(len(test_set)))

    ent_vocab, rel_vocab = load_ent_rel_vocabs(path='../')

    # load entity embeddings
    ent_index = []
    for k, v in temp_ent_vocab.items():
        ent_index.append(ent_vocab[k])
    ent_index = torch.tensor(ent_index)
    ent_emb = np.load(os.path.join(args.model_path, 'entities.npy'))
    ent_embedding = nn.Embedding.from_pretrained(torch.from_numpy(ent_emb))
    ent_emb = ent_embedding(ent_index.view(1, -1)).squeeze().detach()

    # load CoLAKE parameters
    config = RobertaConfig.from_pretrained('roberta-base', type_vocab_size=3)
    model = CoLAKEForRE(config, num_types=len(train_set.label_vocab), ent_emb=ent_emb)
    states_dict = torch.load(os.path.join(args.model_path, 'model.bin'))
    model.load_state_dict(states_dict, strict=False)
    print('parameters below are randomly initialized:')
    for name, param in model.named_parameters():
        if name not in states_dict:
            print(name)

    # tie relation classification head
    rel_index = []
    for k, v in train_set.label_vocab.items():
        rel_index.append(rel_vocab[k])
    rel_index = torch.LongTensor(rel_index)
    rel_embeddings = nn.Embedding.from_pretrained(states_dict['rel_embeddings.weight'])
    rel_index = rel_index.cuda()
    rel_cls_weight = rel_embeddings(rel_index.view(1, -1)).squeeze()
    model.tie_rel_weights(rel_cls_weight)

    model.rel_head.dense.weight.data = states_dict['rel_lm_head.dense.weight']
    model.rel_head.dense.bias.data = states_dict['rel_lm_head.dense.bias']
    model.rel_head.layer_norm.weight.data = states_dict['rel_lm_head.layer_norm.weight']
    model.rel_head.layer_norm.bias.data = states_dict['rel_lm_head.layer_norm.bias']

    model.resize_token_embeddings(len(RobertaTokenizer.from_pretrained('roberta-base')) + 4)
    print('parameters of CoLAKE have been loaded.')

    # fine-tune
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight', 'embedding']
    param_optimizer = list(model.named_parameters())
    optimizer_grouped_parameters = [
        {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
         'weight_decay': args.weight_decay},
        {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
         'weight_decay': 0.0}
    ]
    optimizer = optim.AdamW(optimizer_grouped_parameters, lr=args.lr, betas=(0.9, args.beta), eps=1e-6)

    metrics = [MacroMetric(pred='pred', target='target')]

    test_data_iter = TorchLoaderIter(dataset=test_set, batch_size=args.batch_size, sampler=RandomSampler(),
                                     num_workers=4, collate_fn=test_set.collate_fn)
    devices = list(range(torch.cuda.device_count()))
    tester = Tester(data=test_data_iter, model=model, metrics=metrics, device=devices)
    # tester.test()

    fitlog_callback = FitlogCallback(tester=tester, log_loss_every=100, verbose=1)
    gradient_clip_callback = GradientClipCallback(clip_value=1, clip_type='norm')
    warmup_callback = WarmupCallback(warmup=args.warm_up, schedule='linear')

    bsz = args.batch_size // args.grad_accumulation

    train_data_iter = TorchLoaderIter(dataset=train_set, batch_size=bsz, sampler=RandomSampler(),
                                      num_workers=4, collate_fn=train_set.collate_fn)
    dev_data_iter = TorchLoaderIter(dataset=dev_set, batch_size=bsz, sampler=RandomSampler(),
                                    num_workers=4, collate_fn=dev_set.collate_fn)

    trainer = Trainer(train_data=train_data_iter, dev_data=dev_data_iter, model=model, optimizer=optimizer,
                      loss=LossInForward(), batch_size=bsz, update_every=args.grad_accumulation,
                      n_epochs=args.epoch, metrics=metrics,
                      callbacks=[fitlog_callback, gradient_clip_callback, warmup_callback],
                      device=devices, use_tqdm=True)

    trainer.train(load_best_model=False)
"--dataset", type=str, ) parser.add_argument("--learning_rate", default=0.001, type=float) parser.add_argument("--l2reg", default=0.00001, type=float) parser.add_argument("--num_epoch", default=100, type=int) parser.add_argument("--batch_size", default=32, type=int) parser.add_argument("--embed_dim", default=300, type=int) parser.add_argument("--hidden_dim", default=300, type=int) parser.add_argument("--dropout", default=0.7, type=float) opt = parser.parse_args() # opt--->all args if opt.dataset.endswith("/"): opt.dataset = opt.dataset[:-1] ################fitlog code#################### fitlog.set_log_dir("logs") fitlog.set_rng_seed() fitlog.add_hyper(opt) fitlog.add_hyper(value="ASGCN", name="model") ################fitlog code#################### opt.polarities_dim = 3 opt.initializer = "xavier_uniform_" opt.optimizer = "adam" opt.model_name = "asgcn" opt.log_step = 20 opt.l2reg = 1e-5 opt.early_stop = 25 if "/" in opt.dataset: pre_model_name, layer, dataset = opt.dataset.split("/")[-3:] else:
    SequentialSampler,
    SortedSampler,
    Trainer,
    WarmupCallback,
    cache_results,
)
from fastNLP.embeddings import BertWordPieceEncoder, RobertaWordPieceEncoder
from fitlog import _committer
from torch import optim, nn
from transformers import XLMRobertaModel, XLNetModel

from pipe import ResPipe

# fitlog.debug()
os.makedirs("./FT_logs", exist_ok=True)
fitlog.set_log_dir("FT_logs")

import argparse

parser = argparse.ArgumentParser()
parser.add_argument(
    "--dataset",
    type=str,
    default="Laptop",
    choices=[
        "Restaurants",
        "Laptop",
        "Tweets",
        "fr",
        "sp",
        "dutch",
from transformers import RobertaTokenizer, BertTokenizer, AdamW
from create_graph import iter_data
from util import evaluate
from hotpot_evaluate_v1 import eval
from config import set_config
import fitlog

logging.basicConfig(format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
                    datefmt='%m/%d/%Y %H:%M:%S',
                    level=logging.INFO)
logger = logging.getLogger(__name__)
IGNORE_INDEX = -100
os.makedirs("./logs", exist_ok=True)
fitlog.set_log_dir("./logs")


def _improve_answer_span(doc_tokens, input_start, input_end, tokenizer, orig_answer_text):
    """Returns tokenized answer spans that better match the annotated answer."""

    # The SQuAD annotations are character based. We first project them to
    # whitespace-tokenized words. But then after WordPiece tokenization, we can
    # often find a "better match". For example:
    #
    #   Question: What year was John Smith born?
    #   Context: The leader was John Smith (1895-1943).
    #   Answer: 1895
    #
    # The original whitespace-tokenized answer will be "(1895-1943).". However
    # after tokenization, our tokens will be "( 1895 - 1943 ) .". So we can match
import fitlog

use_fitlog = True
if not use_fitlog:
    fitlog.debug()
fitlog.set_log_dir('../logs')
load_dataset_seed = 100
fitlog.add_hyper(load_dataset_seed, 'load_dataset_seed')
fitlog.set_rng_seed(load_dataset_seed)

import sys
sys.path.append('../')
from load_data import *
import argparse
from paths import *
from fastNLP.core import Trainer
# from trainer import Trainer
from fastNLP.core import Callback
from V0.models import Lattice_Transformer_SeqLabel, Transformer_SeqLabel
import torch
import collections
import torch.optim as optim
import torch.nn as nn
from fastNLP import LossInForward
from fastNLP.core.metrics import SpanFPreRecMetric, AccuracyMetric
from fastNLP.core.callback import WarmupCallback, GradientClipCallback, EarlyStopCallback, FitlogCallback
from fastNLP import LRScheduler
from torch.optim.lr_scheduler import LambdaLR
# from models import LSTM_SeqLabel, LSTM_SeqLabel_True
from fastNLP import logger
from utils import get_peking_time
import os
if 'p' in os.environ:
    os.environ['CUDA_VISIBLE_DEVICES'] = os.environ['p']

import torch
from torch import optim
from fastNLP import Trainer, CrossEntropyLoss
from fastNLP import BucketSampler, cache_results, WarmupCallback, GradientClipCallback, FitlogCallback
from mono.data.pipe import ENBertPipe
from mono.model.bert import ENBertReverseDict
import fitlog
from mono.model.metrics import MonoMetric
from joint.data.utils import clip_max_length

# fitlog.debug()
fitlog.set_log_dir('en_logs')
fitlog.add_hyper_in_file(__file__)
fitlog.add_other('uncased', name='note')

paths = '../data/en'
####### hyper
model_name = 'bert'
max_word_len = 5
lr = 2e-5
batch_size = 64
n_epochs = 10
####### hyper

pre_name = 'bert-base-uncased'  # in transformers this model is named bert-base-cased
sys.path.append('../')
import os
if 'p' in os.environ:
    os.environ['CUDA_VISIBLE_DEVICES'] = os.environ['p']

import torch
from torch import optim
from fastNLP import Trainer, CrossEntropyLoss
from fastNLP import BucketSampler, cache_results, WarmupCallback, GradientClipCallback, FitlogCallback
from mono.data.pipe import CNBertPipe
from mono.model.bert import CNBertReverseDict
import fitlog
from mono.model.metrics import MonoMetric
from joint.data.utils import clip_max_length

# fitlog.debug()
fitlog.set_log_dir('cn_logs')
fitlog.add_hyper_in_file(__file__)

paths = '../data/cn'
######### hyper
model_name = 'bert'  # change this to 'roberta' to run the RoBERTa version
max_word_len = 4
lr = 5e-5
batch_size = 80
n_epochs = 20
######### hyper

if model_name == 'bert':
    pre_name = 'bert-chinese-wwm'
elif model_name == 'roberta':
    pre_name = 'chinese_roberta_wwm_ext'
else:
import fitlog

use_fitlog = False
if not use_fitlog:
    fitlog.debug()
fitlog.set_log_dir('logs')
load_dataset_seed = 100
fitlog.add_hyper(load_dataset_seed, 'load_dataset_seed')
fitlog.set_rng_seed(load_dataset_seed)

import sys
sys.path.append('../')
import argparse
from fastNLP.core import Trainer
from fastNLP.core import Callback
from fastNLP import LossInForward
from fastNLP.core.metrics import SpanFPreRecMetric, AccuracyMetric
from fastNLP.core.callback import WarmupCallback, GradientClipCallback, EarlyStopCallback, FitlogCallback
from fastNLP import LRScheduler
from fastNLP import logger
import torch
import torch.optim as optim
import torch.nn as nn
from torch.optim.lr_scheduler import LambdaLR
import collections
from load_data import *
        print(f'fold {fold} epoch {epoch}, valid dice {dice:.4f}')


if __name__ == "__main__":
    seed_everything(seed=args.seed)
    save_dir = None
    group = GroupKFold(n_splits=5)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # initialize fitlog
    if args.find_lr:
        fitlog.debug()
    fitlog.set_log_dir('aug/')
    fitlog.add_hyper(args)
    # fitlog.add_hyper({'v2': True})
    # fitlog.commit(__file__)  # automatically commit the training code for this version
    epochs = args.epochs
    if os.path.exists(f'{args.data_root}/train.csv'):
        train = pd.read_csv(f'{args.data_root}/train.csv')
    else:
        imgs = os.listdir(f'{args.data_root}/train/')
        train = pd.DataFrame(imgs, columns=['id'])
        train['parent'] = train['id'].apply(lambda x: x.split('_')[0])
        train.to_csv(f'{args.data_root}/train.csv', index=False)
    dice = []