os.makedirs(output_path)
args.output_file = os.path.join(output_path, suffix)
if args.started_sentence_id == 1 and os.path.exists(args.output_file):
    os.remove(args.output_file)
print('The output file is ', args.output_file)
args.input_file = os.path.join(args.input_file, f'{args.dataset}/{args.keywords}keywords.txt')

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("device:", device)

if args.random == 0:
    classifier_model_path = '../checkpoints/xlnet_classifier/{}'.format(args.dataset)
    args.classifier_model_path = classifier_model_path
    classifier_model = XLNetForTokenClassification.from_pretrained(classifier_model_path, num_labels=4)
    classifier_model_tokenizer = XLNetTokenizer.from_pretrained(classifier_model_path)
    logger.logger.info('Initialize backward XLNetForTokenClassification from checkpoint {}.'.format(classifier_model_path))
    classifier_model = classifier_model.to(device)
    classifier_model.eval()
else:
    classifier_model = None
    classifier_model_tokenizer = None

if args.model_name == 'LSTMLMGenerate':
    forward_lm_path = '../checkpoints/forward_lstm_lm/{}/best.pt'.format(args.dataset)
    backward_lm_path = '../checkpoints/backward_lstm_lm/{}/best.pt'.format(args.dataset)
    args.forward_lm_path = forward_lm_path
    args.backward_lm_path = backward_lm_path
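
# Illustrative only (not part of the original script): a minimal sketch of how the
# classifier loaded above could be queried for per-token edit-operation scores at
# generation time. `predict_edit_ops` is a hypothetical helper name, and this assumes
# a transformers version where the tokenizer is callable and returns PyTorch tensors.
def predict_edit_ops(keywords_text):
    """Sketch: score each sub-token of `keywords_text` with one of the 4 edit operations."""
    enc = classifier_model_tokenizer(keywords_text, return_tensors='pt').to(device)
    with torch.no_grad():
        logits = classifier_model(**enc)[0]       # shape: (1, seq_len, 4)
    return logits.argmax(dim=-1).squeeze(0)       # one operation id per sub-token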
    for i, j in enumerate(sep_sentence[1:-2]):
        # Propagate character-level labels to each sub-token, taking the max label it
        # spans; an <unk> token consumes a single source character.
        out[i + pad_num + 1] = max(char_label[current_idx:current_idx + len(j)])
        if j == "<unk>":
            current_idx = current_idx + 1
        else:
            current_idx = current_idx + len(j)
    return out.tolist()


config = AutoConfig.from_pretrained(model_path)
tokenizer = XLNetTokenizer.from_pretrained(model_path, unk_token=unk_token)
model = XLNetForTokenClassification.from_pretrained(model_path, num_labels=13)

if torch.cuda.is_available():
    device = torch.device("cuda")
    print('There are %d GPU(s) available.' % torch.cuda.device_count())
    print('We will use the GPU:', torch.cuda.get_device_name(0))
else:
    print('No GPU available, using the CPU instead.')
    device = torch.device("cpu")
model.to(device)

train_input_ids = []
train_labels = []
train_masks = []
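
# Illustrative only (not part of the original script): once the three lists above are
# populated with fixed-length id / label / mask sequences, they would typically be
# wrapped into a shuffled DataLoader. `build_train_dataloader` is a hypothetical helper
# and the batch size is an assumed value.
def build_train_dataloader(batch_size=32):
    """Sketch: wrap the populated training lists into a DataLoader."""
    from torch.utils.data import TensorDataset, DataLoader, RandomSampler
    data = TensorDataset(torch.tensor(train_input_ids),
                         torch.tensor(train_masks),
                         torch.tensor(train_labels))
    return DataLoader(data, sampler=RandomSampler(data), batch_size=batch_size)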
args.model_path = model_path
args.log_path = log_path
if not os.path.exists(log_path):
    os.makedirs(log_path)
log_file = '{}/{}.log'.format(log_path, args.dataset)
print('The log file is ', log_file)
logger = Logger(log_file)
logger.logger.info(args)
if not os.path.exists(model_path):
    os.makedirs(model_path)

try:
    # load the pre-trained model and tokenizer
    tokenizer = XLNetTokenizer.from_pretrained(args.model_path)
    model = XLNetForTokenClassification.from_pretrained(
        args.model_path, num_labels=args.num_labels)
    logger.logger.info('Initialize XLNet from checkpoint {}.'.format(
        args.model_path))
except:
    tokenizer = XLNetTokenizer.from_pretrained('xlnet-base-cased')
    model = XLNetForTokenClassification.from_pretrained(
        'xlnet-base-cased', num_labels=args.num_labels)
    logger.logger.info('Initialize XLNet with default parameters.')
model = XLNetClassifier(model)

"""
copy: 0
replace: 1
insert: 2
delete: 3
"""
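
# Illustrative only (not part of the original script): the label ids documented above
# as a lookup table, with a made-up prediction sequence to show how they read.
EDIT_OPS = {0: 'copy', 1: 'replace', 2: 'insert', 3: 'delete'}
example_labels = [0, 2, 0, 1]  # hypothetical classifier output for a 4-token draft
# [EDIT_OPS[l] for l in example_labels] -> ['copy', 'insert', 'copy', 'replace']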