def main(test_file, config):
    params = load_config(config)
    print(params)
    set_seed(params.seed)

    tokenizer = transformers.AutoTokenizer.from_pretrained(
        params.output.tokenizer_dir)
    model = transformers.TFAutoModelWithLMHead.from_pretrained(
        params.output.model_dir)

    test_texts = load_dataset(test_file)
    x_test, y_test = build_data(tokenizer, test_texts, params.block_size)

    # Define the LM loss on the logits head only; no optimizer is needed for evaluation.
    loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    # optimizer = keras.optimizers.Adam()
    model.compile(
        loss=[loss, *[None] * model.config.n_layer],
        metrics=[
            keras.metrics.SparseCategoricalCrossentropy(from_logits=True),
            keras.metrics.SparseCategoricalAccuracy(),
        ],
    )

    # Evaluate the best model on the test set
    res = model.evaluate(x_test, y_test)
    print(res)
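# `build_data` is a project-specific helper that is not shown here. A minimal sketch
# of one plausible implementation, assuming it tokenizes the texts, packs them into
# `block_size`-token chunks, and shifts the labels by one token for causal language
# modeling (the name, signature, and behavior are assumptions, not the project's code):
import numpy as np

def build_data_sketch(tokenizer, texts, block_size):
    # Tokenize and concatenate all texts into one long id sequence.
    ids = []
    for text in texts:
        ids.extend(tokenizer.encode(text))
    # Split into fixed-length blocks; labels are the inputs shifted by one token.
    n_blocks = (len(ids) - 1) // block_size
    x = np.array([ids[i * block_size:(i + 1) * block_size]
                  for i in range(n_blocks)])
    y = np.array([ids[i * block_size + 1:(i + 1) * block_size + 1]
                  for i in range(n_blocks)])
    return x, y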
def main():
    args = get_args()
    args.n_gpu = 1

    set_seed(args)

    # Construct tokenizer
    tokenizer = CharTokenizer([])
    tokenizer.load(args.load_vocab)
    args.vocab_size = len(tokenizer)
    logger.info(f"args: {json.dumps(args.__dict__, indent=2, sort_keys=True)}")

    # GPU setting
    os.environ["CUDA_VISIBLE_DEVICES"] = args.cuda
    args.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Construct model
    model = TransformerModel(
        vocab_size=args.vocab_size,
        hidden_size=args.hidden_size,
        num_attention_heads=args.num_attention_heads,
        num_encoder_layers=args.num_encoder_layers,
        num_decoder_layers=args.num_decoder_layers,
        intermediate_size=args.intermediate_size,
        dropout=args.dropout,
    ).to(args.device)
    logger.info(
        f"# of model parameters: {sum(p.numel() for p in model.parameters()) * 1e-6:.2f}M"
    )

    # Load data
    noisy_sents = read_strings(os.path.join('sejong_corpus', args.noisy_file))
    clean_sents = read_strings(os.path.join('sejong_corpus', args.clean_file))
    sents_annotation = ['None'] * len(noisy_sents)
    pairs = [{
        "noisy": noisy,
        "clean": clean,
        "annotation": annot
    } for noisy, clean, annot in zip(noisy_sents, clean_sents, sents_annotation)]

    # Train-validation split
    train_data, valid_data = train_test_split(
        pairs, test_size=args.val_ratio, random_state=args.seed)  # test: about 1000
    logger.info(f"# of train data: {len(train_data)}")
    logger.info(f"# of valid data: {len(valid_data)}")

    train(model, tokenizer, train_data, valid_data, args, eos=args.eos_setting)
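# `read_strings` is a small I/O helper of the project that is not shown here.
# A minimal sketch, assuming each corpus file stores one sentence per line in UTF-8
# (the name and behavior are assumptions about the missing helper):
def read_strings_sketch(path):
    # Read a text file and return a list of non-empty, stripped lines.
    with open(path, encoding="utf-8") as f:
        return [line.rstrip("\n") for line in f if line.strip()]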
def main():
    set_seed(seed=1234)
    NUM_WORKERS = os.cpu_count() * 3
    classes = ["car", "motorcycle", "bus", "bicycle", "truck", "pedestrian",
               "other_vehicle", "animal", "emergency_vehicle"]
    # train_dataset = LyftDataset(data_path='.', json_path='../input/3d-object-detection-for-autonomous-vehicles/train_data', verbose=True)
    # level5data = LyftTestDataset(data_path='.', json_path='../input/3d-object-detection-for-autonomous-vehicles/test_data', verbose=True)
    class_heights = {'animal': 0.51, 'bicycle': 1.44, 'bus': 3.44, 'car': 1.72,
                     'emergency_vehicle': 2.39, 'motorcycle': 1.59,
                     'other_vehicle': 3.23, 'pedestrian': 1.78, 'truck': 3.44}

    # load data
    data_folder = os.path.join(OUTPUT_ROOT, "bev_test_1024")

    # choose test samples
    input_filepaths = sorted(glob.glob(os.path.join(data_folder, "*_input.png")))
    sample_tokens = [x.split("/")[-1].replace("_input.png", "") for x in input_filepaths]
    sample_tokens = [x.replace("bev_data\\", "") for x in sample_tokens]

    df = pd.read_csv('folds/test_host_scenes.csv')
    print(df.head())

    # model
    model = get_smp_model(encoder='resnext101', num_classes=len(classes) + 1)

    # load model checkpoint
    checkpoint = f'{OUTPUT_ROOT}/checkpoints/unet_4_32_768_fold_3/unet_4_32_1024_fold_3_epoch_15.pth'
    load_model(model, checkpoint)
    model = model.to(device)
    model.eval()

    test_dataset = BEVTestDataset(sample_tokens=sample_tokens,
                                  debug=True,
                                  img_size=IMG_SIZE,
                                  input_dir=data_folder,
                                  transforms=albu_test_tansforms)

    im, sample_token = test_dataset[1]
    im = im.numpy()
    plt.figure(figsize=(16, 8))
    # Transpose the input volume CXY to XYC order, which is what matplotlib requires.
    plt.imshow(im.transpose(1, 2, 0)[..., :3])
    plt.title(sample_token)
    plt.show()

    visualize_lidar_of_sample(sample_token)

    box_scale = 0.8
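# `load_model` (and `get_smp_model`) come from the surrounding project and are not
# shown here. A minimal sketch of a checkpoint loader, assuming the .pth file holds
# either a bare state dict or a dict with a "model" key (an assumption, not the
# project's actual checkpoint format):
import torch

def load_model_sketch(model, checkpoint_path):
    # Load weights onto CPU first so the call also works on machines without a GPU.
    state = torch.load(checkpoint_path, map_location="cpu")
    state_dict = state.get("model", state) if isinstance(state, dict) else state
    model.load_state_dict(state_dict)
    return model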
parser.add_argument("--local_rank", type=int, default=-1, help="For distributed training: local_rank") args = parser.parse_args() device = torch.device("cpu") args.n_gpu = torch.cuda.device_count() print(device) print(args.n_gpu) args.device = device set_seed(args) dataProcessor = DataProcessor() dev_eg = dataProcessor.get_dev_examples() train_eg = dataProcessor.get_train_examples() test_eg = dataProcessor.get_test_dataset() tokenizer = BertTokenizer.from_pretrained('bert-large-uncased') train_dataset = convert_features_to_dataset(convert_examples_to_features( examples=train_eg, label2id=LABEL2ID, max_seq_length=121, tokenizer=tokenizer)) dev_dataset = convert_features_to_dataset(convert_examples_to_features(
def run():
    parser = ArgumentParser()
    parser.add_argument("--run_name", type=str, default='run1',
                        help="The name of the run (subdirectory in ./runs)")
    parser.add_argument("--model", type=str, default="openai-gpt",
                        help="Model type (openai-gpt or gpt2)",
                        choices=['openai-gpt', 'gpt2'])
    parser.add_argument("--max_history", type=int, default=2,
                        help="Number of previous utterances to keep in history")
    parser.add_argument("--device", type=str,
                        default="cuda" if torch.cuda.is_available() else "cpu",
                        help="Device (cuda or cpu)")
    parser.add_argument("--no_sample", action='store_true',
                        help="Set to use greedy decoding instead of sampling")
    parser.add_argument("--max_length", type=int, default=80,
                        help="Maximum length of the output utterances")
    parser.add_argument("--seed", type=int, default=42, help="Seed")
    parser.add_argument("--temperature", type=float, default=1.0,
                        help="Sampling softmax temperature")
    parser.add_argument("--top_k", type=int, default=0,
                        help="Filter top-k tokens before sampling (<=0: no filtering)")
    parser.add_argument("--top_p", type=float, default=0.8,
                        help="Nucleus filtering (top-p) before sampling (<=0.0: no filtering)")
    parser.add_argument("--no_info", action='store_true', default=False,
                        help="Only show conversation output")
    args = parser.parse_args()

    # set seed
    set_seed(args)

    logger.info("Get pretrained model and tokenizer")
    model_path = os.path.join('runs', args.run_name)
    tokenizer_class, model_class = (
        (GPT2Tokenizer, GPT2LMHeadModel) if args.model == 'gpt2'
        else (OpenAIGPTTokenizer, OpenAIGPTLMHeadModel))
    tokenizer = tokenizer_class.from_pretrained(model_path)
    model = model_class.from_pretrained(model_path)
    model.to(args.device)

    history = []
    personality = []
    speaker1_tag = '<speaker1>'
    speaker2_tag = '<speaker2>'
    speaker1_tag_id = tokenizer.convert_tokens_to_ids(speaker1_tag)
    speaker2_tag_id = tokenizer.convert_tokens_to_ids(speaker2_tag)
    history = f"""{speaker2_tag} Hi!
{speaker1_tag} Hello
{speaker2_tag} Are you ready?
{speaker1_tag} Yes!
{speaker2_tag} Ok let's start chatting
{speaker1_tag} Sure, what do you want to talk about?"""
    print(history)
    print('\n[Chat with the model! Send "h" to see the full history]\n')
    history = history.split('\n')

    while True:
        message = None
        while not message:
            message = input(f'{speaker2_tag} ')
            if message == 'h':
                print('\n'.join(history))
                message = None

        # add new message to history
        history.append(f'{speaker2_tag} {message}')

        # keep only the most recent conversation as input to the model
        recent_history = history[-(2 * args.max_history):]

        # concatenate history into a single string and append the bot speaker tag as trigger
        history_str = '{}\n{}'.format('\n'.join(recent_history), speaker1_tag)

        # tokenize text and convert into vocabulary ids (input ids)
        history_enc = tokenizer.encode(history_str, add_special_tokens=True)

        with torch.no_grad():
            out_ids = sample_sequence(history_enc, model, args)
        out_ids = out_ids[:, len(history_enc):].tolist()[0]

        if not args.no_info:
            print(20 * '-')
            print('Output of model:')
            full_output = tokenizer.decode(out_ids, clean_up_tokenization_spaces=True)
            print(full_output)
            print('\nInput to the model:')
            print(history_str)
            print(20 * '-' + '\n')

        # Select the part before the speaker tags as the answer
        for i, out_id in enumerate(out_ids):
            if out_id in [speaker1_tag_id, speaker2_tag_id]:
                break
        answer = '{} {}'.format(speaker1_tag, tokenizer.decode(out_ids[:i]))
        print(answer)

        # add answer to history
        history.append(answer)
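# `sample_sequence` is not shown in this snippet. A minimal sketch of the kind of
# decoding loop it typically implements, combining temperature scaling with top-k
# and nucleus (top-p) filtering; the function name, the fixed-length stopping rule,
# and the absence of KV caching are assumptions, not the project's actual code:
import torch
import torch.nn.functional as F

def sample_sequence_sketch(history_enc, model, args):
    generated = torch.tensor([history_enc], dtype=torch.long, device=args.device)
    for _ in range(args.max_length):
        # Take the logits of the last position and apply temperature scaling.
        logits = model(generated)[0][:, -1, :] / max(args.temperature, 1e-5)
        # Top-k: keep only the k highest-scoring tokens.
        if args.top_k > 0:
            kth = torch.topk(logits, args.top_k)[0][..., -1, None]
            logits[logits < kth] = -float("inf")
        # Top-p (nucleus): keep the smallest token set whose cumulative probability >= p.
        if args.top_p > 0.0:
            sorted_logits, sorted_idx = torch.sort(logits, descending=True)
            cum_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)
            remove = cum_probs > args.top_p
            remove[..., 1:] = remove[..., :-1].clone()
            remove[..., 0] = False
            logits[0, sorted_idx[0][remove[0]]] = -float("inf")
        probs = F.softmax(logits, dim=-1)
        next_token = (torch.argmax(probs, dim=-1, keepdim=True)
                      if args.no_sample else torch.multinomial(probs, 1))
        generated = torch.cat((generated, next_token), dim=1)
    return generated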
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--batch-size', default=50, type=int)
    parser.add_argument('--dropout', default=0.5, type=float)
    parser.add_argument('--epoch', default=20, type=int)
    parser.add_argument('--learning-rate', default=0.1, type=float)
    parser.add_argument("--mode", default="non-static",
                        help="available modes: rand, static, non-static")
    parser.add_argument('--num-feature-maps', default=100, type=int)
    parser.add_argument("--pretrained-word-vectors", default="fasttext",
                        help="available vectors: fasttext, Word2Vec")
    parser.add_argument("--save-word-vectors", action='store_true', default=False,
                        help='save trained word vectors')
    parser.add_argument("--predict", action='store_true', default=False,
                        help='classify your sentence')
    args = parser.parse_args()

    # load data
    print("Load data...\n")
    texts, labels = dataset.load_data()

    print("Tokenizing...\n")
    tokenized_texts, word2idx, max_len = dataset.tokenize(texts)
    input_ids = dataset.encode(tokenized_texts, word2idx, max_len)
    train_inputs, val_inputs, train_labels, val_labels = train_test_split(
        input_ids, labels, test_size=0.1, random_state=42)

    print("Creating Dataloader...\n")
    train_dataloader, val_dataloader = dataset.data_loader(
        train_inputs, val_inputs, train_labels, val_labels,
        batch_size=args.batch_size)

    if args.mode == 'rand':
        # CNN-rand: word vectors are randomly initialized.
        train.set_seed(42)
        cnn_model, optimizer = model.initilize_model(
            vocab_size=len(word2idx),
            embed_dim=300,
            learning_rate=args.learning_rate,
            dropout=args.dropout)
        train.train(cnn_model, optimizer, train_dataloader, val_dataloader,
                    epochs=args.epoch)
    elif args.mode == 'static':
        # CNN-static: fastText pretrained word vectors are used and frozen during training.
        train.set_seed(42)
        embeddings = pretrained_vectors.get_embeddings(
            word2idx, args.pretrained_word_vectors)
        cnn_model, optimizer = model.initilize_model(
            pretrained_embedding=embeddings,
            freeze_embedding=True,
            learning_rate=args.learning_rate,
            dropout=args.dropout)
        train.train(cnn_model, optimizer, train_dataloader, val_dataloader,
                    epochs=args.epoch)
    else:
        # CNN-non-static: fastText pretrained word vectors are fine-tuned during training.
        train.set_seed(42)
        embeddings = pretrained_vectors.get_embeddings(
            word2idx, args.pretrained_word_vectors)
        cnn_model, optimizer = model.initilize_model(
            pretrained_embedding=embeddings,
            freeze_embedding=False,
            learning_rate=args.learning_rate,
            dropout=args.dropout)
        train.train(cnn_model, optimizer, train_dataloader, val_dataloader,
                    epochs=args.epoch)

    if args.save_word_vectors:
        save_embeddings.write_embeddings(
            'trained_embeddings_{}.txt'.format(args.mode),
            cnn_model.embedding.weight.data, word2idx)

    if args.predict:
        x = input('Enter an English text! : ')
        x = str(x)
        train.predict(x, cnn_model, word2idx)
        while True:
            conti = input('Continue? (y/n) : ')
            if conti == 'y':
                x1 = input('Enter an English text! : ')
                x1 = str(x1)
                train.predict(x1, cnn_model, word2idx)
            else:
                break
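# `save_embeddings.write_embeddings` is not shown here. A minimal sketch, assuming
# it writes the trained vectors in the common word2vec text layout, one
# "word v1 v2 ... vn" line per vocabulary entry (an assumption about the helper):
def write_embeddings_sketch(path, embedding_weights, word2idx):
    # `embedding_weights` is the trained nn.Embedding weight tensor.
    with open(path, "w", encoding="utf-8") as f:
        for word, idx in word2idx.items():
            vector = " ".join(f"{v:.6f}" for v in embedding_weights[idx].tolist())
            f.write(f"{word} {vector}\n")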
def train(args, train_dataset, logger, model: PreTrainedModel,
          tokenizer: PreTrainedTokenizer) -> Tuple[int, float]:
    """ Main loop for training the model """
    if args.local_rank in [-1, 0]:
        tb_writer = SummaryWriter()

    args.train_batch_size = args.per_gpu_train_batch_size * max(1, args.n_gpu)

    def collate(examples: List[torch.Tensor]):
        if tokenizer._pad_token is None:
            return pad_sequence(examples, batch_first=True)
        return pad_sequence(examples,
                            batch_first=True,
                            padding_value=tokenizer.pad_token_id)

    train_sampler = RandomSampler(
        train_dataset) if args.local_rank == -1 else DistributedSampler(
            train_dataset)
    train_dataloader = DataLoader(train_dataset,
                                  sampler=train_sampler,
                                  batch_size=args.train_batch_size,
                                  collate_fn=collate,
                                  drop_last=True)

    if args.max_steps > 0:
        t_total = args.max_steps
        args.num_train_epochs = args.max_steps // (
            len(train_dataloader) // args.gradient_accumulation_steps) + 1
    else:
        t_total = len(
            train_dataloader
        ) // args.gradient_accumulation_steps * args.num_train_epochs

    model = model.module if hasattr(
        model, "module") else model  # Take care of distributed/parallel training
    model.resize_token_embeddings(len(tokenizer))
    # add_special_tokens_(model, tokenizer)

    # Prepare optimizer and schedule (linear warmup and decay)
    no_decay = ["bias", "LayerNorm.weight"]
    optimizer_grouped_parameters = [
        {
            "params": [
                p for n, p in model.named_parameters()
                if not any(nd in n for nd in no_decay)
            ],
            "weight_decay": args.weight_decay,
        },
        {
            "params": [
                p for n, p in model.named_parameters()
                if any(nd in n for nd in no_decay)
            ],
            "weight_decay": 0.0
        },
    ]
    optimizer = AdamW(optimizer_grouped_parameters,
                      lr=args.learning_rate,
                      eps=args.adam_epsilon)
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=args.warmup_steps,
        num_training_steps=t_total)

    # Check if saved optimizer or scheduler states exist
    if (args.model_name_or_path and os.path.isfile(
            os.path.join(args.model_name_or_path, "optimizer.pt"))
            and os.path.isfile(
                os.path.join(args.model_name_or_path, "scheduler.pt"))):
        # Load in optimizer and scheduler states
        optimizer.load_state_dict(
            torch.load(os.path.join(args.model_name_or_path, "optimizer.pt")))
        scheduler.load_state_dict(
            torch.load(os.path.join(args.model_name_or_path, "scheduler.pt")))

    if args.fp16:
        try:
            from apex import amp
        except ImportError:
            raise ImportError(
                "Please install apex from https://www.github.com/nvidia/apex to use fp16 training."
            )
        model, optimizer = amp.initialize(model,
                                          optimizer,
                                          opt_level=args.fp16_opt_level)

    # multi-gpu training (should be after apex fp16 initialization)
    if args.n_gpu > 1:
        model = torch.nn.DataParallel(model)

    # Distributed training (should be after apex fp16 initialization)
    if args.local_rank != -1:
        model = torch.nn.parallel.DistributedDataParallel(
            model,
            device_ids=[args.local_rank],
            output_device=args.local_rank,
            find_unused_parameters=True)

    # Train!
    logger.info("***** Running training *****")
    logger.info("  Num examples = %d", len(train_dataset))
    logger.info("  Num Epochs = %d", args.num_train_epochs)
    logger.info("  Instantaneous batch size per GPU = %d",
                args.per_gpu_train_batch_size)
    logger.info(
        "  Total train batch size (w. parallel, distributed & accumulation) = %d",
        args.train_batch_size * args.gradient_accumulation_steps *
        (torch.distributed.get_world_size() if args.local_rank != -1 else 1),
    )
    logger.info("  Gradient Accumulation steps = %d",
                args.gradient_accumulation_steps)
    logger.info("  Total optimization steps = %d", t_total)

    global_step = 0
    epochs_trained = 0
    steps_trained_in_current_epoch = 0
    # Check if continuing training from a checkpoint
    if args.model_name_or_path and os.path.exists(args.model_name_or_path):
        try:
            # set global_step to the global_step of the last saved checkpoint from the model path
            checkpoint_suffix = args.model_name_or_path.split("-")[-1].split(
                "/")[0]
            global_step = int(checkpoint_suffix)
            epochs_trained = global_step // (len(train_dataloader) //
                                             args.gradient_accumulation_steps)
            steps_trained_in_current_epoch = global_step % (
                len(train_dataloader) // args.gradient_accumulation_steps)

            logger.info(
                "  Continuing training from checkpoint, will skip to saved global_step"
            )
            logger.info("  Continuing training from epoch %d", epochs_trained)
            logger.info("  Continuing training from global step %d",
                        global_step)
            logger.info("  Will skip the first %d steps in the first epoch",
                        steps_trained_in_current_epoch)
        except ValueError:
            logger.info("  Starting fine-tuning.")

    tr_loss, logging_loss = 0.0, 0.0

    model.zero_grad()
    train_iterator = trange(epochs_trained,
                            int(args.num_train_epochs),
                            desc="Epoch",
                            disable=args.local_rank not in [-1, 0])
    set_seed(args)  # Added here for reproducibility
    for _ in train_iterator:
        epoch_iterator = tqdm(train_dataloader,
                              desc="Iteration",
                              disable=args.local_rank not in [-1, 0])
        for step, batch in enumerate(epoch_iterator):

            # Skip past any already trained steps if resuming training
            if steps_trained_in_current_epoch > 0:
                steps_trained_in_current_epoch -= 1
                continue

            inputs, labels = (batch, batch)
            if inputs.shape[1] > 1024:
                continue
            inputs = inputs.to(args.device)
            labels = labels.to(args.device)
            model.train()
            outputs = model(inputs, labels=labels)
            loss = outputs[
                0]  # model outputs are always tuple in transformers (see doc)

            if args.n_gpu > 1:
                loss = loss.mean(
                )  # mean() to average on multi-gpu parallel training
            if args.gradient_accumulation_steps > 1:
                loss = loss / args.gradient_accumulation_steps

            if args.fp16:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()

            tr_loss += loss.item()
            if (step + 1) % args.gradient_accumulation_steps == 0:
                if args.fp16:
                    torch.nn.utils.clip_grad_norm_(
                        amp.master_params(optimizer), args.max_grad_norm)
                else:
                    torch.nn.utils.clip_grad_norm_(model.parameters(),
                                                   args.max_grad_norm)
                optimizer.step()
                scheduler.step()  # Update learning rate schedule
                model.zero_grad()
                global_step += 1

                if args.local_rank in [
                        -1, 0
                ] and args.logging_steps > 0 and global_step % args.logging_steps == 0:
                    # Log metrics
                    if (
                            args.local_rank == -1
                            and args.evaluate_during_training
                    ):  # Only evaluate when single GPU otherwise metrics may not average well
                        results = evaluate(args, model, tokenizer)
                        for key, value in results.items():
                            tb_writer.add_scalar("eval_{}".format(key), value,
                                                 global_step)
                    tb_writer.add_scalar("lr", scheduler.get_lr()[0],
                                         global_step)
                    tb_writer.add_scalar("loss", (tr_loss - logging_loss) /
                                         args.logging_steps, global_step)
                    logging_loss = tr_loss

                if args.local_rank in [
                        -1, 0
                ] and args.save_steps > 0 and global_step % args.save_steps == 0:
                    checkpoint_prefix = "checkpoint"
                    # Save model checkpoint
                    output_dir = os.path.join(
                        args.output_dir,
                        "{}-{}".format(checkpoint_prefix, global_step))
                    os.makedirs(output_dir, exist_ok=True)
                    model_to_save = (
                        model.module if hasattr(model, "module") else model
                    )  # Take care of distributed/parallel training
                    model_to_save.save_pretrained(output_dir)
                    tokenizer.save_pretrained(output_dir)

                    torch.save(args,
                               os.path.join(output_dir, "training_args.bin"))
                    logger.info("Saving model checkpoint to %s", output_dir)

                    _rotate_checkpoints(args, checkpoint_prefix)

                    torch.save(optimizer.state_dict(),
                               os.path.join(output_dir, "optimizer.pt"))
                    torch.save(scheduler.state_dict(),
                               os.path.join(output_dir, "scheduler.pt"))
                    logger.info("Saving optimizer and scheduler states to %s",
                                output_dir)

            if args.max_steps > 0 and global_step > args.max_steps:
                epoch_iterator.close()
                break
        if args.max_steps > 0 and global_step > args.max_steps:
            train_iterator.close()
            break

    if args.local_rank in [-1, 0]:
        tb_writer.close()

    return global_step, tr_loss / global_step
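# `_rotate_checkpoints` is referenced above but not defined in this snippet. A
# minimal sketch under the assumption that checkpoints live in
# "<output_dir>/checkpoint-<global_step>" directories and that `args` has a
# `save_total_limit` attribute (both are assumptions, not the project's code):
import glob
import os
import re
import shutil

def rotate_checkpoints_sketch(args, checkpoint_prefix="checkpoint"):
    if not getattr(args, "save_total_limit", None) or args.save_total_limit <= 0:
        return
    # Collect "checkpoint-<step>" directories and sort them by global step.
    paths = glob.glob(os.path.join(args.output_dir, f"{checkpoint_prefix}-*"))
    steps = []
    for path in paths:
        match = re.search(rf"{checkpoint_prefix}-(\d+)$", path)
        if match:
            steps.append((int(match.group(1)), path))
    # Delete the oldest checkpoints beyond the retention limit.
    for _, path in sorted(steps)[:-args.save_total_limit]:
        shutil.rmtree(path)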