def initialize():
    global args
    global model
    global tokenizer
    global db

    # initialize args
    config = yaml.safe_load(open('config/config.yaml', 'r'))
    args = config['default']
    args['device'] = 'cuda' if torch.cuda.is_available() else 'cpu'

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__file__)
    logger.info(pformat(args))

    # initialize model and tokenizer
    logger.info("Get pretrained model and tokenizer")
    model_class, tokenizer_class = GPT2LMHeadModel, GPT2Tokenizer
    tokenizer = tokenizer_class.from_pretrained(args['model_checkpoint'])
    model = model_class.from_pretrained(args['model_checkpoint'])
    model.to(args['device'])
    model.eval()
    add_special_tokens_(model, tokenizer)

    # connect to database
    # db_config = config['mysql']
    # db = mysql.connector.connect(
    #     host=db_config['host'],
    #     user=db_config['user'],
    #     passwd=db_config['passwd'],
    #     database=db_config['database']
    # )

    logger.info("Initialization of model and tokenizer complete.")
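# --- Hedged sketch ---------------------------------------------------------
# initialize() calls an add_special_tokens_ helper that is not shown in this
# section. A minimal sketch of what such a helper typically does, following
# the convention from HuggingFace's conversational fine-tuning examples; the
# exact token strings in ATTR_TO_SPECIAL_TOKEN are an assumption, not taken
# from this codebase.
ATTR_TO_SPECIAL_TOKEN = {
    'bos_token': '<bos>',
    'eos_token': '<eos>',
    'pad_token': '<pad>',
    'additional_special_tokens': ['<speaker1>', '<speaker2>'],
}


def add_special_tokens_(model, tokenizer):
    """Add special tokens to the tokenizer and resize the model embeddings."""
    orig_num_tokens = len(tokenizer)
    # add_special_tokens returns 0 if the tokens are already present
    num_added_tokens = tokenizer.add_special_tokens(ATTR_TO_SPECIAL_TOKEN)
    if num_added_tokens > 0:
        model.resize_token_embeddings(
            new_num_tokens=orig_num_tokens + num_added_tokens)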
def build_pretrain_feature_model(self):
    mn = self.args.pretrain_feature_model_name
    if 'albert' in mn:
        pretrain_feature_tokenizer = BertTokenizer.from_pretrained(mn)
        config = AlbertConfig.from_pretrained(mn)
        config.output_hidden_states = True
        self.pretrain_feature_model = AlbertModel.from_pretrained(
            mn, config=config).to(self.device)
    else:
        pretrain_feature_tokenizer = AutoTokenizer.from_pretrained(mn)
        config = AutoConfig.from_pretrained(mn)
        config.output_hidden_states = True
        self.pretrain_feature_model = AutoModel.from_pretrained(
            mn, config=config).to(self.device)
    self.pretrain_feature_model.requires_grad_(False)
    # self.pretrain_feature_model.requires_grad_(True)

    # pipeline input is raw data; we already have ids, so use the model directly
    # self.pretrain_feature_pipeline = Pipeline('feature-extraction',
    #     model=self.pretrain_feature_model, tokenizer=pretrain_feature_tokenizer)

    # TODO: pre-compute features and save them to a file; this uses less
    # memory during training and is faster
    # XXX: only this tokenizer's vocab is used, not its byte-pair splitting;
    # for now we just split on spaces
    utils.add_special_tokens_(self.pretrain_feature_model,
                              pretrain_feature_tokenizer)

    # FIXME: these modified args should be saved to the checkpoint file
    if self.args.pretrain_feature_type == 'mem_n2n':
        self.args.emb_dim = self.pretrain_feature_model.config.hidden_size
        self.args.d_model = self.pretrain_feature_model.config.hidden_size
    elif self.args.pretrain_feature_type == 'feature':
        self.args.emb_dim = self.pretrain_feature_model.config.hidden_size
    else:
        if self.pretrain_feature_model.base_model_prefix != 'bert':
            self.args.emb_dim = self.pretrain_feature_model.config.embedding_size
        else:
            self.args.emb_dim = self.pretrain_feature_model.config.hidden_size
        # XXX: for 'xlnet'
        # self.args.d_model = self.pretrain_feature_model.config.hidden_size
    if 'weight' in self.args.pretrain_feature_type:
        # has little effect
        self.args.d_model = self.pretrain_feature_model.config.hidden_size
        self.args.n_head = self.pretrain_feature_model.config.num_attention_heads
        self.args.d_ff = self.pretrain_feature_model.config.intermediate_size
        self.args.factor_ff = False

    self.vocab = datasets.ChatVocab(pretrain_feature_tokenizer)
    self.input_dim = len(self.vocab)
    self.pad_idx = self.vocab.stoi(utils.PAD)
    self.embeddings = None
    # too slow
    # self.tokenizer = pretrain_feature_tokenizer.tokenize
    self.tokenizer = None
def build_pretrain_feature_model(self):
    mn = self.model_config.pretrain_feature_model_name
    if 'albert' in mn:
        pretrain_feature_tokenizer = BertTokenizer.from_pretrained(mn)
        config = AlbertConfig.from_pretrained(mn)
        config.output_hidden_states = True
        self.pretrain_feature_model = AlbertModel.from_pretrained(
            mn, config=config).to(self.device)
    else:
        pretrain_feature_tokenizer = AutoTokenizer.from_pretrained(mn)
        config = AutoConfig.from_pretrained(mn)
        config.output_hidden_states = True
        self.pretrain_feature_model = AutoModel.from_pretrained(
            mn, config=config).to(self.device)
    self.pretrain_feature_model.requires_grad_(False)

    # pipeline input is raw data; we already have ids, so use the model directly
    # self.pretrain_feature_pipeline = Pipeline('feature-extraction',
    #     model=self.pretrain_feature_model, tokenizer=pretrain_feature_tokenizer)

    # TODO: pre-compute features and save them to a file; this uses less
    # memory during training and is faster
    # XXX: only this tokenizer's vocab is used, not its byte-pair splitting;
    # for now we just split on spaces
    utils.add_special_tokens_(self.pretrain_feature_model,
                              pretrain_feature_tokenizer)

    # FIXME: these modified args should be saved to the checkpoint file
    # for feature-based use
    # self.args.emb_dim = self.pretrain_feature_model.config.hidden_size
    # self.model_config.emb_dim = self.pretrain_feature_model.config.hidden_size
    # for embedding-based use
    self.args.emb_dim = self.pretrain_feature_model.config.embedding_size
    self.model_config.emb_dim = self.pretrain_feature_model.config.embedding_size

    self.vocab = datasets.ChatVocab(pretrain_feature_tokenizer)
    self.input_dim = len(self.vocab)
    self.pad_idx = self.vocab.stoi(utils.PAD)
    # the pretrained feature model's embeddings and weights are no longer
    # needed; the trained model is used instead
    self.pretrain_feature_model = None
    self.tokenizer = pretrain_feature_tokenizer.tokenize
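# --- Hedged sketch ---------------------------------------------------------
# Both builders above rely on a datasets.ChatVocab wrapper that exposes the
# tokenizer's vocabulary through len() and stoi(). The real class may differ;
# this hypothetical sketch only illustrates the interface the code relies on.
class ChatVocab:
    def __init__(self, tokenizer):
        self.tokenizer = tokenizer

    def __len__(self):
        # vocabulary size, including any added special tokens
        return len(self.tokenizer)

    def stoi(self, token):
        # string-to-index lookup, mirroring torchtext's vocab.stoi
        return self.tokenizer.convert_tokens_to_ids(token)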
def train():
    parser = ArgumentParser()
    parser.add_argument(
        "--data_path",
        default=None,
        help="Path to conversational data (by default will look for a single file in ./data)")
    parser.add_argument("--run_name",
                        type=str,
                        default='run1',
                        help="The name of the run (subdirectory in ./runs)")
    parser.add_argument(
        "--model",
        type=str,
        default="openai-gpt",
        choices=['openai-gpt', 'gpt2'],
        help="Initialize model from a path to a checkpoint or with a model name (openai-gpt/gpt2)")
    parser.add_argument("--save_every",
                        type=int,
                        default=50,
                        help="Save checkpoint every n update steps.")
    parser.add_argument(
        "--max_input_length",
        type=int,
        default=400,
        help="Number of tokens fed into the model (reduce this number if you have memory constraints)")
    parser.add_argument("--weight_decay",
                        default=0.0,
                        type=float,
                        help="Weight decay if we apply some.")
    parser.add_argument("--train_batch_size",
                        type=int,
                        default=4,
                        help="Batch size for training")
    parser.add_argument("--valid_batch_size",
                        type=int,
                        default=4,
                        help="Batch size for validation")
    parser.add_argument("--gradient_accumulation_steps",
                        type=int,
                        default=8,
                        help="Accumulate gradients over several steps")
    parser.add_argument("--lr", type=float, default=5e-5, help="Learning rate")
    parser.add_argument("--adam_epsilon",
                        default=1e-8,
                        type=float,
                        help="Epsilon for the Adam optimizer.")
    parser.add_argument("--max_norm",
                        type=float,
                        default=1.0,
                        help="Gradient norm clipping value")
    parser.add_argument("--n_epochs",
                        type=int,
                        default=2,
                        help="Number of training epochs")
    parser.add_argument("--device",
                        type=str,
                        default="cuda" if torch.cuda.is_available() else "cpu",
                        help="Device (cuda or cpu)")
    parser.add_argument("--warmup_steps",
                        default=0,
                        type=int,
                        help="Linear warmup over warmup_steps.")
    parser.add_argument("--seed",
                        type=int,
                        default=42,
                        help="Random seed for initialization")
    args = parser.parse_args()

    # Set seed
    set_seed(args.seed)

    # Load tokenizer
    logger.info("Prepare tokenizer, pretrained model and optimizer.")
    # can't use AutoTokenizer because the checkpoint could be a Path
    tokenizer_class = GPT2Tokenizer if "gpt2" in args.model else OpenAIGPTTokenizer
    tokenizer = tokenizer_class.from_pretrained(args.model)

    # Load model
    model_class = GPT2LMHeadModel if "gpt2" in args.model else OpenAIGPTLMHeadModel
    model = model_class.from_pretrained(args.model)
    model.to(args.device)

    # Add special tokens if they are not already added
    add_special_tokens_(model, tokenizer)

    # Get data loaders
    logger.info("Prepare datasets")
    data_loader = get_data_loader(args, tokenizer)

    # Prepare optimizer and schedule (linear warmup and decay)
    no_decay = ["bias", "LayerNorm.weight"]
    optimizer_grouped_parameters = [
        {
            "params": [
                p for n, p in model.named_parameters()
                if not any(nd in n for nd in no_decay)
            ],
            "weight_decay": args.weight_decay,
        },
        {
            "params": [
                p for n, p in model.named_parameters()
                if any(nd in n for nd in no_decay)
            ],
            "weight_decay": 0.0,
        },
    ]
    optimizer = AdamW(optimizer_grouped_parameters,
                      lr=args.lr,
                      eps=args.adam_epsilon)
    t_total = len(data_loader) // args.gradient_accumulation_steps * args.n_epochs
    # scheduler = WarmupLinearSchedule(optimizer, warmup_steps=args.warmup_steps, t_total=t_total)
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=args.warmup_steps,
        num_training_steps=t_total)

    # Train!
    logger.info("***** Running training *****")
    global_step = 0
    epochs_trained = 0
    steps_trained_in_current_epoch = 0

    # Check if continuing training from a checkpoint
    if os.path.exists(args.model):
        # set global_step to the global_step of the last saved checkpoint from the model path
        global_step = int(args.model.split("-")[-1].split("/")[0])
        epochs_trained = global_step // (
            len(data_loader) // args.gradient_accumulation_steps)
        steps_trained_in_current_epoch = global_step % (
            len(data_loader) // args.gradient_accumulation_steps)

        logger.info("Continuing training from checkpoint, will skip to saved global_step")
        logger.info(f"Continuing training from epoch {epochs_trained}")
        logger.info(f"Continuing training from global step {global_step}")
        logger.info(f"Will skip the first {steps_trained_in_current_epoch} steps in the first epoch")

    # Training loop
    model.zero_grad()
    epoch_pbar = trange(epochs_trained, int(args.n_epochs))
    av_loss = 0
    for current_epoch in epoch_pbar:
        epoch_pbar.set_description(f"Epoch [{current_epoch + 1}/{args.n_epochs}]")
        pbar = tqdm(data_loader)
        for step, batch in enumerate(pbar):
            # Skip past any already trained steps if resuming training
            if steps_trained_in_current_epoch > 0:
                steps_trained_in_current_epoch -= 1
                continue

            model.train()
            inputs, labels = (batch, batch)
            inputs = inputs.to(args.device)
            labels = labels.to(args.device)
            loss, *_ = model(inputs, labels=labels)
            # normalize the loss over the accumulation steps so the effective
            # gradient matches a single large batch
            loss = loss / args.gradient_accumulation_steps
            loss.backward()
            tr_loss = loss.item()

            # running average of the loss within the epoch
            av_loss = (step * av_loss + tr_loss) / (step + 1)
            pbar.set_description(f"Average loss: {av_loss:.4f}")

            torch.nn.utils.clip_grad_norm_(model.parameters(), args.max_norm)
            if (step + 1) % args.gradient_accumulation_steps == 0:
                optimizer.step()
                scheduler.step()  # Update learning rate schedule
                model.zero_grad()
                global_step += 1

                if global_step % args.save_every == 0 and global_step > 0:
                    checkpoint_prefix = "checkpoint"
                    output_dir = os.path.join(
                        'runs', args.run_name,
                        "{}-{}".format(checkpoint_prefix, global_step))
                    if not os.path.exists(output_dir):
                        os.makedirs(output_dir)
                    logger.info(f"Saving model checkpoint to {output_dir}")
                    model.save_pretrained(output_dir)
                    tokenizer.save_pretrained(output_dir)
                    logger.info(f"Saving optimizer and scheduler states to {output_dir}")
                    torch.save(optimizer.state_dict(),
                               os.path.join(output_dir, "optimizer.pt"))
                    torch.save(scheduler.state_dict(),
                               os.path.join(output_dir, "scheduler.pt"))

    # save model
    output_dir = os.path.join('runs', args.run_name)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    logger.info(f"Saving model checkpoint to {output_dir}")
    model.save_pretrained(output_dir)
    tokenizer.save_pretrained(output_dir)
    # Good practice: save your training arguments together with the trained model
    torch.save(args, os.path.join(output_dir, "training_args.bin"))
def run():
    parser = ArgumentParser()
    parser.add_argument("--run_name",
                        type=str,
                        default='run1',
                        help="The name of the run (subdirectory in ./runs)")
    parser.add_argument(
        "--model",
        type=str,
        default="openai-gpt",
        help="Model type (openai-gpt or gpt2)",
        choices=['openai-gpt', 'gpt2'])  # anything besides gpt2 will load openai-gpt
    parser.add_argument(
        "--max_history",
        type=int,
        default=2,
        help="Number of previous utterances to keep in history")
    parser.add_argument("--device",
                        type=str,
                        default="cuda" if torch.cuda.is_available() else "cpu",
                        help="Device (cuda or cpu)")
    parser.add_argument("--no_sample",
                        action='store_true',
                        help="Set to use greedy decoding instead of sampling")
    parser.add_argument("--max_length",
                        type=int,
                        default=40,
                        help="Maximum length of the output utterances")
    parser.add_argument("--min_length",
                        type=int,
                        default=1,
                        help="Minimum length of the output utterances")
    parser.add_argument("--seed", type=int, default=0, help="Seed")
    parser.add_argument("--temperature",
                        type=float,  # temperature is a float, not an int
                        default=1.0,
                        help="Sampling softmax temperature")
    parser.add_argument(
        "--top_k",
        type=int,
        default=0,
        help="Filter top-k tokens before sampling (<=0: no filtering)")
    parser.add_argument(
        "--top_p",
        type=float,
        default=0.8,
        help="Nucleus filtering (top-p) before sampling (<=0.0: no filtering)")
    args = parser.parse_args()

    # set seed
    set_seed(args.seed)

    logger.info("Get pretrained model and tokenizer")
    model_path = os.path.join('runs', args.run_name)
    tokenizer_class, model_class = (
        GPT2Tokenizer, GPT2LMHeadModel) if args.model == 'gpt2' else (
            OpenAIGPTTokenizer, OpenAIGPTLMHeadModel)
    tokenizer = tokenizer_class.from_pretrained(model_path)
    model = model_class.from_pretrained(model_path)
    model.to(args.device)
    add_special_tokens_(model, tokenizer)

    history = []
    while True:
        raw_text = input(">>> ")
        while not raw_text:
            print('Prompt should not be empty!')
            raw_text = input(">>> ")
        history.append(tokenizer.encode(raw_text))
        with torch.no_grad():
            out_ids = sample_sequence(history, tokenizer, model, args)
        history.append(out_ids)
        history = history[-(2 * args.max_history + 1):]
        out_text = tokenizer.decode(out_ids, skip_special_tokens=True)
        print(out_text)
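# --- Hedged sketch ---------------------------------------------------------
# run() delegates generation to sample_sequence, which is not shown in this
# section. Its top_k/top_p arguments suggest the next-token logits are
# filtered with the standard top-k / nucleus recipe; this sketch follows that
# recipe and is an assumption, not this repo's exact code.
import torch
import torch.nn.functional as F


def top_filtering(logits, top_k=0, top_p=0.0, filter_value=-float('inf')):
    """Filter a 1D tensor of logits using top-k and/or nucleus (top-p) filtering."""
    if top_k > 0:
        # remove all tokens with a logit below the k-th largest logit
        indices_to_remove = logits < torch.topk(logits, top_k)[0][..., -1, None]
        logits[indices_to_remove] = filter_value
    if top_p > 0.0:
        sorted_logits, sorted_indices = torch.sort(logits, descending=True)
        cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)
        # remove tokens whose cumulative probability exceeds top_p...
        sorted_indices_to_remove = cumulative_probs > top_p
        # ...but always keep at least the most probable token
        sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone()
        sorted_indices_to_remove[..., 0] = 0
        indices_to_remove = sorted_indices[sorted_indices_to_remove]
        logits[indices_to_remove] = filter_value
    return logits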
def train():
    parser = ArgumentParser()
    parser.add_argument(
        "--data_path",
        type=str,
        default=None,
        help="Path to conversational data (by default will look for a single file in ./data)",
    )
    parser.add_argument(
        "--run_name",
        type=str,
        default="run1",
        help="The name of the run (subdirectory in ./runs)",
    )
    parser.add_argument(
        "--model",
        type=str,
        default="openai-gpt",
        help="Initialize model from a path to a checkpoint or with a model name (openai-gpt/gpt2)",
    )
    parser.add_argument(
        "--save_every",
        type=int,
        default=100,
        help="Save checkpoint every n update steps.",
    )
    parser.add_argument(
        "--start_from",
        type=int,
        default=0,
        help="Continue training from a checkpoint.",
    )
    parser.add_argument(
        "--num_candidates",
        type=int,
        default=2,
        help="Number of candidates for training",
    )
    parser.add_argument(
        "--max_history",
        type=int,
        default=2,
        help="Number of previous exchanges to keep in history",
    )
    parser.add_argument(
        "--max_input_length",
        type=int,
        default=200,
        help="Number of tokens fed into the model (reduce this number if you have memory constraints)",
    )
    parser.add_argument(
        "--weight_decay", default=0.0, type=float, help="Weight decay if we apply some."
    )
    parser.add_argument(
        "--train_batch_size", type=int, default=4, help="Batch size for training"
    )
    parser.add_argument(
        "--gradient_accumulation_steps",
        type=int,
        default=8,
        help="Accumulate gradients over several steps",
    )
    parser.add_argument("--lr", type=float, default=6.25e-5, help="Learning rate")
    parser.add_argument(
        "--adam_epsilon", default=1e-8, type=float, help="Epsilon for the Adam optimizer."
    )
    parser.add_argument(
        "--lm_coef", type=float, default=1.0, help="LM loss coefficient"
    )
    parser.add_argument(
        "--mc_coef", type=float, default=1.0, help="Multiple-choice loss coefficient"
    )
    parser.add_argument(
        "--max_norm", type=float, default=1.0, help="Gradient norm clipping value"
    )
    parser.add_argument(
        "--n_epochs", type=int, default=3, help="Number of training epochs"
    )
    parser.add_argument(
        "--device",
        type=str,
        default="cuda" if torch.cuda.is_available() else "cpu",
        help="Device (cuda or cpu)",
    )
    parser.add_argument(
        "--warmup_steps", default=0, type=int, help="Linear warmup over warmup_steps."
    )
    parser.add_argument(
        "--seed", type=int, default=42, help="Random seed for initialization"
    )
    parser.add_argument(
        "--use_huggingface_model",
        action="store_true",
        help="Start training from the pre-trained model by Huggingface",
    )
    args = parser.parse_args()

    # Set seed
    set_seed(args.seed)

    if args.use_huggingface_model:
        args.model = download_pretrained_model()
        logger.info(f"Using pre-trained Personachat model {args.model}")

    # if args.model == "gpt2":
    #     tokenizer_class, model_class = GPT2Tokenizer, GPT2DoubleHeadsModel
    # elif args.model == "distilbert-base-multilingual-cased":
    #     tokenizer_class, model_class = tr.DistilBertTokenizer, tr.DistilBertForMaskedLM
    # elif args.model == "bert-base-multilingual-cased":
    #     tokenizer_class, model_class = tr.DistilBertTokenizer, tr.BertForMaskedLM
    # else:
    #     tokenizer_class, model_class = OpenAIGPTTokenizer, OpenAIGPTLMHeadModel
    # model_path = os.path.join("runs", args.run_name)
    # tokenizer = tokenizer_class.from_pretrained(args.model)
    # model = model_class.from_pretrained(args.model)
    tokenizer = tr.GPT2Tokenizer.from_pretrained(args.model)
    model = tr.GPT2DoubleHeadsModel.from_pretrained(args.model)
    model.to(args.device)

    # Add special tokens if they are not already added
    add_special_tokens_(model, tokenizer)

    # Get data loaders
    logger.info("Prepare datasets")
    train_loader = get_data_loader(args, tokenizer, use_cache=True)

    # Prepare optimizer and schedule (linear warmup and decay)
    no_decay = ["bias", "LayerNorm.weight"]
    optimizer_grouped_parameters = [
        {
            "params": [
                p
                for n, p in model.named_parameters()
                if not any(nd in n for nd in no_decay)
            ],
            "weight_decay": args.weight_decay,
        },
        {
            "params": [
                p
                for n, p in model.named_parameters()
                if any(nd in n for nd in no_decay)
            ],
            "weight_decay": 0.0,
        },
    ]
    optimizer = AdamW(optimizer_grouped_parameters, lr=args.lr, eps=args.adam_epsilon)
    t_total = len(train_loader) // args.gradient_accumulation_steps * args.n_epochs
    scheduler = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=args.warmup_steps, num_training_steps=t_total
    )

    # Train!
    logger.info("***** Running training *****")
    global_step = 0
    epochs_trained = 0
    steps_trained_in_current_epoch = 0

    # Check if continuing training from a checkpoint
    if os.path.exists(args.model):
        # set global_step to the global_step of the last saved checkpoint
        # (--start_from has a default, so no try/except is needed here)
        global_step = args.start_from
        epochs_trained = global_step // (
            len(train_loader) // args.gradient_accumulation_steps
        )
        steps_trained_in_current_epoch = global_step % (
            len(train_loader) // args.gradient_accumulation_steps
        )

        logger.info(
            "Continuing training from checkpoint, will skip to saved global_step"
        )
        logger.info(f"Continuing training from epoch {epochs_trained}")
        logger.info(f"Continuing training from global step {global_step}")
        logger.info(
            f"Will skip the first {steps_trained_in_current_epoch} steps in the first epoch"
        )

    # Training loop
    model.zero_grad()
    epoch_pbar = trange(epochs_trained, int(args.n_epochs))
    av_loss = 0
    for current_epoch in epoch_pbar:
        epoch_pbar.set_description(f"Epoch [{current_epoch + 1}/{args.n_epochs}]")
        pbar = tqdm(train_loader)
        for step, batch in enumerate(pbar):
            # Skip past any already trained steps if resuming training
            if steps_trained_in_current_epoch > 0:
                steps_trained_in_current_epoch -= 1
                continue

            model.train()
            batch = tuple(input_tensor.to(args.device) for input_tensor in batch)
            input_ids, mc_token_ids, lm_labels, mc_labels, token_type_ids = batch
            lm_loss, mc_loss, *_ = model(
                input_ids,
                token_type_ids=token_type_ids,
                mc_token_ids=mc_token_ids,
                mc_labels=mc_labels,
                lm_labels=lm_labels,
            )
            loss = (
                lm_loss * args.lm_coef + mc_loss * args.mc_coef
            ) / args.gradient_accumulation_steps
            loss.backward()
            tr_loss = loss.item()

            # running average of the loss within the epoch
            # (use the detached scalar, not the graph-attached tensor)
            av_loss = (step * av_loss + tr_loss) / (step + 1)
            pbar.set_description(f"Average loss: {av_loss:.4f}")

            torch.nn.utils.clip_grad_norm_(model.parameters(), args.max_norm)
            if (step + 1) % args.gradient_accumulation_steps == 0:
                optimizer.step()
                scheduler.step()  # Update learning rate schedule
                model.zero_grad()
                global_step += 1

                # if global_step % args.save_every == 0 and global_step > 0:
                #     checkpoint_prefix = "checkpoint"
                #     output_dir = os.path.join(
                #         "runs",
                #         args.run_name,
                #         "{}-{}".format(checkpoint_prefix, global_step),
                #     )
                #     if not os.path.exists(output_dir):
                #         os.makedirs(output_dir)
                #     logger.info(f"Saving model checkpoint to {output_dir}")
                #     model.save_pretrained(output_dir)
                #     tokenizer.save_pretrained(output_dir)
                #     logger.info(
                #         f"Saving optimizer and scheduler states to {output_dir}"
                #     )
                #     torch.save(
                #         optimizer.state_dict(), os.path.join(output_dir, "optimizer.pt")
                #     )
                #     torch.save(
                #         scheduler.state_dict(), os.path.join(output_dir, "scheduler.pt")
                #     )

    # save model
    output_dir = os.path.join("runs", args.run_name)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    logger.info(f"Saving model checkpoint to {output_dir}")
    model.save_pretrained(output_dir)
    tokenizer.save_pretrained(output_dir)
    # Good practice: save your training arguments together with the trained model
    torch.save(args, os.path.join(output_dir, "training_args.bin"))
def run():
    parser = ArgumentParser()
    parser.add_argument(
        "--dataset_path",
        type=str,
        default="",
        help="Path or url of the dataset. If empty download from S3.")
    parser.add_argument(
        "--dataset_cache",
        type=str,
        default='./dataset_cache/dataset_cache_OpenAIGPTTokenizer',
        help="Path or url of the dataset cache")
    parser.add_argument("--model_checkpoint",
                        type=str,
                        default="./Model",
                        help="Path, url or short name of the model")
    parser.add_argument(
        "--max_history",
        type=int,
        default=2,
        help="Number of previous utterances to keep in history")
    parser.add_argument("--device",
                        type=str,
                        default="cuda" if torch.cuda.is_available() else "cpu",
                        help="Device (cuda or cpu)")
    parser.add_argument("--no_sample",
                        action='store_true',
                        help="Set to use greedy decoding instead of sampling")
    parser.add_argument("--max_length",
                        type=int,
                        default=20,
                        help="Maximum length of the output utterances")
    parser.add_argument("--min_length",
                        type=int,
                        default=1,
                        help="Minimum length of the output utterances")
    parser.add_argument("--seed", type=int, default=0, help="Seed")
    parser.add_argument("--temperature",
                        type=float,  # temperature is a float, not an int
                        default=0.7,
                        help="Sampling softmax temperature")
    parser.add_argument(
        "--top_k",
        type=int,
        default=0,
        help="Filter top-k tokens before sampling (<=0: no filtering)")
    parser.add_argument(
        "--top_p",
        type=float,
        default=0.9,
        help="Nucleus filtering (top-p) before sampling (<=0.0: no filtering)")
    args = parser.parse_args()

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__file__)
    logger.info(pformat(args))

    if args.seed != 0:
        random.seed(args.seed)
        torch.random.manual_seed(args.seed)
        torch.cuda.manual_seed(args.seed)

    # Load model class and tokenizer
    logger.info("Get pretrained model and tokenizer")
    tokenizer_class, model_class = OpenAIGPTTokenizer, OpenAIGPTLMHeadModel
    tokenizer = tokenizer_class.from_pretrained(args.model_checkpoint)
    model = model_class.from_pretrained(args.model_checkpoint)
    model.to(args.device)
    add_special_tokens_(model, tokenizer)

    logger.info("Sample a personality")
    dataset = torch.load(args.dataset_cache)
    # avoid shadowing `dataset` inside the comprehension
    personalities = [
        dialog["personality"] for subset in dataset.values() for dialog in subset
    ]
    personality = random.choice(personalities)
    logger.info("Selected personality: %s", tokenizer.decode(chain(*personality)))

    history = []
    while True:
        raw_text = input(">>> ")
        while not raw_text:
            print('Prompt should not be empty!')
            raw_text = input(">>> ")
        history.append(tokenizer.encode(raw_text))
        with torch.no_grad():
            out_ids = sample_sequence(personality, history, tokenizer, model, args)
        history.append(out_ids)
        history = history[-(2 * args.max_history + 1):]
        out_text = tokenizer.decode(out_ids, skip_special_tokens=True)
        print(out_text)
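# --- Hedged sketch ---------------------------------------------------------
# A simplified version of the sample_sequence loop called above. The real
# helper presumably also interleaves persona and history with special speaker
# tokens (a build-input step elided here); this sketch only shows the
# token-by-token sampling with temperature and the top_filtering step from
# the earlier sketch.
from itertools import chain

import torch
import torch.nn.functional as F


def sample_sequence(personality, history, tokenizer, model, args):
    current_output = []
    for _ in range(args.max_length):
        # hypothetical: flatten persona + history + generated prefix into ids
        input_ids = torch.tensor(
            list(chain(*personality)) + list(chain(*history)) + current_output,
            device=args.device).unsqueeze(0)
        logits = model(input_ids)[0]
        # take the logits of the last position and apply the temperature
        logits = logits[0, -1, :] / args.temperature
        logits = top_filtering(logits, top_k=args.top_k, top_p=args.top_p)
        probs = F.softmax(logits, dim=-1)
        # greedy decoding if --no_sample, otherwise sample from the distribution
        prev = torch.topk(probs, 1)[1] if args.no_sample else torch.multinomial(probs, 1)
        if prev.item() == tokenizer.eos_token_id:
            break  # min_length handling elided for brevity
        current_output.append(prev.item())
    return current_output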
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__file__)
logger.info(pformat(args))

if args.seed != 0:
    random.seed(args.seed)
    torch.random.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)

# Load model class and tokenizer
logger.info("Get pretrained model and tokenizer")
tokenizer_class, model_class = OpenAIGPTTokenizer, OpenAIGPTLMHeadModel
tokenizer = tokenizer_class.from_pretrained(args.model_checkpoint)
model = model_class.from_pretrained(args.model_checkpoint)
model.to(args.device)
add_special_tokens_(model, tokenizer)

logger.info("Sample a personality")
dataset = torch.load(args.dataset_cache)
# avoid shadowing `dataset` inside the comprehension
personalities = [
    dialog["personality"] for subset in dataset.values() for dialog in subset
]
personality = None
history = []

app = Flask(__name__)


@app.route("/")
def home():
    global personality
def main():
    def get_item(data, item):
        # look for the item at the top level first, then inside a nested
        # 'message' payload; return None when it is not found anywhere
        # (the original could hit an UnboundLocalError in that case)
        if item in data:
            return data[item]
        if 'message' in data and item in data['message']:
            return data['message'][item]
        return None

    @RTMClient.run_on(event="message")
    async def slack_interact(**payload):
        data = payload['data']
        user = get_item(data, 'user')
        if user == SLACK_USER:
            print(f'Receiving new payload by user {user}')
            print(payload)
            print(history)
            web_client = payload['web_client']
            message = get_item(data, 'text')
            if message is None:
                return
            history.append(tokenizer.encode(message))
            with torch.no_grad():
                out_ids = sample_sequence(history, tokenizer, model, args)
            history.append(out_ids)
            del history[:-(2 * args.max_history + 1)]
            out_text = tokenizer.decode(out_ids, skip_special_tokens=True)
            # respond
            channel_id = data['channel']
            await web_client.chat_postMessage(channel=channel_id, text=out_text)
        else:
            return

    parser = ArgumentParser()
    parser.add_argument("--run_name",
                        type=str,
                        default='run1',
                        help="The name of the run (subdirectory in ./runs)")
    parser.add_argument("--model",
                        type=str,
                        default="openai-gpt",
                        help="Model type (openai-gpt or gpt2)",
                        choices=['openai-gpt', 'gpt2'])
    parser.add_argument(
        "--max_history",
        type=int,
        default=2,
        help="Number of previous utterances to keep in history")
    parser.add_argument("--device",
                        type=str,
                        default="cuda" if torch.cuda.is_available() else "cpu",
                        help="Device (cuda or cpu)")
    parser.add_argument("--no_sample",
                        action='store_true',
                        help="Set to use greedy decoding instead of sampling")
    parser.add_argument("--max_length",
                        type=int,
                        default=40,
                        help="Maximum length of the output utterances")
    parser.add_argument("--min_length",
                        type=int,
                        default=1,
                        help="Minimum length of the output utterances")
    parser.add_argument("--seed", type=int, default=0, help="Seed")
    parser.add_argument("--temperature",
                        type=float,  # temperature is a float, not an int
                        default=1.0,
                        help="Sampling softmax temperature")
    parser.add_argument(
        "--top_k",
        type=int,
        default=0,
        help="Filter top-k tokens before sampling (<=0: no filtering)")
    parser.add_argument(
        "--top_p",
        type=float,
        default=0.8,
        help="Nucleus filtering (top-p) before sampling (<=0.0: no filtering)")
    args = parser.parse_args()

    # set seed (pass the seed value, as the other entry points do)
    set_seed(args.seed)

    logger.info("Get pretrained model and tokenizer")
    model_path = os.path.join('runs', args.run_name)
    tokenizer_class, model_class = (
        GPT2Tokenizer, GPT2LMHeadModel) if args.model == 'gpt2' else (
            OpenAIGPTTokenizer, OpenAIGPTLMHeadModel)
    tokenizer = tokenizer_class.from_pretrained(model_path)
    model = model_class.from_pretrained(model_path)
    model.to(args.device)
    add_special_tokens_(model, tokenizer)

    history = []

    # start RTM API
    loop = asyncio.get_event_loop()
    rtm_client = RTMClient(token=SLACK_API_TOKEN, run_async=True, loop=loop)
    loop.run_until_complete(rtm_client.start())
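# --- Hedged sketch ---------------------------------------------------------
# main() assumes module-level SLACK_API_TOKEN and SLACK_USER constants that
# are not shown in this section. Reading them from environment variables is
# an assumption about how they are provided, not this repo's exact setup:
import os

SLACK_API_TOKEN = os.environ["SLACK_API_TOKEN"]  # bot token, e.g. "xoxb-..."
SLACK_USER = os.environ.get("SLACK_USER")  # id of the user the bot replies to

if __name__ == '__main__':
    main()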