Example #1
def initialize():
    global args
    global model
    global tokenizer
    global db

    # initialize args
    with open('config/config.yaml', 'r') as f:
        config = yaml.safe_load(f)
    args = config['default']
    args['device'] = 'cuda' if torch.cuda.is_available() else 'cpu'

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__file__)
    logger.info(pformat(args))

    # initialize model and tokenizer
    logger.info("Get pretrained model and tokenizer")
    model_class, tokenizer_class = GPT2LMHeadModel, GPT2Tokenizer
    tokenizer = tokenizer_class.from_pretrained(args['model_checkpoint'])
    model = model_class.from_pretrained(args['model_checkpoint'])
    model.to(args['device'])
    model.eval()
    add_special_tokens_(model, tokenizer)

    # connect to database
    # db_config = config['mysql']
    # db = mysql.connector.connect(
    #   host=db_config['host'],
    #   user=db_config['user'],
    #   passwd=db_config['passwd'],
    #   database=db_config['database']
    # )

    logger.info("Initialization of model and tokenizer complete.")
Example #2
    def build_pretrain_feature_model(self):
        mn = self.args.pretrain_feature_model_name
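        # ALBERT checkpoints are handled explicitly (BertTokenizer plus AlbertConfig/AlbertModel); other models go through the Auto classes.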
        if 'albert' in mn:
            pretrain_feature_tokenizer = BertTokenizer.from_pretrained(mn)
            config = AlbertConfig.from_pretrained(mn)
            config.output_hidden_states = True
            self.pretrain_feature_model = AlbertModel.from_pretrained(
                mn, config=config).to(self.device)
        else:
            pretrain_feature_tokenizer = AutoTokenizer.from_pretrained(mn)
            config = AutoConfig.from_pretrained(mn)
            config.output_hidden_states = True
            self.pretrain_feature_model = AutoModel.from_pretrained(
                mn, config=config).to(self.device)
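        # Freeze the pretrained feature extractor; its weights are not updated during training.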
        self.pretrain_feature_model.requires_grad_(False)
        # self.pretrain_feature_model.requires_grad_(True)
        # The feature-extraction pipeline expects raw text; we already have token ids, so the model is used directly.
        # self.pretrain_feature_pipeline = Pipeline('feature-extraction',
        #        model=self.pretrain_feature_model, tokenizer=pretrain_feature_tokenizer)

        # TODO: precompute the features and save them to a file; this uses less memory during training and is faster.
        # XXX: only this tokenizer's vocabulary is used; byte-pair splitting is not applied, tokens are currently split on spaces.
        utils.add_special_tokens_(self.pretrain_feature_model,
                                  pretrain_feature_tokenizer)
        # FIXME: these modified args should be saved to the checkpoint file.
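        # Adopt the pretrained model's hidden/embedding sizes for the downstream model dimensions, depending on pretrain_feature_type.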
        if self.args.pretrain_feature_type == 'mem_n2n':
            self.args.emb_dim = self.pretrain_feature_model.config.hidden_size
            self.args.d_model = self.pretrain_feature_model.config.hidden_size
        elif self.args.pretrain_feature_type == 'feature':
            self.args.emb_dim = self.pretrain_feature_model.config.hidden_size
        else:
            if self.pretrain_feature_model.base_model_prefix != 'bert':
                self.args.emb_dim = self.pretrain_feature_model.config.embedding_size
            else:
                self.args.emb_dim = self.pretrain_feature_model.config.hidden_size

        # XXX: for 'xlnet'
        # self.args.d_model = self.pretrain_feature_model.config.hidden_size

        if 'weight' in self.args.pretrain_feature_type:
            # has little effect in practice
            self.args.d_model = self.pretrain_feature_model.config.hidden_size
            self.args.n_head = self.pretrain_feature_model.config.num_attention_heads
            self.args.d_ff = self.pretrain_feature_model.config.intermediate_size
            self.args.factor_ff = False

        self.vocab = datasets.ChatVocab(pretrain_feature_tokenizer)
        self.input_dim = len(self.vocab)
        self.pad_idx = self.vocab.stoi(utils.PAD)
        self.embeddings = None
        # too slow
        # self.tokenizer = pretrain_feature_tokenizer.tokenize
        self.tokenizer = None
Example #3
    def build_pretrain_feature_model(self):
        mn = self.model_config.pretrain_feature_model_name
        if 'albert' in mn:
            pretrain_feature_tokenizer = BertTokenizer.from_pretrained(mn)
            config = AlbertConfig.from_pretrained(mn)
            config.output_hidden_states = True
            self.pretrain_feature_model = AlbertModel.from_pretrained(
                mn, config=config).to(self.device)
        else:
            pretrain_feature_tokenizer = AutoTokenizer.from_pretrained(mn)
            config = AutoConfig.from_pretrained(mn)
            config.output_hidden_states = True
            self.pretrain_feature_model = AutoModel.from_pretrained(
                mn, config=config).to(self.device)
        self.pretrain_feature_model.requires_grad_(False)
        # The feature-extraction pipeline expects raw text; we already have token ids, so the model is used directly.
        # self.pretrain_feature_pipeline = Pipeline('feature-extraction',
        #        model=self.pretrain_feature_model, tokenizer=pretrain_feature_tokenizer)

        # TODO: precompute the features and save them to a file; this uses less memory during training and is faster.
        # XXX: only this tokenizer's vocabulary is used; byte-pair splitting is not applied, tokens are currently split on spaces.
        utils.add_special_tokens_(self.pretrain_feature_model,
                                  pretrain_feature_tokenizer)
        # FIXME: these modified args should be saved to the checkpoint file.
        # when using the extracted features:
        # self.args.emb_dim = self.pretrain_feature_model.config.hidden_size
        # self.model_config.emb_dim = self.pretrain_feature_model.config.hidden_size
        # when using the embeddings:
        self.args.emb_dim = self.pretrain_feature_model.config.embedding_size
        self.model_config.emb_dim = self.pretrain_feature_model.config.embedding_size

        self.vocab = datasets.ChatVocab(pretrain_feature_tokenizer)
        self.input_dim = len(self.vocab)
        self.pad_idx = self.vocab.stoi(utils.PAD)

        # The pretrain_feature_model embeddings and weights are no longer needed; the trained model is used instead.
        self.pretrain_feature_model = None
        self.tokenizer = pretrain_feature_tokenizer.tokenize
Example #4
def train():
    parser = ArgumentParser()
    parser.add_argument(
        "--data_path",
        default=None,
        help=
        "Path to conversational data (by default will look for single file in ./data)"
    )
    parser.add_argument("--run_name",
                        type=str,
                        default='run1',
                        help="The name of the run (subdirectory in ./runs)")
    parser.add_argument(
        "--model",
        type=str,
        default="openai-gpt",
        choices=['openai-gpt', 'gpt2'],
        help=
        "Initialize model from path to checkpoint or with model name (openai-gpt/openai-gpt2)"
    )
    parser.add_argument("--save_every",
                        type=int,
                        default=50,
                        help="Save checkpoint every n updates steps.")
    parser.add_argument(
        "--max_input_length",
        type=int,
        default=400,
        help=
        "Number of tokens which will be fed into the model (reduce this number if you have memory constraints)"
    )
    parser.add_argument("--weight_decay",
                        default=0.0,
                        type=float,
                        help="Weight decay if we apply some.")
    parser.add_argument("--train_batch_size",
                        type=int,
                        default=4,
                        help="Batch size for training")
    parser.add_argument("--valid_batch_size",
                        type=int,
                        default=4,
                        help="Batch size for validation")
    parser.add_argument("--gradient_accumulation_steps",
                        type=int,
                        default=8,
                        help="Accumulate gradients on several steps")
    parser.add_argument("--lr", type=float, default=5e-5, help="Learning rate")
    parser.add_argument("--adam_epsilon",
                        default=1e-8,
                        type=float,
                        help="Epsilon for Adam optimizer.")
    parser.add_argument("--max_norm",
                        type=float,
                        default=1.0,
                        help="Clipping gradient norm")
    parser.add_argument("--n_epochs",
                        type=int,
                        default=2,
                        help="Number of training epochs")
    parser.add_argument("--device",
                        type=str,
                        default="cuda" if torch.cuda.is_available() else "cpu",
                        help="Device (cuda or cpu)")
    parser.add_argument("--warmup_steps",
                        default=0,
                        type=int,
                        help="Linear warmup over warmup_steps.")
    parser.add_argument("--seed",
                        type=int,
                        default=42,
                        help="random seed for initialization")
    args = parser.parse_args()

    # Set seed
    set_seed(args.seed)

    # Load tokenizer
    logger.info("Prepare tokenizer, pretrained model and optimizer.")
    tokenizer_class = GPT2Tokenizer if "gpt2" in args.model else OpenAIGPTTokenizer  # can't use AutoTokenizer because the checkpoint could be a path
    tokenizer = tokenizer_class.from_pretrained(args.model)
    # Load model
    model_class = GPT2LMHeadModel if "gpt2" in args.model else OpenAIGPTLMHeadModel
    model = model_class.from_pretrained(args.model)
    model.to(args.device)
    # Add special tokens if they are not already added
    add_special_tokens_(model, tokenizer)

    # Get data loaders
    logger.info("Prepare datasets")
    data_loader = get_data_loader(args, tokenizer)

    # Prepare optimizer and schedule (linear warmup and decay)
    no_decay = ["bias", "LayerNorm.weight"]
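    # Exclude biases and LayerNorm weights from weight decay; decay only the remaining parameters.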
    optimizer_grouped_parameters = [
        {
            "params": [
                p for n, p in model.named_parameters()
                if not any(nd in n for nd in no_decay)
            ],
            "weight_decay":
            args.weight_decay,
        },
        {
            "params": [
                p for n, p in model.named_parameters()
                if any(nd in n for nd in no_decay)
            ],
            "weight_decay":
            0.0
        },
    ]
    optimizer = AdamW(optimizer_grouped_parameters,
                      lr=args.lr,
                      eps=args.adam_epsilon)
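    # Total number of optimizer update steps across all epochs.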
    t_total = len(
        data_loader) // args.gradient_accumulation_steps * args.n_epochs
    # scheduler = WarmupLinearSchedule(optimizer, warmup_steps=args.warmup_steps, t_total=t_total)
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=args.warmup_steps,
        num_training_steps=t_total)

    # Train!
    logger.info("***** Running training *****")
    global_step = 0
    epochs_trained = 0
    steps_trained_in_current_epoch = 0
    # Check if continuing training from a checkpoint
    if os.path.exists(args.model):
        # set global_step to the global_step of the last saved checkpoint from the model path
        global_step = int(args.model.split("-")[-1].split("/")[0])
        epochs_trained = global_step // (len(data_loader) //
                                         args.gradient_accumulation_steps)
        steps_trained_in_current_epoch = global_step % (
            len(data_loader) // args.gradient_accumulation_steps)
        logger.info(
            "Continuing training from checkpoint, will skip to saved global_step"
        )
        logger.info(f"Continuing training from epoch {epochs_trained}")
        logger.info(f"Continuing training from global step {global_step}")
        logger.info(
            f"Will skip the first {steps_trained_in_current_epoch} steps in the first epoch"
        )

    # Training loop
    model.zero_grad()
    epoch_pbar = trange(epochs_trained, int(args.n_epochs))
    av_loss = 0
    for current_epoch in epoch_pbar:
        epoch_pbar.set_description(
            f"Epoch [{current_epoch+1}/{args.n_epochs}]")
        pbar = tqdm(data_loader)
        for step, batch in enumerate(pbar):
            # Skip past any already trained steps if resuming training
            if steps_trained_in_current_epoch > 0:
                steps_trained_in_current_epoch -= 1
                continue
            model.train()
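            # For causal language modeling the inputs double as the labels; the model shifts them internally when computing the loss.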
            inputs, labels = (batch, batch)
            inputs = inputs.to(args.device)
            labels = labels.to(args.device)
            loss, *_ = model(inputs, labels=labels)
            loss.backward()
            tr_loss = loss.item()
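            # Running (cumulative) average of the loss, shown in the progress bar.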
            av_loss = (step * av_loss + tr_loss) / (step + 1)
            pbar.set_description(f"Average loss: {av_loss:.4f}")
            torch.nn.utils.clip_grad_norm_(model.parameters(), args.max_norm)
            if (step + 1) % args.gradient_accumulation_steps == 0:
                optimizer.step()
                scheduler.step()  # Update learning rate schedule
                model.zero_grad()
                global_step += 1
                if global_step % args.save_every == 0 and global_step > 0:
                    checkpoint_prefix = "checkpoint"
                    output_dir = os.path.join(
                        'runs', args.run_name,
                        "{}-{}".format(checkpoint_prefix, global_step))
                    if not os.path.exists(output_dir):
                        os.makedirs(output_dir)
                    logger.info(f"Saving model checkpoint to {output_dir}")
                    model.save_pretrained(output_dir)
                    tokenizer.save_pretrained(output_dir)
                    logger.info(
                        f"Saving optimizer and scheduler states to {output_dir}"
                    )
                    torch.save(optimizer.state_dict(),
                               os.path.join(output_dir, "optimizer.pt"))
                    torch.save(scheduler.state_dict(),
                               os.path.join(output_dir, "scheduler.pt"))

    # save model
    output_dir = os.path.join('runs', args.run_name)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    logger.info(f"Saving model checkpoint to {output_dir}")
    model.save_pretrained(output_dir)
    tokenizer.save_pretrained(output_dir)
    # Good practice: save your training arguments together with the trained model
    torch.save(args, os.path.join(output_dir, "training_args.bin"))
def run():
    parser = ArgumentParser()
    parser.add_argument("--run_name",
                        type=str,
                        default='run1',
                        help="The name of the run (subdirectory in ./runs)")
    parser.add_argument(
        "--model",
        type=str,
        default="openai-gpt",
        help="Model type (openai-gpt or gpt2)",
        choices=['openai-gpt',
                 'gpt2'])  # anything besides gpt2 will load openai-gpt
    parser.add_argument(
        "--max_history",
        type=int,
        default=2,
        help="Number of previous utterances to keep in history")
    parser.add_argument("--device",
                        type=str,
                        default="cuda" if torch.cuda.is_available() else "cpu",
                        help="Device (cuda or cpu)")
    parser.add_argument("--no_sample",
                        action='store_true',
                        help="Set to use greedy decoding instead of sampling")
    parser.add_argument("--max_length",
                        type=int,
                        default=40,
                        help="Maximum length of the output utterances")
    parser.add_argument("--min_length",
                        type=int,
                        default=1,
                        help="Minimum length of the output utterances")
    parser.add_argument("--seed", type=int, default=0, help="Seed")
    parser.add_argument("--temperature",
                        type=int,
                        default=1,
                        help="Sampling softmax temperature")
    parser.add_argument(
        "--top_k",
        type=int,
        default=0,
        help="Filter top-k tokens before sampling (<=0: no filtering)")
    parser.add_argument(
        "--top_p",
        type=float,
        default=0.8,
        help="Nucleus filtering (top-p) before sampling (<=0.0: no filtering)")
    args = parser.parse_args()

    # set seed
    set_seed(args.seed)

    logger.info("Get pretrained model and tokenizer")
    model_path = os.path.join('runs', args.run_name)
    tokenizer_class, model_class = (
        GPT2Tokenizer,
        GPT2LMHeadModel) if args.model == 'gpt2' else (OpenAIGPTTokenizer,
                                                       OpenAIGPTLMHeadModel)
    tokenizer = tokenizer_class.from_pretrained(model_path)
    model = model_class.from_pretrained(model_path)
    model.to(args.device)
    add_special_tokens_(model, tokenizer)
    history = []
    while True:
        raw_text = input(">>> ")
        while not raw_text:
            print('Prompt should not be empty!')
            raw_text = input(">>> ")
        history.append(tokenizer.encode(raw_text))
        with torch.no_grad():
            out_ids = sample_sequence(history, tokenizer, model, args)
        history.append(out_ids)
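        # Keep only the last 2 * max_history + 1 utterances of the conversation.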
        history = history[-(2 * args.max_history + 1):]
        out_text = tokenizer.decode(out_ids, skip_special_tokens=True)
        print(out_text)
def train():
    parser = ArgumentParser()
    parser.add_argument(
        "--data_path",
        type=str,
        default=None,
        help="Path to conversational data (by default will look for single file in ./data)",
    )
    parser.add_argument(
        "--run_name",
        type=str,
        default="run1",
        help="The name of the run (subdirectory in ./runs)",
    )
    parser.add_argument(
        "--model",
        type=str,
        default="openai-gpt",
        help="Initialize model from path to checkpoint or with model name (openai-gpt/openai-gpt2)",
    )
    parser.add_argument(
        "--save_every",
        type=int,
        default=100,
        help="Save checkpoint every n updates steps.",
    )
    parser.add_argument(
        "--start_from",
        type=int,
        default=0,
        help="Continue training from a checkpoint.",
    )
    parser.add_argument(
        "--num_candidates",
        type=int,
        default=2,
        help="Number of candidates for training",
    )
    parser.add_argument(
        "--max_history",
        type=int,
        default=2,
        help="Number of previous exchanges to keep in history",
    )
    parser.add_argument(
        "--max_input_length",
        type=int,
        default=200,
        help="Number of tokens which will be fed into the model (reduce this number if you have memory constraints)",
    )
    parser.add_argument(
        "--weight_decay", default=0.0, type=float, help="Weight decay if we apply some."
    )
    parser.add_argument(
        "--train_batch_size", type=int, default=4, help="Batch size for training"
    )
    parser.add_argument(
        "--gradient_accumulation_steps",
        type=int,
        default=8,
        help="Accumulate gradients on several steps",
    )
    parser.add_argument("--lr", type=float, default=6.25e-5, help="Learning rate")
    parser.add_argument(
        "--adam_epsilon", default=1e-8, type=float, help="Epsilon for Adam optimizer."
    )
    parser.add_argument(
        "--lm_coef", type=float, default=1.0, help="LM loss coefficient"
    )
    parser.add_argument(
        "--mc_coef", type=float, default=1.0, help="Multiple-choice loss coefficient"
    )
    parser.add_argument(
        "--max_norm", type=float, default=1.0, help="Clipping gradient norm"
    )
    parser.add_argument(
        "--n_epochs", type=int, default=3, help="Number of training epochs"
    )
    parser.add_argument(
        "--device",
        type=str,
        default="cuda" if torch.cuda.is_available() else "cpu",
        help="Device (cuda or cpu)",
    )
    parser.add_argument(
        "--warmup_steps", default=0, type=int, help="Linear warmup over warmup_steps."
    )
    parser.add_argument(
        "--seed", type=int, default=42, help="random seed for initialization"
    )
    parser.add_argument(
        "--use_huggingface_model",
        action="store_true",
        help="Start training from pre-trained model by Huggingface",
    )
    args = parser.parse_args()

    # Set seed
    set_seed(args.seed)

    if args.use_huggingface_model:
        args.model = download_pretrained_model()
        logger.info(f'Using pre-trained Personachat model {args.model}')

    # if args.model == "gpt2":
    #     tokenizer_class, model_class = GPT2Tokenizer, GPT2DoubleHeadsModel
    # elif args.model == "distilbert-base-multilingual-cased":
    #     tokenizer_class, model_class = tr.DistilBertTokenizer, tr.DistilBertForMaskedLM
    # elif args.model == "bert-base-multilingual-cased":
    #     tokenizer_class, model_class = tr.DistilBertTokenizer, tr.BertForMaskedLM
    # else:
    #     tokenizer_class, model_class = OpenAIGPTTokenizer, OpenAIGPTLMHeadModel
    # model_path = os.path.join("runs", args.run_name)
    # tokenizer = tokenizer_class.from_pretrained(args.model)
    # model = model_class.from_pretrained(args.model)
    tokenizer = tr.GPT2Tokenizer.from_pretrained(args.model)
    model = tr.GPT2DoubleHeadsModel.from_pretrained(args.model)
    model.to(args.device)
    # Add special tokens if they are not already added
    add_special_tokens_(model, tokenizer)

    # Get data loaders
    logger.info("Prepare datasets")
    train_loader = get_data_loader(args, tokenizer, use_cache=True)

    # Prepare optimizer and schedule (linear warmup and decay)
    no_decay = ["bias", "LayerNorm.weight"]
    optimizer_grouped_parameters = [
        {
            "params": [
                p
                for n, p in model.named_parameters()
                if not any(nd in n for nd in no_decay)
            ],
            "weight_decay": args.weight_decay,
        },
        {
            "params": [
                p
                for n, p in model.named_parameters()
                if any(nd in n for nd in no_decay)
            ],
            "weight_decay": 0.0,
        },
    ]
    optimizer = AdamW(optimizer_grouped_parameters, lr=args.lr, eps=args.adam_epsilon)
    t_total = len(train_loader) // args.gradient_accumulation_steps * args.n_epochs
    scheduler = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=args.warmup_steps, num_training_steps=t_total
    )

    # Train!
    logger.info("***** Running training *****")
    global_step = 0
    epochs_trained = 0
    steps_trained_in_current_epoch = 0
    # Check if continuing training from a checkpoint
    if os.path.exists(args.model):
        # set global_step to the global_step of the last saved checkpoint from the model path
        global_step = args.start_from
        epochs_trained = global_step // (
            len(train_loader) // args.gradient_accumulation_steps
        )
        steps_trained_in_current_epoch = global_step % (
            len(train_loader) // args.gradient_accumulation_steps
        )
        logger.info(
            "Continuing training from checkpoint, will skip to saved global_step"
        )
        logger.info(f"Continuing training from epoch {epochs_trained}")
        logger.info(f"Continuing training from global step {global_step}")
        logger.info(
            f"Will skip the first {steps_trained_in_current_epoch} steps in the first epoch"
        )

    # Training loop
    model.zero_grad()
    epoch_pbar = trange(epochs_trained, int(args.n_epochs))
    av_loss = 0
    for current_epoch in epoch_pbar:
        epoch_pbar.set_description(f"Epoch [{current_epoch + 1}/{args.n_epochs}]")
        pbar = tqdm(train_loader)
        for step, batch in enumerate(pbar):
            # Skip past any already trained steps if resuming training
            if steps_trained_in_current_epoch > 0:
                steps_trained_in_current_epoch -= 1
                continue
            model.train()
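            # Move every tensor in the batch to the target device before unpacking.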
            batch = tuple(input_tensor.to(args.device) for input_tensor in batch)
            input_ids, mc_token_ids, lm_labels, mc_labels, token_type_ids = batch
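            # The double-heads model returns the language-modeling loss and the multiple-choice loss.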
            lm_loss, mc_loss, *_ = model(
                input_ids,
                token_type_ids=token_type_ids,
                mc_token_ids=mc_token_ids,
                mc_labels=mc_labels,
                lm_labels=lm_labels,
            )
            loss = (
                lm_loss * args.lm_coef + mc_loss * args.mc_coef
            ) / args.gradient_accumulation_steps
            loss.backward()
            tr_loss = loss.item()
            # running (cumulative) average of the loss, shown in the progress bar
            av_loss = (step * av_loss + tr_loss) / (step + 1)
            pbar.set_description(f"Average loss: {av_loss:.4f}")
            torch.nn.utils.clip_grad_norm_(model.parameters(), args.max_norm)
            if (step + 1) % args.gradient_accumulation_steps == 0:
                optimizer.step()
                scheduler.step()  # Update learning rate schedule
                model.zero_grad()
                global_step += 1
                # if global_step % args.save_every == 0 and global_step > 0:
                #     checkpoint_prefix = "checkpoint"
                #     output_dir = os.path.join(
                #         "runs",
                #         args.run_name,
                #         "{}-{}".format(checkpoint_prefix, global_step),
                #     )
                #     if not os.path.exists(output_dir):
                #         os.makedirs(output_dir)
                #     logger.info(f"Saving model checkpoint to {output_dir}")
                #     model.save_pretrained(output_dir)
                #     tokenizer.save_pretrained(output_dir)
                #     logger.info(
                #         f"Saving optimizer and scheduler states to {output_dir}"
                #     )
                #     torch.save(
                #         optimizer.state_dict(), os.path.join(output_dir, "optimizer.pt")
                #     )
                #     torch.save(
                #         scheduler.state_dict(), os.path.join(output_dir, "scheduler.pt")
                #     )

    # save model
    output_dir = os.path.join("runs", args.run_name)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    logger.info(f"Saving model checkpoint to {output_dir}")
    model.save_pretrained(output_dir)
    tokenizer.save_pretrained(output_dir)
    # Good practice: save your training arguments together with the trained model
    torch.save(args, os.path.join(output_dir, "training_args.bin"))
def run():
    parser = ArgumentParser()
    parser.add_argument(
        "--dataset_path",
        type=str,
        default="",
        help="Path or url of the dataset. If empty download from S3.")
    parser.add_argument(
        "--dataset_cache",
        type=str,
        default='./dataset_cache/dataset_cache_OpenAIGPTTokenizer',
        help="Path or url of the dataset cache")
    parser.add_argument("--model_checkpoint",
                        type=str,
                        default="./Model",
                        help="Path, url or short name of the model")
    parser.add_argument(
        "--max_history",
        type=int,
        default=2,
        help="Number of previous utterances to keep in history")
    parser.add_argument("--device",
                        type=str,
                        default="cuda" if torch.cuda.is_available() else "cpu",
                        help="Device (cuda or cpu)")

    parser.add_argument("--no_sample",
                        action='store_true',
                        help="Set to use greedy decoding instead of sampling")
    parser.add_argument("--max_length",
                        type=int,
                        default=20,
                        help="Maximum length of the output utterances")
    parser.add_argument("--min_length",
                        type=int,
                        default=1,
                        help="Minimum length of the output utterances")
    parser.add_argument("--seed", type=int, default=0, help="Seed")
    parser.add_argument("--temperature",
                        type=int,
                        default=0.7,
                        help="Sampling softmax temperature")
    parser.add_argument(
        "--top_k",
        type=int,
        default=0,
        help="Filter top-k tokens before sampling (<=0: no filtering)")
    parser.add_argument(
        "--top_p",
        type=float,
        default=0.9,
        help="Nucleus filtering (top-p) before sampling (<=0.0: no filtering)")
    args = parser.parse_args()

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__file__)
    logger.info(pformat(args))

    if args.seed != 0:
        random.seed(args.seed)
        torch.random.manual_seed(args.seed)
        torch.cuda.manual_seed(args.seed)

    # Load the model class and tokenizer
    logger.info("Get pretrained model and tokenizer")
    tokenizer_class, model_class = OpenAIGPTTokenizer, OpenAIGPTLMHeadModel
    tokenizer = tokenizer_class.from_pretrained(args.model_checkpoint)
    model = model_class.from_pretrained(args.model_checkpoint)
    model.to(args.device)
    add_special_tokens_(model, tokenizer)

    logger.info("Sample a personality")
    dataset = torch.load(args.dataset_cache)
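    # Gather the personalities from every split of the cached dataset.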
    personalities = [
        dialog["personality"] for split in dataset.values()
        for dialog in split
    ]
    personality = random.choice(personalities)
    logger.info("Selected personality: %s",
                tokenizer.decode(chain(*personality)))

    history = []
    while True:
        raw_text = input(">>> ")
        while not raw_text:
            print('Prompt should not be empty!')
            raw_text = input(">>> ")
        history.append(tokenizer.encode(raw_text))
        with torch.no_grad():
            out_ids = sample_sequence(personality, history, tokenizer, model,
                                      args)
        history.append(out_ids)
        history = history[-(2 * args.max_history + 1):]
        out_text = tokenizer.decode(out_ids, skip_special_tokens=True)
        print(out_text)
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__file__)
logger.info(pformat(args))

if args.seed != 0:
    random.seed(args.seed)
    torch.random.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)

# Load the model class and tokenizer
logger.info("Get pretrained model and tokenizer")
tokenizer_class, model_class = OpenAIGPTTokenizer, OpenAIGPTLMHeadModel
tokenizer = tokenizer_class.from_pretrained(args.model_checkpoint)
model = model_class.from_pretrained(args.model_checkpoint)
model.to(args.device)
add_special_tokens_(model, tokenizer)

logger.info("Sample a personality")
dataset = torch.load(args.dataset_cache)
personalities = [
    dialog["personality"] for split in dataset.values() for dialog in split
]
personality = None
history = []

app = Flask(__name__)


@app.route("/")
def home():
    global personality
Example #9
def main():
    def get_item(data, item):
        # Look for the key at the top level first, then inside a nested 'message' dict.
        if item in data:
            return data[item]
        if 'message' in data and item in data['message']:
            return data['message'][item]
        return None

    @RTMClient.run_on(event="message")
    async def slack_interact(**payload):
        data = payload['data']
        user = get_item(data, 'user')

        if user == SLACK_USER:
            print(f'Receiving new payload by user {user}')
            print(payload)
            print(history)

            web_client = payload['web_client']
            message = get_item(data, 'text')
            if message is None:
                return
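            # Encode the new message, generate a reply, and trim the history window.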
            history.append(tokenizer.encode(message))
            with torch.no_grad():
                out_ids = sample_sequence(history, tokenizer, model, args)
            history.append(out_ids)
            del history[:-(2 * args.max_history + 1)]
            out_text = tokenizer.decode(out_ids, skip_special_tokens=True)

            # respond
            channel_id = data['channel']
            await web_client.chat_postMessage(channel=channel_id,
                                              text=out_text)
        else:
            return

    parser = ArgumentParser()
    parser.add_argument("--run_name",
                        type=str,
                        default='run1',
                        help="The name of the run (subdirectory in ./runs)")
    parser.add_argument("--model",
                        type=str,
                        default="openai-gpt",
                        help="Model type (openai-gpt or gpt2)",
                        choices=['openai-gpt', 'gpt2'])
    parser.add_argument(
        "--max_history",
        type=int,
        default=2,
        help="Number of previous utterances to keep in history")
    parser.add_argument("--device",
                        type=str,
                        default="cuda" if torch.cuda.is_available() else "cpu",
                        help="Device (cuda or cpu)")
    parser.add_argument("--no_sample",
                        action='store_true',
                        help="Set to use greedy decoding instead of sampling")
    parser.add_argument("--max_length",
                        type=int,
                        default=40,
                        help="Maximum length of the output utterances")
    parser.add_argument("--min_length",
                        type=int,
                        default=1,
                        help="Minimum length of the output utterances")
    parser.add_argument("--seed", type=int, default=0, help="Seed")
    parser.add_argument("--temperature",
                        type=int,
                        default=1,
                        help="Sampling softmax temperature")
    parser.add_argument(
        "--top_k",
        type=int,
        default=0,
        help="Filter top-k tokens before sampling (<=0: no filtering)")
    parser.add_argument(
        "--top_p",
        type=float,
        default=0.8,
        help="Nucleus filtering (top-p) before sampling (<=0.0: no filtering)")
    args = parser.parse_args()

    # set seed
    set_seed(args.seed)
    logger.info("Get pretrained model and tokenizer")
    model_path = os.path.join('runs', args.run_name)
    tokenizer_class, model_class = (
        GPT2Tokenizer,
        GPT2LMHeadModel) if args.model == 'gpt2' else (OpenAIGPTTokenizer,
                                                       OpenAIGPTLMHeadModel)
    tokenizer = tokenizer_class.from_pretrained(model_path)
    model = model_class.from_pretrained(model_path)
    model.to(args.device)
    add_special_tokens_(model, tokenizer)
    history = []

    # start RTM API
    loop = asyncio.get_event_loop()
    rtm_client = RTMClient(token=SLACK_API_TOKEN, run_async=True, loop=loop)
    loop.run_until_complete(rtm_client.start())