Code Example #1
File: semi_attack.py Project: chong-z/NLG-project
def main(args):
    set_rseed(args.rseed)

    if args.ppl_use:
        print(f'Loading GPT-2...')
        lm_scorer = LMScorer.from_pretrained("gpt2", device="cuda:0", batch_size=1)
        print(f'Loading UniversalSentenceEncoder...')
        u = UniversalSentenceEncoder()
        def ppl(s):
            return -lm_scorer.sentence_score(s, log=True)
        def use(s1, s2):
            return u.cos_sim(s1, s2)
    else:
        def ppl(s):
            return 0
        def use(s1, s2):
            return 0

    print(f'Loading the VAE model...')
    vae = load_vae_model_from_args(args)
    print(f'Loading the victim model from HuggingFace...')
    victim_model = load_huggingface_model_from_args(args)

    if args.n_eval > 0:
        print(f'\n-------Evaluation Mode-------')
        do_evaluation(vae, victim_model, ppl, use, args)
    else:
        print(f'\n-------Debug Mode-------')
        if args.reference_sentence is None:
            do_n_attacks(vae, args.victim_sentence, victim_model, ppl, use, args)
        else:
            do_one_attack(vae, args.victim_sentence, args.reference_sentence, victim_model, ppl, use, args)
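For context, the ppl helper above simply negates the log-probability returned by the scorer. A minimal standalone sketch of that pattern, assuming the lm-scorer package (where LMScorer is commonly imported as AutoLMScorer); the sample sentence is illustrative:

from lm_scorer.models.auto import AutoLMScorer as LMScorer

scorer = LMScorer.from_pretrained("gpt2", device="cpu", batch_size=1)

def ppl(s):
    # Negated log-probability: less likely sentences get a larger score.
    return -scorer.sentence_score(s, log=True)

print(ppl("This is a fluent sentence."))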
Code Example #2
    def __init__(self, reduce_mode="gmean", device="cuda"):
        if device == "cpu":
            logger.warning("Running LMScorer on CPU. Scoring may be slow.")

        self.model = LMScorer.from_pretrained("gpt2",
                                              device=device,
                                              batch_size=1)
        self.reduce_mode = reduce_mode
        self.tokenizer = Tokenizer()
Code Example #3
    def __init__(self, model_scale):
        self.device = torch.device(
            "cuda" if torch.cuda.is_available() else "cpu")
        #self.device = "cpu"

        print("Device is " + str(self.device))

        #self.tokenizer = GPT2Tokenizer.from_pretrained("distilgpt2")
        #self.model = GPT2LMHeadModel.from_pretrained("distilgpt2")
        if model_scale == 0:
            self.tokenizer = AutoTokenizer.from_pretrained("distilgpt2")
            self.model = AutoModelWithLMHead.from_pretrained("distilgpt2")
            self.scorer = LMScorer.from_pretrained("distilgpt2",
                                                   device=self.device)
        elif model_scale == 1:
            self.tokenizer = AutoTokenizer.from_pretrained("gpt2")
            self.model = AutoModelWithLMHead.from_pretrained("gpt2")
            self.scorer = LMScorer.from_pretrained("gpt2", device=self.device)
        elif model_scale == 2:
            self.tokenizer = AutoTokenizer.from_pretrained("gpt2-medium")
            self.model = AutoModelWithLMHead.from_pretrained("gpt2-medium")
            self.scorer = LMScorer.from_pretrained("gpt2-medium",
                                                   device=self.device)
        elif model_scale == 3:
            self.tokenizer = AutoTokenizer.from_pretrained("gpt2-large")
            self.model = AutoModelWithLMHead.from_pretrained("gpt2-large")
            self.scorer = LMScorer.from_pretrained("gpt2-large",
                                                   device=self.device)
        else:
            self.tokenizer = AutoTokenizer.from_pretrained("gpt2-xl")
            self.model = AutoModelWithLMHead.from_pretrained("gpt2-xl")
            self.scorer = LMScorer.from_pretrained("gpt2-xl",
                                                   device=self.device)

        #self.model.eval()
        self.model.to(self.device)
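The if/elif ladder above only varies the checkpoint name, so the same selection can be written as a lookup. A sketch that assumes the imports used above (AutoTokenizer, AutoModelWithLMHead, LMScorer) are in scope and keeps gpt2-xl as the fallback:

MODEL_NAMES = {0: "distilgpt2", 1: "gpt2", 2: "gpt2-medium", 3: "gpt2-large"}

def build_scoring_components(model_scale, device):
    # Any unknown scale falls through to the largest model, as in the else branch above.
    name = MODEL_NAMES.get(model_scale, "gpt2-xl")
    tokenizer = AutoTokenizer.from_pretrained(name)
    model = AutoModelWithLMHead.from_pretrained(name).to(device)
    scorer = LMScorer.from_pretrained(name, device=device)
    return tokenizer, model, scorer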
Code Example #4
 def __init__(self,
              model,
              batch_model,
              neighbour_model,
              compute_dis,
              lm,
              max_iters,
              dataset,
              pop_size,
              n1,
              n2,
              n_prefix,
              n_suffix,
              use_lm=True,
              use_suffix=False):
     #        self.dist_mat = dist_mat
     self.compute_dist = compute_dis
     self.dataset = dataset
     self.dict = self.dataset.dict
     self.inv_dict = self.dataset.inv_dict
     #        self.skip_list = skip_list
     self.model = model
     self.batch_model = batch_model
     self.neighbour_model = neighbour_model
     #        self.sess = sess
     self.n_prefix = n_prefix
     self.n_suffix = n_suffix
     self.max_iters = max_iters
     self.pop_size = pop_size
     self.lm = lm
     self.top_n = n1  # similar words
     self.top_n1 = n1
     self.top_n2 = n2
     self.use_lm = use_lm
     self.use_suffix = use_suffix
     self.temp = 0.0003
     self.device = torch.device(
         'cuda:0' if torch.cuda.is_available() else 'cpu')
     batch_size = 1
     self.scorer = LMScorer.from_pretrained("gpt2",
                                            device=self.device,
                                            batch_size=batch_size)
Code Example #5
File: score_sentences.py Project: izaskr/a3_iza
 def init_LM_scorer(self):
     device = "cuda:1"
     batch_size = 1
     self.scorer = LMScorer.from_pretrained("gpt2", device=device, batch_size=batch_size)
Code Example #6
 def should_throw_an_exception_when_called():
     with pytest.raises(EnvironmentError):
         AutoLMScorer()
Code Example #7
 def should_return_gpt2_models(mocker):
     mocker.patch.object(GPT2LMScorer, "__init__", return_value=None)
     for model_name in GPT2LMScorer.supported_model_names():
         scorer = AutoLMScorer.from_pretrained(model_name)
         assert isinstance(scorer, GPT2LMScorer), model_name
Code Example #8
 def should_throw_an_error_for_an_unsupported_model_name():
     with pytest.raises(ValueError):
         AutoLMScorer.from_pretrained("_")
Code Example #9
 def should_not_be_empty():
     assert len(list(AutoLMScorer.supported_model_names())) > 0
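Taken together, Examples #6 through #9 describe the intended entry point: scorers are built only through AutoLMScorer.from_pretrained with one of the supported model names. A minimal sketch, assuming the lm-scorer package layout in which GPT2LMScorer lives under lm_scorer.models.gpt2:

from lm_scorer.models.auto import AutoLMScorer
from lm_scorer.models.gpt2 import GPT2LMScorer

names = list(AutoLMScorer.supported_model_names())
assert len(names) > 0                      # non-empty, per the last test

scorer = AutoLMScorer.from_pretrained(names[0])
assert isinstance(scorer, GPT2LMScorer)    # per the mocked test above

# Calling AutoLMScorer() directly or passing an unsupported name raises,
# as the first two tests assert.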
Code Example #10
            chars = len(sent)
            words = len(sent.split())

            sent_chars[sent] = chars
            sent_words[sent] = words

            db.record_stat(sent, args.source_name, 'chars', chars)
            db.record_stat(sent, args.source_name, 'words', words)

    device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
    print('using device', device)

    for model in MODELS:
        print('loading model', model, '...')
        scorer = LMScorer.from_pretrained(model, device=device, batch_size=1)

        scores = []
        charss = []
        wordss = []
        for sent in sents:
            score = scorer.sentence_score(sent, log=True)
            print(score, sent)

            scores.append(score)

            db.record_stats(
                sent, args.source_name, {
                    f'lm-{model}': score,
                    f'lm-{model}-div-chars': score / sent_chars[sent],
                    f'lm-{model}-div-words': score / sent_words[sent],
                })
Code Example #11
    def __load_scorer_model(cls, model_name: str, **kwargs) -> LMScorer:
        cache_dir = os.environ.get("TRANSFORMERS_CACHE_DIR", ".transformers_cache")
        kwargs["cache_dir"] = kwargs.get("cache_dir", cache_dir)

        scorer = AutoLMScorer.from_pretrained(model_name, **kwargs)
        return scorer
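For reference, the same cache redirection works without the wrapper, since the extra keyword arguments are forwarded to from_pretrained. A sketch where the model name is illustrative and TRANSFORMERS_CACHE_DIR is the environment variable read above:

import os
from lm_scorer.models.auto import AutoLMScorer

cache_dir = os.environ.get("TRANSFORMERS_CACHE_DIR", ".transformers_cache")
scorer = AutoLMScorer.from_pretrained("gpt2", cache_dir=cache_dir)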