def main(args):
    """Seed the RNG, load the models and run an evaluation or a debug attack.

    If ``args.ppl_use`` is truthy, a GPT-2 ``LMScorer`` (on ``cuda:0``) and a
    ``UniversalSentenceEncoder`` are loaded and wrapped into two scoring
    callables.  Otherwise both callables are stubs that always return ``0``.

    The VAE and the victim model are then loaded from ``args`` and one of
    three entry points is run:

    * ``args.n_eval > 0``: ``do_evaluation``.
    * ``args.reference_sentence is None``: ``do_n_attacks`` on
      ``args.victim_sentence``.
    * otherwise: ``do_one_attack`` on ``args.victim_sentence`` with
      ``args.reference_sentence``.
    """
    set_rseed(args.rseed)

    if not args.ppl_use:
        # Metrics are switched off: keep the call signatures, return zero.
        def fluency_score(s):
            return 0

        def similarity_score(s1, s2):
            return 0
    else:
        print('Loading GPT-2...')
        lm_scorer = LMScorer.from_pretrained(
            "gpt2", device="cuda:0", batch_size=1)
        print('Loading UniversalSentenceEncoder...')
        encoder = UniversalSentenceEncoder()

        def fluency_score(s):
            # Negated log-domain sentence score from the GPT-2 scorer.
            return -lm_scorer.sentence_score(s, log=True)

        def similarity_score(s1, s2):
            return encoder.cos_sim(s1, s2)

    print('Loading the VAE model...')
    vae = load_vae_model_from_args(args)
    print('Loading the victim model from HuggingFace...')
    victim_model = load_huggingface_model_from_args(args)

    if args.n_eval > 0:
        print('\n-------Evaluation Mode-------')
        do_evaluation(vae, victim_model, fluency_score, similarity_score, args)
        return

    print('\n-------Debug Mode-------')
    if args.reference_sentence is None:
        do_n_attacks(vae, args.victim_sentence, victim_model,
                     fluency_score, similarity_score, args)
        return
    do_one_attack(vae, args.victim_sentence, args.reference_sentence,
                  victim_model, fluency_score, similarity_score, args)
def __init__(self, reduce_mode="gmean", device="cuda"):
    """Load a GPT-2 ``LMScorer`` and a ``Tokenizer`` for this instance.

    Args:
        reduce_mode: Stored unchanged on ``self.reduce_mode``.
        device: Device handed to ``LMScorer.from_pretrained``.  A warning is
            logged when it equals ``"cpu"``.
    """
    on_cpu = device == "cpu"
    if on_cpu:
        logger.warning("Running LMScorer on CPU. Scoring may be slow.")

    scorer_options = {"device": device, "batch_size": 1}
    self.model = LMScorer.from_pretrained("gpt2", **scorer_options)
    self.reduce_mode = reduce_mode
    self.tokenizer = Tokenizer()
def __init__(self, model_scale):
    """Load tokenizer, LM-head model and ``LMScorer`` for one GPT-2 size.

    Args:
        model_scale: Selects the checkpoint.  ``0`` -> ``distilgpt2``,
            ``1`` -> ``gpt2``, ``2`` -> ``gpt2-medium``,
            ``3`` -> ``gpt2-large``; any other value -> ``gpt2-xl``.

    The device is ``cuda`` when ``torch.cuda.is_available()`` and ``cpu``
    otherwise.  The scorer is created on that device and the model is moved
    to it.
    """
    has_gpu = torch.cuda.is_available()
    self.device = torch.device("cuda" if has_gpu else "cpu")
    print(f"Device is {self.device!s}")

    # Scanned in order with ``==`` so the matching rules are the same as a
    # chain of ``model_scale == k`` tests; no match leaves the largest model.
    known_scales = (
        (0, "distilgpt2"),
        (1, "gpt2"),
        (2, "gpt2-medium"),
        (3, "gpt2-large"),
    )
    checkpoint = "gpt2-xl"
    for scale, name in known_scales:
        if model_scale == scale:
            checkpoint = name
            break

    self.tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    self.model = AutoModelWithLMHead.from_pretrained(checkpoint)
    self.scorer = LMScorer.from_pretrained(checkpoint, device=self.device)
    self.model.to(self.device)
def __init__(
    self,
    model,
    batch_model,
    neighbour_model,
    compute_dis,
    lm,
    max_iters,
    dataset,
    pop_size,
    n1,
    n2,
    n_prefix,
    n_suffix,
    use_lm=True,
    use_suffix=False,
):
    """Store the search configuration and create a GPT-2 ``LMScorer``.

    Every argument is kept on the instance under a matching attribute, with
    these exceptions: ``compute_dis`` is stored as ``self.compute_dist``, and
    ``n1`` is stored as both ``self.top_n`` and ``self.top_n1`` (``n2`` goes
    to ``self.top_n2``).  ``self.dict`` and ``self.inv_dict`` are taken from
    ``dataset``.  ``self.temp`` is fixed at ``0.0003``.

    The scorer is created with batch size 1 on ``cuda:0`` when CUDA is
    available, otherwise on the CPU.
    """
    # Distance function and vocabulary.
    self.compute_dist = compute_dis
    self.dataset = dataset
    self.dict = dataset.dict
    self.inv_dict = dataset.inv_dict

    # Models that are queried during the search.
    self.model = model
    self.batch_model = batch_model
    self.neighbour_model = neighbour_model

    # Search hyper-parameters.
    self.n_prefix = n_prefix
    self.n_suffix = n_suffix
    self.max_iters = max_iters
    self.pop_size = pop_size
    self.lm = lm
    self.top_n = n1  # similar words
    self.top_n1 = n1
    self.top_n2 = n2
    self.use_lm = use_lm
    self.use_suffix = use_suffix
    self.temp = 0.0003

    # Language-model scorer.
    device_name = 'cuda:0' if torch.cuda.is_available() else 'cpu'
    self.device = torch.device(device_name)
    self.scorer = LMScorer.from_pretrained(
        "gpt2", device=self.device, batch_size=1)
def init_LM_scorer(self, device="cuda:1", batch_size=1):
    """Create the GPT-2 ``LMScorer`` and store it on ``self.scorer``.

    Args:
        device: Device passed to ``LMScorer.from_pretrained``.  Defaults to
            ``"cuda:1"``, the value that used to be hard-coded, so existing
            calls behave exactly as before.  Pass e.g. ``"cuda:0"`` or
            ``"cpu"`` on hosts that do not have a second GPU.
        batch_size: Batch size passed to ``LMScorer.from_pretrained``.
            Defaults to ``1``, the previously hard-coded value.
    """
    self.scorer = LMScorer.from_pretrained(
        "gpt2", device=device, batch_size=batch_size)
def should_throw_an_exception_when_called():
    """Calling ``AutoLMScorer()`` directly must raise ``EnvironmentError``."""
    # Callable form of pytest.raises: invokes AutoLMScorer() and checks the
    # exception type.
    pytest.raises(EnvironmentError, AutoLMScorer)
def should_return_gpt2_models(mocker):
    """``AutoLMScorer.from_pretrained`` returns a ``GPT2LMScorer`` for every
    name listed by ``GPT2LMScorer.supported_model_names()``.
    """
    # Replace the constructor with a no-op returning None for this test.
    mocker.patch.object(GPT2LMScorer, "__init__", return_value=None)

    for name in GPT2LMScorer.supported_model_names():
        built = AutoLMScorer.from_pretrained(name)
        # The model name is the assertion message, so a failure says which
        # name produced the wrong type.
        assert isinstance(built, GPT2LMScorer), name
def should_throw_an_error_for_an_unsupported_model_name():
    """``from_pretrained`` must raise ``ValueError`` for the name ``"_"``."""
    unsupported_name = "_"
    with pytest.raises(ValueError):
        AutoLMScorer.from_pretrained(unsupported_name)
def should_not_be_empty():
    """``AutoLMScorer.supported_model_names()`` yields at least one name."""
    names = list(AutoLMScorer.supported_model_names())
    assert len(names) > 0
chars = len(sent) words = len(sent.split()) sent_chars[sent] = chars sent_words[sent] = words db.record_stat(sent, args.source_name, 'chars', chars) db.record_stat(sent, args.source_name, 'words', words) device = 'cuda:0' if torch.cuda.is_available() else 'cpu' print('using device', device) for model in MODELS: print('loading model', model, '...') scorer = LMScorer.from_pretrained(model, device=device, batch_size=1) scores = [] charss = [] wordss = [] for sent in sents: score = scorer.sentence_score(sent, log=True) print(score, sent) scores.append(score) db.record_stats( sent, args.source_name, { f'lm-{model}': score, f'lm-{model}-div-chars': score / sent_chars[sent], f'lm-{model}-div-words': score / sent_words[sent],
def __load_scorer_model(cls, model_name: str, **kwargs) -> LMScorer:
    """Build a scorer through ``AutoLMScorer.from_pretrained``.

    Args:
        model_name: Name forwarded to ``AutoLMScorer.from_pretrained``.
        **kwargs: Forwarded unchanged, except that ``cache_dir`` is filled in
            when the caller did not supply it.

    Returns:
        The scorer returned by ``AutoLMScorer.from_pretrained``.
    """
    # A caller-supplied cache_dir wins.  Otherwise use the
    # TRANSFORMERS_CACHE_DIR environment variable, and if that is not set,
    # the relative ".transformers_cache" directory.
    if "cache_dir" not in kwargs:
        kwargs["cache_dir"] = os.environ.get(
            "TRANSFORMERS_CACHE_DIR", ".transformers_cache")
    return AutoLMScorer.from_pretrained(model_name, **kwargs)