Code example #1
def __init__(self,
             class_size=None,
             pretrained_model="gpt2-medium",
             classifier_head=None,
             cached_mode=False,
             device='cpu'):
    super(Discriminator, self).__init__()
    if pretrained_model.startswith("gpt2"):
        self.tokenizer = GPT2Tokenizer.from_pretrained(pretrained_model)
        self.encoder = GPT2LMHeadModel.from_pretrained(pretrained_model)
        self.embed_size = self.encoder.transformer.config.hidden_size
    elif pretrained_model.startswith("bert"):
        self.tokenizer = BertTokenizer.from_pretrained(pretrained_model)
        self.encoder = BertModel.from_pretrained(pretrained_model)
        self.embed_size = self.encoder.config.hidden_size
    elif "finetune" in pretrained_model:
        # presume a fine-tuned bert-base-uncased checkpoint
        self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
        self.encoder = BertModel.from_pretrained(
            pretrained_model, output_hidden_states=False)
        self.embed_size = self.encoder.config.hidden_size
    else:
        # without this branch, an unmatched model name would fail later
        # with an AttributeError on self.embed_size
        raise ValueError(
            "{} model not yet supported".format(pretrained_model))
    if classifier_head:
        self.classifier_head = classifier_head
    else:
        if not class_size:
            raise ValueError("must specify class_size")
        self.classifier_head = ClassificationHead(
            class_size=class_size, embed_size=self.embed_size)
    self.cached_mode = cached_mode
    self.device = device
Code example #2
def __init__(self,
             class_size=None,
             pretrained_model="gpt2-medium",
             classifier_head=None,
             cached_mode=False,
             device=DEVICE):
    super(Discriminator, self).__init__()
    if pretrained_model.startswith("gpt2"):
        self.tokenizer = GPT2Tokenizer.from_pretrained(pretrained_model)
        self.encoder = GPT2LMHeadModel.from_pretrained(pretrained_model)
        self.embed_size = self.encoder.transformer.config.hidden_size
    elif pretrained_model.startswith("bert"):
        self.tokenizer = BertTokenizer.from_pretrained(pretrained_model)
        self.encoder = BertModel.from_pretrained(pretrained_model)
        self.embed_size = self.encoder.config.hidden_size
    else:
        raise ValueError(
            "{} model not yet supported".format(pretrained_model))
    if classifier_head:
        self.classifier_head = classifier_head
    else:
        if not class_size:
            raise ValueError("must specify class_size")
        self.classifier_head = ClassificationHead(
            class_size=class_size, embed_size=self.embed_size)
    self.cached_mode = cached_mode
    self.device = device
Code example #3
def __init__(self, class_size, pretrained_model="gpt2-medium",
             cached_mode=False, device="cpu"):
    super().__init__()
    self.tokenizer = GPT2Tokenizer.from_pretrained(pretrained_model)
    self.encoder = GPT2LMHeadModel.from_pretrained(pretrained_model)
    self.embed_size = self.encoder.transformer.config.hidden_size
    self.classifier_head = ClassificationHead(class_size=class_size,
                                              embed_size=self.embed_size)
    self.cached_mode = cached_mode
    self.device = device
Code example #4
import torch
import torch.nn.functional as F
from transformers import GPT2Tokenizer, GPT2LMHeadModel

# ClassificationHead (a small linear head) is defined in the surrounding
# project; EPSILON is its divide-by-zero guard (1e-10 in the source script).
EPSILON = 1e-10


class Discriminator(torch.nn.Module):
    """Transformer encoder followed by a Classification Head"""

    def __init__(
            self,
            class_size,
            pretrained_model="gpt2-medium",
            cached_mode=False,
            device='cpu'
    ):
        super(Discriminator, self).__init__()
        self.tokenizer = GPT2Tokenizer.from_pretrained(pretrained_model)
        self.encoder = GPT2LMHeadModel.from_pretrained(pretrained_model)
        self.embed_size = self.encoder.transformer.config.hidden_size
        self.classifier_head = ClassificationHead(
            class_size=class_size,
            embed_size=self.embed_size
        )
        self.cached_mode = cached_mode
        self.device = device

    def get_classifier(self):
        return self.classifier_head

    def train_custom(self):
        for param in self.encoder.parameters():
            param.requires_grad = False
        self.classifier_head.train()

    def avg_representation(self, x):
        mask = x.ne(0).unsqueeze(2).repeat(
            1, 1, self.embed_size
        ).float().to(self.device).detach()
        hidden, _ = self.encoder.transformer(x)
        masked_hidden = hidden * mask
        avg_hidden = torch.sum(masked_hidden, dim=1) / (
                torch.sum(mask, dim=1).detach() + EPSILON
        )
        return avg_hidden

    def forward(self, x):
        if self.cached_mode:
            avg_hidden = x.to(self.device)
        else:
            avg_hidden = self.avg_representation(x.to(self.device))

        logits = self.classifier_head(avg_hidden)
        probs = F.log_softmax(logits, dim=-1)

        return probs
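A minimal usage sketch for the class above (not from the original project): the two-class setup, label ids, and learning rate are illustrative, and it assumes a transformers version contemporary with PPLM, where model calls return tuples.

discriminator = Discriminator(class_size=2, device="cpu")
discriminator.train_custom()  # freezes the encoder; only the head trains

optimizer = torch.optim.Adam(
    discriminator.get_classifier().parameters(), lr=1e-4)
x = torch.tensor([discriminator.tokenizer.encode("an example sentence")])
log_probs = discriminator(x)  # (batch, class_size) log-probabilities
# forward() applies log_softmax, so the NLL loss is the right criterion
loss = F.nll_loss(log_probs, torch.tensor([1]))
loss.backward()
optimizer.step()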
Code example #5
def load_model(pretrained_model,
               sentence,
               discrim_weights,
               discrim_meta,
               device='cpu',
               cached=False):

    with open(discrim_meta, 'r') as discrim_meta_file:
        meta = json.load(discrim_meta_file)
    meta['path'] = discrim_weights

    classifier = ClassificationHead(
        class_size=meta["class_size"],
        embed_size=meta["embed_size"]).to(device)  # keep the head on `device`, not hard-coded 'cpu'

    classifier.load_state_dict(torch.load(discrim_weights,
                                          map_location=device))
    classifier.eval()

    model = Discriminator(pretrained_model=pretrained_model,
                          classifier_head=classifier,
                          cached_mode=cached,
                          device=device)
    model.eval()

    classes = list(meta["class_vocab"])
    predict(sentence, model, classes)
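A hypothetical invocation of load_model; both file names are placeholders for artifacts produced by the discriminator training script.

load_model("gpt2-medium",
           "This movie was awful.",
           "discrim_weights.pt",
           "discrim_meta.json",
           device="cpu",
           cached=False)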
Code example #6
def load_classifier_head(weights_path, meta_path, device=DEVICE):
    with open(meta_path, 'r', encoding="utf8") as f:
        meta_params = json.load(f)
    classifier_head = ClassificationHead(
        class_size=meta_params['class_size'],
        embed_size=meta_params['embed_size']).to(device)
    classifier_head.load_state_dict(
        torch.load(weights_path, map_location=device))
    classifier_head.eval()
    return classifier_head, meta_params
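A sketch of wiring load_classifier_head into the Discriminator variants above. The file names are placeholders, DEVICE is assumed to be defined as in the other examples, and a "pretrained_model" key in the meta file is an assumption, hence the fallback.

head, meta = load_classifier_head("classifier_head.pt",
                                  "classifier_head_meta.json")
discriminator = Discriminator(
    pretrained_model=meta.get("pretrained_model", "gpt2-medium"),
    classifier_head=head,
    cached_mode=False,
    device=DEVICE)
discriminator.eval()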
Code example #7
def get_classifier(
    name: Optional[str],
    class_label: Union[str, int],
    device: str,
    verbosity_level: int = REGULAR
) -> Tuple[Optional[ClassificationHead], Optional[int]]:
    """
    Загружаем предварительно сохранный малый торчевский классификатор-дискриминатор по имени.
    """
    if name is None:
        return None, None

    params = DISCRIMINATOR_MODELS_PARAMS[name]
    classifier = ClassificationHead(class_size=params['class_size'],
                                    embed_size=params['embed_size']).to(device)
    if "url" in params:
        resolved_archive_file = cached_path(params["url"])
    elif "path" in params:
        resolved_archive_file = params["path"]
    else:
        raise ValueError("Either url or path have to be specified "
                         "in the discriminator model parameters")
    classifier.load_state_dict(
        torch.load(resolved_archive_file, map_location=device))
    classifier.eval()

    if isinstance(class_label, str):
        if class_label in params["class_vocab"]:
            label_id = params["class_vocab"][class_label]
        else:
            label_id = params["default_class"]
            if verbosity_level >= REGULAR:
                print("class_label {} not in class_vocab".format(class_label))
                print("available values are: {}".format(params["class_vocab"]))
                print("using default class {}".format(label_id))

    elif isinstance(class_label, int):
        if class_label in set(params["class_vocab"].values()):
            label_id = class_label
        else:
            label_id = params["default_class"]
            if verbosity_level >= REGULAR:
                print("class_label {} not in class_vocab".format(class_label))
                print("available values are: {}".format(params["class_vocab"]))
                print("using default class {}".format(label_id))

    else:
        label_id = params["default_class"]

    return classifier, label_id
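For reference, a made-up DISCRIMINATOR_MODELS_PARAMS entry showing the keys this loader reads; every value below is a placeholder, not the real registry shipped with PPLM.

DISCRIMINATOR_MODELS_PARAMS = {
    "sentiment": {
        "url": "https://example.com/sentiment_classifier_head.pt",
        # alternatively: "path": "/local/classifier_head.pt",
        "class_size": 5,
        "embed_size": 1024,
        "class_vocab": {"very_positive": 2, "very_negative": 3},
        "default_class": 3,
    },
}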
Code example #8
File: run_pplm.py  Project: xyzhangfred/PPDRL
def get_classifier_new(model_path, meta_path, device):
    with open(meta_path, 'r') as discrim_meta_file:
        params = json.load(discrim_meta_file)

    classifier = ClassificationHead(
        class_size=params['class_size'],
        embed_size=params['embed_size']
    ).to(device)

    classifier.load_state_dict(
        torch.load(model_path, map_location=device))
    classifier.eval()

    return classifier
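The meta file only needs the two keys read above. A hypothetical way to produce one; embed_size must match the encoder's hidden size (e.g. 1024 for gpt2-medium, 768 for bert-base).

import json

meta = {"class_size": 2, "embed_size": 1024}
with open("discrim_meta.json", "w") as f:
    json.dump(meta, f)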
Code example #9
File: run_pplm.py  Project: gsarti/ETC-NLG
def get_classifier(
        model, name: Optional[str], class_label: Union[str, int],
        device: str) -> Tuple[Optional[ClassificationHead], Optional[int]]:

    if name is None:
        return None, None

    params = DISCRIMINATOR_MODELS_PARAMS[name]
    classifier = ClassificationHead(class_size=params["class_size"],
                                    embed_size=params["embed_size"]).to(device)
    if "url" in params:
        resolved_archive_file = cached_path(params["url"])
    elif "path" in params:
        resolved_archive_file = params["path"]
    else:
        raise ValueError(
            "Either url or path have to be specified in the discriminator model parameters"
        )

    classifier.load_state_dict(
        torch.load(resolved_archive_file, map_location=device))
    classifier.eval()

    if isinstance(class_label, str):
        if class_label in params["class_vocab"]:
            label_id = params["class_vocab"][class_label]
        else:
            label_id = params["default_class"]
            print("class_label {} not in class_vocab".format(class_label))
            print("available values are: {}".format(params["class_vocab"]))
            print("using default class {}".format(label_id))

    elif isinstance(class_label, int):
        if class_label in set(params["class_vocab"].values()):
            label_id = class_label
        else:
            label_id = params["default_class"]
            print("class_label {} not in class_vocab".format(class_label))
            print("available values are: {}".format(params["class_vocab"]))
            print("using default class {}".format(label_id))

    else:
        label_id = params["default_class"]

    return classifier, label_id
Code example #10
def get_classifier(discrim_meta: Optional[dict],
                   device: str) -> Optional[ClassificationHead]:
    if discrim_meta is None:
        return None  # single return value, matching the annotated return type

    params = discrim_meta
    classifier = ClassificationHead(class_size=params['class_size'],
                                    embed_size=params['embed_size']).to(device)
    if "url" in params:
        resolved_archive_file = cached_path(params["url"])
    elif "path" in params:
        resolved_archive_file = params["path"]
    else:
        raise ValueError("Either url or path have to be specified "
                         "in the discriminator model parameters")
    classifier.load_state_dict(
        torch.load(resolved_archive_file, map_location=device))
    classifier.eval()

    return classifier
Code example #11
def get_classifier(
        discrim_meta: Optional[dict], class_label: Union[str, int],
        device: str) -> Tuple[Optional[ClassificationHead], Optional[int]]:
    if discrim_meta is None:
        return None, None

    params = discrim_meta
    classifier = ClassificationHead(class_size=params['class_size'],
                                    embed_size=params['embed_size']).to(device)
    if "url" in params:
        resolved_archive_file = cached_path(params["url"])
    elif "path" in params:
        resolved_archive_file = params["path"]
    else:
        raise ValueError("Either url or path have to be specified "
                         "in the discriminator model parameters")
    classifier.load_state_dict(
        torch.load(resolved_archive_file, map_location=device))
    classifier.eval()

    if isinstance(class_label, str):
        if class_label in params["class_vocab"]:
            label_id = params["class_vocab"][class_label]
        else:
            label_id = params["default_class"]

    elif isinstance(class_label, int):
        if class_label in set(params["class_vocab"].values()):
            label_id = class_label
        else:
            label_id = params["default_class"]

    else:
        label_id = params["default_class"]

    return classifier, label_id
Code example #12
import math

import torch
import torch.nn.functional as F
from transformers import (BertModel, BertTokenizer, GPT2LMHeadModel,
                          GPT2Tokenizer)

# ClassificationHead comes from the surrounding project; EPSILON and DEVICE
# are module-level constants there (the values below are the usual defaults).
EPSILON = 1e-10
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"


class Discriminator(torch.nn.Module):
    """Transformer encoder followed by a Classification Head"""
    def __init__(self,
                 class_size=None,
                 pretrained_model="gpt2-medium",
                 classifier_head=None,
                 cached_mode=False,
                 device=DEVICE):
        super(Discriminator, self).__init__()
        if pretrained_model.startswith("gpt2"):
            self.tokenizer = GPT2Tokenizer.from_pretrained(pretrained_model)
            self.encoder = GPT2LMHeadModel.from_pretrained(pretrained_model)
            self.embed_size = self.encoder.transformer.config.hidden_size
        elif pretrained_model.startswith("bert"):
            self.tokenizer = BertTokenizer.from_pretrained(pretrained_model)
            self.encoder = BertModel.from_pretrained(pretrained_model)
            self.embed_size = self.encoder.config.hidden_size
        else:
            raise ValueError(
                "{} model not yet supported".format(pretrained_model))
        if classifier_head:
            self.classifier_head = classifier_head
        else:
            if not class_size:
                raise ValueError("must specify class_size")
            self.classifier_head = ClassificationHead(
                class_size=class_size, embed_size=self.embed_size)
        self.cached_mode = cached_mode
        self.device = device

    def get_classifier(self):
        return self.classifier_head

    def train_custom(self):
        for param in self.encoder.parameters():
            param.requires_grad = False
        self.classifier_head.train()

    def avg_representation(self, x):
        mask = x.ne(0).unsqueeze(2).repeat(1, 1, self.embed_size).float().to(
            self.device).detach()
        if hasattr(self.encoder, 'transformer'):
            # for gpt2
            hidden, _ = self.encoder.transformer(x)
        else:
            # for bert
            hidden, _ = self.encoder(x)
        masked_hidden = hidden * mask
        avg_hidden = torch.sum(
            masked_hidden, dim=1) / (torch.sum(mask, dim=1).detach() + EPSILON)
        return avg_hidden

    def forward(self, x):
        if self.cached_mode:
            avg_hidden = x.to(self.device)
        else:
            avg_hidden = self.avg_representation(x.to(self.device))

        logits = self.classifier_head(avg_hidden)
        probs = F.log_softmax(logits, dim=-1)

        return probs

    def predict(self, input_sentence):
        input_t = self.tokenizer.encode(input_sentence)
        input_t = torch.tensor([input_t], dtype=torch.long, device=self.device)
        if self.cached_mode:
            input_t = self.avg_representation(input_t)

        log_probs = self(input_t).data.cpu().numpy().flatten().tolist()
        prob = [math.exp(log_prob) for log_prob in log_probs]
        return prob
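A hedged example of the predict helper at the end of this variant; the two-class setup is illustrative, and downloading the gpt2-medium weights is assumed.

discriminator = Discriminator(class_size=2,
                              pretrained_model="gpt2-medium").eval()
probs = discriminator.predict("This movie was great.")
# probs is a plain list of class_size probabilities summing to ~1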