def __init__(self, content):
    """Create both model wrappers and pre-compute the feature matrices.

    :param content: Input handed to ``create_matrix`` on each wrapper; it is
        also kept on the instance as ``self.content``.
    """
    self.content = content

    # Two wrappers are created: 'openApi' first, then 'bert', both 'large'.
    openai_wrapper = BertParent('openApi', 'large')
    bert_wrapper = BertParent('bert', 'large')
    self.gp2 = openai_wrapper
    self.bert_model = bert_wrapper

    # Matrices are built eagerly. The last call passes True as the second
    # positional argument to create_matrix.
    self.gp2_non_hidden = openai_wrapper.create_matrix(content)
    self.bert_non_hidden = bert_wrapper.create_matrix(content)
    self.bert_hidden = bert_wrapper.create_matrix(content, True)
class LectureEnsembler(object):
    """Select items by majority vote across three cluster runs.

    At construction time three feature matrices are built from ``content``:
    two from a ``BertParent('bert', 'large')`` wrapper (one of them with
    ``True`` passed as the second ``create_matrix`` argument) and one from a
    ``BertParent('openApi', 'large')`` wrapper. ``run_clusters`` clusters
    each matrix and keeps the values chosen by more than one of them.
    """

    def __init__(self, content):
        """Create both model wrappers and pre-compute the feature matrices.

        :param content: Input handed to ``create_matrix`` on each wrapper;
            also stored as ``self.content``.
        """
        self.gp2 = BertParent('openApi', 'large')
        self.bert_model = BertParent('bert', 'large')
        self.gp2_non_hidden = self.gp2.create_matrix(content)
        self.bert_non_hidden = self.bert_model.create_matrix(content)
        self.bert_hidden = self.bert_model.create_matrix(content, True)
        self.content = content

    def __vote(self, arg_list):
        """Count occurrences across all candidate lists and keep the repeats.

        :param arg_list: Iterable of iterables of hashable values.
        :return: Dict mapping every value seen more than once (across all
            inner iterables combined) to its occurrence count.
        """
        all_tally = {}
        for args in arg_list:
            for arg in args:
                all_tally[arg] = all_tally.get(arg, 0) + 1
        return {k: v for k, v in all_tally.items() if v > 1}

    def run_clusters(self, cluster_percentage=0.2):
        """Cluster each feature matrix and return the values that won a vote.

        :param cluster_percentage: Passed unchanged to
            ``ClusterFeatures.cluster`` for each of the three matrices.
        :return: Sorted list of the values picked by more than one cluster
            run. ``0`` is always the first element, even when no value
            received more than one vote.
        """
        candidates = [
            ClusterFeatures(self.bert_non_hidden).cluster(cluster_percentage),
            ClusterFeatures(self.bert_hidden).cluster(cluster_percentage),
            ClusterFeatures(self.gp2_non_hidden).cluster(cluster_percentage),
        ]

        votes = self.__vote(candidates)
        sorted_keys = sorted(votes)
        # The vote can come back empty when the three runs share no value;
        # check for that before indexing so 0 is still returned instead of
        # raising IndexError.
        if not sorted_keys or sorted_keys[0] != 0:
            sorted_keys.insert(0, 0)
        return sorted_keys
Exemple #3
0
    def __init__(
        self,
        model: str = 'bert-large-uncased',
        custom_model: PreTrainedModel = None,
        custom_tokenizer: PreTrainedTokenizer = None,
        hidden: int = -2,
        reduce_option: str = 'mean',
        greedyness: float = 0.45,
        language=English,
        random_state: int = 12345,
    ):
        """
        Set up the parent Bert Summarizer model. New methods should implement this class.

        :param model: Model identifier string, as understood by the transformers library.
        :param custom_model: Optional pre-trained model class to use instead of ``model``.
        :param custom_tokenizer: Optional custom tokenizer to use.
        :param hidden: Which layer of the BERT model to use as embeddings.
        :param reduce_option: How the output of the bert model should be reduced.
        :param greedyness: Forwarded to neuralcoref; determines how greedy coref should be.
        :param language: Callable that builds the language pipeline (called with no arguments).
        :param random_state: Seed given to ``np.random.seed`` so summarizations can be reproduced.
        """

        # Seed first, before any model or pipeline object is created.
        np.random.seed(random_state)
        self.random_state = random_state
        self.hidden = hidden
        self.reduce_option = reduce_option
        self.model = BertParent(model, custom_model, custom_tokenizer)

        # Build the language pipeline, add a sentencizer pipe, then register
        # neuralcoref on the same pipeline object.
        pipeline = language()
        pipeline.add_pipe(pipeline.create_pipe('sentencizer'))
        neuralcoref.add_to_pipe(pipeline, greedyness=greedyness)
        self.nlp = pipeline
 def __init__(
     self,
     model='bert-large-uncased',
     hidden: int = -2,
     reduce_option: str = 'mean',
     greedyness: float = 0.45
 ):
     """Create the model wrapper and the English sentence pipeline.

     :param model: Passed to ``BertParent`` as its only argument.
     :param hidden: Stored as ``self.hidden``.
     :param reduce_option: Stored as ``self.reduce_option``.
     :param greedyness: Forwarded to ``neuralcoref.add_to_pipe``.
     """
     self.hidden = hidden
     self.reduce_option = reduce_option
     self.model = BertParent(model)

     # English pipeline with a sentencizer pipe, then neuralcoref on top.
     pipeline = English()
     pipeline.add_pipe(pipeline.create_pipe('sentencizer'))
     neuralcoref.add_to_pipe(pipeline, greedyness=greedyness)
     self.nlp = pipeline
Exemple #5
0
class SingleModelProcessor(object):
    """Pick a subset of ``content`` entries using one model's feature clusters."""

    def __init__(self, model='bert', model_size='large', use_hidden=True):
        """Create the model wrapper.

        :param model: First argument passed to ``BertParent``.
        :param model_size: Second argument passed to ``BertParent``.
        :param use_hidden: Stored and later passed as the second argument
            of ``create_matrix``.
        """
        self.model = BertParent(model, model_size)
        self.use_hidden = use_hidden

    def run_clusters(self, content: List[str], ratio=0.2) -> List[str]:
        """Return the entries of ``content`` selected by clustering.

        :param content: The entries to choose from.
        :param ratio: Passed unchanged to ``ClusterFeatures.cluster``.
        :return: The selected entries in the order returned by ``cluster``,
            with ``content[0]`` placed first when the selection does not
            already start with index 0. An empty ``content`` yields ``[]``.
        """
        # Nothing to select from; also avoids indexing content[0] below.
        if not content:
            return []

        hidden = self.model.create_matrix(content, self.use_hidden)
        # Copy so the list returned by cluster() is not mutated in place.
        selected = list(ClusterFeatures(hidden).cluster(ratio))
        # Guard the empty case before looking at the first index.
        if not selected or selected[0] != 0:
            selected.insert(0, 0)
        return [content[j] for j in selected]
 def __init__(
     self,
     model='bert-large-uncased',
     hidden: int = -2,
     reduce_option: str = 'mean',
     greedyness: float = 0.45,
     language=English,
     random_state: int = 12345
 ):
     """Seed NumPy, create the model wrapper and build the language pipeline.

     :param model: Passed to ``BertParent`` as its only argument.
     :param hidden: Stored as ``self.hidden``.
     :param reduce_option: Stored as ``self.reduce_option``.
     :param greedyness: Forwarded to ``neuralcoref.add_to_pipe``.
     :param language: Callable invoked with no arguments to build the pipeline.
     :param random_state: Seed given to ``np.random.seed``; also stored.
     """
     # Seed first, before any model or pipeline object is created.
     np.random.seed(random_state)
     self.random_state = random_state
     self.hidden = hidden
     self.reduce_option = reduce_option
     self.model = BertParent(model)

     # Pipeline with a sentencizer pipe, then neuralcoref on top.
     pipeline = language()
     pipeline.add_pipe(pipeline.create_pipe('sentencizer'))
     neuralcoref.add_to_pipe(pipeline, greedyness=greedyness)
     self.nlp = pipeline
 def __init__(
     self,
     model='bert-base-multilingual-uncased',
     custom_model: PreTrainedModel = None,
     custom_tokenizer: PreTrainedTokenizer = None,
     hidden: int = -2,
     reduce_option: str = 'mean',
     greedyness: float = 0.45,
     language=Vietnamese,
     random_state: int = 12345
 ):
     """Seed NumPy, create the model wrapper and build the language pipeline.

     :param model: First argument passed to ``BertParent``.
     :param custom_model: Second argument passed to ``BertParent``.
     :param custom_tokenizer: Third argument passed to ``BertParent``.
     :param hidden: Stored as ``self.hidden``.
     :param reduce_option: Stored as ``self.reduce_option``.
     :param greedyness: Forwarded to ``neuralcoref.add_to_pipe``.
     :param language: Callable invoked with no arguments to build the
         pipeline; defaults to ``Vietnamese``.
     :param random_state: Seed given to ``np.random.seed``; also stored.
     """
     # Seed first, before any model or pipeline object is created.
     np.random.seed(random_state)
     self.random_state = random_state
     self.hidden = hidden
     self.reduce_option = reduce_option
     self.model = BertParent(model, custom_model, custom_tokenizer)

     # Pipeline with a sentencizer pipe, then neuralcoref on top.
     pipeline = language()
     pipeline.add_pipe(pipeline.create_pipe('sentencizer'))
     neuralcoref.add_to_pipe(pipeline, greedyness=greedyness)
     self.nlp = pipeline
Exemple #8
0
 def __init__(self, model_type='bert', size='large', n_trees=40):
     """Initialize the ``BertParent`` base and record ``n_trees``.

     :param model_type: Forwarded to ``BertParent.__init__``.
     :param size: Forwarded to ``BertParent.__init__``.
     :param n_trees: Stored as ``self.n_trees``; where it is consumed is not
         visible here (NOTE(review): presumably the tree count used when
         building an Annoy index -- confirm against the rest of the class).
     """
     # Explicit base-class call; the enclosing class header is not visible here.
     BertParent.__init__(self, model_type, size)
     self.n_trees = n_trees
Exemple #9
0
 def __init__(self, content, annoy_index, model_type='bert', size='large'):
     """Initialize the ``BertParent`` base and pre-compute features for ``content``.

     :param content: Stored as ``self.content`` and passed to ``create_matrix``.
     :param annoy_index: Stored as ``self.annoy_index``; not used in this method
         (NOTE(review): presumably a pre-built Annoy index -- confirm).
     :param model_type: Forwarded to ``BertParent.__init__``.
     :param size: Forwarded to ``BertParent.__init__``.
     """
     # The base initializer runs first; create_matrix is called on self below.
     BertParent.__init__(self, model_type, size)
     self.annoy_index = annoy_index
     self.content = content
     # Feature matrix is built eagerly at construction time, with use_hidden=True.
     self.content_features = self.create_matrix(self.content,
                                                use_hidden=True)
 def __init__(self, model='bert', model_size='large', use_hidden=True):
     """Record the ``use_hidden`` flag and create the model wrapper.

     :param model: First argument passed to ``BertParent``.
     :param model_size: Second argument passed to ``BertParent``.
     :param use_hidden: Stored as ``self.use_hidden``.
     """
     self.use_hidden = use_hidden
     wrapper = BertParent(model, model_size)
     self.model = wrapper