class LectureEnsembler(object):

    def __init__(self, content):
        self.gp2 = BertParent('openApi', 'large')
        self.bert_model = BertParent('bert', 'large')
        self.gp2_non_hidden = self.gp2.create_matrix(content)
        self.bert_non_hidden = self.bert_model.create_matrix(content)
        self.bert_hidden = self.bert_model.create_matrix(content, True)
        self.content = content

    def __vote(self, arg_list):
        # Tally how many of the models selected each sentence index and keep
        # only the indices chosen by at least two of them.
        all_tally = {}
        for args in arg_list:
            for arg in args:
                if arg in all_tally:
                    all_tally[arg] += 1
                else:
                    all_tally[arg] = 1
        return {k: v for k, v in all_tally.items() if v > 1}

    def run_clusters(self, cluster_percentage=0.2):
        bc_non_hidden_args = ClusterFeatures(self.bert_non_hidden).cluster(cluster_percentage)
        bc_hidden_args = ClusterFeatures(self.bert_hidden).cluster(cluster_percentage)
        gp2_non_hidden_args = ClusterFeatures(self.gp2_non_hidden).cluster(cluster_percentage)
        votes = self.__vote([bc_non_hidden_args, bc_hidden_args, gp2_non_hidden_args])
        sorted_keys = sorted(votes.keys())
        # Always include the first sentence; the guard also covers the case
        # where no index received more than one vote.
        if not sorted_keys or sorted_keys[0] != 0:
            sorted_keys.insert(0, 0)
        return sorted_keys

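# Usage sketch (not from the source): the sentence list is hypothetical.
# run_clusters returns the sentence indices voted for by at least two of the
# three embedding models, so we map the indices back to sentences ourselves.
lecture_sentences = [
    "Today we cover transformer models.",
    "BERT produces contextual embeddings for each sentence.",
    "GPT-2 embeddings are ensembled with the BERT ones by voting.",
    "Clustering the embedding matrices picks representative sentences.",
]
ensembler = LectureEnsembler(lecture_sentences)
summary = [lecture_sentences[i] for i in ensembler.run_clusters(cluster_percentage=0.5)]
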
def __init__(self, model: str = 'bert-large-uncased', custom_model: PreTrainedModel = None,
             custom_tokenizer: PreTrainedTokenizer = None, hidden: int = -2,
             reduce_option: str = 'mean', greedyness: float = 0.45,
             language=English, random_state: int = 12345):
    """
    This is the parent BERT summarizer model. New summarizers should inherit from this class.

    :param model: The pretrained model name string passed through to the transformers library.
    :param custom_model: If you have a pre-trained model, you can add the model class here.
    :param custom_tokenizer: If you have a custom tokenizer, you can add the tokenizer here.
    :param hidden: Which hidden layer of the BERT model to use for the embeddings.
    :param reduce_option: How to reduce the token embeddings from the BERT output into a single sentence embedding.
    :param greedyness: Passed to the neuralcoref library; determines how greedy coreference resolution should be.
    :param language: The spaCy language class used to build the processing pipeline.
    :param random_state: The random state used to reproduce summarizations.
    """
    np.random.seed(random_state)
    self.model = BertParent(model, custom_model, custom_tokenizer)
    self.hidden = hidden
    self.reduce_option = reduce_option
    self.nlp = language()
    self.random_state = random_state
    self.nlp.add_pipe(self.nlp.create_pipe('sentencizer'))
    neuralcoref.add_to_pipe(self.nlp, greedyness=greedyness)

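# Hypothetical instantiation sketch: the enclosing class name is not shown in
# this snippet, so `ModelProcessor` below is an assumed name.
processor = ModelProcessor(
    model='bert-large-uncased',
    hidden=-2,             # second-to-last encoder layer used for embeddings
    reduce_option='mean',  # average token vectors into one sentence vector
    greedyness=0.45,       # passed through to neuralcoref
)
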
def __init__(self, model='bert-large-uncased', hidden: int = -2,
             reduce_option: str = 'mean', greedyness: float = 0.45):
    self.model = BertParent(model)
    self.hidden = hidden
    self.reduce_option = reduce_option
    self.nlp = English()
    self.nlp.add_pipe(self.nlp.create_pipe('sentencizer'))
    neuralcoref.add_to_pipe(self.nlp, greedyness=greedyness)

class SingleModelProcessor(object):

    def __init__(self, model='bert', model_size='large', use_hidden=True):
        self.model = BertParent(model, model_size)
        self.use_hidden = use_hidden

    def run_clusters(self, content: List[str], ratio=0.2) -> List[str]:
        hidden = self.model.create_matrix(content, self.use_hidden)
        hidden_args = ClusterFeatures(hidden).cluster(ratio)
        if hidden_args[0] != 0:
            hidden_args.insert(0, 0)  # always include the first sentence
        return [content[j] for j in hidden_args]

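# Minimal usage sketch (not from the source): the sentences are hypothetical
# and `ratio` is the fraction of sentences the clustering keeps.
sentences = [
    "The lecture introduces extractive summarization.",
    "Sentence embeddings come from a hidden BERT layer.",
    "Clustering the embeddings selects representative sentences.",
]
processor = SingleModelProcessor(model='bert', model_size='large', use_hidden=True)
top_sentences = processor.run_clusters(sentences, ratio=0.4)
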
def __init__(self, model='bert-large-uncased', hidden: int = -2, reduce_option: str = 'mean',
             greedyness: float = 0.45, language=English, random_state: int = 12345):
    np.random.seed(random_state)
    self.model = BertParent(model)
    self.hidden = hidden
    self.reduce_option = reduce_option
    self.nlp = language()
    self.random_state = random_state
    self.nlp.add_pipe(self.nlp.create_pipe('sentencizer'))
    neuralcoref.add_to_pipe(self.nlp, greedyness=greedyness)

def __init__(self, model='bert-base-multilingual-uncased', custom_model: PreTrainedModel = None,
             custom_tokenizer: PreTrainedTokenizer = None, hidden: int = -2,
             reduce_option: str = 'mean', greedyness: float = 0.45,
             language=Vietnamese, random_state: int = 12345):
    np.random.seed(random_state)
    self.model = BertParent(model, custom_model, custom_tokenizer)
    self.hidden = hidden
    self.reduce_option = reduce_option
    self.nlp = language()
    self.random_state = random_state
    self.nlp.add_pipe(self.nlp.create_pipe('sentencizer'))
    neuralcoref.add_to_pipe(self.nlp, greedyness=greedyness)

def __init__(self, model_type='bert', size='large', n_trees=40):
    BertParent.__init__(self, model_type, size)
    self.n_trees = n_trees

def __init__(self, content, annoy_index, model_type='bert', size='large'):
    BertParent.__init__(self, model_type, size)
    self.annoy_index = annoy_index
    self.content = content
    self.content_features = self.create_matrix(self.content, use_hidden=True)

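# Construction sketch (not from the source): the enclosing class name is not
# shown in this snippet, so `ContentSearcher` below is an assumed name, and the
# Annoy index is assumed to be prebuilt over the same hidden-layer embedding
# space (1024 dimensions for a bert-large model).
from annoy import AnnoyIndex

annoy_index = AnnoyIndex(1024, 'angular')
annoy_index.load('content.ann')  # hypothetical prebuilt index file
searcher = ContentSearcher(
    content=["First lecture sentence.", "Second lecture sentence."],
    annoy_index=annoy_index,
    model_type='bert',
    size='large',
)
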