def __init__(self, model_dir=os.path.join('models', 'bert', 'classification')):
    """Load every BERT sentence-classification model found under *model_dir*.

    Each model lives in its own sub-directory containing a transformers
    config file; stray files in *model_dir* are skipped.
    """
    Service.__init__(self, 'classification', 'bert', ['parse'])
    self.models = {}
    self.results = {}
    self.id2label = {}
    languages = set()
    for name in os.listdir(model_dir):
        model_path = os.path.join(model_dir, name)
        # Only directories hold a model; ignore stray files.
        if not os.path.isdir(model_path):
            continue
        with open(os.path.join(model_path, transformers.CONFIG_NAME), 'r') as f:
            configs = json.load(f)
        model_info = configs[constants.MODEL_INFO]
        language = model_info[constants.LANGUAGE]
        languages.add(language)
        self.models[name] = bert_for_sentence_classification.BertForSentenceClassification(
            language, configs['_num_labels'], model_path)
        self.results[name] = model_info
        self.id2label[name] = models_utilities.load_labels(model_path)
    self.langs = list(languages)
def __init__(self):
    """Register the regular expressions used to spot Italian codes.

    Each entry pairs a compiled case-insensitive pattern with the code
    label it detects and the language it applies to.
    """
    Service.__init__(self, 'codes', 'regex', [])
    # (pattern, label, language) triples; compiled below with IGNORECASE.
    # NOTE(review): the PIVA pattern requires a non-digit on BOTH sides,
    # so an 11-digit number at the very start or end of the text will not
    # match — confirm this is intended.
    specs = [
        (r'([a-z]{6}\s?\d{2}\s?[a-z]{1}\s?\d{2}\s?[a-z]{1}\s?\d{3}\s?[a-z]{1})',
         'FISCAL_CODE', 'it'),
        (r'(IT\d{2}[ ][a-zA-Z]\d{3}[ ]\d{4}[ ]\d{4}[ ]\d{4}[ ]\d{4}[ ]\d{3})',
         'IBAN', 'it'),
        (r'(IT\d{2}[a-zA-Z]\d{22}|IT\d{2}[a-zA-Z][ ]\d{5}[ ]\d{5}[ ]\d{12})',
         'IBAN', 'it'),
        (r'(IT\s?\d{2}\s?[a-z]\s?\d{8}\s?\d{6}\s?\d{8})',
         'IBAN', 'it'),
        (r'\D(\d{11})\D',
         'PIVA', 'it'),
    ]
    self.regexes = [(re.compile(pattern, re.IGNORECASE), label, lang)
                    for pattern, label, lang in specs]
def __init__(self):
    """Load Italian honorific prefixes and first-name/surname word lists."""
    Service.__init__(self, 'names', 'misc', ['ner'], ['fiscal_code'])
    # Common Italian honorifics that often precede a person's name.
    self._person_prefixes = [
        'sig.ra', 'sig.a', 'sig.na', 'sig', 'sig.', 'avv', 'avv.',
        'dott', 'dott.', 'dr', 'dr.', 'egr', 'ra',
    ]
    self._names = _line_set(os.path.join('resources', 'names', 'it.txt'))
    self._surnames = _line_set(os.path.join('resources', 'surnames', 'it.txt'))
def __init__(self, model_dir=os.path.join('models', 'gensim', 'lda'),
             stopwords_dir=os.path.join('resources', 'stopwords')):
    """Load every gensim LDA model under *model_dir* and per-language stopwords.

    Args:
        model_dir: directory with one sub-directory per model, each holding
            a gensim-saved file named ``model``.
        stopwords_dir: directory of stopword files whose first two filename
            characters are the language code.
    """
    Service.__init__(self, 'topic-modeling', 'lda-gensim', ['parse'])
    self.models = {}
    self.stopwords = {}
    for name in os.listdir(model_dir):
        # Fix: skip stray files (e.g. .gitkeep) — only directories hold a
        # model.  This matches the guard used by the BERT services.
        if not os.path.isdir(os.path.join(model_dir, name)):
            continue
        self.models[name] = LdaModel.load(os.path.join(model_dir, name, 'model'))
    for name in os.listdir(stopwords_dir):
        # Filenames start with the two-letter language code (e.g. 'it.txt').
        lang = name[:2]
        with open(os.path.join(stopwords_dir, name)) as f:
            self.stopwords[lang] = {line.strip() for line in f}
def __init__(self, models_dir='models/opennmt/translation'):
    """Load one OpenNMT translation model per language-pair directory."""
    Service.__init__(self, 'translation', 'opennmt', ['parse'])
    self.models = {}
    self.descriptions = {}
    for lang in os.listdir(models_dir):
        # Language-pair directories are 5 characters long (e.g. 'en-it');
        # anything else is skipped.
        if len(lang) != 5:
            continue
        model_path = os.path.join(models_dir, lang)
        self.models[lang] = self._load_model(model_path, lang)
        self.descriptions[lang] = _load_model_description(model_path)
def __init__(self, models_dir='models/allen/sentiment-regression'):
    """Load one AllenNLP sentiment-regression model per language directory."""
    Service.__init__(self, 'sentiment', 'allen-regression', ['parse'])
    self.models = {}
    self.descriptions = {}
    self.indexer = ELMoTokenCharactersIndexer()
    for lang in os.listdir(models_dir):
        # Only two-letter language-code directories are models; skip the rest.
        if len(lang) != 2:
            continue
        model_path = os.path.join(models_dir, lang)
        self.models[lang] = self._load_model(model_path)
        self.descriptions[lang] = _load_model_description(model_path)
def __init__(self, server, cfg_fname='linkserv.json'):
    """Set up the link service and auto-connect when the config requests it."""
    Service.__init__(self, server)
    self.nick = 'linkserv'
    self.delim = '\r\n.\r\n'
    self.links = []
    self._cfg_fname = cfg_fname
    # Expose the lock's bound methods directly: self._lock() acquires and
    # self._unlock() releases; the Lock object itself is not retained.
    lock = threading.Lock()
    self._unlock = lock.release
    self._lock = lock.acquire
    cfg = self.get_cfg()
    if 'autoconnect' in cfg and cfg['autoconnect'] in self._yes:
        self.connect_all()
def __init__(self, models_dir='models/allen/ner'):
    """Load an AllenNLP reader/predictor pair for every language directory."""
    Service.__init__(self, 'ner', 'allen-custom', ['parse'])
    self.readers = {}
    self.predictors = {}
    self.descriptions = {}
    for lang in os.listdir(models_dir):
        model_path = os.path.join(models_dir, lang)
        self.readers[lang], self.predictors[lang] = \
            self._load_reader_and_predictor(model_path)
        self.descriptions[lang] = _load_model_description(model_path)
def __init__(self, server, cfg_fname='tcserv.json'):
    """Initialise TC service state, load its config and start connecting."""
    Service.__init__(self, server)
    self.nick = 'tcserv'
    self.cfg_fname = cfg_fname
    self.handle_error = server.handle_error
    self.dbg = lambda m: self.server.dbg('TCServ: %s' % m)
    # Keep the lock plus bound acquire/release shortcuts for the database.
    db_lock = threading.Lock()
    self._db_lock = db_lock
    self._lock_db = db_lock.acquire
    self._unlock_db = db_lock.release
    self.onion_peers = {}
    self.db_name = ':memory:'
    self.peers = 0
    self.unlisted_peers = []
    self._load_config()
    self.listener = TC_Listener(self)
    self.connect_all()
def __init__(self, model_dir=os.path.join('models', 'sklearn', 'nmf')):
    """Load pickled NMF topic models, their vectorizers and result metadata.

    Each model directory under *model_dir* must contain ``model.pkl`` (a
    dict with 'model' and 'vectorizer') and ``results.json``.
    """
    Service.__init__(self, 'topic-modeling', 'sklearn', [])
    self.models = {}
    self.vectorizers = {}
    self.results = {}
    langs = set()
    for name in os.listdir(model_dir):
        # Fix: skip stray files — only directories hold a model, consistent
        # with the guard used by the BERT services.
        if not os.path.isdir(os.path.join(model_dir, name)):
            continue
        # NOTE(review): unpickling is only safe for trusted, locally
        # produced model files — never load untrusted pickles.
        with open(os.path.join(model_dir, name, 'model.pkl'), 'rb') as f:
            bundle = pickle.load(f)
        self.models[name] = bundle['model']
        self.vectorizers[name] = bundle['vectorizer']
        with open(os.path.join(model_dir, name, 'results.json')) as f:
            results = json.load(f)
        if 'lang' in results:
            langs.add(results['lang'])
        self.results[name] = results
    self.langs = list(langs)
def __init__(self, model_dir=os.path.join('models', 'bert', 'next_sentence_prediction')):
    """Load every BERT next-sentence-prediction model under *model_dir*."""
    Service.__init__(self, 'next-sentence-prediction', 'bert', ['parse'])
    self.models = {}
    self.results = {}
    languages = set()
    for name in os.listdir(model_dir):
        model_path = os.path.join(model_dir, name)
        # Only directories hold a model; ignore stray files.
        if not os.path.isdir(model_path):
            continue
        with open(os.path.join(model_path, transformers.CONFIG_NAME), 'r') as f:
            configs = json.load(f)
        model_info = configs[constants.MODEL_INFO]
        language = model_info[constants.LANGUAGE]
        languages.add(language)
        self.models[name] = bert_for_next_sentence_prediction.BertForNextSentencePrediction(
            language, model_path)
        self.results[name] = model_info
    self.langs = list(languages)
def __init__(self, model_dir='models/sklearn/classification'):
    """Load pickled sklearn classifiers with their patterns and metadata.

    Each model directory under *model_dir* must contain ``model.pkl`` (a
    dict with 'model', 'patterns' and 'extra_patterns') and ``results.json``.
    """
    Service.__init__(self, 'classification', 'sklearn', [])
    self.models = {}
    self.patterns = {}
    self.extra_patterns = {}
    self.results = {}
    langs = set()
    for name in os.listdir(model_dir):
        # Fix: skip stray files — only directories hold a model, consistent
        # with the guard used by the BERT services.
        if not os.path.isdir(os.path.join(model_dir, name)):
            continue
        # NOTE(review): unpickling is only safe for trusted, locally
        # produced model files — never load untrusted pickles.
        with open(os.path.join(model_dir, name, 'model.pkl'), 'rb') as f:
            bundle = pickle.load(f)
        self.models[name] = bundle['model']
        self.patterns[name] = bundle['patterns']
        self.extra_patterns[name] = bundle['extra_patterns']
        with open(os.path.join(model_dir, name, 'results.json')) as f:
            results = json.load(f)
        if 'lang' in results:
            langs.add(results['lang'])
        self.results[name] = results
    self.langs = list(langs)
def __init__(self, stopwords_dir=os.path.join('resources', 'stopwords')):
    """Create the TextRank extractive summariser with per-language stopwords."""
    Service.__init__(self, 'extractive-summarization', 'textrank', ['parse'])
    self.stopwords = _read_stopwords(stopwords_dir)
def __init__(self, stopwords_dir=os.path.join('resources', 'stopwords')):
    """Create the TextRank keyword extractor with per-language stopwords."""
    Service.__init__(self, 'keywords', 'textrank', ['parse'])
    self.stopwords = _read_stopwords(stopwords_dir)
def __init__(self, langs=None):
    """Create the NLTK NER service.

    Args:
        langs: optional list of language codes.  Currently unused here;
            kept for interface compatibility.  Defaults to an empty list.
    """
    # Fix: replaced the mutable default argument (langs=[]) with the
    # None-sentinel idiom to avoid a shared list across calls.
    if langs is None:
        langs = []
    Service.__init__(self, 'ner', 'nltk', ['parse'])
def __init__(self):
    """Load the pretrained AllenNLP NER model shipped with the project."""
    Service.__init__(self, 'ner', 'allen', [])
    pretrained_path = 'models/allen/pretrained/ner-model-2018.12.18.tar.gz'
    self.model = Predictor.from_path(pretrained_path)
def __init__(self):
    """Build the lookup tables used to validate Italian fiscal codes.

    The tables follow the official codice fiscale check-digit algorithm:
    a letter-to-month map plus per-character values for characters in odd
    and even positions of the code.
    """
    Service.__init__(self, 'fiscal-code', 'misc', ['codes'])
    # Month-code letter -> month number (fiscal codes skip some letters).
    self._months = {
        'A': 1, 'B': 2, 'C': 3, 'D': 4, 'E': 5, 'H': 6,
        'L': 7, 'M': 8, 'P': 9, 'R': 10, 'S': 11, 'T': 12
    }
    # Check-digit values for characters in ODD positions (1-based).
    self._odd = {
        '0': 1, '1': 0, '2': 5, '3': 7, '4': 9, '5': 13,
        '6': 15, '7': 17, '8': 19, '9': 21, 'A': 1, 'B': 0,
        'C': 5, 'D': 7, 'E': 9, 'F': 13, 'G': 15, 'H': 17,
        'I': 19, 'J': 21, 'K': 2, 'L': 4, 'M': 18, 'N': 20,
        'O': 11, 'P': 3, 'Q': 6, 'R': 8, 'V': 10, 'S': 12,
        'T': 14, 'U': 16, 'W': 22, 'X': 25, 'Y': 24, 'Z': 23
    }
    # Check-digit values for characters in EVEN positions: digits map to
    # themselves, letters to their alphabetical index.
    self._even = {
        '0': 0, '1': 1, '2': 2, '3': 3, '4': 4, '5': 5,
        '6': 6, '7': 7, '8': 8, '9': 9, 'A': 0, 'B': 1,
        'C': 2, 'D': 3, 'E': 4, 'F': 5, 'G': 6, 'H': 7,
        'I': 8, 'J': 9, 'K': 10, 'L': 11, 'M': 12, 'N': 13,
        'O': 14, 'P': 15, 'Q': 16, 'R': 17, 'S': 18, 'T': 19,
        'U': 20, 'V': 21, 'W': 22, 'X': 23, 'Y': 24, 'Z': 25
    }
def __init__(self, langs):
    """Create the date-extraction service for the given language codes."""
    Service.__init__(self, 'dates', 'misc', [])
    self.langs = langs
def __init__(self):
    """Compile the sentence- and token-splitting regular expressions."""
    Service.__init__(self, 'parse', 'regex', [])
    # Sentences end at '.', '!' or '?' followed by whitespace, or at end of text.
    self.sentence_regex = re.compile(r'([.!?]\s+|$)')
    # Tokens are separated by whitespace or punctuation, or end of text.
    self.token_regex = re.compile(r'([\s;,:.!?]|$)')
def __init__(self, langs=None):
    """Create the NLTK parse service with sentence and word tokenizers.

    Args:
        langs: optional list of language codes.  Not used by the
            tokenizers; kept for interface compatibility.  Defaults to an
            empty list.
    """
    # Fix: replaced the mutable default argument (langs=[]) with the
    # None-sentinel idiom to avoid a shared list across calls.
    if langs is None:
        langs = []
    Service.__init__(self, 'parse', 'nltk', [])
    self.punktSentenceTokenizer = PunktSentenceTokenizer()
    self.treebankWordTokenizer = TreebankWordTokenizer()
def __init__(self, langs):
    """Load one spaCy pipeline per requested language code."""
    Service.__init__(self, 'ner', 'spacy', [])
    self.models = {lang: spacy.load(lang) for lang in langs}