예제 #1
0
    def __init__(self, model_dir=os.path.join('models', 'bert', 'classification')):
        """Register the BERT 'classification' service and load its models.

        Every sub-directory of ``model_dir`` is treated as one model; entries
        that are not directories are ignored.  For each model the file named
        ``transformers.CONFIG_NAME`` is parsed as JSON to obtain the label
        count and the model-info section.

        :param model_dir: directory containing one sub-directory per model.
        """
        Service.__init__(self, 'classification', 'bert', ['parse'])
        self.models = {}
        self.results = {}
        self.id2label = {}
        seen_languages = set()

        for entry in os.listdir(model_dir):
            model_path = os.path.join(model_dir, entry)
            if os.path.isdir(model_path):
                config_path = os.path.join(model_path, transformers.CONFIG_NAME)
                with open(config_path, 'r') as config_file:
                    config = json.load(config_file)

                label_count = config['_num_labels']
                model_info = config[constants.MODEL_INFO]
                model_language = model_info[constants.LANGUAGE]
                seen_languages.add(model_language)

                # Models, their info and their label maps are all keyed by
                # the sub-directory name.
                self.models[entry] = bert_for_sentence_classification.BertForSentenceClassification(
                    model_language, label_count, model_path)
                self.results[entry] = model_info
                self.id2label[entry] = models_utilities.load_labels(model_path)

        # Distinct languages found across all loaded models.
        self.langs = list(seen_languages)
예제 #2
0
 def __init__(self):
     """Register the regex-based 'codes' service and compile its patterns.

     ``self.regexes`` is a list of ``(compiled_pattern, label, language)``
     tuples.  Every pattern is compiled with ``re.IGNORECASE``.
     """
     Service.__init__(self, 'codes', 'regex', [])
     # (pattern source, label, language) in matching order.
     pattern_specs = (
         (r'([a-z]{6}\s?\d{2}\s?[a-z]{1}\s?\d{2}\s?[a-z]{1}\s?\d{3}\s?[a-z]{1})', 'FISCAL_CODE', 'it'),
         (r'(IT\d{2}[ ][a-zA-Z]\d{3}[ ]\d{4}[ ]\d{4}[ ]\d{4}[ ]\d{4}[ ]\d{3})', 'IBAN', 'it'),
         (r'(IT\d{2}[a-zA-Z]\d{22}|IT\d{2}[a-zA-Z][ ]\d{5}[ ]\d{5}[ ]\d{12})', 'IBAN', 'it'),
         (r'(IT\s?\d{2}\s?[a-z]\s?\d{8}\s?\d{6}\s?\d{8})', 'IBAN', 'it'),
         # Eleven digits delimited by a non-digit character on each side.
         (r'\D(\d{11})\D', 'PIVA', 'it'),
     )
     self.regexes = [
         (re.compile(source, re.IGNORECASE), label, lang)
         for source, label, lang in pattern_specs
     ]
예제 #3
0
 def __init__(self):
     """Register the 'names' service and load its name/surname resources.

     The name and surname collections are whatever ``_line_set`` returns for
     the two ``it.txt`` resource files.
     """
     Service.__init__(self, 'names', 'misc', ['ner'], ['fiscal_code'])
     # Prefixes that may precede a person's name (presumably Italian
     # honorific abbreviations -- confirm against the matching code).
     self._person_prefixes = [
         'sig.ra', 'sig.a', 'sig.na',
         'sig', 'sig.',
         'avv', 'avv.',
         'dott', 'dott.',
         'dr', 'dr.',
         'egr', 'ra',
     ]
     names_path = os.path.join('resources', 'names', 'it.txt')
     surnames_path = os.path.join('resources', 'surnames', 'it.txt')
     self._names = _line_set(names_path)
     self._surnames = _line_set(surnames_path)
예제 #4
0
 def __init__(self,
              model_dir=os.path.join('models', 'gensim', 'lda'),
              stopwords_dir=os.path.join('resources', 'stopwords')):
     """Register the 'topic-modeling' LDA service and load models and stopwords.

     :param model_dir: directory whose entries each contain a saved LDA model
         under the file name ``model``.
     :param stopwords_dir: directory of stopword files, one word per line;
         the first two characters of each file name are used as the
         language key.
     """
     Service.__init__(self, 'topic-modeling', 'lda-gensim', ['parse'])
     self.models = {}
     self.stopwords = {}

     for model_name in os.listdir(model_dir):
         model_file = os.path.join(model_dir, model_name, 'model')
         self.models[model_name] = LdaModel.load(model_file)

     for filename in os.listdir(stopwords_dir):
         with open(os.path.join(stopwords_dir, filename)) as handle:
             words = {line.strip() for line in handle}
         self.stopwords[filename[:2]] = words
예제 #5
0
 def __init__(self, models_dir='models/opennmt/translation'):
     """Register the OpenNMT 'translation' service and load its models.

     Only entries of ``models_dir`` whose name is exactly five characters
     long are loaded (presumably language pairs such as ``xx-yy`` --
     confirm against the model layout).

     :param models_dir: directory containing one entry per model.
     """
     Service.__init__(self, 'translation', 'opennmt', ['parse'])
     self.models = {}
     self.descriptions = {}
     for entry in os.listdir(models_dir):
         if len(entry) != 5:
             continue
         entry_path = os.path.join(models_dir, entry)
         self.models[entry] = self._load_model(entry_path, entry)
         self.descriptions[entry] = _load_model_description(entry_path)
예제 #6
0
파일: allen.py 프로젝트: phillswope/charade
 def __init__(self, models_dir='models/allen/sentiment-regression'):
     """Register the 'sentiment' regression service and load its models.

     Only entries of ``models_dir`` whose name is exactly two characters
     long are loaded; the entry name is used as the dictionary key
     (presumably a language code -- confirm against the model layout).

     :param models_dir: directory containing one entry per model.
     """
     Service.__init__(self, 'sentiment', 'allen-regression', ['parse'])
     self.models = {}
     self.descriptions = {}
     self.indexer = ELMoTokenCharactersIndexer()
     for entry in os.listdir(models_dir):
         if len(entry) != 2:
             continue
         entry_path = os.path.join(models_dir, entry)
         self.models[entry] = self._load_model(entry_path)
         self.descriptions[entry] = _load_model_description(entry_path)
예제 #7
0
 def __init__(self, server, cfg_fname='linkserv.json'):
     """Set up the 'linkserv' service and optionally connect all links.

     :param server: passed straight to ``Service.__init__``.
     :param cfg_fname: configuration file name, stored as ``_cfg_fname``.
     """
     Service.__init__(self, server)
     self.nick = 'linkserv'
     self.delim = '\r\n.\r\n'
     self.links = []
     self._cfg_fname = cfg_fname

     # Note: ``self._lock`` and ``self._unlock`` are the bound acquire and
     # release methods of one shared lock, not the lock object itself.
     mutex = threading.Lock()
     self._unlock = mutex.release
     self._lock = mutex.acquire

     cfg = self.get_cfg()
     wants_autoconnect = 'autoconnect' in cfg and cfg['autoconnect'] in self._yes
     if wants_autoconnect:
         self.connect_all()
예제 #8
0
파일: allen.py 프로젝트: phillswope/charade
 def __init__(self, models_dir='models/allen/ner'):
     """Register the custom AllenNLP 'ner' service and load its models.

     Every entry of ``models_dir`` is loaded; the entry name is used as the
     key of the reader, predictor and description dictionaries.

     :param models_dir: directory containing one entry per model.
     """
     Service.__init__(self, 'ner', 'allen-custom', ['parse'])
     self.readers = {}
     self.predictors = {}
     self.descriptions = {}
     for entry in os.listdir(models_dir):
         entry_path = os.path.join(models_dir, entry)
         self.readers[entry], self.predictors[entry] = \
             self._load_reader_and_predictor(entry_path)
         self.descriptions[entry] = _load_model_description(entry_path)
예제 #9
0
 def __init__(self, server, cfg_fname='tcserv.json'):
     """Set up the 'tcserv' service, load its config, then listen and connect.

     :param server: passed to ``Service.__init__``; its ``handle_error`` is
         reused as this service's ``handle_error``.
     :param cfg_fname: configuration file name, stored as ``cfg_fname``.
     """
     Service.__init__(self, server)
     self.nick = 'tcserv'
     self.cfg_fname = cfg_fname
     self.handle_error = server.handle_error
     # Debug output is forwarded to the server with a 'TCServ: ' prefix.
     self.dbg = lambda m: self.server.dbg('TCServ: %s' % m)

     # One lock for the database, exposed through acquire/release shortcuts.
     db_lock = threading.Lock()
     self._db_lock = db_lock
     self._lock_db = db_lock.acquire
     self._unlock_db = db_lock.release

     self.onion_peers = {}
     self.db_name = ':memory:'
     self.peers = 0
     self.unlisted_peers = []

     # The attributes above are assigned before these calls, as in the
     # original ordering; the calls themselves stay in the same sequence.
     self._load_config()
     self.listener = TC_Listener(self)
     self.connect_all()
예제 #10
0
 def __init__(self, model_dir=os.path.join('models', 'sklearn', 'nmf')):
     """Register the sklearn 'topic-modeling' service and load its models.

     Each entry of ``model_dir`` must contain ``model.pkl`` (a pickled dict
     with ``'model'`` and ``'vectorizer'`` keys) and ``results.json``.

     :param model_dir: directory containing one entry per model.
     """
     Service.__init__(self, 'topic-modeling', 'sklearn', [])
     self.models = {}
     self.vectorizers = {}
     self.results = {}
     found_langs = set()

     for entry in os.listdir(model_dir):
         entry_dir = os.path.join(model_dir, entry)

         # NOTE: unpickling can execute arbitrary code, so model_dir must
         # only ever contain trusted files.
         with open(os.path.join(entry_dir, 'model.pkl'), 'rb') as model_file:
             bundle = pickle.load(model_file)
         self.models[entry] = bundle['model']
         self.vectorizers[entry] = bundle['vectorizer']

         with open(os.path.join(entry_dir, 'results.json')) as results_file:
             report = json.load(results_file)
         if 'lang' in report:
             found_langs.add(report['lang'])
         self.results[entry] = report

     # Distinct languages declared by the results files.
     self.langs = list(found_langs)
예제 #11
0
    def __init__(self, model_dir=os.path.join('models', 'bert', 'next_sentence_prediction')):
        """Register the BERT 'next-sentence-prediction' service and load its models.

        Every sub-directory of ``model_dir`` is treated as one model; entries
        that are not directories are ignored.  For each model the file named
        ``transformers.CONFIG_NAME`` is parsed as JSON to obtain the
        model-info section.

        :param model_dir: directory containing one sub-directory per model.
        """
        Service.__init__(self, 'next-sentence-prediction', 'bert', ['parse'])
        self.models = {}
        self.results = {}
        seen_languages = set()

        for entry in os.listdir(model_dir):
            model_path = os.path.join(model_dir, entry)
            if os.path.isdir(model_path):
                config_path = os.path.join(model_path, transformers.CONFIG_NAME)
                with open(config_path, 'r') as config_file:
                    config = json.load(config_file)

                model_info = config[constants.MODEL_INFO]
                model_language = model_info[constants.LANGUAGE]
                seen_languages.add(model_language)

                self.models[entry] = bert_for_next_sentence_prediction.BertForNextSentencePrediction(
                    model_language, model_path)
                self.results[entry] = model_info

        # Distinct languages found across all loaded models.
        self.langs = list(seen_languages)
예제 #12
0
    def __init__(self, model_dir='models/sklearn/classification'):
        """Register the sklearn 'classification' service and load its models.

        Each entry of ``model_dir`` must contain ``model.pkl`` (a pickled dict
        with ``'model'``, ``'patterns'`` and ``'extra_patterns'`` keys) and
        ``results.json``.

        :param model_dir: directory containing one entry per model.
        """
        Service.__init__(self, 'classification', 'sklearn', [])

        self.models = {}
        self.patterns = {}
        self.extra_patterns = {}
        self.results = {}
        found_langs = set()

        for entry in os.listdir(model_dir):
            entry_dir = os.path.join(model_dir, entry)

            # NOTE: unpickling can execute arbitrary code, so model_dir must
            # only ever contain trusted files.
            with open(os.path.join(entry_dir, 'model.pkl'), 'rb') as model_file:
                bundle = pickle.load(model_file)
            self.models[entry] = bundle['model']
            self.patterns[entry] = bundle['patterns']
            self.extra_patterns[entry] = bundle['extra_patterns']

            with open(os.path.join(entry_dir, 'results.json')) as results_file:
                report = json.load(results_file)
            if 'lang' in report:
                found_langs.add(report['lang'])
            self.results[entry] = report

        # Distinct languages declared by the results files.
        self.langs = list(found_langs)
예제 #13
0
 def __init__(self, stopwords_dir=os.path.join('resources', 'stopwords')):
     """Register the TextRank 'extractive-summarization' service.

     :param stopwords_dir: directory handed to ``_read_stopwords``; its
         result is stored as ``self.stopwords``.
     """
     Service.__init__(
         self, 'extractive-summarization', 'textrank', ['parse'])
     stopwords = _read_stopwords(stopwords_dir)
     self.stopwords = stopwords
예제 #14
0
 def __init__(self, stopwords_dir=os.path.join('resources', 'stopwords')):
     """Register the TextRank 'keywords' service.

     :param stopwords_dir: directory handed to ``_read_stopwords``; its
         result is stored as ``self.stopwords``.
     """
     Service.__init__(
         self, 'keywords', 'textrank', ['parse'])
     stopwords = _read_stopwords(stopwords_dir)
     self.stopwords = stopwords
예제 #15
0
 def __init__(self, langs=None):
     """Register the NLTK-backed 'ner' service.

     :param langs: accepted so the constructor can be called like its
         sibling services; this constructor does not use it.  The default is
         ``None`` rather than a list literal so that no mutable object is
         shared between calls.
     """
     Service.__init__(self, 'ner', 'nltk', ['parse'])
예제 #16
0
파일: allen.py 프로젝트: phillswope/charade
 def __init__(self):
     """Register the pretrained AllenNLP 'ner' service and load its predictor.

     The predictor is loaded from a fixed archive path relative to the
     working directory.
     """
     Service.__init__(self, 'ner', 'allen', [])
     archive_path = 'models/allen/pretrained/ner-model-2018.12.18.tar.gz'
     self.model = Predictor.from_path(archive_path)
예제 #17
0
 def __init__(self):
     """Register the 'fiscal-code' service and build its lookup tables.

     Three character -> integer tables are created:

     * ``_months``: month letter to month number (1-12);
     * ``_odd`` / ``_even``: per-character values for digits and uppercase
       letters (presumably the odd/even position weights of the Italian
       fiscal-code check character -- confirm against the code using them).
     """
     Service.__init__(self, 'fiscal-code', 'misc', ['codes'])
     digits = '0123456789'

     # A, B, C, D, E, H, L, M, P, R, S, T  ->  1 .. 12
     self._months = dict(zip('ABCDEHLMPRST', range(1, 13)))

     # In the odd table the digits 0-9 and the letters A-J share the same
     # ten values; K-Z follow with their own values.  Key order mirrors the
     # original literal (note 'V' is listed before 'S').
     shared_odd_values = (1, 0, 5, 7, 9, 13, 15, 17, 19, 21)
     self._odd = dict(zip(digits, shared_odd_values))
     self._odd.update(zip('ABCDEFGHIJ', shared_odd_values))
     self._odd.update(zip(
         'KLMNOPQRVSTUWXYZ',
         (2, 4, 18, 20, 11, 3, 6, 8, 10, 12, 14, 16, 22, 25, 24, 23)))

     # In the even table a digit maps to its own value and a letter to its
     # zero-based position in the alphabet.
     self._even = dict(zip(digits, range(10)))
     self._even.update(zip('ABCDEFGHIJKLMNOPQRSTUVWXYZ', range(26)))
예제 #18
0
 def __init__(self, langs):
     """Register the 'dates' service.

     :param langs: stored unchanged as ``self.langs``.
     """
     Service.__init__(
         self, 'dates', 'misc', [])
     self.langs = langs
예제 #19
0
 def __init__(self):
     """Register the regex-based 'parse' service and compile its patterns."""
     Service.__init__(self, 'parse', 'regex', [])
     # Sentence pattern: '.', '!' or '?' followed by whitespace, or end of
     # input.
     sentence_pattern = r'([.!?]\s+|$)'
     # Token pattern: a single whitespace or punctuation character, or end
     # of input.
     token_pattern = r'([\s;,:.!?]|$)'
     self.sentence_regex = re.compile(sentence_pattern)
     self.token_regex = re.compile(token_pattern)
예제 #20
0
 def __init__(self, langs=None):
     """Register the NLTK-backed 'parse' service and create its tokenizers.

     :param langs: accepted so the constructor can be called like its
         sibling services; this constructor does not use it.  The default is
         ``None`` rather than a list literal so that no mutable object is
         shared between calls.
     """
     Service.__init__(self, 'parse', 'nltk', [])
     self.punktSentenceTokenizer = PunktSentenceTokenizer()
     self.treebankWordTokenizer = TreebankWordTokenizer()
예제 #21
0
 def __init__(self, langs):
     """Register the spaCy 'ner' service and load one pipeline per language.

     :param langs: iterable of names; each one is passed to ``spacy.load``
         and also used as the key in ``self.models``.
     """
     Service.__init__(self, 'ner', 'spacy', [])
     self.models = {}
     # update() consumes the generator pair by pair, so models are loaded
     # and stored one at a time in iteration order.
     self.models.update((code, spacy.load(code)) for code in langs)