def load_ner(models_path: str) -> NER:
    """Load and initialise the NER model, downloading its packs when absent.

    The directory is created if needed. The Navec embeddings pack and the
    slovnet NER pack are fetched into it unless a file with the expected name
    is already present, then both are loaded and the embeddings are attached
    to the tagger.

    Args:
        models_path (str): Directory that holds (or will receive) the packs.

    Returns:
        slovnet.NER: The loaded NER object with the embeddings attached.
    """
    navec_target = os.path.join(models_path, 'navec_news_v1_1B_250K_300d_100q.tar')
    ner_target = os.path.join(models_path, 'slovnet_ner_news_v1.tar')
    # (download URL, local file) pairs, fetched in this order when missing.
    packs = (
        (
            'https://storage.yandexcloud.net/natasha-navec/packs/navec_news_v1_1B_250K_300d_100q.tar',
            navec_target,
        ),
        (
            'https://storage.yandexcloud.net/natasha-slovnet/packs/slovnet_ner_news_v1.tar',
            ner_target,
        ),
    )

    os.makedirs(models_path, exist_ok=True)
    for url, target in packs:
        if not os.path.isfile(target):
            wget.download(url, target)

    embeddings = Navec.load(navec_target)
    tagger = NER.load(ner_target)
    tagger.navec(embeddings)
    return tagger
def shop_name(self) -> str:
    """Return the first organisation name the NER tagger finds in ``self.text``.

    The embeddings and the NER pack are loaded from the paths in
    ``constants`` on every call.

    Returns:
        str: The text of the first span typed ``'ORG'`` with surrounding
        punctuation and spaces stripped, or ``""`` when there is no such
        span or the tagger raises ``IndexError``.
    """
    # NOTE(review): both models are reloaded on each call; caching them would
    # be cheaper but is left alone here to keep behaviour unchanged.
    navec = Navec.load(constants.navec_file)
    ner = NER.load(constants.ner_file)
    ner.navec(navec)

    try:
        markup = ner(self.text)
    except IndexError:
        # The tagger sometimes raises IndexError (cause unknown); treat it
        # as "no shop name found" rather than failing the caller.
        return ""

    # No explicit `del` of the models: locals are released when the function
    # returns, so the per-branch cleanup was redundant.
    for span in markup.spans:
        if span.type == 'ORG':
            return self.text[span.start:span.stop].strip(".,;!:-–—/ ")
    return ""
# df = pd.read_csv('relations.csv')
# relations = df.to_records(index=False)
# print(df)

from razdel import tokenize, sentenize
from navec import Navec
from slovnet import Morph, Syntax, NER
from ipymarkup import show_span_ascii_markup as show_markup

# Category table, converted to records without the index column.
df = pd.read_csv('categories.csv')
categories = df.to_records(index=False)

# Only the NER tagger is loaded and wired to the embeddings; the morphology
# and syntax models are left commented out.
navec = Navec.load('navec_news_v1_1B_250K_300d_100q.tar')
# morph = Morph.load('slovnet_morph_news_v1.tar')
# syntax = Syntax.load('slovnet_syntax_news_v1.tar')
ner = NER.load('slovnet_ner_news_v1.tar')
# morph.navec(navec)
# syntax.navec(navec)
ner.navec(navec)

# Split every line of the corpus into sentences, tag each sentence and print
# its span markup.
with open('sport_texts_clear.txt', encoding='UTF-8') as f:
    i = 1  # incremented once per sentence
    for line in f:
        sents = [sent.text for sent in sentenize(line)]
        for sent in sents:
            i += 1
            # tokens = tokenize(sent)
            # print(list(tokens))
            markup = ner(sent)
            show_markup(markup.text, markup.spans)
def ner(navec):
    """Download the slovnet NER pack and bind it to the given embeddings.

    Args:
        navec: Embeddings object handed to ``NER.navec``.

    Returns:
        The result of calling ``navec(...)`` on the loaded ``NER`` object.
    """
    pack_url = 'https://storage.yandexcloud.net/natasha-slovnet/packs/slovnet_ner_news_v1.tar'
    pack_path = download(pack_url)
    tagger = NER.load(pack_path)
    return tagger.navec(navec)
# NOTE(review): the four statements below look like the tail of a definition
# whose header precedes this chunk — TODO confirm the enclosing scope and
# restore its indentation accordingly.
record, k = get_random_record(records)
markup = ner(record.text)
print('This is ' + tp.BOLD + tp.RED + f'{k}' + tp.END + ' record\n')
show_markup(markup.text, markup.spans)


def test_on_k_random_records(K):
    """Tag K randomly chosen records and print the NER markup of each.

    Indices are drawn with ``random.choices`` from ``range(N)``, i.e. with
    replacement, so the same record can be picked more than once.

    Args:
        K: Number of records to sample and display.
    """
    records = load_lenta(lenta_path)
    # range(N) is already an indexable population; no need to copy it.
    chosen_records_num = random.choices(range(N), k=K)
    my_records = [get_k_record(records, num) for num in chosen_records_num]

    print('This is ' + tp.BOLD + tp.RED + f'{chosen_records_num}' + tp.END + ' records\n')

    # Walk indices and records in lockstep instead of indexing by position.
    for num, chosen in zip(chosen_records_num, my_records):
        print(tp.BOLD + tp.RED + f'{num}' + tp.END + '\t')
        markup = ner(chosen.text)
        show_markup(markup.text, markup.spans)
        print('\n--------------------------\n\n')


if __name__ == '__main__':
    print()
    # `ner` is read as a module-level global by the functions above, so it
    # has to exist before they are called.
    navec = Navec.load(navec_path)
    ner = NER.load(ner_path)
    ner.navec(navec)
    test_on_random_record()
    test_on_k_random_records(5)
# Runtime configuration; every value can be overridden via the environment.
NAVEC = getenv('NAVEC', 'navec.tar')
PACK = getenv('PACK', 'pack.tar')
BATCH_SIZE = int(getenv('BATCH_SIZE', 8))
HOST = getenv('HOST', '0.0.0.0')
PORT = int(getenv('PORT', 8080))
MB = 1024 * 1024
MAX_SIZE = int(getenv('MAX_SIZE', 100 * MB))

# Models are loaded once at import time and shared by all requests.
log('Load navec: %r' % NAVEC)
navec = Navec.load(NAVEC)
log('Load pack: %r' % PACK)
log('Batch size: %r' % BATCH_SIZE)
# NOTE(review): BATCH_SIZE is logged but not handed to the tagger in the
# code visible here — confirm it is applied elsewhere.
ner = NER.load(PACK)
ner.navec(navec)


async def handle(request):
    """Run NER over a JSON batch and answer with the markups as JSON.

    Args:
        request: Incoming request; its JSON body is passed to ``ner.map``.

    Returns:
        The ``web.json_response`` built from each markup's ``as_json``.
    """
    chunk = await request.json()
    log('Post chunk size: %r' % len(chunk))

    # Materialise the results: they are traversed twice below.
    markups = list(ner.map(chunk))
    spans = sum(len(markup.spans) for markup in markups)
    # Pre-format the message like every other log call in this module; the
    # original passed `spans` as a separate argument instead.
    log('Infer spans: %r' % spans)

    data = [markup.as_json for markup in markups]
    return web.json_response(data)
def __init__(self, model_path, vector_model_path):
    """Load the NER model and attach the word embeddings to it.

    Args:
        model_path: Path given to ``NER.load``.
        vector_model_path: Path given to ``Navec.load``.
    """
    # Embeddings are loaded first, then the tagger, as in the original order.
    embeddings = Navec.load(vector_model_path)
    tagger = NER.load(model_path)
    self.model = tagger
    self.model.navec(embeddings)
def map(self, items):
    """Lazily yield the adapted form of each markup produced for ``items``.

    Args:
        items: Inputs forwarded unchanged to ``SlovnetNER.map``.

    Yields:
        ``adapt_markup(markup)`` for every markup from ``SlovnetNER.map``.
    """
    yield from (
        adapt_markup(raw_markup)
        for raw_markup in SlovnetNER.map(self, items)
    )
def __init__(self, emb, path):
    """Initialise this object from a saved pack and attach embeddings.

    Args:
        emb: Embeddings object passed to ``self.navec``.
        path: Pack path given to ``SlovnetNER.load``.
    """
    # Unpack the loaded object into its first component and the rest, then
    # replay them through the parent initialiser on this instance.
    loaded = SlovnetNER.load(path)
    infer, *remaining = loaded
    SlovnetNER.__init__(self, infer, *remaining)
    self.navec(emb)