Esempio n. 1
0
def load_ner(models_path: str) -> NER:
    """Load and initialise the NER model, downloading the packs if absent.

    Args:
        models_path (str): Directory holding the model archives. It is
            created when missing, and any archive not found in it is
            downloaded into it.

    Returns:
        slovnet.NER: The loaded NER object after ``navec`` has been called
        on it with the loaded embeddings.
    """
    os.makedirs(models_path, exist_ok=True)

    navec_pack = os.path.join(models_path, 'navec_news_v1_1B_250K_300d_100q.tar')
    ner_pack = os.path.join(models_path, 'slovnet_ner_news_v1.tar')

    # (local file, remote URL) pairs, fetched in this order when missing.
    remote_packs = (
        (navec_pack,
         'https://storage.yandexcloud.net/natasha-navec/packs/navec_news_v1_1B_250K_300d_100q.tar'),
        (ner_pack,
         'https://storage.yandexcloud.net/natasha-slovnet/packs/slovnet_ner_news_v1.tar'),
    )
    for local_file, url in remote_packs:
        if os.path.isfile(local_file):
            continue
        wget.download(url, local_file)

    embeddings = Navec.load(navec_pack)
    tagger = NER.load(ner_pack)
    tagger.navec(embeddings)
    return tagger
Esempio n. 2
0
    def shop_name(self) -> str:
        """Return the first organisation name the NER model finds in ``self.text``.

        Returns:
            str: The text of the first span typed ``ORG``, with leading and
            trailing punctuation/space characters stripped, or ``""`` when
            there is no such span or the tagger raises ``IndexError``.
        """
        # NOTE(review): both packs are loaded on every call; consider caching
        # the models if this method is called repeatedly.
        navec = Navec.load(constants.navec_file)
        ner = NER.load(constants.ner_file)
        ner.navec(navec)

        try:
            markup = ner(self.text)
        except IndexError:
            # The tagger sometimes raises IndexError; the root cause was not
            # identified by the original author, so it is treated as
            # "no organisation found".
            return ""

        # Locals are released on return, so no explicit `del` is needed.
        for span in markup.spans:
            if span.type == 'ORG':
                return self.text[span.start:span.stop].strip(".,;!:-–—/ ")

        return ""
Esempio n. 3
0
# df = pd.read_csv('relations.csv')
# relations = df.to_records(index=False)
# print(df)

from razdel import tokenize, sentenize
from navec import Navec
from slovnet import Morph, Syntax, NER
from ipymarkup import show_span_ascii_markup as show_markup

# Read the category table and convert it to records without the index column.
df = pd.read_csv('categories.csv')
categories = df.to_records(index=False)

# Load the navec embeddings pack from the current working directory.
navec = Navec.load('navec_news_v1_1B_250K_300d_100q.tar')
# morph = Morph.load('slovnet_morph_news_v1.tar')
# syntax = Syntax.load('slovnet_syntax_news_v1.tar')
ner = NER.load('slovnet_ner_news_v1.tar')

# morph.navec(navec)
# syntax.navec(navec)
# Only the NER model is active; the Morph and Syntax lines are commented out.
ner.navec(navec)

# Split every line of the text file into sentences, run NER on each sentence
# and display the result with show_span_ascii_markup.
with open('sport_texts_clear.txt', 'r', encoding='UTF-8') as f:
    # Incremented once per sentence; never read in the visible code.
    i = 1
    for line in f:
        sents = [sent.text for sent in sentenize(line)]
        for sent in sents:
            i += 1
            # tokens = tokenize(sent)
            # print(list(tokens))
            markup = ner(sent)
            show_markup(markup.text, markup.spans)
Esempio n. 4
0
def ner(navec):
    """Fetch the slovnet NER news pack and return the model bound to *navec*.

    The pack is obtained through ``download``, loaded with ``NER.load``, and
    the value returned by calling ``.navec(navec)`` on it is handed back.
    """
    pack_url = 'https://storage.yandexcloud.net/natasha-slovnet/packs/slovnet_ner_news_v1.tar'
    local_pack = download(pack_url)
    model = NER.load(local_pack)
    return model.navec(navec)
Esempio n. 5
0
    record, k = get_random_record(records)
    markup = ner(record.text)
    print('This is ' + tp.BOLD + tp.RED + f'{k}' + tp.END + ' record\n')
    show_markup(markup.text, markup.spans)


def test_on_k_random_records(K):
    """Run NER on K randomly chosen records and display each markup.

    Indices are drawn with ``random.choices`` from ``0 .. N-1``, i.e. with
    replacement, so the same record may be selected more than once.

    Args:
        K: Number of records to draw and display.
    """
    records = load_lenta(lenta_path)
    chosen_records_num = random.choices(list(range(N)), k=K)
    my_records = [get_k_record(records, idx) for idx in chosen_records_num]

    # Header listing every chosen index.
    header = 'This is ' + tp.BOLD + tp.RED + str(chosen_records_num) + tp.END
    print(header + ' records\n')

    for number, record in zip(chosen_records_num, my_records):
        print(tp.BOLD + tp.RED + str(number) + tp.END + '\t')
        markup = ner(record.text)
        show_markup(markup.text, markup.spans)
        print('\n--------------------------\n\n')


# Script entry point: load the models, then run both demo functions.
if __name__ == '__main__':
    print()
    # `navec` and `ner` are bound as module-level names here; the functions
    # above call `ner(...)` as a global, so they must be set before the calls.
    navec = Navec.load(navec_path)
    ner = NER.load(ner_path)
    ner.navec(navec)
    test_on_random_record()
    test_on_k_random_records(5)
Esempio n. 6
0
# Model pack locations and batch size, each overridable through the
# environment variable of the same name.
NAVEC = getenv('NAVEC', 'navec.tar')
PACK = getenv('PACK', 'pack.tar')
BATCH_SIZE = int(getenv('BATCH_SIZE', 8))

# Remaining settings; MAX_SIZE defaults to 100 MiB.
HOST = getenv('HOST', '0.0.0.0')
PORT = int(getenv('PORT', 8080))
MB = 1024 * 1024
MAX_SIZE = int(getenv('MAX_SIZE', 100 * MB))

# Load the embeddings first; they are attached to the NER model below.
log('Load navec: {!r}'.format(NAVEC))
navec = Navec.load(NAVEC)

log('Load pack: {!r}'.format(PACK))
log('Batch size: {!r}'.format(BATCH_SIZE))
# NOTE(review): BATCH_SIZE is logged but not passed to the model in the
# visible code - confirm whether it should be applied here.
ner = NER.load(PACK)
ner.navec(navec)


async def handle(request):
    """Run NER over the JSON body of *request* and answer with JSON.

    The decoded body is passed to ``ner.map``; the response is a list with
    the ``as_json`` value of every resulting markup, in the same order.
    """
    chunk = await request.json()
    log('Post chunk size: %r' % len(chunk))

    # Materialise the results: they are traversed twice below.
    markups = list(ner.map(chunk))

    span_total = sum(len(markup.spans) for markup in markups)
    log('Infer spans: %r', span_total)

    payload = [markup.as_json for markup in markups]
    return web.json_response(payload)

Esempio n. 7
0
 def __init__(self, model_path, vector_model_path):
     """Load the NER model into ``self.model`` and attach embeddings to it.

     Args:
         model_path: Path passed to ``NER.load``.
         vector_model_path: Path passed to ``Navec.load``.
     """
     embeddings = Navec.load(vector_model_path)
     self.model = NER.load(model_path)
     self.model.navec(embeddings)
Esempio n. 8
0
 def map(self, items):
     """Yield one adapted markup per result of ``SlovnetNER.map``.

     Each markup produced by the base implementation for *items* is passed
     through ``adapt_markup`` before being yielded.
     """
     yield from (adapt_markup(raw) for raw in SlovnetNER.map(self, items))
Esempio n. 9
0
 def __init__(self, emb, path):
     """Initialise from the pack at *path* and attach the *emb* embeddings.

     The object returned by ``SlovnetNER.load`` is unpacked and its items
     are forwarded to the ``SlovnetNER`` constructor.
     """
     loaded = SlovnetNER.load(path)
     infer, *extra = loaded
     SlovnetNER.__init__(self, infer, *extra)
     self.navec(emb)