def test_kansuuzi2number(self):
    # '1 万 円 台' is rewritten so the kanji numeral becomes an Arabic numeral.
    tokens = ['1', '万', '円', '台']
    expected = ['10000', '円', '台']
    self.assertEqual(kansuuzi2number(tokens), expected)
    # '賞金 は 500 万' is left unchanged (no conversion is applied here).
    tokens = ['賞金', 'は', '500', '万']
    expected = ['賞金', 'は', '500', '万']
    self.assertEqual(kansuuzi2number(tokens), expected)
def test_kansuuzi2number():
    # pytest-style variant of the same checks.
    tokens = ['1', '万', '円', '台']
    expected = ['10000', '円', '台']
    result = kansuuzi2number(tokens)
    assert result == expected

    tokens = ['賞金', 'は', '500', '万']
    expected = ['賞金', 'は', '500', '万']
    result = kansuuzi2number(tokens)
    assert result == expected
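
# For reference, a minimal sketch of what kansuuzi2number might look like,
# inferred only from the two test cases above. This is hypothetical, not the
# project's actual implementation: the assumed rule is that a digit token
# followed by '万' (or '千') is collapsed into one Arabic-numeral token, but
# only when a counter such as '円' comes right after, which would explain why
# '賞金 は 500 万' is left untouched.
from typing import List

KANSUUZI_MULTIPLIERS = {'千': 1000, '万': 10000}  # assumed multiplier table


def kansuuzi2number_sketch(tokens: List[str]) -> List[str]:
    result: List[str] = []
    i = 0
    while i < len(tokens):
        if (i + 2 < len(tokens)
                and tokens[i].isdigit()
                and tokens[i + 1] in KANSUUZI_MULTIPLIERS
                and tokens[i + 2] == '円'):
            # e.g. ['1', '万', '円'] -> ['10000', '円']
            result.append(str(int(tokens[i]) * KANSUUZI_MULTIPLIERS[tokens[i + 1]]))
            i += 2  # skip the multiplier; '円' is appended on the next pass
        else:
            result.append(tokens[i])
            i += 1
    return result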
from logging import Logger
from pathlib import Path

from sqlalchemy.orm import Session
from tqdm import tqdm

# Headline, Tokenizer, simplify_headline, is_interesting, kansuuzi2number,
# replace_prices_with_tags, and DOMESTIC_INDEX are project-internal names
# assumed to be imported elsewhere in this module.


def update_headlines(session: Session, user_dict: Path, logger: Logger) -> None:
    # Select headlines that have not yet been marked as used or unused.
    query_result = session \
        .query(Headline) \
        .filter(Headline.is_used.is_(None)) \
        .all()
    headlines = list(query_result)
    if len(headlines) == 0:
        return

    tokenizer = Tokenizer(str(user_dict))
    mappings = []
    logger.info('start updating headlines')
    for headline in tqdm(headlines):
        h = simplify_headline(headline.headline)
        is_about_di = headline.categories is not None and \
            DOMESTIC_INDEX in headline.categories
        # We stopped using `is_template` because the size of the dataset
        # decreased and the result got worse.
        # if is_template(h) or not is_interesting(h) or not is_about_di:
        if not is_interesting(h) or not is_about_di:
            mappings.append({
                'article_id': headline.article_id,
                'is_used': False
            })
            continue
        tokens = kansuuzi2number(
            [token.surface for token in tokenizer.tokenize(h)])
        tag_tokens = replace_prices_with_tags(tokens)
        mappings.append({
            'article_id': headline.article_id,
            'simple_headline': h,
            'tokens': tokens,
            'tag_tokens': tag_tokens,
            'is_used': True,
        })
    session.bulk_update_mappings(Headline, mappings)
    session.commit()
    logger.info('end updating headlines')
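
# Hypothetical wiring for update_headlines; the engine URL, user-dictionary
# path, and logger name below are placeholders, not values from the project.
# Note that update_headlines defers all writes to a single
# session.bulk_update_mappings call followed by one commit, so the loop
# itself never touches the database.
import logging
from pathlib import Path

from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

engine = create_engine('sqlite:///headlines.db')  # assumed database URL
SessionFactory = sessionmaker(bind=engine)
logger = logging.getLogger('update_headlines')

update_headlines(SessionFactory(), Path('user_dict.csv'), logger)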