Code Example #1
File: test_cleantext.py Project: lefnire/ml-tools
def test_md_split_1():
    doc = articles()[0]
    paras = CleanText(doc) \
        .markdown_split_paragraphs() \
        .value()
    assert len(paras) > 1
    print(paras)
Code Example #2
File: test_cleantext.py Project: lefnire/ml-tools
def test_md_split_all():
    docs = articles()
    paras = CleanText(docs)\
        .markdown_split_paragraphs()\
        .value()
    assert len(paras) > 0
    assert len(docs) < len(paras)
    print(paras)
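Note: both tests above rely on markdown_split_paragraphs() flattening every input document into its individual paragraphs, which is why test_md_split_all expects more paragraphs than documents. A minimal sketch of that kind of split, assuming paragraphs are delimited by blank lines (split_paragraphs is a hypothetical stand-in, not the library function):

import re

def split_paragraphs(docs):
    # Split each markdown document on runs of blank lines and flatten the result.
    paras = []
    for doc in docs:
        for p in re.split(r'\n\s*\n', doc):
            if p.strip():
                paras.append(p.strip())
    return paras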
Code Example #3
File: fixtures.py Project: marait123/gnothi
def gen_entries(self):
    try:
        # This generates article fixtures on the GPU container, which then become
        # available in /storage to the server container. So run tests on GPU first,
        # then on server. TODO decouple this!
        from ml_tools.fixtures import articles
    except ImportError:
        raise Exception("Can't generate entries from server container, must do from GPU container first.")
    entries = articles(group_by='paragraph')
    entries = Box({
        k: dict(text=v, paras=v.split('\n\n'))
        for k, v in entries.items()
    })
    self.save("entries", entries)
    return entries
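Box (from python-box) wraps the dict so entries support attribute-style access as well as normal key access, which is how later examples can write res.vr_0. A quick illustration:

from box import Box

entries = Box({'vr_0': dict(text='a\n\nb', paras=['a', 'b'])})
assert entries.vr_0.text == entries['vr_0']['text']
assert entries.vr_0.paras == ['a', 'b']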
Code Example #4
File: test_cleantext.py Project: lefnire/ml-tools
def test_normalize(fmt, coverage, mode):
    chain = CleanText(articles(fmt=fmt))
    if coverage == "basic":
        chain = chain.keywords(mode=mode)
    else:
        # Revisit this list as cleantext.py grows
        chain = chain\
            .unmark()\
            .strip_html()\
            .normalize_numbers()\
            .fix_punct()\
            .only_english()\
            .only_ascii()\
            .remove_apos()\
            .multiple_whitespace()\
            .keywords(mode=mode)
    clean = chain.join().value()
    assert len(chain.data.lemmas) > 10
    print(chain.data.lemmas[:5])
    assert len(clean) > 10
    print(clean[0])
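test_normalize takes fmt, coverage, and mode as arguments, so it is presumably driven by pytest parametrization. A plausible decorator stack is sketched below; only "basic" appears in the source, and the other parameter values are assumptions:

import pytest

@pytest.mark.parametrize("fmt", ["md", "txt"])           # assumed values
@pytest.mark.parametrize("coverage", ["basic", "full"])  # "basic" is from the source
@pytest.mark.parametrize("mode", ["fast", "full"])       # assumed values
def test_normalize(fmt, coverage, mode):
    ...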
Code Example #5
File: test_ae.py Project: lefnire/ml-tools
from ml_tools import Similars
from ml_tools.fixtures import articles
import numpy as np

corpus = articles()


def test_ae():
    chain = Similars(corpus).embed()
    vecs = chain.value()

    orig_cosines = chain.normalize().cosine().value()
    orig_cosines = np.argsort(orig_cosines, axis=1)

    dims = [400, 20]
    reduced = chain.autoencode(dims=dims).value()
    assert vecs.shape[0] == reduced.shape[0]
    assert reduced.shape[1] == dims[-1]

    # TODO do some comparison between original cosines & new cosines
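    # One hypothetical way to do that comparison, with plain numpy. Assumes
    # chain.cosine() returned distances, so ascending argsort puts nearest
    # neighbors first; the neighborhood size k is arbitrary.
    unit = reduced / np.linalg.norm(reduced, axis=1, keepdims=True)
    new_cosines = np.argsort(1 - unit @ unit.T, axis=1)
    k = 5
    overlap = np.mean([
        len(set(orig_cosines[i, :k]) & set(new_cosines[i, :k])) / k
        for i in range(len(orig_cosines))
    ])
    print(f"top-{k} neighbor overlap after autoencoding: {overlap:.2f}")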
Code Example #6
def test_group_none():
    res = articles()
    assert len(res) > 10
    assert type(res[0]) == str
    print(res[0])
Code Example #7
def test_group_paragraph():
    res = articles(group_by='paragraph')
    assert len(res.keys()) > 10
    assert type(res.vr_0) == str
Code Example #8
def test_group_article():
    res = articles(group_by='article')
    assert len(res.vr) > 10
    assert type(res.vr[0]) == str
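Taken together, the three tests above pin down the fixture's return shapes. Roughly (the vr keys come from the tests themselves; the placeholder contents are illustrative):

from ml_tools.fixtures import articles

flat = articles()                          # list of full article strings
by_para = articles(group_by='paragraph')   # Box: {'vr_0': '<paragraph>', 'vr_1': ..., ...}
by_art = articles(group_by='article')      # Box: {'vr': ['<para 0>', '<para 1>', ...], ...}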
Code Example #9
from box import Box
from ml_tools import CosineEstimator, Similars
from ml_tools.fixtures import articles
import numpy as np
import pandas as pd
import optuna

import argparse
parser = argparse.ArgumentParser()
parser.add_argument('--jobs', type=int, help='Number of threads', default=1)
parser.add_argument('--init',
                    action='store_true',
                    help='initialize starter trials')
args_p = parser.parse_args()

lhs = articles()
lhs = Similars(lhs).embed().cluster(algo='agglomorative').value()

rhs = np.load('/storage/libgen/testing.npy')  #, mmap_mode='r')
books = pd.read_feather('/storage/libgen/testing.df')

# don't use cook(.?book)?, it's matched by too many programming books
food_re = "gluten.?free|vegan|vegetarian"
# these should be really specific (think about edge-cases)
votes = Box(
    mine_up=r"(tensorflow|keras)",
    other_up=rf"({food_re}|republican)",
    mine_down=rf"({food_re})",
    other_down=r"(artificial|\bai\b|python|java|css|html|cbt|cognitive.?behav)"
)
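Downstream, these regexes presumably get matched against the book metadata to build up/down vote masks for CosineEstimator. A hypothetical sketch of that step; the column name and the mask construction are assumptions, not taken from the repo:

# Hypothetical: turn each vote regex into a boolean mask over the books dataframe.
txt = books.title.str.lower()  # assumed column; could also combine title + description
masks = Box({k: txt.str.contains(v, regex=True, na=False) for k, v in votes.items()})
print({k: int(m.sum()) for k, m in masks.items()})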