def test_get_id_author(self):
    """Test that get_id_author() returns a dict (TLG id -> author name)."""
    # assertIsInstance replaces the type(x)==dict anti-pattern and gives a
    # clearer failure message; it also accepts dict subclasses.
    self.assertIsInstance(get_id_author(), dict)
def test_get_id_author(self):
    """Test that get_id_author() returns a dict (TLG id -> author name)."""
    # NOTE(review): this is a byte-identical duplicate of an earlier
    # test_get_id_author definition; the later one silently shadows the
    # earlier at class-creation time. One of the two should be removed
    # or renamed -- confirm which with the file owner.
    self.assertIsInstance(get_id_author(), dict)
def stream_lemmatized_files(corpus_dir):
    """Yield (doc_id, text) pairs for every file in a user_data corpus dir.

    The doc id is derived from the filename by dropping the first three
    characters and the four-character extension (``file[3:-4]``).

    :param corpus_dir: directory name under ``~/cltk_data/user_data/``.
    :yields: tuple of (doc id, full file contents).
    """
    user_dir = os.path.expanduser('~/cltk_data/user_data/' + corpus_dir)
    files = os.listdir(user_dir)
    for file in files:
        filepath = os.path.join(user_dir, file)
        with open(filepath) as fo:
            # TODO: rm words less than 3 chars long
            yield file[3:-4], fo.read()


t0 = dt.datetime.utcnow()
map_id_author = get_id_author()
# Collect rows first and build the DataFrame once: calling DataFrame.append
# per row copies the whole frame each time (O(n^2)), and .append was removed
# in pandas 2.0.
rows = []
for _id, text in stream_lemmatized_files('tlg_lemmatized_no_accents_no_stops'):
    rows.append({'id': _id,
                 'author': map_id_author[_id],
                 'text': text,
                 'epithet': get_epithet_of_author(_id)})
# BUG FIX: the original columns list was ['id', 'author' 'text', 'epithet'] --
# the missing comma made Python concatenate the adjacent string literals into
# a single bogus 'authortext' column.
df = pandas.DataFrame(rows, columns=['id', 'author', 'text', 'epithet'])
print(df.shape)
print('... finished in {}'.format(dt.datetime.utcnow() - t0))
print('Number of texts:', len(df))
text_list = df['text'].tolist()
# NOTE(review): this span is a mangled duplicate of the generator and the
# loading script defined just above (a notebook export artifact -- the
# original text even contained a dangling function body with `yield` at top
# level, which is a SyntaxError). Reconstructed here as valid Python with
# the `def` header restored; consider deleting one of the two copies.
def stream_lemmatized_files(corpus_dir):
    """Yield (doc_id, text) pairs for every file in a user_data corpus dir."""
    user_dir = os.path.expanduser('~/cltk_data/user_data/' + corpus_dir)
    files = os.listdir(user_dir)
    for file in files:
        filepath = os.path.join(user_dir, file)
        with open(filepath) as fo:
            # TODO: rm words less than 3 chars long
            yield file[3:-4], fo.read()


# In[3]:
t0 = dt.datetime.utcnow()
map_id_author = get_id_author()
# Build rows first, then construct the frame once: per-row DataFrame.append
# is O(n^2) and was removed in pandas 2.0.
rows = []
for _id, text in stream_lemmatized_files('tlg_lemmatized_no_accents_no_stops'):
    rows.append({'id': _id,
                 'author': map_id_author[_id],
                 'text': text,
                 'epithet': get_epithet_of_author(_id)})
# BUG FIX: original columns list ['id', 'author' 'text', 'epithet'] was
# missing a comma, concatenating the literals into one 'authortext' column.
df = pandas.DataFrame(rows, columns=['id', 'author', 'text', 'epithet'])