Example #1
 def test_get_id_author(self):
     """Test get_id_author()."""
     self.assertIsInstance(get_id_author(), dict)
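
For context, a minimal self-contained harness might look like the sketch below; the class name is illustrative and the import path is an assumption based on the pre-1.0 CLTK layout.

import unittest

# Assumed import path (pre-1.0 CLTK layout):
from cltk.corpus.greek.tlg.parse_tlg_indices import get_id_author


class TestTLGIndices(unittest.TestCase):
    def test_get_id_author(self):
        """get_id_author() should return a dict mapping TLG ids to author names."""
        self.assertIsInstance(get_id_author(), dict)


if __name__ == '__main__':
    unittest.main()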
Example #2

import datetime as dt
import os

import pandas
# TLG index helpers; this import path matches the pre-1.0 CLTK layout.
from cltk.corpus.greek.tlg.parse_tlg_indices import get_epithet_of_author, get_id_author


def stream_lemmatized_files(corpus_dir):
    """Yield (id, text) pairs for every doc in corpus_dir."""
    user_dir = os.path.expanduser('~/cltk_data/user_data/' + corpus_dir)
    files = os.listdir(user_dir)

    for file in files:
        filepath = os.path.join(user_dir, file)
        with open(filepath) as fo:
            # TODO: remove words fewer than 3 chars long
            # 'TLG0007.txt' -> '0007': drop the 3-char prefix and '.txt'
            yield file[3:-4], fo.read()
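
The file[3:-4] slice assumes TLG-style filenames; as a quick illustration (hypothetical name):

fname = 'TLG0007.txt'   # hypothetical filename following the assumed convention
print(fname[3:-4])      # prints '0007'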

t0 = dt.datetime.utcnow()

map_id_author = get_id_author()

# Collect rows in a list and build the frame once; DataFrame.append was
# removed in pandas 2.0 and grew the frame quadratically anyway.
rows = []
for _id, text in stream_lemmatized_files('tlg_lemmatized_no_accents_no_stops'):
    author = map_id_author[_id]
    epithet = get_epithet_of_author(_id)
    rows.append({'id': _id, 'author': author, 'text': text, 'epithet': epithet})

df = pandas.DataFrame(rows, columns=['id', 'author', 'text', 'epithet'])

print(df.shape)
print('... finished in {}'.format(dt.datetime.utcnow() - t0))
print('Number of texts:', len(df))


text_list = df['text'].tolist()
Example #4
def stream_lemmatized_files(corpus_dir):
    """Yield (id, text) pairs for every doc in corpus_dir."""
    user_dir = os.path.expanduser('~/cltk_data/user_data/' + corpus_dir)
    files = os.listdir(user_dir)

    for file in files:
        filepath = os.path.join(user_dir, file)
        with open(filepath) as fo:
            # TODO: remove words fewer than 3 chars long
            # 'TLG0007.txt' -> '0007': drop the 3-char prefix and '.txt'
            yield file[3:-4], fo.read()



t0 = dt.datetime.utcnow()

map_id_author = get_id_author()

# Collect rows in a list and build the frame once; DataFrame.append was
# removed in pandas 2.0 and grew the frame quadratically anyway.
rows = []
for _id, text in stream_lemmatized_files('tlg_lemmatized_no_accents_no_stops'):
    author = map_id_author[_id]
    epithet = get_epithet_of_author(_id)
    rows.append({
        'id': _id,
        'author': author,
        'text': text,
        'epithet': epithet
    })

df = pandas.DataFrame(rows, columns=['id', 'author', 'text', 'epithet'])