Ejemplo n.º 1
0
from lisc.utils.io import load_object

###################################################################################################
# Metadata
# --------
#
# Whenever you collect data with LISC, metadata is collected about the API requests
# and databases accessed.
#
# Here we will explore the metadata collected during our previous investigations.
#

###################################################################################################

# Reload the counts object, passing the database location by keyword
counts = load_object('tutorial_counts', directory=SCDB('lisc_db'))

###################################################################################################
# Metadata Object
# ---------------
#
# Metadata information is collected into a custom :class:`~.MetaData` object.
#
# If you are collecting data using a LISC object, such as a :class:`~.Counts`
# or :class:`~.Words` object, this collection information is attached and saved
# to the object as the `meta_data` attribute.
#

###################################################################################################

# Check the date on which the collection happened
Ejemplo n.º 2
0
# Print out the summary of `arts_all`
arts_all.print_summary()

###################################################################################################
# Words Object
# ~~~~~~~~~~~~
#
# The :class:`~.Words` object can also be used to reload and analyze collected data.
#
# The `results` attribute of the :class:`~.Words` object, when loaded, contains a list of
# :class:`~.Articles` objects, one for each term.
#

###################################################################################################

# Load the saved words object back in from the database
words = load_object(
    'tutorial_words',
    directory=SCDB('lisc_db'),
)

###################################################################################################

# Load the stored data for each term's results object
for term_results in words.results:
    term_results.load(directory=db)

###################################################################################################

# Build one ArticlesAll object per term label, collected into a list
all_articles = [ArticlesAll(words[term_label]) for term_label in words.labels]

###################################################################################################

# Plot a WordCloud of the collected data for the first term
Ejemplo n.º 3
0
# Create the summary of `arts_all`, then print it out
arts_all.create_summary()
arts_all.print_summary()

###################################################################################################
# Words Object
# ~~~~~~~~~~~~
#
# The :class:`~.Words` object can also be used to reload and analyze collected data.
#
# The `results` attribute contains a list of :class:`~.Articles` objects, one for each term.
#

###################################################################################################

# Load the saved words object, asking for the article data to be reloaded as well
words = load_object(
    'tutorial_words',
    directory=SCDB('lisc_db'),
    reload_results=True,
)

###################################################################################################
#
# Note that the reloaded data is the raw data from the data collection.
#
# The :meth:`~.Words.process_articles` method can be used to do some preprocessing on the
# collected data.
#
# By default, the :func:`~.process_articles` function is used to process articles, which
# preprocesses journal and author names, and tokenizes the text data. You can also pass in
# a custom function to apply custom processing to the collected articles data.
#
# Note that some processing steps, like converting to the ArticlesAll representation,
# will automatically apply article preprocessing.
#