from lisc.utils.io import load_object

###################################################################################################
# Metadata
# --------
#
# Whenever you collect data with LISC, metadata is also collected about the API requests
# that were made and the databases that were accessed.
#
# Here we will explore the metadata collected during our previous investigations.
#

###################################################################################################

# Reload the previously saved counts object from the tutorial database
counts = load_object('tutorial_counts', directory=SCDB('lisc_db'))

###################################################################################################
# Metadata Object
# ---------------
#
# Metadata information is collected into a custom :class:`~.MetaData` object.
#
# If you are collecting data using a LISC object, such as the :class:`~.Counts`
# or :class:`~.Words` object, this collection information is attached and saved
# to the object as the `meta_data` attribute.
#

###################################################################################################

# Check the date on which the collection happened
arts_all.print_summary()

###################################################################################################
# Words Object
# ~~~~~~~~~~~~
#
# The :class:`~.Words` object can also be used to reload and analyze collected data.
#
# Once loaded, the `results` attribute of the :class:`~.Words` object contains a list of
# :class:`~.Articles` objects, one for each term.
#

###################################################################################################

# Reload the saved words object
words = load_object('tutorial_words', directory=SCDB('lisc_db'))

###################################################################################################

# Reload the collected data for every term
for articles in words.results:
    articles.load(directory=db)

###################################################################################################

# Collect into a list of aggregated data objects, one per term label
all_articles = [ArticlesAll(words[label]) for label in words.labels]

###################################################################################################

# Plot a WordCloud of the collected data for the first term
# Compute the summary of the aggregated data, then print it out
arts_all.create_summary()
arts_all.print_summary()

###################################################################################################
# Words Object
# ~~~~~~~~~~~~
#
# The :class:`~.Words` object can also be used to reload and analyze collected data.
#
# The `results` attribute contains a list of :class:`~.Articles` objects, one for each term.
#

###################################################################################################

# Reload the words object, specifying to also reload the article data
words = load_object(
    'tutorial_words',
    directory=SCDB('lisc_db'),
    reload_results=True,
)

###################################################################################################
#
# Note that the reloaded data is the raw data from the data collection.
#
# The :meth:`~.Words.process_articles` method can be used to do some preprocessing on the
# collected data.
#
# By default, the :func:`~.process_articles` function is used to process articles, which
# preprocesses journal and author names, and tokenizes the text data. You can also pass in
# a custom function to apply custom processing to the collected articles data.
#
# Note that some processing steps, like converting to the ArticlesAll representation,
# will automatically apply article preprocessing.
#