Beispiel #1
0
def save_object(obj, f_name, directory=None):
    """Save a custom object as a pickle file.

    Parameters
    ----------
    obj : Counts or Words object
        Object to save out.
    f_name : str
        Name for the file to be saved out.
    directory : str or SCDB object, optional
        Folder or database object specifying the save location.

    Raises
    ------
    ValueError
        If `obj` is neither a Counts nor a Words object.
    """

    # Note: imports done here to stop circular imports
    from lisc.objects import Counts, Words

    # Set the save path based on object type
    if isinstance(obj, Counts):
        obj_type = 'counts'
    elif isinstance(obj, Words):
        obj_type = 'words'
    else:
        raise ValueError('Object type unclear - can not save.')

    file_path = os.path.join(check_directory(directory, obj_type),
                             check_ext(f_name, '.p'))

    # Use a context manager so the file handle is always closed,
    #   even if pickling the object fails part way through
    with open(file_path, 'wb') as save_file:
        pickle.dump(obj, save_file)
Beispiel #2
0
    def load(self, directory=None):
        """Read this object's raw data back in from its json file.

        Parameters
        ----------
        directory : str or SCDB, optional
            Folder or database object specifying the save location.
        """

        raw_dir = check_directory(directory, 'raw')
        file_path = os.path.join(raw_dir, check_ext(self.label, '.json'))
        records = parse_json_data(file_path)

        # The first record carries the term definition
        header = next(records)
        self.term = Term(*header['term'])

        # (attribute to add to, key in the loaded record), in the order they are added
        fields = (
            ('ids', 'id'),
            ('titles', 'title'),
            ('journals', 'journal'),
            ('authors', 'authors'),
            ('words', 'words'),
            ('keywords', 'keywords'),
            ('years', 'year'),
            ('dois', 'doi'),
        )

        # Every remaining record is added field by field
        for record in records:
            for attribute, key in fields:
                self.add_data(attribute, record[key])

        self._check_results()
Beispiel #3
0
    def _set_up_logging(self, logging, directory):
        """Create the log container that matches the requested logging type.

        Parameters
        ----------
        logging : {None, 'print', 'store', 'file'}
            What kind of logging, if any, to do for requested URLs.
        directory : SCDB or str or None
            A string or object containing a file path.

        Returns
        -------
        logging : {None, 'print', 'store', 'file'}
            The logging type, passed through unchanged.
        log : None or list or file object
            None for no logging or print logging, an empty list for 'store',
            and an open, writable text file for 'file'.

        Raises
        ------
        ValueError
            If the logging type is not one of the supported options.
        """

        # Nothing needs to be stored when not logging, or when only printing
        if logging in (None, 'print'):
            return logging, None

        if logging == 'store':
            return logging, []

        if logging == 'file':
            log_path = os.path.join(check_directory(directory, 'logs'),
                                    check_ext('requester_log', '.txt'))

            # The file is left open on purpose: the handle is returned to the caller
            log_file = open(log_path, 'w')
            log_file.write('REQUESTER LOG - STARTED AT:  ' + self.start_time)
            return logging, log_file

        raise ValueError('Logging type not understood.')
Beispiel #4
0
    def load(self, directory=None):
        """Populate this object with the raw data stored in its json file.

        Parameters
        ----------
        directory : str or SCDB, optional
            Folder or database object specifying the save location.

        Examples
        --------
        Load an ``Articles`` object, assuming an :class:`~.SCDB` organization named 'lisc_db':

        >>> from lisc.utils import SCDB
        >>> articles = Articles('frontal lobe')
        >>> articles.load(SCDB('lisc_db')) # doctest:+SKIP
        """

        load_dir = check_directory(directory, 'raw')
        json_path = os.path.join(load_dir, check_ext(self.label, '.json'))
        entries = parse_json_data(json_path)

        # The term information is read from the first entry
        first_entry = next(entries)
        self.term = Term(*first_entry['term'])

        # Pairs of (attribute to add to, key in the loaded entry), in add order
        stored_fields = (
            ('ids', 'id'),
            ('titles', 'title'),
            ('journals', 'journal'),
            ('authors', 'authors'),
            ('words', 'words'),
            ('keywords', 'keywords'),
            ('years', 'year'),
            ('dois', 'doi'),
        )

        # All following entries are added to the object one field at a time
        for entry in entries:
            for attr_name, entry_key in stored_fields:
                self.add_data(attr_name, entry[entry_key])

        self._check_results()
Beispiel #5
0
    def save(self, directory=None):
        """Write the term and all attached data out to a json file.

        The file holds one json document per line: the term first,
        followed by each item obtained from iterating over this object.

        Parameters
        ----------
        directory : str or SCDB, optional
            Folder or database object specifying the save location.

        Examples
        --------
        Save an ``Articles`` object, using a temporary directory:

        >>> from tempfile import TemporaryDirectory
        >>> articles = Articles('frontal lobe')
        >>> with TemporaryDirectory() as dirpath:
        ...     articles.save(directory=dirpath)
        """

        save_dir = check_directory(directory, 'raw')
        save_path = os.path.join(save_dir, check_ext(self.label, '.json'))

        with open(save_path, 'w') as json_file:

            def write_record(record):
                """Write a single record as one line of json."""
                json.dump(record, json_file)
                json_file.write('\n')

            write_record({'term': self.term})
            for article in self:
                write_record(article)
Beispiel #6
0
def create_files(directory):
    """Create some test term files.

    Writes a terms file, an inclusions file and an exclusions file, each with
    fixed test contents, into the location returned by `check_directory` for 'terms'.

    Parameters
    ----------
    directory : object
        Location specifier that is passed on to `check_directory`.
    """

    # The target folder is the same for every file, so resolve it once
    terms_dir = check_directory(directory, 'terms')

    # (file name, contents) for each test file to create
    test_files = (
        ('test_terms.txt', 'word\nthing, same'),
        ('test_inclusions.txt', 'need\nrequired'),
        ('test_exclusions.txt', 'not\navoid'),
    )

    for file_name, contents in test_files:
        # The context manager closes the file even if the write fails
        with open(pjoin(terms_dir, file_name), 'w') as test_file:
            test_file.write(contents)
Beispiel #7
0
    def save_summary(self, directory=None):
        """Write the summary of the collected words data to a json file.

        Parameters
        ----------
        directory : str or SCDB object, optional
            Folder or database object specifying the save location.
        """

        summary_dir = check_directory(directory, 'summary')
        summary_path = os.path.join(summary_dir, check_ext(self.label, '.json'))

        with open(summary_path, 'w') as summary_file:
            json.dump(self.summary, summary_file)
Beispiel #8
0
    def save(self, directory=None):
        """Write the term and all attached data out to a json file.

        Each record is written as json on its own line, starting with the term
        and followed by every item obtained from iterating over this object.

        Parameters
        ----------
        directory : str or SCDB, optional
            Folder or database object specifying the save location.
        """

        def records():
            """Yield the term record, then each item of this object."""
            yield {'term': self.term}
            yield from self

        out_dir = check_directory(directory, 'raw')
        out_path = os.path.join(out_dir, check_ext(self.label, '.json'))

        with open(out_path, 'w') as out_file:
            for record in records():
                json.dump(record, out_file)
                out_file.write('\n')
Beispiel #9
0
def load_api_key(file_name, directory=None, required=False):
    """Load an API key from a file.

    Parameters
    ----------
    file_name : str
        Name of the file to load.
    directory : str or SCDB, optional
        Folder or database object specifying the location of the file to load.
    required : bool, optional, default: False
        Whether loading the API key file is required for continued execution.
        If True, this function will raise an error if the requested file can not be loaded.
        If False, this function will return None if the file can not be loaded.

    Returns
    -------
    api_key : str or None
        The loaded API key, with surrounding whitespace removed.
        None if the file could not be loaded and `required` is False.

    Raises
    ------
    FileNotFoundError
        If the requested file is not found. Only raised if `required` is True.
    OSError or ValueError
        If the file exists but can not be opened or decoded. Only raised if `required` is True.

    Notes
    -----
    This function assumes the API key is in a single-line txt file.
    """

    file_path = os.path.join(check_directory(directory, 'base'),
                             check_ext(file_name, '.txt'))

    try:

        with open(file_path, 'r') as f_obj:
            api_key = f_obj.read().strip()

    # OSError covers missing or unreadable files (FileNotFoundError is a subclass),
    #   ValueError covers invalid paths and undecodable contents (UnicodeDecodeError)
    except (OSError, ValueError):

        # Re-raise the original error when the key is required, otherwise fall back to None
        if required:
            raise

        api_key = None

    return api_key
Beispiel #10
0
def load_txt_file(file_name, directory=None, split_elements=True, split_character=','):
    """Read a text file and return its lines, optionally split into elements.

    Parameters
    ----------
    file_name : str
        Name of the file to load.
    directory : str or SCDB, optional
        Folder or database object specifying the location of the file to load.
    split_elements : bool, optional, default: True
        If True, splits elements within a single line.
    split_character : str, optional, default: ','
        The character to use to split elements within a line.

    Returns
    -------
    contents : list
        Data loaded from the file: a list of lists of stripped strings if
        `split_elements` is True, otherwise a list of stripped strings.
    """

    terms_dir = check_directory(directory, 'terms')
    file_path = os.path.join(terms_dir, check_ext(file_name, '.txt'))

    with open(file_path, 'r') as txt_file:
        raw_text = txt_file.read()

    # Splitting into lines does not produce an entry for the content after a final newline
    #   Adding a second newline keeps that empty last line, so the line count is correct
    if raw_text.endswith('\n'):
        raw_text += '\n'

    lines = raw_text.splitlines()

    if not split_elements:
        return [line.strip() for line in lines]

    return [[element.strip() for element in line.split(split_character)]
            for line in lines]
Beispiel #11
0
    def save_summary(self, directory=None):
        """Write the summary of the collected words data to a json file.

        Parameters
        ----------
        directory : str or SCDB or None, optional
            Folder or database object specifying the save location.

        Examples
        --------
        Save a summary for a term, assuming an initialized ``ArticlesAll`` object with data::

        >>> articles_all.create_summary() # doctest:+SKIP
        >>> articles_all.save_summary() # doctest:+SKIP
        """

        save_dir = check_directory(directory, 'summary')
        file_name = check_ext(self.label, '.json')
        save_path = os.path.join(save_dir, file_name)

        with open(save_path, 'w') as json_file:
            json.dump(self.summary, json_file)
Beispiel #12
0
def save_object(obj, file_name, directory=None):
    """Pickle a custom object and write it to a file.

    Parameters
    ----------
    obj : Counts or Words
        Object to save out.
    file_name : str
        Name for the file to be saved out.
    directory : str or SCDB, optional
        Folder or database object specifying the save location.

    Raises
    ------
    ValueError
        If `obj` is neither a Counts nor a Words object.

    Examples
    --------
    Save a :class:`~.Counts` object, using a temporary directory:

    >>> from tempfile import TemporaryDirectory
    >>> from lisc.objects import Counts
    >>> with TemporaryDirectory() as dirpath:
    ...     save_object(Counts(), 'counts.p', directory=dirpath)
    """

    # Import objects locally, to avoid circular imports
    from lisc.objects import Counts, Words

    # Find the save location label for the object, checking types in a fixed order
    for obj_class, label in ((Counts, 'counts'), (Words, 'words')):
        if isinstance(obj, obj_class):
            obj_type = label
            break
    else:
        raise ValueError('Object type unclear - can not save.')

    save_dir = check_directory(directory, obj_type)
    save_path = os.path.join(save_dir, check_ext(file_name, '.p'))

    with open(save_path, 'wb') as pickle_file:
        pickle.dump(obj, pickle_file)
Beispiel #13
0
def load_terms_file(f_name, directory=None):
    """Loads terms from a text file.

    Each line of the file is split on commas, giving one list of strings per line.
    The split strings are returned as they are, without stripping whitespace.

    Parameters
    ----------
    f_name : str
        Name of the file to load.
    directory : str or SCDB object, optional
        Folder or database object specifying the location of the file to load.

    Returns
    -------
    terms : list of list of str
        Data from the file.
    """

    file_path = os.path.join(check_directory(directory, 'terms'),
                             check_ext(f_name, '.txt'))

    # Read inside a context manager so the file handle is always closed
    with open(file_path, 'r') as terms_file:
        lines = terms_file.read().splitlines()

    terms = [line.split(',') for line in lines]

    return terms