Exemple #1
0
    def _check_term_consistency(self):
        """Verify that loaded inclusions and exclusions match the number of terms.

        Raises
        ------
        InconsistentDataError
            If a non-empty inclusions or exclusions collection has a length
            that differs from `n_terms`.
        """

        # Each entry pairs the attribute to validate with the error reported for it
        checks = (
            ('inclusions', 'Mismatch in number of inclusions and terms!'),
            ('exclusions', 'Mismatch in number of exclusions and terms!'),
        )

        for attr_name, message in checks:
            values = getattr(self, attr_name)

            # An empty collection is skipped rather than compared against n_terms
            if not values:
                continue

            if self.n_terms != len(values):
                raise InconsistentDataError(message)
Exemple #2
0
    def _check_term_consistency(self):
        """Verify that inclusions, exclusions and labels each have one entry per term.

        Raises
        ------
        InconsistentDataError
            If the length of the inclusions, exclusions or labels differs
            from `n_terms`.
        """

        # Each entry pairs the attribute to validate with the error reported for it
        checks = (
            ('inclusions', 'There is a mismatch in number of inclusions and terms.'),
            ('exclusions', 'There is a mismatch in number of exclusions and terms.'),
            ('_labels', 'There is a mismatch in number of labels and terms.'),
        )

        for attr_name, message in checks:
            if self.n_terms != len(getattr(self, attr_name)):
                raise InconsistentDataError(message)
def load_pickle_obj(f_name, db=None):
    """Load a custom object, from a pickle file, for SCANR project.

    Parameters
    ----------
    f_name : str
        File name of the object to be loaded, without the '.p' extension.
    db : SCDB object, optional
        Database object for the SCANR project.
        It is passed through `check_db` before use.

    Returns
    -------
    object
        The object unpickled from the matching file.

    Raises
    ------
    InconsistentDataError
        If the file is found in neither the counts nor the words directory.

    Notes
    -----
    The counts directory is checked first, so it takes precedence if both
    directories contain a file with the requested name.
    Unpickling can execute arbitrary code: only load files from a trusted source.
    """

    # Check for database object, initialize if not provided
    db = check_db(db)

    file_name = f_name + '.p'

    # Get all available files, for Count and Words pickled objects
    counts_objs = os.listdir(db.counts_path)
    words_objs = os.listdir(db.words_path)

    # Search for object in saved Count files, and set path if found
    if file_name in counts_objs:
        load_path = os.path.join(db.counts_path, file_name)

    # Search for object in saved Words files, and set path if found
    elif file_name in words_objs:
        load_path = os.path.join(db.words_path, file_name)

    # Raise an error if the file name is not found
    else:
        raise InconsistentDataError('Can not find requested file name.')

    # Load and return the data, making sure the file handle gets closed
    with open(load_path, 'rb') as load_file:
        return pickle.load(load_file)
Exemple #4
0
    def _check_labels(self):
        """Fill in missing labels when terms are loaded, then require unique labels.

        Raises
        ------
        InconsistentDataError
            If `labels` contains duplicate entries.
        """

        # With terms loaded, labels that are empty or all None are replaced
        # through `_set_none_labels`
        if self.has_terms:
            if not self._labels or self._labels == [None] * len(self._labels):
                self._set_none_labels()

        # Duplicates collapse in a set, so differing counts reveal repeated labels
        n_labels = len(self.labels)
        n_unique = len(set(self.labels))

        if n_labels != n_unique:
            raise InconsistentDataError(
                'Not all labels are unique. Labels must be unique.')
    def check_args(self, args_to_use):
        """Check that the requested arguments are defined, so that they can be used.

        Parameters
        ----------
        args_to_use : list of str
            Requested arguments to check that they are defined.

        Raises
        ------
        InconsistentDataError
            If any requested argument is not a key of `self.args`.
        """

        # Test membership directly, rather than indexing every key into a
        # throwaway list just to provoke a KeyError
        if not all(arg in self.args for arg in args_to_use):
            raise InconsistentDataError('Not all requested settings provided - can not proceed.')
Exemple #6
0
    def _check_results(self):
        """Verify that every extracted data field has one entry per article.

        Raises
        ------
        InconsistentDataError
            If any data field has a length different from `n_articles`.

        Notes
        -----
        Individual entries may be blank (missing data), but every field
        (ids, titles, words, etc) is expected to have the same length,
        equal to the number of articles. If the lengths differ, the fields
        do not line up and something went wrong during extraction.
        """

        # Names of the attributes that must all have length n_articles
        fields = ('ids', 'titles', 'words', 'journals', 'authors',
                  'keywords', 'years', 'dois')

        expected = self.n_articles

        # Stops at the first field whose length does not match
        lengths_match = all(expected == len(getattr(self, field)) for field in fields)

        if not lengths_match:
            raise InconsistentDataError('Words data is inconsistent.')
Exemple #7
0
    def set_exclusions_file(self):
        """Replace the current exclusion words with those loaded from file.

        Raises
        ------
        InconsistentDataError
            If the number of loaded exclusion entries differs from `n_terms`.
        """

        # Discard any exclusions that are already loaded
        self.unload_exclusions()

        # Fetch the raw exclusion entries through the module's file loader
        raw_entries = _terms_load_file('exclusions')

        # There must be exactly one exclusion entry per term
        if len(raw_entries) != self.n_terms:
            raise InconsistentDataError(
                'Mismatch in number of exclusions and terms!')

        # Skip the first three characters of each entry (the number index),
        # then split the remainder on commas into a list of words
        for entry in raw_entries:
            self.exclusions.append(entry[3:].split(','))
Exemple #8
0
    def set_exclusions(self, exclusions):
        """Set the given list of strings as exclusion words.

        Parameters
        ----------
        exclusions : list of str OR list of list of str
            List of exclusion words to be used.

        Raises
        ------
        InconsistentDataError
            If the number of exclusion entries differs from `n_terms`.
            In that case no new exclusions are stored.
        """

        # Unload previous terms if some are already loaded
        self.unload_exclusions()

        # Validate before storing anything, so that a mismatch does not
        # leave an inconsistent set of exclusions on the object
        if len(exclusions) != self.n_terms:
            raise InconsistentDataError(
                'Mismatch in number of exclusions and terms!')

        # Set given list as exclusion words
        for exclude in exclusions:
            self.exclusions.append(_check_type(exclude))
Exemple #9
0
    def check_results(self):
        """Check for consistency in extracted results, and record the outcome.

        Each data field (ids, titles, words, journals, authors, kws, years,
        months, dois) should have the same length, equal to the number of
        articles. Some entries may be blank (missing data), but if the lengths
        are not the same then the data does not line up and cannot be trusted.

        Raises
        ------
        InconsistentDataError
            If any data field has a length different from `n_articles`.
            'Failed Check' is added to the history before raising.
        """

        # Names of the attributes that must all have length n_articles
        fields = ('ids', 'titles', 'words', 'journals', 'authors',
                  'kws', 'years', 'months', 'dois')

        expected = self.n_articles

        # Stops at the first field whose length does not match
        if all(expected == len(getattr(self, field)) for field in fields):
            self.update_history('Passed Check')
            return

        # Record the failure in the history before signalling the error
        self.update_history('Failed Check')
        raise InconsistentDataError('term Words data is inconsistent.')
def save_pickle_obj(obj, f_name, db=None):
    """Save a custom object from LISC as a pickle file.

    Parameters
    ----------
    obj : Count or Words object
        LISC custom object to save out.
    f_name : str
        Name to prepend to the saved out file name.
        The file is saved as '<f_name>_counts.p' or '<f_name>_words.p'.
    db : SCDB() object, optional
        Database object for the LISC project.
        It is passed through `check_db` before use.

    Raises
    ------
    InconsistentDataError
        If `obj` is neither a Count nor a Words instance.
    """

    # Check for database object, initialize if not provided
    db = check_db(db)

    # If it's a Count object, set path and name
    if isinstance(obj, Count):
        save_name = f_name + '_counts.p'
        save_path = db.counts_path

    # If it's a Words object, set path and name
    elif isinstance(obj, Words):
        save_name = f_name + '_words.p'
        save_path = db.words_path

    # If neither, raise error as object type is unclear
    else:
        raise InconsistentDataError('Object type unclear - can not save.')

    # Save pickle file, closing the handle so the data is flushed to disk
    save_file = os.path.join(save_path, save_name)
    with open(save_file, 'wb') as out_file:
        pickle.dump(obj, out_file)