Esempio n. 1
0
    def __init__(self, term_data, exclusions=None):
        """Initialize ArticlesAll object.

        Parameters
        ----------
        term_data : Articles object
            Data for all articles from a given search term.
            The attributes `term`, `ids`, `dois`, `journals`, `years`,
            `authors`, `words` and `keywords` are read from it.
        exclusions : list of str, optional
            Words to exclude from the word collections.
            The search and inclusion terms stored on `self.term` are appended
            to this list before it is passed to `create_freq_dist`.
            If not provided, only those terms are used.
        """

        # Inherit from the BaseArticles object
        BaseArticles.__init__(self, term_data.term)

        # Copy over tracking of included IDs & DOIs
        self.ids = term_data.ids
        self.dois = term_data.dois

        # Get counts of authors, journals, years
        #   Only the first element of each journal entry is counted
        self.journals = count_elements([journal[0] for journal in term_data.journals])
        self.years = count_elements(term_data.years)
        self.authors = _count_authors(term_data.authors)
        self.first_authors, self.last_authors = _count_end_authors(term_data.authors)

        # Build the full exclusion list
        #   A `None` default is used rather than a shared mutable `[]` default;
        #   concatenation creates a new list, so the caller's list is never modified
        base_exclusions = [] if exclusions is None else exclusions
        exclusions = base_exclusions + self.term.search + self.term.inclusions

        # Convert lists of all words to frequency distributions
        temp_words = [convert_string(words) for words in term_data.words]
        self.words = self.create_freq_dist(combine_lists(temp_words), exclusions)
        self.keywords = self.create_freq_dist(combine_lists(term_data.keywords), exclusions)

        # Initialize summary dictionary
        self.summary = dict()
Esempio n. 2
0
def _count_end_authors(authors):
    """Count first and last authors only.

    Parameters
    ----------
    authors : list of list of tuple of (str, str, str, str)
        Authors, as (last name, first name, initials, affiliation).

    Returns
    -------
    first_counts, last_counts : collections.Counter
        Number of publications for each first and last author.

    Notes
    -----
    - Articles without any authors (an empty or missing author list) are skipped,
      rather than raising an error when indexing the first author.
    - A last author is only counted for articles with more than one author, so that
      single-author articles are not counted as both first and last author.
    """

    # Pull out (last name, initials) for the first author of each article that has authors
    f_names = [(art_authors[0][0], art_authors[0][2])
               for art_authors in authors if art_authors]

    # Pull out (last name, initials) for the last author, only if there is more than 1 author
    l_names = [(art_authors[-1][0], art_authors[-1][2])
               for art_authors in authors if art_authors and len(art_authors) > 1]

    f_counts = count_elements(_fix_author_names(f_names))
    l_counts = count_elements(_fix_author_names(l_names))

    return f_counts, l_counts
Esempio n. 3
0
    def __init__(self, term_data, exclusions=None):
        """Initialize ArticlesAll object.

        Parameters
        ----------
        term_data : Articles
            Data for all articles from a given search term.
        exclusions : list of str, optional
            Words to exclude from the word collections.
            The search and inclusion terms stored on `self.term` are always
            appended to this list, whether or not any exclusions are given.

        Examples
        --------
        Create an ``ArticlesAll`` object from an :class:`~.Articles` object:

        >>> from lisc.data import Articles
        >>> articles = Articles('frontal lobe')
        >>> articles_all = ArticlesAll(articles)
        """

        # Inherit from the BaseArticles object
        BaseArticles.__init__(self, term_data.term)

        # Copy over tracking of included IDs & DOIs
        self.ids = term_data.ids
        self.dois = term_data.dois

        # Get counts of authors, journals, years
        #   Only the first element of each journal entry is counted
        self.journals = count_elements(
            [journal[0] for journal in term_data.journals])
        self.years = count_elements(term_data.years)
        self.authors = _count_authors(term_data.authors)
        self.first_authors, self.last_authors = _count_end_authors(
            term_data.authors)

        # Build the full exclusion list
        #   The fallback to an empty list is resolved first, on its own line: a conditional
        #   expression binds more loosely than `+`, so doing both in a single expression
        #   would only add the search & inclusion terms when no exclusions were given
        base_exclusions = exclusions if exclusions else []
        exclusions = base_exclusions + self.term.search + self.term.inclusions

        # Convert lists of all words to frequency distributions
        temp_words = [convert_string(words) for words in term_data.words]
        self.words = self.create_freq_dist(combine_lists(temp_words),
                                           exclusions)
        self.keywords = self.create_freq_dist(
            combine_lists(term_data.keywords), exclusions)

        # Initialize summary dictionary
        self.summary = dict()
Esempio n. 4
0
    def __init__(self, articles, exclusions=None):
        """Initialize ArticlesAll object.

        Parameters
        ----------
        articles : Articles
            Data for all articles from a given search term.
            If not already marked as processed, it is passed through `process_articles` first.
        exclusions : list of str, optional
            Words to exclude from the word collections.
            The label, search terms and inclusion terms of the search term are added to these.

        Examples
        --------
        Create an ``ArticlesAll`` object from an :class:`~.Articles` object:

        >>> from lisc.data import Articles
        >>> articles = Articles('frontal lobe')
        >>> articles_all = ArticlesAll(articles)
        """

        # Set up the base object with the search term of the input articles
        BaseArticles.__init__(self, articles.term)

        # Make sure the article data is processed before aggregating it
        if not articles.processed:
            articles = process_articles(articles)

        # Collect the exclusions: a copy of the input list (if any), plus the current
        #   search term's label, search terms & inclusions, with duplicates dropped
        given_exclusions = deepcopy(exclusions) if exclusions else []
        term = articles.term
        term_words = [term.label] + term.search + term.inclusions
        exclusions = list(set(given_exclusions + term_words))

        # Keep the same tracking of included IDs & DOIs as the input object
        self.ids = articles.ids
        self.dois = articles.dois

        # Frequency distributions of years & journals
        self.years = count_elements(articles.years)
        self.journals = count_elements(articles.journals)

        # Frequency distributions of authors
        #   Articles with no authors contribute `None` as first author, and articles
        #   with fewer than two authors contribute `None` as last author
        self.first_authors = count_elements(
            names[0] if names else None for names in articles.authors)
        self.last_authors = count_elements(
            names[-1] if (names and len(names) > 1) else None for names in articles.authors)
        self.authors = count_elements(combine_lists(articles.authors))

        # Frequency distributions of words & keywords, with exclusions applied
        all_words = combine_lists(articles.words)
        self.words = count_elements(all_words, exclusions)
        all_keywords = combine_lists(articles.keywords)
        self.keywords = count_elements(all_keywords, exclusions)

        # Start with an empty summary
        self.summary = dict()
Esempio n. 5
0
def _count_authors(authors):
    """Count the number of publications for every author.

    Parameters
    ----------
    authors : list of list of tuple of (str, str, str, str)
        Authors, as (last name, first name, initials, affiliation).

    Returns
    -------
    author_counts : collections.Counter
        Number of publications per author.
    """

    # Flatten across articles, keeping only (last name, initials) for each author
    name_pairs = []
    for article_authors in authors:
        for entry in article_authors:
            name_pairs.append((entry[0], entry[2]))

    # Fix up the author names, then count the number of publications per author
    return count_elements(_fix_author_names(name_pairs))