def _count_end_authors(authors):
    """Count first and last authors only.

    Parameters
    ----------
    authors : list of list of tuple of (str, str, str, str)
        Authors, as (last name, first name, initials, affiliation).

    Returns
    -------
    first_counts, last_counts : collections.Counter
        Number of publications for each first and last author.
    """

    # Pull out the full name for the first & last author of each article
    #   Last author is only considered if there is more than 1 author
    firsts = [auth[0] for auth in authors]
    f_names = [(author[0], author[2]) for author in firsts]

    lasts = [auth[-1] for auth in authors if len(auth) > 1]
    l_names = [(author[0], author[2]) for author in lasts]

    f_counts = count_elements(_fix_author_names(f_names))
    l_counts = count_elements(_fix_author_names(l_names))

    return f_counts, l_counts
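# A minimal standalone sketch of the first/last author split above, on hypothetical
# toy data. The demo function is illustrative only (not part of LISC), and plain
# collections.Counter stands in for the module's count_elements / _fix_author_names
# helpers, so name normalization is skipped.
def _demo_count_end_authors():
    from collections import Counter

    # Hypothetical authors, as (last name, first name, initials, affiliation)
    articles_authors = [
        [('Smith', 'Jane', 'JS', 'Uni A')],
        [('Lee', 'Sam', 'SL', 'Uni B'), ('Smith', 'Jane', 'JS', 'Uni A')],
    ]

    # First author is element 0 of each article's author list
    firsts = Counter((auth[0][0], auth[0][2]) for auth in articles_authors)

    # Last author is element -1, counted only when there is more than 1 author
    lasts = Counter((auth[-1][0], auth[-1][2])
                    for auth in articles_authors if len(auth) > 1)

    # firsts : {('Smith', 'JS'): 1, ('Lee', 'SL'): 1} ; lasts : {('Smith', 'JS'): 1}
    return firsts, lasts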
def __init__(self, articles, exclusions=None):
    """Initialize ArticlesAll object.

    Parameters
    ----------
    articles : Articles
        Data for all articles from a given search term.
    exclusions : list of str, optional
        Words to exclude from the word collections.

    Examples
    --------
    Create an ``ArticlesAll`` object from an :class:`~.Articles` object:

    >>> from lisc.data import Articles
    >>> articles = Articles('frontal lobe')
    >>> articles_all = ArticlesAll(articles)
    """

    # Inherit from the BaseArticles object
    BaseArticles.__init__(self, articles.term)

    # Process the article data
    if not articles.processed:
        articles = process_articles(articles)

    # Set exclusions, copying input list, if given, and adding current search terms
    exclusions = list(set((deepcopy(exclusions) if exclusions else [])
                          + [articles.term.label]
                          + articles.term.search
                          + articles.term.inclusions))

    # Copy over tracking of included IDs & DOIs
    self.ids = articles.ids
    self.dois = articles.dois

    # Get frequency distributions of years, journals, authors
    self.years = count_elements(articles.years)
    self.journals = count_elements(articles.journals)
    self.first_authors = count_elements(
        auth[0] if auth else None for auth in articles.authors)
    self.last_authors = count_elements(
        auth[-1] if auth and len(auth) > 1 else None for auth in articles.authors)
    self.authors = count_elements(combine_lists(articles.authors))

    # Convert lists of all words to frequency distributions
    self.words = count_elements(combine_lists(articles.words), exclusions)
    self.keywords = count_elements(combine_lists(articles.keywords), exclusions)

    # Initialize summary dictionary
    self.summary = dict()
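# A minimal standalone sketch of the exclusion handling above: user-provided
# exclusions are copied, merged with the term's label, search terms, and
# inclusion words, then deduplicated through a set. The demo function and the
# term values here are hypothetical, chosen only to show the merge behavior.
def _demo_build_exclusions():
    from copy import deepcopy

    # Hypothetical term attributes, mirroring the Term fields used above
    label, search, inclusions = 'frontal lobe', ['frontal lobe'], ['cortex']
    user_exclusions = ['lobe']

    exclusions = list(set((deepcopy(user_exclusions) if user_exclusions else [])
                          + [label] + search + inclusions))

    # sorted(exclusions) : ['cortex', 'frontal lobe', 'lobe']
    return exclusions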
def _count_authors(authors):
    """Count all authors.

    Parameters
    ----------
    authors : list of list of tuple of (str, str, str, str)
        Authors, as (last name, first name, initials, affiliation).

    Returns
    -------
    author_counts : collections.Counter
        Number of publications per author.
    """

    # Reduce author fields to pair of tuples (last name, initials)
    #   & count # of publications per author
    all_authors = [(author[0], author[2])
                   for art_authors in authors for author in art_authors]
    author_counts = count_elements(_fix_author_names(all_authors))

    return author_counts
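# The equivalent flatten-and-count for all authors, on the same hypothetical toy
# data as the first/last author demo; again Counter stands in for count_elements,
# without the name normalization applied by _fix_author_names.
def _demo_count_authors():
    from collections import Counter

    articles_authors = [
        [('Smith', 'Jane', 'JS', 'Uni A')],
        [('Lee', 'Sam', 'SL', 'Uni B'), ('Smith', 'Jane', 'JS', 'Uni A')],
    ]

    # Flatten across articles, reducing each author to (last name, initials)
    all_authors = [(author[0], author[2]) for art_authors in articles_authors
                   for author in art_authors]

    # Counter({('Smith', 'JS'): 2, ('Lee', 'SL'): 1})
    return Counter(all_authors)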