def run_collection(self, db='pubmed', field='TIAB', api_key=None,
                   logging=None, directory=None, verbose=False):
    """Collect co-occurrence data for the terms attached to this object.

    Parameters
    ----------
    db : str, optional, default: 'pubmed'
        Name of the EUtils database to query.
    field : str, optional, default: 'TIAB'
        Search field in which to look for each term.
        The default, 'TIAB', is Title/Abstract.
    api_key : str, optional
        API key for an NCBI account.
    logging : {None, 'print', 'store', 'file'}, optional
        Type of logging to apply to requested URLs, if any.
    directory : str or SCDB object, optional
        Folder or database object that specifies the save location.
    verbose : bool, optional, default: False
        Whether to print out updates.

    Notes
    -----
    Nothing is returned. The results are stored on the object: `square`, `counts`
    and `meta_data` are set, as is the `counts` attribute of each collected term set.
    """

    # With no data for 'B', there is a single list of terms, run against itself ('square' mode)
    run_square = not self.terms['B'].has_data
    self.square = run_square

    # The 'A' term definitions are part of the query in both modes
    set_a = self.terms['A']
    query = dict(terms_a=set_a.terms,
                 inclusions_a=set_a.inclusions,
                 exclusions_a=set_a.exclusions)

    # The 'B' term definitions are only included when running two different sets of terms
    if not run_square:
        set_b = self.terms['B']
        query.update(terms_b=set_b.terms,
                     inclusions_b=set_b.inclusions,
                     exclusions_b=set_b.exclusions)

    outputs = collect_counts(**query, db=db, field=field, api_key=api_key,
                             logging=logging, directory=directory, verbose=verbose)

    if run_square:
        # In square mode, the middle output holds the counts for the 'A' terms
        self.counts, set_a.counts, self.meta_data = outputs
    else:
        # With two sets, the middle output is a pair: counts for 'A', then counts for 'B'
        self.counts, term_counts, self.meta_data = outputs
        set_a.counts, set_b.counts = term_counts
###################################################################################################

# Bring in the data collection function, plus helpers for analyzing co-occurrence data
from lisc.collect import collect_counts
from lisc.analysis.counts import compute_normalization, compute_association_index

###################################################################################################

# Define search terms: each term is a list of strings, and each set is a list of terms
# Only `terms_a` is used by the single-list collection in this section
terms_a = [['protein'], ['gene']]
terms_b = [['heart'], ['lung']]

###################################################################################################

# Run the collection on one list of terms, gathering co-occurrence data
coocs, term_counts, meta_dat = collect_counts(terms_a, db='pubmed', verbose=True)

###################################################################################################

# Show the number of articles found for each combination of terms
print(coocs)

###################################################################################################

# Show the number of articles found for each individual term, one row per term
row_template = '{:12} : {}'
for cur_term, n_articles in zip(terms_a, term_counts):
    print(row_template.format(cur_term[0], n_articles))

###################################################################################################
#
# When given a single set of terms, the function collects counts of each term
# Import function to collect data, and helper functions to analyze co-occurrence data
import lisc
from lisc.collect import collect_counts
from lisc.analysis.counts import compute_normalization, compute_association_index

# Set the organism names to search for, as one single-string term per organism
# NOTE: 'Homo sapiens' was previously garbled as 'H**o sapiens', which is not the intended query
terms_a = [['Salmonella enterica'], ['Escherichia coli'], ['Sus scrofa'],
           ['Homo sapiens'], ['Mus musculus']]

# Collect 'counts' (co-occurrence data) across a single list of terms,
# querying the 'nucleotide' database
# NOTE(review): no `field` is passed here - confirm the search field that `collect_counts`
# uses by default is valid for the 'nucleotide' database
coocs, term_counts, meta_dat = collect_counts(terms_a, db='nucleotide', verbose=True)

# Check how many results were found for each combination of terms
print(coocs)

# Print out how many results were found for each term
# The label column is sized to the longest label, so that the counts line up
# (a fixed width of 12 is shorter than labels such as 'Salmonella enterica')
label_width = max(len(term[0]) for term in terms_a)
for term, count in zip(terms_a, term_counts):
    print('{:{width}} : {}'.format(term[0], count, width=label_width))
def run_collection(self, db='pubmed', field='TIAB', api_key=None, logging=None,
                   directory=None, verbose=False, **eutils_kwargs):
    """Collect co-occurrence data for the terms attached to this object.

    Parameters
    ----------
    db : str, optional, default: 'pubmed'
        Name of the EUtils database to query.
    field : str, optional, default: 'TIAB'
        Search field in which to look for each term.
        The default, 'TIAB', is Title/Abstract.
    api_key : str, optional
        API key for an NCBI account.
    logging : {None, 'print', 'store', 'file'}, optional
        Type of logging to apply to requested URLs, if any.
    directory : str or SCDB, optional
        Folder or database object that specifies the save location.
    verbose : bool, optional, default: False
        Whether to print out updates.
    **eutils_kwargs
        Additional settings for the EUtils API.

    Notes
    -----
    Nothing is returned. The results are stored on the object: `square`, `counts`
    and `meta_data` are set, as is the `counts` attribute of each collected term set.

    Examples
    --------
    Collect co-occurrence data from added terms, across one set of terms:

    >>> counts = Counts()
    >>> counts.add_terms(['frontal lobe', 'temporal lobe', 'parietal lobe', 'occipital lobe'])
    >>> counts.run_collection() # doctest: +SKIP

    Collect co-occurrence data from added terms, across two sets of terms:

    >>> counts = Counts()
    >>> counts.add_terms(['frontal lobe', 'temporal lobe', 'parietal lobe', 'occipital lobe'])
    >>> counts.add_terms(['attention', 'perception', 'cognition'], dim='B')
    >>> counts.run_collection() # doctest: +SKIP
    """

    # With no 'B' terms, there is a single list of terms, run against itself ('square' mode)
    set_b = self.terms['B']
    run_square = not set_b.has_terms
    self.square = run_square

    # Arguments that are passed along in both modes: the 'A' terms & the request settings
    set_a = self.terms['A']
    args_a = dict(terms_a=set_a.terms,
                  inclusions_a=set_a.inclusions,
                  exclusions_a=set_a.exclusions,
                  labels_a=set_a.labels)
    request_args = dict(db=db, field=field, api_key=api_key,
                        logging=logging, directory=directory, verbose=verbose)

    if run_square:
        # In square mode, the middle output holds the counts for the 'A' terms
        self.counts, set_a.counts, self.meta_data = collect_counts(
            **args_a, **request_args, **eutils_kwargs)
        return

    # Otherwise, run two different sets of terms, adding the 'B' definitions to the query
    args_b = dict(terms_b=set_b.terms,
                  inclusions_b=set_b.inclusions,
                  exclusions_b=set_b.exclusions,
                  labels_b=set_b.labels)
    self.counts, term_counts, self.meta_data = collect_counts(
        **args_a, **args_b, **request_args, **eutils_kwargs)

    # With two sets, the middle output is a pair: counts for 'A', then counts for 'B'
    set_a.counts, set_b.counts = term_counts