def check_data(self, data_type='counts', dim='A'):
    """Print the most strongly associated term for each term.

    For every term in the chosen dimension, the entry with the largest
    value in the selected data matrix is located and reported.

    Parameters
    ----------
    data_type : {'counts', 'score'}
        Which data type to use.
    dim : {'A', 'B'}, optional
        Which set of terms to check.

    Raises
    ------
    ValueError
        If `data_type` is not 'counts' or 'score', or if 'score' is
        requested while `self.score` is empty.
    """

    if data_type not in ['counts', 'score']:
        raise ValueError('Data type not understood - can not proceed.')
    if data_type == 'score' and self.score.size == 0:
        raise ValueError('Score is not computed - can not proceed.')

    # Set up which direction to act across: rows always index the terms of `dim`
    data = getattr(self, data_type)
    if dim != 'A':
        data = data.T
    alt = 'B' if dim == 'A' and not self.square else 'A'

    labels = self.terms[dim].labels

    # Nothing to report; return before computing widths so that the helpers
    # are never called for an empty label set (as when the loop did not run)
    if len(labels) == 0:
        return

    alt_labels = self.terms[alt].labels

    # Field widths and number format do not change between rows, so they are
    # computed once here instead of once per printed line
    twd1 = get_max_length(labels, 2)
    twd2 = get_max_length(alt_labels, 2)
    nwd = '>10.0f' if data_type == 'counts' else '06.3f'

    # Loop through each term, find maximally associated term and print out
    for term_ind, term in enumerate(labels):

        # Find the index of the largest value in the current term's row
        assoc_ind = np.argmax(data[term_ind, :])

        print(
            "For {:{twd1}} the highest association is {:{twd2}} with {:{nwd}}"
            .format(wrap(term), wrap(alt_labels[assoc_ind]),
                    data[term_ind, assoc_ind],
                    twd1=twd1, twd2=twd2, nwd=nwd))
def check_data(self, data_type='counts', dim='A'):
    """Report, for every term, its strongest association and the value of it.

    Parameters
    ----------
    data_type : {'counts', 'score'}
        Which data type to use.
    dim : {'A', 'B'}, optional
        Which set of terms to check.

    Raises
    ------
    ValueError
        If no data is available, if `data_type` is not recognised, if the
        score is requested but has not been computed, or if the computed
        score is of type 'similarity'.

    Examples
    --------
    Print the highest count for each term (assuming `counts` already has data):

    >>> counts.check_data() # doctest: +SKIP

    Print the highest score value for each term (assuming `counts` already has data):

    >>> counts.check_data(data_type='score') # doctest: +SKIP
    """

    # Guard clauses: each precondition is checked on its own, in order
    if not self.has_data:
        raise ValueError('No data is available - cannot proceed.')

    if data_type not in ('counts', 'score'):
        raise ValueError('Data type not understood - can not proceed.')

    if data_type == 'score' and self.score.size == 0:
        raise ValueError('Score is not computed - can not proceed.')

    if data_type == 'score' and self.score_info['type'] == 'similarity':
        raise ValueError(
            'Cannot check value counts for similarity score.')

    # Orient the matrix so that rows correspond to the terms of `dim`
    values = getattr(self, data_type)
    if dim == 'B':
        values = values.T

    # The other axis is 'B' only when checking 'A' on a non-square object
    if dim == 'A' and not self.square:
        alt = 'B'
    else:
        alt = 'A'

    own_labels = self.terms[dim].labels
    other_labels = self.terms[alt].labels

    # Column widths for the two label fields, and the numeric format
    own_width = get_max_length(own_labels, 2)
    other_width = get_max_length(other_labels, 2)
    if data_type == 'counts':
        number_format = '>10.0f'
    else:
        number_format = '06.3f'

    template = "For {:{twd1}} the highest association is {:{twd2}} with {:{nwd}}"

    # One output line per term: the column holding the row maximum wins
    for row_ind, label in enumerate(own_labels):

        best_ind = np.argmax(values[row_ind, :])

        print(template.format(
            wrap(label),
            wrap(other_labels[best_ind]),
            values[row_ind, best_ind],
            twd1=own_width, twd2=other_width, nwd=number_format))
def check_counts(self, dim='A'):
    """Print the number of articles found for every term.

    Parameters
    ----------
    dim : {'A', 'B'}
        Which set of terms to check.

    Raises
    ------
    ValueError
        If no data is available.

    Examples
    --------
    Print the number of articles found for each term (assuming `counts` already has data):

    >>> counts.check_counts() # doctest: +SKIP
    """

    if not self.has_data:
        raise ValueError('No data is available - cannot proceed.')

    term_set = self.terms[dim]

    # Widths of the label column and the count column, shared by every row
    label_width = get_max_length(term_set.labels, 2)
    count_width = get_max_length(term_set.counts)

    row_template = " {:{twd}} - {:{nwd}.0f}"

    print("The number of documents found for each search term is:")
    for position, label in enumerate(term_set.labels):
        n_found = term_set.counts[position]
        print(row_template.format(
            wrap(label), n_found, twd=label_width, nwd=count_width))
def check_top(self, dim='A'):
    """Report which term has the largest article count.

    Parameters
    ----------
    dim : {'A', 'B'}, optional
        Which set of terms to check.
    """

    term_set = self.terms[dim]

    # Position of the largest entry in the per-term counts
    top_ind = np.argmax(term_set.counts)

    message = "The most studied term is {} with {} articles."
    print(message.format(wrap(term_set.labels[top_ind]),
                         term_set.counts[top_ind]))
def check_counts(self, dim='A'):
    """Check how many articles were found for each term.

    Prints a header line followed by one line per term, giving the term
    label and its article count.

    Parameters
    ----------
    dim : {'A', 'B'}, optional
        Which set of terms to check.
    """

    print("The number of documents found for each search term is:")

    labels = self.terms[dim].labels

    # With no terms only the header is printed; return before computing
    # widths so the helper is never called on an empty collection
    if len(labels) == 0:
        return

    counts = self.terms[dim].counts

    # Column widths are the same for every row, so compute them once here
    # rather than rescanning all labels and counts for each printed line
    twd = get_max_length(labels, 2)
    nwd = get_max_length(counts)

    for ind, term in enumerate(labels):
        print(" {:{twd}} - {:{nwd}.0f}".format(
            wrap(term), counts[ind], twd=twd, nwd=nwd))
def check_top(self, dim='A'):
    """Print the term that has the most articles, along with that count.

    Parameters
    ----------
    dim : {'A', 'B'}, optional
        Which set of terms to check.

    Examples
    --------
    Print which term has the most articles (assuming `counts` already has data):

    >>> counts.check_top() # doctest: +SKIP
    """

    article_counts = self.terms[dim].counts
    term_labels = self.terms[dim].labels

    # Index of the maximum count selects both the label and the number shown
    winner = np.argmax(article_counts)
    top_label = wrap(term_labels[winner])
    top_count = article_counts[winner]

    print("The most studied term is {} with {} articles.".format(
        top_label, top_count))
def check_counts(self, dim='A'):
    """Check how many articles were found for each term.

    Prints a header line followed by one line per term, giving the term
    label and its article count.

    Parameters
    ----------
    dim : {'A', 'B'}, optional
        Which set of terms to check.

    Examples
    --------
    Print the number of articles found for each term (assuming `counts` already has data):

    >>> counts.check_counts() # doctest: +SKIP
    """

    print("The number of documents found for each search term is:")

    labels = self.terms[dim].labels

    # With no terms only the header is printed; return before computing
    # widths so the helper is never called on an empty collection
    if len(labels) == 0:
        return

    counts = self.terms[dim].counts

    # Calculate widths for printing once: they do not vary between rows, so
    # there is no need to rescan every label and count for each line
    twd = get_max_length(labels, 2)
    nwd = get_max_length(counts)

    for ind, term in enumerate(labels):
        print(" {:{twd}} - {:{nwd}.0f}".format(
            wrap(term), counts[ind], twd=twd, nwd=nwd))