Python ScaledFScore Examples

Programming Language: Python

Namespace/Package Name: scattertext.termscoring

Class/Type: ScaledFScore

Examples at hotexamples.com: 8

Python ScaledFScore - 8 examples found. These are the top-rated real-world Python examples of scattertext.termscoring.ScaledFScore, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

get_scores(4)

get_scores_for_category(2)

Example #1

Show file

 def _get_default_scores(self, category, df):
     '''
     Compute the default term scores for `category` via ScaledFScore.get_scores.

     Parameters
     ----------
     category : str
         Category name; its counts are read from the `<category> freq` column.
     df : frame indexable by column name (presumably a pd.DataFrame -- confirm
         against caller)

     Returns
     -------
     Whatever ScaledFScore.get_scores returns for the two count vectors.
     '''
     freq_column = category + ' freq'
     in_category_counts = df[freq_column]
     # Counts outside the category are delegated to a helper on this object.
     out_of_category_counts = self._get_not_category_term_frequency(freq_column, df)
     return ScaledFScore.get_scores(in_category_counts, out_of_category_counts)

Example #2

Show file

 def _get_default_scores(self, category, other_categories, df):
     '''
     Compute the default term scores for `category` against `other_categories`.

     Parameters
     ----------
     category : str
         Category name; its counts are read from the `<category> freq` column.
     other_categories : iterable of str
         Names whose `<name> freq` columns are summed row-wise to form the
         counts outside the category.
     df : frame indexable by column name (presumably a pd.DataFrame -- confirm
         against caller)

     Returns
     -------
     Whatever ScaledFScore.get_scores returns for the two count vectors.
     '''
     in_category_counts = df[category + ' freq']
     other_freq_columns = [name + ' freq' for name in other_categories]
     out_of_category_counts = df[other_freq_columns].sum(axis=1)
     return ScaledFScore.get_scores(in_category_counts, out_of_category_counts)

Example #3

Show file

 def get_scaled_f_scores_vs_background(self,
                                       scaler_algo=DEFAULT_BACKGROUND_SCALER_ALGO,
                                       beta=DEFAULT_BACKGROUND_BETA):
     '''
     Score the corpus term counts against the background counts.

     Parameters
     ----------
     scaler_algo
         Forwarded to ScaledFScore.get_scores_for_category; defaults to
         DEFAULT_BACKGROUND_SCALER_ALGO.
     beta
         Forwarded to ScaledFScore.get_scores_for_category; defaults to
         DEFAULT_BACKGROUND_BETA.

     Returns
     -------
     The frame from get_term_and_background_counts with an added
     'Scaled f-score' column, sorted by that column in descending order.
     '''
     counts = self.get_term_and_background_counts()
     corpus_counts = counts['corpus']
     background_counts = counts['background']
     counts['Scaled f-score'] = ScaledFScore.get_scores_for_category(
         corpus_counts, background_counts, scaler_algo, beta)
     # Highest-scoring terms come first.
     return counts.sort_values(by='Scaled f-score', ascending=False)

Example #4

Show file

File: ScatterChart.py Project: udibr/scattertext

 def _get_default_scores(self, category, df):
     '''
     Compute the default term scores for `category` via ScaledFScore.get_scores.

     The counts outside the category are the row-wise sum of every column of
     `df` other than `<category> freq`.
     NOTE(review): this sums *all* remaining columns, so it assumes `df` holds
     only per-category frequency columns -- confirm against caller.

     Parameters
     ----------
     category : str
         Category name; its counts are read from the `<category> freq` column.
     df : frame exposing `.columns` and column selection (presumably a
         pd.DataFrame -- confirm against caller)

     Returns
     -------
     Whatever ScaledFScore.get_scores returns for the two count vectors.
     '''
     target_column = category + ' freq'
     in_category_counts = df[target_column]
     remaining_columns = [name for name in df.columns if name != target_column]
     out_of_category_counts = df[remaining_columns].sum(axis=1)
     return ScaledFScore.get_scores(in_category_counts, out_of_category_counts)

Example #5

Show file

File: TermCategoryFrequencies.py Project: JasonKessler/scattertext

	def get_scaled_f_scores_vs_background(
			self,
			scaler_algo=DEFAULT_BACKGROUND_SCALER_ALGO,
			beta=DEFAULT_BACKGROUND_BETA):
		'''
		Score the corpus term counts against the background counts.

		Parameters
		----------
		scaler_algo
			Forwarded to ScaledFScore.get_scores_for_category; defaults to
			DEFAULT_BACKGROUND_SCALER_ALGO.
		beta
			Forwarded to ScaledFScore.get_scores_for_category; defaults to
			DEFAULT_BACKGROUND_BETA.

		Returns
		-------
		The frame from get_term_and_background_counts with an added
		'Scaled f-score' column, sorted by that column in descending order.
		'''
		term_counts = self.get_term_and_background_counts()
		scaled_f_scores = ScaledFScore.get_scores_for_category(
			term_counts['corpus'], term_counts['background'], scaler_algo, beta)
		term_counts['Scaled f-score'] = scaled_f_scores
		# Highest-scoring terms come first.
		return term_counts.sort_values(by='Scaled f-score', ascending=False)

Example #6

Show file

File: TermDocMatrixWithoutCategories.py Project: yangyang0477/scattertext

 def get_scaled_f_scores_vs_background(
         self,
         scaler_algo=DEFAULT_BACKGROUND_SCALER_ALGO,
         beta=DEFAULT_BACKGROUND_BETA):
     '''
     Rank terms by their scaled f-score relative to the background corpus.

     Parameters
     ----------
     scaler_algo : str
         Scaling algorithm name, see get_scaled_f_scores.  Defaults to
         DEFAULT_BACKGROUND_SCALER_ALGO (stated as 'none' by the original
         docstring -- confirm against the constant).
     beta : float
         Defaults to DEFAULT_BACKGROUND_BETA (stated as 1. by the original
         docstring -- confirm against the constant).

     Returns
     -------
     pd.DataFrame of term and background counts with a 'Scaled f-score'
     column, ordered from highest to lowest score.
     '''
     background_df = self.get_term_and_background_counts()
     corpus_freq = background_df['corpus']
     background_freq = background_df['background']
     background_df['Scaled f-score'] = ScaledFScore.get_scores_for_category(
         corpus_freq, background_freq, scaler_algo, beta)
     return background_df.sort_values(by='Scaled f-score', ascending=False)

Example #7

Show file

File: ScaledFScoreSignificance.py Project: zluckyhou/scattertext

	def get_p_vals(self, X):
		'''
		Derive p-values from `ScaledFScore` scores by standardizing them and
		evaluating the normal CDF.  This treats the scaled f-scores as normally
		distributed, which (as the original author notes) is not actually true.

		Parameters
		----------
		X : np.array
			Array of word counts, shape (N, 2) where N is the vocab size.  X[:,0] is the
			positive class, while X[:,1] is the negative class.

		Returns
		-------
		np.array of p-values

		'''
		positive_counts, negative_counts = X[:, 0], X[:, 1]
		f_scores = ScaledFScore.get_scores(
			positive_counts, negative_counts, self.scaler_algo, self.beta)
		deviations = f_scores - np.mean(f_scores)
		# The denominator is the standard error (std / sqrt(n)), not the plain std.
		standard_error = np.std(f_scores) / np.sqrt(len(f_scores))
		return norm.cdf(deviations / standard_error)

Example #8

Show file

File: TermDocMatrixWithoutCategories.py Project: JasonKessler/scattertext

 def get_scaled_f_scores_vs_background(
         self,
         scaler_algo=DEFAULT_BACKGROUND_SCALER_ALGO,
         beta=DEFAULT_BACKGROUND_BETA):
     '''
     Compare corpus term counts with background counts using scaled f-scores.

     Parameters
     ----------
     scaler_algo : str
         Scaling algorithm name, see get_scaled_f_scores.  Defaults to
         DEFAULT_BACKGROUND_SCALER_ALGO (stated as 'none' by the original
         docstring -- confirm against the constant).
     beta : float
         Defaults to DEFAULT_BACKGROUND_BETA (stated as 1. by the original
         docstring -- confirm against the constant).

     Returns
     -------
     pd.DataFrame of term and background counts with a 'Scaled f-score'
     column, sorted so the highest scores come first.
     '''
     score_column = 'Scaled f-score'
     frame = self.get_term_and_background_counts()
     frame[score_column] = ScaledFScore.get_scores_for_category(
         frame['corpus'], frame['background'], scaler_algo, beta)
     return frame.sort_values(by=score_column, ascending=False)