	def fit(self, df, fnames, by):
		"""
		Learn conditional probability features from binary class labels, see Ref 1 chapter 6 for details.
		df: DataFrame
		fnames: features to extract from - must be categorical or discretized numerical
		(call transformation.discretize_numerical for that)
		by: binary class labels (for multiple labels, one-hot encode and fit each label separately)
		"""
		self.cprobs, self.fnames = [], []
		for f in fnames:
			if not utility.is_categorical(df, f):
				raise ValueError(f + " must be categorical, use encoding or discretization")
			# P(by | f): column-normalize the crosstab of class label vs. feature categories
			cprob = pd.crosstab(df[by], df[f])
			cprob = cprob / cprob.sum(axis = 0)
			# keep the positive-class row, indexed by the categories of f
			cprob = cprob.iloc[1, :]
			cprob.name = "%sIs%s_on_%s" % (by, cprob.name, f)
			self.cprobs.append(cprob)
			self.fnames.append(f)
		return self
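# A minimal usage sketch for the extractor above. The data, column names, the
# no-argument constructor, and the assumption that a plain string column passes
# utility.is_categorical are for illustration only: fit on the training frame, then
# join the learned cprobs onto any frame (e.g. validation data) keyed by the
# original categorical columns, mirroring _extract_cprobs_by_biclass below.
def _demo_biclass_cprob_extractor():
	train = pd.DataFrame({
		"color": ["red", "red", "blue", "blue", "green"],
		"churned": [1, 0, 1, 0, 0],
	})
	valid = pd.DataFrame({"color": ["red", "green", "blue"]})
	extractor = BiClassProbabilityFeatureExtractor().fit(train, ["color"], by = "churned")
	for f, cprob in zip(extractor.fnames, extractor.cprobs):
		# cprob is a Series indexed by the categories of f, named e.g. "churnedIs1_on_color"
		valid = valid.join(cprob, on = f)
	return valid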
def _extract_cprobs_by_biclass(df, fnames, by, copy = True):
	"""
	Deprecated - use BiClassProbabilityFeatureExtractor instead; it handles separate train and validation data better.
	Extract conditional probability features based on binary class labels,
	see Ref 1 chapter 6 for details.
	df: DataFrame
	fnames: features to extract from - the features must be categorical or discretized numerical
	(call transformation.discretize_numerical for that)
	by: binary class labels (for multiple labels, use one-hot encoding to get the cprobs features separately)
	copy: whether to copy the DataFrame or modify it in place
	"""
	result = df.copy() if copy else df
	for f in fnames:
		if not utility.is_categorical(df, f):
			raise ValueError(f + " must be categorical, use encoding or discretization")
		# P(by | f): column-normalize the crosstab and keep the positive-class row
		cprobs = pd.crosstab(df[by], df[f])
		cprobs = cprobs / cprobs.sum(axis = 0)
		cprobs = cprobs.iloc[1, :]
		cprobs.name = "%sIs%s_on_%s" % (by, cprobs.name, f)
		# join the new feature onto the frame, keyed by the original categorical column
		result = result.join(cprobs, on = f)
	return result
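# A small sanity-check sketch: for a 0/1-encoded label, the cprobs Series built above
# is simply the per-category target mean, i.e. P(by == 1 | f). The helper name and the
# assumption that by is coded 0/1 are illustrative only.
def _check_cprobs_equal_target_mean(df, f, by):
	cprobs = pd.crosstab(df[by], df[f])
	cprobs = (cprobs / cprobs.sum(axis = 0)).iloc[1, :]
	target_mean = df.groupby(f)[by].mean().reindex(cprobs.index)
	assert np.allclose(cprobs.values, target_mean.values)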
def find_categorical_features(df):
	"""Return the names of all categorical columns in df as a numpy array."""
	return np.asarray([f for f in df.columns if utility.is_categorical(df, f)])