def _fit(self, n_components):
    """
    Fit a ``Base`` model on the reference data set and return it.

    The data are read from the tab-delimited file
    "fanalysis/tests/ca_data.txt"; its first line is used as the header
    and its first column as the row index.

    Parameters
    ----------
    n_components : object
        Forwarded unchanged to ``Base(n_components=...)``.

    Returns
    -------
    base : Base
        The model instance after ``fit`` has been called on the data.
    """
    df = pd.read_table("fanalysis/tests/ca_data.txt", header=0,
                       index_col=0, delimiter="\t")
    # DataFrame.as_matrix() was removed in pandas 1.0; .values returns the
    # same array and exists in every pandas release.
    M = df.values
    base = Base(n_components=n_components)
    base.fit(M)
    return base
def fit(self, X, y=None):
    """
    Fit the model to X.

    Parameters
    ----------
    X : array of string, int or float, shape (n_rows, n_vars)
        Training data, where n_rows is the number of rows and n_vars
        is the number of variables.
        X is a data table containing a category in each cell.
        Categories can be coded by strings or numeric values.

    y : None
        y is ignored.

    Returns
    -------
    self : object
        Returns the instance itself.
    """
    # Turn the categorical table into a dummy variables table.
    dummies = self._binarization(X)

    # Row sums of the dummy table, stored as a column vector, then the
    # generic factorial analysis is fitted on the dummy table.
    self.r_ = np.sum(dummies, axis=1).reshape(-1, 1)
    Base.fit(self, dummies, y=None)

    # The number of components cannot exceed n_categories_ - n_vars_;
    # when it does, cap it and truncate every per-axis result to match.
    max_axes = self.n_categories_ - self.n_vars_
    if self.n_components_ > max_axes:
        self.n_components_ = max_axes
        for attr in ("eig_", "row_coord_", "col_coord_"):
            setattr(self, attr,
                    getattr(self, attr)[:, :self.n_components_])
        if self.stats:
            for attr in ("row_contrib_", "col_contrib_",
                         "row_cos2_", "col_cos2_"):
                setattr(self, attr,
                        getattr(self, attr)[:, :self.n_components_])

    # Publish the temporary column labels (short and full) as the fitted
    # ones.
    self.col_labels_short_ = self.col_labels_short_temp_
    self.col_labels_ = self.col_labels_temp_

    self.model_ = "mca"

    return self
def _col_topandas_comparison(self, n_components=None, col_labels=False):
    """
    Compare the output of the col_topandas method with the R FactoMineR
    reference output.

    The model is fitted on "fanalysis/tests/ca_data.txt" and the table
    returned by ``col_topandas()`` is checked against the reference
    column coordinates, contributions and cos2 stored next to it, as
    well as against the expected index labels.

    Parameters
    ----------
    n_components : object
        Forwarded to ``Base(n_components=...)``; also passed, together
        with the reference eigenvalues, to ``self._adjust_n_components``
        to decide how many reference columns are kept.

    col_labels : bool
        If False, the model is built without column labels and the
        expected index is "col0", "col1", ... (one per data column).
        Otherwise the labels are loaded from
        "fanalysis/tests/ca_col_labels.txt" and given to the model.

    Raises
    ------
    AssertionError
        If the values or the index labels differ from the reference.
    """
    df = pd.read_table("fanalysis/tests/ca_data.txt", header=0,
                       index_col=0, delimiter="\t")
    # DataFrame.as_matrix() was removed in pandas 1.0; .values is the
    # equivalent that exists in every pandas release.
    M = df.values
    # Kept as an equality test (not ``not col_labels``) so that a None
    # argument still selects the "load labels from file" branch.
    if col_labels == False:
        labels = ["col" + str(x) for x in np.arange(0, M.shape[1])]
        base = Base(n_components=n_components, row_labels=None)
    else:
        labels = np.loadtxt("fanalysis/tests/ca_col_labels.txt",
                            delimiter=" ", dtype=str)
        base = Base(n_components=n_components, col_labels=labels)

    df_Y = base.fit(M).col_topandas()
    # Y is overwritten in place below, so take an explicit writable copy:
    # .values may be a read-only view of the DataFrame.
    Y = df_Y.values.copy()
    df_Y_index = df_Y.index.values
    Y_col_coord_temp = Y[:, :base.n_components_]

    eigen_values = np.loadtxt("fanalysis/tests/ca_eig.txt",
                              delimiter=" ", dtype=float)
    n_components = self._adjust_n_components(n_components, eigen_values)

    # Reference values, truncated to the adjusted number of components
    # and laid out side by side: coordinates, contributions, cos2.
    X_col_coord = np.loadtxt("fanalysis/tests/ca_col_coord.txt",
                             delimiter=" ", dtype=float)[:, :n_components]
    X_col_contrib = np.loadtxt("fanalysis/tests/ca_col_contrib.txt",
                               delimiter=" ",
                               dtype=float)[:, :n_components]
    X_col_cos2 = np.loadtxt("fanalysis/tests/ca_col_cos2.txt",
                            delimiter=" ", dtype=float)[:, :n_components]
    X = np.c_[X_col_coord, X_col_contrib, X_col_cos2]

    # test for data
    # NOTE(review): _compute_Y presumably aligns the computed coordinates
    # with the reference ones (e.g. axis sign) - confirm in the helper.
    Y_col_coord = self._compute_Y(X_col_coord, Y_col_coord_temp,
                                  "col_coord_")
    Y[:, :base.n_components_] = Y_col_coord
    assert_array_almost_equal(X, Y)

    # test for col_labels
    assert_array_equal(labels, df_Y_index)
def transform(self, X, y=None):
    """
    Apply the dimensionality reduction on X.

    X is projected on the first axes previously extracted from a
    training set.

    Parameters
    ----------
    X : array of string, int or float, shape (n_rows_sup, n_vars)
        New data, where n_rows_sup is the number of supplementary
        row points and n_vars is the number of variables.
        X is a data table containing a category in each cell.
        Categories can be coded by strings or numeric values.
        X rows correspond to supplementary row points that are
        projected onto the axes.

    y : None
        y is ignored.

    Returns
    -------
    X_new : array of float, shape (n_rows_sup, n_components_)
        Coordinates of the projections of the supplementary
        row points onto the axes.
    """
    # Dummy variables table for the supplementary rows: one column per
    # fitted column label, filled with zeros to start with.
    n_sup_rows = X.shape[0]
    n_dummy_cols = len(self.col_labels_)
    dummies = np.zeros(shape=(n_sup_rows, n_dummy_cols))

    for row in range(n_sup_rows):
        # Category labels carried by this row: variable prefix followed
        # by the string form of the cell value.
        row_categories = [self.prefixes_[var] + str(X[row, var])
                          for var in range(self.n_vars_)]
        # Flag every fitted column whose label appears in this row.
        for col, label in enumerate(self.col_labels_):
            if label in row_categories:
                dummies[row, col] = 1

    # Project the dummy table with the generic transform.
    return Base.transform(self, dummies)
def _fit_transform_comparison(self, n_components=None):
    """
    Compare the result of the fit_transform operation with the R
    FactoMineR reference output.

    Two models are built with the same settings: the first is checked
    through ``fit`` followed by ``transform``, the second through
    ``fit_transform``. Both results are compared with the reference row
    coordinates stored in "fanalysis/tests/ca_row_coord.txt".

    Parameters
    ----------
    n_components : object
        If None, the models are built with ``Base()``; otherwise with
        ``Base(n_components=n_components)``. Also passed, together with
        the reference eigenvalues, to ``self._adjust_n_components`` to
        decide how many reference columns are kept.

    Raises
    ------
    AssertionError
        If either result differs from the reference coordinates.
    """
    if n_components is None:
        base1 = Base()
        base2 = Base()
    else:
        base1 = Base(n_components=n_components)
        base2 = Base(n_components=n_components)

    eigen_values = np.loadtxt("fanalysis/tests/ca_eig.txt",
                              delimiter=" ", dtype=float)
    n_components = self._adjust_n_components(n_components, eigen_values)

    # Reference row coordinates, truncated to the adjusted number of
    # components.
    X = np.loadtxt("fanalysis/tests/ca_row_coord.txt", delimiter=" ",
                   dtype=float)[:, :n_components]

    df = pd.read_table("fanalysis/tests/ca_data.txt", header=0,
                       index_col=0, delimiter="\t")
    # DataFrame.as_matrix() was removed in pandas 1.0; .values is the
    # equivalent that exists in every pandas release.
    M = df.values

    # fit() followed by transform()
    base1.fit(M)
    Y_temp_1 = base1.transform(M)
    # NOTE(review): _compute_Y presumably aligns the computed coordinates
    # with the reference ones (e.g. axis sign) - confirm in the helper.
    Y1 = self._compute_Y(X, Y_temp_1, "row_coord_")
    assert_array_almost_equal(X, Y1)

    # fit_transform() in one call
    Y_temp_2 = base2.fit_transform(M)
    Y2 = self._compute_Y(X, Y_temp_2, "row_coord_")
    assert_array_almost_equal(X, Y2)
def __init__(self, n_components=None, row_labels=None, var_labels=None, stats=True):
    """
    Initialise the estimator.

    Parameters
    ----------
    n_components : object, default None
        Forwarded positionally to ``Base.__init__``.

    row_labels : object, default None
        Forwarded positionally to ``Base.__init__``.

    var_labels : object, default None
        Stored unchanged on the instance as ``self.var_labels``.

    stats : bool, default True
        Forwarded positionally to ``Base.__init__``.
    """
    # The third positional argument of Base.__init__ is always None here.
    # NOTE(review): presumably this slot is the column labels, which this
    # class derives itself - confirm against Base.__init__.
    Base.__init__(self, n_components, row_labels, None, stats)
    self.var_labels = var_labels