Beispiel #1
0
    def fit(self, BET, target, c=0.1):
        """Fit a ridge-regularised linear regression from a Basic Element Table.

        The coefficients are obtained from covariances only: the feature
        covariance matrix and the feature/target covariance vector are both
        derived from ``BET`` via ``artml.explore.stats.covariance``.

        Parameters
        ----------
        BET : pandas.DataFrame
            Basic Element Table whose row and column labels are the variable
            names (rows and columns are assumed to be in the same order).
            Each cell is read positionally: element 0 is used as a count and
            element 1 as the sum of the row variable -- presumably the ART-M
            cell layout; confirm against the BET builder.
        target : hashable
            Label of the response variable; must be present in both the row
            index and the columns of ``BET``.
        c : float, default 0.1
            Ridge penalty added to the diagonal of the feature covariance
            matrix before solving for the coefficients.

        Returns
        -------
        tuple
            ``(intercept, Beta_array)``. The same values are stored on the
            instance as ``self.intercept_`` and ``self.Beta_array``.

        Raises
        ------
        ValueError
            If ``target`` is not a row label of ``BET``.
        numpy.linalg.LinAlgError
            If the regularised covariance matrix is singular.
        """
        from artml.explore import stats

        target_index = list(BET.index).index(target)

        # Covariance among the features only (target row and column removed).
        BET_features = BET.drop(target, axis=1).drop(target, axis=0)
        cov_features = stats.covariance(BET_features).values

        # Covariance of each feature with the target: the target's row of the
        # full covariance matrix without its own (variance) entry.
        cov_target = np.delete(stats.covariance(BET).values[target_index],
                               target_index)

        # Ridge estimate: solve (S_xx + c*I) beta = S_xy.  The penalty has to
        # be applied to the covariance matrix itself, not to its inverse.
        penalty = c * np.identity(len(cov_features))
        Beta_array = np.linalg.solve(cov_features + penalty, cov_target)

        # Read the means positionally so the caller's BET is never re-indexed.
        target_column = BET[target]
        target_position = list(BET.columns).index(target)
        target_cell = target_column.iloc[target_position]
        mean_target = target_cell[1] / target_cell[0]

        mean_X = [
            cell[1] / cell[0]
            for position, cell in enumerate(target_column)
            if position != target_position
        ]

        self.Beta_array = Beta_array
        self.intercept_ = mean_target - np.matmul(Beta_array, mean_X)

        return (self.intercept_, Beta_array)
Beispiel #2
0
    def fit(self, BET, *targets):
        """Estimate per-target feature means, log weights and feature covariance.

        Parameters
        ----------
        BET : pandas.DataFrame
            Basic Element Table with one row and one column per variable
            (rows and columns are assumed to be in the same order). Cells are
            read positionally: element 10 divided by element 6 is used as the
            mean of the row variable for a given target column -- presumably
            the cross-product sum over the target sum in the ART-M cell
            layout; confirm against the BET builder.
        *targets : hashable
            Column labels of the target (class indicator) variables. Every
            other column is treated as a feature.

        Returns
        -------
        None
            Results are stored on the instance:

            * ``self.mean_`` -- one list of feature means per target,
            * ``self.prob_`` -- ``log`` of element 6 of each target column,
            * ``self.covaraince_`` -- feature-by-feature covariance as a
              NumPy array (attribute name kept as-is for compatibility).
        """
        # Positionally indexed copy: the caller's BET keeps its row labels.
        bet = BET.reset_index(drop=True)
        keys = list(bet.columns)
        feature_rows = [row for row in range(len(bet))
                        if keys[row] not in targets]

        mean_ = []
        prob_ = []
        for target in targets:
            column = bet[target]
            mean_.append([column.iloc[row][10] / column.iloc[row][6]
                          for row in feature_rows])
            # Element 6 is taken from the last row, as before; it is
            # presumably identical in every row of a given target column.
            prob_.append(math.log(column.iloc[-1][6]))

        features = [name for name in keys if name not in targets]
        covariance = stats.covariance(bet)
        # ``DataFrame.as_matrix`` was removed in pandas 1.0; ``.values`` is
        # the equivalent that works across pandas versions.
        covaraince_ = covariance.loc[features][features].values

        self.prob_ = prob_
        self.mean_ = mean_
        self.covaraince_ = covaraince_
Beispiel #3
0
    def fit(self, BET):
        """Extract principal components from a Basic Element Table (BET).

        Principal component analysis (PCA) is a classical statistical method
        that uses an orthogonal transformation to convert a set of
        observations of possibly correlated variables into a set of values of
        linearly uncorrelated variables called principal components.

        Here the components are obtained by eigen-decomposing the covariance
        matrix that ``artml.explore.stats.covariance`` derives from the BET.
        The eigenvectors and eigenvalues are printed, followed by the
        eigenvalue magnitudes in descending order.

        Parameters
        ----------
        BET : pandas.DataFrame
            Basic Element Table accepted by ``stats.covariance``.

        Returns
        -------
        tuple
            ``(eig_vals, eig_vecs)`` in the order produced by
            ``numpy.linalg.eig`` (not sorted); column ``eig_vecs[:, i]`` is
            the eigenvector belonging to ``eig_vals[i]``.
        """
        from artml.explore import stats

        cov_mat = stats.covariance(BET).values
        eig_vals, eig_vecs = np.linalg.eig(cov_mat)

        print('Eigenvectors: \n%s' % (eig_vecs,))
        print('\nEigenvalues: \n%s' % (eig_vals,))

        # Pair each eigenvalue magnitude with its eigenvector (a column of
        # eig_vecs, i.e. a row of the transpose).
        eig_pairs = list(zip(np.abs(eig_vals), eig_vecs.T))

        # Order the pairs from the largest to the smallest magnitude.
        eig_pairs.sort(key=lambda pair: pair[0], reverse=True)

        # Print the sorted magnitudes so the ordering can be checked visually.
        print('\nEigenvalues in descending order:')
        for magnitude, _ in eig_pairs:
            print(magnitude)

        return eig_vals, eig_vecs