# Requires: import math; import numpy as np; import pandas as pd
#           from skcriteria import Data, MAX
#           from skcriteria.madm import simple, closeness
def execute(self):
    alternative_names = self._X.columns.tolist()
    criterion_names = list(self._key_features.keys())
    criteria = [MAX for _ in criterion_names]
    # Equal weight for every criterion (the original 0-based weights would
    # silently give the first criterion zero weight and would not sum to 1).
    weights = [1 / len(criterion_names) for _ in criterion_names]

    df = pd.DataFrame(self._key_features, index=alternative_names)
    data = Data(df.values,  # .as_matrix() was removed in recent pandas
                criteria,
                weights,
                anames=df.index.tolist(),
                cnames=df.columns.tolist())
    #if self._verbose:
    #    data.plot("radar")

    dm1 = simple.WeightedSum()
    dm2 = simple.WeightedProduct()
    dm3 = closeness.TOPSIS()
    dec1 = dm1.decide(data)
    dec2 = dm2.decide(data)
    dec3 = dm3.decide(data)

    ranks = [dec1.rank_, dec2.rank_, dec3.rank_]
    self._ensemble_results = pd.DataFrame(
        {
            "TOPSIS": dec3.rank_,
            "WeightedSum": dec1.rank_,
            "WeightedProduct": dec2.rank_
        },
        index=df.index.tolist())

    # Only keep features that our decision makers deemed in the top % specified
    num_features_requested = math.ceil(
        len(alternative_names) * self._featurePercentage)
    sum_ranks = sum(ranks)
    argmin_sorted = np.argpartition(sum_ranks, num_features_requested)

    self._kept_features = []
    count = 0
    for i in argmin_sorted:
        self._kept_features.append(alternative_names[i])
        count += 1
        if count >= num_features_requested:
            break

    if self._verbose:
        print("", self._featurePercentage * 100,
              " % -> (" + str(num_features_requested) + ") features kept.")
        print(self._kept_features)

    return self._ensemble_results, self._kept_features
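# Toy illustration of the selection step above (values are made up): the three
# decision makers' rank vectors are summed per feature, and np.argpartition
# picks the indices with the smallest combined rank.
import numpy as np

ranks = [np.array([1, 3, 2, 4]),   # e.g. WeightedSum ranks
         np.array([2, 3, 1, 4]),   # e.g. WeightedProduct ranks
         np.array([1, 4, 2, 3])]   # e.g. TOPSIS ranks
sum_ranks = sum(ranks)                    # array([ 4, 10,  5, 11])
keep = np.argpartition(sum_ranks, 2)[:2]  # indices of the 2 best-ranked features
print(keep)                               # e.g. [0 2] (order within the kept set is arbitrary)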
# Requires: from skcriteria import Data, MIN
#           from skcriteria.madm import simple
def rank_label(df_lab, wgt):
    criteria_data = Data(
        df_lab.iloc[:, 1:5],         # the pandas dataframe
        [MIN, MIN, MIN, MIN],        # direction of goodness for each column
        anames=df_lab['Index'],      # each entity's name, here the car name
        cnames=df_lab.columns[1:5],  # attribute/column names
        weights=wgt                  # weights for each attribute (optional)
    )

    df_lab_copy = df_lab.copy()

    # weighted sum, sumNorm
    dm = simple.WeightedSum(mnorm="sum")
    dec = dm.decide(criteria_data)
    #print(dec)
    df_lab_copy.loc[:, 'rank_weightedSum_sumNorm_inverse'] = dec.rank_

    # weighted sum, maxNorm
    dm = simple.WeightedSum(mnorm="max")
    dec = dm.decide(criteria_data)
    df_lab_copy.loc[:, 'rank_weightedSum_maxNorm_inverse'] = dec.rank_

    # weighted product, sumNorm
    dm = simple.WeightedProduct(mnorm="sum")
    dec = dm.decide(criteria_data)
    df_lab_copy.loc[:, 'rank_weightedProduct_sumNorm_inverse'] = dec.rank_

    # weighted product, maxNorm
    dm = simple.WeightedProduct(mnorm="max")
    dec = dm.decide(criteria_data)
    df_lab_copy.loc[:, 'rank_weightedProduct_maxNorm_inverse'] = dec.rank_

    # sort for better visualization
    df_lab_copy.sort_values(by=['rank_weightedSum_sumNorm_inverse'], inplace=True)

    return df_lab_copy
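# A minimal usage sketch of rank_label. The column names, values, and weights
# below are made up for illustration; the function only assumes an 'Index'
# column followed by four numeric criteria, all to be minimized.
import pandas as pd
from skcriteria import Data, MIN
from skcriteria.madm import simple

cars = pd.DataFrame({
    "Index": ["car_a", "car_b", "car_c"],
    "price": [20000, 25000, 18000],
    "fuel_consumption": [6.5, 8.0, 7.2],
    "maintenance_cost": [500, 700, 450],
    "emissions": [120, 160, 140],
})
ranked = rank_label(cars, wgt=[0.4, 0.2, 0.2, 0.2])
print(ranked[["Index", "rank_weightedSum_sumNorm_inverse"]])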
print("Generate a kernel of best alternatives?", dec.beta_solution_) print("Choose the best alternative?", dec.gamma_solution_) #The rank as numpy array (if this decision is a 𝛼-solution) print(dec.rank_) #The index of the row of the best alternative (if this decision is a 𝛾-solution) print(dec.best_alternative_, data.anames[dec.best_alternative_]) #And the kernel of the non supered alternatives (if this decision is a 𝛽-solution) # this return None because this # decision is not a beta-solution print(dec.kernel_) dm = simple.WeightedProduct() print(dm) dec = dm.decide(data) print(dec) #TOPSIS dm = closeness.TOPSIS() print(dm) dec = dm.decide(data) print(dec) #The TOPSIS add more information into the decision object. print(dec.e_) print("Ideal:", dec.e_.ideal)
# Requires: import streamlit as st; import pandas as pd
#           from skcriteria import Data, MAX, MIN
#           from skcriteria.madm import simple
#           from sklearn.preprocessing import MinMaxScaler
def f1():
    st.title("Fast Ranking")
    st.markdown("<description> Rapid ranking based on indicator data </description>",
                unsafe_allow_html=True)
    st.sidebar.title("")

    #--------------------------------------------------#
    # Upload data
    #--------------------------------------------------#
    st.markdown("## 1. Upload Your File", unsafe_allow_html=True)
    data_file = st.file_uploader("Supported Format: xlsx", type=["xlsx"])

    # read data
    if data_file is not None:
        inputData = pd.read_excel(data_file, engine="openpyxl")

        # show the head of the data
        is_show_data = st.checkbox("Show content of your file?")
        st.markdown("The top 5 rows of your uploaded data", unsafe_allow_html=True)
        if is_show_data:
            st.write(inputData.head())

        #--------------------------------------------------#
        # Settings
        #--------------------------------------------------#
        st.markdown("## ", unsafe_allow_html=True)
        st.markdown("## 2. Settings", unsafe_allow_html=True)

        numerics = ["int16", "int32", "int64", "float16", "float32", "float64"]
        numDF = inputData.select_dtypes(include=numerics)
        numCols = numDF.columns.tolist()

        # non-numerical columns
        idCols = [col for col in inputData.columns.tolist() if col not in numCols]

        #--------------------------------------------------#
        # Decide each indicator's relationship with the ranking
        #--------------------------------------------------#
        st.markdown("### Select Positive Indicators", unsafe_allow_html=True)

        # select positive indicators
        stPos = st.multiselect("Positive Indicator Columns", numCols)
        unselectedCols = [col for col in numCols if col not in stPos]

        # select negative indicators
        st.markdown("### Select Negative Indicators", unsafe_allow_html=True)
        stNeg = st.multiselect("Negative Indicator Columns", unselectedCols)

        #--------------------------------------------------#
        # Collect the information provided by the user
        #--------------------------------------------------#
        criteria = []
        selectedCols = []
        for colName in numCols:
            if colName in stPos:
                criteria.append(MAX)
                selectedCols.append(colName)
            elif colName in stNeg:
                criteria.append(MIN)
                selectedCols.append(colName)
            else:
                pass
        print("criteria", criteria)
        print(selectedCols)

        #--------------------------------------------------#
        # Run Ranking Algorithm
        #--------------------------------------------------#
        st.markdown("## ", unsafe_allow_html=True)
        if len(selectedCols) > 0:
Run Ranking Algorithm",unsafe_allow_html = True) st.markdown("## ",unsafe_allow_html = True) sortData = st.radio("Sort results by ranks?",("Yes", "No")) if st.button("Run"): # X X = inputData[selectedCols] # fill NA X.fillna(0,inplace=True) # scaling X min_max_scaler =MinMaxScaler() X_scaled_values = min_max_scaler.fit_transform(X.values) # format as dataframe X_scaled = pd.DataFrame(X_scaled_values, columns=X.columns) # prepare input data for ranking criteria_data = Data(X_scaled, criteria, cnames = X_scaled.columns) #------------------------------------# # ranking algorithm: WeightedProduct #------------------------------------# dp = simple.WeightedProduct() # run ranking algorithm dec_dp = dp.decide(criteria_data) #--------------------------------------------------# # Save results #--------------------------------------------------# # add ranking result back to selected data X.loc[:, "Rank_using_selected_columns"] = dec_dp.rank_ # add id cols back outputData = pd.concat([inputData[idCols].reset_index(drop=True), X], axis =1) # ask users whether sorting is needed if sortData == "Yes": st.write("Results are sorted by ranks") saveData =outputData.sort_values(by ="Rank_using_selected_columns") else: saveData = outputData #--------------------------------------------------# # Download results #--------------------------------------------------# st.markdown("## ", unsafe_allow_html = True) st.markdown("## 4. Download Results", unsafe_allow_html = True) st.markdown("The top 5 rows of results", unsafe_allow_html = True) st.write(saveData.head()) st.markdown(get_table_download_link(saveData), unsafe_allow_html=True)
# (tail of a plotting call whose beginning is omitted from this excerpt)
#     ..., xticklabels=attribute_names, fmt='.2g')

###########
# print the final ranking table with different multi-criteria decision makers
dm = simple.WeightedSum()
dec = dm.decide(criteria_data)
print(dec)
print(dec.e_.points)  # print each alternative's aggregated score
print(dec.rank_)      # print the ranks
print("==============================")

dm = simple.WeightedProduct()
dec = dm.decide(criteria_data)
print(dec)
print(dec.e_.points)  # print each alternative's aggregated score
print(dec.rank_)      # print the ranks
print("==============================")

dm = closeness.TOPSIS()
dec = dm.decide(criteria_data)
print(dec)
print("Ideal:", dec.e_.ideal)
print("Anti-Ideal:", dec.e_.anti_ideal)
print("Closeness:", dec.e_.closeness)
######################
def flow(self,
         models_to_flow=[],
         params=None,
         test_size=0.2,
         nfolds=3,
         nrepeats=3,
         n_jobs=1,
         metrics=[],
         verbose=False,
         regressors=True,
         ensemble=False,
         featurePercentage=0.25):

    # Enforce parameters
    assert isinstance(nfolds, int), "nfolds must be integer"
    assert isinstance(nrepeats, int), "nrepeats must be integer"
    assert isinstance(n_jobs, int), "n_jobs must be integer"
    assert isinstance(verbose, bool), "verbose must be bool"
    assert isinstance(params, dict), "params must be a dict"
    assert isinstance(test_size, float), "test_size must be a float"
    assert isinstance(metrics, list), "model scoring must be a list"
    assert isinstance(regressors, bool), "regressors must be bool"
    assert isinstance(ensemble, bool), "ensemble must be bool"

    # Enforce logic for regressors
    #if regressors:
    #    assert not any(["c" in k.split("__") for k, v in params.items()]), \
    #        "You selected classifiers with the regressors flag true. Comon' man!"
    #else:
    #    assert not any(["r" in k.split("__") for k, v in params.items()]), \
    #        "You selected regressors with the regressors flag false. Comon' man!"

    self._nfolds = nfolds
    self._nrepeats = nrepeats
    self._n_jobs = n_jobs
    self._verbose = verbose
    self._allParams = params
    self._metrics = metrics
    self._test_size = test_size
    self._regressors = regressors
    self._ensemble = ensemble
    self._featurePercentage = featurePercentage

    # Inform the user of the streamline.
    stringbuilder = ""
    for thing in models_to_flow:
        stringbuilder += thing
        stringbuilder += " --> "

    if self._verbose:
        if self._regressors:
            print("*************************")
            print("=> (Regressor) " + "=> Feature Selection Streamline: " +
                  stringbuilder[:-5])
            print("*************************")
        elif self._regressors == False:
            print("*************************")
            print("=> (Classifier) " + "=> Feature Selection Streamline: " +
                  stringbuilder[:-5])
            print("*************************")
        else:
            print("Invalid model selected. Please set regressors=True or regressors=False.")
            print()
    def supportVectorRegression():
        self._svr_params = {}
        for k, v in self._allParams.items():
            if "svr" in k:
                self._svr_params[k] = v

        self._svr_params["svr__kernel"] = ['linear']

        model = SupportVectorRegressorPredictiveModel(
            self._X_train, self._y_train, self._svr_params, self._nfolds,
            self._n_jobs, self._verbose)
        return abs(model.getBestEstimator().coef_.flatten())

    def randomForestRegression():
        self._rfr_params = {}
        for k, v in self._allParams.items():
            if "rfr" in k:
                self._rfr_params[k] = v

        model = RandomForestRegressorPredictiveModel(
            self._X_train, self._y_train, self._rfr_params, self._nfolds,
            self._n_jobs, self._verbose)
        return abs(model.getBestEstimator().feature_importances_.flatten())

    def adaptiveBoostingRegression():
        self._abr_params = {}
        for k, v in self._allParams.items():
            if "abr" in k:
                self._abr_params[k] = v

        model = AdaptiveBoostingRegressorPredictiveModel(
            self._X_train, self._y_train, self._abr_params, self._nfolds,
            self._n_jobs, self._verbose)
        return abs(model.getBestEstimator().feature_importances_.flatten())

    def lassoRegression():
        self._lasso_params = {}
        for k, v in self._allParams.items():
            if "lasso" in k:
                self._lasso_params[k] = v

        model = LassoRegressorPredictiveModel(self._X_train, self._y_train,
                                              self._lasso_params, self._nfolds,
                                              self._n_jobs, self._verbose)
        return abs(model.getBestEstimator().coef_.flatten())

    def elasticNetRegression():
        self._enet_params = {}
        for k, v in self._allParams.items():
            if "enet" in k:
                self._enet_params[k] = v

        model = ElasticNetRegressorPredictiveModel(
            self._X_train, self._y_train, self._enet_params, self._nfolds,
            self._n_jobs, self._verbose)
        return abs(model.getBestEstimator().coef_.flatten())

    def mixed_selection():
        if self._verbose:
            print("Executing: mixed_selection")

        X = self._X
        y = self._y

        initial_list = []
        threshold_in_specified = False
        threshold_out_specified = False

        # Note: the original asserts wrapped (condition, message) in a tuple,
        # which always evaluates truthy; the message belongs after the comma.
        if "mixed_selection__threshold_in" in self._allParams.keys():
            assert isinstance(self._allParams["mixed_selection__threshold_in"],
                              float), "threshold_in must be a float"
            threshold_in = self._allParams["mixed_selection__threshold_in"]
            threshold_in_specified = True
        else:
            threshold_in = 0.01

        if "mixed_selection__threshold_out" in self._allParams.keys():
            assert isinstance(self._allParams["mixed_selection__threshold_out"],
                              float), "threshold_out must be a float"
            threshold_out = self._allParams["mixed_selection__threshold_out"]
            threshold_out_specified = True
        else:
            threshold_out = 0.05

        if "mixed_selection__verbose" in self._allParams.keys():
            assert isinstance(self._allParams["mixed_selection__verbose"],
                              bool), "verbose must be a bool"
            verbose = self._allParams["mixed_selection__verbose"]
        else:
            verbose = False

        if threshold_in_specified and threshold_out_specified:
            assert threshold_in < threshold_out, \
                "threshold_in must be strictly less than threshold_out to avoid infinite looping."

        #initial_list = self._initial_list
        #threshold_in = self._threshold_in
        #threshold_out = self._threshold_out
        #verbose = self._verbose

        """ Perform a forward-backward feature selection
        based on p-values from statsmodels.api.OLS
        Arguments:
            X - pandas.DataFrame with candidate features
            y - list-like with the target
            initial_list - list of features to start with (column names of X)
            threshold_in - include a feature if its p-value < threshold_in
            threshold_out - exclude a feature if its p-value > threshold_out
            verbose - whether to print the sequence of inclusions and exclusions
        Returns: list of selected features
        Always set threshold_in < threshold_out to avoid infinite looping.
        See https://en.wikipedia.org/wiki/Stepwise_regression for the details
        """

        included = list(initial_list)
        while True:
            changed = False

            # forward step
            excluded = list(set(X.columns) - set(included))
            new_pval = pd.Series(index=excluded, dtype=float)
            for new_column in excluded:
                model = sm.OLS(
                    y,
                    sm.add_constant(
                        pd.DataFrame(X[included + [new_column]]))).fit()
                new_pval[new_column] = model.pvalues[new_column]
            best_pval = new_pval.min()
            if best_pval < threshold_in:
                best_feature = new_pval.idxmin()
                #best_feature = new_pval.argmin()
                included.append(best_feature)
                changed = True
                if verbose:
                    print('Adding {:30} with p-value {:.6}'.format(
                        best_feature, best_pval))

            # backward step
            model = sm.OLS(y, sm.add_constant(pd.DataFrame(X[included]))).fit()
            # use all coefs except the intercept
            pvalues = model.pvalues.iloc[1:]
            worst_pval = pvalues.max()  # null if pvalues is empty
            if worst_pval > threshold_out:
                changed = True
                worst_feature = pvalues.idxmax()
                #worst_feature = pvalues.argmax()
                included.remove(worst_feature)
                if verbose:
                    print('Dropping {:30} with p-value {:.6}'.format(
                        worst_feature, worst_pval))

            if not changed:
                break

        # Return a 0/1 indicator per column so the result lines up with X.columns
        new_included = []
        for col in X.columns:
            if col in included:
                new_included.append(1)
            else:
                new_included.append(0)

        return new_included

    def partialLeastSquaresRegression():
        if self._verbose:
            print("Executing: plsr")

        # The components are not helpful in this context.
        # They might be for transformation, however.
        #if "plsr__n_components" in self._allParams.keys():
        #    n_components = self._allParams["plsr__n_components"]
        #else:
        #    n_components = 2

        pls_model = PLSRegression()
        pls_out = pls_model.fit(self._X, self._y)

        # The coefficients are used to show the direction of the relationship.
        return abs(pls_out.coef_.flatten())

    ############################################
    ########## Classifiers Start Here ##########
    ############################################

    def adaptiveBoostingClassifier():
        self._abc_params = {}
        for k, v in self._allParams.items():
            if "abc" in k:
                self._abc_params[k] = v

        model = AdaptiveBoostingClassifierPredictiveModel(
            self._X_train, self._y_train, self._abc_params, self._nfolds,
            self._n_jobs, self._verbose)
        return model.getBestEstimator().feature_importances_.flatten()

    def randomForestClassifier():
        self._rfc_params = {}
        for k, v in self._allParams.items():
            if "rfc" in k:
                self._rfc_params[k] = v

        model = RandomForestClassifierPredictiveModel(
            self._X_train, self._y_train, self._rfc_params, self._nfolds,
            self._n_jobs, self._verbose)
        return model.getBestEstimator().feature_importances_.flatten()

    def supportVectorClassifier():
        self._svc_params = {}
        for k, v in self._allParams.items():
            if "svc" in k:
                self._svc_params[k] = v

        self._svc_params["svc__kernel"] = ['linear']

        model = SupportVectorClassifierPredictiveModel(
            self._X_train, self._y_train, self._svc_params, self._nfolds,
            self._n_jobs, self._verbose)

        coefs = model.getBestEstimator().coef_
        prods = coefs[0, :]
        for i in range(1, len(coefs)):
            prods = np.multiply(prods, coefs[i, :])
        return abs(prods)

    # Valid regressors
    regression_options = {
        "mixed_selection": mixed_selection,
        "svr": supportVectorRegression,
        "rfr": randomForestRegression,
        "abr": adaptiveBoostingRegression,
        "lasso": lassoRegression,
        "enet": elasticNetRegression,
        "plsr": partialLeastSquaresRegression
    }

    # Valid classifiers
    classification_options = {
        'abc': adaptiveBoostingClassifier,
        'rfc': randomForestClassifier,
        'svc': supportVectorClassifier
    }

    # Define the return dictionary
    return_dict = {}

    # Train/test split
    self._X_train, self._X_test, self._y_train, self._y_test = train_test_split(
        self._X,
        self._y, test_size=self._test_size)

    # Wrapper models
    self._key_features = {}

    if self._regressors:
        for key in models_to_flow:
            self._key_features[key] = regression_options[key]()
    elif self._regressors == False:
        for key in models_to_flow:
            self._key_features[key] = classification_options[key]()
    else:
        print("Invalid model type. Please set regressors=True or regressors=False.")
        print()

    if self._verbose:
        print()

    return_dict['feature_importances'] = self._key_features

    self._ensemble_results = None
    self._kept_features = None
    if self._ensemble:
        alternative_names = self._X.columns.tolist()
        criterion_names = list(self._key_features.keys())
        criteria = [MAX for _ in criterion_names]
        # Equal weight for every model's importance vector (the original
        # 0-based weights would give the first criterion zero weight).
        weights = [1 / len(criterion_names) for _ in criterion_names]

        df = pd.DataFrame(self._key_features, index=alternative_names)
        data = Data(df.values,  # .as_matrix() was removed in recent pandas
                    criteria,
                    weights,
                    anames=df.index.tolist(),
                    cnames=df.columns.tolist())
        #if self._verbose:
        #    data.plot("radar")

        dm1 = simple.WeightedSum()
        dm2 = simple.WeightedProduct()
        dm3 = closeness.TOPSIS()
        dec1 = dm1.decide(data)
        dec2 = dm2.decide(data)
        dec3 = dm3.decide(data)

        self._ensemble_results = pd.DataFrame(
            {
                "TOPSIS": dec3.rank_,
                "WeightedSum": dec1.rank_,
                "WeightedProduct": dec2.rank_
            },
            index=df.index.tolist())

        # Only keep features that our decision makers deemed in the top % specified
        num_features_requested = math.ceil(
            len(alternative_names) * self._featurePercentage)
        ranks = dec1.rank_ + dec2.rank_ + dec3.rank_
        argmin_sorted = np.argpartition(ranks, num_features_requested)

        self._kept_features = []
        count = 0
        for i in argmin_sorted:
            self._kept_features.append(alternative_names[i])
            count += 1
            if count >= num_features_requested:
                break

        print("", self._featurePercentage * 100,
              " % -> (" + str(num_features_requested) + ") features kept.")
        print(self._kept_features)

    # Print data with only those features
    return_dict['ensemble_results'] = self._ensemble_results
    return_dict['kept_features'] = self._kept_features

    return return_dict
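# A hedged usage sketch of flow(). The enclosing class is not shown in this
# snippet, so the constructor below is a guess (hypothetical name and
# signature); the params keys follow the "<model>__<param>" convention that
# flow() parses.
#
#   stream = FeatureSelectionStream(X, y)   # hypothetical wrapper holding self._X / self._y
#   results = stream.flow(
#       ["rfr", "lasso", "plsr"],
#       params={"rfr__n_estimators": [100, 200], "lasso__alpha": [0.1, 1.0]},
#       regressors=True,
#       ensemble=True,            # aggregate WeightedSum / WeightedProduct / TOPSIS ranks
#       featurePercentage=0.25,   # keep the top 25% of features by combined rank
#       verbose=True)
#   results["feature_importances"], results["ensemble_results"], results["kept_features"]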
def get_madm_concensus(Wijk=None,
                       num_optimizers=100,
                       data_shape=(10, 5),
                       batch_size=10,
                       policy=np.average,
                       verbose=False):

    # Get data from the simulation
    if Wijk is None:
        Wijk, move_sequence = get_k_optimizations(num_optimizers=num_optimizers,
                                                  data_shape=data_shape,
                                                  batch_size=batch_size,
                                                  verbose=verbose)

    # Construct the alternative-space: one alternative per (i, j) cell
    alternatives = {}
    alternative_num = 0
    for i in range(Wijk[:, :, 0].shape[0]):
        for j in range(Wijk[:, :, 0].shape[1]):
            alternatives[alternative_num] = (i, j)
            alternative_num += 1
    #print(alternatives)

    # Construct the decision matrix: one criterion per optimizer k
    DM = np.empty((alternative_num, Wijk.shape[2]))
    for a, loc in alternatives.items():
        for k in range(Wijk.shape[2]):
            DM[a, k] = Wijk[loc[0], loc[1], k]
    #print(DM)

    # Putting it all together
    alternative_names = [v for k, v in alternatives.items()]
    criterion_names = [k for k in range(Wijk.shape[2])]
    criteria = [MAX for i in criterion_names]
    weights = [1 / len(criterion_names) for i in criterion_names]
    df = pd.DataFrame(DM, index=alternative_names, columns=criterion_names)

    if verbose:
        print("Alternatives {}".format(alternative_names))
        print("Criteria {}".format(criterion_names))
        print("Weights {}".format(weights))
        print("Decision Matrix {}".format(df))

    # Execute MADM
    data = Data(df.values,  # .as_matrix() was removed in recent pandas
                criteria,
                weights,
                anames=df.index.tolist(),
                cnames=df.columns.tolist())

    # Execute the 3 decision makers
    dm1 = simple.WeightedSum()
    dm2 = simple.WeightedProduct()
    dm3 = closeness.TOPSIS()
    dec1 = dm1.decide(data)
    dec2 = dm2.decide(data)
    dec3 = dm3.decide(data)

    ranks = [dec1.rank_, dec2.rank_, dec3.rank_]
    results = pd.DataFrame({"TOPSIS": dec3.rank_,
                            "WeightedSum": dec1.rank_,
                            "WeightedProduct": dec2.rank_},
                           index=df.index.tolist())
    if verbose:
        print("MADM Results: {}".format(results))

    # Aggregate the three rankings with the chosen consensus policy
    concensus_results = pd.DataFrame({"ConsensusRank": policy(results, axis=1)},
                                     index=results.index)
    rij = concensus_results.values.reshape(Wijk.shape[0], Wijk.shape[1])
    rij_move_sequence = np.argmin(rij, axis=1)
    #if verbose:
    #    print("rij {}".format(rij))
    #    print("rij_move_sequence {}".format(rij_move_sequence))

    return rij, rij_move_sequence

#wijk, _ = get_k_optimizations(data=None, num_optimizers=5, data_shape=(10, 5), batch_size=5, verbose=True)
#rij, _ = get_madm_concensus(Wijk=wijk, policy=np.average)
#print(rij)
#print(_)