import firebase_admin
from firebase_admin import credentials, firestore
import pandas as pd
from flask import jsonify
from skcriteria import Data, MAX, MIN
from skcriteria.madm import simple


def get_ranking():
    # Use a service account
    cred = credentials.Certificate(
        'merlin-c4fa7-firebase-adminsdk-7cfbn-8323034d25.json')
    firebase_admin.initialize_app(cred)
    db = firestore.client()

    user_ref = db.collection(u'users')
    docs = user_ref.stream()

    # Collect the documents, then build the dataframe to rank.
    user_list = [doc.to_dict() for doc in docs]
    users_data = pd.DataFrame(user_list)
    users_data = users_data.loc[:, ['ID', 'skills_score', 'work_experience',
                                    'rating', 'origin']]

    criteria_data = Data(
        users_data.iloc[:, 1:],         # the pandas dataframe
        [MAX, MAX, MAX, MIN],           # direction of goodness for each column
        anames=users_data['ID'],        # each entity's name, here userId
        cnames=users_data.columns[1:],  # attribute/column name
        # weights=[1, 1, 1, 1]          # weights for each attribute (optional)
    )

    dm = simple.WeightedSum(mnorm="sum")
    dec = dm.decide(criteria_data)

    # Attach the computed rank (1 = best) and return the users as JSON.
    users_data['rank'] = dec.rank_
    return jsonify(users_data.to_dict('records'))
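# A minimal, self-contained sketch of the same ranking pattern on toy data
# (the matrix, user names, and values below are invented for illustration):
from skcriteria import Data, MAX, MIN
from skcriteria.madm import simple

toy = Data(
    [[7, 3, 9, 2],
     [6, 8, 4, 1],
     [9, 5, 6, 5]],
    [MAX, MAX, MAX, MIN],  # same directions as get_ranking() above
    anames=["user-a", "user-b", "user-c"],
    cnames=["skills_score", "work_experience", "rating", "origin"],
)
dec = simple.WeightedSum(mnorm="sum").decide(toy)
print(dec.rank_)  # dec.rank_[i] is the position of alternative i (1 = best)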
def execute(self):
    alternative_names = self._X.columns.tolist()
    criterion_names = list(self._key_features.keys())
    criteria = [MAX for i in criterion_names]
    # uniform weights across criteria
    weights = [1 / len(criterion_names) for _ in criterion_names]
    df = pd.DataFrame(self._key_features, index=alternative_names)

    data = Data(df.values,
                criteria,
                weights,
                anames=df.index.tolist(),
                cnames=df.columns)

    # if self._verbose:
    #     data.plot("radar")

    dm1 = simple.WeightedSum()
    dm2 = simple.WeightedProduct()
    dm3 = closeness.TOPSIS()
    dec1 = dm1.decide(data)
    dec2 = dm2.decide(data)
    dec3 = dm3.decide(data)

    ranks = [dec1.rank_, dec2.rank_, dec3.rank_]
    self._ensemble_results = pd.DataFrame(
        {
            "TOPSIS": dec3.rank_,
            "WeightedSum": dec1.rank_,
            "WeightedProduct": dec2.rank_
        },
        index=df.index.tolist())

    # Only keep the features our decision makers ranked in the top
    # percentage specified.
    num_features_requested = math.ceil(
        len(alternative_names) * self._featurePercentage)
    sum_ranks = sum(ranks)
    argmin_sorted = np.argpartition(sum_ranks, num_features_requested)
    self._kept_features = []
    count = 0
    for i in argmin_sorted:
        self._kept_features.append(alternative_names[i])
        count += 1
        if count >= num_features_requested:
            break

    if self._verbose:
        print("", self._featurePercentage * 100,
              " % -> (" + str(num_features_requested) + ") features kept.")
        print(self._kept_features)

    return self._ensemble_results, self._kept_features
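# Note on the selection loop above: np.argpartition(a, k) places the indices
# of the k smallest entries in the first k positions (in no guaranteed order),
# so iterating argmin_sorted and stopping after k picks is equivalent to
# slicing. A small illustration with invented ranks:
import numpy as np

sum_ranks = np.array([5, 2, 9, 4, 7])          # lower = better aggregate rank
feature_names = ["f1", "f2", "f3", "f4", "f5"]
k = 2
kept = [feature_names[i] for i in np.argpartition(sum_ranks, k)[:k]]
print(sorted(kept))  # ['f2', 'f4'] -- the two lowest combined ranks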
def getBestRankedCandidate(nbToExtract, candidateSet, rulesSetsQuality,
                           criteriaQuality):
    tmpRulesSetsQuality = None
    criteria = []
    idAttributesToRemove = []
    for idCrit in range(len(criteriaQuality)):
        if criteriaQuality[idCrit] == '+':
            criteria.append(MAX)
        elif criteriaQuality[idCrit] == '-':
            criteria.append(MIN)
        else:
            idAttributesToRemove.append(idCrit)

    attributes = removeAttributeId(
        ['Polarity', 'Diversity', 'Distancing', 'Surprise'],
        idAttributesToRemove)
    if len(attributes) < 4:
        # keep the original quality vectors so the full values can be
        # returned even when some attributes are dropped from the ranking
        tmpRulesSetsQuality = rulesSetsQuality
        rulesSetsQuality = []
        for ruleQI in tmpRulesSetsQuality:
            newQI = removeAttributeId(ruleQI, idAttributesToRemove)
            rulesSetsQuality.append(newQI)
    print(str(attributes))

    candidateIds = [i for i in range(len(candidateSet))]
    data = Data(rulesSetsQuality, criteria,
                anames=candidateIds,
                cnames=attributes)

    # apply a simple weighted-sum method
    dm = simple.WeightedSum()
    res = dm.decide(data)
    current_ranking = res.rank_

    # extract the best candidates
    if nbToExtract > len(current_ranking):
        nbToExtract = len(current_ranking)
    bestCandidates = []
    for i in range(nbToExtract):
        bestCandidateId = list(current_ranking).index(i + 1)
        bestCandidate = candidateSet[bestCandidateId]
        if tmpRulesSetsQuality is None:
            bestCandidateQI = rulesSetsQuality[bestCandidateId]
        else:
            bestCandidateQI = tmpRulesSetsQuality[bestCandidateId]
        bestCandidates.append((bestCandidate, bestCandidateQI))

    return bestCandidates
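# Hypothetical usage sketch for getBestRankedCandidate. removeAttributeId is
# defined elsewhere in this module; the stand-in below (an assumption, included
# only so the sketch is self-contained) drops the listed positions:
def removeAttributeId(values, idsToRemove):
    return [v for i, v in enumerate(values) if i not in idsToRemove]

candidateSet = ["rules-set-0", "rules-set-1", "rules-set-2"]
rulesSetsQuality = [[0.8, 0.4, 0.1, 0.7],
                    [0.5, 0.9, 0.3, 0.2],
                    [0.6, 0.2, 0.8, 0.4]]
# maximize Polarity and Diversity, minimize Distancing, maximize Surprise:
best = getBestRankedCandidate(2, candidateSet, rulesSetsQuality,
                              ['+', '+', '-', '+'])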
def rank_label(df_lab, wgt):
    criteria_data = Data(
        df_lab.iloc[:, 1:5],         # the pandas dataframe
        [MIN, MIN, MIN, MIN],        # direction of goodness for each column
        anames=df_lab['Index'],      # each entity's name
        cnames=df_lab.columns[1:5],  # attribute/column names
        weights=wgt                  # weights for each attribute (optional)
    )
    df_lab_copy = df_lab.copy()

    # weighted sum, sumNorm
    dm = simple.WeightedSum(mnorm="sum")
    dec = dm.decide(criteria_data)
    df_lab_copy.loc[:, 'rank_weightedSum_sumNorm_inverse'] = dec.rank_

    # weighted sum, maxNorm
    dm = simple.WeightedSum(mnorm="max")
    dec = dm.decide(criteria_data)
    df_lab_copy.loc[:, 'rank_weightedSum_maxNorm_inverse'] = dec.rank_

    # weighted product, sumNorm
    dm = simple.WeightedProduct(mnorm="sum")
    dec = dm.decide(criteria_data)
    df_lab_copy.loc[:, 'rank_weightedProduct_sumNorm_inverse'] = dec.rank_

    # weighted product, maxNorm
    dm = simple.WeightedProduct(mnorm="max")
    dec = dm.decide(criteria_data)
    df_lab_copy.loc[:, 'rank_weightedProduct_maxNorm_inverse'] = dec.rank_

    # sort for better visualization
    df_lab_copy.sort_values(by=['rank_weightedSum_sumNorm_inverse'],
                            inplace=True)
    return df_lab_copy
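# Illustrative call of rank_label (column names, values, and weights are made
# up; skcriteria's Data/MIN and simple are assumed imported at module level).
# The function expects an 'Index' column followed by four cost criteria:
import pandas as pd

df_lab = pd.DataFrame({
    "Index": ["a", "b", "c"],
    "c1": [1.0, 2.0, 3.0],
    "c2": [4.0, 1.0, 2.0],
    "c3": [2.0, 2.0, 1.0],
    "c4": [3.0, 1.0, 2.0],
})
ranked = rank_label(df_lab, wgt=[0.25, 0.25, 0.25, 0.25])
print(ranked[["Index", "rank_weightedSum_sumNorm_inverse"]])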
# (fragment: tail of a Data(...) call; the decision matrix, criteria and
# anames are defined above this excerpt)
    cnames=[
        "Run", "HS", "Average", "BF", "Strikerate", "Centuries", "Fifties",
        "zeros", "Fours", "Sixes", "Maiden", "Runs_bowl", "Wickets",
        "Average_bowling", "Economical", "Strikerate_bowl", "4wickets",
        "5wickets",
    ])

# data.plot()
data.plot("box")
# data.plot.violin()
# data.plot.radar(cmap="inferno", show_criteria=False)

# First create the decision maker (with the default hyper-parameters).
dm = simple.WeightedSum()
print(dm)

# Now let's decide the ranking.
dec = dm.decide(data)
print(dec)
print(dec.e_)
print(dec.e_.points)

print("Generate a ranking of alternatives?", dec.alpha_solution_)
print("Generate a kernel of best alternatives?", dec.beta_solution_)
print("Choose the best alternative?", dec.gamma_solution_)

# The rank as a numpy array (if this decision is an α-solution).
print(dec.rank_)
def drv(weights, abc, climit, ntest, ntest_kwargs, alpha_norm, alpha_rank,
        njobs, agg_only_consensus):
    # PREPROCESS
    # determine the number of parallel jobs
    njobs = joblib.cpu_count() if njobs is None else njobs

    # resolve the normality test
    ntest = NORMAL_TESTS.get(ntest, ntest)
    ntest_kwargs = {} if ntest_kwargs is None else ntest_kwargs

    # number of participants & alternatives
    N, I = np.shape(abc[0])

    # number of criteria
    J = len(abc)

    # placeholder to store the results
    results = {"N_": N, "I_": I, "J_": J}

    # WEIGHTS
    if np.ndim(weights) > 1:
        wresults = subproblem(mtx=weights,
                              climit=climit,
                              alpha_norm=alpha_norm,
                              ntest=ntest,
                              ntest_kwargs=ntest_kwargs)
    else:
        wresults = {}

    # copy weights results to the global results
    results.update({
        "wmtx_": wresults.get("nproducts"),
        "wsst_": wresults.get("sst"),
        "wssw_": wresults.get("ssw"),
        "wssb_": wresults.get("ssb"),
        "wssu_": wresults.get("ssu"),
        "wivr_": wresults.get("ivr"),
        "wntest_sts_": wresults.get("ntest_sts"),
        "wntest_pvals_": wresults.get("ntest_pvals"),
        "wntest_reject_h0_": wresults.get("ntest_reject_h0"),
        "win_consensus_": wresults.get("in_consensus"),
        "weights_mean_": wresults.get("resume")})

    # ALTERNATIVES
    with joblib.Parallel(n_jobs=njobs) as jobs:
        wresults = jobs(
            joblib.delayed(subproblem)(amtx,
                                       climit=climit,
                                       alpha_norm=alpha_norm,
                                       ntest=ntest,
                                       ntest_kwargs=ntest_kwargs)
            for amtx in abc)

    # copy alternatives results to the global results
    results.update({
        "amtx_criteria_": tuple(r["nproducts"] for r in wresults),
        "asst_": np.hstack([r["sst"] for r in wresults]),
        "assw_": np.hstack([r["ssw"] for r in wresults]),
        "assb_": np.hstack([r["ssb"] for r in wresults]),
        "assu_": np.hstack([r["ssu"] for r in wresults]),
        "aivr_": np.hstack([r["ivr"] for r in wresults]),
        "ain_consensus_": np.hstack([r["in_consensus"] for r in wresults]),
        "antest_sts_": np.vstack([r["ntest_sts"] for r in wresults]),
        "antest_pvals_": np.vstack([r["ntest_pvals"] for r in wresults]),
        "antest_reject_h0_": np.vstack(
            [r["ntest_reject_h0"] for r in wresults]),
        "amtx_mean_": np.vstack([r["resume"] for r in wresults])})

    # CONSENSUS
    consensus = np.all(results["ain_consensus_"])
    if consensus and results["weights_mean_"] is not None:
        consensus = consensus and results["win_consensus_"]
    results["consensus_"] = consensus  # to global results

    # GLOBAL REJECT H0
    reject_h0 = np.any(results["antest_reject_h0_"])
    if not reject_h0 and results["wntest_reject_h0_"] is not None:
        reject_h0 = reject_h0 or np.any(results["wntest_reject_h0_"])
    results["ntest_reject_h0_"] = reject_h0

    # AGGREGATION
    if consensus or not agg_only_consensus:
        aggregator = simple.WeightedSum(mnorm="none", wnorm="none")
        criteria = [max] * J
        weights_mean = (1 if results["weights_mean_"] is None
                        else results["weights_mean_"])
        agg_m = aggregator.decide(results["amtx_mean_"].T,
                                  criteria=criteria,
                                  weights=weights_mean)

        with joblib.Parallel(n_jobs=1) as jobs:
            agg_p = jobs(
                joblib.delayed(run_aggregator)(idx=idx,
                                               mtxs=results["amtx_criteria_"],
                                               criteria=criteria,
                                               weights=results["wmtx_"],
                                               aggregator=aggregator)
                for idx in range(N))
            agg_p = tuple(agg_p)

        # rank verification
        with joblib.Parallel(n_jobs=1) as jobs:
            ttest_results = jobs(
                joblib.delayed(rank_ttest_rel)(agg_p=agg_p,
                                               aidx=aidx,
                                               bidx=bidx)
                for aidx, bidx in it.combinations(range(I), 2))

        ttest_size = len(ttest_results)
        rank_t, rank_p = np.empty(ttest_size), np.empty(ttest_size)
        for idx, r in enumerate(ttest_results):
            rank_t[idx] = r.statistic
            rank_p[idx] = r.pvalue

        rank_fdr = fdr_by(alpha=alpha_rank, pvals=rank_p, I=I)
        rank_results = rank_p < rank_fdr
        rank_results_resume = np.all(rank_results)
    else:
        agg_p, agg_m = None, None
        rank_t, rank_p, rank_fdr, rank_results = None, None, None, None
        rank_results_resume = False

    # to global results
    results["aggregation_criteria_"] = agg_p
    results["aggregation_mean_"] = agg_m
    results["rank_check_t_"] = rank_t
    results["rank_check_pval_"] = rank_p
    results["rank_check_fdr_"] = rank_fdr
    results["rank_check_results_"] = rank_results
    results["rank_check_results_resume_"] = rank_results_resume
    results["strict_preference_"] = (consensus and not reject_h0
                                     and rank_results_resume)

    return results
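# fdr_by is defined elsewhere; the sketch below shows one plausible
# Benjamini-Yekutieli style cutoff for the I*(I-1)/2 pairwise rank tests.
# This is an assumption about its behavior, not the library's actual code:
import numpy as np

def by_threshold(alpha, pvals):
    m = len(pvals)
    c_m = np.sum(1.0 / np.arange(1, m + 1))  # BY correction for dependence
    ranked = np.sort(pvals)
    below = ranked <= (np.arange(1, m + 1) / (m * c_m)) * alpha
    # cutoff = largest p-value still under its BY line, 0.0 if none qualifies
    return ranked[below].max() if below.any() else 0.0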
def calculate(id):
    # fetch & preprocess project
    project = mongo.db.projects.find_one_or_404({"id": id})
    projectCharacteristics = {p["id"]: p for p in project["characteristics"]}
    cnames = [*projectCharacteristics]

    # fetch & preprocess characteristics
    cursor = mongo.db.characteristics.find({"id": {"$in": cnames}})
    characteristics = {d["id"]: d for d in cursor}

    # fetch & preprocess method chunks
    cursor = mongo.db.methodchunks.find(
        {"characteristics.id": {"$in": cnames}})
    method_chunks = {}
    for document in cursor:
        document["characteristics"] = {
            d["id"]: d for d in document["characteristics"]}
        method_chunks[document["id"]] = document

    # create an ordinal encoder per project characteristic
    from sklearn.preprocessing import OrdinalEncoder
    for cid, pc in projectCharacteristics.items():
        if pc["rule"] == "preference_list":
            pass
        elif pc["rule"] == "exact":
            pass
        else:  # maximum, minimum
            if cid in characteristics:
                for cv in characteristics[cid]["characteristicValues"]:
                    if cv["ref"] == pc["ref"]:
                        if pc["rule"] == "maximum":
                            pc["value"] = list(reversed(cv["values"]))
                        else:
                            pc["value"] = cv["values"]
                        break
        values = pc["value"] + ["N/A"]
        values.reverse()  # ascending order (smallest to largest), "N/A" first
        enc = OrdinalEncoder(categories=[values])
        enc.fit([[v] for v in values])
        pc["encoder"] = enc

    # build the decision matrix (one row per method chunk)
    import pandas as pd
    rows = []
    for mid, m in method_chunks.items():
        obj = {}
        for cid, pc in projectCharacteristics.items():
            if cid in m["characteristics"]:
                if pc["ref"] == m["characteristics"][cid]["ref"]:
                    obj[cid] = m["characteristics"][cid]["value"]
        rows.append(pd.Series(obj, index=cnames, name=mid))
    df = pd.DataFrame(rows, columns=cnames)
    df.fillna("N/A", inplace=True)
    print(df)
    separator()

    if len(df.index.values) == 0:
        return "No match"

    # apply encoding
    encoded = df.copy()
    for key, value in encoded.items():
        values = [v if v in projectCharacteristics[key]["value"] else "N/A"
                  for v in value]
        encoded.loc[:, key] = projectCharacteristics[key]["encoder"].transform(
            [[v] for v in values])
    # drop characteristics no method chunk provides (all rows encoded as 0)
    encoded = encoded.loc[:, (encoded != 0).any(axis=0)]

    # construct the decision data
    from skcriteria import Data, MAX
    from skcriteria.madm import simple, closeness
    optimal_senses = []
    weights = []
    for cid, pc in encoded.items():
        optimal_senses.append(MAX)
        weights.append(projectCharacteristics[cid].get("weight", 1))
    data = Data(encoded.values, optimal_senses,
                weights=weights,
                anames=encoded.index,
                cnames=encoded.columns)

    # WeightedSum
    model = simple.WeightedSum(mnorm="vector", wnorm="sum")
    de = model.decide(data)
    print(de)
    separator()
    print(de.e_)
    print("Points:", de.e_.points)

    # TOPSIS
    model2 = closeness.TOPSIS(mnorm="vector", wnorm="sum")
    de2 = model2.decide(data)
    print(de2)
    separator()
    print(de2.e_)
    print("Ideal:", de2.e_.ideal)
    print("Anti-Ideal:", de2.e_.anti_ideal)
    print("Closeness:", de2.e_.closeness)

    # build the response
    res = {}
    for cid, mc in method_chunks.items():
        mc["characteristics"] = [c for cid, c in mc["characteristics"].items()]
    z = [{"methodChunk": method_chunks[de._data._anames[i]],
          "score": de.e_.points[i],
          "rank": int(de._rank[i])}
         for i in range(0, len(de.mtx))]
    z2 = [{"methodChunk": method_chunks[de2._data._anames[i]],
           "score": de2.e_.closeness[i],
           "rank": int(de2._rank[i])}
          for i in range(0, len(de2.mtx))]
    res["results"] = [
        {"model": "WeightedSum",
         "values": sorted(z, key=lambda x: x["rank"])},
        {"model": "TOPSIS",
         "values": sorted(z2, key=lambda x: x["rank"])}]
    for cid, pc in projectCharacteristics.items():
        pc.pop("_id", None)
        pc["encoder"] = pc["encoder"].categories[0]
project["characteristics"] = [ pc for cid, pc in projectCharacteristics.items() ] project.pop("_id", None) res["project"] = project # print(res) # tes = {} # tes["project"] = projectCharacteristics # tes["method_chunks"] = method_chunks # tes["characteristics"] = characteristics default = lambda o: f"<<non-serializable: {type(o).__qualname__}>>" result = json.loads(json_util.dumps(res, default=default)) return result
def plot_heatmap(logic):
    plot_datas = normalize_data(logic)
    patent_names = patent_data['patent_number']
    attribute_names = patent_data.columns[1:]
    sns.heatmap(plot_datas,
                annot=True,
                yticklabels=patent_names,
                xticklabels=attribute_names,
                fmt='.2g')


###########
# Print the final ranking with different multi-criteria decision makers.
dm = simple.WeightedSum()
dec = dm.decide(criteria_data)
print(dec)
print(dec.e_.points)  # each alternative's score
print(dec.rank_)      # each alternative's rank
print("==============================")

dm = simple.WeightedProduct()
dec = dm.decide(criteria_data)
print(dec)
print(dec.e_.points)  # each alternative's score
print(dec.rank_)      # each alternative's rank
print("==============================")
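# criteria_data and patent_data are defined elsewhere in this script; a hedged,
# self-contained sketch of what the ranking code above assumes (two invented
# patents, and the assumption that every attribute is a benefit criterion):
import pandas as pd
from skcriteria import Data, MAX

patent_data = pd.DataFrame({
    'patent_number': ['US001', 'US002'],
    'citations': [12, 30],
    'claims': [8, 15],
})
criteria_data = Data(patent_data.iloc[:, 1:],
                     [MAX, MAX],
                     anames=patent_data['patent_number'],
                     cnames=patent_data.columns[1:])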
def flow(self,
         models_to_flow=[],
         params=None,
         test_size=0.2,
         nfolds=3,
         nrepeats=3,
         n_jobs=1,
         metrics=[],
         verbose=False,
         regressors=True,
         ensemble=False,
         featurePercentage=0.25):

    # Enforce parameters
    assert isinstance(nfolds, int), "nfolds must be integer"
    assert isinstance(nrepeats, int), "nrepeats must be integer"
    assert isinstance(n_jobs, int), "n_jobs must be integer"
    assert isinstance(verbose, bool), "verbose must be bool"
    assert isinstance(params, dict), "params must be a dict"
    assert isinstance(test_size, float), "test_size must be a float"
    assert isinstance(metrics, list), "model scoring must be a list"
    assert isinstance(regressors, bool), "regressors must be bool"
    assert isinstance(ensemble, bool), "ensemble must be bool"

    self._nfolds = nfolds
    self._nrepeats = nrepeats
    self._n_jobs = n_jobs
    self._verbose = verbose
    self._allParams = params
    self._metrics = metrics
    self._test_size = test_size
    self._regressors = regressors
    self._ensemble = ensemble
    self._featurePercentage = featurePercentage

    # Inform the user of the selection streamline.
    stringbuilder = " --> ".join(models_to_flow)
    if self._verbose:
        if self._regressors:
            print("*************************")
            print("=> (Regressor) => Feature Selection Streamline: " +
                  stringbuilder)
            print("*************************")
        elif self._regressors == False:
            print("*************************")
            print("=> (Classifier) => Feature Selection Streamline: " +
                  stringbuilder)
            print("*************************")
        else:
            print("Invalid model selected. Please set regressors=True "
                  "or regressors=False.")
    def supportVectorRegression():
        self._svr_params = {}
        for k, v in self._allParams.items():
            if "svr" in k:
                self._svr_params[k] = v
        self._svr_params["svr__kernel"] = ['linear']
        model = SupportVectorRegressorPredictiveModel(
            self._X_train, self._y_train, self._svr_params, self._nfolds,
            self._n_jobs, self._verbose)
        return abs(model.getBestEstimator().coef_.flatten())

    def randomForestRegression():
        self._rfr_params = {}
        for k, v in self._allParams.items():
            if "rfr" in k:
                self._rfr_params[k] = v
        model = RandomForestRegressorPredictiveModel(
            self._X_train, self._y_train, self._rfr_params, self._nfolds,
            self._n_jobs, self._verbose)
        return abs(model.getBestEstimator().feature_importances_.flatten())

    def adaptiveBoostingRegression():
        self._abr_params = {}
        for k, v in self._allParams.items():
            if "abr" in k:
                self._abr_params[k] = v
        model = AdaptiveBoostingRegressorPredictiveModel(
            self._X_train, self._y_train, self._abr_params, self._nfolds,
            self._n_jobs, self._verbose)
        return abs(model.getBestEstimator().feature_importances_.flatten())

    def lassoRegression():
        self._lasso_params = {}
        for k, v in self._allParams.items():
            if "lasso" in k:
                self._lasso_params[k] = v
        model = LassoRegressorPredictiveModel(
            self._X_train, self._y_train, self._lasso_params, self._nfolds,
            self._n_jobs, self._verbose)
        return abs(model.getBestEstimator().coef_.flatten())

    def elasticNetRegression():
        self._enet_params = {}
        for k, v in self._allParams.items():
            if "enet" in k:
                self._enet_params[k] = v
        model = ElasticNetRegressorPredictiveModel(
            self._X_train, self._y_train, self._enet_params, self._nfolds,
            self._n_jobs, self._verbose)
        return abs(model.getBestEstimator().coef_.flatten())

    def mixed_selection():
        if self._verbose:
            print("Executing: mixed_selection")

        X = self._X
        y = self._y

        initial_list = []
        threshold_in_specified = False
        threshold_out_specified = False

        # note: asserting a parenthesized (condition, message) tuple is always
        # true, so the conditions below are asserted directly
        if "mixed_selection__threshold_in" in self._allParams.keys():
            assert isinstance(
                self._allParams["mixed_selection__threshold_in"],
                float), "threshold_in must be a float"
            threshold_in = self._allParams["mixed_selection__threshold_in"]
            threshold_in_specified = True
        else:
            threshold_in = 0.01

        if "mixed_selection__threshold_out" in self._allParams.keys():
            assert isinstance(
                self._allParams["mixed_selection__threshold_out"],
                float), "threshold_out must be a float"
            threshold_out = self._allParams["mixed_selection__threshold_out"]
            threshold_out_specified = True
        else:
            threshold_out = 0.05

        if "mixed_selection__verbose" in self._allParams.keys():
            assert isinstance(
                self._allParams["mixed_selection__verbose"],
                bool), "verbose must be a bool"
            verbose = self._allParams["mixed_selection__verbose"]
        else:
            verbose = False

        if threshold_in_specified and threshold_out_specified:
            assert threshold_in < threshold_out, (
                "threshold_in must be strictly less than threshold_out "
                "to avoid infinite looping.")

        """ Perform a forward-backward feature selection
        based on p-values from statsmodels.api.OLS
        Arguments:
            X - pandas.DataFrame with candidate features
            y - list-like with the target
            initial_list - list of features to start with (column names of X)
            threshold_in - include a feature if its p-value < threshold_in
            threshold_out - exclude a feature if its p-value > threshold_out
            verbose - whether to print the sequence of inclusions and exclusions
        Returns: list of selected features
        Always set threshold_in < threshold_out to avoid infinite looping.
        See https://en.wikipedia.org/wiki/Stepwise_regression for the details
        """
        included = list(initial_list)
        while True:
            changed = False

            # forward step
            excluded = list(set(X.columns) - set(included))
            new_pval = pd.Series(index=excluded, dtype=float)
            for new_column in excluded:
                model = sm.OLS(y, sm.add_constant(
                    pd.DataFrame(X[included + [new_column]]))).fit()
                new_pval[new_column] = model.pvalues[new_column]
            best_pval = new_pval.min()
            if best_pval < threshold_in:
                best_feature = new_pval.idxmin()
                included.append(best_feature)
                changed = True
                if verbose:
                    print('Adding {:30} with p-value {:.6}'.format(
                        best_feature, best_pval))

            # backward step
            model = sm.OLS(y, sm.add_constant(
                pd.DataFrame(X[included]))).fit()
            # use all coefficients except the intercept
            pvalues = model.pvalues.iloc[1:]
            worst_pval = pvalues.max()  # null if pvalues is empty
            if worst_pval > threshold_out:
                changed = True
                worst_feature = pvalues.idxmax()
                included.remove(worst_feature)
                if verbose:
                    print('Dropping {:30} with p-value {:.6}'.format(
                        worst_feature, worst_pval))

            if not changed:
                break

        # return a 0/1 mask over the original columns
        new_included = []
        for col in X.columns:
            new_included.append(1 if col in included else 0)

        return new_included

    def partialLeastSquaresRegression():
        if self._verbose:
            print("Executing: plsr")
        # The components are not helpful for this context. They might be
        # for transformation, however.
        pls_model = PLSRegression()
        pls_out = pls_model.fit(self._X, self._y)
        # The coefficients are used to show direction of the relationship.
        return abs(pls_out.coef_.flatten())

    ############################################
    ########## Classifiers Start Here ##########
    ############################################

    def adaptiveBoostingClassifier():
        self._abc_params = {}
        for k, v in self._allParams.items():
            if "abc" in k:
                self._abc_params[k] = v
        model = AdaptiveBoostingClassifierPredictiveModel(
            self._X_train, self._y_train, self._abc_params, self._nfolds,
            self._n_jobs, self._verbose)
        return model.getBestEstimator().feature_importances_.flatten()

    def randomForestClassifier():
        self._rfc_params = {}
        for k, v in self._allParams.items():
            if "rfc" in k:
                self._rfc_params[k] = v
        model = RandomForestClassifierPredictiveModel(
            self._X_train, self._y_train, self._rfc_params, self._nfolds,
            self._n_jobs, self._verbose)
        return model.getBestEstimator().feature_importances_.flatten()

    def supportVectorClassifier():
        self._svc_params = {}
        for k, v in self._allParams.items():
            if "svc" in k:
                self._svc_params[k] = v
        self._svc_params["svc__kernel"] = ['linear']
        model = SupportVectorClassifierPredictiveModel(
            self._X_train, self._y_train, self._svc_params, self._nfolds,
            self._n_jobs, self._verbose)
        coefs = model.getBestEstimator().coef_
        prods = coefs[0, :]
        for i in range(1, len(coefs)):
            prods = np.multiply(prods, coefs[i, :])
        return abs(prods)

    # Valid regressors
    regression_options = {
        "mixed_selection": mixed_selection,
        "svr": supportVectorRegression,
        "rfr": randomForestRegression,
        "abr": adaptiveBoostingRegression,
        "lasso": lassoRegression,
        "enet": elasticNetRegression,
        "plsr": partialLeastSquaresRegression
    }

    # Valid classifiers
    classification_options = {
        'abc': adaptiveBoostingClassifier,
        'rfc': randomForestClassifier,
        'svc': supportVectorClassifier
    }

    # Define return dictionary
    return_dict = {}

    # Train/test split
    self._X_train, self._X_test, self._y_train, self._y_test = \
        train_test_split(self._X,
                         self._y,
                         test_size=self._test_size)

    # Wrapper models
    self._key_features = {}
    if self._regressors:
        for key in models_to_flow:
            self._key_features[key] = regression_options[key]()
    elif self._regressors == False:
        for key in models_to_flow:
            self._key_features[key] = classification_options[key]()
    else:
        print("Invalid model type. Please set regressors=True "
              "or regressors=False.")

    return_dict['feature_importances'] = self._key_features

    self._ensemble_results = None
    self._kept_features = None
    if self._ensemble:
        alternative_names = self._X.columns.tolist()
        criterion_names = list(self._key_features.keys())
        criteria = [MAX for i in criterion_names]
        # uniform weights across criteria
        weights = [1 / len(criterion_names) for _ in criterion_names]
        df = pd.DataFrame(self._key_features, index=alternative_names)

        data = Data(df.values,
                    criteria,
                    weights,
                    anames=df.index.tolist(),
                    cnames=df.columns)

        # if self._verbose:
        #     data.plot("radar")

        dm1 = simple.WeightedSum()
        dm2 = simple.WeightedProduct()
        dm3 = closeness.TOPSIS()
        dec1 = dm1.decide(data)
        dec2 = dm2.decide(data)
        dec3 = dm3.decide(data)

        self._ensemble_results = pd.DataFrame(
            {
                "TOPSIS": dec3.rank_,
                "WeightedSum": dec1.rank_,
                "WeightedProduct": dec2.rank_
            },
            index=df.index.tolist())

        # Only keep the features our decision makers ranked in the top
        # percentage specified.
        num_features_requested = math.ceil(
            len(alternative_names) * self._featurePercentage)
        ranks = dec1.rank_ + dec2.rank_ + dec3.rank_
        argmin_sorted = np.argpartition(ranks, num_features_requested)
        self._kept_features = []
        count = 0
        for i in argmin_sorted:
            self._kept_features.append(alternative_names[i])
            count += 1
            if count >= num_features_requested:
                break

        print("", self._featurePercentage * 100,
              " % -> (" + str(num_features_requested) + ") features kept.")
        print(self._kept_features)

    return_dict['ensemble_results'] = self._ensemble_results
    return_dict['kept_features'] = self._kept_features

    return return_dict
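# Hypothetical invocation of flow() -- the owning class is not shown in this
# excerpt, so "FeatureSelectionStream" is a stand-in name; parameter keys
# follow the "model__param" convention used above:
# stream = FeatureSelectionStream(X, y)
# results = stream.flow(["lasso", "enet", "rfr"],
#                       params={"lasso__alpha": [0.1, 1.0]},
#                       ensemble=True,
#                       featurePercentage=0.25)
# results["kept_features"]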
def get_madm_concensus(Wijk=None, num_optimizers=100, data_shape=(10, 5),
                       batch_size=10, policy=np.average, verbose=False):
    # Get data from simulation
    if Wijk is None:
        Wijk, move_sequence = get_k_optimizations(
            num_optimizers=num_optimizers,
            data_shape=data_shape,
            batch_size=batch_size,
            verbose=verbose)

    # Construct alternative-space: one alternative per (i, j) cell
    alternatives = {}
    alternative_num = 0
    for i in range(Wijk[:, :, 0].shape[0]):
        for j in range(Wijk[:, :, 0].shape[1]):
            alternatives[alternative_num] = (i, j)
            alternative_num += 1

    # Construct decision matrix: alternatives x k criteria
    DM = np.empty((alternative_num, Wijk.shape[2]))
    for a, loc in alternatives.items():
        for k in range(Wijk.shape[2]):
            DM[a, k] = Wijk[loc[0], loc[1], k]

    # Putting it all together
    alternative_names = [v for k, v in alternatives.items()]
    criterion_names = [k for k in range(Wijk.shape[2])]
    criteria = [MAX for i in criterion_names]
    weights = [1 / len(criterion_names) for i in range(len(criterion_names))]
    df = pd.DataFrame(DM, index=alternative_names, columns=criterion_names)

    if verbose:
        print("Alternatives {}".format(alternative_names))
        print("Criteria {}".format(criterion_names))
        print("Weights {}".format(weights))
        print("Decision Matrix {}".format(df))

    # Execute MADM
    data = Data(df.values,
                criteria,
                weights,
                anames=df.index.tolist(),
                cnames=df.columns)

    # Execute the three decision makers
    dm1 = simple.WeightedSum()
    dm2 = simple.WeightedProduct()
    dm3 = closeness.TOPSIS()
    dec1 = dm1.decide(data)
    dec2 = dm2.decide(data)
    dec3 = dm3.decide(data)

    results = pd.DataFrame({"TOPSIS": dec3.rank_,
                            "WeightedSum": dec1.rank_,
                            "WeightedProduct": dec2.rank_},
                           index=df.index.tolist())
    if verbose:
        print("MADM Results: {}".format(results))

    concensus_results = pd.DataFrame(
        {"ConsensusRank": policy(results, axis=1)}, index=results.index)
    rij = concensus_results.values.reshape(Wijk.shape[0], Wijk.shape[1])
    rij_move_sequence = np.argmin(rij, axis=1)

    return rij, rij_move_sequence


# Example usage:
# wijk, _ = get_k_optimizations(data=None, num_optimizers=5,
#                               data_shape=(10, 5), batch_size=5, verbose=True)
# rij, move_sequence = get_madm_concensus(Wijk=wijk, policy=np.average)
# print(rij)
# print(move_sequence)
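# Small self-contained demo of the consensus ranking (random data shifted away
# from zero so WeightedProduct's positivity requirement holds; illustrative
# only, and the policy argument accepts any reducer such as np.median):
import numpy as np

rng = np.random.default_rng(0)
wijk = rng.random((4, 3, 5)) + 0.1   # 4x3 grid of alternatives, 5 runs
rij, moves = get_madm_concensus(Wijk=wijk, policy=np.median)
print(rij.shape, moves)              # (4, 3) consensus ranks, per-row argmin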