def get_optimal_candidates(self, candidates, criteria):
    """Select the single best candidate with TOPSIS.

    Args:
        candidates: pandas DataFrame of candidate rows. Its index is used
            both to look up names in ``self.candidate_names`` and to build
            the returned label.
        criteria: iterable of objects exposing ``maximize`` (truthy when the
            criterion is to be maximised), ``weight`` and ``index`` (the
            column label of the criterion inside ``candidates``).

    Returns:
        A one-element list with the index label of the best candidate
        (callers expect a list).

    Side effects:
        The decision data is stored in ``self.data_topsis``.
    """
    # NOTE(review): iloc is positional, so this assumes candidates.index
    # holds row positions of self.candidate_names -- confirm with callers.
    names = self.candidate_names.iloc[candidates.index].values

    criteria = list(criteria)  # allow one-shot iterables
    min_max = [MAX if c.maximize else MIN for c in criteria]
    weights = [c.weight for c in criteria]
    columns = [c.index for c in criteria]

    # DataFrame.as_matrix() was removed in pandas 1.0; .values works on every
    # pandas version and is already used above.
    self.data_topsis = Data(candidates[columns].values.tolist(),
                            min_max,
                            weights=weights,
                            anames=names,
                            cnames=columns)
    choice = closeness.TOPSIS().decide(self.data_topsis)
    return [candidates.index[int(choice.best_alternative_)]]
def leaderboard():
    """Render the leaderboard page with all stored images ranked by TOPSIS.

    Every document returned by ``user_data.find()`` is one alternative. The
    six image metrics are equally weighted; all are maximised except
    ``Vignette``, which is minimised.

    Returns:
        The rendered ``leaderboard.html`` template. ``result`` is a 2-D array
        with one row per document -- name, roll number, rank, closeness
        score -- ordered by ascending rank.
    """
    records = list(user_data.find())
    if not records:
        # TOPSIS cannot run on an empty matrix; show an empty board instead.
        return render_template("leaderboard.html", result=np.empty((0, 4)))

    # (document key, display name, optimisation sense). Building the matrix
    # from this single table keeps data columns, labels and senses aligned;
    # previously the positional slice put the MIN sense on the wrong column.
    criteria_spec = [
        ('Brightness', "Brightness", MAX),
        ('Contrast', "Contrast", MAX),
        ('Pixel', "Pixel", MAX),
        ('Resolution', "Resolution", MAX),
        ('Vignette', "Vignette", MIN),
        ('clarity', "Clarity", MAX),
    ]
    n_criteria = len(criteria_spec)

    s_name = [rec['Name'] for rec in records]
    s_roll = [rec['Roll_No'] for rec in records]
    image_names = [rec['Image_Name'] for rec in records]
    mtx = np.array([[float(rec[key]) for key, _, _ in criteria_spec]
                    for rec in records])

    data = Data(mtx,
                [sense for _, _, sense in criteria_spec],
                weights=[1.0 / n_criteria] * n_criteria,
                anames=image_names,
                cnames=[label for _, label, _ in criteria_spec])
    dec = closeness.TOPSIS().decide(data)

    # np.int was removed from NumPy; the builtin int is the replacement.
    ranks = dec.rank_.astype(int)
    topsis_score = dec.e_.closeness

    result = np.array([s_name, s_roll, ranks, topsis_score]).T
    # Order by the numeric ranks: `result` is a string array, so sorting on
    # its rank column would place "10" before "2".
    final = result[np.argsort(ranks, kind="stable")]
    return render_template("leaderboard.html", result=final)
def best_alternative(map_paths, delta_time):
    """Return the candidate path that TOPSIS ranks best.

    Each path is scored on three criteria labelled ``road``, ``time`` and
    ``turn`` (the latter produced by ``mode_score``). All three are
    maximised, with weights 0.4 / 0.2 / 0.4.

    Args:
        map_paths: indexable collection of candidate paths.
        delta_time: forwarded unchanged to ``time_score``.

    Returns:
        The element of ``map_paths`` chosen as the best alternative.
    """
    road_column = road_score(map_paths)
    time_column = time_score(map_paths, delta_time)
    mode_column = mode_score(map_paths)
    score_matrix = np.column_stack((road_column, time_column, mode_column))

    decision_data = sk.Data(
        score_matrix,
        [sk.MAX, sk.MAX, sk.MAX],
        cnames=['road', 'time', 'turn'],
        weights=[.4, .2, .4],
    )
    decision = closeness.TOPSIS().decide(decision_data)
    return map_paths[decision.best_alternative_]
def perform_topsis(raw_data, survey_data):
    """Rank schools with TOPSIS on weather fit, crime and nightlife.

    Args:
        raw_data: mapping with
            ``'schools'`` -- list of dicts carrying ``'id'``, ``'lat'`` and
            ``'lon'``; and ``'weather'``, ``'crime'``, ``'nightlife'`` --
            dicts whose ``'importance'`` value is the criterion weight.
        survey_data: mapping with the preferred ``'winter'``, ``'spring'``,
            ``'summer'`` and ``'fall'`` temperatures.

    Returns:
        School ids as ints, ordered from best (rank 1) to worst. An empty
        list is returned when ``raw_data['schools']`` is empty.
    """
    schools = raw_data['schools']
    if not schools:
        # Nothing to rank; TOPSIS would fail on an empty matrix.
        return []

    site_root = os.path.dirname(os.path.realpath(__file__))
    df = pd.read_csv(os.path.join(site_root, "data", "cleaned_data.csv"))

    # Loop-invariant: normalisation constant for the crime criterion.
    max_crimerate = float(df['CRIME_COUNT'].max())

    # (survey key, data-set column) for every season.
    seasons = (
        ('winter', 'WINTER_TAVG'),
        ('spring', 'SPRING_TAVG'),
        ('summer', 'SUMMER_TAVG'),
        ('fall', 'FALL_TAVG'),
    )

    # Matrix columns: average seasonal temperature gap | crime ratio | nightlife
    matrix = []
    ids = []
    for school in schools:
        cur_id = school['id']
        ids.append(cur_id)

        # One frame scan per school instead of one per column.
        row = df.loc[df['UNITID'] == int(cur_id)].iloc[0]

        diff = sum(
            abs(float(survey_data[season]) - float(row[column]))
            for season, column in seasons) / 4.0
        crimerate = float(row['CRIME_COUNT'])

        matrix.append([
            diff, crimerate / max_crimerate,
            get_bar_data(school['lat'], school['lon'])
        ])

    criteria = [MIN, MIN, MAX]
    data = Data(matrix,
                criteria,
                weights=[
                    float(raw_data['weather']['importance']),
                    float(raw_data['crime']['importance']),
                    float(raw_data['nightlife']['importance'])
                ],
                anames=ids,
                cnames=["weather", "crime", "nightlife"])

    res = closeness.TOPSIS().decide(data)

    # rank_[i] is the 1-based rank of ids[i]; invert it into a best-first list.
    sorted_ids = [None] * len(ids)
    for school_id, rank in zip(ids, res.rank_.tolist()):
        sorted_ids[rank - 1] = int(school_id)
    return sorted_ids
def execute(self):
    """Ensemble the per-model feature importances and keep the top features.

    Every column of ``self._X`` is an alternative and every entry of
    ``self._key_features`` (model name -> importance per feature) is an
    equally weighted, maximised criterion. WeightedSum, WeightedProduct and
    TOPSIS each rank the features; the features with the smallest rank sum
    are kept.

    Returns:
        Tuple ``(ensemble_results, kept_features)`` where
        ``ensemble_results`` is a DataFrame of the three rankings indexed by
        feature name and ``kept_features`` lists the
        ``ceil(n_features * self._featurePercentage)`` best feature names,
        best first.

    Side effects:
        Sets ``self._ensemble_results`` and ``self._kept_features``.
    """
    alternative_names = self._X.columns.tolist()
    criterion_names = list(self._key_features.keys())
    criteria = [MAX for _ in criterion_names]
    # Equal weights. The previous `i / n` gave the first model weight 0 and
    # skewed the rest towards the models listed last.
    weights = [1.0 / len(criterion_names) for _ in criterion_names]

    df = pd.DataFrame(self._key_features, index=alternative_names)

    # DataFrame.as_matrix() was removed in pandas 1.0.
    data = Data(df.values,
                criteria,
                weights,
                anames=df.index.tolist(),
                cnames=df.columns)

    dec1 = simple.WeightedSum().decide(data)
    dec2 = simple.WeightedProduct().decide(data)
    dec3 = closeness.TOPSIS().decide(data)

    self._ensemble_results = pd.DataFrame(
        {
            "TOPSIS": dec3.rank_,
            "WeightedSum": dec1.rank_,
            "WeightedProduct": dec2.rank_
        },
        index=df.index.tolist())

    # Only keep features that our decision makers deemed in the top % specified
    num_features_requested = int(
        math.ceil(len(alternative_names) * self._featurePercentage))
    sum_ranks = dec1.rank_ + dec2.rank_ + dec3.rank_
    # argsort + slice is safe when every feature is requested, where
    # np.argpartition(kth=n) raises, and yields the features best-first.
    best_first = np.argsort(sum_ranks, kind="stable")[:num_features_requested]
    self._kept_features = [alternative_names[i] for i in best_first]

    if self._verbose:
        print("", self._featurePercentage * 100,
              " % -> (" + str(num_features_requested) + ") features kept.")
        print(self._kept_features)

    return self._ensemble_results, self._kept_features
def calculate(priority, products):
    """Rank products with sum-normalised TOPSIS and annotate their scores.

    Args:
        priority: criterion priorities; reshaped by
            ``Topsis.reshape_priority`` and used as weights.
        products: mapping whose ``'scores'`` entry maps product id ->
            {attribute name -> score}. All attributes are maximised.

    Returns:
        The TOPSIS decision object, or ``None`` when there are no products.

    Side effects:
        ``products['scores']`` is replaced by a mapping product id ->
        ``{'pid', 'rank', 'attrs'}``.
    """
    scores = products['scores']
    product_ids = list(scores.keys())
    if not product_ids:
        return None

    # The attribute names of the first product define the criteria.
    entity = list(scores[product_ids[0]].keys())
    criteria = [MAX] * len(entity)

    matrix = Topsis.create_matrix(scores)
    priority = Topsis.reshape_priority(priority)

    data = Data(matrix,
                criteria,
                weights=priority,
                anames=product_ids,
                cnames=entity)
    decision = closeness.TOPSIS(mnorm="sum").decide(data)

    ranked_scores = {}
    for position, rank in enumerate(decision.rank_.tolist()):
        pid = product_ids[position]
        ranked_scores[pid] = {'pid': pid, 'rank': rank, 'attrs': scores[pid]}
    products['scores'] = ranked_scores

    return decision
#The index of the row of the best alternative (if this decision is a 𝛾-solution) print(dec.best_alternative_, data.anames[dec.best_alternative_]) #And the kernel of the non supered alternatives (if this decision is a 𝛽-solution) # this return None because this # decision is not a beta-solution print(dec.kernel_) dm = simple.WeightedProduct() print(dm) dec = dm.decide(data) print(dec) #TOPSIS dm = closeness.TOPSIS() print(dm) dec = dm.decide(data) print(dec) #The TOPSIS add more information into the decision object. print(dec.e_) print("Ideal:", dec.e_.ideal) print("Anti-Ideal:", dec.e_.anti_ideal) print("Closeness:", dec.e_.closeness) #Finally we can change the normalization criteria of the alternative matric to sum (divide every value by the sum opf #their criteria) and check the result: #dm = closeness.TOPSIS(mnorm="sum") dm = closeness.TOPSIS()
def main():
    """Rank the phones from ``data_with_missing_values.csv`` with TOPSIS.

    The criterion weights are a blend of five user-group profiles, mixed
    according to ``balance_vector``. The ranking is printed as a table.
    """
    # --- Load the data --------------------------------------------------
    # The three lists are handed to read_data empty and used afterwards, so
    # read_data presumably fills them in place.
    matrix, names, properties = [], [], []
    read_data('data_with_missing_values.csv', matrix, names, properties)

    # --- Fill in missing values -----------------------------------------
    matrix = complete_data(matrix)

    # --- Weight profiles (each sums to 1) -------------------------------
    # properties = ['ScreenSize', 'PrimaryCamera', 'SecondaryCamera', 'RAM', 'Battery', 'Memory', 'SDSlot', 'TalkTime',
    #               'Price', 'Announced', 'VoiceControl', 'SoundSpeaker', 'Weight', 'PhysicalKeyboard']
    regular_profile = [
        0.07, 0.07, 0.05, 0.09, 0.11, 0.08, 0.06, 0.09, 0.12, 0.02, 0.05,
        0.07, 0.08, 0.04
    ]
    children_profile = [
        0.06, 0.07, 0.05, 0.09, 0.09, 0.07, 0.04, 0.04, 0.25, 0.02, 0.06,
        0.04, 0.09, 0.03
    ]
    photographer_profile = [
        0.1, 0.15, 0.12, 0.09, 0.06, 0.09, 0.07, 0.04, 0.09, 0.02, 0.02,
        0.04, 0.07, 0.04
    ]
    business_profile = [
        0.1, 0.07, 0.06, 0.09, 0.02, 0.09, 0.07, 0.11, 0.1, 0.04, 0.09,
        0.05, 0.02, 0.09
    ]
    traveler_profile = [
        0.08, 0.12, 0.1, 0.06, 0.11, 0.08, 0.07, 0.06, 0.01, 0.05, 0.07,
        0.09, 0.07, 0.03
    ]
    groups_weights = [
        regular_profile, children_profile, photographer_profile,
        business_profile, traveler_profile
    ]
    validate_weight_groups(groups_weights)

    # --- Blend the profiles ----------------------------------------------
    # One share per profile; the shares may mix several profiles.
    balance_vector = [0, 0, 0, 0, 1]
    validate_balance_vector(balance_vector)

    # weights[i] = sum over profiles of share * profile[i]
    weights = [
        sum(share * profile[position]
            for share, profile in zip(balance_vector, groups_weights))
        for position in range(len(regular_profile))
    ]

    # --- Criteria senses --------------------------------------------------
    # -1 -> minimum is best, 1 -> maximum is best
    criteria = [1, 1, 1, 1, 1, 1, 1, 1, -1, 1, 1, 1, -1, 1]

    # --- Run TOPSIS and report --------------------------------------------
    decision = cl.TOPSIS().decide(matrix, criteria, weights)
    display_matrix = to_result_object(names, properties, decision, weights,
                                      True)
    print(to_table_string(display_matrix))
def loop(self):
    """Periodic job: pick the best WTP for every LVAP with TOPSIS.

    Runs only when an MCDA descriptor is loaded and the app is active.
    It refreshes the metrics of every configured criterion, builds one
    decision matrix per LVAP (one row per WTP), lets TOPSIS choose the best
    WTP, hands the LVAP over when it is attached to a different WTP, and
    optionally logs results and weights through ``self.monitor``.

    Returns early (``None``) as soon as one of the measurement getters
    reports failure.

    NOTE(review): the nesting below was reconstructed from a source whose
    indentation was lost; places where another nesting is plausible are
    marked. Confirm against the original file.
    """
    if self.__mcda_descriptor is not None and self.__active:
        # Step 1: creating structure to handle all metrics
        self.create_mcda_structure()

        # Step 2: Update WTP/LVAP association map
        self.update_wtp_association_map()

        # Step 3: for each criteria, get all metrics and populate structure
        for crr_criteria in self.__mcda_descriptor['criteria']:
            if crr_criteria == 'wtp_load_measured_mbps':
                if not self.get_wtp_load_measurements():
                    return
            elif crr_criteria == 'wtp_queue_delay_ms':
                if not self.get_wtp_queue_delay_measurements():
                    return
            elif crr_criteria == 'wtp_channel_load_rate':
                if not self.get_wtp_channel_load_measurements():
                    return
            elif crr_criteria == 'wtp_sta_rssi_dbm':
                if not self.get_lvap_rssi_measurements():
                    return
            elif crr_criteria == 'wtp_load_expected_mbps':
                self.initialize_wtp_load_expected()
            elif crr_criteria == 'sta_association_flag':
                self.get_sta_association_flag()

        # Step 4: get all flows from flow manager APP
        if self.get_flow_handler():
            if self.__flow_handler['flows'] is not None:
                # Step 5: Compute WTP expected load if present in the criteria
                if 'wtp_load_expected_mbps' in self.__mcda_descriptor['criteria']:
                    self.compute_wtp_load_expected_mbps()

                # Step 6: for each lvap in the network, get a decision using the TOPSIS method
                for lvap in self.lvaps():
                    crr_lvap_addr = str(lvap.addr)

                    # Create MCDA structure: one matrix row per WTP
                    mtx = []
                    wtp_addresses = []
                    for crr_wtp_addr in self.__mcda_handover_manager['wtps']:
                        # NOTE(review): the address is recorded for every WTP
                        # but a row is only added when the WTP knows this
                        # LVAP, so `wtp_addresses` can get longer than `mtx`
                        # and the two are indexed together below -- confirm
                        # every WTP always lists every LVAP.
                        wtp_addresses.append(crr_wtp_addr)
                        if crr_lvap_addr in self.__mcda_handover_manager['wtps'][crr_wtp_addr]['lvaps']:
                            mtx.append(
                                self.__mcda_handover_manager['wtps'][crr_wtp_addr]['lvaps'][crr_lvap_addr]['metrics'][
                                    'values'])

                    # if any of the (active) flows in this LVAP is QoS, use QoS weights
                    # otherwise, stick with the BE weights
                    mcda_weights = self.__mcda_descriptor['weights_be']
                    if crr_lvap_addr in self.__flow_handler['lvap_flow_map']:
                        if any(i in self.__flow_handler['lvap_flow_map'][crr_lvap_addr] for i in
                               self.__flow_handler['qos_flows']):
                            mcda_weights = self.__mcda_descriptor['weights_qos']

                    # Lists must have the same length
                    data = Data(mtx,
                                self.__mcda_targets,
                                weights=mcda_weights,
                                anames=wtp_addresses,
                                cnames=self.__mcda_descriptor['criteria'])
                    dm = closeness.TOPSIS()
                    dec = dm.decide(data)
                    best_alternative_wtp_addr = data.anames[dec.best_alternative_]

                    # Log one row per alternative (WTP) for this LVAP.
                    # NOTE(review): this is a truthiness test while the
                    # weights logging at the end tests `is not None`.
                    if self.__db_monitor:
                        for i in range(0, len(mtx)):
                            # NOTE(review): the two tolist() calls and
                            # `fields` do not depend on i and could be
                            # hoisted out of this loop.
                            closeness_list = dec.e_.closeness.tolist()
                            ranks = dec.rank_.tolist()
                            closeness_res = closeness_list[i]
                            # NaN closeness is stored as None.
                            if math.isnan(closeness_res):
                                closeness_res = None
                            fields = ['LVAP_ADDR', 'WTP_ADDR'] + \
                                     self.__mcda_descriptor['criteria'] + \
                                     ['RANK', 'CLOSENESS']
                            values = [crr_lvap_addr, wtp_addresses[i]] + mtx[i] + [ranks[i], closeness_res]

                            # Saving into db
                            self.monitor.insert_into_db(table='mcda_results', fields=fields, values=values)

                    # # TODO: Improve writing info...
                    # f = open(self.__mcda_results_filename, 'w+')
                    # f.write('Decision for LVAP: ' + crr_lvap_addr + '\n' + str(
                    #     dec) + '\nMove to WTP: ' + best_alternative_wtp_addr + '\n')
                    # f.close()

                    # Step 7: is handover needed? Do it and set the flag to 0 for all other blocks
                    # (this could be improved, but get block with given address should be implemented)
                    # Position of the association flag inside each metrics row, if that criterion is used
                    if 'sta_association_flag' in self.__mcda_descriptor['criteria']:
                        sta_association_index = self.__mcda_descriptor['criteria'].index('sta_association_flag')

                    # Stays None unless a handover is triggered below.
                    old_wtp_addr = None
                    for block in self.blocks():
                        crr_wtp_addr = str(block.addr)
                        if lvap.blocks[0] is not None:
                            if crr_wtp_addr == best_alternative_wtp_addr:
                                # Do handover to this block only if the station is not connected to it
                                sta_crr_wtp_addr = str(lvap.blocks[0].addr)
                                if sta_crr_wtp_addr != best_alternative_wtp_addr:
                                    self.log.info("Handover triggered!")
                                    old_wtp_addr = sta_crr_wtp_addr
                                    # Handover now..
                                    lvap.blocks = block
                                # and update metrics
                                # NOTE(review): this update may belong inside
                                # the handover branch above -- nesting
                                # reconstructed.
                                if 'sta_association_flag' in self.__mcda_descriptor['criteria']:
                                    self.__mcda_handover_manager['wtps'][crr_wtp_addr]['lvaps'][crr_lvap_addr]['metrics'][
                                        'values'][sta_association_index] = 1
                            elif 'sta_association_flag' in self.__mcda_descriptor['criteria']:
                                # Every other block: clear the association flag.
                                self.__mcda_handover_manager['wtps'][crr_wtp_addr]['lvaps'][crr_lvap_addr]['metrics'][
                                    'values'][sta_association_index] = 0

                    # Recalculate WTP expected load on handover, if any...
                    # OBS: not possible to access lvap.blocks[0] while performing handover
                    if 'wtp_load_expected_mbps' in self.__mcda_descriptor['criteria']:
                        if old_wtp_addr is not None:
                            self.recalculate_wtp_load_expected_mbps(old_wtp_addr=old_wtp_addr,
                                                                    best_alternative_wtp_addr=best_alternative_wtp_addr,
                                                                    moving_lvap_addr=crr_lvap_addr)

                # Start considering association and expected load from now on...
                # NOTE(review): may sit inside the LVAP loop in the original.
                if self.__initial_association:
                    self.__initial_association = False

        # Log the two weight vectors and trim the monitoring tables.
        # NOTE(review): nesting level reconstructed; placed under the outer
        # guard because it reads self.__mcda_descriptor.
        if self.__db_monitor is not None:
            fields = self.__mcda_descriptor['criteria'] + ['TYPE']
            values = self.__mcda_descriptor['weights_qos'] + ['QoS']

            # Saving into db
            self.monitor.insert_into_db(table='mcda_weights', fields=fields, values=values)

            fields = self.__mcda_descriptor['criteria'] + ['TYPE']
            values = self.__mcda_descriptor['weights_be'] + ['BE']

            # Saving into db
            self.monitor.insert_into_db(table='mcda_weights', fields=fields, values=values)

            # Keeping only the last measurements in db
            self.monitor.keep_last_measurements_only('mcda_association_stats')
            self.monitor.keep_last_measurements_only('mcda_results')
            self.monitor.keep_last_measurements_only('mcda_weights')
#The index of the row of the best alternative (if this decision is a 𝛾-solution) print(dec.best_alternative_, data.anames[dec.best_alternative_]) #And the kernel of the non supered alternatives (if this decision is a 𝛽-solution) # this return None because this # decision is not a beta-solution print(dec.kernel_) dm = simple.WeightedProduct() print(dm) dec = dm.decide(data) print(dec) #TOPSIS dm = closeness.TOPSIS() print(dm) dec = dm.decide(data) print(dec) #The TOPSIS add more information into the decision object. print(dec.e_) print("Ideal:", dec.e_.ideal) print("Anti-Ideal:", dec.e_.anti_ideal) print("Closeness:", dec.e_.closeness) #Finally we can change the normalization criteria of the alternative matric to sum (divide every value by the sum opf #their criteria) and check the result: #dm = closeness.TOPSIS(mnorm="sum") dm = closeness.TOPSIS()
def calculate(id):
    """Rank the method chunks that match a project's characteristics.

    The project's characteristics are the criteria and the method chunks
    sharing at least one of them are the alternatives. Every characteristic
    value is ordinal-encoded so that ``"N/A"`` maps to 0 and better values
    map to larger numbers; all criteria are then maximised by both
    WeightedSum and TOPSIS.

    Args:
        id: project identifier (the name shadows the builtin but is part of
            the public signature).

    Returns:
        A JSON-compatible dict with ``"results"`` (one ranked list per
        model) and ``"project"``, or the string ``"No match"`` when there is
        nothing to rank. A missing project aborts through
        ``find_one_or_404``.
    """
    import pandas as pd
    from sklearn.preprocessing import OrdinalEncoder
    from skcriteria import Data, MAX
    from skcriteria.madm import simple, closeness

    # fetch & preprocess
    project = mongo.db.projects.find_one_or_404({"id": id})
    projectCharacteristics = {p["id"]: p for p in project["characteristics"]}
    cnames = [*projectCharacteristics]

    # fetch & preprocess characteristics
    cursor = mongo.db.characteristics.find({"id": {"$in": cnames}})
    characteristics = {d["id"]: d for d in cursor}

    # fetch & preprocess methodchunks
    cursor = mongo.db.methodchunks.find(
        {"characteristics.id": {
            "$in": cnames
        }})
    method_chunks = {}
    for document in cursor:
        document["characteristics"] = {
            d["id"]: d
            for d in document["characteristics"]
        }
        method_chunks[document["id"]] = document

    # create one encoder per project characteristic
    for cid, pc in projectCharacteristics.items():
        if pc["rule"] not in ("preference_list", "exact"):
            # maximum / minimum: take the value scale from the characteristic
            # definition; "maximum" reverses it so the best value comes first.
            if cid in characteristics:
                for cv in characteristics[cid]["characteristicValues"]:
                    if cv["ref"] == pc["ref"]:
                        if pc["rule"] == "maximum":
                            pc["value"] = list(reversed(cv["values"]))
                        else:
                            pc["value"] = cv["values"]
                        break
        # pc["value"] is ordered best-first; append the N/A marker and
        # reverse so the encoder maps N/A -> 0 and the best value -> highest.
        values = pc["value"] + ["N/A"]
        values.reverse()
        enc = OrdinalEncoder(categories=[values])
        enc.fit([[v] for v in values])
        pc["encoder"] = enc

    # build mtx: one row per method chunk, one column per characteristic.
    # Rows are collected first and the frame is built once; DataFrame.append
    # was removed in pandas 2.0 and copied the whole frame on every call.
    row_ids = []
    rows = []
    for mid, m in method_chunks.items():
        row = []
        for cid in cnames:
            chunk_char = m["characteristics"].get(cid)
            if (chunk_char is not None
                    and projectCharacteristics[cid]["ref"] == chunk_char["ref"]):
                row.append(chunk_char["value"])
            else:
                row.append("N/A")
        row_ids.append(mid)
        rows.append(row)
    df = pd.DataFrame(rows, index=row_ids, columns=cnames)
    # Null values stored on a chunk count as N/A too.
    df = df.fillna("N/A")
    print(df)
    separator()

    if len(df.index) == 0:
        return "No match"

    # apply encoding; values outside the project's scale count as N/A
    encoded = df.copy()
    for key in cnames:
        allowed = projectCharacteristics[key]["value"]
        values = [v if v in allowed else "N/A" for v in df[key]]
        # Assign a 1-D float column so the frame ends up numeric.
        encoded[key] = projectCharacteristics[key]["encoder"].transform(
            [[v] for v in values]).ravel()
    # drop criteria for which every chunk is N/A (encoded as 0)
    encoded = encoded.loc[:, (encoded != 0).any(axis=0)]
    if encoded.shape[1] == 0:
        # No chunk carries a usable value for any criterion.
        return "No match"

    # construct
    optimal_senses = [MAX] * encoded.shape[1]
    weights = [
        projectCharacteristics[cid].get("weight", 1)
        for cid in encoded.columns
    ]
    data = Data(encoded.values,
                optimal_senses,
                weights=weights,
                anames=encoded.index,
                cnames=encoded.columns)

    # WeightedSum
    model = simple.WeightedSum(mnorm="vector", wnorm="sum")
    de = model.decide(data)
    print(de)
    separator()
    print(de.e_)
    print("Points:", de.e_.points)

    # TOPSIS
    model2 = closeness.TOPSIS(mnorm="vector", wnorm="sum")
    de2 = model2.decide(data)
    print(de2)
    separator()
    print(de2.e_)
    print("Ideal:", de2.e_.ideal)
    print("Anti-Ideal:", de2.e_.anti_ideal)
    print("Closeness:", de2.e_.closeness)

    # build response
    for mc in method_chunks.values():
        # back from {id: characteristic} to a plain list for serialisation
        mc["characteristics"] = list(mc["characteristics"].values())

    chunk_ids = list(encoded.index)

    def ranked(decision, scores):
        """Pair every method chunk with its score and rank, best rank first."""
        entries = [{
            "methodChunk": method_chunks[chunk_id],
            "score": scores[i],
            "rank": int(decision.rank_[i])
        } for i, chunk_id in enumerate(chunk_ids)]
        return sorted(entries, key=lambda x: x["rank"])

    res = {}
    res["results"] = [{
        "model": "WeightedSum",
        "values": ranked(de, de.e_.points)
    }, {
        "model": "TOPSIS",
        "values": ranked(de2, de2.e_.closeness)
    }]

    for pc in projectCharacteristics.values():
        pc.pop("_id", None)
        # replace the encoder object by its ordered category list
        pc["encoder"] = pc["encoder"].categories[0]
    project["characteristics"] = list(projectCharacteristics.values())
    project.pop("_id", None)
    res["project"] = project

    def default(o):
        """Placeholder for values the JSON encoder cannot serialise."""
        return f"<<non-serializable: {type(o).__qualname__}>>"

    return json.loads(json_util.dumps(res, default=default))
def flow(self,
         models_to_flow=[],
         params=None,
         test_size=0.2,
         nfolds=3,
         nrepeats=3,
         n_jobs=1,
         metrics=[],
         verbose=False,
         regressors=True,
         ensemble=False,
         featurePercentage=0.25):
    """Run the selected feature-importance models and optionally ensemble them.

    Args:
        models_to_flow: keys of the models to run, in order. Regressors:
            ``mixed_selection``, ``svr``, ``rfr``, ``abr``, ``lasso``,
            ``enet``, ``plsr``. Classifiers: ``abc``, ``rfc``, ``svc``.
            (The list defaults are kept for interface compatibility; they
            are never mutated here.)
        params: dict of model parameters. A model receives every entry whose
            key contains its tag (e.g. ``"svr__C"`` for ``svr``).
        test_size: float fraction passed to ``train_test_split``.
        nfolds, nrepeats, n_jobs: integers stored on the instance; ``nfolds``
            and ``n_jobs`` are forwarded to the predictive models.
        metrics: list of scoring names, stored on the instance.
        verbose: whether to print progress information.
        regressors: True to use the regressor table, False for classifiers.
        ensemble: when True, rank the features with WeightedSum,
            WeightedProduct and TOPSIS over the model importances.
        featurePercentage: fraction of features kept by the ensemble.

    Returns:
        dict with ``'feature_importances'`` (model key -> importance per
        feature), ``'ensemble_results'`` (DataFrame of the three rankings or
        None) and ``'kept_features'`` (list of feature names or None).

    Raises:
        AssertionError: if an argument has the wrong type.
        KeyError: if a key in ``models_to_flow`` is not a known model.
    """
    # Enforce parameters
    assert isinstance(nfolds, int), "nfolds must be integer"
    assert isinstance(nrepeats, int), "nrepeats must be integer"
    assert isinstance(n_jobs, int), "n_jobs must be integer"
    assert isinstance(verbose, bool), "verbosem ust be bool"
    assert isinstance(params, dict), "params must be a dict"
    assert isinstance(test_size, float), "test_size must be a float"
    assert isinstance(metrics, list), "model scoring must be a list"
    assert isinstance(regressors, bool), "regressor must be bool"
    assert isinstance(ensemble, bool), "ensemble must be bool"

    self._nfolds = nfolds
    self._nrepeats = nrepeats
    self._n_jobs = n_jobs
    self._verbose = verbose
    self._allParams = params
    self._metrics = metrics
    self._test_size = test_size
    self._regressors = regressors
    self._ensemble = ensemble
    self._featurePercentage = featurePercentage

    # Inform the streamline to user.
    streamline = " --> ".join(models_to_flow)
    if self._verbose:
        if self._regressors:
            print("*************************")
            print("=> (Regressor) " + "=> Feature Selection Streamline: " +
                  streamline)
            print("*************************")
        elif self._regressors == False:
            print("*************************")
            print("=> (Classifier) " + "=> Feature Selection Streamline: " +
                  streamline)
            print("*************************")
        else:
            print(
                "Invalid model selected. Please set regressors=True or regressors=False."
            )
            print()

    def params_matching(tag):
        """Return the user parameters whose key contains *tag*."""
        return {k: v for k, v in self._allParams.items() if tag in k}

    ############################################
    ########## Regressors Start Here ###########
    ############################################
    def supportVectorRegression():
        self._svr_params = params_matching("svr")
        # Only a linear kernel is requested here, as coef_ is read below.
        self._svr_params["svr__kernel"] = ['linear']
        model = SupportVectorRegressorPredictiveModel(
            self._X_train, self._y_train, self._svr_params, self._nfolds,
            self._n_jobs, self._verbose)
        return abs(model.getBestEstimator().coef_.flatten())

    def randomForestRegression():
        self._rfr_params = params_matching("rfr")
        model = RandomForestRegressorPredictiveModel(
            self._X_train, self._y_train, self._rfr_params, self._nfolds,
            self._n_jobs, self._verbose)
        return abs(model.getBestEstimator().feature_importances_.flatten())

    def adaptiveBoostingRegression():
        self._abr_params = params_matching("abr")
        model = AdaptiveBoostingRegressorPredictiveModel(
            self._X_train, self._y_train, self._abr_params, self._nfolds,
            self._n_jobs, self._verbose)
        return abs(model.getBestEstimator().feature_importances_.flatten())

    def lassoRegression():
        self._lasso_params = params_matching("lasso")
        model = LassoRegressorPredictiveModel(self._X_train, self._y_train,
                                              self._lasso_params,
                                              self._nfolds, self._n_jobs,
                                              self._verbose)
        return abs(model.getBestEstimator().coef_.flatten())

    def elasticNetRegression():
        self._enet_params = params_matching("enet")
        model = ElasticNetRegressorPredictiveModel(
            self._X_train, self._y_train, self._enet_params, self._nfolds,
            self._n_jobs, self._verbose)
        return abs(model.getBestEstimator().coef_.flatten())

    def mixed_selection():
        """Forward-backward feature selection on OLS p-values.

        A feature is included when its p-value is below ``threshold_in``
        (default 0.01) and excluded when it rises above ``threshold_out``
        (default 0.05). Always keep threshold_in < threshold_out to avoid
        infinite looping.
        See https://en.wikipedia.org/wiki/Stepwise_regression for details.

        Returns:
            list with one 0/1 flag per column of ``self._X`` (1 = selected).
        """
        if self._verbose:
            print("Executing: mixed_selection")

        X = self._X
        y = self._y
        initial_list = []

        # The asserts below used to be written as `assert (cond, "msg")`,
        # which asserts a non-empty tuple and therefore never fails.
        threshold_in_specified = "mixed_selection__threshold_in" in self._allParams
        if threshold_in_specified:
            threshold_in = self._allParams["mixed_selection__threshold_in"]
            assert isinstance(threshold_in,
                              float), "threshold_in must be a float"
        else:
            threshold_in = 0.01

        threshold_out_specified = "mixed_selection__threshold_out" in self._allParams
        if threshold_out_specified:
            threshold_out = self._allParams["mixed_selection__threshold_out"]
            assert isinstance(threshold_out,
                              float), "threshold_out must be a float"
        else:
            threshold_out = 0.05

        if "mixed_selection__verbose" in self._allParams:
            verbose = self._allParams["mixed_selection__verbose"]
            assert isinstance(verbose, bool), "verbose must be a bool"
        else:
            verbose = False

        if threshold_in_specified and threshold_out_specified:
            assert threshold_in < threshold_out, (
                "threshold in must be strictly less than the threshold out to avoid infinite looping."
            )

        included = list(initial_list)
        while True:
            changed = False

            # forward step: try each excluded feature on top of the model
            excluded = list(set(X.columns) - set(included))
            new_pval = pd.Series(index=excluded, dtype=float)
            for new_column in excluded:
                model = sm.OLS(
                    y,
                    sm.add_constant(
                        pd.DataFrame(X[included + [new_column]]))).fit()
                new_pval[new_column] = model.pvalues[new_column]
            best_pval = new_pval.min()
            if best_pval < threshold_in:
                best_feature = new_pval.idxmin()
                included.append(best_feature)
                changed = True
                if verbose:
                    print('Adding  {:30} with p-value {:.6}'.format(
                        best_feature, best_pval))

            # backward step
            model = sm.OLS(y, sm.add_constant(pd.DataFrame(
                X[included]))).fit()
            # use all coefs except intercept
            pvalues = model.pvalues.iloc[1:]
            worst_pval = pvalues.max()  # null if pvalues is empty
            if worst_pval > threshold_out:
                changed = True
                worst_feature = pvalues.idxmax()
                included.remove(worst_feature)
                if verbose:
                    print('Dropping {:30} with p-value {:.6}'.format(
                        worst_feature, worst_pval))

            if not changed:
                break

        return [1 if col in included else 0 for col in X.columns]

    def partialLeastSquaresRegression():
        if self._verbose:
            print("Executing: plsr")
        # The components are not helpful for this context. They might be for
        # transformation, however.
        pls_model = PLSRegression()
        pls_out = pls_model.fit(self._X, self._y)

        # The coefficients are used to show direction of the relationship
        return abs(pls_out.coef_.flatten())

    ############################################
    ########## Classifiers Start Here ##########
    ############################################
    def adaptiveBoostingClassifier():
        self._abc_params = params_matching("abc")
        model = AdaptiveBoostingClassifierPredictiveModel(
            self._X_train, self._y_train, self._abc_params, self._nfolds,
            self._n_jobs, self._verbose)
        return model.getBestEstimator().feature_importances_.flatten()

    def randomForestClassifier():
        self._rfc_params = params_matching("rfc")
        model = RandomForestClassifierPredictiveModel(
            self._X_train, self._y_train, self._rfc_params, self._nfolds,
            self._n_jobs, self._verbose)
        return model.getBestEstimator().feature_importances_.flatten()

    def supportVectorClassifier():
        self._svc_params = params_matching("svc")
        self._svc_params["svc__kernel"] = ['linear']
        model = SupportVectorClassifierPredictiveModel(
            self._X_train, self._y_train, self._svc_params, self._nfolds,
            self._n_jobs, self._verbose)

        # Combine the per-row coefficient vectors by element-wise product.
        coefs = model.getBestEstimator().coef_
        prods = coefs[0, :]
        for i in range(1, len(coefs)):
            prods = np.multiply(prods, coefs[i, :])
        return abs(prods)

    # Valid regressors
    regression_options = {
        "mixed_selection": mixed_selection,
        "svr": supportVectorRegression,
        "rfr": randomForestRegression,
        "abr": adaptiveBoostingRegression,
        "lasso": lassoRegression,
        "enet": elasticNetRegression,
        "plsr": partialLeastSquaresRegression
    }

    # Valid classifiers
    classification_options = {
        'abc': adaptiveBoostingClassifier,
        'rfc': randomForestClassifier,
        'svc': supportVectorClassifier
    }

    # Define return dictionary
    return_dict = {}

    # Train test split
    self._X_train, self._X_test, self._y_train, self._y_test = train_test_split(
        self._X, self._y, test_size=self._test_size)

    # Wrapper models
    self._key_features = {}
    if self._regressors:
        for key in models_to_flow:
            self._key_features[key] = regression_options[key]()
    elif self._regressors == False:
        for key in models_to_flow:
            self._key_features[key] = classification_options[key]()
    else:
        print(
            "Invalid model type. Please set regressors=True or regressors=False."
        )
        print()

    if self._verbose:
        # A bare `print` is a no-op expression in Python 3.
        print()

    return_dict['feature_importances'] = self._key_features

    self._ensemble_results = None
    self._kept_features = None
    if self._ensemble:
        alternative_names = self._X.columns.tolist()
        criterion_names = list(self._key_features.keys())
        criteria = [MAX for _ in criterion_names]
        # Equal weights; the previous `i / n` gave the first model weight 0.
        weights = [1.0 / len(criterion_names) for _ in criterion_names]

        df = pd.DataFrame(self._key_features, index=alternative_names)

        # DataFrame.as_matrix() was removed in pandas 1.0.
        data = Data(df.values,
                    criteria,
                    weights,
                    anames=df.index.tolist(),
                    cnames=df.columns)

        dec1 = simple.WeightedSum().decide(data)
        dec2 = simple.WeightedProduct().decide(data)
        dec3 = closeness.TOPSIS().decide(data)

        self._ensemble_results = pd.DataFrame(
            {
                "TOPSIS": dec3.rank_,
                "WeightedSum": dec1.rank_,
                "WeightedProduct": dec2.rank_
            },
            index=df.index.tolist())

        # Only keep features that our decision makers deemed in the top % specified
        num_features_requested = int(
            math.ceil(len(alternative_names) * self._featurePercentage))
        ranks = dec1.rank_ + dec2.rank_ + dec3.rank_
        # argsort + slice is safe when every feature is requested, where
        # np.argpartition(kth=n) raises, and yields the features best-first.
        best_first = np.argsort(ranks,
                                kind="stable")[:num_features_requested]
        self._kept_features = [alternative_names[i] for i in best_first]

        print("", self._featurePercentage * 100,
              " % -> (" + str(num_features_requested) + ") features kept.")
        print(self._kept_features)

    return_dict['ensemble_results'] = self._ensemble_results
    return_dict['kept_features'] = self._kept_features

    return return_dict
def get_madm_concensus(Wijk=None,
                       num_optimizers=100,
                       data_shape=(10, 5),
                       batch_size=10,
                       policy=np.average,
                       verbose=False):
    """Combine k optimisation results into one consensus ranking per cell.

    Every cell ``(i, j)`` of ``Wijk`` is an alternative and each of the k
    slices along the last axis is an equally weighted, maximised criterion.
    WeightedSum, WeightedProduct and TOPSIS rank the alternatives and
    ``policy`` merges the three rankings.

    Args:
        Wijk: 3-D array (rows, columns, optimisers). When None it is
            produced by ``get_k_optimizations`` using the arguments below.
        num_optimizers, data_shape, batch_size: forwarded to
            ``get_k_optimizations`` when ``Wijk`` is None.
        policy: callable ``policy(results, axis=1)`` reducing the three
            ranks of each alternative to a single value.
        verbose: print the intermediate structures.

    Returns:
        Tuple ``(rij, rij_move_sequence)``: the consensus rank reshaped to
        (rows, columns) and, for every row, the column index with the
        smallest consensus rank.
    """
    # Get data from simulation
    if Wijk is None:
        Wijk, _ = get_k_optimizations(num_optimizers=num_optimizers,
                                      data_shape=data_shape,
                                      batch_size=batch_size,
                                      verbose=verbose)

    n_rows, n_cols = Wijk[:, :, 0].shape
    n_criteria = Wijk.shape[2]

    # Alternative-space: cell (i, j) is alternative number i * n_cols + j
    alternative_names = [(i, j) for i in range(n_rows) for j in range(n_cols)]

    # Decision matrix: row-major flattening of the first two axes gives the
    # same row order as the alternative numbering above.
    DM = np.asarray(Wijk, dtype=float).reshape(n_rows * n_cols, n_criteria)

    # Putting it all together
    criterion_names = list(range(n_criteria))
    criteria = [MAX for _ in criterion_names]
    weights = [1.0 / n_criteria for _ in criterion_names]

    df = pd.DataFrame(DM, index=alternative_names, columns=criterion_names)
    if verbose:
        print("Alternatives {}".format(alternative_names))
        print("Criteria {}".format(criterion_names))
        print("Weights {}".format(weights))
        print("Decision Matrix {}".format(df))

    # Execute MADM (DataFrame.as_matrix() was removed in pandas 1.0)
    data = Data(df.values,
                criteria,
                weights,
                anames=df.index.tolist(),
                cnames=df.columns)

    # Execute on 3 decision makers
    dec1 = simple.WeightedSum().decide(data)
    dec2 = simple.WeightedProduct().decide(data)
    dec3 = closeness.TOPSIS().decide(data)

    results = pd.DataFrame(
        {
            "TOPSIS": dec3.rank_,
            "WeightedSum": dec1.rank_,
            "WeightedProduct": dec2.rank_
        },
        index=df.index.tolist())
    if verbose:
        print("MADM Results: {}".format(results))

    concensus_results = pd.DataFrame(
        {"ConsensusRank": policy(results, axis=1)}, index=results.index)
    rij = concensus_results.values.reshape(Wijk.shape[0], Wijk.shape[1])
    rij_move_sequence = np.argmin(rij, axis=1)

    return rij, rij_move_sequence


# Example usage:
# wijk, _ = get_k_optimizations(data=None, num_optimizers=5, data_shape=(10, 5), batch_size=5, verbose=True)
# rij, _ = get_madm_concensus(Wijk=wijk, policy=np.average)
# print(rij)
# print(_)