Beispiel #1
0
def get_ranking():
    """Rank every document of the Firestore ``users`` collection.

    The users are scored with a weighted sum (sum-normalised) over
    ``skills_score``, ``work_experience`` and ``rating`` (higher is better)
    and ``origin`` (lower is better).

    Returns:
        The ``jsonify`` response for the list of user dicts, each extended
        with an integer ``rank`` key and ordered best rank first. An empty
        collection yields an empty JSON list.
    """
    # Imported locally because no module-level pandas import is visible here.
    import pandas as pd

    # initialize_app() raises ValueError when the default app already exists,
    # so only create it the first time this function runs.
    try:
        firebase_admin.get_app()
    except ValueError:
        # Use a service account
        cred = credentials.Certificate('merlin-c4fa7-firebase-adminsdk-7cfbn-8323034d25.json')
        firebase_admin.initialize_app(cred)

    db = firestore.client()
    user_list = [doc.to_dict() for doc in db.collection(u'users').stream()]
    if not user_list:
        return jsonify([])

    # The original read ``users_data`` before it was ever assigned; build the
    # frame from the fetched documents first.
    users_data = pd.DataFrame(user_list)
    users_data = users_data.loc[:, ['ID', 'skills_score', 'work_experience', 'rating', 'origin']]

    criteria_data = Data(
        users_data.iloc[:, 1:],           # the pandas dataframe
        [MAX, MAX, MAX, MIN],             # direction of goodness for each column
        anames=users_data['ID'],          # each entity's name, here userId
        cnames=users_data.columns[1:],    # attribute/column name
    )

    dm = simple.WeightedSum(mnorm="sum")
    dec = dm.decide(criteria_data)

    # Rows of users_data keep the order of user_list, so ranks line up by
    # position. int() turns the numpy scalar into a JSON-serialisable value.
    for user, rank in zip(user_list, dec.rank_):
        user['rank'] = int(rank)
    user_list.sort(key=lambda user: user['rank'])

    return jsonify(user_list)
    def execute(self):
        """Ensemble the per-model feature importances into a feature ranking.

        Every column of ``self._X`` is an alternative and every entry of
        ``self._key_features`` is one criterion (to be maximised). The
        alternatives are ranked with WeightedSum, WeightedProduct and TOPSIS,
        and the features with the lowest summed rank are kept.

        Returns:
            tuple: ``(ensemble_results, kept_features)`` where
            ``ensemble_results`` is a DataFrame with one rank column per
            decision maker and ``kept_features`` is the list of retained
            feature names, best first.
        """
        alternative_names = self._X.columns.tolist()
        criterion_names = list(self._key_features.keys())
        criteria = [MAX for _ in criterion_names]
        # Equal weight per criterion. The previous ``i / len(...)`` gave the
        # first criterion weight 0 (and a zero weight sum for one criterion).
        weights = [1.0 / len(criterion_names) for _ in criterion_names]

        df = pd.DataFrame(self._key_features, index=alternative_names)

        # DataFrame.as_matrix() was removed in pandas 1.0; .values is the
        # equivalent ndarray accessor available in old and new versions.
        data = Data(df.values,
                    criteria,
                    weights,
                    anames=df.index.tolist(),
                    cnames=df.columns)

        dec1 = simple.WeightedSum().decide(data)
        dec2 = simple.WeightedProduct().decide(data)
        dec3 = closeness.TOPSIS().decide(data)

        self._ensemble_results = pd.DataFrame(
            {
                "TOPSIS": dec3.rank_,
                "WeightedSum": dec1.rank_,
                "WeightedProduct": dec2.rank_
            },
            index=df.index.tolist())

        # Only keep features that our decision makers deemed in the top % specified
        num_features_requested = math.ceil(
            len(alternative_names) * self._featurePercentage)
        # Clamp so that percentages outside [0, 1] cannot index out of range.
        num_features_requested = max(
            0, min(num_features_requested, len(alternative_names)))

        sum_ranks = dec1.rank_ + dec2.rank_ + dec3.rank_
        # A stable argsort replaces np.argpartition, which raises when kth
        # equals the number of features (e.g. featurePercentage == 1.0).
        best_first = np.argsort(sum_ranks, kind="mergesort")
        self._kept_features = [
            alternative_names[i] for i in best_first[:num_features_requested]
        ]

        if self._verbose:
            print("", self._featurePercentage * 100,
                  " % -> (" + str(num_features_requested) + ") features kept.")
            print(self._kept_features)

        return self._ensemble_results, self._kept_features
Beispiel #3
0
def getBestRankedCandidate(nbToExtract, candidateSet, rulesSetsQuality, criteriaQuality):
    """Return the best-ranked candidates according to a weighted-sum decision.

    Args:
        nbToExtract: maximum number of candidates to return; clamped to the
            number of ranked candidates.
        candidateSet: sequence of candidates, aligned with ``rulesSetsQuality``.
        rulesSetsQuality: one row of quality indicators per candidate, in the
            order Polarity, Diversity, Distancing, Surprise.
        criteriaQuality: one marker per indicator: ``'+'`` to maximise it,
            ``'-'`` to minimise it, anything else to ignore it.

    Returns:
        list of ``(candidate, quality_indicators)`` tuples, best rank first.
        The indicators returned are always the original, unfiltered rows.
    """
    tmpRulesSetsQuality = None

    criteria = []
    idAttributesToRemove = []
    for idCrit, sense in enumerate(criteriaQuality):
        if sense == '+':
            criteria.append(MAX)
        elif sense == '-':
            criteria.append(MIN)
        else:
            idAttributesToRemove.append(idCrit)

    # NOTE(review): removeAttributeId presumably returns a copy of the list
    # without the given positions - confirm against its definition.
    attributes = removeAttributeId(['Polarity', 'Diversity', 'Distancing', 'Surprise'],
                                   idAttributesToRemove)
    if len(attributes) < 4:
        # Keep the unfiltered rows so they can be returned to the caller.
        tmpRulesSetsQuality = rulesSetsQuality
        rulesSetsQuality = [
            removeAttributeId(ruleQI, idAttributesToRemove)
            for ruleQI in tmpRulesSetsQuality
        ]

    print(str(attributes))
    candidateIds = list(range(len(candidateSet)))
    data = Data(rulesSetsQuality,
                criteria,
                anames=candidateIds,
                cnames=attributes)

    # apply a simple weighted sums method
    dm = simple.WeightedSum()
    res = dm.decide(data)
    current_ranking = list(res.rank_)

    # extract bests candidates
    nbToExtract = max(0, min(nbToExtract, len(current_ranking)))

    # Order candidate positions by rank with a stable sort. Looking up each
    # rank value with ``.index(i + 1)`` was quadratic and raised ValueError
    # whenever tied candidates left some rank value unassigned.
    bestCandidateIds = sorted(range(len(current_ranking)),
                              key=current_ranking.__getitem__)[:nbToExtract]

    if tmpRulesSetsQuality is None:
        reportedQuality = rulesSetsQuality
    else:
        reportedQuality = tmpRulesSetsQuality

    return [(candidateSet[idx], reportedQuality[idx]) for idx in bestCandidateIds]
Beispiel #4
0
def rank_label(df_lab, wgt):
    """Rank the rows of ``df_lab`` with four weighted decision makers.

    Columns 1..4 of ``df_lab`` are the criteria (all minimised), the
    ``'Index'`` column names the alternatives and ``wgt`` holds the criteria
    weights. A copy of ``df_lab`` is returned with one rank column per
    (method, normalisation) pair, sorted by the weighted-sum / sum-norm rank.
    """
    criteria_data = Data(
        df_lab.iloc[:, 1:5],             # criteria values
        [MIN] * 4,                       # every criterion is minimised
        anames=df_lab['Index'],          # alternative names
        cnames=df_lab.columns[1:5],      # criteria names
        weights=wgt,                     # criteria weights
    )

    ranked = df_lab.copy()

    # (output column, decision-maker class, matrix normalisation)
    configurations = (
        ('rank_weightedSum_sumNorm_inverse', simple.WeightedSum, "sum"),
        ('rank_weightedSum_maxNorm_inverse', simple.WeightedSum, "max"),
        ('rank_weightedProduct_sumNorm_inverse', simple.WeightedProduct, "sum"),
        ('rank_weightedProduct_maxNorm_inverse', simple.WeightedProduct, "max"),
    )
    for column, maker_class, norm in configurations:
        decision = maker_class(mnorm=norm).decide(criteria_data)
        ranked.loc[:, column] = decision.rank_

    # sort for better visualization
    ranked.sort_values(by=['rank_weightedSum_sumNorm_inverse'], inplace=True)

    return ranked
Beispiel #5
0
    cnames=[
        "Run", "HS", "Average", "BF", "Strikerate", "Centuries", "Fifties",
        "zeros", "Fours", "Sixes", "Maiden", "Runs_bowl", "Wickets",
        "Average_bowling", "Economical", "Strikerate_bowl", "4wickets",
        "5wickets"
    ])

# Box plot of the criteria; the other plot kinds are kept disabled below.
#data.plot();
data.plot("box")

#data.plot.violin();
#data.plot.radar(cmap="inferno", show_criteria=False);

# Create the decision maker with its default hyper-parameters and show it.
dm = simple.WeightedSum()
print(dm)

# Let it decide the ranking and show the resulting decision.
dec = dm.decide(data)
print(dec)

# Method-specific extra information, then the points it carries.
print(dec.e_)
print(dec.e_.points)

# Which kinds of solution this decision provides.
for question, attribute in (
        ("Generate a ranking of alternatives?", "alpha_solution_"),
        ("Generate a kernel of best alternatives?", "beta_solution_"),
        ("Choose the best alternative?", "gamma_solution_"),
):
    print(question, getattr(dec, attribute))

# The rank as numpy array (if this decision is a 𝛼-solution)
print(dec.rank_)
Beispiel #6
0
def drv(weights, abc, climit, ntest, ntest_kwargs, alpha_norm, alpha_rank,
        njobs, agg_only_consensus):
    """Run the whole DRV pipeline and collect every result in one dict.

    Steps, in order: analyse the weights matrix with ``subproblem`` (only
    when ``weights`` has more than one dimension), analyse every matrix of
    ``abc`` with ``subproblem`` in parallel, derive the global consensus and
    normality-rejection flags and, when allowed, aggregate with an
    un-normalised weighted sum and verify the ranking with pairwise tests.

    Parameters
    ----------
    weights : array-like
        Passed to ``subproblem`` only when ``np.ndim(weights) > 1``;
        otherwise every ``"w..._"`` entry of the result is ``None``.
    abc : sequence of 2-D array-like
        One matrix per criterion; the shape of ``abc[0]`` is read as
        ``(N, I)`` (participants, alternatives).
    climit, alpha_norm
        Forwarded unchanged to ``subproblem``.
    ntest
        Key of ``NORMAL_TESTS``; any other value is used as given.
        NOTE(review): presumably a callable in that case - confirm against
        ``subproblem``.
    ntest_kwargs : dict or None
        Forwarded to ``subproblem``; ``None`` is replaced by ``{}``.
    alpha_rank
        Forwarded to ``fdr_by`` as ``alpha``.
    njobs : int or None
        Number of joblib workers for the per-criterion analysis; ``None``
        means ``joblib.cpu_count()``.
    agg_only_consensus : bool
        When true the aggregation stage runs only if consensus was reached.

    Returns
    -------
    dict
        All intermediate and final results, keyed by names ending in ``_``.
    """
    # PREPROCESS

    # determine numbers of parallel jobs
    njobs = joblib.cpu_count() if njobs is None else njobs

    # determine the normal test
    ntest = NORMAL_TESTS.get(ntest, ntest)
    ntest_kwargs = {} if ntest_kwargs is None else ntest_kwargs

    # number of participants & alternatives
    N, I = np.shape(abc[0])

    # number of criteria
    J = len(abc)

    # placeholder to store the results
    results = {"N_": N, "I_": I, "J_": J}

    # WEIGHTS
    # Only a weights matrix (more than one dimension) is analysed; otherwise
    # the empty dict makes every .get() below return None.
    if np.ndim(weights) > 1:
        wresults = subproblem(mtx=weights,
                              climit=climit,
                              alpha_norm=alpha_norm,
                              ntest=ntest,
                              ntest_kwargs=ntest_kwargs)
    else:
        wresults = {}

    # copy weights results to the global results
    results.update({
        "wmtx_": wresults.get("nproducts"),
        "wsst_": wresults.get("sst"),
        "wssw_": wresults.get("ssw"),
        "wssb_": wresults.get("ssb"),
        "wssu_": wresults.get("ssu"),
        "wivr_": wresults.get("ivr"),
        "wntest_sts_": wresults.get("ntest_sts"),
        "wntest_pvals_": wresults.get("ntest_pvals"),
        "wntest_reject_h0_": wresults.get("ntest_reject_h0"),
        "win_consensus_": wresults.get("in_consensus"),
        "weights_mean_": wresults.get("resume")
    })

    # ALTERNATIVES
    # NOTE: ``wresults`` is reused here; from this point it is the list of
    # per-criterion subproblem results, one entry per matrix in ``abc``.
    with joblib.Parallel(n_jobs=njobs) as jobs:
        wresults = jobs(
            joblib.delayed(subproblem)(amtx,
                                       climit=climit,
                                       alpha_norm=alpha_norm,
                                       ntest=ntest,
                                       ntest_kwargs=ntest_kwargs)
            for amtx in abc)

    # copy alt results to the global results
    results.update({
        "amtx_criteria_":
        tuple([r["nproducts"] for r in wresults]),
        "asst_":
        np.hstack([r["sst"] for r in wresults]),
        "assw_":
        np.hstack([r["ssw"] for r in wresults]),
        "assb_":
        np.hstack([r["ssb"] for r in wresults]),
        "assu_":
        np.hstack([r["ssu"] for r in wresults]),
        "aivr_":
        np.hstack([r["ivr"] for r in wresults]),
        "ain_consensus_":
        np.hstack([r["in_consensus"] for r in wresults]),
        "antest_sts_":
        np.vstack([r["ntest_sts"] for r in wresults]),
        "antest_pvals_":
        np.vstack([r["ntest_pvals"] for r in wresults]),
        "antest_reject_h0_":
        np.vstack([r["ntest_reject_h0"] for r in wresults]),
        "amtx_mean_":
        np.vstack([r["resume"] for r in wresults])
    })

    # CONSENSUS
    # Every criterion must be in consensus and, when the weights were
    # analysed, the weights must be in consensus as well.
    consensus = np.all(results["ain_consensus_"])
    if consensus and results["weights_mean_"] is not None:
        consensus = consensus and results["win_consensus_"]
    results["consensus_"] = consensus  # to global results

    # GLOBAL REJECT H0
    # True when any normality test rejected H0, for the alternatives or
    # (when available) for the weights.
    reject_h0 = np.any(results["antest_reject_h0_"])
    if not reject_h0 and results["wntest_reject_h0_"] is not None:
        reject_h0 = reject_h0 or np.any(results["wntest_reject_h0_"])
    results["ntest_reject_h0_"] = reject_h0

    # AGGREGATION
    if consensus or not agg_only_consensus:
        # Both normalisations are disabled for this weighted sum.
        aggregator = simple.WeightedSum(mnorm="none", wnorm="none")

        # NOTE(review): the builtin ``max`` is used as the sense of every
        # criterion; presumably the aggregator accepts it as an alias for
        # "maximise" - confirm.
        criteria = [max] * J

        # Without analysed weights the scalar 1 is used as the weight.
        weights_mean = (1 if results["weights_mean_"] is None else
                        results["weights_mean_"])
        agg_m = aggregator.decide(results["amtx_mean_"].T,
                                  criteria=criteria,
                                  weights=weights_mean)

        # One aggregation per participant index; runs with a single job.
        with joblib.Parallel(n_jobs=1) as jobs:
            agg_p = jobs(
                joblib.delayed(run_aggregator)(idx=idx,
                                               mtxs=results["amtx_criteria_"],
                                               criteria=criteria,
                                               weights=results["wmtx_"],
                                               aggregator=aggregator)
                for idx in range(N))
            agg_p = tuple(agg_p)

        with joblib.Parallel(n_jobs=1) as jobs:
            # rank verification
            # One test per unordered pair of alternative indices.
            ttest_results = jobs(
                joblib.delayed(rank_ttest_rel)(
                    agg_p=agg_p, aidx=aidx, bidx=bidx)
                for aidx, bidx in it.combinations(range(I), 2))

            ttest_size = len(ttest_results)
            rank_t, rank_p = np.empty(ttest_size), np.empty(ttest_size)
            for idx, r in enumerate(ttest_results):
                rank_t[idx] = r.statistic
                rank_p[idx] = r.pvalue

        # A pair passes when its p-value is below the value from fdr_by;
        # the summary flag requires every pair to pass.
        rank_fdr = fdr_by(alpha=alpha_rank, pvals=rank_p, I=I)
        rank_results = rank_p < rank_fdr
        rank_results_resume = np.all(rank_results)
    else:
        # Aggregation skipped: no consensus and agg_only_consensus is set.
        agg_p, agg_m = None, None
        rank_t, rank_p, rank_fdr, rank_results = None, None, None, None
        rank_results_resume = False

    # to global results
    results["aggregation_criteria_"] = agg_p
    results["aggregation_mean_"] = agg_m

    results["rank_check_t_"] = rank_t
    results["rank_check_pval_"] = rank_p
    results["rank_check_fdr_"] = rank_fdr
    results["rank_check_results_"] = rank_results
    results["rank_check_results_resume_"] = rank_results_resume

    # Strict preference needs consensus, no rejected normality test and a
    # fully verified ranking.
    results["strict_preference_"] = (consensus and not reject_h0
                                     and rank_results_resume)

    return results
Beispiel #7
0
def calculate(id):
    """Rank the method chunks that match a project's characteristics.

    The project ``id`` is loaded from MongoDB together with its
    characteristics and every method chunk sharing at least one of them.
    Each characteristic value is ordinal-encoded (``"N/A"`` is the lowest
    category), and the resulting matrix is ranked with WeightedSum and
    TOPSIS, using the optional per-characteristic ``weight`` (default 1).

    Args:
        id: value of the ``id`` field of the project document. The name
            shadows the builtin but is kept for existing callers.

    Returns:
        The string ``"No match"`` when no method chunk was found, otherwise
        a JSON-compatible dict with ``results`` (one ranked list per model)
        and ``project``.
    """
    # fetch & preprocess
    project = mongo.db.projects.find_one_or_404({"id": id})
    projectCharacteristics = {p["id"]: p for p in project["characteristics"]}

    cnames = [*projectCharacteristics]

    # fetch & preprocess characteristics
    cursor = mongo.db.characteristics.find({"id": {"$in": cnames}})
    characteristics = {d["id"]: d for d in cursor}

    # fetch & preprocess methodchunks
    cursor = mongo.db.methodchunks.find(
        {"characteristics.id": {
            "$in": cnames
        }})
    method_chunks = {}
    for document in cursor:
        document["characteristics"] = {
            d["id"]: d
            for d in document["characteristics"]
        }
        method_chunks[document["id"]] = document

    # create encoder
    from sklearn.preprocessing import OrdinalEncoder
    for cid, pc in projectCharacteristics.items():
        # "preference_list" and "exact" already carry their own value list;
        # "maximum"/"minimum" take it from the characteristic definition.
        if (pc["rule"] not in ("preference_list", "exact")
                and cid in characteristics):
            for cv in characteristics[cid]["characteristicValues"]:
                if cv["ref"] == pc["ref"]:
                    if pc["rule"] == "maximum":
                        pc["value"] = list(reversed(cv["values"]))
                    else:
                        pc["value"] = cv["values"]
                    break
        values = pc["value"] + ["N/A"]
        values.reverse()  # ordinal values asc order (smallest to largest)
        enc = OrdinalEncoder(categories=[values])
        enc.fit([[v] for v in values])
        pc["encoder"] = enc

    # build mtx
    import pandas as pd
    rows = []
    for m in method_chunks.values():
        obj = {}
        for cid, pc in projectCharacteristics.items():
            if cid in m["characteristics"]:
                if pc["ref"] == m["characteristics"][cid]["ref"]:
                    obj[cid] = m["characteristics"][cid]["value"]
        rows.append(obj)
    # Build the frame in one step: DataFrame.append was removed in pandas 2.0
    # and copied the whole frame for every row. Missing cells become NaN and
    # dtype=object matches the frame the row-wise append used to produce.
    df = pd.DataFrame(rows,
                      index=list(method_chunks),
                      columns=cnames,
                      dtype=object)
    df.fillna("N/A", inplace=True)
    print(df)
    separator()

    if len(df.index.values) == 0:
        return "No match"
    # apply encoding
    encoded = df.copy()
    for key, value in encoded.items():
        # Values the project does not list are treated as "N/A".
        values = [
            v if v in projectCharacteristics[key]["value"] else "N/A"
            for v in value
        ]
        encoded.loc[:, key] = projectCharacteristics[key]["encoder"].transform(
            [[v] for v in values])
    # Drop criteria where every chunk encoded to 0 ("N/A").
    encoded = encoded.loc[:, (encoded != 0).any(axis=0)]

    # construct
    from skcriteria import Data, MAX
    from skcriteria.madm import simple, closeness
    optimal_senses = [MAX for _ in encoded.columns]
    weights = [
        projectCharacteristics[cid].get("weight", 1)
        for cid in encoded.columns
    ]

    data = Data(encoded.values,
                optimal_senses,
                weights=weights,
                anames=encoded.index,
                cnames=encoded.columns)

    # WeightedSum
    model = simple.WeightedSum(mnorm="vector", wnorm="sum")
    de = model.decide(data)
    print(de)
    separator()

    print(de.e_)
    print("Points:", de.e_.points)

    # TOPSIS
    model2 = closeness.TOPSIS(mnorm="vector", wnorm="sum")
    de2 = model2.decide(data)
    print(de2)
    separator()

    print(de2.e_)
    print("Ideal:", de2.e_.ideal)
    print("Anti-Ideal:", de2.e_.anti_ideal)
    print("Closeness:", de2.e_.closeness)

    # build response
    res = {}
    # Turn the per-chunk characteristic dicts back into lists for the reply.
    for mc in method_chunks.values():
        mc["characteristics"] = list(mc["characteristics"].values())
    z = [{
        "methodChunk": method_chunks[de._data._anames[i]],
        "score": de.e_.points[i],
        "rank": int(de._rank[i])
    } for i in range(0, len(de.mtx))]
    z2 = [{
        "methodChunk": method_chunks[de2._data._anames[i]],
        "score": de2.e_.closeness[i],
        "rank": int(de2._rank[i])
    } for i in range(0, len(de2.mtx))]

    res["results"] = [{
        "model": "WeightedSum",
        "values": sorted(z, key=lambda x: x["rank"])
    }, {
        "model": "TOPSIS",
        "values": sorted(z2, key=lambda x: x["rank"])
    }]

    for pc in projectCharacteristics.values():
        pc.pop("_id", None)
        # Replace the encoder object by its category list so it serialises.
        pc["encoder"] = pc["encoder"].categories[0]
    project["characteristics"] = list(projectCharacteristics.values())
    project.pop("_id", None)
    res["project"] = project

    def default(o):
        # Last-resort placeholder for values json_util cannot serialise.
        return f"<<non-serializable: {type(o).__qualname__}>>"

    result = json.loads(json_util.dumps(res, default=default))
    return result
Beispiel #8
0
def plot_heatmap(logic):
    """Draw an annotated seaborn heatmap of ``normalize_data(logic)``.

    Rows are labelled with the ``patent_number`` column of the module-level
    ``patent_data`` frame and columns with its remaining column names.
    """
    normalised = normalize_data(logic)
    heatmap_options = {
        "annot": True,
        "yticklabels": patent_data['patent_number'],
        "xticklabels": patent_data.columns[1:],
        "fmt": '.2g',
    }
    sns.heatmap(normalised, **heatmap_options)


###########
# Print the final ranking table for each multi-criteria decision maker.

for maker_class in (simple.WeightedSum, simple.WeightedProduct):
    dm = maker_class()
    dec = dm.decide(criteria_data)
    print(dec)
    print(dec.e_.points)  # the value behind each rank
    print(dec.rank_)  # the ranks themselves

    print("==============================")
Beispiel #9
0
    def flow(self,
             models_to_flow=[],
             params=None,
             test_size=0.2,
             nfolds=3,
             nrepeats=3,
             n_jobs=1,
             metrics=[],
             verbose=False,
             regressors=True,
             ensemble=False,
             featurePercentage=0.25):
        """Run the requested feature-selection models and optionally ensemble them.

        Args:
            models_to_flow: list of model keys. Regressors: ``mixed_selection``,
                ``svr``, ``rfr``, ``abr``, ``lasso``, ``enet``, ``plsr``.
                Classifiers: ``abc``, ``rfc``, ``svc``. The default list is
                never mutated here.
            params: dict of hyper-parameters; each model picks the entries
                whose key contains its tag. ``None`` means no overrides.
            test_size: float fraction handed to ``train_test_split``.
            nfolds: int number of folds given to each predictive model.
            nrepeats: int, stored on the instance.
            n_jobs: int number of jobs given to each predictive model.
            metrics: list, stored (as a copy) on the instance.
            verbose: bool, print progress information.
            regressors: bool, ``True`` selects the regressor table and
                ``False`` the classifier table.
            ensemble: bool, also rank the features with WeightedSum,
                WeightedProduct and TOPSIS over the model importances.
            featurePercentage: fraction of features kept by the ensemble.

        Returns:
            dict with ``feature_importances`` and, when ``ensemble`` is true,
            ``ensemble_results`` (DataFrame of ranks) and ``kept_features``.

        Raises:
            AssertionError: when an argument or a ``mixed_selection__*``
                parameter has the wrong type, or threshold_in is not below
                threshold_out.
            KeyError: when a key of ``models_to_flow`` is unknown.
        """
        # The signature defaults ``params`` to None, which the dict assertion
        # below always rejected; treat None as "no overrides".
        if params is None:
            params = {}

        # Enforce parameters
        assert isinstance(nfolds, int), "nfolds must be integer"
        assert isinstance(nrepeats, int), "nrepeats must be integer"
        assert isinstance(n_jobs, int), "n_jobs must be integer"
        assert isinstance(verbose, bool), "verbose must be bool"
        assert isinstance(params, dict), "params must be a dict"
        assert isinstance(test_size, float), "test_size must be a float"
        assert isinstance(metrics, list), "model scoring must be a list"
        assert isinstance(regressors, bool), "regressor must be bool"
        assert isinstance(ensemble, bool), "ensemble must be bool"

        self._nfolds = nfolds
        self._nrepeats = nrepeats
        self._n_jobs = n_jobs
        self._verbose = verbose
        self._allParams = params
        # Copy so the shared mutable default list is never stored on self.
        self._metrics = list(metrics)
        self._test_size = test_size
        self._regressors = regressors
        self._ensemble = ensemble
        self._featurePercentage = featurePercentage

        # Inform the streamline to user.
        streamline = " --> ".join(models_to_flow)

        if self._verbose:

            if self._regressors:
                print("*************************")
                print("=> (Regressor) " + "=> Feature Selection Streamline: " +
                      streamline)
                print("*************************")
            elif self._regressors == False:
                print("*************************")
                print("=> (Classifier) " +
                      "=> Feature Selection Streamline: " + streamline)
                print("*************************")
            else:
                print(
                    "Invalid model selected. Please set regressors=True or regressors=False."
                )
                print()

        def params_for(tag):
            """Return the user parameters whose key contains *tag*."""
            return {k: v for k, v in self._allParams.items() if tag in k}

        def supportVectorRegression():
            self._svr_params = params_for("svr")
            # A linear kernel is forced so that ``coef_`` is available.
            self._svr_params["svr__kernel"] = ['linear']
            model = SupportVectorRegressorPredictiveModel(
                self._X_train, self._y_train, self._svr_params, self._nfolds,
                self._n_jobs, self._verbose)
            return abs(model.getBestEstimator().coef_.flatten())

        def randomForestRegression():
            self._rfr_params = params_for("rfr")
            model = RandomForestRegressorPredictiveModel(
                self._X_train, self._y_train, self._rfr_params, self._nfolds,
                self._n_jobs, self._verbose)
            return abs(model.getBestEstimator().feature_importances_.flatten())

        def adaptiveBoostingRegression():
            self._abr_params = params_for("abr")
            model = AdaptiveBoostingRegressorPredictiveModel(
                self._X_train, self._y_train, self._abr_params, self._nfolds,
                self._n_jobs, self._verbose)
            return abs(model.getBestEstimator().feature_importances_.flatten())

        def lassoRegression():
            self._lasso_params = params_for("lasso")
            model = LassoRegressorPredictiveModel(self._X_train, self._y_train,
                                                  self._lasso_params,
                                                  self._nfolds, self._n_jobs,
                                                  self._verbose)
            return abs(model.getBestEstimator().coef_.flatten())

        def elasticNetRegression():
            self._enet_params = params_for("enet")
            model = ElasticNetRegressorPredictiveModel(
                self._X_train, self._y_train, self._enet_params, self._nfolds,
                self._n_jobs, self._verbose)
            return abs(model.getBestEstimator().coef_.flatten())

        def mixed_selection():
            """Perform a forward-backward feature selection
            based on p-value from statsmodels.api.OLS.

            Parameters read from ``self._allParams``:
                mixed_selection__threshold_in - include a feature if its
                    p-value < threshold_in (default 0.01)
                mixed_selection__threshold_out - exclude a feature if its
                    p-value > threshold_out (default 0.05)
                mixed_selection__verbose - whether to print the sequence of
                    inclusions and exclusions (default False)
            Returns: one 0/1 flag per column of X, 1 meaning selected.
            Always set threshold_in < threshold_out to avoid infinite looping.
            See https://en.wikipedia.org/wiki/Stepwise_regression for the details
            """
            if self._verbose:
                print("Executing: mixed_selection")

            X = self._X
            y = self._y

            initial_list = []

            # The original wrote ``assert (condition, "message")``: a
            # non-empty tuple is always truthy, so none of these checks ever
            # fired. They are real assertions now.
            threshold_in_specified = (
                "mixed_selection__threshold_in" in self._allParams)
            if threshold_in_specified:
                threshold_in = self._allParams["mixed_selection__threshold_in"]
                assert isinstance(threshold_in,
                                  float), "threshold_in must be a float"
            else:
                threshold_in = 0.01

            threshold_out_specified = (
                "mixed_selection__threshold_out" in self._allParams)
            if threshold_out_specified:
                threshold_out = self._allParams[
                    "mixed_selection__threshold_out"]
                assert isinstance(threshold_out,
                                  float), "threshold_out must be a float"
            else:
                threshold_out = 0.05

            if "mixed_selection__verbose" in self._allParams:
                verbose = self._allParams["mixed_selection__verbose"]
                assert isinstance(verbose, bool), "verbose must be a bool"
            else:
                verbose = False

            if threshold_in_specified and threshold_out_specified:
                assert threshold_in < threshold_out, (
                    "threshold in must be strictly less than the threshold out to avoid infinite looping."
                )

            included = list(initial_list)
            while True:
                changed = False

                # forward step
                excluded = list(set(X.columns) - set(included))
                # An explicit float dtype keeps min()/idxmin() numeric; a
                # dtype-less empty Series is object-typed in recent pandas.
                new_pval = pd.Series(index=excluded, dtype=float)

                for new_column in excluded:

                    model = sm.OLS(
                        y,
                        sm.add_constant(
                            pd.DataFrame(X[included + [new_column]]))).fit()
                    new_pval[new_column] = model.pvalues[new_column]

                best_pval = new_pval.min()

                if best_pval < threshold_in:
                    best_feature = new_pval.idxmin()
                    included.append(best_feature)
                    changed = True
                    if verbose:
                        print('Adding  {:30} with p-value {:.6}'.format(
                            best_feature, best_pval))

                # backward step
                model = sm.OLS(y, sm.add_constant(pd.DataFrame(
                    X[included]))).fit()
                # use all coefs except intercept
                pvalues = model.pvalues.iloc[1:]
                worst_pval = pvalues.max()  # null if pvalues is empty
                if worst_pval > threshold_out:
                    changed = True
                    worst_feature = pvalues.idxmax()
                    included.remove(worst_feature)
                    if verbose:
                        print('Dropping {:30} with p-value {:.6}'.format(
                            worst_feature, worst_pval))

                if not changed:
                    break

            selected = set(included)
            return [1 if col in selected else 0 for col in X.columns]

        def partialLeastSquaresRegression():

            if self._verbose:
                print("Executing: plsr")
            # The components are not helpful for this context. They might be
            # for transformation, however.
            pls_model = PLSRegression()
            pls_out = pls_model.fit(self._X, self._y)

            # The coefficients are used to show direction of the relationship
            return abs(pls_out.coef_.flatten())

        ############################################
        ########## Classifiers Start Here ##########
        ############################################

        def adaptiveBoostingClassifier():
            self._abc_params = params_for("abc")
            model = AdaptiveBoostingClassifierPredictiveModel(
                self._X_train, self._y_train, self._abc_params, self._nfolds,
                self._n_jobs, self._verbose)
            return model.getBestEstimator().feature_importances_.flatten()

        def randomForestClassifier():
            self._rfc_params = params_for("rfc")
            model = RandomForestClassifierPredictiveModel(
                self._X_train, self._y_train, self._rfc_params, self._nfolds,
                self._n_jobs, self._verbose)
            return model.getBestEstimator().feature_importances_.flatten()

        def supportVectorClassifier():
            self._svc_params = params_for("svc")
            # A linear kernel is forced so that ``coef_`` is available.
            self._svc_params["svc__kernel"] = ['linear']
            model = SupportVectorClassifierPredictiveModel(
                self._X_train, self._y_train, self._svc_params, self._nfolds,
                self._n_jobs, self._verbose)

            # Combine the coefficient rows by element-wise product.
            coefs = model.getBestEstimator().coef_
            prods = coefs[0, :]
            for i in range(1, len(coefs)):
                prods = np.multiply(prods, coefs[i, :])
            return abs(prods)

        # Valid regressors
        regression_options = {
            "mixed_selection": mixed_selection,
            "svr": supportVectorRegression,
            "rfr": randomForestRegression,
            "abr": adaptiveBoostingRegression,
            "lasso": lassoRegression,
            "enet": elasticNetRegression,
            "plsr": partialLeastSquaresRegression
        }

        # Valid classifiers
        classification_options = {
            'abc': adaptiveBoostingClassifier,
            'rfc': randomForestClassifier,
            'svc': supportVectorClassifier
        }

        # Define return dictionary
        return_dict = {}

        # Train test split
        self._X_train, self._X_test, self._y_train, self._y_test = train_test_split(
            self._X, self._y, test_size=self._test_size)

        # Wrapper models
        self._key_features = {}

        if self._regressors:
            for key in models_to_flow:
                self._key_features[key] = regression_options[key]()
        elif self._regressors == False:
            for key in models_to_flow:
                self._key_features[key] = classification_options[key]()
        else:
            print(
                "Invalid model type. Please set regressors=True or regressors=False."
            )
            print()
        if self._verbose:
            # Blank separator line; the bare ``print`` used before was a no-op.
            print()

        return_dict['feature_importances'] = self._key_features

        self._ensemble_results = None
        self._kept_features = None
        if self._ensemble:

            alternative_names = self._X.columns.tolist()
            criterion_names = list(self._key_features.keys())
            criteria = [MAX for _ in criterion_names]
            # Equal weight per model. The previous ``i / len(...)`` gave the
            # first model weight 0 (and a zero weight sum for one model).
            weights = [1.0 / len(criterion_names) for _ in criterion_names]

            df = pd.DataFrame(self._key_features, index=alternative_names)

            # DataFrame.as_matrix() was removed in pandas 1.0; .values is the
            # equivalent ndarray accessor in old and new versions.
            data = Data(df.values,
                        criteria,
                        weights,
                        anames=df.index.tolist(),
                        cnames=df.columns)

            dec1 = simple.WeightedSum().decide(data)
            dec2 = simple.WeightedProduct().decide(data)
            dec3 = closeness.TOPSIS().decide(data)

            self._ensemble_results = pd.DataFrame(
                {
                    "TOPSIS": dec3.rank_,
                    "WeightedSum": dec1.rank_,
                    "WeightedProduct": dec2.rank_
                },
                index=df.index.tolist())

            # Only keep features that our decision makers deemed in the top % specified
            num_features_requested = math.ceil(
                len(alternative_names) * self._featurePercentage)
            # Clamp so percentages outside [0, 1] cannot index out of range.
            num_features_requested = max(
                0, min(num_features_requested, len(alternative_names)))

            ranks = dec1.rank_ + dec2.rank_ + dec3.rank_
            # A stable argsort replaces np.argpartition, which raises when
            # kth equals the number of features (featurePercentage == 1.0).
            best_first = np.argsort(ranks, kind="mergesort")
            self._kept_features = [
                alternative_names[i]
                for i in best_first[:num_features_requested]
            ]

            print("", self._featurePercentage * 100,
                  " % -> (" + str(num_features_requested) + ") features kept.")
            print(self._kept_features)
            # Print data with only those features
            return_dict['ensemble_results'] = self._ensemble_results
            return_dict['kept_features'] = self._kept_features

        return return_dict
Beispiel #10
0
def get_madm_concensus(Wijk=None, num_optimizers=100, data_shape=(10,5), batch_size=10, policy=np.average, verbose=False):
    """Rank every (i, j) cell of ``Wijk`` with three MADM methods and merge the ranks.

    Each cell ``(i, j)`` of the first two axes of ``Wijk`` is treated as an
    alternative, and each slice ``k`` along the third axis as a criterion to
    maximise with equal weight.  The alternatives are ranked by
    ``WeightedSum``, ``WeightedProduct`` and ``TOPSIS``; the three rank
    columns are then collapsed into one consensus value per alternative
    with ``policy``.

    Args:
        Wijk: 3-D array indexed ``[i, j, k]``.  When ``None`` it is obtained
            from ``get_k_optimizations`` using the remaining arguments.
        num_optimizers: forwarded to ``get_k_optimizations`` (only used when
            ``Wijk`` is ``None``).
        data_shape: forwarded to ``get_k_optimizations`` (only used when
            ``Wijk`` is ``None``).
        batch_size: forwarded to ``get_k_optimizations`` (only used when
            ``Wijk`` is ``None``).
        policy: callable invoked as ``policy(results, axis=1)`` on the
            DataFrame of per-method ranks; must yield one value per
            alternative.  Defaults to ``np.average``.
        verbose: when true, print the inputs and the per-method ranks.

    Returns:
        A tuple ``(rij, rij_move_sequence)`` where ``rij`` is the consensus
        rank reshaped to ``(Wijk.shape[0], Wijk.shape[1])`` and
        ``rij_move_sequence`` is ``np.argmin(rij, axis=1)``, i.e. for each
        ``i`` the ``j`` with the smallest consensus rank.
    """
    # Get data from simulation
    if Wijk is None:
        # The second return value (the simulated move sequence) is not needed here.
        Wijk, _ = get_k_optimizations(num_optimizers=num_optimizers,
                                      data_shape=data_shape,
                                      batch_size=batch_size,
                                      verbose=verbose)

    num_rows, num_cols, num_criteria = Wijk.shape[0], Wijk.shape[1], Wijk.shape[2]

    # Construct alternative-space: one alternative per (i, j) cell, row-major.
    alternative_names = [(i, j) for i in range(num_rows) for j in range(num_cols)]

    # Construct decision-matrix: row a = i * num_cols + j holds Wijk[i, j, :].
    # A row-major reshape yields exactly that layout without explicit loops.
    decision_matrix = np.asarray(Wijk, dtype=float).reshape(
        num_rows * num_cols, num_criteria)

    # Putting it all together
    criterion_names = list(range(num_criteria))
    criteria = [MAX] * num_criteria
    # Explicit float division so the weights are not truncated to 0 on Python 2.
    weights = [1.0 / num_criteria] * num_criteria
    df = pd.DataFrame(decision_matrix,
                      index=alternative_names,
                      columns=criterion_names)

    if verbose:
        print("Alternatives {}".format(alternative_names))
        print("Criteria {}".format(criterion_names))
        print("Weights {}".format(weights))
        print("Decision Matrix {}".format(df))

    # Execute MADM
    # NOTE: DataFrame.as_matrix() was removed in pandas 1.0; .values is the
    # equivalent that works on both old and new pandas versions.
    data = Data(df.values,
                criteria,
                weights,
                anames=df.index.tolist(),
                cnames=df.columns)

    # Execute on 3 decision makers
    dec_sum = simple.WeightedSum().decide(data)
    dec_product = simple.WeightedProduct().decide(data)
    dec_topsis = closeness.TOPSIS().decide(data)

    results = pd.DataFrame({"TOPSIS": dec_topsis.rank_,
                            "WeightedSum": dec_sum.rank_,
                            "WeightedProduct": dec_product.rank_},
                           index=df.index.tolist())

    if verbose:
        print("MADM Results: {}".format(results))

    # Collapse the three rank columns into a single consensus value per alternative.
    concensus_results = pd.DataFrame({"ConsensusRank": policy(results, axis=1)},
                                     index=results.index)
    rij = concensus_results.values.reshape(num_rows, num_cols)
    rij_move_sequence = np.argmin(rij, axis=1)
    return rij, rij_move_sequence

#wijk,_=get_k_optimizations(data=None, num_optimizers=5, data_shape=(10,5), batch_size=5, verbose=True)

#rij, _=get_madm_concensus(Wijk=wijk, policy=np.average )
#print(rij)
#print(_)