def inliner_voting(common, deep, clf="LR"):
    """Vote over an ensemble, preferring classifiers whose inlier weight is 1.

    Inlier dictionaries are built with ``get_inliner_dict`` from the datasets
    read with only ``deep``; the classifiers are trained on the datasets read
    from both ``common`` and ``deep``.  For each sample, when the weights of
    all classifiers sum to more than 2, only classifiers with weight ``1``
    take part in the vote; otherwise every classifier does.

    Returns a ``learn.Result`` holding the first result's ``y_true``, the
    voted predictions and the first result's ``names``.
    """
    deep_only = ens.read_dataset(None, deep)
    inliners = [get_inliner_dict(data_i) for data_i in deep_only]

    results = learn.train_ens(ens.read_dataset(common, deep), clf=clf)
    weights = [
        inliner_weights(inliners_i, result_i)
        for inliners_i, result_i in zip(inliners, results)
    ]

    names = results[0].names
    clf_ids = range(len(results))
    voted = []
    for j, name_j in enumerate(names):
        print(name_j)
        weights_j = [weights[i][j] for i in clf_ids]
        # Restrict the vote to weight-1 classifiers only when enough exist.
        only_inliers = sum(weights_j) > 2
        votes = [
            results[i].y_pred[j]
            for i in clf_ids
            if not only_inliers or weights_j[i] == 1
        ]
        print(len(votes))
        # Sum the selected votes element-wise and take the index of the
        # largest total (presumably per-class scores -- confirm).
        totals = np.sum(np.array(votes), axis=0)
        voted.append(np.argmax(totals))
    return learn.Result(results[0].y_true, np.array(voted), names)
def __call__(self, common_path, deep_path):
    """Build reduced datasets from common features plus optional deep ones.

    The first dataset read from ``common_path`` is normalised and reduced
    with ``n=self.n_common``.  If ``deep_path`` is falsy, that single dataset
    is returned in a one-element list.  Otherwise every dataset read from
    ``deep_path`` is normalised, reduced with ``n=self.n_binary`` and added
    (``+``) to the reduced common dataset.
    """
    base = ens.read_dataset(common_path, None)[0]
    base.norm()
    base = reduce(base, n=self.n_common)

    if deep_path:
        deep_sets = ens.read_dataset(None, deep_path)
        # Normalise everything first, then reduce, as two separate passes.
        for deep_i in deep_sets:
            deep_i.norm()
        reduced = [reduce(deep_i, n=self.n_binary) for deep_i in deep_sets]
        return [base + reduced_i for reduced_i in reduced]
    return [base]
def basic_selection(common_path, binary_path, clf="LR"):
    """Select datasets by validation accuracy and run the ensemble on them.

    Each dataset's accuracy is taken from ``validate_acc(...).get_acc()``;
    ``dataset_selection`` turns those accuracies into the selection that is
    passed to ``ens.ensemble``.  The size of the selection is printed and the
    first element of the ensemble output is returned.
    """
    datasets = ens.read_dataset(common_path, binary_path)

    scores = []
    for data_i in datasets:
        scores.append(validate_acc(data_i).get_acc())

    selected = dataset_selection(datasets, np.array(scores))
    print(len(selected))

    ensemble_out = ens.ensemble(common_path, binary_path, True, clf, selected)
    return ensemble_out[0]
def diff_voting(common, deep, clf="LR"):
    """Run weighted voting over an ensemble trained on ``common``/``deep``.

    Weights come from ``find_weights`` applied to the datasets; the ensemble
    is trained with the requested ``clf`` and combined through
    ``ens.Votes.weighted``.

    Args:
        common: passed to ``ens.read_dataset`` as the first argument.
        deep: passed to ``ens.read_dataset`` as the second argument.
        clf: classifier identifier forwarded to ``learn.train_ens``.

    Returns:
        Whatever ``ens.Votes(...).weighted(weights)`` returns.
    """
    datasets = ens.read_dataset(common, deep)
    weights = find_weights(datasets)
    # Forward the caller's choice; previously "LR" was hard-coded here and
    # the ``clf`` argument was silently ignored.
    results = learn.train_ens(datasets, clf=clf)
    votes = ens.Votes(results)
    return votes.weighted(weights)
def visualize_corl(paths):
    """Plot a heat map of the ``corl`` matrix computed over trained models.

    Args:
        paths: mapping with ``"common"`` and ``"binary"`` entries that are
            passed to ``ens.read_dataset``.

    One model is trained per dataset with ``learn.train_model``; the results
    and their ``true_one_hot()`` outputs are passed to ``corl``, whose output
    is rounded to three decimals and handed to ``heat_map``.
    """
    datasets = ens.read_dataset(paths["common"], paths["binary"])
    results = [learn.train_model(data_i) for data_i in datasets]
    d = [result_i.true_one_hot() for result_i in results]
    C = corl(results, d)
    # ``np.round_`` was removed in NumPy 2.0; ``np.round`` is the supported
    # spelling and behaves the same on older releases.
    C = np.round(C, decimals=3)
    heat_map(C)
def cv_exp(common, binary, out_path=None, clf="LR"):
    """Run unweighted voting for a series of ``CrossVal`` settings.

    ``CrossVal`` objects are created with ``0.1 * (i + 1)`` for ``i`` in
    ``2..9``.  Each one is called with the datasets and ``clf``; the second
    element of its output is wrapped in ``ens.Votes`` and ``voting(False)``
    is stored under ``str(validator)``.  The collected results are passed to
    ``exp.result_exp`` under the label ``"no weights"``.
    """
    datasets = ens.read_dataset(common, binary)
    validators = [CrossVal(0.1 * (i + 1)) for i in range(2, 10)]

    outcomes = {}
    for validator in validators:
        _, fold_results = validator(datasets, clf)
        voted = ens.Votes(fold_results).voting(False)
        outcomes[str(validator)] = voted

    exp.result_exp("no weights", outcomes, out_path)
def score_opt(paths, clf="LR"):
    """Derive weights from a ``CrossVal(0.5)`` run and report the final vote.

    ``auc.CrossVal(0.5)`` is called with the datasets and ``clf``; the second
    element of its output is passed to ``find_weights``.  The ensemble is
    then trained on the original datasets, combined with the weights through
    ``score_dataset``, voted with ``voting(False)`` and reported.
    """
    datasets = ens.read_dataset(paths['common'], paths['binary'])

    validator = auc.CrossVal(0.5)
    _, val_results = validator(datasets, clf)
    weights = find_weights(val_results)

    # NOTE(review): ``clf`` is not forwarded here, so the final training uses
    # ``learn.train_ens``'s own default -- confirm this is intended.
    full_results = learn.train_ens(datasets)
    votes = score_dataset(full_results, weights)
    votes.voting(False).report()
def rename_frames(paths, json_path):
    """Return the renaming helper, or train and report an "LR" ensemble.

    The helper is obtained from ``get_renam_fun(json_path)``.  When
    ``get_fun`` is truthy the helper is returned immediately; otherwise an
    ensemble is trained on the datasets, voted with ``voting(False)`` and
    reported.
    """
    datasets = ens.read_dataset(paths["common"], paths["binary"])
    rename = get_renam_fun(json_path)
    # NOTE(review): ``get_fun`` is not defined in this function; it must be
    # provided at module level or this line raises NameError -- confirm.
    if get_fun:
        return rename
    # NOTE: the renaming helper is currently not applied to the datasets;
    # they are trained on unchanged.
    trained = learn.train_ens(datasets, clf="LR")
    outcome = ens.Votes(trained).voting(False)
    outcome.report()
def split_exp(common, binary, clf="LR", out_path=None):
    """Run ``exp_template`` over several split generators and loss functions.

    Args:
        common: passed to ``ens.read_dataset`` as the first argument.
        binary: passed to ``ens.read_dataset`` as the second argument.
        clf: classifier identifier forwarded to ``exp_template``.
        out_path: forwarded to ``exp_template`` (``None`` by default).

    The lines returned by ``exp_template`` are printed.
    """
    datasets = ens.read_dataset(common, binary)
    gens = {
        "KFold,2": KFoldGen(2),
        "KFold,5": KFoldGen(5),
        "Strat,2": StratGen(2),
        "Strat,5": StratGen(5),
    }
    loss = {"LogLoss": LogLoss, "MSELoss": MSELoss, "LinearLoss": LinearLoss}
    # ``out_path`` used to be accepted but dropped; forward it the same way
    # ``auc_exp`` does so the argument has an effect.
    lines = exp_template(datasets, clf, gens, loss, out_path)
    print(lines)
def full_train(common, binary, clf):
    """Train one model per dataset and collect its predictions on the train split.

    Args:
        common: passed to ``ens.read_dataset`` as the first argument.
        binary: passed to ``ens.read_dataset`` as the second argument.
        clf: classifier identifier passed to ``learn.make_model``.

    Returns:
        ``ens.Votes`` wrapping one ``learn.Result`` per dataset.
    """
    datasets = ens.read_dataset(common, binary)
    results = []
    for data_i in datasets:
        # Only the train half is used; the test half is intentionally unused.
        train, _ = data_i.split()
        model = learn.make_model(train, clf)
        X_train, y_true = train.get_X(), train.get_labels()
        y_pred = model.predict_proba(X_train)
        # Labels and predictions come from the train split, so the names must
        # too; the previous ``test.names()`` paired them with other samples.
        results.append(learn.Result(y_true, y_pred, train.names()))
    return ens.Votes(results)
def auc_exp(common, binary, clf="LR", out_path=None):
    """Run ``exp_template`` with four ``StratGen(2, x)`` generators and MSE loss.

    The generators are keyed ``"Strat,0.1"``, ``"Strat,0.3"``, ``"Strat,0.5"``
    and ``"Strat,0.9"``, each built as ``StratGen(2, x)`` for the matching
    ``x``.  The lines returned by ``exp_template`` are printed.
    """
    datasets = ens.read_dataset(common, binary)

    gens = {}
    for value in (0.1, 0.3, 0.5, 0.9):
        gens["Strat,%s" % value] = StratGen(2, value)

    loss = {"MSELoss": MSELoss}
    print(exp_template(datasets, clf, gens, loss, out_path))
def ensemble(paths, system=None, clf="LR", s_clf=None, transform=None):
    """Train one model per dataset and combine them with ``voting``.

    When ``transform`` is truthy it is applied to every dataset before
    training.  When ``s_clf`` is truthy it is treated as a sequence of
    indices and only those results are kept, in the given order.  The
    resulting ``ens.Votes`` and ``system`` are passed to ``voting``.
    """
    datasets = ens.read_dataset(paths["common"], paths["binary"])
    if transform:
        datasets = list(map(transform, datasets))

    results = []
    for data_i in datasets:
        results.append(learn.train_model(data_i, clf_type=clf, binary=False))

    if s_clf:
        results = [results[index] for index in s_clf]

    return voting(ens.Votes(results), system)
def __call__(self, paths, clf="LR"):
    """Run ``self.single_optim`` for one validator or for each of a list.

    Args:
        paths: mapping with ``'common'`` and ``'binary'`` entries that are
            passed to ``ens.read_dataset``.
        clf: classifier identifier forwarded to ``self.single_optim``.

    Returns:
        If ``self.validation`` is a list, a dict mapping ``str(validator)``
        to the ``single_optim`` output for that validator; otherwise the
        ``single_optim`` output for the single validator.
    """
    datasets = ens.read_dataset(paths['common'], paths['binary'])

    def helper(valid):
        print("Valid")
        new_datasets, results = valid(datasets)
        return self.single_optim(new_datasets, results, clf)

    # isinstance (rather than an exact ``type(...) == list`` comparison) also
    # accepts list subclasses.
    if isinstance(self.validation, list):
        return {str(valid_i): helper(valid_i) for valid_i in self.validation}
    return helper(self.validation)
def save_dataset(common, binary, out_path):
    """Save every dataset and the matching "LR" ensemble votes under ``out_path``.

    Creates ``out_path`` with ``features`` and ``votes`` sub-directories.
    Dataset ``i`` is saved to ``<out_path>/features/<i>``.  The predictions
    of result ``i`` are written to ``<out_path>/votes/<i>`` as text lines of
    the form ``<prediction>#<name>``.

    Args:
        common: passed to ``ens.read_dataset`` as the first argument.
        binary: passed to ``ens.read_dataset`` as the second argument.
        out_path: root output directory.
    """
    datasets = ens.read_dataset(common, binary)
    files.make_dir(out_path)
    subdirs = {
        desc_i: "%s/%s" % (out_path, desc_i)
        for desc_i in ["features", "votes"]
    }
    for path_i in subdirs.values():
        files.make_dir(path_i)

    for i, data_i in enumerate(datasets):
        data_i.save("%s/%d" % (subdirs["features"], i))

    results = learn.train_ens(datasets, clf="LR")
    for i, result_i in enumerate(results):
        votes_i = "%s/%d" % (subdirs["votes"], i)
        # A leftover debug ``raise Exception(result_i.y_pred.dtype)`` used to
        # sit here and made the vote-saving code below unreachable.
        text_i = list(result_i.y_pred.astype(str))
        lines = [
            # Strip newlines so each prediction stays on a single text line.
            "%s#%s" % (str(line_j).replace("\n", ""), name_j)
            for line_j, name_j in zip(text_i, result_i.names)
        ]
        files.save_txt(lines, votes_i)
def selected_common(common_path, out_path, n=100):
    """Normalise the first common dataset, reduce it with ``n`` and save it.

    The dataset is the first one returned by
    ``ens.read_dataset(common_path, None)``; the output of ``reduce`` is
    saved to ``out_path``.
    """
    source = ens.read_dataset(common_path, None)[0]
    source.norm()
    reduce(source, n).save(out_path)
def helper(common_path, binary_path, clf="LR"):
    """Wrap the ``person_acc`` result of every dataset in ``ens.Votes``.

    ``clf`` is accepted but not used by this function.
    """
    per_dataset = []
    for data_i in ens.read_dataset(common_path, binary_path):
        per_dataset.append(person_acc(data_i))
    return ens.Votes(per_dataset)
def person_selection(common_path, binary_path, clf="LR"):
    """Select datasets by ``person_acc`` accuracy and run the ensemble on them.

    Each dataset's accuracy is ``person_acc(...).get_acc()``;
    ``acc.dataset_selection`` turns the accuracies into the selection passed
    to ``ens.ensemble``.  The size of the selection is printed and the first
    element of the ensemble output is returned.
    """
    datasets = ens.read_dataset(common_path, binary_path)

    scores = []
    for data_i in datasets:
        scores.append(person_acc(data_i).get_acc())

    selected = acc.dataset_selection(datasets, np.array(scores))
    print(len(selected))

    ensemble_out = ens.ensemble(common_path, binary_path, True, clf, selected)
    return ensemble_out[0]
def make_reduced_dataset(common, out_path, n_feats=350):
    """Read, normalise and reduce the first common dataset, then save it.

    ``common`` is first expanded with ``files.get_paths``; the first dataset
    read from the result is normalised, reduced by ``reduction.reduce`` with
    ``n_feats`` and saved to ``out_path``.
    """
    common_paths = files.get_paths(common)
    source = ens.read_dataset(common_paths, None)[0]
    source.norm()
    reduction.reduce(source, n_feats).save(out_path)
def base_train(common, binary, clf):
    """Train an ensemble with ``clf`` and return its results as ``ens.Votes``."""
    trained = learn.train_ens(ens.read_dataset(common, binary), clf=clf)
    return ens.Votes(trained)
def __call__(self, common_path, deep_path):
    """Normalise every dataset, then reduce each one with ``n=self.n_feats``.

    Returns the list of reduced datasets in the order they were read.
    """
    loaded = ens.read_dataset(common_path, deep_path)
    # Normalise everything first; reduction runs as a separate second pass.
    for item in loaded:
        item.norm()

    reduced = []
    for item in loaded:
        reduced.append(reduce(item, n=self.n_feats))
    return reduced