def make_votes(datasets, binary, clf):
    """Train one model per dataset (and per classifier when *clf* is a list).

    Args:
        datasets: iterable of dataset objects accepted by ``learn.train_model``.
        binary: flag forwarded to ``learn.train_model``; when truthy, every
            result is additionally converted with ``one_hot_result``.
        clf: a single classifier spec, or a list of specs — in the list case
            each classifier is trained on every dataset.

    Returns:
        list of training results, ordered classifier-major then dataset,
        one-hot converted when *binary* is truthy.
    """
    # isinstance instead of `type(x) == list` (idiomatic, subclass-safe).
    if isinstance(clf, list):
        # Flattened double loop: for each classifier, train on every dataset.
        votes = [
            learn.train_model(data_i, binary, clf_i)
            for clf_i in clf
            for data_i in datasets
        ]
    else:
        votes = [learn.train_model(data_i, binary, clf) for data_i in datasets]
    if binary:
        votes = [one_hot_result(vote_i) for vote_i in votes]
    return votes
def visualize_corl(paths):
    """Train a model per dataset and render a heat map of their correlations.

    Args:
        paths: mapping with ``"common"`` and ``"binary"`` keys, forwarded to
            ``ens.read_dataset``.

    Side effects:
        Displays the correlation matrix via ``heat_map``; returns nothing.
    """
    datasets = ens.read_dataset(paths["common"], paths["binary"])
    results = [learn.train_model(data_i) for data_i in datasets]
    d = [result_i.true_one_hot() for result_i in results]
    C = corl(results, d)
    # np.round_ was deprecated and removed in NumPy 2.0; np.round is the
    # supported, behaviorally identical spelling.
    C = np.round(C, decimals=3)
    heat_map(C)
def validate_acc(data_i):
    """Train a logistic-regression model on the train split of *data_i*.

    Keys of the training split are renamed to the "<cat>_<parity>_<index>"
    scheme expected downstream before training.

    Returns:
        the result object produced by ``learn.train_model``.
    """
    train_split = data_i.split()[0]
    mapping = {}
    for idx, key in enumerate(train_split.keys()):
        # Label encodes 1-based category, index parity, and position.
        mapping[key] = "%d_%d_%d" % (key.get_cat() + 1, idx % 2, idx)
    renamed = train_split.rename(mapping)
    return learn.train_model(renamed, binary=False, clf_type="LR")
def cross_acc(datasets):
    """Compute per-dataset training accuracy on the train splits.

    Each dataset's ``info`` labels are rewritten in place to
    "<prefix>_<index>" (prefix = text before the first underscore) before
    training a binary LR model with ``acc_only=True``.

    Returns:
        list of accuracy values, one per dataset.
    """
    splits = [data_i.split()[0] for data_i in datasets]
    for split_i in splits:
        relabeled = []
        for idx, label in enumerate(split_i.info):
            relabeled.append("%s_%d" % (label.split('_')[0], idx))
        split_i.info = relabeled
    return [
        learn.train_model(split_i, binary=True, clf_type="LR", acc_only=True)
        for split_i in splits
    ]
def make_votes(common_path, binary_path, clf="LR", read=None):
    """Read datasets from disk, train one model each, and wrap as Votes.

    NOTE(review): this file appears to define ``make_votes`` twice with
    different signatures — confirm which definition callers expect.

    Args:
        common_path: path to the common features, passed to *read*.
        binary_path: path to the binary features, passed to *read*.
        clf: classifier type forwarded to ``learn.train_model``.
        read: reader callable; defaults to ``read_dataset`` (None sentinel
            avoids a mutable/early-bound default).

    Returns:
        ``Votes`` built from the per-dataset training results.

    Raises:
        Exception: when the reader yields no datasets.
    """
    reader = read_dataset if read is None else read
    datasets = reader(common_path, binary_path)
    if not datasets:
        raise Exception("No data at %s" % binary_path)
    results = [
        learn.train_model(data_i, clf_type=clf, binary=False)
        for data_i in datasets
    ]
    return Votes(results)
def ensemble(paths, system=None, clf="LR", s_clf=None, transform=None):
    """Train per-dataset models and combine them with a voting system.

    Args:
        paths: mapping with ``"common"`` and ``"binary"`` dataset paths.
        system: voting system forwarded to ``voting``.
        clf: classifier type for ``learn.train_model``.
        s_clf: optional indices selecting a subset of the trained models.
        transform: optional callable applied to each dataset before training.

    Returns:
        whatever ``voting`` returns for the assembled votes.
    """
    datasets = ens.read_dataset(paths["common"], paths["binary"])
    if transform:
        datasets = [transform(ds) for ds in datasets]
    trained = [
        learn.train_model(ds, clf_type=clf, binary=False)
        for ds in datasets
    ]
    if s_clf:
        # Keep only the selected classifiers, in the order given.
        trained = [trained[idx] for idx in s_clf]
    return voting(ens.Votes(trained), system)
def __call__(self, datasets, clf="LR"):
    """Subsample each dataset's train split, retrain, and collect results.

    For every dataset: split into train/test, subsample the train split via
    ``self.subsample``, merge the subsampled train with the untouched test
    into a fresh ``feats.Feats``, and train a model on it.

    Returns:
        tuple ``(new_datasets, results)`` — the rebuilt datasets and their
        per-dataset training results, in input order.
    """
    new_datasets, results = [], []
    for dataset in datasets:
        train_part, test_part = dataset.split()
        sampled_train = self.subsample(train_part)
        # Merge: subsampled train entries first, test entries overwrite on
        # key collision (dict-unpacking semantics).
        merged = feats.Feats({**sampled_train, **test_part})
        new_datasets.append(merged)
        results.append(learn.train_model(merged, binary=False, clf_type=clf))
    return new_datasets, results
def ens_acc(paths, clf="LR", acc_only=True):
    """Compute accuracies either from raw datasets or from saved votes.

    Args:
        paths: a tuple → treated as dataset paths (``tools.read_datasets``),
            anything else → treated as a votes path (``learn.read_votes``).
        clf: classifier type used in the dataset branch.
        acc_only: forwarded to ``learn.train_model`` in the dataset branch.

    Returns:
        list of per-dataset training results (tuple branch) or list of
        per-vote accuracies (votes branch).
    """
    # isinstance instead of `type(x) == tuple` (idiomatic, subclass-safe).
    if isinstance(paths, tuple):
        datasets = tools.read_datasets(paths)
        return [
            learn.train_model(data_i, False, clf, acc_only)
            for data_i in datasets
        ]
    votes = learn.read_votes(paths)
    # Ground-truth labels are encoded as the 1-based prefix of each name.
    y_true = [int(name_i.split("_")[0]) - 1 for name_i in votes[0][2]]
    result = [learn.voting([vote_i], False) for vote_i in votes]
    acc = [accuracy_score(y_true, result_i) for result_i in result]
    return acc
def __call__(self, paths, binary=False, clf="LR", s_clf=None):
    """Read, optionally transform, train per dataset, and vote.

    Args:
        paths: mapping with ``"common"`` and ``"binary"`` dataset paths.
        binary: forwarded to ``votes.voting``.
        clf: classifier type for ``learn.train_model``.
        s_clf: optional indices selecting a subset of trained results.

    Side effects:
        Prints the voted result's accuracy.

    Returns:
        tuple ``(result, votes)``.
    """
    datasets = self.read(paths["common"], paths["binary"])
    if self.transform:
        datasets = [self.transform(ds) for ds in datasets]
    trained = [
        learn.train_model(ds, clf_type=clf, binary=False)
        for ds in datasets
    ]
    votes = Votes(trained)
    if s_clf:
        # Narrow the ensemble to the selected classifier indices.
        votes = Votes([votes.results[idx] for idx in s_clf])
    result = votes.voting(binary)
    print(result.get_acc())
    return result, votes
def person_acc(data_i):
    """Leave-one-person-out relabeling: train one model per person.

    For every person *j* present in the train split, keys are renamed to
    "<cat>_<is_other>_<index>" where ``is_other`` is 0 for person *j* and 1
    otherwise, then an LR model is trained on the relabeled split.

    Side effects:
        Prints "person acc" once.

    Returns:
        the unified result from ``unify_results``.
    """
    train_split = data_i.split()[0]
    persons = {key.get_person() for key in train_split.keys()}
    results = []
    print("person acc")
    for person in persons:
        def relabel(key):
            # 1-based category, then a 0/1 flag: 0 = this person, 1 = other.
            return "%d_%d" % (key.get_cat() + 1, int(key.get_person() != person))
        mapping = {
            key: "%s_%d" % (relabel(key), idx)
            for idx, key in enumerate(train_split.keys())
        }
        relabeled = train_split.rename(mapping)
        results.append(
            learn.train_model(relabeled, binary=False, clf_type="LR")
        )
    return unify_results(results)
import data_generator
import learn

if __name__ == '__main__':
    # Script entry point: produce a fresh synthetic dataset, then fit the
    # model on it. Both steps delegate entirely to the project modules.
    data_generator.generate_data()
    # NOTE(review): presumably trains on the data just generated — confirm
    # that learn.train_model reads data_generator's output location.
    learn.train_model()