def read_deep(deep_path):
    """Read deep features from a single path or from a list of paths.

    Args:
        deep_path: Either one value accepted by ``feats.read`` or a ``list``
            of such values.

    Returns:
        For a list, one flat list holding the results of ``feats.read`` for
        every entry, in order; otherwise whatever ``feats.read(deep_path)``
        returns.
    """
    # Anything that is not exactly a list is handed to the reader untouched.
    if type(deep_path) is not list:
        return feats.read(deep_path)
    merged = []
    for path in deep_path:
        merged.extend(feats.read(path))
    return merged
def binary_selection(in_path):
    """Combine the common dataset with every helper-processed deep dataset.

    Args:
        in_path: A ``(common_path, deep_path)`` pair.

    Returns:
        A list with one entry per deep dataset: the first dataset read from
        ``common_path`` added (``+``) to the result of
        ``binary_helper(i, deep_dataset)``.
    """
    common_path, deep_path = in_path
    deep_data = feats.read(deep_path)
    deep_data = [binary_helper(i, data_i) for i, data_i in enumerate(deep_data)]
    common = feats.read(common_path)[0]
    datasets = [common + deep_i for deep_i in deep_data]
    # The original returned the undefined name `dataset` (NameError).
    return datasets
def read_dataset(common_path, deep_path):
    """Read common and/or deep features and combine them when both are given.

    Args:
        common_path: Path of the common features; a falsy value means
            "deep features only".
        deep_path: Path (or list of paths) of the deep features; a falsy
            value means "common features only".

    Returns:
        ``read_deep(deep_path)`` when ``common_path`` is falsy,
        ``feats.read(common_path)`` when ``deep_path`` is falsy, otherwise a
        list with the first common dataset added (``+``) to each deep dataset.
    """
    if not common_path:
        return read_deep(deep_path)
    if not deep_path:
        return feats.read(common_path)
    base = feats.read(common_path)[0]
    combined = []
    for deep_set in read_deep(deep_path):
        combined.append(base + deep_set)
    return combined
def combined_dataset(common_path, deep_path, sub_datasets=False):
    """Add the first common dataset to every deep dataset.

    Args:
        common_path: Path of the common features; when falsy only the deep
            features are read and returned.
        deep_path: Path of the deep features; when falsy only the common
            features are read and returned.
        sub_datasets: When truthy, also return the two inputs of the
            combination.

    Returns:
        The list of combined datasets, or the tuple
        ``(combined, common_data, deep_data)`` when ``sub_datasets`` is
        truthy. With a falsy path, the plain ``feats.read`` result of the
        other path.
    """
    if not common_path:
        return feats.read(deep_path)
    if not deep_path:
        return feats.read(common_path)
    common_data = feats.read(common_path)[0]
    deep_data = feats.read(deep_path)
    combined = []
    for deep_set in deep_data:
        combined.append(common_data + deep_set)
    if not sub_datasets:
        return combined
    return combined, common_data, deep_data
def exp(common_path, deep_path):
    """Print the common cluster quality and whether each deep dataset beats it.

    Clusters are built with ``make_clusters`` from the first element of each
    dataset's ``split()``. For every dataset read from ``deep_path`` the
    function prints its index and then the boolean
    ``quality - base_quality > 0``.

    Args:
        common_path: Path of the common features (first dataset is used).
        deep_path: Path of the deep features.
    """
    common_clusters = make_clusters(feats.read(common_path)[0].split()[0])
    binary_clusters = []
    for deep_set in feats.read(deep_path):
        binary_clusters.append(make_clusters(deep_set.split()[0]))

    print("Common:")
    base_quality = common_clusters.quality()
    print(base_quality)

    print("Binary:")
    for idx, clusters in enumerate(binary_clusters):
        print(idx)
        improved = (clusters.quality() - base_quality) > 0
        print(improved)
def concat_dataset(in_path):
    """Read a dataset, or combine common data with each deep dataset.

    Args:
        in_path: Either a ``(common_path, deep_path)`` tuple or a single
            value passed straight to ``feats.read``.

    Returns:
        For a tuple, a list of ``common_data + deep_dataset`` for every deep
        dataset; otherwise the result of ``feats.read(in_path)``.
    """
    if type(in_path) is not tuple:
        return feats.read(in_path)
    common_path, deep_path = in_path
    if type(common_path) is list:
        common_data = feats.read_unified(common_path)
    else:
        # NOTE(review): sibling readers take element [0] of feats.read();
        # here the whole result is used as-is - confirm this is intended.
        common_data = feats.read(common_path)
    combined = []
    for deep_set in feats.read(deep_path):
        combined.append(common_data + deep_set)
    return combined
def total_selection(in_path):
    """Keep deep datasets whose standardized median ``mutual()`` exceeds -1.

    For the first part of each deep dataset's ``split()``, the median of
    ``mutual()`` is taken. These medians are standardized (mean subtracted,
    divided by the standard deviation) and printed. Each deep dataset whose
    standardized score is greater than -1 is added (``+``) to the first
    common dataset.

    Args:
        in_path: A ``(common_path, deep_path)`` pair.

    Returns:
        The list of combined datasets that passed the threshold.
    """
    common_path, deep_path = in_path
    deep_data = feats.read(deep_path)
    deep_train = [deep_i.split()[0] for deep_i in deep_data]
    # Build an array explicitly instead of relying on list-minus-scalar
    # being coerced by NumPy's reflected operators.
    info = np.asarray([np.median(train_i.mutual()) for train_i in deep_train])
    info = (info - np.mean(info)) / np.std(info)
    print(info)
    common = feats.read(common_path)[0]
    datasets = [
        common + data_i
        for data_i, score_i in zip(deep_data, info)
        if score_i > -1
    ]
    # The original returned the misspelt name `datasetst` (NameError).
    return datasets
def read_votes(in_path):
    """Read datasets and package them as ``[labels, X, info]`` triples.

    Args:
        in_path: Path passed to ``feats.read``.

    Returns:
        A list with one ``[y_true, X, info]`` list per dataset. ``y_true``
        is the labels of the first dataset and is shared by every entry.
    """
    data = feats.read(in_path)
    # Labels come from the first dataset only and are reused for all entries.
    y_true = data[0].get_labels()
    return [[y_true, data_i.X, data_i.info] for data_i in data]
def read_datasets(in_path):
    """Dispatch to the reader that matches the shape of ``in_path``.

    Args:
        in_path: Either a ``(common_path, deep_path)`` tuple or a single
            value passed to ``feats.read``.

    Returns:
        ``multi_dataset(common_path, deep_path)`` when ``common_path`` is a
        list, ``combined_dataset(common_path, deep_path)`` for any other
        tuple, and ``feats.read(in_path)`` for non-tuple input.
    """
    if type(in_path) is not tuple:
        return feats.read(in_path)
    common_path, deep_path = in_path
    reader = multi_dataset if type(common_path) is list else combined_dataset
    return reader(common_path, deep_path)
def split_plot(in_path):
    """Show a t-SNE plot coloured by the parity of a field in each name.

    The colour of sample ``i`` is ``int(name.split("_")[1]) % 2`` where
    ``name`` is ``dataset.info[i]``.

    Args:
        in_path: Path passed to ``feats.read``; the first dataset is plotted.
    """
    dataset = feats.read(in_path)[0]

    def parity_color(i, y_i):
        # The label argument is part of the callback signature but unused.
        person_field = dataset.info[i].split("_")[1]
        return int(person_field) % 2

    tsne_plot(dataset, show=True, color_helper=parity_color)
def reduce_cross(in_path, step=50):
    """Pick the reduced feature count with the best cross-validation score.

    Candidate sizes are ``step, 2*step, ..., n*step`` with
    ``n = int(dim / step)``, where ``dim`` is the first entry of the first
    dataset's ``dim()``. Each candidate is produced by ``reduction.reduce``
    and scored by ``rename.cross_validate``; the scores are printed.

    Args:
        in_path: Path passed to ``feats.read``; the first dataset is used.
        step: Increment between candidate feature counts.

    Returns:
        The candidate size whose score is the first maximum.

    Raises:
        ValueError: If no candidate size fits (``n`` is zero or negative).
            The original failed with the same exception type from
            ``np.argmax`` on an empty list.
    """
    feat_dict = feats.read(in_path)[0]
    n = int(feat_dict.dim()[0] / step)
    if n <= 0:
        raise ValueError(
            "step=%r leaves no candidate feature counts to evaluate" % (step,)
        )
    # The reduced datasets are only needed for scoring, so they are not
    # accumulated (the original kept all of them in an unused list).
    acc = [
        rename.cross_validate(reduction.reduce(feat_dict, (i + 1) * step))
        for i in range(n)
    ]
    print(acc)
    k = np.argmax(acc)
    return (k + 1) * step
def tsne_plot(in_path, show=True, color_helper="cat", names=False):
    """Embed part of a dataset with 2-D t-SNE and plot it.

    The first dataset read from ``in_path`` is split and the second part of
    the split is embedded.

    Args:
        in_path: Path passed to ``feats.read``.
        show: Forwarded to ``plot_embedding``.
        color_helper: A callable ``(i, y_i) -> colour`` is used as given.
            Any non-callable value (including the default ``"cat"``) falls
            back to colouring by label. The original overwrote this
            argument unconditionally, so caller-supplied callables were
            silently ignored.
        names: Not consulted. The names returned by ``as_dataset()`` are
            always forwarded to ``plot_embedding``, exactly as before.
            NOTE(review): this flag was clobbered by unpacking in the
            original; confirm whether it should gate the names.

    Returns:
        Whatever ``plot_embedding`` returns.
    """
    feat_dataset = feats.read(in_path)[0].split()[1]
    tsne = manifold.TSNE(n_components=2, perplexity=30)
    # A separate local keeps the `names` parameter from being overwritten.
    X, y, sample_names = feat_dataset.as_dataset()
    X = tsne.fit_transform(X)

    if not callable(color_helper):
        def color_helper(i, y_i):
            # Default colouring: use the label itself.
            return y_i

    return plot_embedding(
        X,
        y,
        title="tsne",
        color_helper=color_helper,
        show=show,
        names=sample_names,
    )
def random_cat(feat_dict):
    """Build a renaming that reassigns names at random within each category.

    Names are grouped by ``get_cat()``, each group is shuffled in place, and
    the name at position ``j`` of its group is mapped to
    ``"<cat + 1>_<j % 2>_<j>"``.

    Args:
        feat_dict: An object whose ``keys()`` yields names exposing
            ``get_cat()``, or a ``str`` path from which the first dataset is
            read with ``feats.read``.

    Returns:
        A dict mapping every original name to its new string name.
    """
    if type(feat_dict) is str:
        feat_dict = feats.read(feat_dict)[0]

    groups = defaultdict(list)
    for name in feat_dict.keys():
        groups[name.get_cat()].append(name)

    mapping = {}
    for members in groups.values():
        random.shuffle(members)
        mapping.update(
            (old_name, "%d_%d_%d" % (old_name.get_cat() + 1, pos % 2, pos))
            for pos, old_name in enumerate(members)
        )
    return mapping
def visual(in_path):
    """Show a t-SNE plot coloured by the kNN detector result.

    Each name's second ``_``-separated field is read as an integer. When it
    is even, the colour is the detector result for that name plus one;
    when it is odd, the colour is ``False``.

    Args:
        in_path: Path passed to ``feats.read``; the first dataset is used.
    """
    dataset = feats.read(in_path)[0]
    result = inliners.knn.get_detector(dataset, k=5, as_dict=True)

    def detector_color(i, y_i):
        sample_name = dataset.info[i]
        person = int(sample_name.split("_")[1])
        if person % 2 != 0:
            return False
        return result[sample_name] + 1

    reduction.tsne_plot(dataset, show=True, color_helper=detector_color)
def train_model(data, binary=False, clf_type="LR", selector=None):
    """Normalize, split, fit a model and wrap its predictions in ``Result``.

    The dataset's ``dim()`` and length are printed before splitting.

    Args:
        data: A dataset, or a ``str`` path from which the first dataset is
            read with ``feats.read``.
        binary: When truthy use ``model.predict``, otherwise
            ``model.predict_proba``.
        clf_type: Classifier identifier forwarded to ``make_model``.
        selector: Forwarded to ``data.split``.

    Returns:
        ``Result(y_true, y_pred, test.names())``.
    """
    if type(data) is str:
        data = feats.read(data)[0]
    data.norm()
    print(data.dim())
    print(len(data))

    train, test = data.split(selector)
    model = make_model(train, clf_type)
    X_test = test.get_X()
    y_true = test.get_labels()
    predict = model.predict if binary else model.predict_proba
    y_pred = predict(X_test)
    return Result(y_true, y_pred, test.names())
def train_model(data, binary=False, clf_type="LR", acc_only=False):
    """Normalize, split, fit a classifier and return predictions or accuracy.

    Args:
        data: A dataset, or a ``str`` path from which the first dataset is
            read with ``feats.read``.
        binary: When truthy return ``model.predict`` output, otherwise
            ``model.predict_proba`` output.
        clf_type: Classifier identifier forwarded to ``learn.clf.get_cls``.
        acc_only: When truthy return only the accuracy on the second part
            of the split.

    Returns:
        ``accuracy_score(y_true, predicted_labels)`` when ``acc_only`` is
        truthy, otherwise the tuple ``(y_true, y_pred, test.info)``.
    """
    if isinstance(data, str):
        data = feats.read(data)[0]
    data.norm()
    train, test = data.split()
    model = learn.clf.get_cls(clf_type)
    model.fit(train.X, train.get_labels())
    y_true = test.get_labels()

    if acc_only:
        # Accuracy is defined on hard labels, so always score predict().
        # The original passed predict_proba output when binary was False.
        # NOTE(review): assumes accuracy_score is sklearn.metrics' - confirm.
        return accuracy_score(y_true, model.predict(test.X))

    if binary:
        y_pred = model.predict(test.X)
    else:
        y_pred = model.predict_proba(test.X)
    return y_true, y_pred, test.info
def tsne_plot(in_path, show=True, color_helper="cat", names=False):
    """Embed part of a dataset with 2-D t-SNE and plot it.

    The second part returned by the dataset's ``split()`` is embedded.

    Args:
        in_path: A ``str`` path (the first dataset read by ``feats.read`` is
            used) or an already loaded dataset.
        show: Forwarded to ``plot_embedding``.
        color_helper: A ``str`` or ``tuple`` is turned into a helper by
            ``get_colors_helper``; any other value is forwarded unchanged.
        names: When truthy the dataset's ``info`` is passed as point names,
            otherwise ``None`` is passed.

    Returns:
        Whatever ``plot_embedding`` returns.
    """
    if type(in_path) is str:
        feat_dataset = feats.read(in_path)[0]
    else:
        feat_dataset = in_path
    plotted = feat_dataset.split()[1]

    embedder = manifold.TSNE(n_components=2, perplexity=30)
    X = embedder.fit_transform(plotted.X)
    y = plotted.get_labels()
    point_names = plotted.info if names else None

    if type(color_helper) in (str, tuple):
        color_helper = get_colors_helper(plotted.info, color_helper)
    return plot_embedding(
        X,
        y,
        title="tsne",
        color_helper=color_helper,
        show=show,
        names=point_names,
    )
def separ(in_path):
    """Print the cluster quality of the second part of a dataset's split.

    Args:
        in_path: Path passed to ``feats.read``; the first dataset is used.
    """
    # split() must yield exactly two parts; only the second one is clustered.
    _, test = feats.read(in_path)[0].split()
    print(make_clusters(test).quality())