def main():
    """Build and print the dense adjacency matrix of the followers graph.

    Loads the followers edge list, shifts the node ids to be zero-based,
    and scatters a 1.0 at every (src, dst) index of an (n, n) matrix.
    """
    edges = DataSets.get_followers() - 1           # zero-based node ids
    node_count = edges.max(axis=0).max() + 1       # ids now span 0 .. n-1
    # One 1.0 per edge, scattered into the dense n x n matrix.
    adjacency = tf.scatter_nd(edges.values.tolist(),
                              edges.shape[0] * [1.0],
                              [node_count, node_count])
    init = tf.global_variables_initializer()
    with tf.Session() as session:
        session.run(init)
        print(session.run(adjacency))
def main():
    """Run power-iteration PageRank on the wiki-vote graph with TensorFlow.

    Builds the column-stochastic transition matrix (with the usual beta
    damping and a uniform 1/n fallback for dangling nodes), iterates the
    rank vector a fixed number of steps, then prints the scores, writes
    the ranking to ``logs/test.csv`` and dumps the graph for TensorBoard.
    """
    steps = 20
    data_set = DataSets.get_wiki_vote()
    data_set -= 1  # zero-based node ids
    n_raw = data_set.max(axis=0).max() + 1
    beta = tf.constant(0.85, tf.float32, name="Beta")
    n = tf.constant(n_raw, tf.float32, name="NodeCounts")
    # Transposed adjacency: a[j, i] == 1.0 when an edge i -> j exists.
    # FIX: the original passed `tf.float64` as the second positional
    # argument of tf.Variable, which is `trainable`, not `dtype` — the
    # variable silently stayed float32 and was marked trainable. The
    # stray argument is removed; the whole graph is float32 anyway.
    a = tf.Variable(
        tf.transpose(tf.scatter_nd(data_set.values.tolist(),
                                   data_set.shape[0] * [1.0],
                                   [n_raw, n_raw])),
        name="AdjacencyMatrix")
    v = tf.Variable(tf.fill([n_raw, 1], tf.pow(n, -1)),
                    name="PageRankVector")
    o_degree = tf.reduce_sum(a, 0)           # out-degree of each node
    condition = tf.not_equal(o_degree, 0)
    # Columns of dangling nodes (zero out-degree) fall back to 1/n.
    transition = tf.transpose(
        tf.where(condition,
                 tf.transpose(beta * tf.div(a, o_degree) + (1 - beta) / n),
                 tf.fill([n_raw, n_raw], tf.pow(n, -1))))
    page_rank = tf.matmul(transition, v, a_is_sparse=True)
    run_iteration = tf.assign(v, page_rank)
    # Node indices ordered by descending score; `ranked` is defined
    # elsewhere in the project (py_func keeps it a NumPy-side callback).
    ranks = tf.transpose(tf.py_func(ranked, [-v], tf.int64))[0]
    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)
        for step in range(steps):
            sess.run(run_iteration)
        print(sess.run(v))
        print(sess.run(ranks))
        np.savetxt('logs/test.csv', sess.run(ranks), fmt='%i')
        tf.summary.FileWriter('logs/.', sess.graph)
        # Tail of a method whose `def` line precedes this chunk: returns
        # the best-scoring attribute name and its column of data.
        return self.gain_list.idxmax(), self.data[self.gain_list.idxmax()]

    def gain(self, subdata, h_S):
        """Information gain of every column of `subdata`.

        h_S is the entropy of the whole set; for each attribute the
        expected entropy of its partition is subtracted from it.
        Returns the per-column gain Series together with `subdata`.
        """
        result = pd.Series(index=subdata.columns)
        for column in subdata.columns:
            # Per-category conditional entropies of this attribute.
            a = self.sub_entropy(subdata[column])
            counts = subdata[column].value_counts()
            p = (counts / counts.sum())
            # Gain = H(S) - sum_cat p(cat) * H(S | cat).
            result[column] = (h_S - (p * a).sum())
        return result, subdata

    def entropy(self, subdata):
        """Shannon entropy (bits) of the class column of `subdata`."""
        counts = subdata[self.class_name].value_counts()
        p = (counts / counts.sum())
        # sum p * log2(1/p) == -sum p * log2(p)
        return (p * np.log2(1 / p)).sum()

    def sub_entropy(self, subdata):
        """Class entropy of each category of the attribute `subdata`."""
        result = pd.Series(index=subdata.unique())
        # Pair the attribute with the class column so rows can be
        # filtered per category.
        cross = pd.concat([subdata, self.data[self.class_name]], axis=1)
        for cat in subdata.unique():
            result[cat] = self.entropy(cross[subdata == cat])
        return result

    def __str__(self):
        return str(self.gain_list)


if __name__ == '__main__':
    data_pd = DataSets.get_weber_nominal()
    print(GainRanking(data_pd, data_pd.columns[-1]))
from numerical.data_science.res import DataSets as ds
from sklearn.model_selection import train_test_split
from sklearn import tree
from sklearn.tree import DecisionTreeClassifier


def learn_function(a, b, c, d, e):
    """Target boolean concept: (NAND(a, b) OR NAND(c, d)) XOR e."""
    return bool(not (a and b) or not (c and d)) != bool(e)


if __name__ == '__main__':
    # Truth table generated from the concept; last column is the label.
    np_data = ds.generate_from_logic_method(learn_function).data
    X_train, X_test, y_train, y_test = train_test_split(np_data[:, :-1],
                                                        np_data[:, -1],
                                                        test_size=0.33,
                                                        random_state=42)
    # FIX: the original instantiated tree.DecisionTreeClassifier() and
    # immediately overwrote it with a second instance — the dead first
    # assignment is removed.
    clf = DecisionTreeClassifier()
    clf = clf.fit(X_train, y_train)
    # Tail of discretize_AGE, whose `def` line precedes this chunk:
    # the final (oldest) age bucket.
    return 'AGE_4'


def discretize_PRE4(value):
    """Bucket the continuous PRE4 measurement into three categories.

    Cut points 2.66 and 2.88 — presumably chosen from the data's
    distribution; TODO confirm against the discretization analysis.
    """
    if value <= 2.66:
        return 'PRE4_1'
    elif 2.66 < value <= 2.88:
        return 'PRE4_2'
    else:
        return 'PRE4_3'


def discretize_PRE5(value):
    """Bucket the continuous PRE5 measurement into two categories."""
    if value <= 2.05:
        return 'PRE5_1'
    else:
        return 'PRE5_2'


if __name__ == '__main__':
    pd_data = DataSets.get_thoraric_surgery()
    # Apply each discretizer column-wise and store as pandas categories.
    pd_data['PRE4'] = pd_data['PRE4'].apply(discretize_PRE4, 1).astype('category')
    pd_data['PRE5'] = pd_data['PRE5'].apply(discretize_PRE5, 1).astype('category')
    pd_data['AGE'] = pd_data['AGE'].apply(discretize_AGE, 1).astype('category')
    print(pd_data)
    pd_data.to_csv('ThoraricSurgery_discrete.csv', index=False)
from numerical.data_science.res import DataSets
from numerical.data_science import GainRankingContinous
from numerical.data_science import ID3


class J48(ID3):
    """ID3 variant that ranks attributes with the continuous gain ranking."""

    def __init__(self, training_set, class_name):
        super().__init__(training_set, class_name,
                         ranking=GainRankingContinous)


if __name__ == '__main__':
    weather = DataSets.get_weather()
    model = J48(weather, weather.columns[-1])
    print(model)
cuts = [] for current in new_data[self.class_name]: if temp != current: cuts.append((new_data[subdata.name].iloc[i] + new_data[subdata.name].iloc[i - 1]) / 2) temp = current i += 1 alt = pd.DataFrame() for cut in cuts: alt = pd.concat([ alt, subdata.apply(GainRankingContinous.discretize_split, 1, args=(cut, )).astype('category').rename(cut) ], axis=1) return alt[self.gain(alt, self.h_S)[0].idxmax()] @staticmethod def discretize_split(value, point): if value < point: return ' < ' + str(point) else: return '>= ' + str(point) if __name__ == '__main__': data_pd_2 = DataSets.get_weather_semi_nominal().ix[:, 1:] print(GainRankingContinous(data_pd_2, data_pd_2.columns[-1]))
elif d.shape[1] == 1: tree.add_child(v1, " ".join(d[self.class_name].unique())) elif d.shape[0] == 0: tree.add_child(v1, None) else: tree.add_child(v1, self.generate_tree(d)) return tree def __str__(self): return str(self.tree) if __name__ == '__main__': data_pd = DataSets.get_weber_nominal() id3_tennis = ID3(data_pd, data_pd.columns[-1]) print(id3_tennis) ''' pd_careval = DataSets.get_car_eval() id3_careval = ID3(pd_careval, pd_careval.columns[-1]) print(id3_careval) ''' pd_credit = DataSets.get_credit().ix[:, 1:] id3_credit = ID3(pd_credit, pd_credit.columns[0]) print(id3_credit)