class KBNets():
    # Two kinds of knowledge: one is stored, the other is learned. The learned kind uses DQL.
    """
    Knowledge base. Ideally it would define a self-learning method that runs on
    multiple threads, similar to a task scheduler.
    The main point is to use the NN as a storage device, so that we can drop the
    usual bookkeeping such as indices and pointers, and treat it as a completely
    unknown agent that learns.
    """
    """
    The storage problem is not solved yet, so incremental storage is not discussed here.
    One idea is to use "sedimentation" to reduce or block the gradient updates that new
    knowledge applies to previously stored knowledge; alternatively, a weight-mask could
    light up only the weights in the corresponding region.
    """
    """
    The key is to memorize; overfitting is actually the goal here. Generalization is not
    obtained by restricting the fit, but through logical reasoning.
    """
    """
    params = {'n_features': 10, 'n_lay': 3,
              'type_1': 'fc', 'hidd_1': 64,
              'type_2': 'fc', 'hidd_2': 32,
              'type_3': 'out', 'hidd_3': 2}
    """

    def __init__(self, params):
        n_lay = params['n_lay']
        self.net = NetWork(None)
        self.x_ = tf.placeholder(tf.float32, shape=[None, params['n_features']])
        self.out = self.x_  # start the forward pass from the input placeholder
        h_hat = params['n_features']
        for i in range(1, n_lay + 1):  # layer keys are 1-indexed: type_1, hidd_1, ...
            t = params['type_{}'.format(i)]
            h = params['hidd_{}'.format(i)]
            if t == 'fc':
                self.out = self.net.fc_block(self.out, shape=[h_hat, h])
                h_hat = h
        self.y_ = tf.placeholder(tf.float32, shape=[None, h_hat])
        self.loss = tf.reduce_sum(tf.squared_difference(self.out, self.y_))
        lr = params.get('lr', 1e-3)
        self.opt = tf.train.AdamOptimizer(lr).minimize(self.loss)

    def pred(self, infos, sess):
        o = sess.run([self.out], {self.x_: infos})
        return o

    """ ngram """

    def train(self, infos, sess):
        i = 0
        x_ = infos['x_']
        y_ = infos['y_']
        while i < 100000:
            _, l = sess.run([self.opt, self.loss], {self.x_: x_, self.y_: y_})
            if l < 1e-10:  # memorized: the training loss is effectively zero
                break
            i += 1
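# Hedged usage sketch (not in the original source): how KBNets might be driven to
# memorize a small batch of (x, y) pairs. The params dict values and the `xs`/`ys`
# arrays below are illustrative assumptions only.
demo_params = {'n_features': 10, 'n_lay': 2, 'lr': 1e-3,
               'type_1': 'fc', 'hidd_1': 64,
               'type_2': 'fc', 'hidd_2': 2}
kb = KBNets(demo_params)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    xs = np.random.rand(5, 10).astype(np.float32)   # hypothetical knowledge items
    ys = np.random.rand(5, 2).astype(np.float32)    # hypothetical stored values
    kb.train({'x_': xs, 'y_': ys}, sess)            # fit until the loss is ~0 (memorized)
    print(kb.pred(xs, sess))                        # recall the stored values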
    validation_x = file['validation_x'][:]
    validation_y = np.transpose(np.array([file['validation_y'][:]]))
    validation = DataSet(validation_x, validation_y, onehot=True)
    test_x = file['test_x'][:]
    test_y = np.transpose(np.array([file['test_y'][:]]))
    test = DataSet(test_x, test_y, onehot=True)
    file.close()
    return train, validation, test


train, validation, test = load_mnist('mnist')
dict_data = {'train': train, 'validation': validation, 'test': test}

net = NetWork(None)
DELTA = 1e-3
INPUT_DIM = 20

x = tf.placeholder(tf.float32, shape=[None, INPUT_DIM])      # compressed to 20 dims by an autoencoder
noisy = tf.placeholder(tf.float32, shape=[None, INPUT_DIM])  # compressed to 20 dims by an autoencoder
y = tf.placeholder(tf.float32, shape=[None, 10])

'''
Build the network.
'''
with tf.variable_scope('identify_net'):
    '''
    Verifies whether the input is a piece of knowledge that has already been learned.
    The knowledge is first memorized by a very large fc layer, and only then is this
    network trained.
    Ideally it would output the occupied weight positions as binary values {0, 1};
    that is a sparse matrix and should be compressed.
    '''
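    # Hedged sketch (not part of the original file): the binary weight-mask idea in
    # isolation. A trainable mask logit is squashed with a sigmoid and thresholded at
    # DELTA into a {0, 1} matrix that marks the weight positions a piece of knowledge
    # occupies. The names `mask_logits_demo`, `soft_mask`, `binary_mask` are illustrative
    # assumptions, not part of the project's API.
    mask_logits = tf.get_variable('mask_logits_demo', shape=[INPUT_DIM, 10],
                                  initializer=tf.random_normal_initializer(stddev=0.05))
    soft_mask = tf.nn.sigmoid(mask_logits)                 # values in (0, 1), trainable
    binary_mask = tf.cast(soft_mask >= DELTA, tf.float32)  # sparse {0, 1} occupancy map
    occupied_count = tf.reduce_sum(binary_mask)            # how many weights this knowledge lights up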
class Classifier_Net():
    def __init__(self, x, y, hidden_units=1024, alpha=0.2, delta=1e-3):
        self.x = x
        self.y = y
        self.out = self.x
        self.iput_units = self.x.shape[1].value
        self.out_units = self.y.shape[1].value
        self.hidden_units = hidden_units
        self.net = NetWork(None)
        self.alpha = alpha
        self.delta = delta
        self.step2 = 1
        self.build()

    '''
    A classification formulation could be considered here, but the incoming y is not
    one-hot, so regression is used instead.
    '''
    def build(self):
        self.w1 = self.net._norm_variable('lay1', [self.iput_units, self.hidden_units])
        self.w1_mask = self.net._norm_variable('lay1_mask', self.w1.shape)
        # Important state: after each training round w_mask_bi must be subtracted from
        # it, and added back the next time it is used. Acts as a global variable.
        self.identify_mask_1 = tf.Variable(np.ones([self.iput_units, self.hidden_units]),
                                           dtype=tf.float32, trainable=False)
        self.w2 = self.net._norm_variable('lay2', [self.hidden_units, self.out_units])
        self.w2_mask = self.net._norm_variable('lay2_mask', self.w2.shape)
        self.identify_mask_2 = tf.Variable(np.ones([self.hidden_units, self.out_units]),
                                           dtype=tf.float32, trainable=False)

        # This is what gets stored into each identifier.
        self.w1_mask_bi = tf.nn.sigmoid(self.w1_mask)
        self.w1_mask_bi = tf.multiply(self.w1_mask_bi, self.identify_mask_1)
        self.w2_mask_bi = tf.nn.sigmoid(self.w2_mask)
        self.w2_mask_bi = tf.multiply(self.w2_mask_bi, self.identify_mask_2)
#        self.w1_mask_bi = tf.nn.sigmoid(self.w1_mask)
#        self.w1_mask_bi = tf.multiply(self.w1_mask_bi, self.identify_mask_1)
        # These two steps do not keep the mask updates in sync, so there is a potential problem here.

        w11 = tf.multiply(self.w1, self.w1_mask_bi)
        self.out = tf.nn.tanh(tf.matmul(self.out, w11))
#        self.reg = tf.nn.l2_loss(w1)
#        self.reg = self.alpha * self.reg
        self.reg_mask = tf.reduce_sum(tf.abs(self.w1_mask_bi))

#        self.w2_mask_bi = tf.nn.sigmoid(self.w2_mask)
#        self.w2_mask_bi = tf.multiply(self.w2_mask_bi, self.identify_mask_2)
        w22 = tf.multiply(self.w2, self.w2_mask_bi)
        logits = tf.matmul(self.out, w22)
        self.out = tf.nn.softmax(logits)
#        self.reg = self.reg + tf.nn.l2_loss(w2)
#        self.reg = self.alpha * self.reg
        self.reg_mask += tf.reduce_sum(tf.abs(self.w2_mask_bi))

        # cross-entropy expects the raw logits, not the already-softmaxed output
        self.loss1 = tf.reduce_sum(
            tf.nn.softmax_cross_entropy_with_logits(labels=self.y, logits=logits))
        self.global_step_classifier = tf.Variable(
            name='global_step_classifier', initial_value=1)

    def loss(self, reg=True):
        return self.loss1 + self.alpha * self.reg_mask

    def optimizer(self):
        self.opt_classifier = tf.train.AdamOptimizer(1e-2).minimize(
            self.loss(), global_step=self.global_step_classifier)
        return self.opt_classifier

    def renewmask(self, sess):
        # Re-assign the mask variables with fresh random values for the next task.
        sess.run([tf.assign(self.w1_mask, tf.random_normal(self.w1_mask.shape, stddev=0.05)),
                  tf.assign(self.w2_mask, tf.random_normal(self.w2_mask.shape, stddev=0.05))])
        self.step2 = 1

    def mask(self, sess, end=False):
        mask = {'lay1': self.w1_mask_bi.eval(session=sess).copy(),
                'lay2': self.w2_mask_bi.eval(session=sess).copy()}
        if self.step2 == 1:
            self.step2 = 1 - self.step2
            ass1 = tf.assign(self.w1_mask,
                             tf.where(self.w1_mask_bi >= self.delta,
                                      tf.ones_like(self.w1_mask) * 1000,
                                      tf.ones_like(self.w1_mask) * -1000))
            ass2 = tf.assign(self.w2_mask,
                             tf.where(self.w2_mask_bi >= self.delta,
                                      tf.ones_like(self.w2_mask) * 1000,
                                      tf.ones_like(self.w2_mask) * -1000))
            sess.run([ass1, ass2])
        if end:
            # Keep shrinking the pool of available positions.
            mask = {'lay1': self.w1_mask_bi.eval(session=sess).copy(),
                    'lay2': self.w2_mask_bi.eval(session=sess).copy()}
            sess.run(tf.assign(self.identify_mask_1, self.identify_mask_1 - mask['lay1']))
            sess.run(tf.assign(self.identify_mask_2, self.identify_mask_2 - mask['lay2']))
            self.renewmask(sess)
        return mask

    def pred(self, sess=None, mask=None):
        opts = None
        if mask is not None:
            idx_mask_1_bak = self.identify_mask_1.eval(session=sess)
            idx_mask_2_bak = self.identify_mask_2.eval(session=sess)
#            sess.run(tf.assign(self.identify_mask_1, self.identify_mask_1 + mask['lay1']))
#            sess.run(tf.assign(self.identify_mask_2, self.identify_mask_2 + mask['lay2']))
            sess.run(tf.assign(self.identify_mask_1, mask['lay1']))
            sess.run(tf.assign(self.identify_mask_2, mask['lay2']))
            self.step2 = 1
            ass1 = tf.assign(self.w1_mask,
                             tf.where(mask['lay1'] >= self.delta,
                                      tf.ones_like(self.w1_mask) * 1000,
                                      tf.ones_like(self.w1_mask) * -1000))
            ass2 = tf.assign(self.w2_mask,
                             tf.where(mask['lay2'] >= self.delta,
                                      tf.ones_like(self.w2_mask) * 1000,
                                      tf.ones_like(self.w2_mask) * -1000))
            sess.run([ass1, ass2])
#            opts = [tf.assign(self.identify_mask_1, self.identify_mask_1 - mask['lay1']),
#                    tf.assign(self.identify_mask_2, self.identify_mask_2 - mask['lay2'])]
            opts = [tf.assign(self.identify_mask_1, idx_mask_1_bak),
                    tf.assign(self.identify_mask_2, idx_mask_2_bak)]
        return self.out, opts
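# Hedged usage sketch (not in the original source): the task-by-task workflow as far as
# it can be read from the methods above. It assumes the x/y placeholders defined earlier
# in this section; `batch_x` and `batch_y` are assumed mini-batches for illustration.
clf = Classifier_Net(x, y)
train_op = clf.optimizer()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # 1) train on one piece of knowledge
    for _ in range(1000):
        sess.run(train_op, feed_dict={x: batch_x, y: batch_y})
    # 2) freeze the occupied weights: binarize the mask, subtract it from the pool of
    #    free positions, and re-randomize the mask for the next task
    task_mask = clf.mask(sess, end=True)
    # 3) later, restore that task's mask to recall it; running `restore_ops` afterwards
    #    puts the free-position bookkeeping back
    out, restore_ops = clf.pred(sess=sess, mask=task_mask)
    prediction = sess.run(out, feed_dict={x: batch_x})
    sess.run(restore_ops)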
src = 'mnist'
# src = 'fashion mnist'
mnist = input_data.read_data_sets("data/{}".format(src), one_hot=True)
train_x = mnist.train._images
train_y = mnist.train._labels
validation_x = mnist.validation._images
validation_y = mnist.validation._labels
test_x = mnist.test._images
test_y = mnist.test._labels

TRAIN_DECODER = True

net = NetWork(None)
x = tf.placeholder(tf.float32, shape=[None, 28 * 28])
encoder = tf.reshape(x, [-1, 28, 28, 1])
encoder = net.resn_block(encoder, [1, 1], name='RESN1', actfunc='relu')
# encoder = net.resn_block(encoder, [1, 1], name='RESN2', actfunc='relu')
# encoder = net.resn_block(encoder, [1, 1], name='RESN3', actfunc='relu')
# encoder = net.conv_block(encoder, [3, 3, 1, 1], [1, 1, 1, 1],
#                          name='CONV1', padding='VALID', actfunc='relu')
# encoder = net.conv_block(encoder, [5, 5, 1, 1], [1, 1, 1, 1],
#                          name='CONV2', padding='VALID', actfunc='relu')
# encoder = net.conv_block(encoder, [3, 3, 1, 1], [1, 1, 1, 1],
#                          name='CONV3', padding='VALID', actfunc='sigmoid')
# encoder = x
# encoder = net.fc_block(encoder, [28 * 28, 1024], name='FC11', actfunc='sigmoid')
# encoder = net.fc_block(encoder, [1024, 512], name='FC12', actfunc='sigmoid')
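# Hedged sketch (not in the original source): one way the bottleneck, decoder and
# reconstruction loss might be completed, since only the encoder is shown above.
# It assumes RESN1 preserves the 28x28x1 shape, and that a 20-dim code is wanted to
# match the INPUT_DIM used by the identify net; the layer names ENC_FC/DEC_FC and the
# fc_block shapes are assumptions based on how net.fc_block is called elsewhere here.
code = tf.reshape(encoder, [-1, 28 * 28])
code = net.fc_block(code, [28 * 28, 20], name='ENC_FC', actfunc='sigmoid')     # 20-dim code
decoder = net.fc_block(code, [20, 28 * 28], name='DEC_FC', actfunc='sigmoid')  # reconstruction
recon_loss = tf.reduce_mean(tf.squared_difference(decoder, x))
if TRAIN_DECODER:
    ae_opt = tf.train.AdamOptimizer(1e-3).minimize(recon_loss)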
class Identify_Net():
    def __init__(self, x, noisy, index=1, alpha=0.3, delta=1e-5, hidden_units=10):
        self.x = x
        self.y = 1  # everything that has been learned is labeled 1
        self.iput_units = self.x.shape[1].value
        self.hidden_units = hidden_units
        self.out_units = 2
        self.out = self.x
        self.net = NetWork(None)
        self.alpha = alpha
        self.delta = delta
        self.index = index
        self.masked = None
        self.random_noisy = tf.random_normal([self.iput_units, self.iput_units])
        self.noisy = noisy
        self.build()
        self.v = 1

    def build(self):
        with tf.variable_scope('Identify_{}'.format(self.index)):
            # encoder
            w = self.net._cons_variable('lay1', [self.iput_units, self.hidden_units])
            b = self.net._cons_variable('lay1b', [1, self.hidden_units])
            self.out = tf.nn.softplus(tf.matmul(self.out, w) + b)  # ? * 10
            self.reg = tf.nn.l2_loss(w)

            w2 = self.net._cons_variable('lay11', [self.hidden_units, 5])
            b2 = self.net._cons_variable('lay11b', [1, 5])
            self.out = tf.nn.softplus(tf.matmul(self.out, w2) + b2)
            self.reg = tf.nn.l2_loss(w2)

            # decoder
            w2 = self.net._cons_variable('dlay11', [5, self.hidden_units])
            b2 = self.net._cons_variable('dlay11b', [1, self.hidden_units])
            self.out = tf.nn.softplus(tf.matmul(self.out, w2) + b2)
            self.reg = tf.nn.l2_loss(w2)

            w = self.net._cons_variable('dlay1', [self.hidden_units, self.iput_units])
            b = self.net._cons_variable('dlay1b', [1, self.iput_units])
            self.out = tf.nn.softplus(tf.matmul(self.out, w) + b)  # ? * 10
            self.reg = tf.nn.l2_loss(w)

#            self.loss1 = tf.reduce_mean(tf.reduce_mean(tf.abs(self.x - self.out), 1))
            # per-sample reconstruction similarity: close to 1 means the input was seen before
            self.learn = 1 - tf.reduce_mean(tf.abs(self.x - self.out), 1)
            self.global_step_identify = tf.Variable(
                name='global_step_identify', initial_value=1)

    def build_dis(self):
        with tf.variable_scope('Identify_{}'.format(self.index)):
#            self.noisy = 1.05 - self.out
            self.w = self.net._cons_variable('lay1', [self.iput_units, self.hidden_units])
            b = self.net._cons_variable('lay1b', [1, self.hidden_units])
#            self.out = tf.pow(tf.matmul(self.out, self.w), 3)
            self.out = tf.matmul(self.out, self.w)
            self.noisy = tf.nn.softplus(tf.matmul(self.noisy, self.w) + b)
            self.reg = tf.nn.l2_loss(self.w)

#            self.w2 = self.net._cons_variable('lay11', [self.hidden_units, self.hidden_units])
#            b2 = self.net._cons_variable('lay11b', [1, self.hidden_units])
##            self.out = tf.nn.softplus(tf.matmul(self.out, w))
#            self.out = tf.nn.softplus(tf.matmul(self.out, self.w2) + b2)
#            self.noisy = tf.nn.softplus(tf.matmul(self.noisy, self.w2) + b2)
#            self.reg = tf.nn.l2_loss(self.w2)
#
#            self.w22 = self.net._cons_variable('lay21', [self.hidden_units, self.hidden_units])
#            b2 = self.net._cons_variable('lay21b', [1, self.hidden_units])
#            self.out = tf.nn.softplus(tf.matmul(self.out, self.w22) + b2)
#            self.noisy = tf.nn.softplus(tf.matmul(self.noisy, self.w22) + b2)
#            self.reg = tf.nn.l2_loss(self.w22)
#
#            self.w23 = self.net._cons_variable('lay22', [self.hidden_units, self.hidden_units])
#            b2 = self.net._cons_variable('lay22b', [1, self.hidden_units])
##            self.out = tf.nn.softplus(tf.matmul(self.out, w))
#            self.out = tf.nn.softplus(tf.matmul(self.out, self.w23) + b2)
#            self.noisy = tf.nn.softplus(tf.matmul(self.noisy, self.w23) + b2)
#            self.reg = tf.nn.l2_loss(self.w23)

            self.w3 = self.net._norm_variable('lay10', [self.hidden_units, self.out_units])
            self.out = tf.nn.softmax(tf.matmul(self.out, self.w3))
            self.noisy = tf.nn.softmax(tf.matmul(self.noisy, self.w3))
#            self.out = tf.matmul(self.out, self.w3)
#            self.noisy = tf.matmul(self.noisy, self.w3)
            self.reg += tf.nn.l2_loss(self.w3)

#            self.loss1 = tf.reduce_sum(tf.squared_difference(self.out, tf.transpose([self.x[:, -1]]))) \
#                + 0.4 * tf.reduce_mean(tf.squared_difference(self.noisy, 0))
            self.loss1 = 0.7 * tf.reduce_mean(-tf.log(self.out[:, 1]))
#            self.loss1 += 0.3 * tf.reduce_mean(-tf.log(self.noisy[:, 0]))

#            self.learn = 1 - tf.abs(tf.subtract(self.out, tf.transpose([self.x[:, -1]])))
            self.learn = self.out
#            print(self.out.shape)
#            print(self.learn.shape)
            self.global_step_identify = tf.Variable(
                name='global_step_identify', initial_value=1)

    def loss(self, reg=True):
        return self.loss1

    def optimizer(self, reg=True):
        self.opt_identify = tf.train.RMSPropOptimizer(learning_rate=1e-2).minimize(
            self.loss(reg), global_step=self.global_step_identify)
        return self.opt_identify

    def learned(self):
        return self.learn

    def mask(self, mask=None):
        if mask is None:
            return self.masked
        self.masked = mask
        return self.masked

    def threshold(self, v=None):
        if v is None:
            return self.v
        self.v = v
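# Hedged usage sketch (not in the original source): how the identify net might be queried
# to decide whether an input was already learned. It assumes the x/noisy placeholders
# defined earlier in this section; `candidates` and the 0.9 threshold are assumptions.
ident = Identify_Net(x, noisy, index=1)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    ident.threshold(0.9)                                    # assumed decision threshold
    scores = sess.run(ident.learned(), feed_dict={x: candidates})
    known = scores >= ident.threshold()                     # True where the input looks already learned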
from tensorflow.examples.tutorials.mnist import input_data

mnist = input_data.read_data_sets("data/mnist", one_hot=True)
train_x = mnist.train._images
train_x = np.reshape(train_x, [-1, 28, 28, 1])
train_y = mnist.train._labels
validation_x = mnist.validation._images
validation_x = np.reshape(validation_x, [-1, 28, 28, 1])
validation_y = mnist.validation._labels
test_x = mnist.test._images
test_x = np.reshape(test_x, [-1, 28, 28, 1])
test_y = mnist.test._labels

net = NetWork(None)
x = tf.placeholder(tf.float32, shape=[None, 28, 28, 1])
y = tf.placeholder(tf.float32, shape=[None, 10])

# o = tf.reshape(x, [-1, 28, 28, 1])
o = x
o = net.resn_block(o, [1, 1], name='RESN1')
# o = net.resn_block(o, [4, 4], name='RESN2')
# o = net.resn_block(o, [8, 4], name='RESN3')
o = tf.reshape(o, [-1, 28 * 28 * 1])
o = net.fc_block(o, [28 * 28 * 1, 1024], name='FC1')
o = net.fc_block(o, [1024, 10], name='FC2', actfunc=False)
o1 = tf.nn.softmax(o)
# cross-entropy expects the raw logits `o`, not the softmaxed `o1`
loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=o))
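# Hedged training sketch (not in the original source): a minimal loop for the classifier
# graph above. The optimizer choice, batch size and step count are assumptions.
opt = tf.train.AdamOptimizer(1e-3).minimize(loss)
correct = tf.equal(tf.argmax(o1, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for step in range(1000):
        bx, by = mnist.train.next_batch(64)
        bx = np.reshape(bx, [-1, 28, 28, 1])   # match the 4-D input placeholder
        sess.run(opt, feed_dict={x: bx, y: by})
        if step % 100 == 0:
            acc = sess.run(accuracy, feed_dict={x: validation_x, y: validation_y})
            print('step {}: validation accuracy {:.4f}'.format(step, acc))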