def __init__(self, input_shape, filters=[32, 64, 128, 10], n_clusters=10, alpha=1.0):
    super(DCEC, self).__init__()

    self.n_clusters = n_clusters
    self.input_shape = input_shape
    self.alpha = alpha
    self.pretrained = False
    self.y_pred = []

    self.cae = CAE(input_shape, filters)  # build the convolutional autoencoder
    hidden = self.cae.get_layer(name='embedding').output  # keep the features produced by the embedding layer
    self.encoder = Model(inputs=self.cae.input, outputs=hidden)  # encoder model: from the CAE input to the embedding layer

    # Define DCEC model
    clustering_layer = ClusteringLayer(self.n_clusters, name='clustering')(hidden)  # feed the embedding output into the clustering layer
    self.model = Model(inputs=self.cae.input,
                       outputs=[clustering_layer, self.cae.output])  # DCEC model: from the CAE input to both the clustering output and the CAE reconstruction
def __init__(self,
             input_shape: Tuple[int, int, int],
             filters: Iterable[int] = [32, 64, 128, 32],
             n_clusters: int = 32,
             alpha: int = 1):
    """DCEC Model

    :param input_shape: Shape of the input layer in the model
    :param filters: Number of filters in the convolutional layers, plus the
        size of the clustering layer. Hence the length should equal
        len(convolutional layers) + 1.
    :param n_clusters: k, the number of clusters to target
        # TODO Do we need this parameter?
    :param alpha: parameter in Student's t distribution
    """
    # TODO Add activation as a parameter to this model
    super(DCEC, self).__init__()

    self.n_clusters = n_clusters
    self.input_shape = input_shape
    self.alpha = alpha
    self.pretrained = False
    self.y_pred = []

    self.cae = CAE(input_shape, filters)
    hidden = self.cae.get_layer(name="embedding").output
    self.encoder = Model(inputs=self.cae.input, outputs=hidden)

    # Define DCEC model
    clustering_layer = ClusteringLayer(self.n_clusters, name="clustering")(hidden)
    self.model = Model(inputs=self.cae.input,
                       outputs=[clustering_layer, self.cae.output])
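# A minimal usage sketch for the typed constructor above; the input shape and
# cluster count are illustrative assumptions, not values from the source.
# As the docstring notes, len(filters) should equal the number of
# convolutional layers plus one.
#
#   dcec = DCEC(input_shape=(128, 128, 3), filters=[32, 64, 128, 32], n_clusters=32)
#   dcec.model.summary()  # two outputs: soft assignments from 'clustering' and the CAE reconstruction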
def __init__(self, input_shape, filters=[32, 64, 128, 10], n_clusters=10, alpha=1.0):
    super(DCEC, self).__init__()

    self.n_clusters = n_clusters
    self.input_shape = input_shape
    self.alpha = alpha
    self.pretrained = False
    self.y_pred = []

    self.cae = CAE(input_shape, filters)
    hidden = self.cae.get_layer(name='embedding').output
    self.encoder = Model(inputs=self.cae.input, outputs=hidden)

    # Define DCEC model
    clustering_layer = ClusteringLayer(self.n_clusters, name='clustering')(hidden)
    self.model = Model(inputs=self.cae.input,
                       outputs=[clustering_layer, self.cae.output])
class DCEC(object):
    def __init__(self, input_shape, filters=[32, 64, 128, 10], n_clusters=10, alpha=1.0):
        super(DCEC, self).__init__()

        self.n_clusters = n_clusters
        self.input_shape = input_shape
        self.alpha = alpha
        self.pretrained = False
        self.y_pred = []

        self.cae = CAE(input_shape, filters)
        hidden = self.cae.get_layer(name='embedding').output
        self.encoder = Model(inputs=self.cae.input, outputs=hidden)

        # Define DCEC model
        clustering_layer = ClusteringLayer(self.n_clusters, name='clustering')(hidden)
        self.model = Model(inputs=self.cae.input,
                           outputs=[clustering_layer, self.cae.output])

    def pretrain(self, x, batch_size=256, epochs=200, optimizer='adam', save_dir='results/temp'):
        print('...Pretraining...')
        self.cae.compile(optimizer=optimizer, loss='mse')
        from keras.callbacks import CSVLogger
        csv_logger = CSVLogger(save_dir + '/pretrain_log.csv')

        # begin training
        t0 = time()
        self.cae.fit(x, x, batch_size=batch_size, epochs=epochs, callbacks=[csv_logger])
        print('Pretraining time: ', time() - t0)
        self.cae.save(save_dir + '/pretrain_cae_model.h5')
        print('Pretrained weights are saved to %s/pretrain_cae_model.h5' % save_dir)
        self.pretrained = True

    def load_weights(self, weights_path):
        self.model.load_weights(weights_path)

    def extract_feature(self, x):  # extract features from before clustering layer
        return self.encoder.predict(x)

    def predict(self, x):
        q, _ = self.model.predict(x, verbose=0)
        return q.argmax(1)

    @staticmethod
    def target_distribution(q):
        weight = q ** 2 / q.sum(0)
        return (weight.T / weight.sum(1)).T

    def compile(self, loss=['kld', 'mse'], loss_weights=[1, 1], optimizer='adam'):
        self.model.compile(loss=loss, loss_weights=loss_weights, optimizer=optimizer)

    def fit(self, x, y=None, batch_size=256, maxiter=2e4, tol=1e-3, update_interval=140,
            cae_weights=None, save_dir='./results/temp'):
        print('Update interval', update_interval)
        save_interval = int(x.shape[0] / batch_size) * 5
        print('Save interval', save_interval)

        # Step 1: pretrain if necessary
        t0 = time()
        if not self.pretrained and cae_weights is None:
            print('...pretraining CAE using default hyper-parameters:')
            print('   optimizer=\'adam\';   epochs=200')
            self.pretrain(x, batch_size, save_dir=save_dir)
            self.pretrained = True
        elif cae_weights is not None:
            self.cae.load_weights(cae_weights)
            print('cae_weights is loaded successfully.')

        # Step 2: initialize cluster centers using k-means
        t1 = time()
        print('Initializing cluster centers with k-means.')
        kmeans = KMeans(n_clusters=self.n_clusters, n_init=20)
        self.y_pred = kmeans.fit_predict(self.encoder.predict(x))
        y_pred_last = np.copy(self.y_pred)
        self.model.get_layer(name='clustering').set_weights([kmeans.cluster_centers_])

        # Step 3: deep clustering
        # logging file
        import csv, os
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)
        logfile = open(save_dir + '/dcec_log.csv', 'w')
        logwriter = csv.DictWriter(logfile,
                                   fieldnames=['iter', 'acc', 'nmi', 'ari', 'L', 'Lc', 'Lr'])
        logwriter.writeheader()

        t2 = time()
        loss = [0, 0, 0]
        index = 0
        for ite in range(int(maxiter)):
            if ite % update_interval == 0:
                q, _ = self.model.predict(x, verbose=0)
                p = self.target_distribution(q)  # update the auxiliary target distribution p

                # evaluate the clustering performance
                self.y_pred = q.argmax(1)
                if y is not None:
                    acc = np.round(metrics.acc(y, self.y_pred), 5)
                    nmi = np.round(metrics.nmi(y, self.y_pred), 5)
                    ari = np.round(metrics.ari(y, self.y_pred), 5)
                    loss = np.round(loss, 5)
                    logdict = dict(iter=ite, acc=acc, nmi=nmi, ari=ari,
                                   L=loss[0], Lc=loss[1], Lr=loss[2])
                    logwriter.writerow(logdict)
                    print('Iter', ite, ': Acc', acc, ', nmi', nmi, ', ari', ari, '; loss=', loss)

                # check stop criterion
                delta_label = np.sum(self.y_pred != y_pred_last).astype(np.float32) / self.y_pred.shape[0]
                y_pred_last = np.copy(self.y_pred)
                if ite > 0 and delta_label < tol:
                    print('delta_label ', delta_label, '< tol ', tol)
                    print('Reached tolerance threshold. Stopping training.')
                    logfile.close()
                    break

            # train on batch
            if (index + 1) * batch_size > x.shape[0]:
                loss = self.model.train_on_batch(
                    x=x[index * batch_size::],
                    y=[p[index * batch_size::], x[index * batch_size::]])
                index = 0
            else:
                loss = self.model.train_on_batch(
                    x=x[index * batch_size:(index + 1) * batch_size],
                    y=[p[index * batch_size:(index + 1) * batch_size],
                       x[index * batch_size:(index + 1) * batch_size]])
                index += 1

            # save intermediate model
            if ite % save_interval == 0:
                # save DCEC model checkpoints
                print('saving model to:', save_dir + '/dcec_model_' + str(ite) + '.h5')
                self.model.save_weights(save_dir + '/dcec_model_' + str(ite) + '.h5')

            ite += 1

        # save the trained model
        logfile.close()
        print('saving model to:', save_dir + '/dcec_model_final.h5')
        self.model.save_weights(save_dir + '/dcec_model_final.h5')
        t3 = time()
        print('Pretrain time:  ', t1 - t0)
        print('Clustering time:', t3 - t1)
        print('Total time:     ', t3 - t0)
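# A minimal driver sketch for the DCEC class above, assuming `x` is a numpy
# array of images shaped (n, rows, cols, channels) scaled to [0, 1]. The helper
# name `run_dcec` and the save directory are illustrative assumptions, not part
# of the source.
def run_dcec(x, n_clusters=10, save_dir='./results/temp'):
    dcec = DCEC(input_shape=x.shape[1:], filters=[32, 64, 128, 10], n_clusters=n_clusters)
    dcec.compile(loss=['kld', 'mse'], loss_weights=[1, 1], optimizer='adam')
    # fit() pretrains the CAE first (unless cae_weights is passed), initializes
    # the clustering layer with k-means on the embeddings, then alternates
    # target-distribution updates with train_on_batch steps until
    # delta_label < tol or maxiter is reached.
    dcec.fit(x, batch_size=256, maxiter=20000, update_interval=140, save_dir=save_dir)
    return dcec.predict(x)  # hard labels: argmax over the soft assignments q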
def Layer_Wise_preTrain(batch_size=1, nkerns=[3, 4], dataset=None, n_epochs=6, k_Top=5):
    """ Layer-wise Convolutional auto-encoders. """
    if dataset is None:
        dataset = Preprocess_Input().load_data()

    train_set_x = dataset[0][0]
    train_set_z = dataset[0][2]

    n_train_batch = train_set_x.get_value(borrow=True).shape[0]
    n_train_batch /= batch_size

    print '... Building AutoEncoders'
    rng = numpy.random.RandomState(96813)
    index = T.lscalar('index')
    learning_rate = T.dscalar('rate')
    x = T.matrix('x')
    z = T.iscalar('z')
    # index.tag.test_value = 0
    # learning_rate.tag.test_value = .3

    em = 50
    layer0_input = x[:, :z * 50].reshape((batch_size, 1, 50, -1))

    # Auto-Encoder for Conv. LAYER 1
    layer0 = CAE(rng, input=layer0_input, image_shape=(batch_size, 1, em, None),
                 filter_shape=(nkerns[0], 1, 1, 7), factor=.5, s=z, k_Top=k_Top, do_fold=True)

    # zz = layer0.get_cost_updates(learning_rate)
    # print 'hidden:', theano.function([index, learning_rate], [zz], on_unused_input='ignore',
    #     givens={x: train_set_x[index * batch_size: (index + 1) * batch_size],
    #             z: train_set_z[index]})(0, .3)

    cost, updates = layer0.get_cost_updates(learning_rate)

    # print 'DECODE RESULT:\n', theano.function([index], [layer0.output.shape, layer0_input.shape, z.type, layer0.zz.shape],
    #     givens={x: train_set_x[index * batch_size: (index + 1) * batch_size],
    #             z: train_set_z[index]})(0)

    train0 = theano.function([index, learning_rate], cost, updates=updates,
                             givens={x: train_set_x[index * batch_size: (index + 1) * batch_size],
                                     z: train_set_z[index]})

    em /= 2
    # Auto-Encoder for Conv. LAYER 2.
    layer1_input = layer0.Output()
    layer1 = CAE(rng, input=layer1_input, image_shape=(batch_size, nkerns[0], em, None),
                 filter_shape=(nkerns[1], nkerns[0], 1, 3), factor=.0, s=z, k_Top=k_Top, do_fold=True)

    cost1, updates1 = layer1.get_cost_updates(learning_rate)
    train1 = theano.function([index, learning_rate], cost1, updates=updates1,
                             givens={x: train_set_x[index * batch_size: (index + 1) * batch_size],
                                     z: train_set_z[index]})

    em /= 2
    # Auto-Encoder for Hidden Layer.
    hidden_input = layer1.Output().flatten(2)
    hidden_layer = AE(rng, input=hidden_input, n_visible=nkerns[1] * em * k_Top, n_hidden=100)

    cost_h, updates_h = hidden_layer.get_cost_updates(learning_rate)
    train_h = theano.function([index, learning_rate], cost_h, updates=updates_h,
                              givens={x: train_set_x[index * batch_size: (index + 1) * batch_size],
                                      z: train_set_z[index]})

    print '... Pretraining model'

    ls_1 = []
    rate = 1e-2
    epoch = 0
    while epoch < n_epochs:
        epoch = epoch + 1
        for minibatch in xrange(n_train_batch):
            cost_ij = train0(minibatch, rate)
            ls_1.append(cost_ij)
        rate *= .95
        print '\tepoch %i : cost: %f' % (epoch, cost_ij)
    # print layer0.W.get_value(borrow=True)

    ls_2 = []
    rate = 1e-2
    epoch = 0
    while epoch < n_epochs:
        epoch = epoch + 1
        for minibatch in xrange(n_train_batch):
            cost_ij = train1(minibatch, rate)
            ls_2.append(cost_ij)
        rate *= .95
        print '\tepoch %i : cost: %f' % (epoch, cost_ij)
    # print layer1.W.get_value(borrow=True)

    ls_3 = []
    rate = 4e-2
    epoch = 0
    while epoch < n_epochs:
        epoch = epoch + 1
        for minibatch in xrange(n_train_batch):
            cost_ij = train_h(minibatch, rate)
            ls_3.append(cost_ij)
        rate *= .95
        print '\tepoch %i : cost: %f' % (epoch, cost_ij)

    # PLOT AutoEncoder Cost Function
    plt.subplot(3, 1, 1)
    plt.plot(numpy.arange(len(ls_1)) + 1, numpy.asarray(ls_1), 'r.-')
    plt.title('AutoEncoder Cost function Results')
    plt.xlabel('Epochs')
    plt.ylabel('Convolutional Layer 1')

    plt.subplot(3, 1, 2)
    plt.plot(numpy.arange(len(ls_2)) + 1, numpy.asarray(ls_2), 'r.-')
    # plt.title('AutoEncoder Cost function Results')
    plt.xlabel('Epochs')
    plt.ylabel('Convolutional Layer 2')

    plt.subplot(3, 1, 3)
    plt.plot(numpy.arange(len(ls_3)) + 1, numpy.asarray(ls_3), 'r.-')
    plt.xlabel('Epochs')
    plt.ylabel('Hidden Layer values')
    plt.show()

    return [layer0.params, layer1.params, hidden_layer.params]
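# A minimal usage sketch for the layer-wise pretraining routine above. The
# variable names on the left are illustrative assumptions; the source only
# returns the three per-layer parameter lists, which would then seed the
# corresponding layers of the full network before fine-tuning.
#
#   pretrained = Layer_Wise_preTrain(batch_size=1, nkerns=[3, 4], n_epochs=6, k_Top=5)
#   layer0_params, layer1_params, hidden_params = pretrained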
class DCEC(object):
    def __init__(self,
                 input_shape: Tuple[int, int, int],
                 filters: Iterable[int] = [32, 64, 128, 32],
                 n_clusters: int = 32,
                 alpha: int = 1):
        """DCEC Model

        :param input_shape: Shape of the input layer in the model
        :param filters: Number of filters in the convolutional layers, plus the
            size of the clustering layer. Hence the length should equal
            len(convolutional layers) + 1.
        :param n_clusters: k, the number of clusters to target
            # TODO Do we need this parameter?
        :param alpha: parameter in Student's t distribution
        """
        # TODO Add activation as a parameter to this model
        super(DCEC, self).__init__()

        self.n_clusters = n_clusters
        self.input_shape = input_shape
        self.alpha = alpha
        self.pretrained = False
        self.y_pred = []

        self.cae = CAE(input_shape, filters)
        hidden = self.cae.get_layer(name="embedding").output
        self.encoder = Model(inputs=self.cae.input, outputs=hidden)

        # Define DCEC model
        clustering_layer = ClusteringLayer(self.n_clusters, name="clustering")(hidden)
        self.model = Model(inputs=self.cae.input,
                           outputs=[clustering_layer, self.cae.output])

    # TODO we should really be training for 200 epochs
    # TODO Can we do a bigger batch size here?
    # TODO Should we train for longer?
    def pretrain(self, x, batch_size=512, epochs=200, optimizer="adam", save_dir="results/temp"):
        logger.info("...Pretraining...")
        self.cae.compile(optimizer=optimizer, loss="mse")

        from tensorflow.keras.callbacks import CSVLogger
        csv_logger = CSVLogger(save_dir + "/pretrain_log.csv")

        # begin training
        t0 = time()
        self.cae.fit(x, x, batch_size=batch_size, epochs=epochs, callbacks=[csv_logger])
        logger.info("Pretraining time: {}".format(time() - t0))
        self.cae.save(save_dir + "/pretrain_cae_model.h5")
        logger.info("Pretrained weights are saved to %s/pretrain_cae_model.h5" % save_dir)
        save_results_to_gcs(save_dir)
        self.pretrained = True

    def load_weights(self, weights_path):
        self.model.load_weights(weights_path)

    def extract_feature(self, x):  # extract features from before clustering layer
        return self.encoder.predict(x)

    def predict(self, x):
        q, _ = self.model.predict(x, verbose=0)
        return q.argmax(1)

    @staticmethod
    def target_distribution(q):
        weight = q ** 2 / q.sum(0)
        return (weight.T / weight.sum(1)).T

    @staticmethod
    def should_stop(losses, threshold=0.0005):
        assert len(losses) >= 3, "Losses is not length 3"
        a = sum(losses) / len(losses)
        for x in losses:
            if abs(x - a) > threshold:
                return False
        return True

    def compile(self, loss=["kld", "mse"], loss_weights=[1, 1], optimizer="adam"):
        self.model.compile(loss=loss, loss_weights=loss_weights, optimizer=optimizer)

    def fit(
        self,
        x,
        y=None,
        batch_size=512,  # This was 256, Castellano used 128
        maxiter=2e4,
        tol=1e-3,
        update_interval=140,  # Was 140
        cae_weights=None,
        save_dir="./results/temp",
    ):
        logger.info("Update interval {}".format(update_interval))
        save_interval = int(x.shape[0] / batch_size * 5)
        logger.info("Save interval {}".format(save_interval))

        # Step 1: pretrain if necessary
        t0 = time()
        if not self.pretrained and cae_weights is None:
            logger.info("...pretraining CAE using default hyper-parameters:")
            logger.info("   optimizer='adam';   epochs=200")
            self.pretrain(x, batch_size, save_dir=save_dir)
            self.pretrained = True
        elif cae_weights is not None:
            self.cae.load_weights(cae_weights)
            logger.info("cae_weights is loaded successfully.")

        # Step 2: initialize cluster centers using k-means
        t1 = time()
        logger.info("Initializing cluster centers with k-means.")
        kmeans = KMeans(n_clusters=self.n_clusters, n_init=20)
        self.y_pred = kmeans.fit_predict(self.encoder.predict(x))
        y_pred_last = np.copy(self.y_pred)
        self.model.get_layer(name="clustering").set_weights([kmeans.cluster_centers_])

        # Step 3: deep clustering
        # logging file
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)
        logfile_path = save_dir + "/dcec_log.csv"
        logfile = open(logfile_path, "w")
        logwriter = csv.DictWriter(logfile,
                                   fieldnames=["iter", "acc", "nmi", "ari", "L", "Lc", "Lr"])
        logwriter.writeheader()

        overall_log_loss = save_dir + "/dcec_log_all.csv"
        l2 = open(overall_log_loss, "w")
        lw2 = csv.DictWriter(l2, fieldnames=["iter", "L", "Lc", "Lr"])
        lw2.writeheader()

        loss = [0, 0, 0]
        index = 0
        previous_losses = []
        for ite in range(int(maxiter)):
            if ite % update_interval == 0:
                logger.info("Updating. Iter {}".format(ite))
                q, _ = self.model.predict(x, verbose=0)
                # model.predict() causes a memory leak in tf2. So, use model(). See notes above
                # q, _ = self.model(x_tf, training=False)
                p = self.target_distribution(q)  # update the auxiliary target distribution p

                # evaluate the clustering performance
                self.y_pred = q.argmax(1)
                if y is not None:
                    logger.info("{} calculating acc".format(ite))
                    acc = np.round(metrics.acc(y, self.y_pred), 5)
                    nmi = np.round(metrics.nmi(y, self.y_pred), 5)
                    ari = np.round(metrics.ari(y, self.y_pred), 5)
                    loss = np.round(loss, 5)
                    logdict = dict(iter=ite, acc=acc, nmi=nmi, ari=ari,
                                   L=loss[0], Lc=loss[1], Lr=loss[2])
                    logwriter.writerow(logdict)
                    logger.info("Iter {}: Acc {}, nmi {}, ari {}; loss={}".format(
                        ite, acc, nmi, ari, loss))

                loss_dict = {"iter": ite, "L": loss[0], "Lc": loss[1], "Lr": loss[2]}
                logwriter.writerow(loss_dict)
                logger.info("iter {i}; L {L}; Lc {Lc}; Lr {Lr}".format(i=ite, **loss_dict))

                logger.info("Evaluating full loss")
                loss_all = self.model.evaluate(x, y=[p, x], batch_size=batch_size, verbose=0)
                previous_losses.append(loss_all[0])
                ld = {"iter": ite, "L": loss_all[0], "Lc": loss_all[1], "Lr": loss_all[2]}
                logger.info("Overall loss. iter {iter}; L {L}; Lc {Lc}; Lr {Lr}".format(**ld))
                lw2.writerow(ld)

                # check stop criterion
                delta_label = np.sum(self.y_pred != y_pred_last).astype(np.float32) / self.y_pred.shape[0]
                logger.info("delta_label={}".format(delta_label))
                y_pred_last = np.copy(self.y_pred)
                if self.n_clusters > 1 and ite > 0 and delta_label < tol:
                    logger.info("delta_label {} < tol {}".format(delta_label, tol))
                    logger.info("Reached tolerance threshold. Stopping training.")
                    logfile.close()
                    break
                elif (self.n_clusters == 1 and len(previous_losses) >= 3
                      and self.should_stop(previous_losses)):
                    logger.info("Stopping criteria reached: Last 3 losses {}".format(
                        previous_losses[-3:]))
                    break

            # train on batch
            if (index + 1) * batch_size > x.shape[0]:
                loss = self.model.train_on_batch(
                    x=x[index * batch_size::],
                    y=[p[index * batch_size::], x[index * batch_size::]])
                index = 0
            else:
                loss = self.model.train_on_batch(
                    x=x[index * batch_size:(index + 1) * batch_size],
                    y=[
                        p[index * batch_size:(index + 1) * batch_size],
                        x[index * batch_size:(index + 1) * batch_size],
                    ],
                )
                index += 1

            loss_dict = {"iter": ite, "L": loss[0], "Lc": loss[1], "Lr": loss[2]}
            logwriter.writerow(loss_dict)
            if ite % 10 == 0:
                logger.info("iter={};L={};L_c={};L_r={}".format(ite, *loss))

            # save intermediate model
            if ite % save_interval == 0:
                # save DCEC model checkpoints
                logger.info("saving model to: {}".format(
                    save_dir + "/dcec_model_" + str(ite) + ".h5"))
                path = save_dir + "/dcec_model_" + str(ite) + ".h5"
                self.model.save_weights(path)
                gcs_copy(path)
                gcs_copy(logfile_path)
                gcs_copy(overall_log_loss)

            ite += 1

        # save the trained model
        logfile.close()
        l2.close()
        logger.info("saving model to: {}".format(save_dir + "/dcec_model_final.h5"))
        self.model.save_weights(save_dir + "/dcec_model_final.h5")
        t3 = time()
        logger.info("Pretrain time:   {}".format(t1 - t0))
        logger.info("Clustering time: {}".format(t3 - t1))
        logger.info("Total time:      {}".format(t3 - t0))
        save_results_to_gcs(save_dir)