def PNtrain(dataset: str, batch_size: int = 30500, lr: float = 1e-3):
    """Train a fully-supervised positive-negative (PN) baseline classifier.

    Draws 1000 positive samples and a prior-dependent number of negative
    samples from the training set, trains for 10000 epochs (one epoch per
    ``fit`` call so test metrics can be logged every epoch), and writes
    train/test metrics to TensorBoard under ``logs/<dataset>-PN``.

    Args:
        dataset: One of ``'MNIST'``, ``'epsilon'``, ``'20News'``,
            ``'CIFAR-10'``.
        batch_size: Mini-batch size used for both ``fit`` and ``evaluate``.
        lr: Learning rate for the dataset-specific optimizer.

    Raises:
        ValueError: If ``dataset`` is not one of the supported names.
        SystemExit: If the log directory for this run already exists.
    """
    # Dataset-specific data, model architecture, and optimizer.
    if dataset == 'MNIST':
        (x_train, y_train), (x_test, y_test) = MNIST()
        model = MLP(n_layers=6, activation='relu', use_softmax=True)
        optimizer = tf.keras.optimizers.Adam(lr=lr)
    elif dataset == 'epsilon':
        (x_train, y_train), (x_test, y_test) = Epsilon()
        model = MLP(n_layers=6, activation='softsign', use_softmax=True)
        optimizer = tf.keras.optimizers.Adam(lr=lr)
    elif dataset == '20News':
        (x_train, y_train), (x_test, y_test) = News20()
        model = MLP(n_layers=5, activation='softsign', use_softmax=True)
        optimizer = tf.keras.optimizers.Adagrad(lr=lr)
    elif dataset == 'CIFAR-10':
        (x_train, y_train), (x_test, y_test) = Cifar10()
        model = CNN(use_softmax=True)
        optimizer = tf.keras.optimizers.Adam(lr=lr)
    else:
        raise ValueError('Incorrect argument!')

    # Class prior of the positive class in the full training set.
    pi_p = np.count_nonzero(y_train) / y_train.size
    pi_n = 1 - pi_p

    # 1000 positives; the negative count follows the prior-dependent ratio
    # n_n = round((pi_n / (2 * pi_p))^2 * n_p).
    n_p = 1000
    n_n = int(np.round((pi_n / 2 / pi_p)**2 * n_p))
    # NOTE(review): np.random.choice samples WITH replacement by default, so
    # the drawn subsets may contain duplicates — presumably intentional;
    # confirm against the experimental protocol.
    p_index = np.random.choice(y_train.sum(), n_p)
    n_index = np.random.choice(np.logical_not(y_train).sum(), n_n)
    train_data_x = np.concatenate(
        (x_train[y_train][p_index], x_train[np.logical_not(y_train)][n_index]))
    train_data_y = np.concatenate((np.ones(n_p), np.zeros(n_n)))
    # Scale features to [0, 1]. Assumes 8-bit pixel-style inputs — TODO
    # confirm this is appropriate for the non-image datasets as well.
    train_data_x, x_test = train_data_x / 255., x_test / 255.

    model.compile(optimizer=optimizer,
                  loss='sparse_categorical_crossentropy',
                  metrics=['acc'])

    # Refuse to overwrite an existing experiment log directory.
    if os.path.isdir('logs/' + dataset + '-PN'):
        print('Error: log dir exist')
        # Fix: exit() is injected by the site module and is not guaranteed
        # to exist in every runtime; raising SystemExit is the exact
        # behavior exit(1) produces.
        raise SystemExit(1)
    tensorboard = tf.keras.callbacks.TensorBoard(
        log_dir="logs/{}/train".format(dataset + '-PN'))
    summary_writer = tf.summary.FileWriter("logs/{}/test".format(dataset +
                                                                 '-PN'))

    for i in range(10000):
        # One epoch per fit() call (epochs=i+1 with initial_epoch=i) so the
        # test-set metrics below can be logged after every epoch.
        model.fit(train_data_x,
                  train_data_y,
                  batch_size=batch_size,
                  epochs=i + 1,
                  shuffle=True,
                  callbacks=[tensorboard],
                  initial_epoch=i)
        lossval, acc = model.evaluate(x_test, y_test, batch_size=batch_size)
        summary = tf.Summary()
        summary.value.add(tag='test-loss', simple_value=lossval)
        summary.value.add(tag='test-accuracy', simple_value=acc)
        summary.value.add(tag='test-error', simple_value=1 - acc)
        summary_writer.add_summary(summary, i)
        summary_writer.flush()
def PUtrain(dataset: str, func_loss, batch_size=30500, lr=1e-3,
            pretrain='no'):
    """Train a positive-unlabeled (PU) classifier with the given PU loss.

    Labels 1000 randomly chosen positives as positive and treats the entire
    training set as unlabeled, trains for 10000 epochs (one epoch per
    ``fit`` call so test metrics can be logged every epoch), and writes
    metrics to TensorBoard under ``logs/<dataset><loss-suffix>``.

    Args:
        dataset: One of ``'MNIST'``, ``'epsilon'``, ``'20News'``,
            ``'CIFAR-10'``.
        func_loss: One of the loss functions from the ``loss`` module
            (``loss.puloss``, ``loss.nnpuloss``, or ``loss.positive_risk``);
            compared by identity to pick the log-folder suffix.
        batch_size: Mini-batch size used for both ``fit`` and ``evaluate``.
        lr: Learning rate for the dataset-specific optimizer.
        pretrain: ``'pretrain'`` to save best-loss checkpoints (and force an
            Adagrad optimizer), ``'finetune'`` to load weights from
            ``checkpoint/model.ckpt``, anything else for a plain run.

    Raises:
        ValueError: If ``dataset`` or ``func_loss`` is not recognized.
        SystemExit: If the log directory for this run already exists.
    """
    # Dataset-specific data, model architecture, and optimizer. Note
    # use_softmax=False: the PU losses operate on the raw model output.
    if dataset == 'MNIST':
        (x_train, y_train), (x_test, y_test) = MNIST()
        model = MLP(n_layers=6, activation='relu', use_softmax=False)
        optimizer = tf.keras.optimizers.Adam(lr=lr)
    elif dataset == 'epsilon':
        (x_train, y_train), (x_test, y_test) = Epsilon()
        model = MLP(n_layers=6, activation='softsign', use_softmax=False)
        optimizer = tf.keras.optimizers.Adam(lr=lr)
    elif dataset == '20News':
        (x_train, y_train), (x_test, y_test) = News20()
        model = MLP(n_layers=5, activation='softsign', use_softmax=False)
        optimizer = tf.keras.optimizers.Adagrad(lr=lr)
    elif dataset == 'CIFAR-10':
        (x_train, y_train), (x_test, y_test) = Cifar10()
        model = CNN(use_softmax=False)
        optimizer = tf.keras.optimizers.Adam(lr=lr)
    else:
        raise ValueError('Error: unknown dataset')

    # Map the run configuration to a log-folder suffix. The loss functions
    # are compared by identity, so func_loss must be the actual function
    # objects from the loss module.
    if pretrain == 'pretrain':
        # Override optimizer
        optimizer = tf.keras.optimizers.Adagrad(lr=lr)
        lossstr = '-pretrain'
    elif func_loss is loss.puloss:
        lossstr = '-uPU'
    elif func_loss is loss.nnpuloss:
        lossstr = '-nnPU'
    elif func_loss is loss.positive_risk:
        lossstr = '-pRisk'
    else:
        raise ValueError('Error: unknown loss')

    # Refuse to overwrite an existing experiment log directory.
    foldername = dataset + lossstr
    if os.path.isdir('logs/' + foldername):
        print('Error: log dir exist')
        # Fix: exit() is injected by the site module and is not guaranteed
        # to exist in every runtime; raising SystemExit is the exact
        # behavior exit(2) produces.
        raise SystemExit(2)

    # prior probability
    pi_p = np.count_nonzero(y_train) / y_train.size
    # Module-global side effect: the PU loss functions read loss.pi_p, so
    # it must be set before model.compile/fit below.
    loss.pi_p = pi_p

    # choose first 1000 samples as training samples
    n_p, n_n = 1000, y_train.size
    # randomly choose n_p training samples as positive samples
    # the rest is unlabeled samples (label 0 = unlabeled, not negative).
    # NOTE(review): np.random.choice samples WITH replacement by default —
    # presumably intentional; confirm against the experimental protocol.
    p_index = np.random.choice(y_train.sum(), n_p)
    train_data_x = np.concatenate((x_train[y_train][p_index], x_train))
    train_data_y = np.concatenate((np.ones(n_p), np.zeros(n_n)))
    # normalize the training data and the test data
    train_data_x, x_test = train_data_x / 255.0, x_test / 255.0

    model.compile(
        optimizer=optimizer,
        loss=func_loss,
        metrics=['acc', loss.positive_risk, loss.negative_risk, loss.error])

    # TensorBoard visualization
    tensorboard = tf.keras.callbacks.TensorBoard(
        log_dir="logs/{}/train".format(foldername))
    summary_writer = tf.summary.FileWriter("logs/{}/test".format(foldername))
    callbacks = [tensorboard]

    # Pretrain related: save best-loss weights while pretraining, or load
    # previously saved weights when fine-tuning.
    if pretrain == 'pretrain':
        saver = tf.keras.callbacks.ModelCheckpoint('checkpoint/model.ckpt',
                                                   monitor='loss',
                                                   verbose=1,
                                                   save_best_only=True,
                                                   save_weights_only=True)
        callbacks.append(saver)
    elif pretrain == 'finetune':
        model.load_weights("checkpoint/model.ckpt")

    for i in range(10000):
        # One epoch per fit() call (epochs=i+1 with initial_epoch=i) so the
        # test-set metrics below can be logged after every epoch.
        model.fit(train_data_x,
                  train_data_y,
                  batch_size=batch_size,
                  epochs=i + 1,
                  shuffle=True,
                  callbacks=callbacks,
                  initial_epoch=i)
        lossval, acc, prisk, nrisk, err = model.evaluate(
            x_test, y_test, batch_size=batch_size)
        summary = tf.Summary()
        summary.value.add(tag='test-loss', simple_value=lossval)
        summary.value.add(tag='test-accuracy', simple_value=acc)
        summary.value.add(tag='test-positive-risk', simple_value=prisk)
        summary.value.add(tag='test-negative-risk', simple_value=nrisk)
        summary.value.add(tag='test-error', simple_value=err)
        summary_writer.add_summary(summary, i)
        summary_writer.flush()