Example #1
# Constructor of the Trainer class (the full class appears in Example #2).
# Agent, Discriminator, Environment, GeneratorPretraining, and the TensorFlow
# session `sess` are defined elsewhere in the project.
def __init__(self,
             rf_system,
             B,
             T,
             n_authorized,
             g_H,
             d_dropout,
             g_lr=1e-3,
             d_lr=1e-3,
             n_sample=16,
             generate_samples=10000,
             binary_reward=True,
             epsilon=0.1,
             beta=10000,
             d_load_path=None):
    self.rf_system = rf_system
    self.B, self.T = B, T
    self.g_H = g_H
    self.d_dropout = d_dropout
    self.generate_samples = generate_samples
    self.g_lr, self.d_lr = g_lr, d_lr
    self.generator_pre = GeneratorPretraining(g_H)
    self.agent = Agent(sess, B, g_H, g_lr, beta=beta, epsilon=epsilon)
    self.g_beta = Agent(sess, B, g_H, g_lr, beta=beta, epsilon=epsilon)
    if d_load_path is None:
        self.discriminator = Discriminator(dropout=d_dropout)
    else:
        self.discriminator = Discriminator(load_path=d_load_path)
    self.env = Environment(self.rf_system,
                           self.discriminator,
                           self.B,
                           self.T,
                           self.g_beta,
                           n_sample=n_sample,
                           binary_reward=binary_reward)
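
A minimal instantiation sketch for the constructor above; the values are illustrative and the parameter readings are inferred from how they are used, with `rf_system` standing in for the project's RF front-end object:

trainer = Trainer(rf_system,       # hypothetical RF system instance
                  B=32,            # batch size
                  T=64,            # perturbation sequence length
                  n_authorized=5,  # number of authorized transmitters
                  g_H=64,          # generator hidden size (assumed)
                  d_dropout=0.25)  # discriminator dropout rate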
Example #2
import time
from pathlib import Path

import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import linregress

import keras
from keras.optimizers import Adam

# The Agent, Discriminator, Environment, and GeneratorPretraining classes and
# the TensorFlow session `sess` come from elsewhere in the project.


class Trainer(object):
    def __init__(self,
                 rf_system,
                 B,
                 T,
                 n_authorized,
                 g_H,
                 d_dropout,
                 g_lr=1e-3,
                 d_lr=1e-3,
                 n_sample=16,
                 generate_samples=10000,
                 binary_reward=True,
                 epsilon=0.1,
                 beta=10000,
                 d_load_path=None):
        self.rf_system = rf_system
        self.B, self.T = B, T
        self.g_H = g_H
        self.d_dropout = d_dropout
        self.generate_samples = generate_samples
        self.g_lr, self.d_lr = g_lr, d_lr
        self.generator_pre = GeneratorPretraining(g_H)
        self.agent = Agent(sess, B, g_H, g_lr, beta=beta, epsilon=epsilon)
        self.g_beta = Agent(sess, B, g_H, g_lr, beta=beta, epsilon=epsilon)
        if d_load_path is None:
            self.discriminator = Discriminator(dropout=d_dropout)
        else:
            self.discriminator = Discriminator(dropout=d_dropout,
                                               load_path=d_load_path)
        self.env = Environment(self.rf_system,
                               self.discriminator,
                               self.B,
                               self.T,
                               self.g_beta,
                               n_sample=n_sample,
                               binary_reward=binary_reward)
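        # Note: self.agent is the generator being trained, while self.g_beta is
        # a lagged copy handed to the Environment; it is re-synced from the
        # agent after each training step via reflect_agent_to_beta().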

    def pre_train(self,
                  g_epochs=3,
                  d_epochs=1,
                  g_pre_data_train=None,
                  g_pre_data_valid=None,
                  d_pre_sig_train=None,
                  d_pre_out_train=None,
                  d_pre_sig_valid=None,
                  d_pre_out_valid=None,
                  g_lr=1e-3,
                  d_lr=1e-3):
        # Forward the split train/validation arrays under the keyword names
        # that the two pre-training methods actually accept.
        self.pre_train_generator(g_epochs=g_epochs,
                                 g_pre_data_train=g_pre_data_train,
                                 g_pre_data_valid=g_pre_data_valid,
                                 lr=g_lr)
        self.pre_train_discriminator(d_epochs=d_epochs,
                                     d_pre_sig_train=d_pre_sig_train,
                                     d_pre_out_train=d_pre_out_train,
                                     d_pre_sig_valid=d_pre_sig_valid,
                                     d_pre_out_valid=d_pre_out_valid,
                                     lr=d_lr)

    def pre_train_generator(self,
                            g_epochs=3,
                            g_pre_data_train=None,
                            g_pre_data_valid=None,
                            lr=1e-3,
                            silent=False):
        g_adam = Adam(lr)
        self.generator_pre.compile(g_adam, loss='mse')
        if not silent:
            print('Generator pre-training')
            self.generator_pre.summary()

        np.random.seed(int(time.time()))
        # Random suffix so concurrent runs do not clobber each other's checkpoint.
        pre_train_g_filepath = '%s/t_pre_train_g_weights_%d' % (
            Path.home(), np.random.randint(0, 100000))
        pre_train_g_c = [
            keras.callbacks.ModelCheckpoint(pre_train_g_filepath,
                                            monitor='val_loss',
                                            save_best_only=True,
                                            save_weights_only=True)
        ]
        self.generator_pre.fit(g_pre_data_train[0],
                               g_pre_data_train[1],
                               validation_data=(g_pre_data_valid[0],
                                                g_pre_data_valid[1]),
                               callbacks=pre_train_g_c,
                               epochs=g_epochs,
                               verbose=(0 if silent else 1))

        self.generator_pre.load_weights(pre_train_g_filepath)
        self.reflect_pre_train()

    def predict_pre_generator(self, g_sig_test=None):
        return self.generator_pre.predict(g_sig_test)

    def predict_generator(self, g_sig_test=None):
        return self.agent.act(g_sig_test)

    def predict_beta_generator(self, g_sig_test=None):
        return self.g_beta.act(g_sig_test)

    def reset_generator(self, lr, epsilon, beta, binary_reward):
        self.agent.generator.set_lr(lr)
        self.g_beta.generator.set_lr(lr)
        self.agent.generator.set_epsilon(epsilon)
        self.g_beta.generator.set_epsilon(epsilon)
        self.agent.generator.set_beta(beta)
        self.g_beta.generator.set_beta(beta)
        self.env.set_reward_type(binary_reward)
        self.agent.generator.sess.run(self.agent.generator.init_lstm)
        self.agent.generator.sess.run(self.agent.generator.init_mean)
        self.agent.generator.sess.run(self.agent.generator.init_var)
        self.agent.generator.sess.run(self.agent.generator.reset_optimizer)
        self.g_beta.generator.sess.run(self.g_beta.generator.init_lstm)
        self.g_beta.generator.sess.run(self.g_beta.generator.init_mean)
        self.g_beta.generator.sess.run(self.g_beta.generator.init_var)
        self.g_beta.generator.sess.run(self.g_beta.generator.reset_optimizer)

    def pre_train_discriminator(self,
                                d_epochs=1,
                                d_pre_sig_train=None,
                                d_pre_out_train=None,
                                d_pre_sig_valid=None,
                                d_pre_out_valid=None,
                                lr=1e-3):
        d_adam = Adam(lr)
        self.discriminator.compile(d_adam, 'binary_crossentropy')
        print('Discriminator pre-training')
        self.discriminator.summary()

        pre_train_d_filepath = 't_pre_train_d_weights'
        pre_train_d_c = [
            keras.callbacks.ModelCheckpoint(pre_train_d_filepath,
                                            monitor='val_loss',
                                            save_best_only=True,
                                            save_weights_only=True)
        ]
        history = self.discriminator.fit(d_pre_sig_train,
                                         d_pre_out_train,
                                         validation_data=(d_pre_sig_valid,
                                                          d_pre_out_valid),
                                         epochs=d_epochs,
                                         batch_size=self.B,
                                         callbacks=pre_train_d_c)
        plt.plot(history.history['loss'])
        plt.plot(history.history['val_loss'])
        self.discriminator.load_weights(pre_train_d_filepath)

    def norm(self, sig_u):
        # Normalize each signal block to unit average power.
        pwr = np.sqrt(np.mean(np.sum(sig_u**2, axis=-1), axis=-1))
        sig_u = sig_u / pwr[:, None, None]
        print(sig_u.shape)
        return sig_u

    def get_predicted_sequence(self):
        orig = self.rf_system.get_real_pretx_symbol_block(self.T // 2)
        return orig, self.agent.generator.sampling_sequence_alt(self.T, orig)

    def test_curr_discriminator_batch(self):
        # Score the discriminator on a batch of freshly generated impersonation
        # signals against the target label 1.
        n_samples = 100
        perturbed_signals = np.zeros((n_samples, 256, 2))
        for i in range(n_samples):
            pretx = self.rf_system.get_real_pretx_symbol_block(self.T // 2)
            perturbed = self.agent.generator.sampling_sequence_alt(
                self.T, pretx)
            perturbed_signals[i, :] = self.rf_system.transmit_real_symbol_block(
                perturbed, impersonator=True)
        return self.test_discriminator(perturbed_signals,
                                       np.ones((n_samples, )))

    def fft(self, x):
        # Treat the trailing axis as (I, Q), form the complex baseband signal,
        # and return the magnitude of its FFT.
        return np.abs(np.fft.fft(x[..., 0] + 1j * x[..., 1]))

    def predict_curr_discriminator_for_auth(self):
        signal = self.rf_system.transmit_real_symbol_block(
            self.rf_system.get_real_pretx_symbol_block(self.T // 2),
            authorized=True,
            tx_id=1)
        return self.predict_discriminator(np.expand_dims(signal, 0)).squeeze()

    def predict_curr_discriminator(self):
        perturbed_signal = self.rf_system.transmit_real_symbol_block(
            self.agent.generator.sampling_sequence_alt(
                self.T,
                self.rf_system.get_real_pretx_symbol_block(self.T // 2)),
            impersonator=True)
        return self.predict_discriminator(np.expand_dims(perturbed_signal,
                                                         0)).squeeze()

    def predict_discriminator(self, d_sig_test=None):
        return self.discriminator.predict(d_sig_test)

    def predict_discriminator_mclass(self, d_sig_test=None):
        # Multi-class variant: summed probability over the first 5 classes
        # (the authorized transmitters).
        return np.sum(self.discriminator.predict(d_sig_test)[:, :5], axis=-1)

    def test_discriminator(self, d_sig_test=None, d_out_test=None, thresh=0.5):
        pred = self.predict_discriminator(d_sig_test)
        thresholded = (pred > thresh).astype(int)
        thresholded = thresholded.squeeze()
        return ((thresholded == d_out_test).sum()) / thresholded.shape[0]
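
    # Example with hypothetical numbers: predictions [[0.9], [0.2], [0.7]] and
    # labels [1, 0, 0] threshold (at 0.5) to [1, 0, 1], giving accuracy 2/3.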

    def load_g(self, sess_dir):
        # Load saved generator weights into both RL generators, preserving
        # generator_pre's current weights via a temp file.
        self.generator_pre.save_weights(sess_dir + "/generator_pre_temp.h5")
        self.generator_pre.load_weights(sess_dir + "/generator_weights.h5")
        i = 0
        for layer in self.generator_pre.layers:
            if len(layer.get_weights()) != 0:
                w = layer.get_weights()
                if (len(self.agent.generator.layers) >= (i + 1)):
                    self.agent.generator.layers[i].set_weights(w)
                    self.g_beta.generator.layers[i].set_weights(w)
                i += 1
        self.generator_pre.load_weights(sess_dir + "/generator_pre_temp.h5")

    def save_g(self, sess_dir):
        self.generator_pre.save_weights(sess_dir + "/generator_pre_temp.h5")
        # Mirror the RL generator's weights back into generator_pre, walking
        # weighted layers in order (the inverse of load_g / reflect_pre_train).
        i = 0
        for layer in self.generator_pre.layers:
            if len(layer.get_weights()) != 0:
                if len(self.agent.generator.layers) >= (i + 1):
                    layer.set_weights(
                        self.agent.generator.layers[i].get_weights())
                i += 1

        model_json = self.generator_pre.to_json()
        with open(sess_dir + "/generator.json", "w") as json_file:
            json_file.write(model_json)
        self.generator_pre.save_weights(sess_dir + "/generator_weights.h5")
        self.generator_pre.load_weights(sess_dir + "/generator_pre_temp.h5")

    def load_pre_train_d(self, sess_dir, suffix=''):
        self.discriminator.load_weights(sess_dir +
                                        "/discriminator_weights%s.h5" % suffix)

    def save_pre_train_d(self, sess_dir, suffix=''):
        model_json = self.discriminator.to_json()
        with open(sess_dir + "/discriminator%s.json" % suffix,
                  "w") as json_file:
            json_file.write(model_json)
        self.discriminator.save_weights(sess_dir +
                                        "/discriminator_weights%s.h5" % suffix)

    def reflect_agent_to_beta(self):
        i = 0
        for layer in self.agent.generator.layers:
            if len(layer.get_weights()) != 0:
                w = layer.get_weights()
                if (len(self.g_beta.generator.layers) >= (i + 1)):
                    self.g_beta.generator.layers[i].set_weights(w)
                i += 1

    def reflect_pre_train(self):
        i = 0
        for layer in self.generator_pre.layers:
            if len(layer.get_weights()) != 0:
                w = layer.get_weights()
                if (len(self.agent.generator.layers) >= (i + 1)):
                    self.agent.generator.layers[i].set_weights(w)
                    self.g_beta.generator.layers[i].set_weights(w)
                i += 1

    def is_convergent(self, trace, sensitivity):
        # Fit a line to the trace and call it converged when the absolute
        # slope drops below the sensitivity threshold.
        n = np.shape(trace)[0]
        abs_slope = np.abs(linregress(np.arange(n), trace).slope)
        return abs_slope <= sensitivity
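
    # Example (hypothetical trace): [0.51, 0.50, 0.52, 0.51] fits a slope of
    # about 0.002, so is_convergent(trace, sensitivity=0.02) returns True.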

    def train_loop(self,
                   steps=100,
                   g_steps=1,
                   sensitivity=0.02,
                   g_weights_path='data/save/generator.pkl',
                   verbose=True):
        accus = []
        for step in range(steps):
            # Generator training
            for _ in range(g_steps):
                symbols = self.rf_system.get_real_pretx_symbol_block(
                    self.T // 2)
                self.env.reset(symbols)
                states = np.zeros((self.T, 1, 2))
                actions = np.zeros((self.T, 2))
                rewards = np.zeros((self.T, 1))
                for t in range(self.T):
                    state = self.env.get_state()
                    action = self.agent.act(state)
                    next_state, reward, is_episode_end, info = self.env.step(
                        action)
                    states[t, :] = state
                    actions[t, :] = action
                    rewards[t, :] = reward
                self.agent.generator.update(states, actions, rewards)

            self.reflect_agent_to_beta()
            if ((step + 1) % 20 == 0):
                accu = self.test_curr_discriminator_batch()
                print("{:.3f}".format(accu), end=" ", flush=True)
                accus.append(accu)
                if len(accus) >= 3 and np.mean(accus[-3:]) > 0.98:
                    print("\nConverged to 100%: {:.3f}, {:d}".format(
                        np.mean(accus[-3:]), step + 1))
                    return np.mean(accus[-3:]), step + 1
                elif len(accus) >= 10 and self.is_convergent(
                        accus[-10:], sensitivity):
                    print("\nConverged to {:.3f}, {:d}".format(
                        np.mean(accus[-10:]), step + 1))
                    return np.mean(accus[-10:]), step + 1
        # Guard against fewer than two recorded accuracy checkpoints.
        if len(accus) < 2:
            ret = accus[-1] if accus else float('nan')
        else:
            ret = accus[-1] if accus[-1] > accus[-2] else np.mean(accus[-2:])
        print("\nDid not converge {:.3f}, {:d}".format(ret, step + 1))
        return ret, step + 1

    def train(self,
              steps=100,
              g_steps=1,
              g_weights_path='data/save/generator.pkl',
              verbose=True):
        print('Initial Disc Accuracy: {:.3f}'.format(
            self.predict_curr_discriminator()))
        print('{:d}: Disc Batch Accuracy: {:.3f}'.format(
            0, self.test_curr_discriminator_batch()))
        for step in range(steps):
            # Generator training
            for _ in range(g_steps):
                symbols = self.rf_system.get_real_pretx_symbol_block(
                    self.T // 2)
                self.env.reset(symbols)
                states = np.zeros((self.T, 1, 2))
                actions = np.zeros((self.T, 2))
                rewards = np.zeros((self.T, 1))
                for t in range(self.T):
                    state = self.env.get_state()
                    action = self.agent.act(state)
                    next_state, reward, is_episode_end, info = self.env.step(
                        action)
                    states[t, :] = state
                    actions[t, :] = action
                    rewards[t, :] = reward

                self.agent.generator.update(states, actions, rewards)
                print(
                    '{:d}, {:d}: Disc Accuracy: {:.3f}, Average reward: {:.3f}'
                    .format(step, 1, self.predict_curr_discriminator(),
                            np.mean(rewards)))

            # Update env.g_beta to agent
            self.reflect_agent_to_beta()
            if ((step + 1) % 20 == 0):
                print('{:d}: Disc Batch Accuracy: {:.3f}'.format(
                    step, self.test_curr_discriminator_batch()))

    def save(self, g_path, d_path):
        self.agent.save(g_path)
        self.discriminator.save(d_path)

    def load(self, g_path, d_path):
        self.agent.load(g_path)
        self.g_beta.load(g_path)
        self.discriminator.load_weights(d_path)
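
A minimal end-to-end usage sketch of the call order the class suggests, assuming rf_system and the pre-training arrays (g_train, g_valid, d_sig_train, d_out_train, d_sig_valid, d_out_valid) are prepared elsewhere; every name below is a hypothetical placeholder:

trainer = Trainer(rf_system, B=32, T=64, n_authorized=5, g_H=64, d_dropout=0.25)
trainer.pre_train_generator(g_epochs=3,
                            g_pre_data_train=g_train,   # (inputs, targets)
                            g_pre_data_valid=g_valid)
trainer.pre_train_discriminator(d_epochs=1,
                                d_pre_sig_train=d_sig_train,
                                d_pre_out_train=d_out_train,
                                d_pre_sig_valid=d_sig_valid,
                                d_pre_out_valid=d_out_valid)
mean_accu, n_steps = trainer.train_loop(steps=200)
trainer.save_g('./session')   # writes generator.json and generator_weights.h5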