def main():
    if EXPERIMENT_NAME not in os.listdir():
        os.mkdir(EXPERIMENT_NAME)

    for feature in FEATURES:
        try:
            data = np.load(feature + "_stats.npy", allow_pickle=True).item()
            pca = joblib.load("pca_" + feature + "_stats")

            train_data, train_data_classes = make_train_data(data, True)
            test_data, test_data_classes = make_test_data(data)
            train_data_pca = np.array(pca.transform(train_data))
            test_data_pca = np.array(pca.transform(test_data))

            for c in C_VALUES:
                if (feature, c) in SKIP_COMBINATIONS:
                    print("Skipping " + feature + " SVM-linear C = " + str(c))
                    continue
                print("Computing " + feature + " SVM-linear C = " + str(c))
                res, model = experiment_svm_linear(
                    train_data_pca, train_data_classes,
                    test_data_pca, test_data_classes, c, TRAIN_VERBOSE)
                if res is not None:
                    if SAVE_RESULTS:
                        filename = EXPERIMENT_NAME + "_" + feature + "_svm_lin_c_" + str(c) + "_results"
                        path = os.path.join(EXPERIMENT_NAME, filename)
                        joblib.dump(res, path)
                    if SAVE_RESULTS_TXT:
                        filename = EXPERIMENT_NAME + "_" + feature + "_svm_lin_c_" + str(c) + "_results.txt"
                        path = os.path.join(EXPERIMENT_NAME, filename)
                        save_txt(res, path)
                    if SAVE_MODEL:
                        filename = EXPERIMENT_NAME + "_" + feature + "_svm_lin_c_" + str(c) + "_model"
                        path = os.path.join(EXPERIMENT_NAME, filename)
                        joblib.dump(model, path)
        except Exception as e:
            print("Error during " + EXPERIMENT_NAME + " " + feature + " SVM-linear C = " + str(c))
            print(e)
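# experiment_svm_linear is defined elsewhere in this project. A minimal sketch
# of what it could look like, assuming a scikit-learn LinearSVC and a plain
# dict of accuracy scores as the result object; the hidden-layer size of the
# result dict and the exact metrics are assumptions, not the project's API.
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score


def experiment_svm_linear(train_x, train_y, test_x, test_y, c, verbose=False):
    # linear-kernel SVM with the regularization strength under test
    model = LinearSVC(C=c, verbose=verbose)
    model.fit(train_x, train_y)
    res = {
        "C": c,
        "train_accuracy": accuracy_score(train_y, model.predict(train_x)),
        "test_accuracy": accuracy_score(test_y, model.predict(test_x)),
    }
    return res, model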
        # Fragment of a rollout-collection loop (the enclosing context is not shown).
        sample_episode += 1
        if sample_episode < 1000:
            print('episodes:', sample_episode, '| score:', score)
            writer.add_scalar('data/reward', score, sample_episode)
            score = 0

    # flatten [num_step, num_worker, ...] rollouts into [num_step * num_worker, ...]
    total_state = np.stack(total_state).transpose([1, 0, 2, 3, 4]).reshape([-1, 4, 84, 84])
    total_next_state = np.stack(total_next_state).transpose([1, 0, 2, 3, 4]).reshape([-1, 4, 84, 84])
    total_reward = np.stack(total_reward).transpose().reshape([-1])
    total_action = np.stack(total_action).transpose().reshape([-1])
    total_done = np.stack(total_done).transpose().reshape([-1])

    value, next_value, policy = agent.forward_transition(total_state, total_next_state)

    total_target = []
    total_adv = []
    for idx in range(num_worker):
        target, adv = make_train_data(
            total_reward[idx * num_step:(idx + 1) * num_step],
            total_done[idx * num_step:(idx + 1) * num_step],
            value[idx * num_step:(idx + 1) * num_step],
            next_value[idx * num_step:(idx + 1) * num_step])
        total_target.append(target)
        total_adv.append(adv)

    print('training')
    agent.train_model(total_state, np.hstack(total_target), total_action, np.hstack(total_adv))
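# make_train_data is imported from a helper module. A minimal sketch under the
# assumption that, for this (reward, done, value, next_value) signature, it
# computes n-step discounted returns bootstrapped from the last next-state
# value, with the advantage as return minus baseline; the real helper may use
# GAE(lambda) instead. The default gamma is an assumption.
import numpy as np


def make_train_data(reward, done, value, next_value, gamma=0.99):
    num_step = len(reward)
    discounted_return = np.empty(num_step)
    # bootstrap from the value of the state after the rollout
    running_add = next_value[-1]
    for t in reversed(range(num_step)):
        # zero out the bootstrap across episode boundaries
        running_add = reward[t] + gamma * running_add * (1 - done[t])
        discounted_return[t] = running_add
    adv = discounted_return - value  # advantage = return - value baseline
    return discounted_return, adv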
def main():
    args = parse_arguments()
    train_method = args.train_method
    env_id = args.env_id
    env_type = args.env_type

    if env_type == 'atari':
        env = gym.make(env_id)
        input_size = env.observation_space.shape
        output_size = env.action_space.n
        env.close()
    else:
        raise NotImplementedError

    is_load_model = False
    is_render = False
    os.makedirs('models', exist_ok=True)
    model_path = 'models/{}.model'.format(env_id)
    predictor_path = 'models/{}.pred'.format(env_id)
    target_path = 'models/{}.target'.format(env_id)

    results_dir = os.path.join('outputs', args.env_id)
    os.makedirs(results_dir, exist_ok=True)
    logger = Logger(results_dir)
    writer = SummaryWriter(os.path.join(results_dir, 'tensorboard', args.env_id))

    use_cuda = args.use_gpu
    use_gae = args.use_gae
    use_noisy_net = args.use_noisynet
    lam = args.lam
    num_worker = args.num_worker
    num_step = args.num_step
    ppo_eps = args.ppo_eps
    epoch = args.epoch
    mini_batch = args.minibatch
    batch_size = int(num_step * num_worker / mini_batch)
    learning_rate = args.learning_rate
    entropy_coef = args.entropy
    gamma = args.gamma
    int_gamma = args.int_gamma
    clip_grad_norm = args.clip_grad_norm
    ext_coef = args.ext_coef
    int_coef = args.int_coef
    sticky_action = args.sticky_action
    action_prob = args.action_prob
    life_done = args.life_done
    pre_obs_norm_step = args.obs_norm_step

    reward_rms = RunningMeanStd()
    obs_rms = RunningMeanStd(shape=(1, 1, 84, 84))
    discounted_reward = RewardForwardFilter(int_gamma)

    if args.train_method == 'RND':
        agent = RNDAgent
    else:
        raise NotImplementedError

    if args.env_type == 'atari':
        env_type = AtariEnvironment
    else:
        raise NotImplementedError

    agent = agent(
        input_size,
        output_size,
        num_worker,
        num_step,
        gamma,
        lam=lam,
        learning_rate=learning_rate,
        ent_coef=entropy_coef,
        clip_grad_norm=clip_grad_norm,
        epoch=epoch,
        batch_size=batch_size,
        ppo_eps=ppo_eps,
        use_cuda=use_cuda,
        use_gae=use_gae,
        use_noisy_net=use_noisy_net
    )

    logger.info('Start to initialize workers')
    works = []
    parent_conns = []
    child_conns = []
    for idx in range(num_worker):
        parent_conn, child_conn = Pipe()
        work = env_type(env_id, is_render, idx, child_conn,
                        sticky_action=sticky_action, p=action_prob,
                        life_done=life_done,
                        max_step_per_episode=args.max_step_per_episode)
        work.start()
        works.append(work)
        parent_conns.append(parent_conn)
        child_conns.append(child_conn)

    states = np.zeros([num_worker, 4, 84, 84])

    sample_episode = 0
    sample_rall = 0
    sample_step = 0
    sample_env_idx = 0
    sample_i_rall = 0
    global_update = 0
    global_step = 0

    # initialize observation normalization with random-agent rollouts
    logger.info('Start to initialize observation normalization parameters...')
    next_obs = []
    for step in range(num_step * pre_obs_norm_step):
        actions = np.random.randint(0, output_size, size=(num_worker,))
        for parent_conn, action in zip(parent_conns, actions):
            parent_conn.send(action)
        for parent_conn in parent_conns:
            s, r, d, rd, lr = parent_conn.recv()
            next_obs.append(s[3, :, :].reshape([1, 84, 84]))
        if len(next_obs) % (num_step * num_worker) == 0:
            next_obs = np.stack(next_obs)
            obs_rms.update(next_obs)
            next_obs = []
    logger.info('Finished initializing observation normalization.')

    pbar = tqdm.tqdm(total=args.total_frames)
    while True:
        logger.info('Iteration: {}'.format(global_update))
        total_state, total_reward, total_done, total_next_state, \
            total_action, total_int_reward, total_next_obs, total_ext_values, \
            total_int_values, total_policy, total_policy_np = \
            [], [], [], [], [], [], [], [], [], [], []
        global_step += (num_worker * num_step)
        global_update += 1

        # Step 1. n-step rollout
        for _ in range(num_step):
            actions, value_ext, value_int, policy = agent.get_action(np.float32(states) / 255.)
            for parent_conn, action in zip(parent_conns, actions):
                parent_conn.send(action)

            next_states, rewards, dones, real_dones, log_rewards, next_obs = \
                [], [], [], [], [], []
            for parent_conn in parent_conns:
                s, r, d, rd, lr = parent_conn.recv()
                next_states.append(s)
                rewards.append(r)
                dones.append(d)
                real_dones.append(rd)
                log_rewards.append(lr)
                next_obs.append(s[3, :, :].reshape([1, 84, 84]))

            next_states = np.stack(next_states)
            rewards = np.hstack(rewards)
            dones = np.hstack(dones)
            real_dones = np.hstack(real_dones)
            next_obs = np.stack(next_obs)

            # total reward = intrinsic reward + extrinsic reward
            intrinsic_reward = agent.compute_intrinsic_reward(
                ((next_obs - obs_rms.mean) / np.sqrt(obs_rms.var)).clip(-5, 5))
            intrinsic_reward = np.hstack(intrinsic_reward)
            sample_i_rall += intrinsic_reward[sample_env_idx]

            total_next_obs.append(next_obs)
            total_int_reward.append(intrinsic_reward)
            total_state.append(states)
            total_reward.append(rewards)
            total_done.append(dones)
            total_action.append(actions)
            total_ext_values.append(value_ext)
            total_int_values.append(value_int)
            total_policy.append(policy)
            total_policy_np.append(policy.cpu().numpy())

            states = next_states[:, :, :, :]

            sample_rall += log_rewards[sample_env_idx]
            sample_step += 1
            if real_dones[sample_env_idx]:
                sample_episode += 1
                writer.add_scalar('data/returns_vs_frames', sample_rall, global_step)
                writer.add_scalar('data/lengths_vs_frames', sample_step, global_step)
                writer.add_scalar('data/reward_per_epi', sample_rall, sample_episode)
                writer.add_scalar('data/reward_per_rollout', sample_rall, global_update)
                writer.add_scalar('data/step', sample_step, sample_episode)
                sample_rall = 0
                sample_step = 0
                sample_i_rall = 0

        # calculate last next value
        _, value_ext, value_int, _ = agent.get_action(np.float32(states) / 255.)
        total_ext_values.append(value_ext)
        total_int_values.append(value_int)

        total_state = np.stack(total_state).transpose([1, 0, 2, 3, 4]).reshape([-1, 4, 84, 84])
        total_reward = np.stack(total_reward).transpose().clip(-1, 1)
        total_action = np.stack(total_action).transpose().reshape([-1])
        total_done = np.stack(total_done).transpose()
        total_next_obs = np.stack(total_next_obs).transpose([1, 0, 2, 3, 4]).reshape([-1, 1, 84, 84])
        total_ext_values = np.stack(total_ext_values).transpose()
        total_int_values = np.stack(total_int_values).transpose()
        total_logging_policy = np.vstack(total_policy_np)

        # Step 2. calculate intrinsic reward
        # running estimate of the intrinsic reward scale
        total_int_reward = np.stack(total_int_reward).transpose()
        total_reward_per_env = np.array(
            [discounted_reward.update(reward_per_step) for reward_per_step in total_int_reward.T])
        mean, std, count = np.mean(total_reward_per_env), np.std(total_reward_per_env), len(total_reward_per_env)
        reward_rms.update_from_moments(mean, std ** 2, count)

        # normalize intrinsic reward by its running standard deviation
        total_int_reward /= np.sqrt(reward_rms.var)
        writer.add_scalar('data/int_reward_per_epi', np.sum(total_int_reward) / num_worker, sample_episode)
        writer.add_scalar('data/int_reward_per_rollout', np.sum(total_int_reward) / num_worker, global_update)

        # logging max action probability
        writer.add_scalar('data/max_prob', softmax(total_logging_policy).max(1).mean(), sample_episode)

        # Step 3. make target and advantage
        # extrinsic reward: compute target and advantage
        ext_target, ext_adv = make_train_data(total_reward, total_done,
                                              total_ext_values, gamma,
                                              num_step, num_worker)

        # intrinsic reward: treated as non-episodic, so dones are all zero
        int_target, int_adv = make_train_data(total_int_reward,
                                              np.zeros_like(total_int_reward),
                                              total_int_values, int_gamma,
                                              num_step, num_worker)

        # combine extrinsic and intrinsic advantages
        total_adv = int_adv * int_coef + ext_adv * ext_coef

        # Step 4. update obs normalization parameters
        obs_rms.update(total_next_obs)

        # Step 5. training
        agent.train_model(np.float32(total_state) / 255., ext_target, int_target,
                          total_action, total_adv,
                          ((total_next_obs - obs_rms.mean) / np.sqrt(obs_rms.var)).clip(-5, 5),
                          total_policy)

        if args.save_models and global_update % 1000 == 0:
            torch.save(agent.model.state_dict(), 'models/{}-{}.model'.format(env_id, global_update))
            logger.info('Now Global Step: {}'.format(global_step))
            torch.save(agent.model.state_dict(), model_path)
            torch.save(agent.rnd.predictor.state_dict(), predictor_path)
            torch.save(agent.rnd.target.state_dict(), target_path)

        pbar.update(num_worker * num_step)
        if global_step >= args.total_frames:
            break

    pbar.close()
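# RewardForwardFilter and RunningMeanStd are imported utilities, not defined
# in this file. Minimal sketches under the assumption that they follow the
# standard RND reference implementation: a per-env discounted running sum used
# only to estimate the scale of intrinsic rewards, and Chan et al. parallel
# moment updates. The real helpers may differ in detail.
import numpy as np


class RewardForwardFilter:
    def __init__(self, gamma):
        self.rewems = None
        self.gamma = gamma

    def update(self, rews):
        # running discounted sum of intrinsic rewards, one entry per env;
        # its std is what reward_rms tracks above
        if self.rewems is None:
            self.rewems = rews
        else:
            self.rewems = self.rewems * self.gamma + rews
        return self.rewems


class RunningMeanStd:
    def __init__(self, epsilon=1e-4, shape=()):
        self.mean = np.zeros(shape, 'float64')
        self.var = np.ones(shape, 'float64')
        self.count = epsilon

    def update(self, x):
        self.update_from_moments(np.mean(x, axis=0), np.var(x, axis=0), x.shape[0])

    def update_from_moments(self, batch_mean, batch_var, batch_count):
        # combine two sets of moments (parallel-variance formula)
        delta = batch_mean - self.mean
        tot_count = self.count + batch_count
        new_mean = self.mean + delta * batch_count / tot_count
        m_a = self.var * self.count
        m_b = batch_var * batch_count
        m2 = m_a + m_b + np.square(delta) * self.count * batch_count / tot_count
        self.mean, self.var, self.count = new_mean, m2 / tot_count, tot_count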
def main():
    if 'NAME' in os.environ.keys():
        NAME = os.environ['NAME']
    else:
        raise ValueError('set NAME via env variable')

    try:
        env_settings = json.load(open(default_config['CarIntersectConfigPath'], 'r'))
    except Exception:
        # fall back to YAML if the config is not valid JSON
        env_settings = yaml.load(open(default_config['CarIntersectConfigPath'], 'r'))

    if 'home-test' not in NAME:
        wandb.init(
            project='CarRacing_RND',
            reinit=True,
            name=f'rnd_{NAME}',
            config={'env_config': env_settings, 'agent_config': default_config},
        )

    train_method = default_config['TrainMethod']
    env_id = default_config['EnvID']

    seed = np.random.randint(0, 2 ** 16 - 1)

    print(f'use name : {NAME}')
    print(f"use env config : {default_config['CarIntersectConfigPath']}")
    print(f'use seed : {seed}')
    print(f"use device : {os.environ['DEVICE']}")

    os.chdir('..')
    env = makeCarIntersect(env_settings)
    eval_env = create_eval_env(makeCarIntersect(env_settings))

    input_size = env.observation_space.shape
    assert isinstance(env.action_space, gym.spaces.Box)
    action_size = env.action_space.shape[0]
    env.close()

    # checkpoint loading/saving is disabled in this variant
    is_load_model = True
    is_render = False

    use_cuda = default_config.getboolean('UseGPU')
    use_gae = default_config.getboolean('UseGAE')
    use_noisy_net = default_config.getboolean('UseNoisyNet')
    lam = float(default_config['Lambda'])
    num_worker = int(default_config['NumEnv'])
    num_step = int(default_config['NumStep'])
    ppo_eps = float(default_config['PPOEps'])
    epoch = int(default_config['Epoch'])
    mini_batch = int(default_config['MiniBatch'])
    batch_size = int(num_step * num_worker / mini_batch)
    learning_rate = float(default_config['LearningRate'])
    entropy_coef = float(default_config['Entropy'])
    gamma = float(default_config['Gamma'])
    int_gamma = float(default_config['IntGamma'])
    clip_grad_norm = float(default_config['ClipGradNorm'])
    ext_coef = float(default_config['ExtCoef'])
    int_coef = float(default_config['IntCoef'])
    sticky_action = default_config.getboolean('StickyAction')
    action_prob = float(default_config['ActionProb'])
    life_done = default_config.getboolean('LifeDone')

    reward_rms = RunningMeanStd()
    obs_rms = RunningMeanStd(shape=(1, 1, 84, 84))
    pre_obs_norm_step = int(default_config['ObsNormStep'])
    discounted_reward = RewardForwardFilter(int_gamma)

    agent = RNDAgent(
        input_size,
        action_size,
        num_worker,
        num_step,
        gamma,
        lam=lam,
        learning_rate=learning_rate,
        ent_coef=entropy_coef,
        clip_grad_norm=clip_grad_norm,
        epoch=epoch,
        batch_size=batch_size,
        ppo_eps=ppo_eps,
        use_cuda=use_cuda,
        use_gae=use_gae,
        use_noisy_net=use_noisy_net,
        device=os.environ['DEVICE'],
    )

    works = []
    parent_conns = []
    child_conns = []
    for idx in range(num_worker):
        parent_conn, child_conn = Pipe()
        work = AtariEnvironment(env_id, is_render, idx, child_conn,
                                sticky_action=sticky_action, p=action_prob,
                                life_done=life_done, settings=env_settings)
        work.start()
        works.append(work)
        parent_conns.append(parent_conn)
        child_conns.append(child_conn)

    os.chdir('rnd_continues')

    states = np.zeros([num_worker, 4, 84, 84])

    sample_episode = 0
    sample_rall = 0
    sample_step = 0
    sample_env_idx = 0
    sample_i_rall = 0
    global_update = 0
    global_step = 0

    logger = Logger(None, use_console=True, use_wandb=True, log_interval=1)

    print('Test evaluator:')
    evaluate_and_log(
        eval_env=eval_env,
        action_get_method=lambda eval_state: agent.get_action(
            np.tile(np.float32(eval_state), (1, 4, 1, 1)) / 255.
        )[0][0].cpu().numpy(),
        logger=logger,
        log_animation=False,
        exp_class='RND',
        exp_name=NAME,
        debug=True,
    )
    print('end evaluator test.')

    # initialize observation normalization with random-agent rollouts
    print('Start to initialize observation normalization parameters...')
    next_obs = []
    for step in range(num_step * pre_obs_norm_step):
        actions = np.random.uniform(-1, 1, size=(num_worker, action_size))
        for parent_conn, action in zip(parent_conns, actions):
            parent_conn.send(action)
        for parent_conn in parent_conns:
            s, r, d, rd, lr = parent_conn.recv()
            next_obs.append(s[3, :, :].reshape([1, 84, 84]))
        if len(next_obs) % (num_step * num_worker) == 0:
            next_obs = np.stack(next_obs)
            obs_rms.update(next_obs)
            next_obs = []
    print('Finished initializing observation normalization.')

    while True:
        total_state, total_reward, total_done, total_next_state, total_action, \
            total_int_reward, total_next_obs, total_ext_values, total_int_values, \
            total_policy_log_prob, total_policy_log_prob_np = \
            [], [], [], [], [], [], [], [], [], [], []

        # Step 1. n-step rollout
        for _ in range(num_step):
            global_step += num_worker
            actions, value_ext, value_int, policy_log_prob = agent.get_action(np.float32(states) / 255.)
            for parent_conn, action in zip(parent_conns, actions):
                parent_conn.send(action.cpu().numpy())

            next_states, rewards, dones, real_dones, log_rewards, next_obs = [], [], [], [], [], []
            for parent_conn in parent_conns:
                s, r, d, rd, lr = parent_conn.recv()
                next_states.append(s)
                rewards.append(r)
                dones.append(d)
                real_dones.append(rd)
                log_rewards.append(lr)
                next_obs.append(s[3, :, :].reshape([1, 84, 84]))

            next_states = np.stack(next_states)
            rewards = np.hstack(rewards)
            dones = np.hstack(dones)
            real_dones = np.hstack(real_dones)
            next_obs = np.stack(next_obs)

            # total reward = intrinsic reward + extrinsic reward
            intrinsic_reward = agent.compute_intrinsic_reward(
                ((next_obs - obs_rms.mean) / np.sqrt(obs_rms.var)).clip(-5, 5))
            intrinsic_reward = np.hstack(intrinsic_reward)
            sample_i_rall += intrinsic_reward[sample_env_idx]

            total_next_obs.append(next_obs)
            total_int_reward.append(intrinsic_reward)
            total_state.append(states)
            total_reward.append(rewards)
            total_done.append(dones)
            total_action.append(actions.cpu().numpy())
            total_ext_values.append(value_ext)
            total_int_values.append(value_int)
            total_policy_log_prob.extend(policy_log_prob.cpu().numpy())

            states = next_states[:, :, :, :]

            sample_rall += log_rewards[sample_env_idx]
            sample_step += 1
            if real_dones[sample_env_idx]:
                sample_episode += 1
                logger.log_it({
                    'reward_per_episode': sample_rall,
                    'intrinsic_reward': sample_i_rall,
                    'episode_steps': sample_step,
                    'global_step_cnt': global_step,
                    'updates_cnt': global_update,
                })
                logger.publish_logs(step=global_step)
                sample_rall = 0
                sample_step = 0
                sample_i_rall = 0

        # calculate last next value
        _, value_ext, value_int, _ = agent.get_action(np.float32(states) / 255.)
        total_ext_values.append(value_ext)
        total_int_values.append(value_int)

        total_state = np.stack(total_state).transpose([1, 0, 2, 3, 4]).reshape([-1, 4, 84, 84])
        total_reward = np.stack(total_reward).transpose().clip(-1, 1)
        total_action = np.array(total_action).reshape((-1, action_size))
        total_done = np.stack(total_done).transpose()
        total_next_obs = np.stack(total_next_obs).transpose([1, 0, 2, 3, 4]).reshape([-1, 1, 84, 84])
        total_ext_values = np.stack(total_ext_values).transpose()
        total_int_values = np.stack(total_int_values).transpose()

        # Step 2. calculate intrinsic reward
        # running estimate of the intrinsic reward scale
        total_int_reward = np.stack(total_int_reward).transpose()
        total_reward_per_env = np.array(
            [discounted_reward.update(reward_per_step) for reward_per_step in total_int_reward.T])
        mean, std, count = np.mean(total_reward_per_env), np.std(total_reward_per_env), len(total_reward_per_env)
        reward_rms.update_from_moments(mean, std ** 2, count)

        # normalize intrinsic reward by its running standard deviation
        total_int_reward /= np.sqrt(reward_rms.var)

        # Step 3. make target and advantage
        # extrinsic reward: compute target and advantage
        ext_target, ext_adv = make_train_data(total_reward, total_done,
                                              total_ext_values, gamma,
                                              num_step, num_worker)

        # intrinsic reward: treated as non-episodic, so dones are all zero
        int_target, int_adv = make_train_data(total_int_reward,
                                              np.zeros_like(total_int_reward),
                                              total_int_values, int_gamma,
                                              num_step, num_worker)

        # combine extrinsic and intrinsic advantages
        total_adv = int_adv * int_coef + ext_adv * ext_coef

        # Step 4. update obs normalization parameters
        obs_rms.update(total_next_obs)

        global_update += 1

        # Step 5. training
        agent.train_model(np.float32(total_state) / 255., ext_target, int_target,
                          total_action, total_adv,
                          ((total_next_obs - obs_rms.mean) / np.sqrt(obs_rms.var)).clip(-5, 5),
                          total_policy_log_prob)

        if global_update % 100 == 0:
            evaluate_and_log(
                eval_env=eval_env,
                action_get_method=lambda eval_state: agent.get_action(
                    np.tile(np.float32(eval_state), (1, 4, 1, 1)) / 255.
                )[0][0].cpu().numpy(),
                logger=logger,
                log_animation=True,
                exp_class='RND',
                exp_name=NAME,
            )
            logger.publish_logs(step=global_step)
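# compute_intrinsic_reward is a method of RNDAgent, not shown in this file.
# A minimal sketch of the standard RND formulation: the intrinsic reward is
# the prediction error of a trained predictor network against a fixed,
# randomly initialized target network. The attribute names (self.rnd.target,
# self.rnd.predictor) are inferred from how the checkpoints are saved above;
# self.device handling is an assumption.
import torch


def compute_intrinsic_reward(self, next_obs):
    next_obs = torch.FloatTensor(next_obs).to(self.device)
    target_feature = self.rnd.target(next_obs)      # fixed random network
    predict_feature = self.rnd.predictor(next_obs)  # trained to match the target
    # per-sample MSE; novel observations are poorly predicted -> high reward
    intrinsic_reward = (target_feature - predict_feature).pow(2).sum(1) / 2
    return intrinsic_reward.data.cpu().numpy()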
def main():
    if EXPERIMENT_NAME not in os.listdir():
        os.mkdir(EXPERIMENT_NAME)

    # neural networks with a single hidden layer
    for feature in FEATURES:
        try:
            data = np.load(feature + "_stats.npy", allow_pickle=True).item()
            pca = joblib.load("pca_" + feature + "_stats_noval")

            train_data, train_data_classes = make_train_data(data, False)
            test_data, test_data_classes = make_test_data(data)
            val_data, val_data_classes = make_val_data(data)
            train_data_pca = np.array(pca.transform(train_data))
            test_data_pca = np.array(pca.transform(test_data))
            val_data_pca = np.array(pca.transform(val_data))
            train_data_classes = np.array(train_data_classes)
            val_data_classes = np.array(val_data_classes)
            test_data_classes = np.array(test_data_classes)

            print("Computing " + feature + " single hidden layer neural network")
            res, model = experiment_mlp_singlelayer(
                train_data_pca, train_data_classes,
                val_data_pca, val_data_classes,
                test_data_pca, test_data_classes, MLP_VERBOSE)
            if res is not None:
                if SAVE_RESULTS:
                    filename = EXPERIMENT_NAME + "_" + feature + "_mlp_single_layer_results"
                    path = os.path.join(EXPERIMENT_NAME, filename)
                    joblib.dump(res, path)
                if SAVE_RESULTS_TXT:
                    filename = EXPERIMENT_NAME + "_" + feature + "_mlp_single_layer_results.txt"
                    path = os.path.join(EXPERIMENT_NAME, filename)
                    save_txt(res, path)
                if SAVE_MODEL:
                    filename = EXPERIMENT_NAME + "_" + feature + "_mlp_single_layer"
                    path = os.path.join(EXPERIMENT_NAME, filename)
                    save_mlp(model, path)
        except Exception as e:
            print("Error during " + EXPERIMENT_NAME + " " + feature + " single hidden layer neural network")
            print(e)

    # neural networks with multiple hidden layers (welch_32 only)
    data = np.load("welch_32_stats.npy", allow_pickle=True).item()
    pca = joblib.load("pca_welch_32_stats_noval")

    train_data, train_data_classes = make_train_data(data, False)
    test_data, test_data_classes = make_test_data(data)
    val_data, val_data_classes = make_val_data(data)
    train_data_pca = np.array(pca.transform(train_data))
    test_data_pca = np.array(pca.transform(test_data))
    val_data_pca = np.array(pca.transform(val_data))
    train_data_classes = np.array(train_data_classes)
    val_data_classes = np.array(val_data_classes)
    test_data_classes = np.array(test_data_classes)

    print("Computing welch_32 multiple hidden layer neural network, specification 1:")
    print("3 hidden layers, LReLU activation (a = 0.02), learning rate = 0.01")
    print("decay = 1e-6, momentum = 0.9, patience = 50, max epochs = 2000")
    try:
        res, model = experiment_mlp_1(
            train_data_pca, train_data_classes,
            val_data_pca, val_data_classes,
            test_data_pca, test_data_classes, MLP_VERBOSE)
        if res is not None:
            if SAVE_RESULTS:
                filename = EXPERIMENT_NAME + "_welch_32_mlp_multi_layer_spec_1_results"
                path = os.path.join(EXPERIMENT_NAME, filename)
                joblib.dump(res, path)
            if SAVE_RESULTS_TXT:
                filename = EXPERIMENT_NAME + "_welch_32_mlp_multi_layer_spec_1_results.txt"
                path = os.path.join(EXPERIMENT_NAME, filename)
                save_txt(res, path)
            if SAVE_MODEL:
                filename = EXPERIMENT_NAME + "_welch_32_mlp_multi_layer_spec_1"
                path = os.path.join(EXPERIMENT_NAME, filename)
                save_mlp(model, path)
    except Exception as e:
        print("Error during " + EXPERIMENT_NAME + " welch_32 multiple hidden layer neural network, specification 1")
        print(e)

    print("Computing welch_32 multiple hidden layer neural network, specification 2:")
    print("4 hidden layers, tanh + LReLU activation (a = 0.02), learning rate = 0.005")
    print("decay = 1e-6, momentum = 0.9, patience = 250, max epochs = 3000")
    try:
        res, model = experiment_mlp_2(
            train_data_pca, train_data_classes,
            val_data_pca, val_data_classes,
            test_data_pca, test_data_classes, MLP_VERBOSE)
        if res is not None:
            if SAVE_RESULTS:
                filename = EXPERIMENT_NAME + "_welch_32_mlp_multi_layer_spec_2_results"
                path = os.path.join(EXPERIMENT_NAME, filename)
                joblib.dump(res, path)
            if SAVE_RESULTS_TXT:
                filename = EXPERIMENT_NAME + "_welch_32_mlp_multi_layer_spec_2_results.txt"
                path = os.path.join(EXPERIMENT_NAME, filename)
                save_txt(res, path)
            if SAVE_MODEL:
                filename = EXPERIMENT_NAME + "_welch_32_mlp_multi_layer_spec_2"
                path = os.path.join(EXPERIMENT_NAME, filename)
                save_mlp(model, path)
    except Exception as e:
        print("Error during " + EXPERIMENT_NAME + " welch_32 multiple hidden layer neural network, specification 2")
        print(e)

    print("Computing welch_32 multiple hidden layer neural network, specification 3:")
    print("6 hidden layers, ReLU activation, learning rate = 0.01")
    print("decay = 1e-6, momentum = 0.9, patience = 70, max epochs = 2000")
    try:
        res, model = experiment_mlp_3(
            train_data_pca, train_data_classes,
            val_data_pca, val_data_classes,
            test_data_pca, test_data_classes, MLP_VERBOSE)
        if res is not None:
            if SAVE_RESULTS:
                filename = EXPERIMENT_NAME + "_welch_32_mlp_multi_layer_spec_3_results"
                path = os.path.join(EXPERIMENT_NAME, filename)
                joblib.dump(res, path)
            if SAVE_RESULTS_TXT:
                filename = EXPERIMENT_NAME + "_welch_32_mlp_multi_layer_spec_3_results.txt"
                path = os.path.join(EXPERIMENT_NAME, filename)
                save_txt(res, path)
            if SAVE_MODEL:
                filename = EXPERIMENT_NAME + "_welch_32_mlp_multi_layer_spec_3"
                path = os.path.join(EXPERIMENT_NAME, filename)
                save_mlp(model, path)
    except Exception as e:
        print("Error during " + EXPERIMENT_NAME + " welch_32 multiple hidden layer neural network, specification 3")
        print(e)

    print("Computing welch_32 multiple hidden layer neural network, specification 4:")
    print("3 hidden layers, tanh activation, learning rate = 0.01")
    print("decay = 1e-6, momentum = 0.9, patience = 250, max epochs = 3000")
    try:
        res, model = experiment_mlp_4(
            train_data_pca, train_data_classes,
            val_data_pca, val_data_classes,
            test_data_pca, test_data_classes, MLP_VERBOSE)
        if res is not None:
            if SAVE_RESULTS:
                filename = EXPERIMENT_NAME + "_welch_32_mlp_multi_layer_spec_4_results"
                path = os.path.join(EXPERIMENT_NAME, filename)
                joblib.dump(res, path)
            if SAVE_RESULTS_TXT:
                filename = EXPERIMENT_NAME + "_welch_32_mlp_multi_layer_spec_4_results.txt"
                path = os.path.join(EXPERIMENT_NAME, filename)
                save_txt(res, path)
            if SAVE_MODEL:
                filename = EXPERIMENT_NAME + "_welch_32_mlp_multi_layer_spec_4"
                path = os.path.join(EXPERIMENT_NAME, filename)
                save_mlp(model, path)
    except Exception as e:
        print("Error during " + EXPERIMENT_NAME + " welch_32 multiple hidden layer neural network, specification 4")
        print(e)
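# The experiment_mlp_* helpers are defined elsewhere. A minimal sketch of the
# single-hidden-layer variant, assuming a Keras model trained with SGD and
# early stopping, since the printed hyperparameters (decay, momentum,
# patience, max epochs) match that API. The hidden width (64), the integer
# class labels, and the result dict are assumptions, not the project's API;
# note that the `decay` argument exists only in older Keras optimizers.
from tensorflow import keras


def experiment_mlp_singlelayer(train_x, train_y, val_x, val_y, test_x, test_y, verbose=0):
    model = keras.Sequential([
        keras.layers.Dense(64, activation='relu', input_shape=(train_x.shape[1],)),
        keras.layers.Dense(int(train_y.max()) + 1, activation='softmax'),
    ])
    opt = keras.optimizers.SGD(learning_rate=0.01, decay=1e-6, momentum=0.9)
    model.compile(optimizer=opt, loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    # early stopping on the validation set, keeping the best weights
    stop = keras.callbacks.EarlyStopping(patience=50, restore_best_weights=True)
    model.fit(train_x, train_y, validation_data=(val_x, val_y),
              epochs=2000, callbacks=[stop], verbose=verbose)
    loss, acc = model.evaluate(test_x, test_y, verbose=0)
    res = {"test_loss": loss, "test_accuracy": acc}
    return res, model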
def evaluate_masks(args, rng, masks, ddt_partials_u, path_save_model):
    X_deltaout, Y_tf, ctdata0l, ctdata0r, ctdata1l, ctdata1r = make_train_data(
        args, args.nbre_sample, args.nombre_round_eval, rng)
    X_deltaout_v, Y_vf, ctdata0l_v, ctdata0r_v, ctdata1l_v, ctdata1r_v = make_train_data(
        args, args.nbre_sampleval, args.nombre_round_eval, rng)

    res_all = []
    all_masks = []
    dico_train = {}
    dico_val = {}
    for masks_act in range(len(masks[0])):
        ddt_partials = {}
        hamming_nbre = 0
        masks_unique = [[masks[i][masks_act]] for i in range(len(masks))]
        name_input_cic = ""
        for i in range(len(masks)):
            # Hamming weight of the mask: number of set bits
            hamming_nbre += np.sum(
                np.array([int(x) for x in list('{0:0b}'.format(masks_unique[i][0]))]))
            name_input_cic += str(masks_unique[i][0])
            name_input_cic += "_"
        name_input_cic = name_input_cic[:-1]
        all_masks.append(name_input_cic)
        ddt_partials[name_input_cic] = ddt_partials_u[name_input_cic]
        nbre_param = len(ddt_partials[list(ddt_partials.keys())[0]].keys())

        X_deltaout, X_DDT, feature_name = convert_binary_to_probability(
            args, ctdata0l, ctdata0r, ctdata1l, ctdata1r,
            ddt_partials, masks_unique, flag=False)
        X_deltaout_v, X_DDT_v, feature_name_v = convert_binary_to_probability(
            args, ctdata0l_v, ctdata0r_v, ctdata1l_v, ctdata1r_v,
            ddt_partials, masks_unique, flag=True)
        dico_train[name_input_cic] = [X_DDT, feature_name]
        dico_val[name_input_cic] = [X_DDT_v, feature_name_v]

        param_best = {
            'alpha': 0.3922345684859479,
            'average': False,
            'l1_ratio': 0.5605798090010486,
            'loss': 'hinge',
            'penalty': 'elasticnet',
            'tol': 0.01
        }
        clf = linear_model.SGDClassifier(**param_best, random_state=args.seed)
        clf.fit(X_DDT, Y_tf)
        y_pred = clf.predict(X_DDT_v)
        res_all.append([
            hamming_nbre,
            1 - int(np.log2(nbre_param) + 0.1) / hamming_nbre,
            accuracy_score(y_pred=y_pred, y_true=Y_vf),
            "QQ"
        ])
        # save_logs(path_save_model + 'logs_linear_reg.txt', y_pred, Y_vf, clf, X_DDT, Y_tf)
        # plot_coefficients(clf, feature_name, path_save_model, name=path_save_model + "features_importances_Linear.png")
        # https://towardsdatascience.com/feature-selection-techniques-for-classification-and-python-tips-for-their-application-10c0ddd7918b
        del ddt_partials

    res_all = np.array(res_all)
    # note: np.save appends '.npy' to this filename automatically
    np.save(path_save_model + 'masks_quality.txt', res_all)

    data = res_all
    data2 = np.zeros_like(data, dtype="float")
    data2[:, 0] = np.nan_to_num(np.array([int(x) for x in data[:, 0]]))
    data2[:, 1] = np.nan_to_num(np.array([float(x) for x in data[:, 1]]))
    data2[:, 2] = np.nan_to_num(np.array([float(x) for x in data[:, 2]]))
    data2[:, 3] = np.nan_to_num(np.array([1 for x in data[:, 3]]))

    data4 = np.load(args.path_random)
    data3 = np.zeros_like(data4, dtype="float")
    data3[:, 0] = np.array([int(x) for x in data4[:, 0]])
    data3[:, 1] = np.array([1 - float(x) for x in data4[:, 1]])
    data3[:, 2] = np.array([float(x) for x in data4[:, 2]])
    data3[:, 3] = np.array([0 for x in data4[:, 3]])

    data_f = np.concatenate((data2, data3))
    data_pd = pd.DataFrame(
        data_f, columns=["hamming", "compression", "accuracy_alone", "label"])
    data_pd = data_pd.sort_values(by=['hamming', "compression"])

    fig = plt.figure(figsize=(40, 15))
    plt.subplot(1, 3, 1)
    x = data_pd.hamming.values[data_pd['label'] == 1]
    ys = data_pd.compression.values[data_pd['label'] == 1]
    colors = cm.rainbow(np.linspace(0, 1, len(ys)))
    plt.scatter(x, ys, color=colors[0])
    x = data_pd.hamming.values[data_pd['label'] == 0]
    ys = data_pd.compression.values[data_pd['label'] == 0]
    plt.scatter(x, ys, color=colors[-1])
    plt.legend(["SHAPLEY MASKS", "RANDOM MASKS"])
    plt.xlabel("Hamming weight")
    plt.ylabel("Compression of the DDT by the mask")
plt.title("Compressions with Number of Hamming") plt.subplot(1, 3, 2) x = data_pd.hamming.values[data_pd['label'] == 1] ys = data_pd.accuracy_alone[data_pd['label'] == 1] plt.scatter(x, ys, color=colors[0]) x = data_pd.hamming.values[data_pd['label'] == 0] ys = data_pd.accuracy_alone.values[data_pd['label'] == 0] plt.scatter(x, ys, color=colors[-1]) plt.legend(["SHAPLEY MASKS", "RANDOM MASKS"]) plt.xlabel("Number of Hamming") plt.ylabel("Accuracy of the mask alone") plt.title("Accuracy with Number of Hamming") plt.subplot(1, 3, 3) x = data_pd.compression.values[data_pd['label'] == 1] ys = data_pd.accuracy_alone[data_pd['label'] == 1] plt.scatter(x, ys, color=colors[0]) x = data_pd.compression.values[data_pd['label'] == 0] ys = data_pd.accuracy_alone.values[data_pd['label'] == 0] plt.scatter(x, ys, color=colors[-1]) plt.legend(["SHAPLEY MASKS", "RANDOM MASKS"]) plt.ylabel("Accuracy of the mask alone") plt.xlabel("Compression of the DDT by the mask") plt.title("Accuracy with compression") fig.suptitle( 'Plot of the 3 characteristic that make a good mask for SHAPLEY masks (blue) and random masks (red)', fontsize=30) plt.savefig(path_save_model + "2D real plot.png") fig = plt.figure(figsize=(30, 30)) ax = fig.add_subplot(111, projection='3d') # For each set of style and range settings, plot n random points in the box # defined by x in [23, 32], y in [0, 100], z in [zlow, zhigh]. for index, m, color_ici in [(1, 'o', 0), (0, '^', -1)]: xs = data_pd.hamming.values[data_pd['label'] == index] ys = data_pd.compression.values[data_pd['label'] == index] zs = data_pd.accuracy_alone.values[data_pd['label'] == index] ax.scatter(xs, ys, zs, marker=m, color=colors[color_ici], s=30) ax.legend(["SHAPLEY masks", "Random masks"]) ax.set_xlabel('Number of Hamming') ax.set_ylabel('Compression of the DDT by the mask') ax.set_zlabel('Accuracy of the mask alone') fig.suptitle( 'Plot of the 3 characteristic that make a good mask for SHAPLEY masks (blue) and random masks (red)', fontsize=16) plt.savefig(path_save_model + "3D real plot.png") def circleOfCorrelations(pc_infos, ebouli): plt.subplot(1, 2, 1) plt.Circle((10, 15), radius=1, color='g', fill=False) circle1 = plt.Circle((0, 0), radius=1, color='g', fill=False) fig = plt.gcf() fig.gca().add_artist(circle1) for idx in range(len(pc_infos["PC-0"])): x = pc_infos["PC-0"][idx] y = pc_infos["PC-1"][idx] plt.plot([0.0, x], [0.0, y], 'k-') plt.plot(x, y, 'rx') plt.annotate(pc_infos.index[idx], xy=(x, y)) plt.xlabel("PC-1 (%s%%)" % str(ebouli[0])[:4].lstrip("0.")) plt.ylabel("PC-2 (%s%%)" % str(ebouli[1])[:4].lstrip("0.")) plt.xlim((-1, 1)) plt.ylim((-1, 1)) plt.axhline(y=0, color='k', ls='--') plt.axvline(x=0, c='k', ls='--') plt.title("Circle of Correlations") def circleOfCorrelations2(pc_infos, ebouli): plt.subplot(1, 2, 1) plt.Circle((10, 15), radius=1, color='g', fill=False) circle1 = plt.Circle((0, 0), radius=1, color='g', fill=False) fig = plt.gcf() fig.gca().add_artist(circle1) for idx in range(len(pc_infos["PC-0"])): x = pc_infos["PC-0"][idx] y = pc_infos["PC-2"][idx] plt.plot([0.0, x], [0.0, y], 'k-') plt.plot(x, y, 'rx') plt.annotate(pc_infos.index[idx], xy=(x, y)) plt.xlabel("PC-1 (%s%%)" % str(ebouli[0])[:4].lstrip("0.")) plt.ylabel("PC-2 (%s%%)" % str(ebouli[2])[:4].lstrip("0.")) plt.xlim((-1, 1)) plt.ylim((-1, 1)) plt.axhline(y=0, color='k', ls='--') plt.axvline(x=0, c='k', ls='--') plt.title("Circle of Correlations") def myPCA(df, color_ici): # Normalize data plt.figure(figsize=(30, 15)) df_norm = StandardScaler().fit_transform( df) # (df - df.mean()) / 
        df_norm = np.nan_to_num(df_norm)
        pca = PCA(n_components=3)
        pca_res = pca.fit_transform(df_norm)
        # scree: explained variance ratio per component
        ebouli = pd.Series(pca.explained_variance_ratio_)
        # circle of correlations: http://stackoverflow.com/a/22996786/1565438
        coef = np.transpose(pca.components_)
        cols = ['PC-' + str(x) for x in range(len(ebouli))]
        pc_infos = pd.DataFrame(coef, columns=cols, index=df.columns)
        circleOfCorrelations(pc_infos, ebouli)
        plt.subplot(1, 2, 2)
        dat = pd.DataFrame(pca_res, columns=cols)
        plt.scatter(dat["PC-0"].values, dat["PC-1"].values, color=color_ici)
        plt.xlabel("PC-1 (%s%%)" % str(ebouli[0])[:4].lstrip("0."))
        plt.ylabel("PC-2 (%s%%)" % str(ebouli[1])[:4].lstrip("0."))
        plt.title("PCA")
        plt.savefig(path_save_model + str(color_ici) + "2D ACP plot pca1-2 .png")

    def myPCA2(df, label_QQ, labelrandom, color1, color2):
        # Normalize data
        plt.figure(figsize=(30, 15))
        df_norm = StandardScaler().fit_transform(df)  # (df - df.mean()) / df.std()
        df_norm = np.nan_to_num(df_norm)
        pca = PCA(n_components=3)
        pca_res = pca.fit_transform(df_norm)
        ebouli = pd.Series(pca.explained_variance_ratio_)
        coef = np.transpose(pca.components_)
        cols = ['PC-' + str(x) for x in range(len(ebouli))]
        pc_infos = pd.DataFrame(coef, columns=cols, index=df.columns)
        circleOfCorrelations(pc_infos, ebouli)
        plt.subplot(1, 2, 2)
        dat = pd.DataFrame(pca_res, columns=cols)
        plt.scatter(dat["PC-0"].values[label_QQ], dat["PC-1"].values[label_QQ], color=color1)
        plt.scatter(dat["PC-0"].values[labelrandom], dat["PC-1"].values[labelrandom], color=color2)
        plt.xlabel("PC-1 (%s%%)" % str(ebouli[0])[:4].lstrip("0."))
        plt.ylabel("PC-2 (%s%%)" % str(ebouli[1])[:4].lstrip("0."))
        plt.title("PCA")
        # the original referenced an undefined `color_ici` here; `color1` is intended
        plt.savefig(path_save_model + str(color1) + "2D ACP plot comparaison pca1-2.png")
        return pca_res

    def myPCA_v2(df, color_ici):
        # Normalize data
        plt.figure(figsize=(30, 15))
        df_norm = StandardScaler().fit_transform(df)  # (df - df.mean()) / df.std()
        df_norm = np.nan_to_num(df_norm)
        pca = PCA(n_components=3)
        pca_res = pca.fit_transform(df_norm)
        ebouli = pd.Series(pca.explained_variance_ratio_)
        coef = np.transpose(pca.components_)
        cols = ['PC-' + str(x) for x in range(len(ebouli))]
        pc_infos = pd.DataFrame(coef, columns=cols, index=df.columns)
        circleOfCorrelations2(pc_infos, ebouli)
        plt.subplot(1, 2, 2)
        dat = pd.DataFrame(pca_res, columns=cols)
        plt.scatter(dat["PC-0"].values, dat["PC-2"].values, color=color_ici)
        plt.xlabel("PC-1 (%s%%)" % str(ebouli[0])[:4].lstrip("0."))
        plt.ylabel("PC-3 (%s%%)" % str(ebouli[2])[:4].lstrip("0."))
        plt.title("PCA")
        plt.savefig(path_save_model + str(color_ici) + "2D ACP plot pca 1 -3 .png")

    def myPCA2_v2(df, label_QQ, labelrandom, color1, color2):
        # Normalize data
        plt.figure(figsize=(30, 15))
        df_norm = StandardScaler().fit_transform(df)  # (df - df.mean()) / df.std()
        df_norm = np.nan_to_num(df_norm)
        pca = PCA(n_components=3)
        pca_res = pca.fit_transform(df_norm)
        ebouli = pd.Series(pca.explained_variance_ratio_)
        coef = np.transpose(pca.components_)
        cols = ['PC-' + str(x) for x in range(len(ebouli))]
        pc_infos = pd.DataFrame(coef, columns=cols, index=df.columns)
        circleOfCorrelations2(pc_infos, ebouli)
        plt.subplot(1, 2, 2)
        dat = pd.DataFrame(pca_res, columns=cols)
        plt.scatter(dat["PC-0"].values[label_QQ], dat["PC-2"].values[label_QQ], color=color1)
        plt.scatter(dat["PC-0"].values[labelrandom], dat["PC-2"].values[labelrandom], color=color2)
        plt.xlabel("PC-1 (%s%%)" % str(ebouli[0])[:4].lstrip("0."))
        plt.ylabel("PC-3 (%s%%)" % str(ebouli[2])[:4].lstrip("0."))
        plt.title("PCA")
        # the original referenced an undefined `color_ici` here; `color1` is intended
        plt.savefig(path_save_model + str(color1) + "2D ACP plot comparaison pca1 - 3.png")

    # df = data_pd[data_pd['label'] == 1].drop("label", axis=1)
    # myPCA(df, colors[0])
    # myPCA_v2(df, colors[0])
    # df = data_pd[data_pd['label'] == 0].drop("label", axis=1)
    # myPCA(df, colors[-1])
    # myPCA_v2(df, colors[-1])
    # df = data_pd.drop("label", axis=1)
    # pca_res = myPCA2(df, data_pd['label'] == 1, data_pd['label'] == 0, colors[0], colors[-1])
    # myPCA2_v2(df, data_pd['label'] == 1, data_pd['label'] == 0, colors[0], colors[-1])

    max_compression = max(data_pd.compression.values) + 0.1
    max_accuracy = max(data_pd.accuracy_alone.values) + 0.1
    min_accuracy = min(data_pd.accuracy_alone.values) - 0.1

    # interpolate the mask-quality metric on a grid for contour plots
    npts = 400
    ngridx = 400
    ngridy = 400
    hamming = np.random.uniform(0, 1, npts)
    compression = np.random.uniform(0, max_compression, npts)
    accuracy = np.random.uniform(min_accuracy, max_accuracy, npts)
    _, score1, score2, score3 = get_score_masks(npts, hamming, compression, accuracy)

    hamming_i = np.linspace(-0.1, 1.1, ngridx)
    compression_i = np.linspace(-0.1, max_compression, ngridy)
    accuracy_i = np.linspace(min_accuracy, max_accuracy, ngridy)

    triang = tri.Triangulation(hamming, compression)
    interpolator = tri.LinearTriInterpolator(triang, score1)
    Xi, Yi = np.meshgrid(hamming_i, compression_i)
    zi = interpolator(Xi, Yi)

    fig, (ax1, ax2, ax3) = plt.subplots(ncols=3, figsize=(40, 15))
    CS = ax1.contour(hamming_i, compression_i, zi, levels=14, linewidths=0.5, colors='k')
    cntr1 = ax1.contourf(hamming_i, compression_i, zi, levels=14, cmap="RdBu_r")
    fig.colorbar(cntr1, ax=ax1)
    ax1.clabel(CS, inline=1, fontsize=10)
    ax1.set(xlim=(0, 1), ylim=(0, max_compression))
    ax1.set_title('Metric: Hamming vs compression')
    ax1.set_xlabel('Hamming')
    ax1.set_ylabel('Compression')

    triang = tri.Triangulation(hamming, accuracy)
    interpolator = tri.LinearTriInterpolator(triang, score2)
    Xi, Yi = np.meshgrid(hamming_i, accuracy_i)
    zi = interpolator(Xi, Yi)
    CS = ax2.contour(hamming_i, accuracy_i, zi, levels=14, linewidths=0.5, colors='k')
    cntr1 = ax2.contourf(hamming_i, accuracy_i, zi, levels=14, cmap="RdBu_r")
    fig.colorbar(cntr1, ax=ax2)
    ax2.clabel(CS, inline=1, fontsize=10)
    ax2.set(xlim=(0, 1), ylim=(min_accuracy, max_accuracy))
    ax2.set_title('Metric: Hamming vs accuracy')
    ax2.set_xlabel('Hamming')
    ax2.set_ylabel('Accuracy')

    triang = tri.Triangulation(compression, accuracy)
    interpolator = tri.LinearTriInterpolator(triang, score3)
    Xi, Yi = np.meshgrid(compression_i, accuracy_i)
    zi = interpolator(Xi, Yi)
    CS = ax3.contour(compression_i, accuracy_i, zi, levels=14, linewidths=0.5, colors='k')
    cntr1 = ax3.contourf(compression_i, accuracy_i, zi, levels=14, cmap="RdBu_r")
    fig.colorbar(cntr1, ax=ax3)
    ax3.clabel(CS, inline=1, fontsize=10)
    ax3.set(xlim=(0, max_compression), ylim=(min_accuracy, max_accuracy))
    ax3.set_title('Metric: compression vs accuracy')
    ax3.set_xlabel('Compression')
    ax3.set_ylabel('Accuracy')

    colors = cm.rainbow(np.linspace(0, 1, len(data_pd.compression.values)))
    ax1.plot(data_pd.hamming.values[data_pd['label'] == 1] / 32,
             data_pd.compression.values[data_pd['label'] == 1],
             'ko', ms=3, color=colors[0])
    ax2.plot(data_pd.hamming.values[data_pd['label'] == 1] / 32,
             data_pd.accuracy_alone.values[data_pd['label'] == 1],
             'ko', ms=3, color=colors[0])
    ax3.plot(data_pd.compression.values[data_pd['label'] == 1],
             data_pd.accuracy_alone.values[data_pd['label'] == 1],
             'ko', ms=3, color=colors[0])
    ax1.plot(data_pd.hamming.values[data_pd['label'] == 0] / 32,
             data_pd.compression.values[data_pd['label'] == 0],
             'ko', ms=3, color=colors[-1])
    ax1.legend(["QQ MASKS", "RANDOM MASKS"])
    ax2.plot(data_pd.hamming.values[data_pd['label'] == 0] / 32,
             data_pd.accuracy_alone.values[data_pd['label'] == 0],
             'ko', ms=3, color=colors[-1])
    ax2.legend(["QQ MASKS", "RANDOM MASKS"])
    ax3.plot(data_pd.compression.values[data_pd['label'] == 0],
             data_pd.accuracy_alone.values[data_pd['label'] == 0],
             'ko', ms=3, color=colors[-1])
    ax3.legend(["QQ MASKS", "RANDOM MASKS"])
    plt.savefig(path_save_model + "metric.png")

    npts = len(data_pd.hamming.values[data_pd['label'] == 1])
    hamming = data_pd.hamming.values[data_pd['label'] == 1] / 32
    compression = data_pd.compression.values[data_pd['label'] == 1]
    accuracy = data_pd.accuracy_alone.values[data_pd['label'] == 1]
    score_f, score1, score2, score3 = get_score_masks(npts, hamming, compression, accuracy)
    print("")
    print("SCORE OF MASKS:")
    print(score_f)
    print("")

    d = {'masks': all_masks, 'score': score_f}
    df = pd.DataFrame(data=d)
    df = df.sort_values(by=['score'], ascending=False)
    df.to_csv(path_save_model + "masks_and_score.csv", index=False)

    masks_to_keep = df.masks.values
    if args.select_maks_thr_score:
        masks_to_keep = masks_to_keep[df['score'] > args.thr_score]
    if args.select_maks_max_num:
        masks_to_keep = masks_to_keep[:args.max_num]

    masks_final = [[] for x in range(len(args.inputs_type))]
    for mask_ici in masks_to_keep:
        list_mask_ici = mask_ici.split("_")
        for x_index, x_value in enumerate(list_mask_ici):
            masks_final[x_index].append(int(x_value))

    X_DDT_all = np.zeros((len(masks_to_keep), len(ctdata0l ^ ctdata1l)), dtype=np.float16)
    X_DDT_all_v = np.zeros((len(masks_to_keep), len(ctdata0l_v ^ ctdata1l_v)), dtype=np.float16)
    feature_name_all = []
    for mask_ici_index, mask_ici in enumerate(masks_to_keep):
        X_DDT_all[mask_ici_index] = np.squeeze(dico_train[mask_ici][0])
        X_DDT_all_v[mask_ici_index] = np.squeeze(dico_val[mask_ici][0])
        feature_name_all.append(dico_train[mask_ici][1][0])
        # free memory as we go
        dico_train[mask_ici] = 0
        dico_val[mask_ici] = 0
    del dico_train, dico_val
    print("NUMBER OF FINAL MASKS:", len(masks_to_keep))

    dico_final = dict(
        (k, ddt_partials_u[k]) for k in masks_to_keep if k in ddt_partials_u)
    compteur = 0
    for name_input_cic in dico_final.keys():
        compteur += len(dico_final[name_input_cic])
    print()
    print("Number of parameters in the DDT:", compteur)
    print()

    X_DDT_df = pd.DataFrame(X_DDT_all.transpose(), columns=feature_name_all)
    Y_tf_df = pd.DataFrame(Y_tf)
    X_DDT_v_df = pd.DataFrame(X_DDT_all_v.transpose(), columns=feature_name_all)
    Y_vf_df = pd.DataFrame(Y_vf)
    X_DDT_df.to_csv(path_save_model + "X_DDT_df.csv", index=False)
    Y_tf_df.to_csv(path_save_model + "Y_tf_df.csv", index=False)
    X_DDT_v_df.to_csv(path_save_model + "X_DDT_v_df.csv", index=False)
    Y_vf_df.to_csv(path_save_model + "Y_vf_df.csv", index=False)

    index_interet_1 = Y_tf_df.values == 1
    df_1 = X_DDT_df[index_interet_1]
    index_interet_0 = Y_tf_df.values == 0
    df_0 = X_DDT_df[index_interet_0]

    print()
    print("START COMPARISON HISTOGRAM OF SAMPLES")
    print()
    plt.figure(figsize=(20, 7))
    for i, binwidth in enumerate([5]):
        # SPECK samples
        ax = plt.subplot(1, 2, 1)
        ax.hist(df_1.sum(axis=1) / len(feature_name_all),
                bins=int(180 / binwidth), color='blue', edgecolor='black')
        ax.set_title('Histogram of SPECK', size=30)
        ax.set_xlabel('Probability', size=22)
        ax.set_ylabel('Number of samples', size=22)
        # random samples
        ax = plt.subplot(1, 2, 2)
        ax.hist(df_0.sum(axis=1) / len(feature_name_all),
                bins=int(180 / binwidth), color='blue', edgecolor='black')
        ax.set_title('Histogram of RANDOM', size=30)
        ax.set_xlabel('Probability', size=22)
        ax.set_ylabel('Number of samples', size=22)
    plt.tight_layout()
    plt.savefig(path_save_model + "histogramm.png")

    print()
    print("START COMPARISON HISTOGRAM OF FEATURES")
    print()
    plt.figure(figsize=(25, 15))
    df_11 = df_1 / 100.0
    df_00 = df_0 / 100.0
    x1 = 100 * df_11.sum().astype(np.float64) / len(df_1)
    x2 = 100 * df_00.sum().astype(np.float64) / len(df_0)
    # colors and names for the two classes
    colors = ['#E69F00', '#56B4E9']
    names = ['SPECK', 'RANDOM']
    # the original built `indices` as a list, which breaks `indices - width`;
    # an ndarray is intended
    indices = np.arange(len(feature_name_all))
    # calculate optimal bar width
    width = np.min(np.diff(indices)) / 3
    plt.bar(indices - width, x1, width, color='b', label='SPECK')
    plt.bar(indices, x2, width, color='r', label='RANDOM')
    plt.legend()
    plt.xlabel('FEATURE NUMBER')
    plt.ylabel('Normalized sum of ones')
    plt.title('COMPARISON OF FEATURE IMPORTANCES')
    plt.savefig(path_save_model + "COMPARASION FEATURES IMPORANTANCES.png")

    print()
    print("START INDEPENDENCE TEST: FEATURES vs LABELS")
    print()
    df = X_DDT_df
    X = df
    y = Y_tf_df
    # despite the variable name, f_classif is an ANOVA F-test, not chi-squared
    chi_scores = f_classif(X, y)
    plt.figure(figsize=(25, 15))
    p_values = pd.Series(chi_scores[1], index=X.columns)
    p_values.sort_values(ascending=False, inplace=True)
    p_values.to_csv(path_save_model + "START INDEPENACE TEST FEATURES LABELS.csv")
    ax = p_values.plot.bar()
    fig = ax.get_figure()
    fig.savefig(path_save_model + "like_variables_results.png")

    print()
    print("START INDEPENDENCE TEST: FEATURES vs FEATURES")
    print()
    alpha = 0.05
    res = np.zeros((len(feature_name_all), len(feature_name_all)))
    for i, _ in enumerate(tqdm(feature_name_all)):
        if i < len(feature_name_all) - 1:
            feature_name_ici = str(feature_name_all[i])
            # test feature i against all remaining features, then drop it
            X = df.drop(feature_name_ici, axis=1)
            y = df[feature_name_ici]
            chi_scores = f_classif(X, y)
            p_values = pd.Series(chi_scores[1], index=X.columns)
            p_values.sort_values(ascending=False, inplace=True)
            for index_index, index_v in enumerate(p_values.index):
                index_v_new = feature_name_all.index(index_v)
                res[i, int(index_v_new)] = p_values.values[index_index]
            del X, y
            if len(df.columns) > 1:
                df = df.drop(feature_name_ici, axis=1)

    df = pd.DataFrame(res, index=feature_name_all, columns=feature_name_all)
    vals = np.around(df.values, 2)
    norm = plt.Normalize(vals.min() - 1, vals.max() + 1)
    colours = plt.cm.RdBu(vals)
    fig = plt.figure(figsize=(100, 100))
    ax = fig.add_subplot(111, frameon=True, xticks=[], yticks=[])
    the_table = plt.table(cellText=vals, rowLabels=df.index, colLabels=df.columns,
                          colWidths=[0.03] * vals.shape[1],
                          loc='center', cellColours=colours)
    plt.savefig(path_save_model + "COMPARASION INTRA FEATURES XI 2.png")
    df.to_csv(path_save_model + "COMPARASION INTRA FEATURES XI 2.csv", index=False)

    return (dico_final, masks_final, X_DDT_all.transpose(),
            X_DDT_all_v.transpose(), feature_name_all, Y_tf, Y_vf)