def index(request):
    if not Authorize.authorize(request.user):
        return HttpResponseRedirect("amrs_user_validation/access_denied")
    device = u.get_device(request)
    if device["is_mobile"]:
        return render(request, "amrs_reports/index_mobile.html", {})
    else:
        return render(request, "amrs_reports/index.html", {})

def adv_sample_papernot(model_name, dataset, target):
    '''
    data contains (image, target_original_label) from a dataset class
    target is the class to which the data is being misclassified
    '''
    device = utilities.get_device(1)
    EPOCHS = 10
    LAMBDA = 20.0
    NUM_SAMPLES = 10
    EPSILON = 0.5
    L2_loss = nn.MSELoss().to(device)
    Classification_loss = nn.CrossEntropyLoss().to(device)
    model = torch.load("saved_models/" + model_name).to(device)
    data_loader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=True)
    idx = 0
    samples = torch.zeros([NUM_SAMPLES] + list(dataset[0][0].shape))

    # init target as a one-hot vector for the class we want the model to predict
    target_ = target
    target = torch.zeros((1, model.n_classes))
    target[0][target_] = 1

    for data, label in data_loader:
        data = data.to(device)
        sample, target = Variable(data.data.to(device), requires_grad=True), Variable(target).to(device)
        sample = torch.clamp(sample, 0, 1)

        for epoch in range(EPOCHS):
            # Gradient of the combined (misclassification + proximity) loss gives the perturbation step size
            sample, target = Variable(sample.data, requires_grad=True).to(device), Variable(target).to(device)
            output = torch.sigmoid(model(sample))
            loss = L2_loss(output, target) + LAMBDA * L2_loss(sample, data)
            model.zero_grad()
            loss.backward()
            delta = EPSILON * torch.sign(sample.grad.data)

            # Jacobian of the target logit w.r.t. the input pixels
            sample = Variable(sample.data, requires_grad=True).to(device)
            output = model(sample)
            model.zero_grad()
            output[0][target_].backward(retain_graph=True)
            jacobian_t = sample.grad.data

            # Summed Jacobian of all non-target logits
            jacobian_non_t = torch.zeros(jacobian_t.shape)
            for i in range(model.n_classes):
                if i == target_:
                    continue
                model.zero_grad()
                output[0][i].backward(retain_graph=True)
                jacobian_non_t += sample.grad.data

            # Saliency map: keep pixels that increase the target logit and decrease the rest
            saliency = np.zeros(sample.reshape(-1).shape)
            for i in range(sample.shape[2]):
                for j in range(sample.shape[3]):
                    if jacobian_t[0][0][i][j] < 0 or jacobian_non_t[0][0][i][j] > 0:
                        continue
                    saliency[i * sample.shape[3] + j] = jacobian_t[0][0][i][j] * abs(jacobian_non_t[0][0][i][j])
            indices = np.argsort(saliency)

            # Perturb the most salient pixels until the model predicts the target class
            with torch.no_grad():
                for i in range(len(indices) - 1, -1, -1):
                    sample, target = Variable(sample.data).to(device), Variable(target).to(device)
                    output = model(sample)
                    pred = output.data.max(1, keepdim=True)[1]
                    if pred[0][0].item() == target_:
                        print("Done")
                        break
                    row, col = indices[i] // sample.shape[3], indices[i] % sample.shape[3]
                    sample[0][0][row][col] -= delta[0][0][row][col]

            sample = (sample - torch.min(sample)) / (torch.max(sample) - torch.min(sample))

        samples[idx] = sample[0]
        idx += 1
        if idx == NUM_SAMPLES:
            break

    return samples

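# A hypothetical usage sketch, not part of the original code: "cnn_model" and test_dataset stand in
# for a model file under saved_models/ and a dataset yielding (image, label) pairs.
# adv_images = adv_sample_papernot("cnn_model", test_dataset, target=3)
# print(adv_images.shape)  # NUM_SAMPLES adversarial images, each nudged toward class 3
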
def adv_sample(model_name, dataset, target, num_samples):
    '''
    data contains (image, target_original_label) from a dataset class
    target is the class to which the data is being misclassified
    '''
    device = utilities.get_device(1)
    EPOCHS = 5000
    LAMBDA = 10.0
    EPSILON = 0.25
    NUM_SAMPLES = num_samples
    L2_loss = nn.MSELoss().to(device)
    Classification_loss = nn.CrossEntropyLoss().to(device)
    if device == utilities.get_device(0):
        model = torch.load("saved_models/" + model_name, map_location='cpu').to(device)
    else:
        model = torch.load("saved_models/" + model_name).to(device)
    data_loader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=True)
    idx = 0
    samples = torch.zeros([NUM_SAMPLES] + list(dataset[0][0].shape))

    # init target as a one-hot vector for the class we want the model to predict
    target_ = target
    target = torch.zeros((1, model.n_classes))
    target[0][target_] = 1

    names = []
    for data, label, name in data_loader:
        data = data.to(device)
        sample, target = Variable(data.to(device), requires_grad=True), Variable(target).to(device)
        sample = (sample - torch.min(sample)) / (torch.max(sample) - torch.min(sample))
        # sample = torch.clamp(sample, 0, 1)

        for epoch in range(EPOCHS):
            # Minimize the misclassification loss plus an L2 penalty that keeps the sample close to the original
            sample, target = Variable(sample.data, requires_grad=True).to(device), Variable(target).to(device)
            output = torch.sigmoid(model(sample))
            loss = L2_loss(output, target) + LAMBDA * L2_loss(sample, data)
            model.zero_grad()
            loss.backward()
            sample = sample - EPSILON * sample.grad.data
            sample = (sample - torch.min(sample)) / (torch.max(sample) - torch.min(sample))
            # sample = torch.clamp(sample, 0, 1)

        names.append(name[0])
        samples[idx] = sample[0]
        output = model(sample)
        pred = output.data.max(1, keepdim=True)[1]
        print(pred[0][0].item(), label.item())  # predicted by our model, predicted by oracle

        idx += 1
        if idx == NUM_SAMPLES:
            break

    return samples, names

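# A hypothetical usage sketch, not part of the original code: "crop_cnn" and crop_dataset are
# assumed names for a saved model and a dataset that yields (image, label, filename) triples.
# adv_images, adv_names = adv_sample("crop_cnn", crop_dataset, target=0, num_samples=5)
# for img, name in zip(adv_images, adv_names):
#     print(name, img.shape)
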
def actor_critic(agent_name, multiple_agents=False, n_episodes=300, max_t=1000):
    """ Batch processes the states in a single forward pass with a single neural network

    Params
    ======
        agent_name (string): name used when saving the agent and logging scores
        multiple_agents (boolean): whether the environment runs multiple agents
        n_episodes (int): maximum number of training episodes
        max_t (int): maximum number of timesteps per episode
    """
    env, env_info, states, state_size, action_size, brain_name, num_agents = initialize_env(multiple_agents)
    device = get_device()

    scores_window = deque(maxlen=100)
    scores = np.zeros(num_agents)
    scores_episode = []

    max_trajectory_size = max_t // 10
    min_trajectory_size = 1
    agent = Actor_Crtic_Agent(brain_name, agent_name, device, state_size, action_size,
                              num_agents, BUFFER_SIZE, BATCH_SIZE, RANDOM_SEED,
                              max_trajectory_size, min_trajectory_size)

    for i_episode in range(1, n_episodes + 1):
        env_info = env.reset(train_mode=True)[brain_name]
        states = env_info.vector_observations
        agent.reset()
        scores = np.zeros(num_agents)

        for t in range(max_t):
            actions, values, log_probs = agent.act(states)
            env_info = env.step(actions)[brain_name]     # send the actions to the environment
            next_states = env_info.vector_observations   # get the next states
            rewards = env_info.rewards                   # get the rewards
            dones = env_info.local_done                  # check whether an episode finished

            if multiple_agents:
                agent.step(states, actions, rewards, next_states, dones, values, log_probs)
            else:
                agent.step(states, np.expand_dims(actions, axis=0), rewards, next_states, dones,
                           values, log_probs)

            states = next_states
            scores += rewards

            if t % 20 == 0:
                print('\rTimestep {}\tScore: {:.2f}\tmin: {:.2f}\tmax: {:.2f}'
                      .format(t, np.mean(scores), np.min(scores), np.max(scores)), end="")
            if np.any(dones):
                break

        score = np.mean(scores)
        scores_window.append(score)  # save most recent score
        scores_episode.append(score)

        print('\rEpisode {}\tScore: {:.2f}\tAverage Score: {:.2f}\tMax Score: {:.2f}'
              .format(i_episode, score, np.mean(scores_window), np.max(scores)), end="\n")
        update_csv(agent_name, i_episode, np.mean(scores_window), np.max(scores))
        agent.save_agent(agent_name)

        # Early stop
        if i_episode == 100:
            return scores_episode

        if i_episode % 100 == 0:
            print('\rEpisode {}\tAverage Score: {:.2f}'.format(i_episode, np.mean(scores_window)))
        if np.mean(scores_window) >= 30.0:
            print('\nEnvironment solved in {:d} episodes!\tAverage Score: {:.2f}'
                  .format(i_episode - 100, np.mean(scores_window)))
            agent.save_agent(agent_name + "Complete")
            break

    return scores_episode

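# A hypothetical usage sketch, not part of the original code: "a2c_reacher" is an assumed agent
# name; the call trains on the multi-agent environment and plots the per-episode mean scores.
# import matplotlib.pyplot as plt
# scores = actor_critic("a2c_reacher", multiple_agents=True, n_episodes=300, max_t=1000)
# plt.plot(scores)
# plt.xlabel("Episode")
# plt.ylabel("Mean score")
# plt.show()
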
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch.nn.functional as F
import torch.nn as nn

from noise import OUNoise, GaussianExploration
from agent import D4PGAgent
from train import train
from utilities import Seeds, initialize_env, get_device
from memory import NStepReplayBuffer

MULTI = True
device = get_device()  # gets gpu if available

environment_params = {
    'multiple_agents': MULTI,            # runs the 20-arm or the 1-arm environment
    'no_graphics': False,                # runs the no-graphics (headless) version
    'train_mode': True,                  # runs in train mode
    'offline': True,                     # toggle on for the Udacity Jupyter notebook
    'agent_count': 20 if MULTI else 1,
    'device': device
}

env, env_info, states, state_size, action_size, brain_name, num_agents = initialize_env(environment_params)

seedGenerator = Seeds('seeds')
seedGenerator.next()

def main():
    # Loading all data sets
    training_data, validation_data, test_data = get_data_sets()
    device = get_device()
    accuracy_threshold = 0.86

    # Model params
    layers = 1
    vocab_size = get_vocab_size()
    embedding_dim = 400
    hidden_size = 256
    lstm_dropout = 0
    layer_dropout = 0

    # Training params
    loss_function = "BCELoss"
    optimizer = "Adam"
    batch_size = 512
    epochs = 4
    weight_decay = 0
    learning_rate = 1e-3

    model = RNNmodel(vocab_size, embedding_dim, hidden_size, lstm_dropout, layer_dropout, layers)
    # To load a pre-trained model
    # model.load_state_dict(torch.load("./state"))

    train(training_data, model, loss_function, learning_rate, epochs, device, batch_size,
          optimizer, weight_decay)

    # TODO: Add more evaluation metrics (precision, recall)
    training_accuracy = evaluate(training_data, model, device)

    # TODO: Use an F1 score between precision and recall of both training and validation sets
    if training_accuracy > accuracy_threshold:
        torch.save(model.state_dict(), "./state")

    validation_accuracy = evaluate(validation_data, model, device)
    print("TRAINING ACCURACY", training_accuracy)
    print("VALIDATION ACCURACY", validation_accuracy)
    # test_accuracy = evaluate(test_data, model, device)

    params_dict = {
        "model_params": {
            "layers": layers,
            "vocab_size": vocab_size,
            "embedding_dim": embedding_dim,
            "hidden_size": hidden_size,
            "lstm_dropout": lstm_dropout,
            "layer_dropout": layer_dropout
        },
        "training_params": {
            "loss_function": loss_function,
            "optimizer": optimizer,
            "batch_size": batch_size,
            "learning_rate": learning_rate,
            "epochs": epochs,
            "weight_decay": weight_decay
        }
    }
    log_trial(params_dict, training_accuracy, validation_accuracy)

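# Assumed entry point, not shown in the original snippet, so the training script can be run directly.
if __name__ == "__main__":
    main()
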
import logging

import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

from utilities import get_device

device = get_device()

WEIGHT_LOW = -3e-2
WEIGHT_HIGH = 3e-2


def initialize_weights(model, low, high):
    for param in model.parameters():
        param.data.uniform_(low, high)


class Actor(nn.Module):
    """Actor (Policy) Model."""

    def __init__(self, params):
        """Initialize parameters and build model.

        Params
        ======
            params (dict-like): dictionary of parameters
        """
        super(Actor, self).__init__()
        self.state_size = params['state_size']
        self.action_size = params['action_size']

                    dest='yolotest',
                    default=False,
                    action='store_true')
parser.add_argument("--stitch",
                    help="Stitch adv images into original",
                    dest='stitch',
                    default=False,
                    action='store_true')
_a = parser.parse_args()
args = {}
for a in vars(_a):
    args[a] = getattr(_a, a)

device = utilities.get_device(1)
dataset = get_Crop_Dataset()
utilities.print_dataset_details(dataset)

if args['bb']:
    input_shape = list(dataset["train"][0][0].shape)
    conv = [input_shape[0]]
    fc = []
    n_classes = 10
    epochs = 20  # int(input("Epochs: "))
    batch_size = 1000  # int(input("batch_size: "))
    lr = 0.01  # float(input("lr: "))

def ddpg(multiple_agents=False, PER=False, n_episodes=300, max_t=1000):
    """ Deep Deterministic Policy Gradients

    Params
    ======
        multiple_agents (boolean): whether the environment runs multiple agents
        PER (boolean): whether to use a prioritized experience replay buffer
        n_episodes (int): maximum number of training episodes
        max_t (int): maximum number of timesteps per episode
    """
    env, env_info, states, state_size, action_size, brain_name, num_agents = initialize_env(multiple_agents)
    device = get_device()

    scores_window = deque(maxlen=100)
    scores = np.zeros(num_agents)
    scores_episode = []

    agents = []
    # shared_memory = NaivePrioritizedBuffer(device, BUFFER_SIZE, BATCH_SIZE)
    shared_memory = PrioritizedBuffer(device, BUFFER_SIZE, BATCH_SIZE)
    for agent_id in range(num_agents):
        agents.append(Actor_Crtic_Agent(brain_name, agent_id, device, state_size, action_size))

    for i_episode in range(1, n_episodes + 1):
        env_info = env.reset(train_mode=True)[brain_name]
        states = env_info.vector_observations
        for agent in agents:
            agent.reset()
        scores = np.zeros(num_agents)

        for t in range(max_t):
            actions = np.array([agents[i].act(states[i]) for i in range(num_agents)])
            env_info = env.step(actions)[brain_name]     # send the actions to the environment
            next_states = env_info.vector_observations   # get the next states
            rewards = env_info.rewards                   # get the rewards
            dones = env_info.local_done                  # check whether an episode finished

            for i in range(num_agents):
                agents[i].step(states[i], actions[i], rewards[i], next_states[i], dones[i], shared_memory)

            if shared_memory.batch_passed():
                agents[0].learn(shared_memory)
                agents = share_learning(agents[0].actor_local, agents)

            states = next_states
            scores += rewards

            if t % 20 == 0:
                print('\rTimestep {}\tScore: {:.2f}\tmin: {:.2f}\tmax: {:.2f}'
                      .format(t, np.mean(scores), np.min(scores), np.max(scores)), end="")
            if np.any(dones):
                break

        score = np.mean(scores)
        scores_window.append(score)  # save most recent score
        scores_episode.append(score)

        print('\rEpisode {}\tScore: {:.2f}\tAverage Score: {:.2f}'
              .format(i_episode, score, np.mean(scores_window)), end="\n")

        if i_episode % 100 == 0:
            print('\rEpisode {}\tAverage Score: {:.2f}'.format(i_episode, np.mean(scores_window)))
        if np.mean(scores_window) >= 30.0:
            print('\nEnvironment solved in {:d} episodes!\tAverage Score: {:.2f}'
                  .format(i_episode - 100, np.mean(scores_window)))
            # torch.save(Agent.actor_local.state_dict(), 'checkpoint_actor.pth')
            # torch.save(Agent.critic_local.state_dict(), 'checkpoint_critic.pth')
            break

    return scores_episode

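# A hypothetical usage sketch, not part of the original code: runs the DDPG loop with prioritized
# experience replay on the multi-agent environment and keeps the per-episode mean scores.
# scores = ddpg(multiple_agents=True, PER=True, n_episodes=300, max_t=1000)
# print("Episodes run:", len(scores), "best episode score:", max(scores))
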
def actor_critic(agent_name, multiple_agents=False, load_agent=False, n_episodes=300, max_t=1000, train_mode=True):
    """ Batch processes the states in a single forward pass with a single neural network

    Params
    ======
        agent_name (string): name used when saving the agent and logging scores
        multiple_agents (boolean): whether the environment runs multiple agents
        load_agent (boolean): whether to resume training from a previously saved agent
        n_episodes (int): maximum number of training episodes
        max_t (int): maximum number of timesteps per episode
        train_mode (boolean): whether the environment runs in train mode
    """
    start = time.time()

    device = get_device()
    env, env_info, states, state_size, action_size, brain_name, num_agents = initialize_env(multiple_agents, train_mode)
    states = torch.from_numpy(states).to(device).float()

    NUM_PROCESSES = num_agents

    # Scores is Episode Rewards
    scores = np.zeros(num_agents)
    scores_window = deque(maxlen=100)
    scores_episode = []

    actor_critic = ActorCritic(state_size, action_size, device).to(device)
    agent = A2C_ACKTR(agent_name,
                      actor_critic,
                      value_loss_coef=CRITIC_DISCOUNT,
                      entropy_coef=ENTROPY_BETA,
                      lr=LEARNING_RATE,
                      eps=EPS,
                      alpha=ALPHA,
                      max_grad_norm=MAX_GRAD_NORM,
                      acktr=False,
                      load_agent=load_agent)

    rollouts = SimpleRolloutStorage(NUM_STEPS, NUM_PROCESSES, state_size, action_size)
    rollouts.to(device)

    num_updates = NUM_ENV_STEPS // NUM_STEPS // NUM_PROCESSES
    # num_updates = NUM_ENV_STEPS // NUM_STEPS

    print("\n## Loaded environment and agent in {} seconds ##\n".format(round((time.time() - start), 2)))

    update_start = time.time()
    timesteps = 0
    episode = 0
    if load_agent:
        episode = agent.episode

    while True:
        # CAN INSERT LR DECAY HERE
        # if episode == MAX_EPISODES:
        #     return scores_episode

        # Adds noise to agent parameters to encourage exploration
        # agent.add_noise(PARAMETER_NOISE)

        for step in range(NUM_STEPS):
            step_start = time.time()

            # Sample actions
            with torch.no_grad():
                values, actions, action_log_probs, _ = agent.act(states)

            clipped_actions = np.clip(actions.cpu().numpy(), *ACTION_BOUNDS)
            env_info = env.step(actions.cpu().numpy())[brain_name]   # send the actions to the environment
            next_states = env_info.vector_observations               # get the next states
            rewards = env_info.rewards                               # get the rewards

            rewards_tensor = np.array(env_info.rewards)
            rewards_tensor[rewards_tensor == 0] = NEGATIVE_REWARD
            rewards_tensor = torch.from_numpy(rewards_tensor).to(device).float().unsqueeze(1)

            dones = env_info.local_done
            masks = torch.from_numpy(1 - np.array(dones).astype(int)).to(device).float().unsqueeze(1)

            rollouts.insert(states, actions, action_log_probs, values, rewards_tensor, masks, masks)

            next_states = torch.from_numpy(next_states).to(device).float()
            states = next_states
            scores += rewards
            # print(rewards)

            if timesteps % 100 == 0:
                print('\rTimestep {}\tScore: {:.2f}\tmin: {:.2f}\tmax: {:.2f}'
                      .format(timesteps, np.mean(scores), np.min(scores), np.max(scores)), end="")

            if np.any(dones):
                score = np.mean(scores)
                print('\rEpisode {}\tScore: {:.2f}\tAverage Score: {:.2f}\tMin Score: {:.2f}\tMax Score: {:.2f}'
                      .format(episode, score, np.mean(scores_window), np.min(scores), np.max(scores)), end="\n")
                update_csv(agent_name, episode, np.mean(scores_window), np.max(scores))

                if episode % 20 == 0:
                    agent.save_agent(agent_name, score, episode, save_history=True)
                else:
                    agent.save_agent(agent_name, score, episode)

                episode += 1
                scores = np.zeros(num_agents)
                break

            timesteps += 1

        with torch.no_grad():
            next_values, _, _, _ = agent.act(next_states)

        rollouts.compute_returns(next_values, USE_GAE, GAMMA, GAE_LAMBDA)
        agent.update(rollouts)

        score = np.mean(scores)
        scores_window.append(score)  # save most recent score
        scores_episode.append(score)

    return scores_episode

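# A hypothetical usage sketch, not part of the original code: "a2c_acktr_reacher" is an assumed
# agent name; load_agent=True resumes training from the previously saved checkpoint.
# scores = actor_critic("a2c_acktr_reacher", multiple_agents=True, load_agent=True)
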