Example #1
    def __init__(self):
        name = traffic.TYPES[np.random.randint(0, len(traffic.TYPES))]

        if obstacle.NOISE:
            x = add_noise(obstacle.X, obstacle.STD_X)
            y = add_noise(obstacle.Y, obstacle.STD_Y)
            speed = add_noise(obstacle.SPEED, obstacle.STD_SPEED)
        else:
            x = obstacle.X
            y = obstacle.Y
            speed = obstacle.SPEED

        self.init_x = x
        self.init_y = y
        self.init_speed = speed
        self.init_theta = obstacle.THETA
        self.stuck_time = 0
        self.total_stuck_time = obstacle.TOTAL_STUCK_TIME

        super(Obstacle, self).__init__(self.init_x, self.init_y,
                                       self.init_theta, self.init_speed, name)

        self.crash = obstacle.CRASH and np.random.random(
        ) < obstacle.PROB_CRASH
        self.crash_y = obstacle.CRASH_Y
        self.crashing = False
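The add_noise helper these agent/obstacle examples call is project code that is not shown on this page. A minimal sketch, assuming it simply draws a Gaussian sample centred on the given value with the given standard deviation:

import numpy as np

def add_noise(value, std):
    # Hypothetical helper: return the value perturbed by zero-mean
    # Gaussian noise with standard deviation `std`.
    return np.random.normal(loc=value, scale=std)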
def experiment(indlinks_obs, delaytype, noise=0.0, display=False, soft=1000.0):
    """find parameters that minimizes the distance between x^obs_true in NOISY case
    and x^obs generated by each candidate function with PARTIAL observation
    
    Parameters
    ----------
    indlinks_obs: indices of the observed links
    delaytype: type of the delay
    noise: std of the noise added to the measured link flows, ff delays, OD demands
    display: if True, display results
    soft: weight put on the observation
    """
    if delaytype == 'Polynomial': true_theta = coef
    if delaytype == 'Hyperbolic': true_theta = (a,b)
    print 'generate graph...'
    g1, g2, g3, g4 = los_angeles(true_theta, delaytype)
    print 'compute ue...'
    l1, l2, l3, l4 = ue.solver(g1, update=True), ue.solver(g2, update=True), \
        ue.solver(g3, update=True), ue.solver(g4, update=True)
    c1 = sum([link.delay*link.flow for link in g1.links.values()])
    c2 = sum([link.delay*link.flow for link in g2.links.values()])
    c3 = sum([link.delay*link.flow for link in g3.links.values()])
    c4 = sum([link.delay*link.flow for link in g4.links.values()])
    print 'ue costs: ', c1, c2, c3, c4
    obs = [g1.indlinks[id] for id in indlinks_obs]
    obs = [int(i) for i in list(np.sort(obs))]
    x1,x2,x3,x4 = l1,l2,l3,l4
    if noise > 0.0:
        x1, x2, x3, x4 = add_noise(l1,noise), add_noise(l2,noise), add_noise(l3,noise), add_noise(l4,noise)
        g1, g2, g3, g4 = los_angeles(true_theta, 'Polynomial', noise)
    theta, xs = invopt.main_solver([g1,g2,g3,g4], [x1[obs],x2[obs],x3[obs],x4[obs]], obs, degree, soft)
    u, v = matrix([l1,l2,l3,l4]), matrix(xs)
    error = np.linalg.norm(u-v, 1) / np.linalg.norm(u, 1)
    if display: display_results(error, true_theta, [theta], delaytype)
    return error, theta
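A hedged usage sketch for the function above; the observed-link identifiers below are hypothetical placeholders, since the real indices are keys of the los_angeles graph's indlinks mapping:

# Hypothetical observed links; substitute real keys of g.indlinks.
observed = [(17, 24, 1), (24, 40, 1), (14, 21, 1)]
error, theta = experiment(observed, 'Polynomial', noise=0.01, display=True)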
Example #3
    def train_epoch(self,
                    device,
                    dataloader,
                    validation_dataloader,
                    loss_fn,
                    optimizer,
                    noise_factor=0.5):
        mean_loss = []
        for i, AE in enumerate(self.AEs):
            # Set train mode for both the encoder and the decoder
            AE.train()
            train_loss = []
            # Iterate the dataloader (we do not need the label values, this is unsupervised learning)
            for image_batch, _ in dataloader:
                # Move tensor to the proper device
                image_noisy = add_noise(image_batch, noise_factor)
                image_batch = image_batch.to(device)
                image_noisy = image_noisy.to(device)
                # Encode data
                reconstuction = AE(image_noisy)
                # Evaluate loss
                loss = loss_fn(reconstuction, image_batch)
                # Backward pass
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                # Print batch loss
                print(
                    f'\t AE{i}  :  partial train loss (single batch): {loss.data}'
                )
                train_loss.append(loss.detach().cpu().numpy())

            with torch.no_grad():
                AE.eval()
                val_loss = []
                for image_batch, _ in validation_dataloader:
                    # Move tensor to the proper device
                    image_noisy = add_noise(image_batch, noise_factor)
                    image_batch = image_batch.to(device)
                    image_noisy = image_noisy.to(device)
                    # Encode data
                    reconstuction = AE(image_noisy)
                    # Evaluate loss
                    loss = loss_fn(reconstuction, image_batch)
                    # Print batch loss
                    print(
                        f'\t AE{i}  :  partial validation loss (single batch): {loss.data}'
                    )
                    val_loss.append(loss.detach().cpu().numpy())

            mean_loss.append({
                'train': np.mean(train_loss),
                'valid': np.mean(val_loss)
            })

        return mean_loss
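The add_noise call in train_epoch is defined elsewhere in that repository. A minimal sketch, assuming additive Gaussian corruption clamped back to the [0, 1] image range:

import torch

def add_noise(inputs, noise_factor=0.5):
    # Hypothetical corruption step for the denoising autoencoder:
    # add scaled Gaussian noise, then clamp to the valid image range.
    noisy = inputs + noise_factor * torch.randn_like(inputs)
    return torch.clamp(noisy, 0.0, 1.0)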
Example #4
    def reset(self, noise=True):
        theta = obstacle.THETA
        if obstacle.NOISE and noise:
            x = add_noise(obstacle.X, obstacle.STD_X)
            y = add_noise(obstacle.Y, obstacle.STD_Y)
            speed = add_noise(obstacle.SPEED, obstacle.STD_SPEED)
        else:
            x = self.init_x
            y = self.init_y
            speed = self.init_speed

        self.crash = obstacle.CRASH and np.random.random(
        ) < obstacle.PROB_CRASH
        self.crashing = False
        self.set_state(x, y, theta, speed)
        self.bounding_box.move_to(x, y, theta)
Example #5
def committe(solver, solver_name, intervals, reps):
    np.random.seed()
    X, y = util.basic_data()
    polls = util.add_noise(y)
    curr_labels = np.random.choice(range(len(X)), size=4, replace=False)
    X_train = X[curr_labels]
    square_errors = np.zeros([2, len(intervals)])
    for i in range(len(intervals)):
        print("interval: ", intervals[i])
        for j in range(reps):
            while len(curr_labels) <= intervals[i]:
                next_points = next_countys(solver, curr_labels, X, polls)
                curr_labels = np.append(curr_labels, next_points)
            curr_labels = curr_labels[:intervals[i]]
            preds = solver(X, X[curr_labels], polls[curr_labels])
            square_errors[:, i] += util.square_error(y, preds)
        square_errors[:, i] /= reps
    square_errors = np.vstack(
        (square_errors.mean(axis=0), util.performance(solver, intervals,
                                                      reps).mean(axis=0)))
    util.plot("committe",
              intervals / len(X),
              square_errors,
              legend=[solver_name, "random"],
              x_label="% counties",
              y_label="MSE",
              title="Committe")
Example #6
def CIO(goals,
        world,
        p,
        single=False,
        start_stage=0,
        traj_data=None,
        gif_tag=''):
    if single:
        # FOR TESTING A SINGLE traj
        S = world.traj_func(world, goals, p, traj_data)
        S = add_noise(S)
        visualize_result(world, goals, p, 'initial' + gif_tag + '.gif', S)
        tot_cost = L(S, goals, world, p, start_stage)
        print_final(*function_costs)
        return {}

    S = world.traj_func(world, goals, p, traj_data)
    if start_stage == 0:
        S = add_noise(S)
    visualize_result(world, goals, p, 'initial' + gif_tag + '.gif', S)
    tot_cost = L(S, goals, world, p)
    print_final(*function_costs)

    bounds = get_bounds(world, p)
    ret_info = {}
    x_init = S
    for stage in range(start_stage, len(p.stage_weights)):
        print('BEGINNING PHASE:', stage)
        p.print_stage_weights(stage)
        res = minimize(fun=L,
                       x0=x_init,
                       args=(goals, world, p, stage),
                       method='L-BFGS-B',
                       bounds=bounds,
                       options={'eps': 10**-3})
        x_final = res['x']
        nit = res['nit']
        final_cost = res['fun']

        visualize_result(world, goals, p,
                         'stage_{}'.format(stage) + gif_tag + '.gif', x_final)
        print_final(*function_costs)
        all_final_costs = function_costs
        ret_info[stage] = world.s0, x_final, final_cost, nit, all_final_costs
        x_init = x_final
    return ret_info
Example #7
    def reset(self, noise=True):
        """Resets the agent to the initial position.

        Args:
            noise: Whether to add noise when resetting. Defaults to True.
        """
        if agent.NOISE and noise:
            x = add_noise(self.init_x, agent.STD_X)
            y = add_noise(self.init_y, agent.STD_Y)
            theta = add_noise(self.init_theta, agent.STD_THETA)
            speed = add_noise(self.init_speed, agent.STD_SPEED)
        else:
            x = self.init_x
            y = self.init_y
            theta = self.init_theta
            speed = self.init_speed

        self.set_state(x, y, theta, speed)
        self.bounding_box.move_to(x, y, theta)
Example #8
    def __init__(self,
                 x=agent.X,
                 y=agent.Y,
                 theta=agent.THETA,
                 speed=agent.SPEED,
                 name="red_car"):
        """Initializes the agent function.

        Args:
            x: Initial agent x position (pixels). Optional.
            y: Initial agent y position (pixels). Optional.
            theta: Initial agent angle (radians). Optional.
            speed: Initial agent speed (pixels/second). Optional.
            name: Name of the car image. Optional.
        """
        self.init_x = x
        self.init_y = y
        self.init_theta = theta
        self.init_speed = speed

        if agent.NOISE:
            x = add_noise(x, agent.STD_X)
            y = add_noise(y, agent.STD_Y)
            theta = add_noise(theta, agent.STD_THETA)
            speed = add_noise(speed, agent.STD_SPEED)

        super(Agent, self).__init__(x, y, theta, speed)

        self.name = name

        self.img = pygame.image.load(
            os.path.join(global_var.PATH, "media", name + ".png"))
        self.width = self.img.get_width()
        self.height = self.img.get_height()

        # Setup a simple bounding box of the correct size.
        self.bounding_box = Rectangle([[0, 0], [0, self.width],
                                       [self.height, self.width],
                                       [self.height, 0]])

        # Shift the bounding box to the correct orientation and position.
        self.bounding_box.move_to(x, y, theta)
    def get_time_series(self, t_total, n_pt, random_seed=None):
        """
        Simulate the heating by convection.
        Add random noise as given by the sigma attribute of this class instance.
        :param t_total: total elapsed time
        :param n_pt: number of time points, including the zero-time
        :param random_seed: random seed (integer)
        :return: TimeSeries. An array of temperatures and times
        """
        times = self.times(t_total, n_pt)
        temps = temperature(
            t=times,
            a=self.t_hot,
            b=(self.t_init - self.t_hot),
            c=self.rate_const
        )
        if random_seed is not None:
            set_random_seed(random_seed)
        add_noise(temps, self.sigma)
        return TimeSeries.from_time_temp(times, temps)
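temperature and add_noise are helpers from that project; a sketch consistent with how they are called above, assuming exponential convective heating T(t) = a + b*exp(-c*t) and in-place Gaussian perturbation of the temperature array:

import numpy as np

def temperature(t, a, b, c):
    # Newton-style convective heating: starts at a + b when t = 0 and
    # approaches a as t grows.
    return a + b * np.exp(-c * np.asarray(t, dtype=float))

def add_noise(temps, sigma):
    # Hypothetical: perturb the temperature array in place, matching the
    # call above that ignores the return value.
    temps += np.random.normal(0.0, sigma, size=temps.shape)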
Example #10
def experiment(indlinks_obs, delaytype, noise=0.0, display=False, soft=1000.0):
    """find parameters that minimizes the distance between x^obs_true in NOISY case
    and x^obs generated by each candidate function with PARTIAL observation
    
    Parameters
    ----------
    indlinks_obs: indices of the observed links
    delaytype: type of the delay
    noise: std of the noise added to the measured link flows, ff delays, OD demands
    display: if True, display results
    soft: weight put on the observation
    """
    if delaytype == 'Polynomial': true_theta = coef
    if delaytype == 'Hyperbolic': true_theta = (a, b)
    print 'generate graph...'
    g1, g2, g3, g4 = los_angeles(true_theta, delaytype)
    print 'compute ue...'
    l1, l2, l3, l4 = ue.solver(g1, update=True), ue.solver(g2, update=True), \
        ue.solver(g3, update=True), ue.solver(g4, update=True)
    c1 = sum([link.delay * link.flow for link in g1.links.values()])
    c2 = sum([link.delay * link.flow for link in g2.links.values()])
    c3 = sum([link.delay * link.flow for link in g3.links.values()])
    c4 = sum([link.delay * link.flow for link in g4.links.values()])
    print 'ue costs: ', c1, c2, c3, c4
    obs = [g1.indlinks[id] for id in indlinks_obs]
    obs = [int(i) for i in list(np.sort(obs))]
    x1, x2, x3, x4 = l1, l2, l3, l4
    if noise > 0.0:
        x1, x2, x3, x4 = add_noise(l1, noise), add_noise(l2, noise), add_noise(
            l3, noise), add_noise(l4, noise)
        g1, g2, g3, g4 = los_angeles(true_theta, 'Polynomial', noise)
    theta, xs = invopt.main_solver([g1, g2, g3, g4],
                                   [x1[obs], x2[obs], x3[obs], x4[obs]], obs,
                                   degree, soft)
    u, v = matrix([l1, l2, l3, l4]), matrix(xs)
    error = np.linalg.norm(u - v, 1) / np.linalg.norm(u, 1)
    if display: display_results(error, true_theta, [theta], delaytype)
    return error, theta
Beispiel #11
0
    def __getitem__(self, idx):
        pose = genfromtxt(self.pose_filepaths[self.file_idxs[idx]],
                          delimiter=',')[1:]
        pose = torch.from_numpy(pose).float()

        colour = io_image.read_RGB_image(
            self.colour_filepaths[self.file_idxs[idx]])
        mask = io_image.read_RGB_image(
            self.mask_filepaths[self.file_idxs[idx]])
        cropped_img = self.crop_image(colour, mask)
        colour = io_image.change_res_image(colour, self.img_res)
        mask = io_image.change_res_image(mask, self.img_res)
        cropped_img = io_image.change_res_image(cropped_img, self.img_res)

        with_imagenet = io_image.read_RGB_image(
            self.with_imagenet_filepaths[self.file_idxs[idx]],
            new_res=self.img_res)

        data_image = with_imagenet
        if (not self.noise_channel) and self.num_channels > 3:
            depth = io_image.read_RGB_image(
                self.depth_filepaths[self.file_idxs[idx]],
                new_res=self.img_res)
            depth = np.reshape(depth, (depth.shape[0], depth.shape[1], 1))
            data_image = np.concatenate((data_image, depth),
                                        axis=-1).astype(float)

        #cropped_img_non_noisy = np.copy(cropped_img)
        #cropped_img_noisy, noise_idxs = util.add_noise(cropped_img, 0.2)

        data_image_noisy, noise_idxs = util.add_noise(data_image,
                                                      self.noise_level)
        #colour = np.concatenate((colour, noise_idxs), axis=-1).astype(float)
        if self.transform:
            data_image_noisy = self.transform(data_image_noisy).float()
            noise_idxs = self.transform(noise_idxs).float()
            #cropped_img_noisy = self.transform(cropped_img_noisy)
            #cropped_img_non_noisy = self.transform(cropped_img_non_noisy)
            colour = self.transform(colour).float()
        data_image_noisy = torch.cat((data_image_noisy, noise_idxs), 0)
        #vis.plot_image((data_image_noisy.numpy()[0:3, :, :] + 0.5) * 255)
        #vis.show()
        #vis.plot_image((colour.numpy() + 0.5) * 255)
        #vis.show()

        return data_image_noisy, colour
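util.add_noise here returns both the corrupted image and a per-pixel index map that is later stacked as an extra channel. A sketch under the assumption that it zeroes out a random fraction of pixels and returns a (H, W, 1) mask of the corrupted locations:

import numpy as np

def add_noise(image, noise_level):
    # Hypothetical pixel-dropout corruption: zero a random fraction of
    # pixels and report where noise was applied.
    h, w = image.shape[0], image.shape[1]
    mask = (np.random.rand(h, w, 1) < noise_level).astype(np.float32)
    noisy = image * (1.0 - mask)
    return noisy, mask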
Example #12
def tax_mst(entities, relfile, clusfile=None, minscore=0., noise=0., wn=False):
    '''
    :param lmda : Lambda hyperparameter (set to negative value to use raw scores)
    '''
    ## Retrieve nodes, and IS-A relation weight between each pair
    d = util.get_relspecific_graph(entities, relfile, minscore=minscore)
    if wn:
        d = util.add_wn_relations(d)

    if noise > 0:
        d = util.add_noise(d, noise)

    g = util.dct2nx(d)

    ## Consolidate same-cluster nodes
    if clusfile is not None:
        g = util.consolidate_clusters(g, clusfile, resolveweights=np.mean)

    ## Initialize solution
    g_ = nx.DiGraph()
    g_.add_nodes_from(g)
    stats = {'node_cnt': 0, 'runtime': 0, 'keptedge_cnt': 0}

    ## Decompose
    ccomps = cc(g)
    for v_i in ccomps:
        ## Get subgraph
        g_i = g.subgraph(v_i)

        ## Solve MST problem restricted to g_i
        keptedges, g_i_stats = solve_mst(g_i)

        g_.add_edges_from(keptedges)
        #         g_.add_edges_from([(i,j,{'relation': reltype, 'weight': g[i].get(j,{}).get('weight',-1000)})
        #                           for i,j in keptedges])
        stats = util.update_stats(stats, g_i_stats)

    pruned = util.nx2dct(g_)

    # checks
    assert len(g_.edges()) == stats['keptedge_cnt']
    assert len(g_.nodes()) == stats['node_cnt']

    return pruned, stats
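util.add_noise on the relation graph is not shown on this page. A sketch assuming it jitters each link weight in the graph dictionary with zero-mean Gaussian noise of the given scale (the dictionary layout matches the d['links'] / 'weight' access seen in the next example):

import numpy as np

def add_noise(d, noise):
    # Hypothetical: perturb every relation weight in place.
    for lnk in d['links']:
        lnk['weight'] += np.random.normal(0.0, noise)
    return d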
Example #13
def tax_nocyc(entities, relfile, clusfile=None, minscore=0., noise=0., lmda=0.5, wn=False):
    '''
    :param lmda : Lambda hyperparameter (set to negative value to use raw scores)
    '''
    ## Retrieve nodes, and IS-A relation weight between each pair
    d = util.get_relspecific_graph(entities, relfile, minscore=minscore)
    
    if wn:
        d = util.add_wn_relations(d)
    
    if noise > 0:
        d = util.add_noise(d, noise)
    
    for lnk in d['links']:
        lnk['weight'] = lnk['weight'] - lmda
    
    g = util.dct2nx(d)
    
    ## Consolidate same-cluster nodes
    if clusfile is not None:
        g = util.consolidate_clusters(g, clusfile, resolveweights=np.mean)
    
    ## Initialize solution
    g_ = nx.DiGraph()
    g_.add_nodes_from(g)
    stats = {'node_cnt': 0,
             'runtime': 0,
             'keptedge_cnt': 0
            }
    
    keptedges, g_stats = solve_nocyc(g)
    g_.add_edges_from(keptedges)
    
    stats = util.update_stats(stats, g_stats)
    
    pruned = util.nx2dct(g_)
    
    # checks
    assert len(g_.edges()) == stats['keptedge_cnt']
    assert len(g_.nodes()) == stats['node_cnt']
    
    return pruned, stats
def main():
    options = parse_args()
    import theano.sandbox.cuda
    theano.sandbox.cuda.use(options.device)

    plt.rcParams['image.cmap'] = 'gray'
    if not os.path.exists(options.output_folder):
        os.mkdir(options.output_folder)
    sys.path.insert(0, options.experiment)
    generate_fn, objective_fn = compile(options)
    import util
    X, ids = util.load_dataset(options.input_folder, False)
    mem_file = open(os.path.join(options.output_folder, 'mem.csv'), 'w')
    print("id,from,to", file=mem_file)
    for id, img in zip(ids, X):
        input_img = util.preprocess(np.expand_dims(img, axis=0))
        output_img = generate_fn(util.add_noise(input_img))
        plt.imsave(os.path.join(options.output_folder, id),
                   np.squeeze(util.deprocess(output_img)))
        line = "%s,%s,%s" % (id, np.squeeze(
            objective_fn(input_img)), np.squeeze(objective_fn(output_img)))
        print(line)
        print(line, file=mem_file)
    mem_file.close()
Example #15
def main(argv):
    learnfile = "ngrams.txt"
    testfile = "europarl-v7.es-en.en"
    verbose = False
    noise = 0.05
    numIterations = 0
    minLength = 10
    maxLength = 60

    def printHelpMessage():
        print 'decryptor.py [-i <n-gram file> -t <testfile> -n <noise level>]'
        print '-v verbose'
        print '-h help'
    try:
        opts, args = getopt.getopt(argv,"hvi:t:n:")
    except getopt.GetoptError:
        printHelpMessage()
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            printHelpMessage()
            sys.exit()
        elif opt in ("-i"): learnfile = arg
        elif opt in ("-t"): testfile = arg
        elif opt in ("-n"): noise = float(arg)
        elif opt in ("-v"): verbose = True

    print "Learning..."
    sys.stdout.flush()
    languagemodel = LanguageModel.LanguageModel(learnfile)
    original_text_file = open(testfile, "r")

    cipher_solver = solver.Solver(languagemodel)
    cipher_baseline = baseline.Baseline()
    solver_accuracy = []
    baseline_accuracy = []
    max_counts = []

    for original_text in original_text_file:
        if len(original_text) < minLength: continue
        if len(original_text) > maxLength: continue
        numIterations += 1
        encryption_key = util.generateKey()
        original_text_noised = util.add_noise(original_text, noise)
        cipher_text = util.encryptCase(original_text_noised, encryption_key)
        startTime = datetime.datetime.now()

        if verbose:
            print "============================"
            print "Iteration ", numIterations
            print "Length ", len(original_text)
            print "Start Time", startTime
            print "Original Text", original_text
            print "Original Text Noised", original_text_noised
            print "Key", encryption_key
            print "Cipher Text Noised", cipher_text
            
        
        baseline_text, baseline_decryption_key = cipher_baseline.decrypt(cipher_text)
        guess_text, guess_decryption_key, num_guesses = cipher_solver.decrypt(cipher_text)

        baseline_score = score_accuracy(encryption_key, baseline_decryption_key, cipher_text, original_text)
        baseline_accuracy.append(baseline_score)
        solver_score = score_accuracy(encryption_key, guess_decryption_key, cipher_text, original_text)
        solver_accuracy.append(solver_score)
        max_counts.append(num_guesses)

        if verbose:
            print "End Time", datetime.datetime.now()
            print "Duration", datetime.datetime.now() - startTime
            print "Length, Accuracy, Duration,", len(original_text), ',', solver_score, ',', datetime.datetime.now() - startTime
            print "Baseline Accuracy: ", baseline_score
            print "Average Accuracy of Baseline: ", sum(baseline_accuracy)/len(baseline_accuracy)
            print "Solver Accuracy: ", solver_score
            print "Average Accuracy of Solver: ", sum(solver_accuracy)/len(solver_accuracy)
            print "Reached same thing many times", max_counts

    print "Average Accuracy of Baseline: ", sum(baseline_accuracy)/len(baseline_accuracy)
    print "Average Accuracy of Solver: ", sum(solver_accuracy)/len(solver_accuracy)
    print "Over %d cipher texts" % len(solver_accuracy)
Example #16
def onpolicy_main():
    print("onpolicy main")

    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    if args.cuda and torch.cuda.is_available() and args.cuda_deterministic:
        torch.backends.cudnn.benchmark = False
        torch.backends.cudnn.deterministic = True
    torch.set_num_threads(1)
    device = torch.device("cuda:0" if args.cuda else "cpu")

    summary_name = args.log_dir + '{0}_{1}'
    writer = SummaryWriter(summary_name.format(args.env_name, args.save_name))

    # Make vector env
    envs = make_vec_envs(
        args.env_name,
        args.seed,
        args.num_processes,
        args.gamma,
        args.log_dir,
        device,
        False,
        env_kwargs=env_kwargs,
    )

    # ugly way to access the environment attributes
    if args.env_name.find('doorenv') > -1:
        if args.num_processes > 1:
            visionnet_input = envs.venv.venv.visionnet_input
            nn = envs.venv.venv.nn
            env_name = envs.venv.venv.xml_path
        else:
            visionnet_input = envs.venv.venv.envs[
                0].env.env.env.visionnet_input
            nn = envs.venv.venv.envs[0].env.env.env.nn
            env_name = envs.venv.venv.envs[0].env.env.env.xml_path
        dummy_obs = np.zeros(nn * 2 + 3)
    else:
        dummy_obs = envs.observation_space
        visionnet_input = None
        nn = None

    if pretrained_policy_load:
        print("loading", pretrained_policy_load)
        actor_critic, ob_rms = torch.load(pretrained_policy_load)
    else:
        actor_critic = Policy(dummy_obs.shape,
                              envs.action_space,
                              base_kwargs={'recurrent': args.recurrent_policy})

    if visionnet_input:
        visionmodel = load_visionmodel(env_name, args.visionmodel_path,
                                       VisionModelXYZ())
        actor_critic.visionmodel = visionmodel.eval()
    actor_critic.nn = nn
    actor_critic.to(device)

    #disable normalizer
    vec_norm = get_vec_normalize(envs)
    vec_norm.eval()

    if args.algo == 'a2c':
        agent = algo.A2C_ACKTR(actor_critic,
                               args.value_loss_coef,
                               args.entropy_coef,
                               lr=args.lr,
                               eps=args.eps,
                               alpha=args.alpha,
                               max_grad_norm=args.max_grad_norm)
    elif args.algo == 'ppo':
        agent = algo.PPO(actor_critic,
                         args.clip_param,
                         args.ppo_epoch,
                         args.num_mini_batch,
                         args.value_loss_coef,
                         args.entropy_coef,
                         lr=args.lr,
                         eps=args.eps,
                         max_grad_norm=args.max_grad_norm)

    rollouts = RolloutStorage(args.num_steps, args.num_processes,
                              dummy_obs.shape, envs.action_space,
                              actor_critic.recurrent_hidden_state_size)

    full_obs = envs.reset()
    initial_state = full_obs[:, :envs.action_space.shape[0]]

    if args.env_name.find('doorenv') > -1 and visionnet_input:
        obs = actor_critic.obs2inputs(full_obs, 0)
    else:
        if knob_noisy:
            obs = add_noise(full_obs, 0)
        else:
            obs = full_obs

    rollouts.obs[0].copy_(obs)
    rollouts.to(device)

    episode_rewards = deque(maxlen=10)

    start = time.time()
    num_updates = int(
        args.num_env_steps) // args.num_steps // args.num_processes

    for j in range(num_updates):

        if args.use_linear_lr_decay:
            # decrease learning rate linearly
            utils.update_linear_schedule(agent.optimizer, j, num_updates,
                                         args.lr)

        pos_control = False
        total_switches = 0
        prev_selection = ""
        for step in range(args.num_steps):
            with torch.no_grad():
                value, action, action_log_prob, recurrent_hidden_states = actor_critic.act(
                    rollouts.obs[step], rollouts.recurrent_hidden_states[step],
                    rollouts.masks[step])
                next_action = action

            if pos_control:
                frame_skip = 2
                if step % (512 / frame_skip - 1) == 0:
                    current_state = initial_state
                next_action = current_state + next_action
                for kk in range(frame_skip):
                    full_obs, reward, done, infos = envs.step(next_action)

                current_state = full_obs[:, :envs.action_space.shape[0]]
            else:
                full_obs, reward, done, infos = envs.step(next_action)

            # convert img to obs if door_env and using visionnet
            if args.env_name.find('doorenv') > -1 and visionnet_input:
                obs = actor_critic.obs2inputs(full_obs, j)
            else:
                if knob_noisy:
                    obs = add_noise(full_obs, j)
                else:
                    obs = full_obs

            for info in infos:
                if 'episode' in info.keys():
                    episode_rewards.append(info['episode']['r'])

            masks = torch.FloatTensor([[0.0] if done_ else [1.0]
                                       for done_ in done])
            bad_masks = torch.FloatTensor(
                [[0.0] if 'bad_transition' in info.keys() else [1.0]
                 for info in infos])
            rollouts.insert(obs, recurrent_hidden_states, action,
                            action_log_prob, value, reward, masks, bad_masks)

        with torch.no_grad():
            next_value = actor_critic.get_value(
                rollouts.obs[-1], rollouts.recurrent_hidden_states[-1],
                rollouts.masks[-1]).detach()

        rollouts.compute_returns(next_value, args.use_gae, args.gamma,
                                 args.gae_lambda, args.use_proper_time_limits)

        value_loss, action_loss, dist_entropy = agent.update(rollouts)
        rollouts.after_update()

        writer.add_scalar("Value loss", value_loss, j)
        writer.add_scalar("action loss", action_loss, j)
        writer.add_scalar("dist entropy loss", dist_entropy, j)
        writer.add_scalar("Episode rewards", np.mean(episode_rewards), j)

        # save for every interval-th episode or for the last epoch
        if (j % args.save_interval == 0
                or j == num_updates - 1) and args.save_dir != "":
            save_path = os.path.join(args.save_dir, args.algo)
            try:
                os.makedirs(save_path)
            except OSError:
                pass
            torch.save([
                actor_critic,
                getattr(utils.get_vec_normalize(envs), 'ob_rms', None)
            ],
                       os.path.join(
                           save_path, args.env_name +
                           "_{}.{}.pt".format(args.save_name, j)))

        if j % args.log_interval == 0 and len(episode_rewards) > 1:
            total_num_steps = (j + 1) * args.num_processes * args.num_steps
            end = time.time()
            print(
                "Updates {}, num timesteps {}, FPS {} \n Last {} training episodes: mean/median reward {:.1f}/{:.1f}, min/max reward {:.1f}/{:.1f}\n"
                .format(j, total_num_steps,
                        int(total_num_steps / (end - start)),
                        len(episode_rewards), np.mean(episode_rewards),
                        np.median(episode_rewards), np.min(episode_rewards),
                        np.max(episode_rewards), dist_entropy, value_loss,
                        action_loss))

        if (args.eval_interval is not None and len(episode_rewards) > 1
                and j % args.eval_interval == 0):
            ob_rms = utils.get_vec_normalize(envs).ob_rms
            evaluate(actor_critic, ob_rms, args.env_name, args.seed,
                     args.num_processes, eval_log_dir, device)

        DR = True  #Domain Randomization
        ################## for multiprocess world change ######################
        if DR:
            print("changing world")

            envs.close_extras()
            envs.close()
            del envs

            envs = make_vec_envs(
                args.env_name,
                args.seed,
                args.num_processes,
                args.gamma,
                args.log_dir,
                device,
                False,
                env_kwargs=env_kwargs,
            )

            full_obs = envs.reset()
            if args.env_name.find('doorenv') > -1 and visionnet_input:
                obs = actor_critic.obs2inputs(full_obs, j)
            else:
                obs = full_obs
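The add_noise used on the door-opening observations takes the current update index, which suggests a noise schedule. A sketch under that assumption (max_std and ramp_updates are made-up parameters, not from the original project): Gaussian observation noise whose magnitude ramps up with training and then saturates:

import torch

def add_noise(obs, update_idx, max_std=0.02, ramp_updates=100):
    # Hypothetical curriculum-style corruption: noise std grows linearly
    # with the update index and saturates at max_std.
    std = max_std * min(1.0, float(update_idx) / ramp_updates)
    return obs + std * torch.randn_like(obs)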
Example #17
def mtg(entities,
        relfile,
        reltype='hypernym',
        minscore=1.5,
        lmda=1.0,
        noise=0.,
        wn=False):
    '''
    :param lmda : Lambda hyperparameter (set to negative value to use raw scores)
    '''
    ## Retrieve nodes, and reltype weight between each pair
    d = util.get_relspecific_graph(entities,
                                   relfile,
                                   minscore=minscore,
                                   reltypes=[reltype],
                                   equivrel=reltype)

    ## Convert weights to logodds and subtract lambda
    for lnk in d['links']:
        lnk['weight'] = lnk['weight'] - lmda

    if wn:
        d = util.add_wn_relations(d)

    if noise > 0:
        d = util.add_noise(d, noise)

    g = util.dct2nx(d)

    ## Initialize solution
    g_ = nx.DiGraph()
    g_.add_nodes_from(g)
    stats = {
        'node_cnt': 0,
        'num_vars': 0,
        'num_constrs': 0,
        'runtime': 0,
        'keptedge_cnt': 0,
        'possedge_cnt': 0,
        'gt0edge_cnt': 0,
        'timeout': 0
    }

    ## Decompose
    ccomps = cc(g)
    for v_i in ccomps:
        ## Get subgraph
        g_i = g.subgraph(v_i)

        ## Solve ILP problem restricted to g_i
        keptedges, g_i_stats = solve_exact(g_i)

        g_.add_edges_from([(i, j, {
            'relation': reltype,
            'weight': g[i].get(j, {}).get('weight', 0)
        }) for i, j in keptedges])
        stats = util.update_stats(stats, g_i_stats)

    pruned = util.nx2dct(g_)

    # checks
    assert len(g_.edges()) == stats['keptedge_cnt']
    assert len(g_.nodes()) == stats['node_cnt']

    return pruned, stats
Example #18
num_channels_out = 3
img_path = 'C:/Users/Administrator/Documents/Datasets/ycb_unreal_colour (493).png'
img_save_path = results_dir + 'output_img.png'
img_res = (640, 480)
noise_level = 0.01
transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize(
                                    (0.5, 0.5, 0.5, 0.5),
                                    (0.5, 0.5, 0.5, 0.5))])

# load image
input_image = io_image.read_RGB_image(img_path, img_res)
if input_image.shape[2] == 4:
    input_image = input_image[:, :, 0:3]

data_image_noisy, noise_idxs = util.add_noise(input_image, noise_level)
data_image_noisy = transform(data_image_noisy).float()
noise_idxs = transform(noise_idxs).float()
data_image_noisy = torch.cat((data_image_noisy, noise_idxs), 0)

# load model
checkpoint = torch.load(results_dir + 'ycb_checkpoint.pth.tar')
args = checkpoint['args']
epoch = checkpoint['epoch']
model = VQ_CVAE(d=hidden, k=k, num_channels_in=num_channels_in, num_channels_out=num_channels_out)
model.load_state_dict(checkpoint['state_dict'])
if use_cuda:
    print('Using Cuda')
    model.cuda()
    data_image_noisy = data_image_noisy.cuda()
def train(options):
    print("Compiling...")
    G_train_fn, D_train_fn, generate_fn, G, lr, obj_coef = compile(options)
    import util
    print("Loading dataset...")
    X, _ = util.load_dataset(options.dataset, True)
    print("Training...")
    log_file = open(os.path.join(options.experiment_folder, 'log.txt'), 'w')
    log_str = ("Experiment params: %s" % (options.__dict__, ))
    print(log_str)
    print(log_str, file=log_file)
    buffer = ExperienceBuffer(buffer_size=100,
                              batch_shape=(options.batch_size, 3, 256, 256))
    for epoch in range(options.num_iter):
        # if (epoch + 1) % 20 == 0:
        #     lr.set_value(np.array(lr.get_value() * 0.3, dtype='float32'))

        # if (epoch + 1) % 3 == 0:
        #     obj_coef.set_value(np.array(obj_coef.get_value() + 0.1 * options.obj_coef, dtype='float32'))

        discriminator_order = np.arange(len(X))
        generator_order = np.arange(len(X))
        np.random.shuffle(discriminator_order)
        np.random.shuffle(generator_order)

        discriminator_loss_list = []
        generator_loss_list = []

        for start in tqdm(range(0, len(X), options.batch_size)):
            end = min(start + options.batch_size, len(X))

            generator_batch = util.preprocess(X[generator_order[start:end]])
            discriminator_batch = util.preprocess(
                X[discriminator_order[start:end]], True)

            generator_batch_with_noise = util.add_noise(generator_batch)

            #print (generate_fn(generator_batch_with_noise))
            generator_output = G_train_fn(generator_batch_with_noise,
                                          generator_batch, discriminator_batch)
            generated_batch, generator_loss = generator_output[
                0], generator_output[1:]
            #buffer.push_to_buffer(generator_batch)

            discriminator_loss = D_train_fn(generated_batch,
                                            discriminator_batch)

            discriminator_loss_list.append(discriminator_loss)
            generator_loss_list.append(generator_loss)

            log_str = (
                ("Epoch %i" % epoch) + '\n' +
                ("Discriminator loss %f" %
                 tuple(np.mean(np.array(discriminator_loss_list), axis=0))) +
                '\n' +
                ("Generator loss %f, obj_loss %f, cont_loss %f, disc_loss %f, total_variation loss %f"
                 % tuple(np.mean(np.array(generator_loss_list), axis=0))))
            print(log_str)

        img_for_ploting = util.preprocess(X[0:options.num_img_to_show])
        plot(options, epoch, img_for_ploting,
             generate_fn(util.add_noise(img_for_ploting)))

        log_str = (
            ("Epoch %i" % epoch) + '\n' +
            ("Discriminator loss %f" %
             tuple(np.mean(np.array(discriminator_loss_list), axis=0))) +
            '\n' +
            ("Generator loss %f, obj_loss %f, cont_loss %f, disc_loss %f, total_variation loss %f"
             % tuple(np.mean(np.array(generator_loss_list), axis=0))))
        print(log_str)
        print(log_str, file=log_file)
        if epoch % options.save_mode_it == 0:
            save_model(options, epoch, G)
        log_file.flush()

    log_file.close()
    save_model(options, options.num_iter - 1, G)

    return G
Example #20
    save_vars = [W_conv1, b_conv1, W_conv2, b_conv2, W_fc1, b_fc1]
    saver = tf.train.Saver(var_list=save_vars, max_to_keep=3)

    saver.restore(sess, 'your_model_path')

    # merge all summaries so the hierarchy is clearer in TensorBoard
    merged = tf.summary.merge_all()
    summary_writer = tf.summary.FileWriter(event_path, graph=sess.graph)

    # add mislabeled samples to test model robustness / inspect model behavior
    noise_indexes = [
        random.randint(0, train_samples_count)
        for _ in range(int(train_samples_count * noise * 0.01))
    ]
    for index in noise_indexes:
        mnist.train.labels[index] = util.add_noise(mnist.train.labels[index])
    print('noises size: {}, and are added.'.format(len(noise_indexes)))

    start_time = datetime.datetime.now()
    fp = open(
        './model/mnist/noise{}_poison{}/result.txt'.format(noise, poison),
        'ab+')
    for i in range(1, epoch + 1):
        batch = mnist.train.next_batch(50)
        sess.run(train_step,
                 feed_dict={
                     x_raw: batch[0],
                     y: batch[1],
                     keep_prob: 0.5
                 })
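util.add_noise here rewrites individual MNIST training labels, i.e. it injects label noise. A sketch assuming one-hot labels that are flipped to a different, randomly chosen class:

import numpy as np

def add_noise(label):
    # Hypothetical label corruption: move a one-hot label to a random
    # wrong class.
    current = int(np.argmax(label))
    choices = [c for c in range(label.shape[0]) if c != current]
    wrong = np.zeros_like(label)
    wrong[np.random.choice(choices)] = 1
    return wrong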
def main(raw_args=None):

    # If this is being called as a function from another python script
    if raw_args is not None:
        args = get_args(raw_args)
    else:
        args = main_args

    if args.algo != 'ipo':
        raise NotImplementedError

    # Total number of envs (both domains)
    args.num_processes = args.num_envs1 + args.num_envs2

    knob_noisy = args.knob_noisy
    pretrained_policy_load = args.pretrained_policy_load

    args.world_path_domain1 = os.path.expanduser(args.world_path_domain1)
    args.world_path_domain2 = os.path.expanduser(args.world_path_domain2)

    # Env kwargs for domain 1
    env_kwargs1 = dict(port = args.port,
                    visionnet_input = args.visionnet_input,
                    unity = args.unity,
                    world_path = args.world_path_domain1)

    # Env kwargs for domain 2
    env_kwargs2 = dict(port = args.port,
                    visionnet_input = args.visionnet_input,
                    unity = args.unity,
                    world_path = args.world_path_domain2)


    print("Training with IPO.")

    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    if args.cuda and torch.cuda.is_available() and args.cuda_deterministic:
        torch.backends.cudnn.benchmark = False
        torch.backends.cudnn.deterministic = True
    torch.set_num_threads(1)
    device = torch.device("cuda:0" if args.cuda else "cpu")

    summary_name = args.log_dir + '{0}_{1}'
    writer = SummaryWriter(summary_name.format(args.env_name, args.save_name))

    # Make vector env for two domains (each contains num_processes/2 envs)
    envs1 = make_vec_envs(args.env_name,
                         args.seed,
                         args.num_envs1,
                         args.gamma, 
                         args.log_dir, 
                         device, 
                         False, 
                         env_kwargs=env_kwargs1)

    envs2 = make_vec_envs(args.env_name,
                         args.seed,
                         args.num_envs2,
                         args.gamma, 
                         args.log_dir, 
                         device, 
                         False, 
                         env_kwargs=env_kwargs2)


    # ugly way to access the environment attributes
    if args.env_name.find('doorenv')>-1:
        visionnet_input = envs1.venv.venv.visionnet_input
        nn = envs1.venv.venv.nn
        env_name = envs1.venv.venv.xml_path
            
        dummy_obs = np.zeros(nn*2+3)
    else:
        dummy_obs = envs1.observation_space
        visionnet_input = None
        nn = None

    if pretrained_policy_load:
        print("loading", pretrained_policy_load)
        actor_critic, ob_rms = torch.load(pretrained_policy_load)
    else:
        actor_critic = Policy_av(
            dummy_obs.shape,
            envs1.action_space,
            base_kwargs={'recurrent': args.recurrent_policy})

        # actor_critic = Policy(
        #     dummy_obs.shape,
        #     envs1.action_space,
        #     base_kwargs={'recurrent': args.recurrent_policy})
    
    if visionnet_input: 
        raise NotImplementedError
        visionmodel = load_visionmodel(env_name, args.visionmodel_path, VisionModelXYZ())  
        actor_critic.visionmodel = visionmodel.eval()

    actor_critic.nn = nn
    actor_critic.to(device)

    #disable normalizer
    vec_norm1 = get_vec_normalize(envs1)
    vec_norm1.eval()
    vec_norm2 = get_vec_normalize(envs2)
    vec_norm2.eval()
    
    # Create two agents (one for each domain)
    params1 = [{'params': actor_critic.base.actor1.parameters()}, 
    {'params': actor_critic.base.critic1.parameters()}, 
    {'params': actor_critic.base.critic_linear1.parameters()},
    {'params': actor_critic.base.fc_mean1.parameters()},
    {'params': actor_critic.base.logstd1.parameters()}]

    params2 = [{'params': actor_critic.base.actor2.parameters()}, 
    {'params': actor_critic.base.critic2.parameters()}, 
    {'params': actor_critic.base.critic_linear2.parameters()},
    {'params': actor_critic.base.fc_mean2.parameters()},
    {'params': actor_critic.base.logstd2.parameters()}]

    # params1 = None
    # params2 = None

    agent1 = algo.PPO(
        actor_critic,
        args.clip_param,
        args.ppo_epoch,
        args.num_mini_batch,
        args.value_loss_coef,
        args.entropy_coef,
        lr=args.lr,
        eps=args.eps,
        max_grad_norm=args.max_grad_norm,
        optim_params = params1)

    agent2 = algo.PPO(
        actor_critic,
        args.clip_param,
        args.ppo_epoch,
        args.num_mini_batch,
        args.value_loss_coef,
        args.entropy_coef,
        lr=args.lr,
        eps=args.eps,
        max_grad_norm=args.max_grad_norm,
        optim_params = params2)


    # Rollout storage for each domain
    rollouts1 = RolloutStorage(args.num_steps, args.num_envs1,
                              dummy_obs.shape, envs1.action_space,
                              actor_critic.recurrent_hidden_state_size)

    rollouts2 = RolloutStorage(args.num_steps, args.num_envs2,
                              dummy_obs.shape, envs2.action_space,
                              actor_critic.recurrent_hidden_state_size)


    full_obs1 = envs1.reset()
    initial_state1 = full_obs1[:,:envs1.action_space.shape[0]]

    full_obs2 = envs2.reset()
    initial_state2 = full_obs2[:,:envs2.action_space.shape[0]]

    if args.env_name.find('doorenv')>-1 and visionnet_input:
        obs1 = actor_critic.obs2inputs(full_obs1, 0)
        obs2 = actor_critic.obs2inputs(full_obs2, 0)
    else:
        if knob_noisy:
            obs1 = add_noise(full_obs1, 0)
            obs2 = add_noise(full_obs2, 0)
        else:
            obs1 = full_obs1
            obs2 = full_obs2

    rollouts1.obs[0].copy_(obs1)
    rollouts1.to(device)

    rollouts2.obs[0].copy_(obs2)
    rollouts2.to(device)

    episode_rewards1 = deque(maxlen=10)
    episode_rewards2 = deque(maxlen=10)

    start = time.time()
    num_updates = int(
        args.num_env_steps) // args.num_steps // args.num_processes

    num_updates = int(num_updates/2) # Since we have two domains per iteration

    best_training_reward = -np.inf

    for j in range(num_updates):

        if args.use_linear_lr_decay:
            # decrease learning rate linearly
            utils.update_linear_schedule(
                agent1.optimizer, j, num_updates, args.lr)
            utils.update_linear_schedule(
                agent2.optimizer, j, num_updates, args.lr)

        ################## Do rollouts and updates for domain 1 ##################

        pos_control = False
        total_switches = 0
        prev_selection = ""
        for step in range(args.num_steps):
            with torch.no_grad():
                value, action, action_log_prob, recurrent_hidden_states = actor_critic.act(
                    rollouts1.obs[step], rollouts1.recurrent_hidden_states[step],
                    rollouts1.masks[step])
                next_action = action 

            try:
                # print(next_action)
                full_obs, reward, done, infos = envs1.step(next_action)
            except:
                ipy.embed()

            if knob_noisy:
                obs = add_noise(full_obs, j)
            else:
                obs = full_obs

            for info in infos:
                if 'episode' in info.keys():
                    episode_rewards1.append(info['episode']['r'])

            masks = torch.FloatTensor(
                [[0.0] if done_ else [1.0] for done_ in done])
            bad_masks = torch.FloatTensor(
                [[0.0] if 'bad_transition' in info.keys() else [1.0]
                 for info in infos])
            rollouts1.insert(obs, recurrent_hidden_states, action,
                            action_log_prob, value, reward, masks, bad_masks)
            
        with torch.no_grad():
            next_value = actor_critic.get_value(
                rollouts1.obs[-1], rollouts1.recurrent_hidden_states[-1],
                rollouts1.masks[-1]).detach()

        rollouts1.compute_returns(next_value, args.use_gae, args.gamma,
                                 args.gae_lambda, args.use_proper_time_limits)

        value_loss, action_loss, dist_entropy = agent1.update(rollouts1)
        rollouts1.after_update()
        value_loss1 = value_loss
        action_loss1 = action_loss
        dist_entropy1 = dist_entropy

        ################## Do rollouts and updates for domain 2 ##################

        pos_control = False
        total_switches = 0
        prev_selection = ""
        for step in range(args.num_steps):
            with torch.no_grad():
                value, action, action_log_prob, recurrent_hidden_states = actor_critic.act(
                    rollouts2.obs[step], rollouts2.recurrent_hidden_states[step],
                    rollouts2.masks[step])
                next_action = action 

            try:
                # print(next_action)
                full_obs, reward, done, infos = envs2.step(next_action)
            except:
                ipy.embed()

            if knob_noisy:
                obs = add_noise(full_obs, j)
            else:
                obs = full_obs

            for info in infos:
                if 'episode' in info.keys():
                    episode_rewards2.append(info['episode']['r'])

            masks = torch.FloatTensor(
                [[0.0] if done_ else [1.0] for done_ in done])
            bad_masks = torch.FloatTensor(
                [[0.0] if 'bad_transition' in info.keys() else [1.0]
                 for info in infos])
            rollouts2.insert(obs, recurrent_hidden_states, action,
                            action_log_prob, value, reward, masks, bad_masks)
            
        with torch.no_grad():
            next_value = actor_critic.get_value(
                rollouts2.obs[-1], rollouts2.recurrent_hidden_states[-1],
                rollouts2.masks[-1]).detach()

        rollouts2.compute_returns(next_value, args.use_gae, args.gamma,
                                 args.gae_lambda, args.use_proper_time_limits)

        value_loss, action_loss, dist_entropy = agent2.update(rollouts2)
        rollouts2.after_update()
        value_loss2 = value_loss
        action_loss2 = action_loss
        dist_entropy2 = dist_entropy

        ###################### Logs and storage ########################

        value_loss = (value_loss1 + value_loss2)/2
        action_loss = (action_loss1 + action_loss2)/2
        dist_entropy = (dist_entropy1 + dist_entropy2)/2
        episode_rewards = []
        for ii in range(len(episode_rewards1)):
            episode_rewards.append((episode_rewards1[ii]+episode_rewards2[ii])/2)
        # episode_rewards = episode_rewards1

        writer.add_scalar("Value loss", value_loss, j)
        writer.add_scalar("action loss", action_loss, j)
        writer.add_scalar("dist entropy loss", dist_entropy, j)
        writer.add_scalar("Episode rewards", np.mean(episode_rewards), j)

        if np.mean(episode_rewards) > best_training_reward:
            best_training_reward = np.mean(episode_rewards)
            current_is_best = True
        else:
            current_is_best = False

        # save for every interval-th episode or for the last epoch or for best so far
        if (j % args.save_interval == 0
                or j == num_updates - 1 or current_is_best) and args.save_dir != "":
            save_path = os.path.join(args.save_dir, args.algo)
            try:
                os.makedirs(save_path)
            except OSError:
                pass
            torch.save([
                    actor_critic,
                    None
                ], os.path.join(save_path, args.env_name + "_{}.{}.pt".format(args.save_name,j)))

            if current_is_best:
                torch.save([
                    actor_critic,
                    None
                ], os.path.join(save_path, args.env_name + "_{}.best.pt".format(args.save_name)))
            
            # torch.save([
            #     actor_critic,
            #     getattr(utils.get_vec_normalize(envs1), 'ob_rms', None)
            # ], os.path.join(save_path, args.env_name + "_{}.{}.pt".format(args.save_name,j)))

        if j % args.log_interval == 0 and len(episode_rewards) > 1:
            total_num_steps = (j + 1) * args.num_processes * args.num_steps
            end = time.time()
            print(
                "Updates {}, num timesteps {}, FPS {} \n Last {} training episodes: mean/median reward {:.1f}/{:.1f}, min/max reward {:.1f}/{:.1f}\n"
                .format(j, total_num_steps,
                        int(total_num_steps / (end - start)),
                        len(episode_rewards), np.mean(episode_rewards),
                        np.median(episode_rewards), np.min(episode_rewards),
                        np.max(episode_rewards), dist_entropy, value_loss,
                        action_loss))

        if (args.eval_interval is not None and len(episode_rewards) > 1
                and j % args.eval_interval == 0):
            raise NotImplementedError
            ob_rms = utils.get_vec_normalize(envs).ob_rms
            evaluate(actor_critic, ob_rms, args.env_name, args.seed,
                     args.num_processes, eval_log_dir, device)

        DR=False # True #Domain Randomization
        ################## for multiprocess world change ######################
        if DR:
            raise NotImplementedError

            print("changing world")

            envs.close_extras()
            envs.close()
            del envs

            envs = make_vec_envs_domains(args.env_name,
                         args.seed,
                         args.num_processes,
                         args.gamma, 
                         args.log_dir, 
                         device, 
                         False, 
                         env_kwargs1=env_kwargs1,
                         env_kwargs2=env_kwargs2)

            full_obs = envs.reset()
            if args.env_name.find('doorenv')>-1 and visionnet_input:
                obs = actor_critic.obs2inputs(full_obs, j)
            else:
                obs = full_obs