Example #1
class RolloutGenerator(object):
    def __init__(self, ga):
        # ga reference
        self.ga = ga

        # compressor model
        self.vae = ga.compressor

        # controller model; trained on the go
        self.controller = Controller(ga.input_size, ga.output_size).cuda()
Example #2
def get_player(current_time,
               version,
               file_model,
               solver_version=None,
               sequence=1):
    """ Load the models of a specific player """

    path = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                        '..', 'saved_models', str(current_time))
    try:
        mod = os.listdir(path)
        models = [model for model in mod
                  if model.split('-')[0] == str(version) and file_model in model]
        models.sort()
        if len(models) == 0:
            return False, version
    except FileNotFoundError:
        return False, version

    if file_model == "vae":
        model = ConvVAE((HEIGHT, WIDTH, 3), LATENT_VEC).to(DEVICE)
    elif file_model == "lstm":
        model = LSTM(sequence, HIDDEN_UNITS, LATENT_VEC,
                     NUM_LAYERS, GAUSSIANS, HIDDEN_DIM).to(DEVICE)
    elif file_model == "controller":
        model = Controller(PARAMS_CONTROLLER, ACTION_SPACE).to(DEVICE)
    else:
        raise ValueError("Unknown file_model: {}".format(file_model))

    checkpoint = load_torch_models(path, model, models[0])
    if file_model == "controller":
        file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                 '..', 'saved_models', current_time,
                                 "{}-solver.pkl".format(solver_version))
        solver = pickle.load(open(file_path, 'rb'))
        return checkpoint, model, solver
    return model, checkpoint
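A minimal usage sketch (the run timestamp is hypothetical): on success the function returns (model, checkpoint), or (checkpoint, model, solver) for a controller, while failure returns (False, version).

# Hypothetical call: load the saved VAE tagged with version 1 for run "1552412399".
result = get_player("1552412399", version=1, file_model="vae")
if result[0] is False:
    print("No saved VAE found for this run")
else:
    model, checkpoint = result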
Example #3
class RolloutGenerator(object):
    def __init__(self, ga):
        # ga reference
        self.ga = ga

        # compressor model
        self.vae = ga.compressor

        # controller model; trained on the go
        self.controller = Controller(ga.input_size, ga.output_size).cuda()

    def get_action(self, obs, bodystate, brushstate, pulse):
        if self.ga.cpg_enabled:
            bodystate_comp = torch.cat((bodystate, brushstate, pulse))
        else:
            bodystate_comp = torch.cat((bodystate, brushstate))
        latent_mu, _ = self.vae.cuda().encoder(obs.cuda())
        action = self.controller.cuda().forward(
            latent_mu.flatten(),
            bodystate_comp.cuda().flatten())

        return action.squeeze().cpu().numpy()

    def do_rollout(self, generation, id, early_termination=True):
        with torch.no_grad():
            client = Client(ClientType.ROLLOUT, self.ga.obs_size)
            client.start(generation, id, rollout=self)
Example #4
def init_models(current_time, load_vae=False, load_lstm=False, load_controller=True, sequence=SEQUENCE):

    vae = lstm = best_controller = solver = checkpoint = None
    if load_vae:
        vae, checkpoint = load_model(current_time, -1, model="vae")
        if not vae:
            vae = ConvVAE((HEIGHT, WIDTH, 3), LATENT_VEC).to(DEVICE)
    
    if load_lstm:
        lstm, checkpoint = load_model(current_time, -1, model="lstm", sequence=sequence)
        if not lstm:
            lstm = LSTM(sequence, HIDDEN_UNITS, LATENT_VEC,
                        NUM_LAYERS, GAUSSIANS, HIDDEN_DIM).to(DEVICE)

    if load_controller:    
        res = load_model(current_time, -1, model="controller")
        checkpoint = res[0]
        if len(res) > 2:
            best_controller = res[1]
            solver = res[2]
            current_ctrl_version = checkpoint['version']
        else:
            best_controller = Controller(LATENT_VEC, PARAMS_FC1, ACTION_SPACE).to(DEVICE)
            solver = CMAES(PARAMS_FC1 + LATENT_VEC + 512,
                        sigma_init=SIGMA_INIT,
                        popsize=POPULATION)

    return vae, lstm, best_controller, solver, checkpoint
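A sketch of calling init_models to resume a run (the timestamp is hypothetical; the constants come from the surrounding module):

# Hypothetical usage: load or initialize all three World Models components.
vae, lstm, best_controller, solver, checkpoint = init_models(
    "1552412399", load_vae=True, load_lstm=True, load_controller=True)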
Example #5
    def init(self):
        # get the models
        self.models = {
            json_model["name"]: Model(json_data=json_model)
            for json_model in self.get_data(self.models_endpoint)
        }
        log_str = "Loaded " + str(len(self.models)) + " models: " + str(
            [model.name for model in self.models.values()])
        self.logs.append({
            "ts": time.time(),
            "date": str(datetime.datetime.now()),
            "msg": log_str
        })

        # get the containers
        self.containers = [
            Container(json_data=json_container)
            for json_container in self.get_data(self.containers_endpoint)
        ]

        # group containers by nodes
        self.nodes = set(map(lambda c: c.node, self.containers))
        self.containers_on_node = {}
        for node in self.nodes:
            self.containers_on_node[node] = list(
                filter(lambda c: c.node == node, self.containers))
        log_str = "Containers by node: " + str([{
            node: [c.to_json() for c in self.containers_on_node[node]]
        } for node in self.containers_on_node])
        self.logs.append({
            "ts": time.time(),
            "date": str(datetime.datetime.now()),
            "msg": log_str
        })

        # init controllers
        self.controllers = []
        t = time.time()
        for container in list(
                filter(lambda c: c.device == Device.CPU and c.active,
                       self.containers)):
            c = Controller(container)
            c.next_action = t
            self.controllers.append(c)
Example #6
transform = transforms.Compose([
	transforms.Resize((RED_SIZE, RED_SIZE)),
	transforms.ToTensor(),
])


trained = 0
# model = VAE(3, LSIZE).to(device)
model = VAE(3, LSIZE)
model = torch.nn.DataParallel(model, device_ids=range(8))
model.cuda()
optimizer = optim.Adam(model.parameters(), lr=learning_rate, betas=(0.9, 0.999))
model_p = VAE_a(7, LSIZE)
model_p = torch.nn.DataParallel(model_p, device_ids=range(8))
model_p.cuda()
optimizer_p = optim.Adam(model_p.parameters(), lr=learning_rate, betas=(0.9, 0.999))
controller = Controller(LSIZE, 3)
controller = torch.nn.DataParallel(controller, device_ids=range(8))
controller = controller.cuda()
optimizer_a = optim.SGD(controller.parameters(), lr=learning_rate * 10)
# scheduler = ReduceLROnPlateau(optimizer, 'min', factor=0.5, patience=5)
# earlystopping = EarlyStopping('min', patience=30)

vis = visdom.Visdom(env='pa_train')

current_window = vis.image(
	np.random.rand(64, 64),
	opts=dict(title='current!', caption='current.'),
)
recon_window = vis.image(
	np.random.rand(64, 64),
	opts=dict(title='Reconstruction!', caption='Reconstruction.'),
)
Example #7
 train_transform = A.Compose([
     A.OneOf([
         A.RGBShift(p=1),
         A.RandomBrightness(p=1),
         A.RandomContrast(p=1)
     ],
             p=0.5),
     A.OneOf([A.ElasticTransform(p=1.0),
              A.IAAPiecewiseAffine(p=1.0)],
             p=0.5),
     A.Normalize(p=1.0),
     pytorch.ToTensorV2(),
 ])
 val_transform = A.Compose([
     A.Resize(height=512, width=512, p=1.0),
     A.Normalize(p=1.0),
     pytorch.ToTensorV2(),
 ])
 train_loader, val_loader = generate_train_validation_dataloader(
     train_df, val_df, config["train_parameters"]["batch_size"],
     "data/data/images/", train_transform, val_transform)
 EF_Net = TL_EfficientNet(config["network_parameters"], True).to(device)
 Optimizer = optim.Adam(EF_Net.parameters(),
                        lr=config["train_parameters"]["learning_rate"])
 Loss = nn.CrossEntropyLoss()
 Control = Controller(model=EF_Net,
                      optimizer=Optimizer,
                      loss=Loss,
                      train_data=train_loader,
                      val_data=val_loader,
                      epochs=config["train_parameters"]["epochs"],
                      device=device)
 Control.train()
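The snippet above pulls its hyperparameters from a config mapping; a minimal sketch of the keys it reads (all values here are illustrative assumptions, not the project's defaults):

# Illustrative config; only the keys accessed above are shown.
config = {
    "network_parameters": {},   # contents forwarded to TL_EfficientNet (unknown here)
    "train_parameters": {
        "batch_size": 32,       # assumed value
        "learning_rate": 3e-4,  # assumed value
        "epochs": 20,           # assumed value
    },
}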
Example #8
def main():
    global args

    np.random.seed(args.seed)
    torch.cuda.manual_seed(args.seed)

    if args.fixed_arc:
        sys.stdout = Logger(filename='logs/' + args.output_filename + '_fixed.log')
    else:
        sys.stdout = Logger(filename='logs/' + args.output_filename + '.log')

    print(args)

    data_loaders = load_datasets()

    controller = Controller(search_for=args.search_for,
                            search_whole_channels=True,
                            num_layers=args.child_num_layers,
                            num_branches=args.child_num_branches,
                            out_filters=args.child_out_filters,
                            lstm_size=args.controller_lstm_size,
                            lstm_num_layers=args.controller_lstm_num_layers,
                            tanh_constant=args.controller_tanh_constant,
                            temperature=None,
                            skip_target=args.controller_skip_target,
                            skip_weight=args.controller_skip_weight)
    controller = controller.cuda()

    shared_cnn = SharedCNN(num_layers=args.child_num_layers,
                           num_branches=args.child_num_branches,
                           out_filters=args.child_out_filters,
                           keep_prob=args.child_keep_prob)
    shared_cnn = shared_cnn.cuda()

    # https://github.com/melodyguan/enas/blob/master/src/utils.py#L218
    controller_optimizer = torch.optim.Adam(params=controller.parameters(),
                                            lr=args.controller_lr,
                                            betas=(0.0, 0.999),
                                            eps=1e-3)

    # https://github.com/melodyguan/enas/blob/master/src/utils.py#L213
    shared_cnn_optimizer = torch.optim.SGD(params=shared_cnn.parameters(),
                                           lr=args.child_lr_max,
                                           momentum=0.9,
                                           nesterov=True,
                                           weight_decay=args.child_l2_reg)

    # https://github.com/melodyguan/enas/blob/master/src/utils.py#L154
    shared_cnn_scheduler = CosineAnnealingLR(optimizer=shared_cnn_optimizer,
                                             T_max=args.child_lr_T,
                                             eta_min=args.child_lr_min)

    if args.resume:
        if os.path.isfile(args.resume):
            print("Loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            start_epoch = checkpoint['epoch']
            # args = checkpoint['args']
            shared_cnn.load_state_dict(checkpoint['shared_cnn_state_dict'])
            controller.load_state_dict(checkpoint['controller_state_dict'])
            shared_cnn_optimizer.load_state_dict(checkpoint['shared_cnn_optimizer'])
            controller_optimizer.load_state_dict(checkpoint['controller_optimizer'])
            shared_cnn_scheduler.optimizer = shared_cnn_optimizer  # Not sure if this actually works
            print("Loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch']))
        else:
            raise ValueError("No checkpoint found at '{}'".format(args.resume))
    else:
        start_epoch = 0

    if not args.fixed_arc:
        train_enas(start_epoch,
                   controller,
                   shared_cnn,
                   data_loaders,
                   shared_cnn_optimizer,
                   controller_optimizer,
                   shared_cnn_scheduler)
    else:
        assert args.resume != '', 'A pretrained model should be used when training a fixed architecture.'
        train_fixed(start_epoch,
                    controller,
                    shared_cnn,
                    data_loaders)
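main() reads its settings from a module-level args object; below is a sketch of an argparse parser covering the attributes the function accesses. The flag names are taken from the code above, but every default is an illustrative assumption:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--search_for', default='macro')
parser.add_argument('--seed', type=int, default=1)
parser.add_argument('--output_filename', default='enas')
parser.add_argument('--fixed_arc', default='')
parser.add_argument('--resume', default='')
parser.add_argument('--child_num_layers', type=int, default=12)
parser.add_argument('--child_num_branches', type=int, default=6)
parser.add_argument('--child_out_filters', type=int, default=36)
parser.add_argument('--child_keep_prob', type=float, default=0.9)
parser.add_argument('--child_lr_max', type=float, default=0.05)
parser.add_argument('--child_lr_min', type=float, default=0.0005)
parser.add_argument('--child_lr_T', type=int, default=10)
parser.add_argument('--child_l2_reg', type=float, default=0.00025)
parser.add_argument('--controller_lstm_size', type=int, default=64)
parser.add_argument('--controller_lstm_num_layers', type=int, default=1)
parser.add_argument('--controller_tanh_constant', type=float, default=1.5)
parser.add_argument('--controller_lr', type=float, default=0.001)
parser.add_argument('--controller_skip_target', type=float, default=0.4)
parser.add_argument('--controller_skip_weight', type=float, default=0.8)
args = parser.parse_args()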
Example #9
import numpy as np
from models.model import TL_ResNet50
from utils.utils import get_device, read_parameters, separate_train_val
from models.controller import Controller
from PIL import Image

def load_image(image_name, transformer=None):
    image = Image.open(image_name)
    if transformer is None:
        image = np.array(image).transpose(2, 0, 1)/255
        image = image.astype(np.float32)
    else:
        image = transformer(image)
    return image

def evaluate(model, data):
    out = model(data)
    return out

if __name__ == "__main__":
    configs = read_parameters()
    device = get_device()
    ResNet50 = TL_ResNet50(configs["network_parameters"], pretrained=True).to(device)
    Control = Controller(ResNet50)
    Control.load()
    ResNet50.eval()
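A minimal sketch of single-image inference with the helpers above (the image path is hypothetical):

import torch

# Hypothetical call: run one image through the loaded model.
img = load_image("data/images/sample.jpg")             # HWC -> CHW float32 in [0, 1]
batch = torch.from_numpy(img).unsqueeze(0).to(device)  # add a batch dimension
with torch.no_grad():
    prediction = evaluate(ResNet50, batch)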
Example #10
trained = 0
#model = VAE(3, LSIZE).to(device)
model = VAE(3, LSIZE)
model = torch.nn.DataParallel(model, device_ids=range(8))
model.cuda()
optimizer = optim.Adam(model.parameters(),
                       lr=learning_rate,
                       betas=(0.9, 0.999))
model_p = VAE_a(7, LSIZE)
model_p = torch.nn.DataParallel(model_p, device_ids=range(8))
model_p.cuda()
optimizer_p = optim.Adam(model_p.parameters(),
                         lr=learning_rate,
                         betas=(0.9, 0.999))
controller = Controller(LSIZE, 4)
controller = torch.nn.DataParallel(controller, device_ids=range(8))
controller = controller.cuda()
optimizer_a = optim.Adam(controller.parameters(),
                         lr=learning_rate,
                         betas=(0.9, 0.999))
# scheduler = ReduceLROnPlateau(optimizer, 'min', factor=0.5, patience=5)
# earlystopping = EarlyStopping('min', patience=30)

vis = visdom.Visdom(env='pa_train')

current_window = vis.image(
    np.random.rand(64, 64),
    opts=dict(title='current!', caption='current.'),
)
recon_window = vis.image(
    np.random.rand(64, 64),
    opts=dict(title='Reconstruction!', caption='Reconstruction.'),
)
Example #11
validation_split = 0.1
shuffle_dataset = True
test_split = 0.1
random_seed = 42
class_num = 10

# LOSS = FocalLoss()
DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

controller = Controller(search_for=args.search_for,
                        search_whole_channels=True,
                        num_layers=args.child_num_layers,
                        num_branches=args.child_num_branches,
                        out_filters=args.child_out_filters,
                        lstm_size=args.controller_lstm_size,
                        lstm_num_layers=args.controller_lstm_num_layers,
                        tanh_constant=args.controller_tanh_constant,
                        temperature=None,
                        skip_target=args.controller_skip_target,
                        skip_weight=args.controller_skip_weight)
controller = controller.cuda()

shared_cnn = SharedCNN(num_layers=args.child_num_layers,
                       num_branches=args.child_num_branches,
                       out_filters=args.child_out_filters,
                       keep_prob=args.child_keep_prob)
shared_cnn = shared_cnn.cuda()

controller.eval()
controller()
Example #12
  if (seed >= 0):
    env.seed(seed)

  return env
transform = transforms.Compose([
  transforms.ToPILImage(),
  transforms.Resize((64, 64)),
  # transforms.RandomHorizontalFlip(),
  transforms.ToTensor(),
])
# from https://github.com/openai/gym/blob/master/gym/envs/box2d/car_racing.py
if __name__=="__main__":
  model=VAE(3, 64)
  model=torch.nn.DataParallel(model,device_ids=range(1))
  model.cuda()
  controller=Controller(64,3)
  controller=torch.nn.DataParallel(controller,device_ids=range(1))
  controller=controller.cuda()
  state = torch.load('/home/ld/gym-car/log/vae/contorl_checkpoint_52.pkl')
  controller.load_state_dict(state['state_dict'])
  print('controller load success')
  state = torch.load('/home/ld/gym-car/log/vae/vae_checkpoint_52.pkl')
  model.load_state_dict(state['state_dict'])
  print('vae load success')
  model_p = VAE_a(7, 64)
  model_p = torch.nn.DataParallel(model_p, device_ids=range(1))
  model_p.cuda()
  state = torch.load('/home/ld/gym-car/log/vae/pre_checkpoint_52.pkl')
  model_p.load_state_dict(state['state_dict'])
  print('prediction load success')
Example #13
def train_controller(current_time):
    """
    Train the controllers with the CMA-ES algorithm, improving candidate
    solutions by evaluating them in parallel with multiprocessing.
    """

    current_time = str(current_time)
    number_generations = 1
    games = GAMES
    levels = LEVELS
    current_game = False
    result_queue = Queue()

    vae, lstm, best_controller, solver, checkpoint = init_models(
        current_time,
        sequence=1,
        load_vae=True,
        load_controller=True,
        load_lstm=True)
    if checkpoint:
        current_ctrl_version = checkpoint["version"]
        current_solver_version = checkpoint["solver_version"]
        new_results = solver.result()
        current_best = new_results[1]
    else:
        current_ctrl_version = 1
        current_solver_version = 1
        current_best = 0

    while True:
        solutions = solver.ask()
        fitlist = np.zeros(POPULATION)
        eval_left = 0

        ## Once a level is beaten, remove it from the training set of levels
        if current_best > SCORE_CAP or not current_game:
            if not current_game or len(levels[current_game]) == 0:
                current_game = games[0]
                games.remove(current_game)
                current_best = 0
            current_level = np.random.choice(levels[current_game])
            levels[current_game].remove(current_level)

        print("[CONTROLLER] Current game: %s and level is: %s" %
              (current_game, current_level))
        while eval_left < POPULATION:
            jobs = []
            todo = min(PARALLEL, POPULATION - eval_left)

            ## Create the child processes to evaluate in parallel
            print("[CONTROLLER] Starting new batch")
            for job in range(todo):
                process_id = eval_left + job

                ## Assign new weights to the controller, given by the CMA
                controller = Controller(LATENT_VEC, PARAMS_FC1,
                                        ACTION_SPACE).to(DEVICE)
                init_controller(controller, solutions[process_id])

                ## Start the evaluation
                new_game = VAECGame(process_id, vae, lstm, controller,
                                    current_game, current_level, result_queue)
                new_game.start()
                jobs.append(new_game)

            ## Wait for the evaluation to be completed
            for p in jobs:
                p.join()

            eval_left = eval_left + todo
            print("[CONTROLLER] Done with batch")

        ## Get the results back from the processes
        times = create_results(result_queue, fitlist)

        ## For display
        current_score = np.max(fitlist)
        average_score = np.mean(fitlist)

        ## Update solver with results
        max_idx = np.argmax(fitlist)
        fitlist = rankmin(fitlist)
        solver.tell(fitlist)
        new_results = solver.result()

        ## Display
        print("[CONTROLLER] Total duration for generation: %.3f seconds, average duration:"
            " %.3f seconds per process, %.3f seconds per run" % ((np.sum(times), \
                    np.mean(times), np.mean(times) / REPEAT_ROLLOUT)))
        print("[CONTROLLER] Creating generation: {} ...".format(
            number_generations + 1))
        print("[CONTROLLER] Current best score: {}, new run best score: {}".
              format(current_best, current_score))
        print(
            "[CONTROLLER] Best score ever: {}, current number of improvements: {}"
            .format(current_best, current_ctrl_version))
        print(
            "[CONTROLLER] Average score on all of the processes: {}\n".format(
                average_score))

        ## Save the new best controller
        if current_score > current_best:
            init_controller(best_controller, solutions[max_idx])
            state = {
                'version': current_ctrl_version,
                'solver_version': current_solver_version,
                'score': current_score,
                'level': current_level,
                'game': current_game,
                'generation': number_generations
            }
            save_checkpoint(best_controller, "controller", state, current_time)
            current_ctrl_version += 1
            current_best = current_score

        ## Save solver and change level to a random one
        if number_generations % SAVE_SOLVER_TICK == 0:
            dir_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                    'saved_models', current_time,
                                    "{}-solver.pkl".format(current_solver_version))
            pickle.dump(solver, open(dir_path, 'wb'))
            current_solver_version += 1
            current_level = np.random.choice(levels[current_game])

        number_generations += 1
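A sketch of an entry point for the trainer above (assumed, not shown in the original): the run is tagged with the current Unix time, which the function converts to a string for its save paths.

# Hypothetical entry point.
if __name__ == "__main__":
    import time
    train_controller(int(time.time()))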
Example #14
trained = 0
#model = VAE(3, LSIZE).to(device)
model = VAE(3, LSIZE)
model = torch.nn.DataParallel(model, device_ids=range(7))
model.cuda()
optimizer = optim.Adam(model.parameters(),
                       lr=learning_rate,
                       betas=(0.9, 0.999))
model_p = VAE_a(7, LSIZE)
model_p = torch.nn.DataParallel(model_p, device_ids=range(7))
model_p.cuda()
optimizer_p = optim.Adam(model_p.parameters(),
                         lr=learning_rate,
                         betas=(0.9, 0.999))
controller = Controller(LSIZE, 3)
controller = torch.nn.DataParallel(controller, device_ids=range(7))
controller = controller.cuda()
optimizer_a = optim.Adam(controller.parameters(),
                         lr=learning_rate,
                         betas=(0.9, 0.999))
# scheduler = ReduceLROnPlateau(optimizer, 'min', factor=0.5, patience=5)
# earlystopping = EarlyStopping('min', patience=30)

vis = visdom.Visdom(env='pa_train')

current_window = vis.image(
    np.random.rand(64, 64),
    opts=dict(title='current!', caption='current.'),
)
recon_window = vis.image(
    np.random.rand(64, 64),
    opts=dict(title='Reconstruction!', caption='Reconstruction.'),
)
Example #15
     A.Normalize(p=1.0),
     pytorch.ToTensorV2(),
 ])
 KF = StratifiedKFold()
 for train, val in KF.split(df["labels"].values, df["classes"].values):
     x_train, x_val = np.array(df["labels"].values)[train], np.array(df["labels"].values)[val]
     y_train, y_val = np.array(df["classes"].values)[train], np.array(df["classes"].values)[val],
     train_loader, val_loader = generate_train_validation_dataloader(x_train, 
                                                                     y_train,
                                                                     transform,
                                                                     x_val,
                                                                     y_val,
                                                                     transform_val_test,
                                                                     configs["train_parameters"]["batch_size"],
                                                                     "data/images/")
     EFNet = EF_Net().to(device)
     Loss = nn.CrossEntropyLoss()
     Optimizer = optim.Adam(EFNet.parameters(),
                            lr=configs["train_parameters"]["learning_rate"])
     Stepper = optim.lr_scheduler.ReduceLROnPlateau(Optimizer, patience=configs["train_parameters"]["patience"])
     Metrics = Accuracy_Metric()
     Control = Controller(model=EFNet,
                          optimizer=Optimizer,
                          loss=Loss,
                          metric=Metrics,
                          train_data=train_loader,
                          validation_data=val_loader,
                          epochs=configs["train_parameters"]["epochs"],
                          device=device,
                          lr_scheduler=Stepper)
     Control.train()
Example #16
    def run(self, max_generations, folder, ga_id='', init_solution_id=''):
        if ga_id == '':
            ga_id = self.init_time

        # disk
        results_dir = os.path.join(folder, ga_id)
        if not os.path.exists(results_dir):
            os.makedirs(results_dir)

        fitness_path = os.path.join(
            results_dir, 'fitness.txt'
        )  # most important fitness results per run (for plotting)
        ind_fitness_path = os.path.join(
            results_dir,
            'ind_fitness.txt')  # more detailed fitness results per individual
        solver_path = os.path.join(
            results_dir, "solver.pkl")  # contains the current population
        best_solver_path = os.path.join(
            results_dir, "best_solver.pkl")  # population snapshot at the best generation so far
        init_solution_path = os.path.join(
            os.path.join(folder, init_solution_id),
            "solver.pkl")  # path to initial solution solver

        current_generation = 0
        P = self.P
        best_f = -sys.maxsize

        # initialize controller instance to be saved
        from models.controller import Controller
        best_controller = Controller(P[0].input_size, P[0].output_size)

        # initialize cma es (start from scratch or load previously saved solver/population)
        resume = False
        if os.path.exists(solver_path):
            resume = True
            self.solver = pickle.load(open(solver_path, 'rb'))
            new_results = self.solver.result()
            best_f = new_results[1]

            if os.path.exists(fitness_path):
                with open(fitness_path, 'r') as f:
                    lines = f.read().splitlines()
                    last_line = lines[-1]
                    current_generation = int(last_line.split('/')[0])
        # start from scratch but with an initial solution param
        elif os.path.exists(init_solution_path):
            tmp_solver = pickle.load(open(init_solution_path, 'rb'))
            self.solver = CMAES(num_params=self.num_controller_params,
                                solution_init=tmp_solver.best_param(),
                                sigma_init=0.1,
                                popsize=self.pop_size)
        # completely start from scratch
        else:
            self.solver = CMAES(num_params=self.num_controller_params,
                                sigma_init=0.1,
                                popsize=self.pop_size)

        if not resume:
            with open(fitness_path, 'a') as file:
                file.write('gen/avg/cur/best\n')
            with open(ind_fitness_path, 'a') as file:
                file.write(
                    'gen/id/fitness/coverage/coverageReward/IC/PC/PCt0/PCt1\n')

        while current_generation < max_generations:

            fitness = np.zeros(self.pop_size)
            results_full = np.zeros(self.pop_size)

            print(f'Generation {current_generation}')
            print(f'Evaluating individuals: {len(P)}')

            # ask the ES to give us a set of candidate solutions
            solutions = self.solver.ask()

            # evaluate all candidates
            for i, s in enumerate(P):
                set_controller_weights(s.controller, solutions[i])
                s.run_solution(generation=current_generation, local_id=i)

            # request fitness from simulator
            results_full = Client(ClientType.REQUEST).start()
            fitness = results_full[:, 0]

            for i, s in enumerate(P):
                s.fitness = fitness[i]

            current_f = np.max(fitness)
            average_f = np.mean(fitness)
            print(
                f'Current best: {current_f}\nCurrent average: {average_f}\nAll-time best: {best_f}'
            )

            # return rewards to ES for param update
            self.solver.tell(fitness)

            max_index = np.argmax(fitness)
            new_results = self.solver.result()

            # process results
            pickle.dump(self.solver, open(solver_path, 'wb'))
            if current_f > best_f:
                set_controller_weights(best_controller, solutions[max_index])
                torch.save(best_controller,
                           os.path.join(results_dir, 'best_controller.pth'))

                # Save solver and change level to a random one
                pickle.dump(self.solver, open(best_solver_path, 'wb'))
                best_f = current_f

            for i, s in enumerate(P):
                # fitness/coverage/coverageReward/IC/PC/PCt0/PCt1
                res = results_full[i, :]
                res_str = ('/'.join(['%.6f'] * len(res))) % tuple(res)

                with open(ind_fitness_path, 'a') as file:
                    file.write('%d/%d/%s\n' % (current_generation, i, res_str))

            res_str = '%d/%f/%f/%f' % (current_generation, average_f,
                                       current_f, best_f)
            print(f'gen/avg/cur/best : {res_str}')
            with open(fitness_path, 'a') as file:
                file.write(f'{res_str}\n')

            if current_generation > max_generations:
                break

            gc.collect()
            current_generation += 1

        print('Finished')
Example #17
m_optimizer = torch.optim.RMSprop(m_model.parameters(), lr=1e-3,
                                  alpha=.9)  # pipaek: hardmaru uses lr=1e-4.
m_scheduler = ReduceLROnPlateau(m_optimizer, 'min', factor=0.5, patience=5)
m_earlystopping = EarlyStopping('min', patience=30)  # patience 30 -> 5

# 3-3. Train the MDN-RNN model (M)
m_model_train_proc(rnn_dir,
                   m_model,
                   v_model,
                   m_dataset_train,
                   m_dataset_test,
                   m_optimizer,
                   m_scheduler,
                   m_earlystopping,
                   skip_train=True,
                   max_train_epochs=30)
m_model_cell = get_mdrnn_cell(rnn_dir).to(device)

# 4-1. Create the Controller model (C)
controller = Controller(LSIZE, RSIZE, ASIZE).to(device)

# 4-2. Train the Controller model (C)
controller_train_proc(ctrl_dir,
                      controller,
                      v_model,
                      m_model_cell,
                      skip_train=False)

# 4-3. Demo the Controller model (C) (optional)
controller_test_proc(controller, v_model, m_model_cell)