Example #1
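# Loads generator and discriminator weights via Saver.load_checkpoint() and writes
# the EGG signals reconstructed by test() to .npy files in the output folder.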
def main():
    args = parse()
    save_model = Saver(args.modelfolder)
    use_cuda = args.cuda

    model_G = SpeechEggEncoder()
    model_D = Discriminator()
    model_G, model_D, _, _, _, _ = save_model.load_checkpoint(
        model_G, model_D, file_name=args.modelfile)

    speechfiles = glob(os.path.join(args.speechfolder, "*.npy"))
    eggfiles = glob(os.path.join(args.eggfolder, "*.npy"))
    reconstruction_save_path = args.outputfolder

    test_data = AudioFileDataset(speechfiles,
                                 eggfiles,
                                 args.window,
                                 args.stride,
                                 transform=detrend)
    test_dataloader = DataLoader(test_data, 1, num_workers=4, shuffle=False)

    os.makedirs(reconstruction_save_path, exist_ok=True)

    for egg_reconstructed, f in test(model_G,
                                     model_D,
                                     test_dataloader,
                                     args.window,
                                     args.stride,
                                     use_cuda=use_cuda):
        outputfile = os.path.join(reconstruction_save_path, f[0])

        np.save(outputfile, egg_reconstructed)
Example #2
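    # Saver().saveLabels() writes the current label shapes for the loaded image to the chosen directory.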
    def saveLabels(self, dirname):
        if not dirname:
            return
        saver = Saver()
        shapes = [item.shape() for item in self.allLabelList]
        saver.saveLabels(dirname, shapes, self.loader.image_path)

        self.saver = saver
        self.setClean()
def expo(args):
    def filename_fn(args):
        rs = 'N({}, {})'.format(args.radius, args.sigma)
        return rs

    def fpath(fname):
        _fpath = os.path.join(args.output_dir, fname)
        return _fpath

    length = 5 * args.radius
    linspace, data = SyntheticDataset.grid_data(args.num_points, length=length)

    #    loader = dataset[args.dataset](args)
    #    trainData = loader.train
    #    for batch_idx, samples in enumerate(trainData):
    #        data,labels = samples[DatasetType.InD]

    plt.xlim(-1 * length, length)
    plt.ylim(-1 * length, length)

    for scale in tqdm([1, 2, 3, 4]):
        sigma = scale * args.sigma

        scale_args = deepcopy(args)
        scale_args.sigma = sigma
        fname = filename_fn(scale_args)

        checkpoint_dir = os.path.join(args.work_dir, 'checkpoints')
        saver = Saver(checkpoint_dir)  # makes the directory if not already present
        payload = saver.load(hash_args(
            scale_args))  #hash_args(scale_args) generates the hex string

        def run_and_save(scale_args):
            export = main(scale_args)  #Model creation??

            payload = export['model']
            saver.save(hash_args(scale_args), payload)
            return payload

        export = payload or run_and_save(scale_args)

        with torch.no_grad():
            scores = inference(export, data)
            np_x = data.cpu().numpy()
            for key in scores:
                score = scores[key].cpu().numpy()
                plot_pcolormesh(np_x, linspace, score)
                score_fname = '{}_{}'.format(fname, key)
                plt.title(score_fname)
                flush_plot(plt, fpath(score_fname) + '.png')
def main(args):
    tf = '_tf' if args.tf else ''
    run_dir = os.path.join(
        'results/', args.config + tf + '_%.4f' % args.lambda_ient +
        '_%.4f' % args.lambda_tent)
    word_dict = json.load(open(args.data + '/word_dict.json', 'r'))
    vocabulary_size = len(word_dict)
    encoder = Encoder(args.network, args.config)
    decoder = Decoder(vocabulary_size, encoder.dim, args.tf)
    optimizer = optim.Adam(decoder.parameters(), lr=args.lr)
    scheduler = optim.lr_scheduler.StepLR(optimizer, args.step_size)
    encoder.cuda()
    decoder.cuda()
    cross_entropy_loss = nn.CrossEntropyLoss().cuda()
    saver = Saver(Trainer(encoder, decoder, optimizer, scheduler), run_dir)
    writer = SummaryWriter(saver.log_dir)
    train_loader = torch.utils.data.DataLoader(ImageCaptionDataset(
        data_transforms, args.data),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=1)

    val_loader = torch.utils.data.DataLoader(ImageCaptionDataset(
        data_transforms, args.data, split_type='val'),
                                             batch_size=args.batch_size,
                                             shuffle=True,
                                             num_workers=1)

    test_loader = torch.utils.data.DataLoader(ImageCaptionDataset(
        data_transforms, args.data, split_type='test'),
                                              batch_size=args.batch_size,
                                              shuffle=False,
                                              num_workers=1)

    print('Starting training with {}'.format(args))
    for epoch in range(saver.start_epoch, args.epochs + 1):
        train(epoch, encoder, decoder, optimizer, cross_entropy_loss,
              train_loader, word_dict, args.alpha_c, args.log_interval, writer,
              saver, val_loader, args)
        saver.save_model(epoch)
        validate(epoch, encoder, decoder, cross_entropy_loss, val_loader,
                 word_dict, args.alpha_c, args.log_interval, writer, saver)
        test(epoch, encoder, decoder, cross_entropy_loss, test_loader,
             word_dict, args.alpha_c, args.log_interval, writer, saver)
        old_lr = optimizer.param_groups[0]['lr']
        scheduler.step()
        lr = optimizer.param_groups[0]['lr']
        print('learning rate %.7f -> %.7f' % (old_lr, lr))
    writer.close()
Example #5
    def loadJson(self, file):
        if file:
            with open(file, encoding='UTF-8') as f:
                j = json.load(f, strict=False)
            if str.endswith(file, 'tags.json'):
                self.colorTableWidget.loadFromJson(j)
                self._config['tags'] = j
            else:
                self.clearLabels()
                self.view.clear()
                saver = Saver()
                shapes = saver.loadLabels(j)
                [self.addLabel(s) for s in shapes]
                for canvas in self.view:
                    canvas.updateToCenter()
            self._json_file = file
Example #6
    def __init__(self, device):
        model_path = 'weights/itemscorer_action_generator_32'
        encoder_hidden_dim = 32
        self.model = ItemScorerModel(device=device,
                                     encoder_hidden_dim=encoder_hidden_dim,
                                     linear_hidden_dim=encoder_hidden_dim)
        self.saver = Saver(model=self.model,
                           load_pretrained=True,
                           pretrained_model_path=_FILE_PREFIX + model_path,
                           device=device)
Example #7
    def __init__(self, args):
        self.args = args

        # determine the type of tensor (GPU or CPU)
        self.is_cuda = True if torch.cuda.is_available() else False
        self.Tensor = torch.cuda.FloatTensor if self.is_cuda else torch.FloatTensor

        # network initialization
        img_shape = (args.channels, args.img_size, args.img_size)
        self.generator = Generator(args, img_shape)
        self.discriminator = Discriminator(img_shape)

        # to GPU
        if self.is_cuda:
            self.generator.cuda()
            self.discriminator.cuda()

        # init dirs
        self.save_dir = os.path.join(args.exp_dir, args.exp_name)
        self.gen_dir = os.path.join(self.save_dir, args.gen_dir)
        self.result_dir = os.path.join(self.save_dir, args.result_dir)
        if not os.path.exists(self.gen_dir):
            os.makedirs(self.gen_dir)
        if not os.path.exists(self.result_dir):
            os.makedirs(self.result_dir)

        # save
        self.saver = Saver(self.save_dir)
        self.save_dict = {
            'generator': self.generator,
            'discriminator': self.discriminator,
        }

        # Info
        print('\n------ Model Info ------')
        print('amount of parameters:', self.network_paras())
        print('Using GPU:', self.is_cuda)
        print('{:=^40}'.format(' Completed '))
Example #8
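# Same Saver.load_checkpoint() pattern as Example #1, but with hard-coded dataset paths and checkpoint file name.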
def main():
    test_data = create_dataloader(
        64,
        "CMU_new/bdl_test/speech",
        "CMU_new/bdl_test/egg_detrended",
        # "Childers/M_test/speech",
        # "Childers/M_test/egg_detrended",
        # "Temp1/speech",
        # "Temp1/egg_detrended",
        200,
        200,
        select=1,
    )

    # save_model = Saver('Models/DotModel/Childers_clean')
    save_model = Saver("checkpoints/clean300")
    use_cuda = True

    model_G = SpeechEggEncoder()
    model_D = Discriminator()
    model_G, model_D, _, _, _, _ = save_model.load_checkpoint(
        model_G, model_D, file_name="bce_epoch_45.pt")

    test(model_G, model_D, test_data, use_cuda=use_cuda)
Example #9
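# Saver(args) manages the experiment directory: save_experiment_config() records the parsed
# arguments, and saver.experiment_dir is reused for the TensorBoard summary.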
                    help='Validation image path')
parser.add_argument('--val-label-path', type=str, default='/path/to/VesselNN/train/label', metavar='N',
                    help='Validation label path')

parser.add_argument('--validate', action='store_true',
                    help='validate')

# checking point
parser.add_argument('--resume', type=str, default=None,
                    help='put the path to resuming file if needed')
parser.add_argument('--checkname', type=str, default='VesselNN_Unsupervised',
                    help='set the checkpoint name')
args = parser.parse_args()

# Define Saver
saver = Saver(args)
saver.save_experiment_config()

# Define Tensorboard Summary
summary = TensorboardSummary(saver.experiment_dir)
writer = summary.create_summary()

# Data
dataset = Directory_Image_Train(images_path=args.train_images_path,
                                labels_path=args.train_labels_path,
                                data_shape=(32, 128, 128),
                                lables_shape=(32, 128, 128),
                                range_norm=args.range_norm)
dataloader = DataLoader(dataset, batch_size=torch.cuda.device_count() * args.batch_size, shuffle=True, num_workers=2)

# Data - validation
Example #10
import json
import numpy as np
from utils import Mapper, LinearLearning, Saver, PendulumEnv

parameters_file = "experiments/exp_1_linear_learning.json"
with open(parameters_file) as j:
    parameters = json.loads(j.read())

mapping = Mapper()
env = PendulumEnv()
saver = Saver()

state_map, state_reverse_map = mapping.get_state_map(
    parameters["step_state"], parameters["decimal_state"])
action_map, action_reverse_map = mapping.get_action_map(
    parameters["step_action"], parameters["decimal_action"])

steps = []
rewards = []
final_mean_reward = []

for i in range(parameters["n_simulations"]):
    lr_learner = LowRankLearning(env=env,
                                 state_set=parameters["state_set"],
                                 state_map=state_map,
                                 action_map=action_map,
                                 state_reverse_map=state_reverse_map,
                                 action_reverse_map=action_reverse_map,
                                 decimal_state=parameters["decimal_state"],
                                 decimal_action=parameters["decimal_action"],
                                 step_state=parameters["step_state"],
Example #11
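# Saver.load_from_pickle() is used here only to read previously saved result pickles before plotting them with TestUtils.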
from matplotlib import rcParams
from utils import Saver, TestUtils

saver = Saver()
test_utils = TestUtils()

rcParams['font.family'] = 'sans-serif'
rcParams['font.sans-serif'] = ['Tahoma']
rcParams['font.size'] = 16

medians_q_learning = saver.load_from_pickle(
    "results/q_learning_medians.pickle")
stds_q_learning = saver.load_from_pickle("results/q_learning_stds.pickle")
frob_errors_q_learning = saver.load_from_pickle(
    "results/q_learning_frob_errors.pickle")

colors = ['b', 'r', 'g', 'y']
epsilons = sorted([float(epsilon) for epsilon in medians_q_learning.keys()])

medians_lr_learning = saver.load_from_pickle(
    "results/lr_learning_medians.pickle")
stds_lr_learning = saver.load_from_pickle("results/lr_learning_stds.pickle")
frob_errors_lr_learning = saver.load_from_pickle(
    "results/lr_learning_frob_errors.pickle")

test_utils.plot_smoothed_steps(medians_q=medians_q_learning,
                               medians_lr=medians_lr_learning,
                               epsilons=epsilons,
                               colors=colors)

test_utils.plot_sfe(epsilons=epsilons,
Example #12
def restore(args, model):
    # load
    saver = Saver(args)
    return saver.load(model)
Example #13
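# Atari DQN trainer: Saver(name_temp) provides the run directory and persists the trainer state,
# replay memory and network weights (see saveAll and loadActor below).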
class Trainer:
    def __init__(self, args, n_workers=4, memory_ram=2, time_saving=45):
        # Getting Ray
        self.Ray = args.ray
        if self.Ray and not RAY:
            print(
                "-- ray is not available --\nNormal initialization in progress"
            )
            self.Ray = False

        # Some parameters
        self.time_save = time_saving
        self.seed = args.seed
        self.exp_steps = args.steps
        self.test_steps = args.test_steps
        self.test_freq = args.test_freq
        self.memory_start = args.memory_start
        self.update_online = args.update_online
        self.play_steps = args.play
        self.game = args.game
        self.double = args.double
        self.game_actions = args.game_actions
        self.seed = args.seed

        # Other variables to save progress
        self.time = Tocker()
        self.ckTime = Tocker()
        self.acc_test = 0
        self.mean_test = []
        self.std_test = []
        self.actor_test_episodes = 0
        self.test_episodes = 0
        name_temp = args.game
        name_temp += '_double' if self.double else ''
        name_temp += '_' + args.optimizer + '_lr_' + str(args.learning_rate)
        self.saver = Saver(name_temp)
        name_sum = os.path.join(self.saver.dir,
                                "tensorboard_{}".format(name_temp))
        self.writer = SummaryWriter(name_sum)

        #Generating the actors
        self.policy = atariDQN(args.lhist, args.game_actions, args.dropouts)

        self.memoryReplay = MemoryReplay(
            capacity=args.memory_size,
            LHist=args.lhist,
        )

        self.main_actor = ActorDQN(
            self.game,
            args.game_actions,
            self.policy,
            lHist=args.lhist,
            steps_per_update=args.update_online,
            buffer_size=args.buffer_size,
            test_steps=self.test_steps,
            start_steps=self.memory_start,
            device=DEVICE,
            seed=args.seed,
            writer=self.writer,
        )

        self.steps_to_fill_mem = math.ceil(self.memory_start /
                                           (args.buffer_size * args.lhist))

        self.n_actors = NCPUS if n_workers >= NCPUS else n_workers

        if NCPUS > 1 and self.Ray:
            # Actors with ray are created to speed up
            # the filling and testing of the buffer and net
            actors_start_steps = math.ceil(self.memory_start / self.n_actors)
            self.steps_to_fill_mem = math.ceil(actors_start_steps /
                                               (args.buffer_size * args.lhist))
            actors_test_steps = math.ceil(self.test_steps / self.n_actors)

            # ---- Initialize Ray ----
            ray.init(num_cpus=self.n_actors,
                     _memory=memory_ram * GIGA,
                     object_store_memory=400 * MEGA)

            # Buffers for the actors
            #self.buffers = [Buffer(capacity=actors_buffer_size) for _ in range(self.n_actors)]
            # ActorDQN actors used only to fill and evaluate; they access their own buffers only
            actor = ray.remote(ActorDQN)
            self.actors = [actor.remote(self.game,
                                        args.game_actions,
                                        self.policy,
                                        lHist=args.lhist,
                                        buffer_size=args.buffer_size,
                                        test_steps = actors_test_steps,
                                        start_steps = actors_start_steps,
                                        seed = args.seed,
                                        ray_actor = True) \
                                                for i in range(self.n_actors)]
            time.sleep(10)
            print(
                "Trainer set with Ray\nRay resources {} workers with {} GB of RAM"
                .format(self.n_actors, memory_ram))
        else:
            print(timeFormated(), "Trainer set")

        self.main_learner = LearnerDQN(
            self.policy,
            self.memoryReplay,
            args.mini_batch_size,
            learning_rate=args.learning_rate,
            update_target=args.update_target,
            device=DEVICE,
            double=args.double,
            optimizer=args.optimizer,
            seed=args.seed,
            writer=self.writer,
        )

    def fillMemory(self):
        I = tqdm(range(self.steps_to_fill_mem), desc='Filling Memory')
        self.time.tick
        if self.Ray:
            print("Ray has started. Filling Memory . . .")
            for i in I:
                buffers = ray.get(
                    [actor.fillBuffer.remote() for actor in self.actors])
                self.memoryReplay.combineBuffers(*buffers)
            self.main_actor.steps = self.memory_start
        else:
            for i in I:
                buffer = self.main_actor.fillBuffer()
                self.memoryReplay.combineBuffers(buffer)
        print(
            timeFormated(),
            "Memory Filled to {} in {}".format(self.memory_start,
                                               self.time.tock))
        # Saving a fixed amount of frames to test on.
        s, a, r, s2, t = self.memoryReplay.Sample(500)
        self.main_actor.passTestHistories(s)
        del a, r, s2, t

    def sampleBuffer(self, samples: int = 20):
        print("Displaying {} sample histories from the buffer".format(samples))
        try:
            self.memoryReplay.showBuffer(samples)
        except:
            print("Display samples Stopped")

    def __del__(self):
        print("Trainer Terminated")

    def close(self):
        if self.ckTime.tocktock > 5: self.saveAll()
        self.writer.flush()
        self.writer.close()
        ray.shutdown()

    def train(self):
        self.main_actor.newGame()
        I = tqdm(
            range(0, self.exp_steps),
            desc='Executing and learning',
            unit='updates',
        )
        for i in I:
            bufferReady = self.main_actor.autoStep()
            if bufferReady:
                self.memoryReplay.combineBuffers(self.main_actor.getBuffer())
            self.main_learner.trainStep()
            #self.main_actor.updateModel(self.main_learner.onlineModel()) # They got the same object now
            self.writer.flush()
            if i % self.test_freq == 0:
                self.test()
            if self.ckTime.tocktock >= self.time_save:
                self.saveAll()
                self.ckTime.tick

    def test(self):
        #self.memoryReplay.add(*self.memoryReplay.zeroe)
        # ---- Testing the performance ----
        self.time.tick
        q_mean = self.main_actor.testQHistories()
        if self.Ray:
            print("Ray starting test . . . ")
            # ---- Dividing the total test_steps per actor ----
            # Update the online model in the actors
            updatedOnline = self.main_learner.onlineModel(cpu=True)
            ray.get([
                actor.updateModel.remote(updatedOnline.copy())
                for actor in self.actors
            ])
            # Get test results
            testRes = ray.get(
                [actor.testRun.remote() for actor in self.actors])
            # Consolidate results
            episodeRwd = []
            for rE in testRes:
                episodeRwd += rE
            print("Ray testing done")
        else:
            # ------ main actor perfoms all the steps sequentially ------
            episodeRwd = self.main_actor.testRun()
        # Saving results and logging
        len_episodeRwd = len(episodeRwd)
        tot_episodeRwd = sum(episodeRwd)
        self.actor_test_episodes += len(episodeRwd)
        self.mean_test += [np.mean(episodeRwd if len_episodeRwd != 0 else 0.0)]
        self.std_test += [np.std(episodeRwd if len_episodeRwd != 0 else 0.0)]
        self.acc_test += tot_episodeRwd
        self.writer.add_scalar('test/accumulated_reward', tot_episodeRwd,
                               self.test_episodes)
        self.writer.add_scalar('test/mean_reward', self.mean_test[-1],
                               self.test_episodes)
        self.writer.add_scalar('test/std_reward', self.std_test[-1],
                               self.test_episodes)
        self.writer.add_scalar('test/actor_episodes', len_episodeRwd,
                               self.test_episodes)
        self.writer.add_scalar('test/Q-mean', q_mean, self.test_episodes)
        self.writer.flush()
        self.test_episodes += 1
        print(
            timeFormated(),
            "Test done in {}. Reward Accumulated:{}, Mean:{}. Q_mean {}".
            format(self.time.tock, np.round(tot_episodeRwd, 2),
                   np.round(self.mean_test[-1], 2), np.round(q_mean, 3)))

    def saveAll(self):
        # --- Saving buffer and trainer ----
        models = dict()
        self.saver.saveObject(self.dictToSave(), "trainer")
        self.saver.saveObject(self.memoryReplay.dictToSave(), "memory")
        models['online'] = self.main_learner.onlineModel()
        models['target'] = self.main_learner.targetModel()
        models['optimizer'] = self.main_learner.optimizerState()
        self.saver.saveModel(models, "models")

    def loadActor(self, Dir):
        os.chdir(Dir)
        files = os.listdir()
        print("Files on direction:")
        for n, File in enumerate(files):
            print("{} : {}".format(n, File))
        while 1:
            choice = input("Enter the number for the model to load :")
            choice = int(choice)
            if choice > len(files) or not isinstance(choice,
                                                     int) or choice < 0:
                print("Number not valid. Please try again.")
            else:
                break
        model = os.path.join(Dir, files[choice])
        models = self.saver.loadModel(model, DEVICE)
        self.main_actor.updateModel(models['online'])
        print("Actor restored from", Dir)

    def dictToSave(self):
        this = dict()
        res = dict()
        res['means'] = self.mean_test
        res['stds'] = self.std_test
        res['acc'] = self.acc_test
        res['episodes'] = self.actor_test_episodes
        this['results'] = res
        this['seed'] = self.seed
        this['Steps'] = self.exp_steps
        this['testSteps'] = self.test_steps
        this['memoryStart'] = self.memory_start
        this['double'] = self.double
        this['game'] = self.game
        this['game_actions'] = self.game_actions
        return this

    def loadFromDict(self, this):
        try:
            res = this['results']
            self.mean_test = res['means']
            self.std_test = res['stds']
            self.acc_test = res['acc']
            self.actor_test_episodes = res['episodes']
            self.seed = this['seed']
            self.exp_steps = this['Steps']
            self.test_steps = this['testSteps']
            self.memory_start = this['memoryStart']
            self.double = this['double']
            self.game_actions = this['game_actions']
            print("Successfully loading Trainer from dict")
        except:
            print("Error loading Trainer loaded from dict")

    def playTest(self):
        try:
            import imageio
            GIF = True
            bufferFrame = []
        except:
            GIF = False
            print(
                "imageio is missing from the packages. A .gif from the run won't be made."
            )
        if self.play_steps > 0:
            # Start playing sequence
            # --- wait user to watch ----
            a = input("Press any key to initialize test . . .")
            self.main_actor.isTest = True
            self.main_actor.updateModel(self.main_learner.onlineModel())
            self.main_actor.newGame()
            env = self.main_actor.env
            game = self.main_actor.game
            print("Test of the agent in {}".format(game))
            episodes, reward = 0, 0
            I = tqdm(range(0, self.play_steps),
                     'Test in progress',
                     unit=' plays')
            for _ in I:
                self.time.tick
                if GIF:
                    bufferFrame.append(env._get_image())
                env.render()
                stepRwd = self.main_actor.step()
                #60Hz / Skip_Frame as the environment will do
                self.time.lockHz(15 if game != 'space_invaders' else 20)
                if self.main_actor.done:
                    episodes += 1
                reward += stepRwd
            env.close()
            if GIF:
                imageio.mimsave(
                    "./testPlay {} frames {} episodes {} points {} - {}.gif".
                    format(game, self.play_steps, episodes, reward,
                           timeFormated()),
                    bufferFrame,
                    fps=15 if game != 'space_invaders' else 20)
            print(
                timeFormated(),
                "Test play done. Completed {} episodes and accumulated {} points"
                .format(episodes, reward))
        else:
            None
Example #14
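# Saver(config, instance, seed) collects per-run results through saver.append
# (passed as a callback to mfea) and writes them out with saver.save().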
from slgep_lib import wrap_config
from utils import Saver
from mfea import mfea
import argparse
import yaml

# Load configuration
with open('config.yaml') as f:
    config = yaml.safe_load(f)

# Load benchmark
with open('atari_benchmark/multitask-benchmark.yaml') as f:
    benchmark = yaml.safe_load(f)

instances = []
for i in range(1, 41):
    if i not in [100]:
        instances.append('multi-' + str(i))

seeds = range(1, 21)

for seed in seeds:
    for instance in instances:
        data = benchmark[instance]
        config.update(data)

        config = wrap_config(config)
        saver = Saver(config, instance, seed)

        mfea(config, saver.append)
        saver.save()
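
# The agent below builds a Saver with load_pretrained / save_frequency options and
# calls saver.save(epoch=..., episode=...) once per act() step.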
class CustomAgent:
    def __init__(self, verbose=False, **kwargs) -> None:
        # Load the config file
        config_file = kwargs['config_file_path'] if 'config_file_path' in kwargs else "config/config.yaml"
        with open(config_file) as reader:
            self.config = yaml.safe_load(reader)
        if 'update_config_fun' in kwargs and kwargs['update_config_fun'] is not None:
            self.config = kwargs['update_config_fun'](self.config)
        if verbose:
            pprint.pprint(self.config, width=1)

        # choose device
        self.device = 'cuda' if torch.cuda.device_count() > 0 else 'cpu'
        if 'gpu' in kwargs and kwargs['gpu'] is not None:
            self.device = 'cuda:{}'.format(kwargs['gpu'])

        # training settings
        self.batch_size = self.config['training']['batch_size']
        self.max_nb_steps_per_episode = self.config['training']['max_nb_steps_per_episode']
        self.nb_epochs = self.config['training']['nb_epochs']

        # set the statistics
        self._episode_has_started = False
        self.last_done = None
        self.mode = "test"
        self.counter = StepCounter(self.batch_size, self.max_nb_steps_per_episode)

        # Init the models and its optimizer
        self.model = Model(hidden_size=self.config['model']['hidden_size'],
                           device=self.device,
                           bidirectional=self.config['model']['bidirectional'],
                           hidden_linear_size=self.config['model']['hidden_linear_size'])
        self.item_scorer = ItemScorer(device=self.device)
        self.navigation_model = Navigation(device=self.device)
        if 'optimizer' in self.config['training']:
            self.optimizer = optim.Adam(self.model.parameters(),
                                        self.config['training']['optimizer']['learning_rate'])
        self.model_updates = 0
        self.model_loss = 0.

        if verbose:
            print(self.model)
            print('Total Model Parameters: {}'.format(count_parameters(self.model)))

        # choose the agent
        self.agent = lambda device, model: HAgent(device=device, model=model, item_scorer=self.item_scorer,
                                                  hcp=self.config['general']['hcp'], navigation_model=self.navigation_model)
        # Command Queue
        self.command_q = None

        # Saving and Loading
        self.experiment_tag = self.config['checkpoint'].get('experiment_tag', 'NONAME')
        self.saver = Saver(model=self.model,
                           ckpt_path=self.config['checkpoint'].get('model_checkpoint_path', 'NOPATH'),
                           experiment_tag=self.experiment_tag,
                           load_pretrained=len(self.config['checkpoint']['pretrained_experiment_path']) > 0,
                           pretrained_model_path=os.path.join(_FILE_PREFIX, self.config['checkpoint']['pretrained_experiment_path']),
                           device=self.device,
                           save_frequency=self.config['checkpoint'].get('save_frequency', 1E10))

        # Logging Statistics
        tb_dir = None if 'tensorboard' not in self.config else os.path.join(self.config['tensorboard']['directory'],
                                                                            self.experiment_tag)
        self.statistics = StatisticsTracker(tb_dir=tb_dir)

        # EventHandler
        self.event_handler = EventHandler()
        self.event_handler.add(self.statistics.stats_episode_clear, Event.NEWEPISODE)
        self.event_handler.add(self.counter.new_episode, Event.NEWEPISODE)

    def _init_episode(self):
        """
        Initialize settings for the start of a new game.
        """
        self.event_handler(Event.NEWEPISODE)

        self._episode_has_started = True
        self.transitions = [[] for _ in range(self.batch_size)]
        self.model.reset_hidden()
        self.last_score = np.array([0] * self.batch_size)
        self.last_done = [False] * self.batch_size
        self.model_updates = 0
        self.model_loss = 0.

        self.agents = [self.agent(device=self.device, model=self.model) for _ in range(self.batch_size)]
        self.command_q = [[] for _ in range(self.batch_size)]



    def act_eval(self, obs: List[str], scores: List[int], dones: List[bool], infos: List[Dict]):
        """
        Agent step if its in test mode.
        """
        if all(dones):
            self._end_episode(obs, scores)
            return

        # individually for every agent in the batch
        for idx, (observation, score, done, info, cmd_q) in enumerate(zip(obs, scores, dones, infos, self.command_q)):
            if done:
                # placeholder command
                self.command_q[idx] = ['look']

            if len(cmd_q) == 0:
                # only add a new command if there is nothing left in the queue for this agent
                new_cmds, _ = self.agents[idx].step(observation=observation, info=info)
                [self.command_q[idx].append(cmd) for cmd in new_cmds]

        self.counter.step()
        return [cmd_q.pop(0) for cmd_q in self.command_q]


    def act(self, obs: List[str], scores: List[int], dones: List[bool], infos: Dict[str, List[Any]]) -> Optional[List[str]]:
        """
        Step of the agent.
        """
        # re-structure infos
        infos = [{k: v[i] for k, v in infos.items()} for i in range(len(obs))]

        if not self._episode_has_started:
            self._init_episode()

        if self.mode == 'test':
            return self.act_eval(obs, scores, dones, infos)
        elif self.mode == 'manual_eval':
            return self.manual_eval(obs, scores, dones, infos)

        current_score = []
        # individually for every agent in the batch
        for idx, (observation, score, done, last_done, info, cmd_q) in enumerate(zip(obs, scores, dones, self.last_done, infos, self.command_q)):
            just_finished = (last_done != done)
            if not done or just_finished:
                self.counter.increase_steps_taken(idx)

            if len(cmd_q) > 0:
                # has still commands to fire
                current_score.append(0.)
                continue

            if done and not just_finished:
                self.command_q[idx] = ['look']
                current_score.append(0.)
                continue
            else:
                self.agents[idx].update_score(score)

            # update score
            current_score.append(self.agents[idx].current_score)

            # add new command
            new_cmds, learning_info = self.agents[idx].step(observation=observation, info=info)
            [self.command_q[idx].append(cmd) for cmd in new_cmds]

            # update the model
            self.model_update(done=done,
                              index=learning_info.index,
                              output=learning_info.score,
                              value=learning_info.value,
                              score=self.agents[idx].current_score,
                              batch_idx=idx)

        self.last_done = dones
        self.statistics.stats_episode_append(score=np.mean(current_score))

        if all(dones):
            self._end_episode(obs, scores, cmds=[agent.cmd_memory for agent in self.agents])
            return
        self.saver.save(epoch=self.counter('epoch'), episode=self.counter('episode'))
        self.counter.step()
        return [cmd_q.pop(0) for cmd_q in self.command_q]


    def model_update(self, done, index, output, value, score, batch_idx):
        """
        Store the information for the model update. After it has been invoked 'update_frequency' times for a specific
        agent, the A2C update is performed.
        """
        if self.transitions[batch_idx]:
            self.transitions[batch_idx][-1].reward = torch.Tensor([score])[0].type(torch.float).to(self.device)

        if len(self.transitions[batch_idx]) >= self.config['training']['update_frequency'] or done: # done == just_finished
            # do the update
            self._a2c_update(value, batch_idx)
        else:
            # add the transition
            self.transitions[batch_idx].append(Transition(reward=None,
                                                          index=index,
                                                          output=output,
                                                          value=value,
                                                          done=done))

    def _a2c_update(self, value, batch_idx):
        """
        Uses the stored model information from the last 'update_frequency' steps to perform an A2C update.
        """
        # compute the returns and advantages from the last 'update_frequency' model steps
        returns, advantages = self._discount_rewards(value, self.transitions[batch_idx])

        for transition, _return, advantage in zip(self.transitions[batch_idx], returns, advantages):
            reward, index, output, value, done = transition
            if done:
                continue

            advantage = advantage.detach()
            probs = F.softmax(output, dim=-1)
            log_probs = torch.log(probs)
            log_action_prob = log_probs[index]
            policy_loss = -log_action_prob * advantage
            value_loss = (.5 * (value - _return)**2)
            entropy = (-log_probs * probs).mean()

            # add up the loss over time
            self.model_loss += policy_loss + 0.5 * value_loss - 0.1 * entropy

            self.statistics.stats_episode_append(
                reward=reward,
                policy=policy_loss.item(),
                value=value_loss.item(),
                entropy=entropy.item(),
                confidence=torch.mean(torch.exp(log_action_prob)).item()
            )
        self.model_updates += 1

        self.transitions[batch_idx] = []

        if self.model_loss == 0 or self.model_updates % self.batch_size != 0:
            # print('skipped')
            return

        # Backpropagation is invoked only once all agents in the batch have performed their update,
        # to reduce computational complexity.

        self.statistics.stats_episode_append(loss=self.model_loss.item())
        self.optimizer.zero_grad()
        self.model_loss.backward(retain_graph=True)
        nn.utils.clip_grad_norm_(self.model.parameters(), self.config['training']['optimizer']['clip_grad_norm'])
        self.optimizer.step()

        self.model_loss = 0.



    def _discount_rewards(self, last_value, transitions):
        """
        Discounts the rewards of the agent over time to compute the returns and advantages.
        """
        returns, advantages = [], []
        R = last_value.data
        for t in reversed(range(len(transitions))):
            rewards, _, _, values, done = transitions[t]
            R = rewards + self.config['general']['discount_gamma'] * R
            adv = R - values
            returns.append(R)
            advantages.append(adv)

        return returns[::-1], advantages[::-1]

    def _end_episode(self, observation, scores, **kwargs):
        self._episode_has_started = False

        if self.mode != 'test':
            points, possible_points = self._get_points(observation, scores)
            self.statistics.flush_episode_statistics(possible_points=possible_points,
                                                     episode_no=self.counter('episode'),
                                                     steps=np.mean(self.counter('steps_taken')),
                                                     points=points,
                                                     **kwargs)

    def _get_points(self, obs, scores):
        """
        Parses the obtained points from the last observation.
        """
        batch_size = len(obs)
        points = []
        possible_points = None
        for i in range(batch_size):
            try:
                points.append(int(obs[i].split('You scored ')[1].split(' out of a possible')[0]))
                possible_points = int(obs[i].split('out of a possible ')[1].split(',')[0])
            except:
                points.append(scores[i])
        possible_points = possible_points if possible_points is not None else 5
        return points, possible_points

    def train(self) -> None:
        """ Tell the agent it is in training mode. """
        self.mode = 'train'

    def eval(self) -> None:
        """ Tell the agent it is in evaluation mode. """
        self.mode = 'test'
        self.model.reset_hidden()

    def select_additional_infos(self) -> EnvInfos:
        request_infos = EnvInfos()
        request_infos.description = True
        request_infos.inventory = True
        if self.config['general']['hcp'] >= 2:
            request_infos.entities = True
            request_infos.verbs = True
        if self.config['general']['hcp'] >= 4:
            request_infos.extras = ["recipe"]
        if self.config['general']['hcp'] >= 5:
            request_infos.admissible_commands = True


        # TEST
        request_infos.entities = True
        request_infos.verbs = True
        request_infos.extras = ["recipe", "walkthrough"]
        request_infos.admissible_commands = True

        return request_infos

    def started_new_epoch(self):
        """
        Call this function from outside to let the agent know that a new epoch has started.
        """
        self.counter.new_epoch()
Example #16
import json
import gym
import numpy as np
from utils import Dqn, Buffer, Saver

import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam

parameters_file = "experiments/exp_dqn_learning.json"
with open(parameters_file) as j:
    parameters = json.loads(j.read())

env = gym.make('Acrobot-v1')
env._max_episode_steps = np.inf
saver = Saver()

rewards = []
steps = []

for _ in range(parameters["n_simulations"]):

    keras.backend.clear_session()

    alpha = 0.001

    model = Sequential()
    model.add(
        Dense(parameters["hidden_size"],
              input_dim=env.observation_space.shape[0],
              activation='tanh'))
Example #17
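# Segmentation trainer: Saver(opt, mode) supplies the experiment directory and the
# save_experiment_log() / save_eval_result() hooks used during training and validation.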
class Trainer(object):
    def __init__(self, mode):
        # Define Saver
        self.saver = Saver(opt, mode)

        # visualize
        self.summary = TensorboardSummary(self.saver.experiment_dir)
        self.writer = self.summary.create_summary()

        # Dataset dataloader
        self.train_dataset, self.train_loader = make_data_loader(opt)
        self.nbatch_train = len(self.train_loader)
        self.val_dataset, self.val_loader = make_data_loader(opt, mode="val")
        self.nbatch_val = len(self.val_loader)

        # model
        if opt.sync_bn is None and len(opt.gpu_id) > 1:
            opt.sync_bn = True
        else:
            opt.sync_bn = False
        model = DeepLab(opt)
        # model = CSRNet()
        self.model = model.to(opt.device)

        # Define Optimizer
        train_params = [{
            'params': model.get_1x_lr_params(),
            'lr': opt.lr
        }, {
            'params': model.get_10x_lr_params(),
            'lr': opt.lr * 10
        }]
        self.optimizer = torch.optim.SGD(train_params,
                                         momentum=opt.momentum,
                                         weight_decay=opt.decay)

        # loss
        if opt.use_balanced_weights:
            classes_weights_file = os.path.join(opt.root_dir,
                                                'train_classes_weights.npy')
            if os.path.isfile(classes_weights_file):
                weight = np.load(classes_weights_file)
            else:
                weight = calculate_weigths_labels(self.train_loader,
                                                  opt.root_dir,
                                                  opt.num_classes)
            weight = torch.from_numpy(weight.astype(np.float32))
            print(weight)
            opt.loss['weight'] = weight
        self.loss = build_loss(opt.loss)

        # Define Evaluator
        self.evaluator = Evaluator()

        # Define lr scheduler
        self.scheduler = LR_Scheduler(mode=opt.lr_scheduler,
                                      base_lr=opt.lr,
                                      num_epochs=opt.epochs,
                                      iters_per_epoch=self.nbatch_train,
                                      lr_step=140)

        # Resuming Checkpoint
        self.best_pred = 0.0
        self.start_epoch = opt.start_epoch
        if opt.resume:
            if os.path.isfile(opt.pre):
                print("=> loading checkpoint '{}'".format(opt.pre))
                checkpoint = torch.load(opt.pre)
                opt.start_epoch = checkpoint['epoch']
                self.best_pred = checkpoint['best_pred']
                self.model.load_state_dict(checkpoint['state_dict'])
                print("=> loaded checkpoint '{}' (epoch {})".format(
                    opt.pre, checkpoint['epoch']))
            else:
                print("=> no checkpoint found at '{}'".format(opt.pre))

        if len(opt.gpu_id) > 1:
            print("Using multiple gpu")
            self.model = torch.nn.DataParallel(self.model,
                                               device_ids=opt.gpu_id)

        self.loss_hist = collections.deque(maxlen=500)
        self.timer = Timer(opt.epochs, self.nbatch_train, self.nbatch_val)
        self.step_time = collections.deque(maxlen=opt.print_freq)

    def train(self, epoch):
        self.model.train()
        if opt.freeze_bn:
            self.model.module.freeze_bn() if len(opt.gpu_id) > 1 \
                else self.model.freeze_bn()
        last_time = time.time()
        for iter_num, sample in enumerate(self.train_loader):
            # if iter_num >= 1: break
            try:
                imgs = sample["image"].to(opt.device)
                labels = sample["label"].to(opt.device)

                output = self.model(imgs)

                loss = self.loss(output, labels)
                # torch.nn.utils.clip_grad_norm_(self.model.parameters(), 3)
                loss.backward()
                self.loss_hist.append(float(loss))

                self.optimizer.step()
                self.optimizer.zero_grad()
                self.scheduler(self.optimizer, iter_num, epoch)

                # Visualize
                global_step = iter_num + self.nbatch_train * epoch + 1
                self.writer.add_scalar('train/loss',
                                       loss.cpu().item(), global_step)
                batch_time = time.time() - last_time
                last_time = time.time()
                eta = self.timer.eta(global_step, batch_time)
                self.step_time.append(batch_time)
                if global_step % opt.print_freq == 0:
                    printline = ('Epoch: [{}][{}/{}] '
                                 'lr: (1x:{:1.5f}, 10x:{:1.5f}), '
                                 'eta: {}, time: {:1.3f}, '
                                 'Loss: {:1.4f} '.format(
                                     epoch, iter_num + 1, self.nbatch_train,
                                     self.optimizer.param_groups[0]['lr'],
                                     self.optimizer.param_groups[1]['lr'], eta,
                                     np.sum(self.step_time),
                                     np.mean(self.loss_hist)))
                    print(printline)
                    self.saver.save_experiment_log(printline)
                    last_time = time.time()

                del loss

            except Exception as e:
                print(e)
                continue

    def validate(self, epoch):
        self.model.eval()
        self.evaluator.reset()
        test_loss = 0.0
        with torch.no_grad():
            tbar = tqdm(self.val_loader, desc='\r')
            for i, sample in enumerate(tbar):
                # if i > 3: break
                imgs = sample['image'].to(opt.device)
                labels = sample['label'].to(opt.device)
                path = sample["path"]

                output = self.model(imgs)

                loss = self.loss(output, labels)
                test_loss += loss.item()
                tbar.set_description('Test loss: %.4f' % (test_loss / (i + 1)))

                # Visualize
                global_step = i + self.nbatch_val * epoch + 1
                if global_step % opt.plot_every == 0:
                    # pred = output.data.cpu().numpy()
                    if output.shape[1] > 1:
                        pred = torch.argmax(output, dim=1)
                    else:
                        pred = torch.clamp(output, min=0)
                    self.summary.visualize_image(self.writer, opt.dataset,
                                                 imgs, labels, pred,
                                                 global_step)

                # metrics
                pred = output.data.cpu().numpy()
                target = labels.cpu().numpy() > 0
                if pred.shape[1] > 1:
                    pred = np.argmax(pred, axis=1)
                pred = (pred > opt.region_thd).reshape(target.shape)
                self.evaluator.add_batch(target, pred, path, opt.dataset)

            # Fast test during the training
            Acc = self.evaluator.Pixel_Accuracy()
            Acc_class = self.evaluator.Pixel_Accuracy_Class()
            mIoU = self.evaluator.Mean_Intersection_over_Union()
            FWIoU = self.evaluator.Frequency_Weighted_Intersection_over_Union()
            RRecall = self.evaluator.Region_Recall()
            RNum = self.evaluator.Region_Num()
            mean_loss = test_loss / self.nbatch_val
            result = 2 / (1 / mIoU + 1 / RRecall)
            self.writer.add_scalar('val/mean_loss_epoch', mean_loss, epoch)
            self.writer.add_scalar('val/mIoU', mIoU, epoch)
            self.writer.add_scalar('val/Acc', Acc, epoch)
            self.writer.add_scalar('val/Acc_class', Acc_class, epoch)
            self.writer.add_scalar('val/fwIoU', FWIoU, epoch)
            self.writer.add_scalar('val/RRecall', RRecall, epoch)
            self.writer.add_scalar('val/RNum', RNum, epoch)
            self.writer.add_scalar('val/Result', result, epoch)

            printline = ("Val[Epoch: [{}], mean_loss: {:.4f}, mIoU: {:.4f}, "
                         "Acc: {:.4f}, Acc_class: {:.4f}, fwIoU: {:.4f}, "
                         "RRecall: {:.4f}, RNum: {:.1f}]").format(
                             epoch, mean_loss, mIoU, Acc, Acc_class, FWIoU,
                             RRecall, RNum)
            print(printline)
            self.saver.save_eval_result(printline)

        return result
    def __init__(self, verbose=False) -> None:

        # Loading the config file
        config_file = "config/config.yaml"
        with open(config_file) as reader:
            self.config = yaml.safe_load(reader)
        if verbose:
            pprint.pprint(self.config, width=1)

        # Choose device
        self.device = 'cuda' if torch.cuda.device_count() > 0 else 'cpu'

        # Training settings
        self.batch_size = self.config['training']['batch_size']
        self.max_nb_steps_per_episode = self.config['training'][
            'max_nb_steps_per_episode']
        self.nb_epochs = self.config['training']['nb_epochs']

        # Set stats
        self._episode_has_started = False
        self.last_done = None
        self.mode = 'test'
        self.counter = StepCounter(self.batch_size,
                                   self.max_nb_steps_per_episode)

        # Init the models and its optimizer
        self.model = Model(
            hidden_size=self.config['model']['hidden_size'],
            device=self.device,
            bidirectional=self.config['model']['bidirectional'],
            hidden_linear_size=self.config['model']['hidden_linear_size'])
        self.item_scorer = ItemScorer(device=self.device)
        self.navigation_model = Navigation(device=self.device)
        if 'optimizer' in self.config['training']:
            self.optimizer = optim.Adam(
                self.model.parameters(),
                self.config['training']['optimizer']['learning_rate'])
        self.model_updates = 0
        self.model_loss = 0.0

        if verbose:
            print(self.model)
            print('Total Model Parameters: {}'.format(
                count_parameters(self.model)))

        # choose the agent
        self.agent = lambda device, model: HAgent(
            device=device,
            model=model,
            item_scorer=self.item_scorer,
            hcp=self.config['general']['hcp'],
            navigation_model=self.navigation_model)
        # Command Queue
        self.command_q = None

        # Saving and Loading
        self.experiment_tag = self.config['checkpoint'].get(
            'experiment_tag', 'NONAME')
        self.saver = Saver(
            model=self.model,
            ckpt_path=self.config['checkpoint'].get('model_checkpoint_path',
                                                    'NOPATH'),
            experiment_tag=self.experiment_tag,
            load_pretrained=len(
                self.config['checkpoint']['pretrained_experiment_path']) > 0,
            pretrained_model_path=os.path.join(
                _FILE_PREFIX,
                self.config['checkpoint']['pretrained_experiment_path']),
            device=self.device,
            save_frequency=self.config['checkpoint'].get(
                'save_frequency', 1E10))
Example #19
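# Adversarial EGG training loop: every 5 epochs a checkpoint dict is built with
# save_model.create_checkpoint() and written with save_model.save_checkpoint().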
def main():
    train_data, test_data, _ = train_validate_test_loader(
        "../data/Childers/M/speech",
        "../data/Childers/M/egg",
        split={
            "train": 0.7,
            "validate": 0.1,
            "test": 0.2
        },
        batch_size=1,
        workers=2,
        stride={
            "train": 2,
            "validate": 20
        },
        pin_memory=False,
        model_folder="data/irish_clean_data",
    )

    model_G = SpeechEggEncoder()
    model_D = Discriminator()
    save_model = Saver("checkpoints/vmodels/childers_clean_l2")

    encoder = EGGEncoder()
    save_encoder = Saver_Encoder("encoder")
    encoder, _, _ = save_encoder.load_checkpoint(encoder,
                                                 file_name="epoch_65.pt")

    use_cuda = True
    epochs = 100

    optimizer_G = optim.Adam(list(model_G.parameters())[:12], lr=2e-3)
    optimizer_R = optim.Adam(model_G.parameters(), lr=2e-3)
    optimizer_D = optim.Adam(model_D.parameters(), lr=2e-3)
    scheduler_G = optim.lr_scheduler.StepLR(optimizer_G, 10, 0.9)
    scheduler_D = optim.lr_scheduler.StepLR(optimizer_D, 10, 0.9)
    scheduler_R = optim.lr_scheduler.StepLR(optimizer_R, 10, 0.5)

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")

        for i in range(1, epochs + 1):

            net_loss, D_loss, G_loss, R_loss, D_real_prob, D_fake_prob = train(
                model_G,
                model_D,
                encoder,
                optimizer_G,
                optimizer_R,
                optimizer_D,
                train_data,
                use_cuda,
            )
            print(
                "Train loss {:4.4} D_loss {:4.4} G_loss {:4.4} reconstruction loss {:4.4} Real D prob. {:4.4} Fake D prob. {:4.4} @epoch {}"
                .format(net_loss, D_loss, G_loss, R_loss, D_real_prob,
                        D_fake_prob, i))
            if i % 5 == 0:
                checkpoint = save_model.create_checkpoint(
                    model_G,
                    model_D,
                    optimizer_G,
                    optimizer_R,
                    optimizer_D,
                    {
                        "win": 100,
                        "stride": 3
                    },
                )

                save_model.save_checkpoint(checkpoint,
                                           file_name="epoch_{}.pt".format(i),
                                           append_time=False)
                test(model_G, model_D, encoder, test_data, use_cuda)

            if scheduler_G is not None:
                scheduler_G.step()
                scheduler_D.step()
                scheduler_R.step()
Example #20
import json
import numpy as np
from utils import LowRankLearning, Saver, TestUtils, get_env

parameters_file = "experiments/exp_lr_learning.json"
env = get_env()
saver = Saver()
test_utils = TestUtils()
Q_optimal = saver.load_from_pickle("results/Q_optimal.pickle")

with open(parameters_file) as j:
    parameters = json.loads(j.read())

medians = {}
standard_devs = {}
frob_errors = {}

for epsilon in parameters["epsilons"]:

    medians_temp = []
    standard_devs_temp = []
    frob_errors_temp = []

    for i in range(parameters["n_simulations"]):
        lr_learner = LowRankLearning(
            env=env,
            episodes=parameters["episodes"],
            max_steps=parameters["max_steps"],
            epsilon=epsilon,
            gamma=parameters["gamma"],
            k=parameters["k"],
Example #21
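# Detection inference: per-image predictions are accumulated and finally written with saver.save_test_result(results).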
def test(**kwargs):
    opt._parse(kwargs)
    saver = Saver(opt, "test")

    # imgs_name = os.listdir(opt.test_dir)
    imgs_set = opt.root_dir + "ImageSets/Main/val.txt"
    with open(imgs_set, 'r') as f:
        imgs_name = [x.strip() + '.jpg' for x in f.readlines()]

    resize = Letterbox(input_size=(opt.min_size, opt.max_size))
    normalize = Normalizer(mean=opt.mean, std=opt.std)

    # Define Network
    # initialize the network here.
    model = Model(opt, num_classes=10)
    model = model.to(opt.device)
    post_pro = PostProcess(**opt.nms)

    if os.path.isfile(opt.pre):
        print("=> loading checkpoint '{}'".format(opt.pre))
        checkpoint = torch.load(opt.pre)

        model.load_state_dict(checkpoint['state_dict'])
        print("=> loaded checkpoint '{}' (epoch {})".format(
            opt.pre, checkpoint['epoch']))
    else:
        raise FileNotFoundError

    results = []
    model.eval()
    with torch.no_grad():
        for ii, img_name in enumerate(tqdm(imgs_name)):
            # if ii >= 3: break;
            # data read and transforms
            img_path = osp.join(opt.test_dir, img_name)
            img = cv2.imread(img_path)[:, :, ::-1]
            sample = {'img': img, 'annot': None}
            sample = normalize(sample)
            sample = resize(sample)
            input = sample['img'].unsqueeze(0).to(opt.device).permute(
                0, 3, 1, 2)

            # predict
            scores, labels, boxes = model(input)
            scores_bt, labels_bt, boxes_bt = post_pro(scores, labels, boxes,
                                                      input.shape[-2:])

            boxes_bt[0] = re_resize(boxes_bt[0], sample['scale'],
                                    opt.resize_type)

            if show:
                # draw
                labels = labels_bt[0].float().view(-1, 1)
                scores = scores_bt[0].float().view(-1, 1)
                output = torch.cat((boxes_bt[0], labels, scores), dim=1)
                output = output.numpy()
                img = plot_img(img, output, classes)

                plt.figure(figsize=(10, 10))
                plt.subplot(1, 1, 1).imshow(img)
                plt.show()

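            # turn (x1, y1, x2, y2) boxes into (x, y, w, h) and collect one record per detection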
            for box, label, score in zip(boxes_bt[0], labels_bt[0],
                                         scores_bt[0]):
                box[2:] = box[2:] - box[:2]
                results.append({
                    "image_id": img_name,
                    "category_id": label.numpy(),
                    "bbox": box[:4].numpy(),
                    "score": score.numpy()
                })

        saver.save_test_result(results)
Beispiel #22
0
    def __init__(self, mode):
        # Define Saver
        self.saver = Saver(opt, mode)
        self.logger = self.saver.logger

        # Visualize
        self.summary = TensorboardSummary(self.saver.experiment_dir)
        self.writer = self.summary.create_summary()

        # Dataset dataloader
        self.train_dataset, self.train_loader = make_data_loader(opt)
        self.nbatch_train = len(self.train_loader)
        self.val_dataset, self.val_loader = make_data_loader(opt, mode="val")
        self.nbatch_val = len(self.val_loader)

        # Model
        # enable SyncBatchNorm only when it was left unset and multiple GPUs are in use
        if opt.sync_bn is None:
            opt.sync_bn = len(opt.gpu_id) > 1
        # model = DeepLab(opt)
        # model = CSRNet()
        model = CRGNet(opt)
        model_info(model, self.logger)
        self.model = model.to(opt.device)

        # Loss
        if opt.use_balanced_weights:
            classes_weights_file = osp.join(opt.root_dir, 'train_classes_weights.npy')
            if os.path.isfile(classes_weights_file):
                weight = np.load(classes_weights_file)
            else:
                weight = calculate_weigths_labels(
                    self.train_loader, opt.root_dir)
            print(weight)
            opt.loss['weight'] = weight
        self.loss = build_loss(opt.loss)

        # Define Evaluator
        self.evaluator = Evaluator()  # use region to eval: class_num is 2

        # Resuming Checkpoint
        self.best_pred = 0.0
        self.start_epoch = 0
        if opt.resume:
            if os.path.isfile(opt.pre):
                print("=> loading checkpoint '{}'".format(opt.pre))
                checkpoint = torch.load(opt.pre)
                self.start_epoch = checkpoint['epoch']
                self.best_pred = checkpoint['best_pred']
                self.model.load_state_dict(checkpoint['state_dict'])
                print("=> loaded checkpoint '{}' (epoch {})"
                      .format(opt.pre, checkpoint['epoch']))
            else:
                print("=> no checkpoint found at '{}'".format(opt.pre))

        if len(opt.gpu_id) > 1:
            print("Using multiple gpu")
            self.model = torch.nn.DataParallel(self.model,
                                               device_ids=opt.gpu_id)

        # Define Optimizer
        # train_params = [{'params': model.get_1x_lr_params(), 'lr': opt.lr},
        #                 {'params': model.get_10x_lr_params(), 'lr': opt.lr * 10}]
        # self.optimizer = torch.optim.SGD(train_params,
        #                                  momentum=opt.momentum,
        #                                  weight_decay=opt.decay)
        self.optimizer = torch.optim.SGD(self.model.parameters(),
                                         lr=opt.lr,
                                         momentum=opt.momentum,
                                         weight_decay=opt.decay)

        # Define lr scheduler
        # self.scheduler = LR_Scheduler(mode=opt.lr_scheduler,
        #                               base_lr=opt.lr,
        #                               num_epochs=opt.epochs,
        #                               iters_per_epoch=self.nbatch_train,
        #                               lr_step=140)
        self.scheduler = optim.lr_scheduler.MultiStepLR(
            self.optimizer,
            milestones=[round(opt.epochs * x) for x in opt.steps],
            gamma=opt.gamma)

        # Time
        self.loss_hist = collections.deque(maxlen=500)
        self.timer = Timer(opt.epochs, self.nbatch_train, self.nbatch_val)
        self.step_time = collections.deque(maxlen=opt.print_freq)
Beispiel #23
0
class Trainer(object):
    def __init__(self, mode):
        # Define Saver
        self.saver = Saver(opt, mode)
        self.logger = self.saver.logger

        # visualize
        self.summary = TensorboardSummary(self.saver.experiment_dir, opt)
        self.writer = self.summary.writer

        # Define Dataloader
        # train dataset
        self.train_dataset, self.train_loader = make_data_loader(opt, train=True)
        self.nbatch_train = len(self.train_loader)
        self.num_classes = self.train_dataset.num_classes

        # val dataset
        self.val_dataset, self.val_loader = make_data_loader(opt, train=False)
        self.nbatch_val = len(self.val_loader)

        # Define Network
        # initialize the network here.
        self.model = Model(opt, self.num_classes)
        self.model = self.model.to(opt.device)

        # Detection post process(NMS...)
        self.post_pro = PostProcess(**opt.nms)

        # Define Optimizer
        if opt.adam:
            self.optimizer = optim.Adam(self.model.parameters(), lr=opt.lr)
        else:
            self.optimizer = optim.SGD(self.model.parameters(), lr=opt.lr, momentum=opt.momentum, weight_decay=opt.decay)

        # Apex
        if opt.use_apex:
            self.model, self.optimizer = amp.initialize(self.model, self.optimizer, opt_level='O1')

        # Resuming Checkpoint
        self.best_pred = 0.0
        self.start_epoch = 0
        if opt.resume:
            if os.path.isfile(opt.pre):
                print("=> loading checkpoint '{}'".format(opt.pre))
                checkpoint = torch.load(opt.pre)
                self.start_epoch = checkpoint['epoch'] + 1
                self.best_pred = checkpoint['best_pred']
                self.model.load_state_dict(checkpoint['state_dict'])
                print("=> loaded checkpoint '{}' (epoch {})"
                      .format(opt.pre, checkpoint['epoch']))
            else:
                print("=> no checkpoint found at '{}'".format(opt.pre))

        # Define lr scheduler
        # self.scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        #     self.optimizer, patience=3, verbose=True)
        self.scheduler = optim.lr_scheduler.MultiStepLR(
            self.optimizer,
            milestones=[round(opt.epochs * x) for x in opt.steps],
            gamma=opt.gamma)
        self.scheduler.last_epoch = self.start_epoch - 1

        # Use multiple GPUs when several device ids are given
        if len(opt.gpu_id) > 1:
            self.logger.info("Using multiple GPUs")
            self.model = torch.nn.DataParallel(self.model,
                                               device_ids=opt.gpu_id)

        # metrics
        if opt.eval_type == 'cocoeval':
            self.eval = COCO_eval(self.val_dataset.coco)
        else:
            self.eval = VOC_eval(self.num_classes)

        self.loss_hist = collections.deque(maxlen=500)
        self.timer = Timer(opt.epochs, self.nbatch_train, self.nbatch_val)
        self.step_time = collections.deque(maxlen=opt.print_freq)

    def training(self, epoch):
        self.model.train()
        epoch_loss = []
        last_time = time.time()
        for iter_num, data in enumerate(self.train_loader):
            # if iter_num >= 0: break
            try:
                self.optimizer.zero_grad()
                inputs = data['img'].to(opt.device)
                targets = data['annot'].to(opt.device)

                losses = self.model(inputs, targets)
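                # parse_losses is expected to return the total loss plus a dict of individual terms for logging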
                loss, log_vars = parse_losses(losses)

                if bool(loss == 0):
                    continue
                if opt.use_apex:
                    with amp.scale_loss(loss, self.optimizer) as scaled_loss:
                        scaled_loss.backward()
                else:
                    loss.backward()
                torch.nn.utils.clip_grad_norm_(self.model.parameters(), opt.grad_clip)
                self.optimizer.step()
                self.loss_hist.append(float(loss.cpu().item()))
                epoch_loss.append(float(loss.cpu().item()))

                # visualize
                global_step = iter_num + self.nbatch_train * epoch + 1
                loss_logs = ""
                for _key, _value in log_vars.items():
                    loss_logs += "{}: {:.4f}  ".format(_key, _value)
                    self.writer.add_scalar('train/{}'.format(_key),
                                           _value,
                                           global_step)

                batch_time = time.time() - last_time
                last_time = time.time()
                eta = self.timer.eta(global_step, batch_time)
                self.step_time.append(batch_time)
                if global_step % opt.print_freq == 0:
                    printline = ("Epoch: [{}][{}/{}]  "
                                 "lr: {}  eta: {}  time: {:1.1f}  "
                                 "{}"
                                 "Running loss: {:1.5f}").format(
                                    epoch, iter_num + 1, self.nbatch_train,
                                    self.optimizer.param_groups[0]['lr'],
                                    eta, np.sum(self.step_time),
                                    loss_logs,
                                    np.mean(self.loss_hist))
                    self.logger.info(printline)

            except Exception as e:
                print(e)
                continue

        # self.scheduler.step(np.mean(epoch_loss))
        self.scheduler.step()

    def validate(self, epoch):
        self.model.eval()
        # torch.backends.cudnn.benchmark = False
        # self.model.apply(uninplace_relu)
        # start collecting results
        with torch.no_grad():
            for ii, data in enumerate(self.val_loader):
                # if ii > 0: break
                scale = data['scale']
                index = data['index']
                inputs = data['img'].to(opt.device)
                targets = data['annot']

                # run network
                scores, labels, boxes = self.model(inputs)

                scores_bt, labels_bt, boxes_bt = self.post_pro(
                    scores, labels, boxes, inputs.shape[-2:])

                outputs = []
                for k in range(len(boxes_bt)):
                    outputs.append(torch.cat((
                        boxes_bt[k].clone(),
                        labels_bt[k].clone().unsqueeze(1).float(),
                        scores_bt[k].clone().unsqueeze(1)),
                        dim=1))

                # visualize
                global_step = ii + self.nbatch_val * epoch
                if global_step % opt.plot_every == 0:
                    self.summary.visualize_image(
                        inputs, targets, outputs,
                        self.val_dataset.labels,
                        global_step)

                # eval
                if opt.eval_type == "voceval":
                    self.eval.statistics(outputs, targets, iou_thresh=0.5)

                elif opt.eval_type == "cocoeval":
                    self.eval.statistics(outputs, scale, index)

                print('{}/{}'.format(ii, len(self.val_loader)), end='\r')

            if opt.eval_type == "voceval":
                stats, ap_class = self.eval.metric()
                for key, value in stats.items():
                    self.writer.add_scalar('val/{}'.format(key), value.mean(), epoch)
                self.saver.save_voc_eval_result(stats, ap_class, self.val_dataset.labels)
                return stats['AP']

            elif opt.eval_type == "cocoeval":
                stats = self.eval.metric()
                self.saver.save_coco_eval_result(stats)
                self.writer.add_scalar('val/mAP', stats[0], epoch)
                return stats[0]

            else:
                raise NotImplementedError
Beispiel #24
0
    def __init__(self, mode):
        # Define Saver
        self.saver = Saver(opt, mode)
        self.logger = self.saver.logger

        # visualize
        self.summary = TensorboardSummary(self.saver.experiment_dir, opt)
        self.writer = self.summary.writer

        # Define Dataloader
        # train dataset
        self.train_dataset, self.train_loader = make_data_loader(opt, train=True)
        self.nbatch_train = len(self.train_loader)
        self.num_classes = self.train_dataset.num_classes

        # val dataset
        self.val_dataset, self.val_loader = make_data_loader(opt, train=False)
        self.nbatch_val = len(self.val_loader)

        # Define Network
        # initialize the network here.
        self.model = Model(opt, self.num_classes)
        self.model = self.model.to(opt.device)

        # Detection post process(NMS...)
        self.post_pro = PostProcess(**opt.nms)

        # Define Optimizer
        if opt.adam:
            self.optimizer = optim.Adam(self.model.parameters(), lr=opt.lr)
        else:
            self.optimizer = optim.SGD(self.model.parameters(), lr=opt.lr, momentum=opt.momentum, weight_decay=opt.decay)

        # Apex
        if opt.use_apex:
            self.model, self.optimizer = amp.initialize(self.model, self.optimizer, opt_level='O1')

        # Resuming Checkpoint
        self.best_pred = 0.0
        self.start_epoch = 0
        if opt.resume:
            if os.path.isfile(opt.pre):
                print("=> loading checkpoint '{}'".format(opt.pre))
                checkpoint = torch.load(opt.pre)
                self.start_epoch = checkpoint['epoch'] + 1
                self.best_pred = checkpoint['best_pred']
                self.model.load_state_dict(checkpoint['state_dict'])
                print("=> loaded checkpoint '{}' (epoch {})"
                      .format(opt.pre, checkpoint['epoch']))
            else:
                print("=> no checkpoint found at '{}'".format(opt.pre))

        # Define lr scheduler
        # self.scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        #     self.optimizer, patience=3, verbose=True)
        self.scheduler = optim.lr_scheduler.MultiStepLR(
            self.optimizer,
            milestones=[round(opt.epochs * x) for x in opt.steps],
            gamma=opt.gamma)
        self.scheduler.last_epoch = self.start_epoch - 1

        # Use multiple GPUs when several device ids are given
        if len(opt.gpu_id) > 1:
            self.logger.info("Using multiple GPUs")
            self.model = torch.nn.DataParallel(self.model,
                                               device_ids=opt.gpu_id)

        # metrics
        if opt.eval_type == 'cocoeval':
            self.eval = COCO_eval(self.val_dataset.coco)
        else:
            self.eval = VOC_eval(self.num_classes)

        self.loss_hist = collections.deque(maxlen=500)
        self.timer = Timer(opt.epochs, self.nbatch_train, self.nbatch_val)
        self.step_time = collections.deque(maxlen=opt.print_freq)
Beispiel #25
0
    def __init__(self, verbose=False, **kwargs) -> None:
        # Load the config file
        config_file = kwargs['config_file_path'] if 'config_file_path' in kwargs else "config/config.yaml"
        with open(config_file) as reader:
            self.config = yaml.safe_load(reader)
        if 'update_config_fun' in kwargs and kwargs['update_config_fun'] is not None:
            self.config = kwargs['update_config_fun'](self.config)
        if verbose:
            pprint.pprint(self.config, width=1)

        # choose device
        self.device = 'cuda' if torch.cuda.device_count() > 0 else 'cpu'
        if 'gpu' in kwargs and kwargs['gpu'] is not None:
            self.device = 'cuda:{}'.format(kwargs['gpu'])

        # training settings
        self.batch_size = self.config['training']['batch_size']
        self.max_nb_steps_per_episode = self.config['training']['max_nb_steps_per_episode']
        self.nb_epochs = self.config['training']['nb_epochs']

        # set the statistics
        self._episode_has_started = False
        self.last_done = None
        self.mode = "test"
        self.counter = StepCounter(self.batch_size, self.max_nb_steps_per_episode)

        # Init the models and its optimizer
        self.model = Model(hidden_size=self.config['model']['hidden_size'],
                           device=self.device,
                           bidirectional=self.config['model']['bidirectional'],
                           hidden_linear_size=self.config['model']['hidden_linear_size'])
        self.item_scorer = ItemScorer(device=self.device)
        self.navigation_model = Navigation(device=self.device)
        if 'optimizer' in self.config['training']:
            self.optimizer = optim.Adam(self.model.parameters(),
                                        self.config['training']['optimizer']['learning_rate'])
        self.model_updates = 0
        self.model_loss = 0.

        if verbose:
            print(self.model)
            print('Total Model Parameters: {}'.format(count_parameters(self.model)))

        # choose the agent
        self.agent = lambda device, model: HAgent(device=device, model=model, item_scorer=self.item_scorer,
                                                  hcp=self.config['general']['hcp'], navigation_model=self.navigation_model)
        # Command Queue
        self.command_q = None

        # Saving and Loading
        self.experiment_tag = self.config['checkpoint'].get('experiment_tag', 'NONAME')
        self.saver = Saver(model=self.model,
                           ckpt_path=self.config['checkpoint'].get('model_checkpoint_path', 'NOPATH'),
                           experiment_tag=self.experiment_tag,
                           load_pretrained=len(self.config['checkpoint']['pretrained_experiment_path']) > 0,
                           pretrained_model_path=os.path.join(_FILE_PREFIX, self.config['checkpoint']['pretrained_experiment_path']),
                           device=self.device,
                           save_frequency=self.config['checkpoint'].get('save_frequency', 1E10))

        # Logging Statistics
        tb_dir = None if 'tensorboard' not in self.config else os.path.join(self.config['tensorboard']['directory'],
                                                                            self.experiment_tag)
        self.statistics = StatisticsTracker(tb_dir=tb_dir)

        # EventHandler
        self.event_handler = EventHandler()
        self.event_handler.add(self.statistics.stats_episode_clear, Event.NEWEPISODE)
        self.event_handler.add(self.counter.new_episode, Event.NEWEPISODE)
Beispiel #26
0
from matplotlib import rcParams
import matplotlib.pyplot as plt
from utils import Saver
import numpy as np

saver = Saver()

rcParams['font.family'] = 'sans-serif'
rcParams['font.sans-serif'] = ['Tahoma']
rcParams['font.size'] = 16

rewards_dqn_light = saver.load_from_pickle("results/rewards_1_layer_2000_light.pck")
steps_dqn_light = saver.load_from_pickle("results/steps_1_layer_2000_light.pck")

rewards_dqn_large = saver.load_from_pickle("results/rewards_1_layer_2000_large.pck")
steps_dqn_large = saver.load_from_pickle("results/steps_1_layer_2000_large.pck")

rewards_lr = saver.load_from_pickle("results/rewards_k_2.pck")
steps_lr = saver.load_from_pickle("results/steps_k_2.pck")

rewards_lr_norm = saver.load_from_pickle("results/rewards_k_2_norm.pck")
steps_lr_norm = saver.load_from_pickle("results/steps_k_2_norm.pck")

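# median along the first axis (each row appears to hold one training run)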
median_rewards_dqn_light = np.median(rewards_dqn_light, axis=0)
median_steps_dqn_light = np.median(steps_dqn_light, axis=0)

median_rewards_dqn_large = np.median(rewards_dqn_large, axis=0)
median_steps_dqn_large = np.median(steps_dqn_large, axis=0)

median_reward_lr = np.median(rewards_lr, axis=0)
median_steps_lr = np.median(steps_lr, axis=0)
Beispiel #27
0
import numpy as np
from matplotlib import rcParams
from utils import Saver, TestUtils

rcParams['font.family'] = 'sans-serif'
rcParams['font.sans-serif'] = ['Tahoma']
rcParams['font.size'] = 16

saver = Saver()
test_utils = TestUtils()

q_learner = saver.load_from_pickle("results/q_learner_example.pickle")
lr_learner = saver.load_from_pickle("results/low_rank_learner_example.pickle")

steps_q_large = saver.load_from_pickle("results/exp_1_q_learning_steps.pickle")
rewards_q_large = saver.load_from_pickle(
    "results/exp_1_q_learning_rewards.pickle")
final_mean_reward_q_large = saver.load_from_pickle(
    "results/exp_1_q_learning_final_reward.pickle")

steps_q_small = saver.load_from_pickle("results/exp_2_q_learning_steps.pickle")
rewards_q_small = saver.load_from_pickle(
    "results/exp_2_q_learning_rewards.pickle")
final_mean_reward_q_small = saver.load_from_pickle(
    "results/exp_2_q_learning_final_reward.pickle")

steps_lr = saver.load_from_pickle("results/exp_1_lr_learning_steps.pickle")
rewards_lr = saver.load_from_pickle("results/exp_1_lr_learning_rewards.pickle")
final_mean_reward_lr = saver.load_from_pickle(
Beispiel #28
0
def train():
    olp = OneLinePrint()

    logger.info('start building batch data')

    vocab = Vocab(hps.vocab_file, hps.vocab_size)
    batcher = Batcher(hps.data_path, vocab, hps, hps.single_pass)

    logger.info('end building batch data')
    logger.info('vocab size: %s' % vocab.size())

    criterion = nn.NLLLoss(ignore_index=vocab.pad_id())

    model = Model(vocab, hps)
    if hps.use_cuda:
        model = model.cuda()
    if hps.restore:
        model.load_state_dict(torch.load(hps.restore))

    opt = optimzier(hps.opt, model.parameters())

    if hps.ckpt_name != '':
        saver = Saver(hps.ckpt_path, hps.ckpt_name, model)

    # for store summary
    if hps.store_summary:
        writer = SummaryWriter(comment='_' + hps.ckpt_name)

    # loss_sum = 0

    logger.info('----Start training----')
    timer = Timer()
    timer.start()
    for step in range(hps.start_step, hps.num_iters + 1):
        # # Decay learning rate
        # if step % hps.lr_decay_step == 0:
        #     olp.write(
        #         'decay learning rate to %f' % decay_lr(opt, step))

        # Forward -------------------------------------------------------------
        opt.zero_grad()

        batch = batcher.next_batch()
        (inputs, inp_lens, inp_pad, dec_inps, targets, dec_lens,
         dec_pad) = batch.expand(hps.use_cuda)

        outputs = model(dec_inps, dec_lens)  # output: (B*T*(1~3)U)
        loss = criterion(outputs.view(-1, vocab.size()), targets.view(-1))

        # Backward ------------------------------------------------------------
        loss.backward()
        # gradient clipping
        global_norm = nn.utils.clip_grad_norm_(model.parameters(), hps.clip)
        opt.step()

        # loss_sum += loss.data[0]

        # Utils ---------------------------------------------------------------
        # save checkpoint
        if step % hps.ckpt_steps == 0 and hps.ckpt_name != '':
            saver.save(step, loss.item())
            olp.write('save checkpoint (step=%d)\n' % step)

        # print the train loss and ppl
        ppl = np.exp(loss.item())
        olp.write('step %s train loss: %f, ppl: %8.2f' %
                  (step, loss.item(), ppl))
        olp.flush()

        # store summary
        if hps.store_summary and (step - 1) % hps.summary_steps == 0:
            writer.add_scalar('loss', loss, step)
            writer.add_scalar('ppl', ppl, step)
            writer.add_scalar('global_norm', global_norm, step)
            if step - 1 != 0:
                lap_time, _ = timer.lap('summary')
                steps = hps.summary_steps
                writer.add_scalar('avg time/step', lap_time / steps, step)

        # print output and target
        # if step % hps.summary_steps == 0:
        #     logger.info('\nstep:%d~%d avg loss: %f', step - hps.summary_steps,
        #                 step, loss_sum / hps.summary_steps)
        #     loss_sum = 0

    if hps.store_summary:
        writer.close()
Beispiel #29
0
    def __init__(self, args, n_workers=4, memory_ram=2, time_saving=45):
        # Getting Ray
        self.Ray = args.ray
        if self.Ray and not RAY:
            print(
                "-- ray is not available --\nNormal initialization in progress"
            )
            self.Ray = False

        # Some parameters
        self.time_save = time_saving
        self.seed = args.seed
        self.exp_steps = args.steps
        self.test_steps = args.test_steps
        self.test_freq = args.test_freq
        self.memory_start = args.memory_start
        self.update_online = args.update_online
        self.play_steps = args.play
        self.game = args.game
        self.double = args.double
        self.game_actions = args.game_actions

        # Other variables to save progress
        self.time = Tocker()
        self.ckTime = Tocker()
        self.acc_test = 0
        self.mean_test = []
        self.std_test = []
        self.actor_test_episodes = 0
        self.test_episodes = 0
        name_temp = args.game
        name_temp += '_double' if self.double else ''
        name_temp += '_' + args.optimizer + '_lr_' + str(args.learning_rate)
        self.saver = Saver(name_temp)
        name_sum = os.path.join(self.saver.dir,
                                "tensorboard_{}".format(name_temp))
        self.writer = SummaryWriter(name_sum)

        #Generating the actors
        self.policy = atariDQN(args.lhist, args.game_actions, args.dropouts)

        self.memoryReplay = MemoryReplay(
            capacity=args.memory_size,
            LHist=args.lhist,
        )

        self.main_actor = ActorDQN(
            self.game,
            args.game_actions,
            self.policy,
            lHist=args.lhist,
            steps_per_update=args.update_online,
            buffer_size=args.buffer_size,
            test_steps=self.test_steps,
            start_steps=self.memory_start,
            device=DEVICE,
            seed=args.seed,
            writer=self.writer,
        )

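        # number of buffer-fill rounds needed to cover memory_start, assuming buffer_size * lhist transitions per round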
        self.steps_to_fill_mem = math.ceil(self.memory_start /
                                           (args.buffer_size * args.lhist))

        self.n_actors = NCPUS if n_workers >= NCPUS else n_workers

        if NCPUS > 1 and self.Ray:
            # With Ray available, spawn several actors to speed up
            # replay-buffer filling and network evaluation
            actors_start_steps = math.ceil(self.memory_start / self.n_actors)
            self.steps_to_fill_mem = math.ceil(actors_start_steps /
                                               (args.buffer_size * args.lhist))
            actors_test_steps = math.ceil(self.test_steps / self.n_actors)

            # ---- Initialize Ray ----
            ray.init(num_cpus=self.n_actors,
                     _memory=memory_ram * GIGA,
                     object_store_memory=400 * MEGA)

            # Buffers for the actors
            #self.buffers = [Buffer(capacity=actors_buffer_size) for _ in range(self.n_actors)]
            # ActorDQN remotes used only for filling and evaluation; each accesses its own buffer only
            actor = ray.remote(ActorDQN)
            self.actors = [actor.remote(self.game,
                                        args.game_actions,
                                        self.policy,
                                        lHist=args.lhist,
                                        buffer_size=args.buffer_size,
                                        test_steps = actors_test_steps,
                                        start_steps = actors_start_steps,
                                        seed = args.seed,
                                        ray_actor = True) \
                                                for i in range(self.n_actors)]
            time.sleep(10)
            print(
                "Trainer set with Ray\nRay resources {} workers with {} GB of RAM"
                .format(self.n_actors, memory_ram))
        else:
            print(timeFormated(), "Trainer set")

        self.main_learner = LearnerDQN(
            self.policy,
            self.memoryReplay,
            args.mini_batch_size,
            learning_rate=args.learning_rate,
            update_target=args.update_target,
            device=DEVICE,
            double=args.double,
            optimizer=args.optimizer,
            seed=args.seed,
            writer=self.writer,
        )
Beispiel #30
0
def train(args, model, train_set):
    # to cuda
    model.cuda()
    model.train()

    # dataloader
    train_loader = torch.utils.data.DataLoader(train_set,
                                               batch_size=args.batch_size,
                                               drop_last=True,
                                               shuffle=True,
                                               num_workers=int(
                                                   args.num_threads))

    # optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    scheduler = torch.optim.lr_scheduler.StepLR(
        optimizer,
        step_size=args.scheduler_step_size,
        gamma=args.scheduler_gamma)

    # saver
    saver = Saver(args)

    # loss function
    criterion = torch.nn.L1Loss()

    # time
    time_start_train = time.time()

    # misc
    num_batch = len(train_set) // args.batch_size
    counter = 0
    backup_codes(args)

    # compute paras
    params = network_paras(model)
    log = "num of parameters: {:,}".format(params)
    saver.save_log(log)
    print(log)

    # init weights
    def weights_init(m):
        if isinstance(m, torch.nn.Conv2d):
            init.kaiming_normal_(m.weight.data)

    if not args.is_finetuning:
        model.apply(weights_init)

    # start training
    print('{:=^40}'.format(' training start '))
    for epoch in range(args.epochs):
        running_loss = 0.0
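        # each batch unpacks to an unused first element, a low-res input, and its high-res target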
        for bidx, (_, im_lr, im_hr) in enumerate(train_loader):
            im_lr = im_lr.cuda()
            im_hr = im_hr.cuda()

            # zero the parameter gradients
            model.zero_grad()

            # forward
            output = model(im_lr)

            # loss
            loss = criterion(output, im_hr)

            # backward & update
            loss.backward()
            optimizer.step()

            # accumulate running loss
            running_loss += loss.cpu().item()

            # print for every N batch
            if counter % args.step_print_loss == 0:
                # time
                acc_time = time.time() - time_start_train

                # log
                log = 'epoch: (%d/%d) [%5d/%5d], loss: %.6f | time: %s' % \
                    (epoch, args.epochs, bidx, num_batch, running_loss, str(datetime.timedelta(seconds=acc_time)))

                print(log)
                saver.save_log(log)
                running_loss = 0.0

                print_lr(optimizer)

            if counter and counter % args.step_save == 0:
                # save
                saver.save_model(model)

            # counter increment
            counter += 1

        # step the LR scheduler once per epoch (step() no longer takes the epoch argument)
        scheduler.step()

    print('{:=^40}'.format(' Finish '))
    runtime = time.time() - time_start_train
    print('training time:', str(datetime.timedelta(seconds=runtime)) + '\n\n')