Exemple #1
0
def add_target(img):
    """Generate pose-augmented versions of ``img`` via the remote service.

    Writes the RGB image to a temp folder, uploads it, polls until the
    augmentation job finishes, registers every returned image with
    ``pipeline`` and returns the images serialized as strings.

    Fixes vs. previous version: the ``img`` parameter and the builtin
    ``file`` are no longer shadowed inside the download loop, and the
    repeated path strings are built once.
    """
    # Yaw-only pose sweep; pitch and roll stay neutral.
    poses = [{"yaw": yaw, "pitch": 0, "roll": 0}
             for yaw in (-45, -30, -15, 15, 30, 45)]

    folder_name = str(time.time())
    work_dir = "./tmp/" + folder_name
    os.mkdir(work_dir)

    original_path = work_dir + "/original.png"
    # cv2 expects BGR on disk; the in-memory image is RGB.
    cv2.imwrite(original_path, cv2.cvtColor(img, cv2.COLOR_RGB2BGR))
    bundle_loc = "./tmp/" + folder_name + ".zip"
    prepare_batch([original_path], bundle_loc)
    process_id = post_batch(poses=poses, bundle_loc=bundle_loc)

    processed_loc = "./tmp/" + folder_name + "_processed.zip"

    # Poll until the remote job is done; get_processed downloads the zip
    # and reports completion.
    time.sleep(3)
    finished = False
    while not finished:
        finished = get_processed(process_id, processed_loc)
        time.sleep(5)

    unzip_and_move(processed_loc, work_dir + "/", ["henk"])

    img_list = []
    # Reverse-sorted listing, as before; renamed loop vars avoid shadowing
    # the `img` parameter and the `file` builtin.
    for fname in sorted(os.listdir(work_dir), reverse=True):
        fpath = work_dir + "/" + fname
        aug_img = cv2.cvtColor(cv2.imread(fpath), cv2.COLOR_BGR2RGB)
        pipeline.add_to_targets(aug_img)
        img_list.append(np_array_to_string(aug_img))
        os.remove(fpath)

    os.rmdir(work_dir)

    return img_list
Exemple #2
0
def main():
    """Process a dataset in batches through the remote augmentation service.

    For each batch of source files: bundle them, post the job, poll until
    the processed zip is available, unpack it into the target folder and
    copy the originals alongside.
    """
    import os  # for robust basename extraction below

    # get file iterator
    source_loc = "dataset" if ARGS.source == "" else ARGS.source
    target_loc = "dataset_processed"
    bundle_loc = "batch.zip"
    processed_loc = "batch_processed.zip"

    batch_size = 25
    # Small yaw/pitch perturbations generated per image.
    poses = [
        {"yaw": -8, "pitch": -8, "roll": 0},
        {"yaw": -1, "pitch": 8, "roll": 0},
        {"yaw": 5, "pitch": 5, "roll": 0},
    ]

    total_files = number_of_files(source_loc)
    counter = 0
    current_progress = 0
    iterator = get_files(source_loc, batch_size=batch_size)

    for batch_filenames in iterator:
        current_progress += len(batch_filenames)
        pretty_print(counter, "processing: {}/{}".format(current_progress, total_files))
        prepare_batch(batch_filenames, bundle_loc)
        pretty_print(counter, "posting batch request")
        process_id = post_batch(poses=poses, bundle_loc=bundle_loc)
        pretty_print(counter, "request id: {}".format(process_id))

        # Poll until the server reports completion; get_processed downloads the zip.
        finished = False
        while not finished:
            finished = get_processed(process_id, processed_loc)
            time.sleep(10)

        # BUG FIX: the previous `name.split('/')[1].split('.')[0]` broke for
        # nested source directories and Windows separators; use the real
        # basename without extension instead.
        names = [os.path.splitext(os.path.basename(name))[0]
                 for name in batch_filenames]
        # Extract the zip, enter the 3duniversum structure and move the
        # files to the target location.
        unzip_and_move(processed_loc, target_loc, names=names)
        # Copy the originals alongside the processed outputs.
        copy_originals(batch_filenames, target_loc)
        pretty_print(counter, "batch finished")

        counter += 1
    def make_train_inputs(self, x, y):
        """Build the feed dict for a training step from raw (x, y) batches."""
        padded_inputs, input_lengths = utils.prepare_batch(x)
        padded_targets, target_lengths = utils.prepare_batch(y)

        feed = {
            self.encoder_inputs: padded_inputs,
            self.encoder_inputs_length: input_lengths,
            self.decoder_targets: padded_targets,
            self.decoder_targets_length: target_lengths,
        }
        return feed
Exemple #4
0
def evaluate_by_logic_level(args, model, iterator, print_total=False):
    """Report accuracy bucketed by question logic level (levels 1-4).

    Runs the model over ``iterator`` without gradients, counts correct
    answers per level and prints the per-level accuracies; optionally
    prints the overall accuracy.

    Fixes vs. previous version: the leftover ``ipdb.set_trace()`` debug
    hook on empty subtitles was removed, and the misspelled local
    ``accuracys`` was renamed.
    """
    from tqdm import tqdm

    vocab = model.vocab
    model.eval()

    cor_que_n = torch.zeros(5)  # correct count per level; index 0 is padding
    all_que_n = torch.zeros(5)  # total count per level
    all_que_n[0] = 1  # level 0 is never incremented; avoids division by zero

    with torch.no_grad():
        for batch in tqdm(iterator, desc='Calculating accuracy by question logic level'):
            net_inputs, target = prepare_batch(args, batch, vocab)
            q_level_logic = net_inputs['q_level_logic']

            y_pred = model(**net_inputs)
            _, pred_idx = y_pred.max(dim=1)
            result = pred_idx == target

            for i, lev in enumerate(q_level_logic):
                if result[i]:  # correct prediction
                    cor_que_n[lev] += 1
                all_que_n[lev] += 1

    accuracies = cor_que_n / all_que_n.float()

    print('Accuracy by question logic level: ')
    for i in range(1, 5):
        print('Level %d: %.4f' % (i, accuracies[i]))

    if print_total:
        # Subtract the 1 seeded into all_que_n[0] above.
        print('Total Accuracy:', cor_que_n.sum().item() / (all_que_n.sum().item() - 1))
Exemple #5
0
 def update_model(trainer, batch):
     """One pre-training step combining character and masked-LM losses.

     Returns (loss_value, qa_stats, batch_size, detached predictions,
     detached targets).  Note: only character_loss + mlm_loss are
     backpropagated; the QA loss is computed solely for its stats.
     """
     args.pretraining = True
     model.train()
     optimizer.zero_grad()
     # model appears to be wrapped (e.g. DataParallel), hence model.module.vocab.
     net_inputs, target = prepare_batch(args, batch, model.module.vocab)
     #net_inputs, target = prepare_batch(args, batch, model.vocab)
     y_pred, char_pred, mask_pred = model(**net_inputs)
     batch_size = y_pred.shape[0]

     # get person ground truth and compute character loss
     n_char = 21  # number of character classes -- TODO confirm against vocab
     # assumes filtered_visual packs triples with the person id in column 0
     # -- verify against the dataloader
     visual_char = net_inputs['filtered_visual'].view(batch_size, -1, 3)[:,:,0]
     char_target = visual_char.unsqueeze(2).view(batch_size, -1)
     char_target = char_target.view(-1)
     char_pred = char_pred.view(-1, n_char)
     # ignore_index=-1 skips padded positions in both auxiliary losses
     character_loss = nn.CrossEntropyLoss(ignore_index=-1).cuda()(char_pred, char_target)

     # get ground truth labels and compute MLM loss
     vocab_size = mask_pred.size(-1)
     mask_target = net_inputs['labels']
     mask_target = mask_target.view(-1)
     mask_pred = mask_pred.view(-1, vocab_size)
     mlm_loss = nn.CrossEntropyLoss(ignore_index=-1).cuda()(mask_pred, mask_target)

     # compute QA loss (stats are returned; the loss itself is not used below)
     loss, stats = loss_fn(y_pred, target)

     # compute total loss
     # NOTE(review): the QA loss above is excluded here -- presumably
     # intentional during pre-training; confirm.
     loss = character_loss + mlm_loss

     loss.backward()
     optimizer.step()
     return loss.item(), stats, batch_size, y_pred.detach(), target.detach()
Exemple #6
0
def decode():
    """Decode the configured input corpus with the trained seq2seq model.

    Loads the model from its checkpoint, runs greedy/beam prediction over
    the test set and writes decoded lines to the output file(s): one file
    per beam when FLAGS.write_n_best is set, otherwise a single file.

    Fix vs. previous version: ``fout`` is initialized before the ``try``
    so the ``finally`` block cannot raise NameError (masking the original
    error) when an IOError occurs before the files are opened.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    # Load model config
    config = load_config(FLAGS)
    print(config)
    # Load source data to decode
    test_set = TextIterator(source=config['decode_input'],
                            split_sign=config['split_sign'],
                            batch_size=config['decode_batch_size'],
                            source_dict=config['source_vocabulary'],
                            n_words_source=config['num_encoder_symbols'])

    # Load inverse dictionary used in decoding
    target_inverse_dict = load_inverse_dict(config['target_vocabulary'])

    # Initiate TF session
    with tf.Session(config=tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement,
            gpu_options=tf.GPUOptions(allow_growth=True))) as sess:

        # Reload existing checkpoint
        model = load_model(sess, config)
        # Initialized before the try so the finally block always has a
        # (possibly empty) list of files to close.
        fout = []
        try:
            print('Decoding {}..'.format(FLAGS.decode_input))
            if FLAGS.write_n_best:
                fout = [open("%s_%d" % (FLAGS.decode_output, k), 'w')
                        for k in range(FLAGS.beam_width)]
            else:
                fout = [open(FLAGS.decode_output, 'w')]

            test_set.reset()

            for idx, source_seq in enumerate(test_set.next()):
                source, source_len = prepare_batch(source_seq)
                print('Get Batch', len(source), len(source_len))

                print('Source', list(source[0]), 'Source Len', source_len[0])
                # predicted_ids: GreedyDecoder; [batch_size, max_time_step, 1]
                # BeamSearchDecoder; [batch_size, max_time_step, beam_width]
                predicted_ids = model.predict(sess,
                                              encoder_inputs=source,
                                              encoder_inputs_length=source_len)
                # Write decoding results, best beam last so file k holds beam k.
                for k, f in reversed(list(enumerate(fout))):
                    for seq in predicted_ids:
                        result = str(seq2words(seq[:, k], target_inverse_dict))
                        f.write(result + '\n')
                        f.flush()
                    if not FLAGS.write_n_best:
                        break
                print('{}th line decoded'.format(idx *
                                                 FLAGS.decode_batch_size))

            print('Decoding terminated')
        except IOError:
            pass
        finally:
            # Plain loop instead of a side-effect list comprehension.
            for f in fout:
                f.close()
    def make_infer_inputs(self, x):
        """Build the feed dict for inference from a raw input batch."""
        padded_inputs, input_lengths = utils.prepare_batch(x)

        feed = {
            self.encoder_inputs: padded_inputs,
            self.encoder_inputs_length: input_lengths,
        }
        return feed
def warp_images():
    """Run the trained transformer network on a few image pairs and plot warps.

    Loads the trained weights and the VGG16 pre-training mean, compiles the
    Theano inference function and saves a warp visualization per batch.

    Fix vs. previous version: the batch count uses floor division; plain
    ``/`` yields a float under Python 3 (the code predates it), which is a
    wrong type for a count.
    """
    print('building model')
    layers = vgg16.build_model((None, 3, 227, 227))

    batch_size = 32
    infer_dir = join('data', 'inference')

    # Restore trained transformer weights.
    weightsfile = join('weights', 'weights.pickle')
    with open(weightsfile, 'rb') as f:
        param_values = pickle.load(f)
    set_all_param_values(layers['trans'], param_values)

    # Mean pixel value from VGG16 pre-training, used for input normalization.
    pretrainfile = join('weights', 'vgg16.pkl')
    with open(pretrainfile, 'rb') as f:
        data = pickle.load(f)
    mean = data['mean value']

    image_fpaths = [('Cars_013b.png', 'Cars_009b.png'),
                    ('060_0071.png', '060_0000.png'),
                    ('246_0052.png', '246_0042.png')]

    print('compiling theano functions for inference')
    # Ceil division for the number of batches (kept integer with //).
    num_infer_idx = (len(image_fpaths) + batch_size - 1) // batch_size
    infer_func = theano_funcs.create_infer_func(layers)
    infer_iter = utils.get_batch_idx(len(image_fpaths), batch_size)

    for i, idx in tqdm(infer_iter, total=num_infer_idx, leave=False):
        Xa, Xb = utils.prepare_batch(image_fpaths[idx], mean)
        M = infer_func(Xa, Xb)
        utils.plot_samples(Xa,
                           Xb,
                           M,
                           mean,
                           prefix=join(infer_dir, 'infer_%d' % i))
    def update_model(trainer, batch):
        """One SimCLR-style training step over a two-view batch.

        Returns (loss_value, batch_size, detached prediction dict).

        Fix vs. previous version: the return called ``.detach()`` on the
        prediction *dict*, which has no such method and raised
        AttributeError; each tensor is now detached individually.
        """
        model.train()
        optimizer.zero_grad()

        # Move batch to GPU; prepare_batch returns model inputs and target.
        net_inputs, target = prepare_batch(args, batch)
        # Encode the two augmented views sequentially, deleting each input
        # as soon as it is consumed to keep peak GPU memory down.
        x_i = net_inputs['x_i']
        z_i, p_i = model(x_i)
        del x_i
        x_j = net_inputs['x_j']
        z_j, p_j = model(x_j)
        del net_inputs, x_j

        y_pred = {'p_i': p_i, 'p_j': p_j, 'z_i': z_i, 'z_j': z_j}
        batch_size = target.shape[0]  # N
        loss = loss_fn(y_pred)

        loss.backward()
        optimizer.step()
        scheduler.step()

        # Detach every tensor in the prediction dict (dicts are not tensors).
        return loss.item(), batch_size, {k: v.detach() for k, v in y_pred.items()}
Exemple #10
0
 def _inference(evaluator, batch):
     """Evaluation step: gradient-free forward pass returning loss and stats."""
     model.eval()
     with torch.no_grad():
         inputs, labels = prepare_batch(args, batch, model.vocab)
         predictions = model(**inputs)
         n_examples = predictions.shape[0]
         loss, stats = loss_fn(predictions, labels)
         # TODO: add false_answer metric
         return loss.item(), stats, n_examples, predictions, labels
Exemple #11
0
def decode():

    # TODO: Config has to be taken from train
    config = OrderedDict(FLAGS.__flags.items())
    '''
    test_set = TextIterator(source=FLAGS.decode_input, batch_size=FLAGS.decode_batch_size,
                            source_dict=source_vocabulary, maxlen=None,
                            n_words_source=30000)
    '''

    test_set = CorpusIterator(sourcepath='../text/validation',
                              batch_size=FLAGS.decode_batch_size,
                              source_vocabulary=source_vocabulary)

    # print 'CHECK', test_set.source_dict['motivazioni']

    target_inverse_dict = utils.load_inverse_dict(target_vocabulary)

    with tf.Session(config=tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement,
            gpu_options=tf.GPUOptions(allow_growth=True))) as sess:

        model = load_model(sess, config)

        try:
            print 'Decoding {}..'.format(FLAGS.decode_input)

            fout = codecs.open(FLAGS.decode_output, 'w', 'utf8')

            for text in test_set:  # For multiple text
                print 'Text: ', text.source
                for idx, source_seq in enumerate(text):
                    source, source_len = prepare_batch(source_seq)
                    predicted_ids = model.predict(
                        sess,
                        encoder_inputs=source,
                        encoder_inputs_length=source_len)
                    for seq in predicted_ids:
                        # print utils.seq2words(seq, target_inverse_dict)
                        # print len(utils.seq2words(seq, target_inverse_dict).split())

                        # TODO: Check if it writes the sentences in the correct order
                        fout.write(
                            utils.seq2words(seq, target_inverse_dict) + '\n')

                    # if not FLAGS.write_n_best:
                    # break

                    print '  {}th line decoded'.format(
                        (idx + 1) * FLAGS.decode_batch_size)

                print 'Decoding terminated'

        except IOError:
            pass
        finally:
            fout.close()
Exemple #12
0
 def update_model(trainer, batch):
     """One optimization step: forward pass, loss, backward, parameter update."""
     model.train()
     optimizer.zero_grad()
     inputs, labels = prepare_batch(args, batch, model.vocab)
     predictions = model(**inputs)
     n_examples = predictions.shape[0]
     loss, stats = loss_fn(predictions, labels)
     loss.backward()
     optimizer.step()
     return loss.item(), stats, n_examples, predictions.detach(), labels.detach()
Exemple #13
0
    def _inference(evaluator, batch):
        """Store each question's argmax answer index into engine.answers."""
        model.eval()
        with torch.no_grad():
            question_ids = batch["qid"]
            inputs, _ = prepare_batch(args, batch, model.vocab)
            scores = model(**inputs)
            best = scores.argmax(dim=-1)  # answer indices 0..4

            for qid, choice in zip(question_ids, best):
                engine.answers[qid] = choice.item()

            return
 def _inference(evaluator, batch):
     """Evaluation step: gradient-free forward pass.

     Returns (loss_value, stats, batch_size, predictions, targets).

     Fix vs. previous version: removed the leftover ipdb debugger trap on
     empty subtitle tensors so evaluation can run unattended, and tidied
     the awkwardly wrapped return statement.
     """
     model.eval()
     with torch.no_grad():
         net_inputs, target = prepare_batch(args, batch, model.vocab)
         y_pred = model(**net_inputs)
         batch_size = y_pred.shape[0]
         loss, stats = loss_fn(y_pred, target)
         # TODO: add false_answer metric
         return loss.item(), stats, batch_size, y_pred, target
Exemple #15
0
    def _inference(evaluator, batch):
        """Record each question's predicted answer index into engine.answers.

        Fix vs. previous version: removed the leftover per-batch debug
        prints of the logit tensor shape, which spammed stdout during
        inference.
        """
        model.eval()
        with torch.no_grad():
            qids = batch["qid"]
            net_inputs, _ = prepare_batch(args, batch, model.module.vocab)
            y_pred, char_pred, mask_pred = model(**net_inputs)
            # argmax over the answer dimension -> indices 0..4.
            y_pred = y_pred.argmax(dim=-1)
            for qid, ans in zip(qids, y_pred):
                engine.answers[qid] = ans.item()

            return
    def _inference(evaluator, batch):
        """Evaluation step with an (currently disabled) qualitative dump.

        Returns (loss_value, stats, batch_size, predictions, targets).
        The large triple-quoted block below is disabled sample-printing
        code kept for debugging; ``sample_count`` tracks how many examples
        it has shown across calls.
        """
        nonlocal sample_count

        model.eval()
        with torch.no_grad():
            net_inputs, target = prepare_batch(args, batch, model.vocab)
            # Leftover debug hook: drops into ipdb when the subtitle tensor
            # is empty -- NOTE(review): remove before unattended runs.
            if net_inputs['subtitle'].nelement() == 0:
                import ipdb
                ipdb.set_trace()  # XXX DEBUG
            y_pred = model(**net_inputs)
            batch_size = y_pred.shape[0]
            loss, stats = loss_fn(y_pred, target)

            # vocab is only used by the disabled dump code below.
            vocab = model.vocab
            '''
            if sample_count < 100:
                print('Batch %d: data and prediction from %d to %d' % (sample_count // batch_size, sample_count, sample_count + batch_size - 1))
                
                que = net_inputs['que']
                answers = net_inputs['answers']
                # visuals = net_inputs['visual']
                script = net_inputs['filtered_sub']
                _, pred_idx = y_pred.max(dim=1)

                for i in range(batch_size):
                    targ = target[i].item()
                    pred = pred_idx[i].item()

                    ans = ['\tans %d: ' % j + ' '.join(indices_to_words(answers[i][j], vocab)) for j in range(5)]
                    if targ != pred:
                        ans[targ] = colored(ans[targ], 'green')
                        ans[pred] = colored(ans[pred], 'red')

                    print('QA', sample_count)
                    print('\tque:', *indices_to_words(que[i], vocab))
                    print('script:', *indices_to_words(script[i], vocab))
                    print(*ans, sep='\n')
                    print('\tcorrect_idx:', targ)
                    print('\tprediction:', pred)
                    # print('\tvisual:')
                    # for vis in visuals[i]:
                    #     print('\t\tspeaker: %s, behavior: %s, emotion: %s' % visual_to_words(vis, vocab))

                    sample_count += 1

                print()
            '''

            return loss.item(
            ), stats, batch_size, y_pred, target  # TODO: add false_answer metric
    def _inference(evaluator, batch):
        """Record predicted answers per qid and count matches with ground truth."""
        model.eval()
        with torch.no_grad():
            qid_list = batch["new_qid"]
            gold = batch["correct_idx"]
            inputs, _ = prepare_batch(args, batch, model.vocab)

            choices = model(**inputs).argmax(dim=-1)  # answer indices 0..4

            for qid, pred, truth in zip(qid_list, choices, gold):
                picked = pred.item()
                engine.answers[qid] = picked
                if picked == truth:
                    engine.count += 1
            return
def interactive(args):
    """Answer a single (video, question) pair with the loaded checkpoint."""
    args, model, iters, vocab, ckpt_available = get_model_ckpt(args)

    vid = args.get('vid', 's02e09_04_153')
    question = args.get('question', 'What does Monica place on the tablo?')

    batch = get_interactive_batch(args, vid, question, iters)
    with torch.no_grad():
        inputs, _target = prepare_batch(args, batch, model.vocab)
        scores = model(**inputs)
        best_idx = scores.argmax(dim=-1).item()
        best_answer = inputs['answers'].squeeze(0)[best_idx]

    return to_sent(vocab, best_answer)
Exemple #19
0
 def train(self, epoch):
     """Adversarial training loop: generator/discriminator steps per batch.

     Logs the combined loss every 100 steps and runs evaluation every
     1000 steps.
     """
     for epoch_idx in range(epoch):
         for step, batch in enumerate(self.data.train_iter):
             (x, lengths), l, l_ = prepare_batch(batch)
             gen_logit, dis_logit = self.model((x, lengths), l, l_)
             adv_loss = self._discriminator_step(dis_logit)
             recon_loss = self._generator_step(gen_logit, dis_logit,
                                               (x, lengths))
             # QUESTION: view on loss - (recon+adv) or (gen+dis)?
             if step % 100 == 0:
                 logger.info('loss at epoch {}, step {}: {:.2f}'.format(
                     epoch_idx, step, self.lambda_ * adv_loss + recon_loss))
             # TODO: implement evaluation(inference)
             if step % 1000 == 0:
                 self.evaluate()
Exemple #20
0
 def evaluate(self):
     """Qualitatively inspect style transfer on one random validation batch.

     Picks a random batch, generates transferred sentences and prints five
     random original/changed pairs.

     Fix vs. previous version: ``random.randint(0, len(...))`` includes the
     upper bound, so it could select an index one past the last batch, in
     which case the loop matched nothing and no output was produced;
     ``randrange`` excludes the upper bound.
     """
     import random

     self.data.valid_iter.shuffle = True
     chosen = random.randrange(len(self.data.valid_iter))  # temp test
     for i, batch in enumerate(self.data.valid_iter):
         if i != chosen:
             continue
         (x, lengths), l, l_ = prepare_batch(batch)
         generated = self.model((x, lengths), l, l_, is_gen=True)
         print('=' * 50)
         print('original \t\t -> \t\t changed')
         for idx in random.sample(range(lengths.size(0)), 5):
             ori = reverse(x, self.data.vocab)[idx]
             chg = reverse(generated[0], self.data.vocab)[idx]
             print(' '.join(ori))
             print('\t\t->', ' '.join(chg))
         print('=' * 50)
         return
Exemple #21
0
def infer_batch(seqs, tfserver):
    """
    Run inference on a batch of sequences via a TF Serving endpoint.

    Returns 3d then 8d predictions (via infer_sgi.generate).

    Fix vs. previous version: removed the redundant
    ``[s for s in seq_arr]`` which merely copied the list.
    """
    seq_arr = prepare_batch(seqs)

    # JSON payloads need plain Python ints, not numpy scalars.
    seq_arr = seq_arr.astype(int).tolist()

    # send data to tf server
    payload = {"inputs": {"input_seq_batch": seq_arr}}
    r = requests.post(tfserver, json=payload)
    pred = json.loads(r.content.decode('utf-8'))
    pred = infer_sgi.generate(pred['outputs'])

    return pred
Exemple #22
0
    def check_dataloader(self, **kwargs):
        """Smoke-test the data pipeline: dump one train batch, then iterate every split."""
        from dataloader.dataset_multichoice import modes
        from utils import prepare_batch
        from tqdm import tqdm

        args = self._default_args(**kwargs)
        iters, vocab = get_iterator(args)

        # Peek at a single training batch and report each field.
        sample = next(iter(iters['train']))
        for name, field in sample.items():
            shown = field.shape if isinstance(field, torch.Tensor) else field
            print(name, shown)

        # Walk every split end-to-end to surface loading/collation errors.
        for mode in modes:
            print('Test loading %s data' % mode)
            for batch in tqdm(iters[mode]):
                batch = prepare_batch(args, batch, vocab)
def run_rep_exp(path,
                model,
                loss,
                tasks,
                device,
                ways,
                shots,
                rep_params=default_params,
                features=None):
    """Representation-similarity experiment for a meta-learned model.

    For each sampled task, adapts a clone of ``model`` for a few inner-loop
    steps and compares layer representations before vs. after adaptation
    using CCA (CKA code is present but disabled).  Results are written as
    JSON under ``path``/rep_exp and one summary plot is drawn.

    Returns the per-layer CCA results dict.
    """
    rep_path = path + '/rep_exp'
    if os.path.exists(rep_path):
        ans = input('Overriding previous results! Are you sure? (y/n) ')
        if ans == 'n':
            exit(0)
    else:
        os.mkdir(rep_path)

    # Ignore labels
    sanity_batch, _ = tasks.sample()
    sanity_batch = sanity_batch.to(device)

    # An instance of the model before adaptation
    init_model = model.clone()
    adapt_model = model.clone()

    init_rep_sanity = get_rep_from_batch(init_model, sanity_batch)

    # column 0: adaptation results, column 1: init results
    acc_results = np.zeros((rep_params['n_tasks'], 2))
    # Create a dictionary of layer : results for each metric (e.g cca_results["0"] = [0.3, 0.2, 0.1])
    cca_results = {str(layer): [] for layer in rep_params['layers']}
    cka_l_results = {str(layer): [] for layer in rep_params['layers']}
    cka_k_results = {str(layer): [] for layer in rep_params['layers']}

    for task in tqdm(range(rep_params['n_tasks']), desc="Tasks"):

        batch = tasks.sample()

        adapt_d, adapt_l, eval_d, eval_l = prepare_batch(
            batch, shots, ways, device)

        # Adapt the model
        for step in range(rep_params['adapt_steps']):
            train_error = loss(adapt_model(adapt_d), adapt_l)
            train_error /= len(adapt_d)
            adapt_model.adapt(train_error)

            # Evaluate the adapted model
            # NOTE(review): accuracies are overwritten each inner step, so
            # only the final adaptation step's accuracy is kept per task.
            a_predictions = adapt_model(eval_d)
            a_valid_acc = accuracy(a_predictions, eval_l)

            # Evaluate the init model
            i_predictions = init_model(eval_d)
            i_valid_acc = accuracy(i_predictions, eval_l)

            acc_results[task, 0] = a_valid_acc
            acc_results[task, 1] = i_valid_acc

        # Get their representations for every layer
        for layer in cca_results.keys():
            adapted_rep_i = get_rep_from_batch(adapt_model, adapt_d,
                                               int(layer))
            init_rep_i = get_rep_from_batch(init_model, adapt_d, int(layer))

            cca_results[layer].append(
                get_cca_similarity(adapted_rep_i.T,
                                   init_rep_i.T,
                                   epsilon=1e-10)[1])
            # NOTE: Currently CKA takes too long to compute so leave it out
            # cka_l_results[layer].append(get_linear_CKA(adapted_rep_i, init_rep_i))
            # cka_k_results[layer].append(get_kernel_CKA(adapted_rep_i, init_rep_i))

    # Average and calculate standard deviation
    # NOTE(review): statistics.stdev raises when given fewer than two
    # values (n_tasks < 2) -- confirm n_tasks >= 2 upstream.
    cca_mean = []
    cca_std = []
    for layer, values in cca_results.items():
        mean = statistics.mean(values)
        std = statistics.stdev(values)
        cca_mean.append(mean)
        cca_std.append(std)

    # Plot payloads (the plot_dict calls below are currently disabled).
    cca_plot = dict(title="CCA Evolution",
                    x_legend="Inner loop steps",
                    y_legend="CCA similarity",
                    y_axis=cca_results,
                    path=path + "/inner_CCA_evolution.png")
    cka_l_plot = dict(title="Linear CKA Evolution",
                      x_legend="Inner loop steps",
                      y_legend="CKA similarity",
                      y_axis=cka_l_results,
                      path=path + "/inner_Linear_CKA_evolution.png")
    cka_k_plot = dict(title="Kernel CKA Evolution",
                      x_legend="Inner loop steps",
                      y_legend="CKA similarity",
                      y_axis=cka_k_results,
                      path=path + "/inner_Kernel_CKA_evolution.png")
    # plot_dict(cca_plot, save=True)
    # plot_dict(cka_l_plot, save=True)
    # plot_dict(cka_k_plot, save=True)

    with open(rep_path + '/rep_params.json', 'w') as fp:
        json.dump(rep_params, fp, sort_keys=True, indent=4)

    with open(rep_path + '/cca_results.json', 'w') as fp:
        json.dump(cca_results, fp, sort_keys=True, indent=4)

    # Layer axis labels: conv layers by index; non-positive index is the head.
    x_axis = []
    for i in cca_results.keys():
        if int(i) > 0:
            x_axis.append(f'Conv{i}')
        else:
            x_axis.append('Head')
    cca_plot_2 = dict(title="Layer-wise changes before / after adaptation",
                      x_legend="Layer",
                      y_legend="CCA similarity",
                      x_axis=x_axis,
                      y_axis=cca_mean,
                      std=cca_std)

    cka_plot_2 = dict(title="CKA Evolution layer-wise",
                      x_legend="Layer",
                      y_legend="CKA similarity",
                      x_axis=list(cka_l_results.keys()),
                      y_axis=list(cka_l_results.values()))

    plot_dict_explicit(cca_plot_2)
    return cca_results
Exemple #24
0
def main():
    """Train TET-GAN on the configured dataset class.

    For texture-transfer classes ('base_gray_texture' /
    'skeleton_gray_texture') this fine-tunes a pretrained model on a small
    few-shot style set and periodically validates and checkpoints;
    otherwise it trains from scratch on the full train split.
    """
    # parse options
    parser = TrainOptions()
    opts = parser.parse()

    # data loader
    print('--- load parameter ---')
    # outer_iter = opts.outer_iter
    # fade_iter = max(1.0, float(outer_iter / 2))
    epochs = opts.epoch
    batchsize = opts.batchsize
    # datasize = opts.datasize
    # datarange = opts.datarange
    augementratio = opts.augementratio
    centercropratio = opts.centercropratio

    # model
    print('--- create model ---')
    tetGAN = TETGAN(gpu=(opts.gpu != 0))
    if opts.gpu != 0:
        tetGAN.cuda()
    tetGAN.init_networks(weights_init)

    # Report model size.
    num_params = 0
    for param in tetGAN.parameters():
        num_params += param.numel()
    print('Total number of parameters in TET-GAN: %.3f M' % (num_params / 1e6))

    print('--- training ---')
    # Texture classes fine-tune a pretrained checkpoint with few-shot
    # style data and hard-coded batch/epoch overrides.
    texture_class = 'base_gray_texture' in opts.dataset_class or 'skeleton_gray_texture' in opts.dataset_class
    if texture_class:
        tetGAN.load_state_dict(torch.load(opts.model))
        dataset_path = os.path.join(opts.train_path, opts.dataset_class,
                                    'style')
        val_dataset_path = os.path.join(opts.train_path, opts.dataset_class,
                                        'val')
        if 'base_gray_texture' in opts.dataset_class:
            few_size = 6
            batchsize = 2
            epochs = 1500
        elif 'skeleton_gray_texture' in opts.dataset_class:
            few_size = 30
            batchsize = 10
            epochs = 300
        fnames = load_trainset_batchfnames_dualnet(dataset_path,
                                                   batchsize,
                                                   few_size=few_size)
        val_fnames = sorted(os.listdir(val_dataset_path))
        style_fnames = sorted(os.listdir(dataset_path)[:few_size])
    else:
        dataset_path = os.path.join(opts.train_path, opts.dataset_class,
                                    'train')
        fnames = load_trainset_batchfnames_dualnet(dataset_path, batchsize)

    tetGAN.train()

    # NOTE(review): despite the name, this is the list of entries in the
    # dataset directory, not a count.
    train_size = os.listdir(dataset_path)
    print('List of %d styles:' % (len(train_size)))

    result_dir = os.path.join(opts.result_dir, opts.dataset_class)
    if not os.path.exists(result_dir):
        os.mkdir(result_dir)

    for epoch in range(epochs):
        for idx, fname in enumerate(fnames):
            x, y_real, y = prepare_batch(fname, 1, 1, centercropratio,
                                         augementratio, opts.gpu)
            losses = tetGAN.one_pass(x[0], None, y[0], None, y_real[0], None,
                                     1, 0)
            if (idx + 1) % 100 == 0:
                print('Epoch [%d/%d], Iter [%d/%d]' %
                      (epoch + 1, epochs, idx + 1, len(fnames)))
                print(
                    'Lrec: %.3f, Ldadv: %.3f, Ldesty: %.3f, Lsadv: %.3f, Lsty: %.3f'
                    % (losses[0], losses[1], losses[2], losses[3], losses[4]))
        # Checkpoint + validate 20 times over the run for texture classes.
        # NOTE(review): epochs / 20 is a float modulus -- works in Python 3
        # for the hard-coded epoch counts above, but confirm for others.
        if texture_class and ((epoch + 1) % (epochs / 20)) == 0:
            outname = 'save/' + 'val_epoch' + str(
                epoch +
                1) + '_' + opts.dataset_class + '_' + opts.save_model_name
            print('--- save model Epoch [%d/%d] ---' % (epoch + 1, epochs))
            torch.save(tetGAN.state_dict(), outname)

            print('--- validating model [%d/%d] ---' % (epoch + 1, epochs))
            # Stylize each validation image with a random few-shot style.
            for val_idx, val_fname in enumerate(val_fnames):
                v_fname = os.path.join(val_dataset_path, val_fname)
                random.shuffle(style_fnames)
                sty_fname = style_fnames[0]
                s_fname = os.path.join(dataset_path, sty_fname)
                with torch.no_grad():
                    val_content = load_image_dualnet(v_fname, load_type=1)
                    val_sty = load_image_dualnet(s_fname, load_type=0)
                    if opts.gpu != 0:
                        val_content = val_content.cuda()
                        val_sty = val_sty.cuda()
                    result = tetGAN(val_content, val_sty)
                    if opts.gpu != 0:
                        result = to_data(result)
                    result_filename = os.path.join(
                        result_dir,
                        str(epoch) + '_' + val_fname)
                    print(result_filename)
                    save_image(result[0], result_filename)
        elif not texture_class and ((epoch + 1) % 2) == 0:
            outname = 'save/' + 'epoch' + str(epoch +
                                              1) + '_' + opts.save_model_name
            print('--- save model ---')
            torch.save(tetGAN.state_dict(), outname)
Exemple #25
0
def main():
    """Fine-tune a pretrained TETGAN on a single style (one-shot learning).

    Loads the model named by ``opts.load_model_name``, adapts it to
    ``opts.style_name`` either supervised (ground-truth glyphs available)
    or unsupervised (content glyphs recovered via the de-stylization
    branch), then saves the result to ``opts.save_model_name``.
    """
    # parse options
    opts = FinetuneOptions().parse()

    # training hyper-parameters
    print('--- load parameter ---')
    outer_iter = opts.outer_iter
    epochs = opts.epoch
    batchsize = opts.batchsize
    datasize = opts.datasize
    stylename = opts.style_name

    # build the model and restore pretrained weights
    print('--- create model ---')
    tetGAN = TETGAN(gpu=(opts.gpu != 0))
    if opts.gpu != 0:
        tetGAN.cuda()
    tetGAN.load_state_dict(torch.load(opts.load_model_name))
    tetGAN.train()

    print('--- training ---')
    supervised = (opts.supervise == 1)
    for it in range(outer_iter):
        fnames = load_oneshot_batchfnames(stylename, batchsize, datasize)
        for epoch in range(epochs):
            if supervised:
                # supervised one shot learning: paired (x, y) available
                for fname in fnames:
                    x, y_real, y = prepare_batch(fname, 3, 0, 0, 0, opts.gpu)
                    losses = tetGAN.one_pass(x[0], None, y[0], None,
                                             y_real[0], None, 3, 0)
                print('Iter[%d/%d], Epoch [%d/%d]' %
                      (it + 1, outer_iter, epoch + 1, epochs))
                print(
                    'Lrec: %.3f, Ldadv: %.3f, Ldesty: %.3f, Lsadv: %.3f, Lsty: %.3f'
                    % (losses[0], losses[1], losses[2], losses[3], losses[4]))
            else:
                # unsupervised one shot learning: no ground truth x provided
                # first pass: update the style auto-encoder alone
                for fname in fnames:
                    _, y_real, _ = prepare_batch(fname, 3, 0, 0, 0, opts.gpu)
                    Lsrec = tetGAN.update_style_autoencoder(y_real[0])
                # second pass: synthesize the content image via the
                # de-stylization branch and use it as the auxiliary x
                for fname in fnames:
                    _, y_real, y = prepare_batch(fname, 3, 0, 0, 0, opts.gpu)
                    with torch.no_grad():
                        x_auxiliary = tetGAN.desty_forward(y_real[0])
                    losses = tetGAN.one_pass(x_auxiliary, None, y[0], None,
                                             y_real[0], None, 3, 0)
                print('Iter[%d/%d], Epoch [%d/%d]' %
                      (it + 1, outer_iter, epoch + 1, epochs))
                print(
                    'Lrec: %.3f, Ldadv: %.3f, Ldesty: %.3f, Lsadv: %.3f, Lsty: %.3f, Lsrec: %.3f'
                    % (losses[0], losses[1], losses[2], losses[3], losses[4],
                       Lsrec))

    print('--- save ---')
    torch.save(tetGAN.state_dict(), opts.save_model_name)
Exemple #26
0
def run_cl_exp(path,
               maml,
               loss,
               tasks,
               device,
               ways,
               shots,
               cl_params=default_params,
               features=None):
    """Run a continual-learning experiment with a MAML-style learner.

    Samples ``cl_params['n_tasks']`` tasks, adapts a fresh clone of the
    meta-learned model to each task in turn, and evaluates the adapted
    learner on every task in the pool, filling an accuracy matrix
    (rows = trained-on task, columns = tested-on task). The matrix,
    parameters and derived CL metrics are written under ``path/cl_exp``.

    Returns:
        (acc_matrix, cl_res): the raw accuracy matrix and the metrics
        computed by ``calc_cl_metrics``.
    """
    cl_path = path + '/cl_exp'
    if os.path.exists(cl_path):
        ans = input('Overriding previous results! Are you sure? (y/n)')
        if ans == 'n':
            exit(0)
    else:
        os.mkdir(cl_path)

    def _sample_task():
        # Draw one batch and split it into adaptation / evaluation halves.
        batch = tasks.sample()
        adapt_d, adapt_l, eval_d, eval_l = prepare_batch(batch,
                                                         shots,
                                                         ways,
                                                         device,
                                                         features=features)
        # NOTE(review): `setting` is not defined in this function — presumably
        # a module-level global; verify. setting == 1 evaluates on the same
        # data used for adaptation.
        if setting == 1:
            return {'adapt': (adapt_d, adapt_l), 'eval': (adapt_d, adapt_l)}
        return {'adapt': (adapt_d, adapt_l), 'eval': (eval_d, eval_l)}

    # Randomly select some batches for training and evaluation
    tasks_pool = [_sample_task() for _ in range(cl_params['n_tasks'])]

    # acc_matrix[i, j]: accuracy on task j after being trained on task i.
    n_tasks = cl_params['n_tasks']
    acc_matrix = np.zeros((n_tasks, n_tasks))

    # Training loop
    for train_idx, train_task in enumerate(tasks_pool):
        a_data, a_labels = train_task['adapt']
        a_data, a_labels = a_data.to(device), a_labels.to(device)

        # Adapt a fresh clone of the meta-learned model to this task.
        learner = maml.clone()
        for _ in range(cl_params['adapt_steps']):
            adapt_error = loss(learner(a_data), a_labels)
            learner.adapt(adapt_error)

        # Evaluate the adapted learner on every task in the pool.
        for eval_idx, eval_task in enumerate(tasks_pool):
            e_data, e_labels = eval_task['eval']
            e_data, e_labels = e_data.to(device), e_labels.to(device)
            acc_matrix[train_idx, eval_idx] = accuracy(learner(e_data),
                                                       e_labels)

    cl_res = calc_cl_metrics(acc_matrix)

    save_acc_matrix(cl_path, acc_matrix)
    with open(cl_path + '/cl_params.json', 'w') as fp:
        json.dump(cl_params, fp, sort_keys=True, indent=4)

    with open(cl_path + '/cl_res.json', 'w') as fp:
        json.dump(cl_res, fp, sort_keys=True, indent=4)

    return acc_matrix, cl_res
Exemple #27
0
def main():
    """Train TETGAN from scratch, optionally with progressive growing.

    Parses ``TrainOptions``, builds and initializes a fresh TETGAN, then
    trains either progressively (level1 64x64 -> level2 128x128 ->
    level3 256x256) or directly at level3 256x256. Final weights are
    saved to ``opts.save_model_name``.
    """
    # parse options
    parser = TrainOptions()
    opts = parser.parse()

    # data loader
    print('--- load parameter ---')
    outer_iter = opts.outer_iter
    # Number of iterations over which jitter / fade-in weights ramp (>= 1).
    fade_iter = max(1.0, float(outer_iter / 2))
    epochs = opts.epoch
    batchsize = opts.batchsize
    datasize = opts.datasize
    datarange = opts.datarange
    augementratio = opts.augementratio
    centercropratio = opts.centercropratio

    # model
    print('--- create model ---')
    tetGAN = TETGAN(gpu=(opts.gpu != 0))
    if opts.gpu != 0:
        tetGAN.cuda()
    tetGAN.init_networks(weights_init)
    tetGAN.train()

    def _print_losses(losses):
        # Reconstruction, glyph-adversarial, de-stylization, style-adversarial
        # and style losses, in the order returned by TETGAN.one_pass.
        print(
            'Lrec: %.3f, Ldadv: %.3f, Ldesty: %.3f, Lsadv: %.3f, Lsty: %.3f'
            % (losses[0], losses[1], losses[2], losses[3], losses[4]))

    def _train_level(level, bs_mult, ds_mult):
        # One progressive-training stage. Lower levels use larger batches
        # (bs_mult) and more data (ds_mult) since the images are smaller.
        for i in range(outer_iter):
            if level == 1:
                # Ramp data jitter up over the first half of the stage.
                jitter = min(1.0, i / fade_iter)
                w = None  # no previous level to fade from
            else:
                jitter = 1
                # Fade-in weight of the newly added level, decaying to 0.
                w = max(0.0, 1 - i / fade_iter)
            fnames = load_trainset_batchfnames(opts.train_path,
                                               batchsize * bs_mult,
                                               datarange,
                                               datasize * ds_mult)
            for epoch in range(epochs):
                for fname in fnames:
                    x, y_real, y = prepare_batch(fname, level, jitter,
                                                 centercropratio,
                                                 augementratio, opts.gpu)
                    if level == 1:
                        losses = tetGAN.one_pass(x[0], None, y[0], None,
                                                 y_real[0], None, 1, None)
                    else:
                        # Higher levels also feed the previous-resolution
                        # tensors (x[1], y[1], y_real[1]) for blending.
                        losses = tetGAN.one_pass(x[0], x[1], y[0], y[1],
                                                 y_real[0], y_real[1],
                                                 level, w)
                print('Level%d, Iter[%d/%d], Epoch [%d/%d]' %
                      (level, i + 1, outer_iter, epoch + 1, epochs))
                _print_losses(losses)

    print('--- training ---')
    stylenames = os.listdir(opts.train_path)
    print('List of %d styles:' % (len(stylenames)), *stylenames, sep=' ')

    if opts.progressive == 1:
        # progressive training. From level1 64*64, to level2 128*128,
        # to level3 256*256
        _train_level(1, 4, 2)
        _train_level(2, 2, 2)
        _train_level(3, 1, 1)
    else:
        # directly train on level3 256*256
        for i in range(outer_iter):
            fnames = load_trainset_batchfnames(opts.train_path, batchsize,
                                               datarange, datasize)
            for epoch in range(epochs):
                for fname in fnames:
                    x, y_real, y = prepare_batch(fname, 3, 1, centercropratio,
                                                 augementratio, opts.gpu)
                    losses = tetGAN.one_pass(x[0], None, y[0], None, y_real[0],
                                             None, 3, 0)
                print('Iter[%d/%d], Epoch [%d/%d]' %
                      (i + 1, outer_iter, epoch + 1, epochs))
                _print_losses(losses)

    print('--- save ---')
    torch.save(tetGAN.state_dict(), opts.save_model_name)
Exemple #28
0
    def _inference(evaluator, batch):
        """Evaluate one batch: forward pass under ``no_grad`` and QA loss.

        Returns ``(loss_value, stats, batch_size, y_pred, target)`` for the
        surrounding evaluation engine. The auxiliary character/MLM losses
        below are intentionally disabled (kept as commented-out reference).
        """
        nonlocal sample_count

        # Switch to eval mode (disables dropout / batch-norm updates).
        model.eval()
        with torch.no_grad():
            # NOTE(review): `model.module` implies the model is wrapped
            # (e.g. DataParallel) — confirm against the caller; the
            # unwrapped variant is kept commented out below.
            net_inputs, target = prepare_batch(args, batch, model.module.vocab)
            #net_inputs, target = prepare_batch(args, batch, model.vocab)
            # HACK: leftover interactive debugger trap for empty subtitle
            # tensors — should be removed before production runs.
            if net_inputs['subtitle'].nelement() == 0:
                import ipdb
                ipdb.set_trace()  # XXX DEBUG
            y_pred, char_pred, mask_pred = model(**net_inputs)
            batch_size = y_pred.shape[0]
            """
            # get person ground truth and compute character loss
            n_char = 21
            visual_char = net_inputs['filtered_visual'].view(batch_size, -1, 3)[:,:,0]
            char_target = visual_char.unsqueeze(2).view(batch_size, -1)
            char_target = char_target.view(-1)
            char_pred = char_pred.view(-1, n_char)
            character_loss = nn.CrossEntropyLoss().cuda()(char_pred, char_target)

            # get ground truth labels and compute MLM loss
            vocab_size = mask_pred.size(-1)
            mask_target = net_inputs['labels']
            mask_target = mask_target.view(-1)
            mask_pred = mask_pred.view(-1, vocab_size)
            mlm_loss = nn.CrossEntropyLoss(ignore_index=-1).cuda()(mask_pred, mask_target)
            """

            # compute QA loss
            loss, stats = loss_fn(y_pred, target)

            # compute total loss
            #loss = loss + character_loss + mlm_loss
            #vocab = model.vocab
            # Disabled per-sample prediction dump for the first ~100 samples.
            '''
            if sample_count < 100:
                print('Batch %d: data and prediction from %d to %d' % (sample_count // batch_size, sample_count, sample_count + batch_size - 1))
                
                que = net_inputs['que']
                answers = net_inputs['answers']
                # visuals = net_inputs['visual']
                script = net_inputs['filtered_sub']
                _, pred_idx = y_pred.max(dim=1)

                for i in range(batch_size):
                    targ = target[i].item()
                    pred = pred_idx[i].item()

                    ans = ['\tans %d: ' % j + ' '.join(indices_to_words(answers[i][j], vocab)) for j in range(5)]
                    if targ != pred:
                        ans[targ] = colored(ans[targ], 'green')
                        ans[pred] = colored(ans[pred], 'red')

                    print('QA', sample_count)
                    print('\tque:', *indices_to_words(que[i], vocab))
                    print('script:', *indices_to_words(script[i], vocab))
                    print(*ans, sep='\n')
                    print('\tcorrect_idx:', targ)
                    print('\tprediction:', pred)
                    # print('\tvisual:')
                    # for vis in visuals[i]:
                    #     print('\t\tspeaker: %s, behavior: %s, emotion: %s' % visual_to_words(vis, vocab))

                    sample_count += 1

                print()
            '''

            return loss.item(
            ), stats, batch_size, y_pred, target  # TODO: add false_answer metric
Exemple #29
0
def train_loop(
    run_id,
    dataset_dir,
    ckpt_run_dir,
    output_dir,
    validation_only=False,
    use_cuda=False,
    light_target=False,
):
    """Train loop for GNMT on WMT16 (en -> de).

    Trains with gradient accumulation (``update_freq``) to reach a global
    batch size of 2048, validates every ~30% of an epoch, and checkpoints
    the best model per epoch. With ``validation_only=True`` it instead
    evaluates existing checkpoints epoch by epoch and dumps the stats to
    JSON files in ``output_dir``.

    Args:
        run_id: identifier passed to the metrics tracker.
        dataset_dir: location of the WMT16 data (downloaded if missing).
        ckpt_run_dir: directory for checkpoints.
        output_dir: directory for evaluation JSON output.
        validation_only: skip training and only evaluate checkpoints.
        use_cuda: move model/criterion/batches to GPU.
        light_target: stop at BLEU 20 instead of 24.
    """
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    rank = dist.get_rank()
    world_size = dist.get_world_size()

    train_epochs = 8
    train_min_len, train_max_len = 0, 75
    val_min_len, val_max_len = 0, 150
    math_mode = "fp16"  # One of `fp16`, `fp32`
    lang = ("en", "de")

    # Training
    train_global_batch_size = 2048  # Global batch size
    max_bs = 128  # Max batch size for used hardware
    # Gradient-accumulation steps needed to reach the global batch size.
    update_freq = int(max(1, train_global_batch_size // (max_bs * world_size)))
    train_batch_size = int(train_global_batch_size // (world_size * update_freq))
    val_batch_size = 64

    # Model attributes
    model_args = {
        "hidden_size": 1024,
        "num_layers": 4,
        "dropout": 0.2,
        "share_embedding": True,
        "fusion": True,
    }

    # Criterion
    criterion_args = {"smoothing": 0.1, "fast_xentropy": True}

    # Loss scaling (for fp16 training)
    loss_scaling = {"init_scale": 1024, "upscale_interval": 128}

    # Optimizer
    optimizer_args = {
        "lr": 2e-3,
        "grad_clip": 5.0,
    }

    # Scheduler
    scheduler_args = {
        "warmup_steps": 200,
        "remain_steps": 0.4,
        "decay_interval": 0.05,
        "decay_steps": 4,
        "decay_factor": 0.5,
    }

    # Translator (beam-search decoding parameters)
    translator_args = {
        "beam_size": 5,
        "len_norm_factor": 0.6,
        "cov_penalty_factor": 0.1,
        "len_norm_const": 5.0,
        "max_seq_len": 150,
    }

    # Build train/val datasets
    train_set = WMT16Dataset(
        dataset_dir,
        math_precision=math_mode,
        lang=lang,
        train=True,
        download=True,
        preprocessed=True,
        min_len=train_min_len,
        max_len=train_max_len,
    )
    train_set.prepare()
    val_set = WMT16Dataset(
        dataset_dir,
        math_precision=math_mode,
        lang=lang,
        validation=True,
        download=False,
        min_len=val_min_len,
        max_len=val_max_len,
        sort=True,
    )

    tokenizer = train_set.tokenizer

    # Build model
    model = GNMT(vocab_size=train_set.vocab_size, **model_args)

    # Build loss function
    criterion = LabelSmoothing(padding_idx=wmt16_config.PAD, **criterion_args)

    # Bilingual Evaluation Understudy Score
    metrics = [BLEUScore()]

    # Partition data across workers
    train_set = partition_dataset_by_rank(train_set, rank, world_size)
    val_set = partition_dataset_by_rank(val_set, rank, world_size)

    collate_fn = build_collate_fn(sort=True)
    train_loader = DataLoader(
        train_set,
        batch_size=train_batch_size,
        collate_fn=collate_fn,
        num_workers=2,
        pin_memory=True,
        drop_last=False,
        shuffle=True,
    )

    val_loader = DataLoader(
        val_set,
        batch_size=val_batch_size,
        collate_fn=collate_fn,
        num_workers=2,
        pin_memory=True,
        drop_last=False,
    )

    # Validate every ~30% of an epoch, aligned to accumulation boundaries.
    # max(1, ...) guards small loaders where round(...) could be 0 and the
    # `% validate_every` below would raise ZeroDivisionError.
    validate_every = update_freq * max(
        1, round(len(train_loader) * 0.30 / update_freq)
    )

    # Build optimizer & scheduler
    total_train_iters = (len(train_loader) // update_freq) * train_epochs

    print("Number of batches per epoch {}".format(len(train_loader)))
    print("Train iterations per epoch {}".format(total_train_iters / train_epochs))

    if use_cuda:
        model = model.cuda()
        criterion = criterion.cuda()

    # Horovod is only used for fp16 all-reduce over MPI.
    use_horovod = math_mode == "fp16" and dist.get_backend() == dist.Backend.MPI

    if use_horovod:
        hvd.init()
        logger.info("Using horovod rank={}".format(hvd.rank()))
        # Sanity check: sum of ones across ranks must equal world size.
        tensor = torch.tensor([1])
        res = hvd.allreduce(tensor, op=hvd.Sum)
        assert res[0] == world_size

    fp_optimizer, optimizer, model = build_optimizer(
        model=model,
        math=math_mode,
        loss_scaling=loss_scaling,
        use_cuda=use_cuda,
        use_horovod=use_horovod,
        **optimizer_args
    )

    # Create a learning rate scheduler for an optimizer
    scheduler = ExponentialWarmupMultiStepLR(
        optimizer, total_train_iters, **scheduler_args
    )

    # Translator
    translator = Translator(model=model, trg_tokenizer=tokenizer, **translator_args)

    checkpointer = Checkpointer(
        ckpt_run_dir=ckpt_run_dir, rank=rank, freq=CheckpointFreq.BEST
    )

    if not validation_only:

        if light_target:
            goal = task4_time_to_bleu_goal(20)
        else:
            goal = task4_time_to_bleu_goal(24)

        num_batches_per_device_train = len(train_loader)
        tracker = Tracker(metrics, run_id, rank, goal=goal)

        dist.barrier()
        tracker.start()

        for epoch in range(0, train_epochs):
            if torch.cuda.is_available():
                torch.cuda.empty_cache()

            model.train()
            tracker.train()
            for batch_idx, (data, target) in enumerate(train_loader):
                tracker.batch_start()
                data, target = prepare_batch(data, target, use_cuda=use_cuda)
                tracker.record_batch_load()

                # Last batch of the epoch. batch_idx runs 0..len-1, so the
                # comparison must be against len - 1; the original compared
                # against len(train_loader), which never matched, leaving a
                # trailing partial accumulation group un-stepped.
                is_last = batch_idx == len(train_loader) - 1
                update = (batch_idx % update_freq) == update_freq - 1
                init = (batch_idx % update_freq) == 0

                # Clear gradients in the optimizer.
                if init:
                    fp_optimizer.zero_grad()
                    tracker.record_batch_init()

                # Compute the output
                output = compute_model_output(model, data, target)
                tracker.record_batch_fwd_pass()

                # Compute the loss
                loss, loss_per_token = compute_loss(
                    data, target, output, criterion, update_freq
                )
                tracker.record_batch_comp_loss()
                # Backprop
                fp_optimizer.backward_loss(loss)
                tracker.record_batch_backprop()

                # Opt step at the end of each accumulation group (or epoch)
                if update or is_last:
                    # For this task, simply sum all gradients
                    updated = fp_optimizer.step(tracker=tracker, denom=1)

                    # Learning rate scheduler
                    if updated:
                        scheduler.step()

                tracker.batch_end()

                record_train_batch_stats(
                    batch_idx=batch_idx,
                    loss=loss_per_token,
                    output=target[0],  # Use target just for the size
                    metric_results={},
                    tracker=tracker,
                    num_batches_per_device_train=num_batches_per_device_train,
                )

                # Validation during training
                if (batch_idx + 1) % validate_every == 0:
                    if torch.cuda.is_available():
                        torch.cuda.empty_cache()

                    metrics_values, loss = validation_round(
                        val_loader,
                        metrics,
                        model,
                        criterion,
                        update_freq,
                        translator,
                        tracker=tracker,
                        use_cuda=use_cuda,
                    )

                    record_validation_stats(metrics_values, loss, tracker, rank)
                    if tracker.goal_reached:
                        break

                    # Back to training mode after the validation round.
                    model.train()
                    tracker.train()

            if torch.cuda.is_available():
                torch.cuda.empty_cache()

            # End-of-epoch validation.
            metrics_values, loss = validation_round(
                val_loader,
                metrics,
                model,
                criterion,
                update_freq,
                translator,
                use_cuda=use_cuda,
            )

            is_best = record_validation_stats(metrics_values, loss, tracker, rank)

            checkpointer.save(
                tracker,
                model,
                fp_optimizer.optimizer,
                scheduler,
                tracker.current_epoch,
                is_best,
            )

            tracker.epoch_end()

            if tracker.goal_reached:
                print("Goal Reached!")
                dist.barrier()
                time.sleep(10)
                return
    else:
        # Evaluation-only path: score every saved checkpoint epoch by epoch.
        cecf = CheckpointsEvaluationControlFlow(
            ckpt_dir=ckpt_run_dir,
            rank=rank,
            world_size=world_size,
            checkpointer=checkpointer,
            model=model,
            epochs=train_epochs,
            loss_function=criterion,
            metrics=metrics,
            use_cuda=use_cuda,
            dtype="fp32",
            max_batch_per_epoch=None,
        )

        train_stats = cecf.evaluate_by_epochs(train_loader)
        with open(os.path.join(output_dir, "train_stats.json"), "w") as f:
            json.dump(train_stats, f)

        val_stats = cecf.evaluate_by_epochs(val_loader)
        with open(os.path.join(output_dir, "val_stats.json"), "w") as f:
            json.dump(val_stats, f)
Exemple #30
0
def optimiser(idx, shared_model, SIMULATOR, args, lock):
    """Asynchronous DQN optimiser worker.

    Runs episodes in its own simulator instance, stores transitions in a
    local replay buffer, and optimises the shared model from sampled
    batches, periodically syncing a local target network.

    Args:
        idx: worker index (used for log/run names, GPU assignment, tqdm row).
        shared_model: model shared across worker processes.
        SIMULATOR: environment class; instantiated once per worker.
        args: hyper-parameter namespace (lr, eps, min_eps, buffer_size,
            batch_size, target_update_frequency, ...).
        lock: lock guarding updates to the shared model.
    """
    # Create the writer outside the try so the finally below can close it.
    writer = SummaryWriter('runs/{}/optimiser:{:02}'.format(datetime.now().strftime("%d|%m_%H|%M"), idx))
    try:
        logging.basicConfig(filename='logs/optimiser:{:02}.log'.format(idx),
                            filemode='w',
                            format='%(message)s',
                            level=logging.DEBUG)

        sgd = t.optim.SGD(params=shared_model.parameters(), lr=args.lr)

        # allocate a device (round-robin across available GPUs)
        n_gpu = t.cuda.device_count()
        if n_gpu > 0:
            Device.set_device(idx % n_gpu)

        # Local online network, synced from the shared model every step.
        q_network = deepcopy(shared_model)
        q_network.to(Device.get_device())
        q_network.train()

        # Frozen target network for bootstrapped Q-targets.
        target_network = deepcopy(q_network)
        target_network.to(Device.get_device())
        target_network.eval()

        buffer = deque(maxlen=args.buffer_size)

        simulator = SIMULATOR()
        for itr in tqdm(count(), position=idx, desc='optimiser:{:02}'.format(idx)):

            state = simulator.reset()
            episode_reward = 0
            for e in count():
                # Epsilon-greedy action selection; eps decays geometrically
                # with the episode count, floored at args.min_eps.
                # NOTE(review): a fresh OS-seeded RandomState per step —
                # presumably for fork safety across workers; confirm.
                if np.random.RandomState().rand() < max(args.eps ** itr, args.min_eps):
                    action = np.random.RandomState().randint(simulator.n_actions())
                else:
                    action = q_network(as_tensor(state)).argmax().item()

                next_state, reward, terminal = simulator.step(action)

                buffer.append(transition_to_tensor(state, action, reward, next_state, terminal))

                episode_reward += reward
                state = next_state

                # Sample a data point from dataset
                batch = prepare_batch(buffer, args.batch_size)

                # Sync local model with shared model
                q_network.load_state_dict(shared_model.state_dict())

                # Calculate loss for the batch
                loss = calculate_loss(q_network, target_network, batch, args)

                # Optimise for the batch
                loss = optimise_model(shared_model, q_network, loss, sgd, args, lock)

                # Log the results
                # NOTE(review): `e` restarts at 0 each episode, so these
                # scalar steps overwrite across episodes — verify intent.
                logging.debug('Batch loss: {:.2f}'.format(loss))
                writer.add_scalar('batch/loss', loss, e)

                if terminal:
                    break

            logging.debug('Episode reward: {:.2f}'.format(episode_reward))
            writer.add_scalar('episode_reward', episode_reward, itr)

            # Periodically refresh the target network from the online one.
            if itr % args.target_update_frequency == 0:
                target_network.load_state_dict(q_network.state_dict())

    except KeyboardInterrupt:
        print('exiting optimiser:{:02}'.format(idx))
    finally:
        # Close the writer exactly once on exit. The original closed it
        # inside the episode loop, so every add_scalar after the first
        # episode was issued against an already-closed writer.
        writer.close()