Example no. 1
File: tests.py Project: gchers/cpy
 def test_CP_kde_unlabelled(self):
     utils.set_seed(self.seed)
     N = utils.N
     K = utils.K
     X, x_test = utils.generate_unlabelled_dataset(N, K)
     pred = self.cp_kde.predict_unlabelled(x_test, X, self.epsilon)
     self.assertTrue(pred, 'CP with KDE NCM mispredicted an object in unlabelled setting.')
Example no. 2
File: tests.py Project: gchers/cpy
 def test_CP_kde_labelled(self):
     utils.set_seed(self.seed)
     N = utils.N
     K = utils.K
     X, Y, x_test, y_test = utils.generate_labelled_dataset(N, K)
     pred = self.cp_kde.predict_labelled(x_test, X, Y, self.epsilon)
     self.assertIn(y_test, pred,
                   'CP with KDE NCM mispredicted an object. Objects had dimension {}'.format(K))
Example no. 3
File: tests.py Project: gchers/cpy
 def test_CP_kde_labelled_1(self):
     """Consider objects with dimension 1.
     """
     utils.set_seed(self.seed)
     N = utils.N
     K = 1
     X, Y, x_test, y_test = utils.generate_labelled_dataset(N, K)
     pred = self.cp_kde.predict_labelled(x_test, X, Y, self.epsilon)
     self.assertIn(y_test, pred,
                   'CP with KDE NCM mispredicted an object. Objects had dimension 1.')
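Each of the tests above calls utils.set_seed before generating data. The utils module itself is not shown in this listing; a minimal sketch of such a helper, assuming it only needs to seed the standard-library and NumPy generators, could look like this:

import random

import numpy as np


def set_seed(seed):
    """Seed Python's and NumPy's RNGs so the generated datasets are reproducible."""
    random.seed(seed)
    np.random.seed(seed)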
Example no. 4
def main():
    parser = argparse.ArgumentParser(description='Negotiator')
    parser.add_argument('--dataset', type=str, default='./data/negotiate/val.txt',
        help='location of the dataset')
    parser.add_argument('--model_file', type=str,
        help='model file')
    parser.add_argument('--smart_ai', action='store_true', default=False,
        help='to use rollouts')
    parser.add_argument('--seed', type=int, default=1,
        help='random seed')
    parser.add_argument('--temperature', type=float, default=1.0,
        help='temperature')
    parser.add_argument('--domain', type=str, default='object_division',
        help='domain for the dialogue')
    parser.add_argument('--log_file', type=str, default='',
        help='log file')
    args = parser.parse_args()

    utils.set_seed(args.seed)

    model = utils.load_model(args.model_file)
    ai = LstmAgent(model, args)
    logger = DialogLogger(verbose=True, log_file=args.log_file)
    domain = get_domain(args.domain)

    score_func = rollout if args.smart_ai else likelihood

    dataset, sents = read_dataset(args.dataset)
    ranks, n, k = 0, 0, 0
    for ctx, dialog in dataset:
        start_time = time.time()
        # start new conversation
        ai.feed_context(ctx)
        for sent, you in dialog:
            if you:
                # if it is your turn to say, take the target word and compute its rank
                rank = compute_rank(sent, sents, ai, domain, args.temperature, score_func)
                # compute lang_h for the groundtruth sentence
                enc = ai._encode(sent, ai.model.word_dict)
                _, ai.lang_h, lang_hs = ai.model.score_sent(enc, ai.lang_h, ai.ctx_h, args.temperature)
                # save hidden states and the utterance
                ai.lang_hs.append(lang_hs)
                ai.words.append(ai.model.word2var('YOU:'))
                ai.words.append(Variable(enc))
                ranks += rank
                n += 1
            else:
                ai.read(sent)
        k += 1
        time_elapsed = time.time() - start_time
        logger.dump('dialogue %d | avg rank %.3f | raw %d/%d | time %.3f' % (k, 1. * ranks / n, ranks, n, time_elapsed))

    logger.dump('final avg rank %.3f' % (1. * ranks / n))
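compute_rank is not included in this excerpt. A hypothetical sketch consistent with the loop above, assuming score_func(sent, ai, domain, temperature) returns a higher score for sentences the model prefers, could be:

def compute_rank(sent, sents, ai, domain, temperature, score_func):
    # Rank of the ground-truth sentence among all candidates: 1 plus the number
    # of candidates the scoring function prefers over it. The score_func
    # signature used here is an assumption; the real one is not shown.
    target_score = score_func(sent, ai, domain, temperature)
    better = sum(1 for cand in sents
                 if score_func(cand, ai, domain, temperature) > target_score)
    return better + 1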
Example no. 5
def main():
    parser = argparse.ArgumentParser(description='selfplaying script')
    parser.add_argument('--alice_model_file', type=str,
        help='Alice model file')
    parser.add_argument('--bob_model_file', type=str,
        help='Bob model file')
    parser.add_argument('--context_file', type=str,
        help='context file')
    parser.add_argument('--temperature', type=float, default=1.0,
        help='temperature')
    parser.add_argument('--verbose', action='store_true', default=False,
        help='print out conversations')
    parser.add_argument('--seed', type=int, default=1,
        help='random seed')
    parser.add_argument('--score_threshold', type=int, default=6,
        help='successful dialog should have more than score_threshold in score')
    parser.add_argument('--max_turns', type=int, default=20,
        help='maximum number of turns in a dialog')
    parser.add_argument('--log_file', type=str, default='',
        help='log successful dialogs to file for training')
    parser.add_argument('--smart_alice', action='store_true', default=False,
        help='make Alice smart again')
    parser.add_argument('--fast_rollout', action='store_true', default=False,
        help='to use faster rollouts')
    parser.add_argument('--rollout_bsz', type=int, default=100,
        help='rollout batch size')
    parser.add_argument('--rollout_count_threshold', type=int, default=3,
        help='rollout count threshold')
    parser.add_argument('--smart_bob', action='store_true', default=False,
        help='make Bob smart again')
    parser.add_argument('--ref_text', type=str,
        help='file with the reference text')
    parser.add_argument('--domain', type=str, default='object_division',
        help='domain for the dialogue')
    args = parser.parse_args()

    utils.set_seed(args.seed)

    alice_model = utils.load_model(args.alice_model_file)
    alice_ty = get_agent_type(alice_model, args.smart_alice, args.fast_rollout)
    alice = alice_ty(alice_model, args, name='Alice')

    bob_model = utils.load_model(args.bob_model_file)
    bob_ty = get_agent_type(bob_model, args.smart_bob, args.fast_rollout)
    bob = bob_ty(bob_model, args, name='Bob')

    dialog = Dialog([alice, bob], args)
    logger = DialogLogger(verbose=args.verbose, log_file=args.log_file)
    ctx_gen = ContextGenerator(args.context_file)

    selfplay = SelfPlay(dialog, ctx_gen, args, logger)
    selfplay.run()
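The SelfPlay driver is not shown in this listing. A hypothetical sketch, assuming ContextGenerator exposes an iterator over context pairs and Dialog a run(ctxs, logger) method (both assumptions), might look like:

class SelfPlay(object):
    def __init__(self, dialog, ctx_gen, args, logger):
        self.dialog = dialog
        self.ctx_gen = ctx_gen
        self.args = args
        self.logger = logger

    def run(self):
        # Run one self-play dialogue per generated context pair.
        for ctxs in self.ctx_gen.iter():
            self.dialog.run(ctxs, self.logger)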
Example no. 6
def main():
    parser = argparse.ArgumentParser(description='chat utility')
    parser.add_argument('--model_file', type=str,
        help='model file')
    parser.add_argument('--domain', type=str, default='object_division',
        help='domain for the dialogue')
    parser.add_argument('--context_file', type=str, default='',
        help='context file')
    parser.add_argument('--temperature', type=float, default=1.0,
        help='temperature')
    parser.add_argument('--num_types', type=int, default=3,
        help='number of object types')
    parser.add_argument('--num_objects', type=int, default=6,
        help='total number of objects')
    parser.add_argument('--max_score', type=int, default=10,
        help='max score per object')
    parser.add_argument('--score_threshold', type=int, default=6,
        help='successful dialog should have more than score_threshold in score')
    parser.add_argument('--seed', type=int, default=1,
        help='random seed')
    parser.add_argument('--smart_ai', action='store_true', default=False,
        help='make AI smart again')
    parser.add_argument('--ai_starts', action='store_true', default=False,
        help='allow AI to start the dialog')
    parser.add_argument('--ref_text', type=str,
        help='file with the reference text')
    args = parser.parse_args()

    utils.set_seed(args.seed)

    human = HumanAgent(domain.get_domain(args.domain))

    alice_ty = LstmRolloutAgent if args.smart_ai else LstmAgent
    ai = alice_ty(utils.load_model(args.model_file), args)


    agents = [ai, human] if args.ai_starts else [human, ai]

    dialog = Dialog(agents, args)
    logger = DialogLogger(verbose=True)
    # either take manually produced contexts, or rely on the ones from the dataset
    if args.context_file == '':
        ctx_gen = ManualContextGenerator(args.num_types, args.num_objects, args.max_score)
    else:
        ctx_gen = ContextGenerator(args.context_file)

    chat = Chat(dialog, ctx_gen, logger)
    chat.run()
Example no. 7
def main():
    parser = argparse.ArgumentParser(description='testing script')
    parser.add_argument('--data', type=str, default='data/negotiate',
        help='location of the data corpus')
    parser.add_argument('--unk_threshold', type=int, default=20,
        help='minimum word frequency to be in dictionary')
    parser.add_argument('--model_file', type=str,
        help='pretrained model file')
    parser.add_argument('--seed', type=int, default=1,
        help='random seed')
    parser.add_argument('--hierarchical', action='store_true', default=False,
        help='use hierarchical model')
    parser.add_argument('--bsz', type=int, default=16,
        help='batch size')
    parser.add_argument('--cuda', action='store_true', default=False,
        help='use CUDA')
    args = parser.parse_args()

    device_id = utils.use_cuda(args.cuda)
    utils.set_seed(args.seed)

    corpus = data.WordCorpus(args.data, freq_cutoff=args.unk_threshold, verbose=True)
    model = utils.load_model(args.model_file)

    crit = Criterion(model.word_dict, device_id=device_id)
    sel_crit = Criterion(model.item_dict, device_id=device_id,
        bad_toks=['<disconnect>', '<disagree>'])


    testset, testset_stats = corpus.test_dataset(args.bsz, device_id=device_id)
    test_loss, test_select_loss = 0, 0

    N = len(corpus.word_dict)
    for batch in testset:
        # run forward on the batch, produces output, hidden, target,
        # selection output and selection target
        out, hid, tgt, sel_out, sel_tgt = Engine.forward(model, batch, volatile=False)

        # compute LM and selection losses
        test_loss += tgt.size(0) * crit(out.view(-1, N), tgt).data[0]
        test_select_loss += sel_crit(sel_out, sel_tgt).data[0]

    test_loss /= testset_stats['nonpadn']
    test_select_loss /= len(testset)
    print('testloss %.3f | testppl %.3f' % (test_loss, np.exp(test_loss)))
    print('testselectloss %.3f | testselectppl %.3f' % (test_select_loss, np.exp(test_select_loss)))
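The .data[0] accesses in the loop above are pre-0.4 PyTorch idiom; on current PyTorch releases a scalar loss is read with .item() instead, for example:

import torch
import torch.nn.functional as F

loss = F.mse_loss(torch.zeros(3), torch.ones(3))
value = loss.item()  # replaces the older loss.data[0]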
def create_param_space(params, n_runs):
    seed = np.random.randint(1000)
    param_space = []
    for i in range(n_runs):
        set_seed(seed + i)
        param_choice = {}
        for param, value in params.items():
            if isinstance(value, list):
                if len(value) == 2:
                    mode = 'choice'
                    param_choice[param] = sample_param_space(value, mode)
                else:
                    mode = value[-1]
                    param_choice[param] = sample_param_space(value[:-1], mode)
            else:
                param_choice[param] = value
        param_space.append(param_choice)
    return param_space
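A hypothetical usage of create_param_space, assuming sample_param_space supports a 'choice' mode plus named modes such as 'loguniform' (its implementation is not shown here):

params = {
    'batch_size': [32, 64],            # two entries -> sampled with mode 'choice'
    'lr': [1e-5, 1e-2, 'loguniform'],  # last entry names the sampling mode
    'optimizer': 'adam',               # non-list values are copied as-is
}
param_space = create_param_space(params, n_runs=4)
# param_space is a list of 4 dicts, each with concrete values for every key.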
def main():
    parser = get_parser()
    args = parser.parse_args()

    if not args.model_name:
        args.model_name = args.model_path

    if args.doc_stride >= args.max_seq_length - args.max_query_length:
        logger.warning(
            "WARNING - You've set a doc stride which may be superior to the document length in some "
            "examples. This could result in errors when building features from the examples. Please reduce the doc "
            "stride or increase the maximum length to ensure the features are correctly built."
        )

    if (
        os.path.exists(args.output_dir)
        and os.listdir(args.output_dir)
        and not args.overwrite_output_dir
    ):
        raise ValueError(
            "Output directory ({}) already exists and is not empty. Use --overwrite_output_dir to overcome.".format(
                args.output_dir
            )
        )
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    # Set device
    args.device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")

    # Setup logging
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s -   %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.DEBUG if args.debug else logging.INFO
    )

    # Set seed
    set_seed(args)

    # Load pretrained model and tokenizer
    config = GPT2Config.from_pretrained(
        args.config_name if args.config_name else args.model_path,
        cache_dir=args.cache_dir if args.cache_dir else None,
    )
    tokenizer = GPT2Tokenizer.from_pretrained(
        args.tokenizer_name if args.tokenizer_name else args.model_path,
        do_lower_case=args.do_lower_case,
        cache_dir=args.cache_dir if args.cache_dir else None,
    )
    tokenizer.add_tokens(['question:', ':question'])
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.sep_token = tokenizer.eos_token
    tokenizer.encode = partial(tokenizer.encode, is_pretokenized=True, truncation=True)
    tokenizer.encode_plus = partial(tokenizer.encode_plus, is_pretokenized=True, truncation=True)

    model = GPT2LMHeadModel.from_pretrained(
        args.model_path,
        config=config,
        cache_dir=args.cache_dir if args.cache_dir else None,
    )
    model.resize_token_embeddings(len(tokenizer))
    model.to(args.device)

    logger.info("Training/evaluation parameters %s", args)

    # Before we do anything with models, we want to ensure that we get fp16 execution of torch.einsum if args.fp16 is set.
    # Otherwise it'll default to "promote" mode, and we'll get fp32 operations. Note that running `--fp16_opt_level="O2"` will
    # remove the need for this code, but it is still valid.
    if args.fp16:
        try:
            import apex
            apex.amp.register_half_function(torch, "einsum")
        except ImportError:
            raise ImportError("Please install apex from https://www.github.com/nvidia/apex to use fp16 training.")

    # Training
    train_dataset = load_and_cache_examples(args, tokenizer, 'quest_gen', evaluate=False, gpt=True)
    train_dataset = preprocess_dataset(train_dataset, tokenizer)

    dev_dataset = load_and_cache_examples(args, tokenizer, 'quest_gen', evaluate=True, gpt=True)
    dev_dataset = preprocess_dataset(dev_dataset, tokenizer)

    train(args, train_dataset, dev_dataset, model, tokenizer)
    logger.info('Finished training!')

    # Save a trained model, configuration and tokenizer using `save_pretrained()`.
    # They can then be reloaded using `from_pretrained()`
    # Good practice: save your training arguments together with the trained model
    logger.info("Saving final model checkpoint to %s", args.output_dir)
    model.save_pretrained(args.output_dir)
    tokenizer.save_pretrained(args.output_dir)
    torch.save(args, os.path.join(args.output_dir, "training_args.bin"))
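The partial(...) wrapping above passes is_pretokenized, which newer transformers releases have renamed; on transformers 4.x the same setup would use is_split_into_words instead (a sketch, with the rest of the script unchanged):

from functools import partial

from transformers import GPT2Tokenizer

tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
tokenizer.encode = partial(tokenizer.encode, is_split_into_words=True, truncation=True)
tokenizer.encode_plus = partial(tokenizer.encode_plus, is_split_into_words=True, truncation=True)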
def train(args, train_dataset, dev_dataset, model, tokenizer):
    """ Train the model """

    tb_writer = SummaryWriter(os.path.join(args.output_dir, 'TB_writer'))

    if args.dynamic_batching:
        train_sampler = CustomBatchSampler(train_dataset, args.train_batch_size)
        train_dataloader = DataLoader(
            train_dataset,
            batch_sampler=train_sampler,
            num_workers=0,
            collate_fn=dynamic_padding_collate_fn
        )
    else:
        train_sampler = RandomSampler(train_dataset)
        train_dataloader = DataLoader(train_dataset, sampler=train_sampler,
                                      batch_size=args.train_batch_size, num_workers=0)

    if args.max_steps > 0:
        t_total = args.max_steps
        args.num_train_epochs = args.max_steps // (len(train_dataloader) // args.gradient_accumulation_steps) + 1
    else:
        t_total = len(train_dataloader) // args.gradient_accumulation_steps * args.num_train_epochs

    # Prepare optimizer and schedule (linear warmup and decay)
    no_decay = ["bias", "LayerNorm.weight"]
    optimizer_grouped_parameters = [
        {
            "params": [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)],
            "weight_decay": args.weight_decay,
        },
        {"params": [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)], "weight_decay": 0.0},
    ]
    optimizer = AdamW(optimizer_grouped_parameters, lr=args.learning_rate)
    scheduler = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=args.warmup_steps, num_training_steps=t_total
    )

    # Check if saved optimizer or scheduler states exist
    if os.path.isfile(os.path.join(args.model_path, "optimizer.pt")) and os.path.isfile(
        os.path.join(args.model_path, "scheduler.pt")
    ):
        # Load in optimizer and scheduler states
        optimizer.load_state_dict(torch.load(os.path.join(args.model_path, "optimizer.pt")))
        scheduler.load_state_dict(torch.load(os.path.join(args.model_path, "scheduler.pt")))

    if args.fp16:
        try:
            from apex import amp
            model, optimizer = amp.initialize(model, optimizer, opt_level=args.fp16_opt_level)
        except ImportError:
            raise ImportError("Please install apex from https://www.github.com/nvidia/apex to use fp16 training.")

    # Train!
    logger.info("***** Running training *****")
    logger.info("  Num examples = %d", len(train_dataset))
    logger.info("  Num Epochs = %d", args.num_train_epochs)
    logger.info("  Batch size = %d", args.train_batch_size)
    logger.info("  Gradient Accumulation steps = %d", args.gradient_accumulation_steps)
    logger.info("  Total optimization steps = %d", t_total)

    global_step = 1
    epochs_trained = 0
    steps_trained_in_current_epoch = 0

    # Check if continuing training from a checkpoint
    if os.path.exists(args.model_path):
        try:
            # set global_step to the global_step of the last saved checkpoint from the model path
            checkpoint_suffix = args.model_path.split("-")[-1].split("/")[0]
            global_step = int(checkpoint_suffix)
            epochs_trained = global_step // (len(train_dataloader) // args.gradient_accumulation_steps)
            steps_trained_in_current_epoch = global_step % (len(train_dataloader) // args.gradient_accumulation_steps)

            logger.info("  Continuing training from checkpoint, will skip to saved global_step")
            logger.info("  Continuing training from epoch %d", epochs_trained)
            logger.info("  Continuing training from global step %d", global_step)
            logger.info("  Will skip the first %d steps in the first epoch", steps_trained_in_current_epoch)
        except ValueError:
            logger.info("  Starting fine-tuning.")

    model.train()
    model.zero_grad()
    train_iterator = trange(epochs_trained, int(args.num_train_epochs), desc="Epoch")

    # Added here for reproducibility
    set_seed(args)

    loss_cum = None
    # torch.autograd.set_detect_anomaly(True)
    for _ in train_iterator:

        epoch_iterator = tqdm(train_dataloader, desc="Iteration", smoothing=0.05)
        for step, batch_cpu in enumerate(epoch_iterator):

            # Skip past any already trained steps if resuming training
            if steps_trained_in_current_epoch > 0:
                steps_trained_in_current_epoch -= 1
                continue

            batch = tuple(t.to(args.device) for t in batch_cpu)

            inputs = {
                "input_ids": batch[0],
                "attention_mask": batch[1],
                "token_type_ids": batch[2],
                "labels": batch[0]
            }

            outputs = model(**inputs)
            loss = outputs[0]

            if args.gradient_accumulation_steps > 1:
                loss = loss / args.gradient_accumulation_steps

            if args.fp16:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
                    if loss_cum is None:
                        loss_cum = loss.detach()
                    else:
                        loss_cum += loss.detach()

            else:
                loss.backward()
                if loss_cum is None:
                    loss_cum = loss.detach()
                else:
                    loss_cum += loss.detach()

            if (step + 1) % args.gradient_accumulation_steps == 0:
                if args.fp16:
                    torch.nn.utils.clip_grad_norm_(amp.master_params(optimizer), args.max_grad_norm)
                else:
                    torch.nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm)

                optimizer.step()
                scheduler.step()  # Update learning rate schedule
                model.zero_grad()
                global_step += 1

                # Log train metrics
                if (not global_step % args.train_logging_steps) and args.train_logging_steps > 0:
                    tb_writer.add_scalar('train_loss', loss_cum.item() / args.train_logging_steps, global_step)

                    loss_cum = None
                # Log dev metrics
                if args.dev_logging_steps > 0 and global_step % args.dev_logging_steps == 0 and args.evaluate_during_training:
                    dev_loss = evaluate(args, dev_dataset, model)
                    tb_writer.add_scalar("dev_loss", dev_loss, global_step)
                    tb_writer.add_scalar("lr", scheduler.get_lr()[0], global_step)

                # Save model checkpoint
                if args.save_steps > 0 and global_step % args.save_steps == 0:
                    output_dir = os.path.join(args.output_dir, "checkpoint-{}".format(global_step))
                    logger.info("Saving model checkpoint to %s", output_dir)

                    model.save_pretrained(output_dir)
                    tokenizer.save_pretrained(output_dir)

                    torch.save(args, os.path.join(output_dir, "training_args.bin"))
                    torch.save(optimizer.state_dict(), os.path.join(output_dir, "optimizer.pt"))
                    torch.save(scheduler.state_dict(), os.path.join(output_dir, "scheduler.pt"))

            if args.max_steps > 0 and global_step > args.max_steps:
                epoch_iterator.close()
                break
        if args.max_steps > 0 and global_step > args.max_steps:
            train_iterator.close()
            break

    tb_writer.close()  # close the writer once training is complete
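dynamic_padding_collate_fn is referenced above but not shown. A hypothetical sketch, assuming each dataset item is an (input_ids, attention_mask, token_type_ids) tuple of 1-D tensors (field order and pad values are assumptions):

import torch

def dynamic_padding_collate_fn(batch, pad_token_id=0):
    # Pad every field to the longest sequence in this batch rather than to a
    # fixed global maximum.
    max_len = max(item[0].size(0) for item in batch)
    padded = []
    for field, pad_value in enumerate((pad_token_id, 0, 0)):
        rows = []
        for item in batch:
            seq = item[field]
            filler = seq.new_full((max_len - seq.size(0),), pad_value)
            rows.append(torch.cat([seq, filler]))
        padded.append(torch.stack(rows))
    return tuple(padded)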
Example no. 11
def main():
    ''' set default hyperparams in default_hyperparams.py '''
    parser = argparse.ArgumentParser()

    # Required arguments
    parser.add_argument('-d',
                        '--dataset',
                        choices=wilds.supported_datasets,
                        required=True)
    parser.add_argument('--algorithm',
                        required=True,
                        choices=supported.algorithms)
    parser.add_argument(
        '--root_dir',
        required=True,
        help=
        'The directory where [dataset]/data can be found (or should be downloaded to, if it does not exist).'
    )

    # Dataset
    parser.add_argument(
        '--split_scheme',
        help=
        'Identifies how the train/val/test split is constructed. Choices are dataset-specific.'
    )
    parser.add_argument('--dataset_kwargs',
                        nargs='*',
                        action=ParseKwargs,
                        default={})
    parser.add_argument(
        '--download',
        default=False,
        type=parse_bool,
        const=True,
        nargs='?',
        help=
        'If true, tries to download the dataset if it does not exist in root_dir.'
    )
    parser.add_argument(
        '--frac',
        type=float,
        default=1.0,
        help=
        'Convenience parameter that scales all dataset splits down to the specified fraction, for development purposes. Note that this also scales the test set down, so the reported numbers are not comparable with the full test set.'
    )
    parser.add_argument('--version', default=None, type=str)

    # Loaders
    parser.add_argument('--loader_kwargs',
                        nargs='*',
                        action=ParseKwargs,
                        default={})
    parser.add_argument('--train_loader', choices=['standard', 'group'])
    parser.add_argument('--uniform_over_groups',
                        type=parse_bool,
                        const=True,
                        nargs='?')
    parser.add_argument('--distinct_groups',
                        type=parse_bool,
                        const=True,
                        nargs='?')
    parser.add_argument('--n_groups_per_batch', type=int)
    parser.add_argument('--batch_size', type=int)
    parser.add_argument('--eval_loader',
                        choices=['standard'],
                        default='standard')

    # Model
    parser.add_argument('--model', choices=supported.models)
    parser.add_argument(
        '--model_kwargs',
        nargs='*',
        action=ParseKwargs,
        default={},
        help=
        'keyword arguments for model initialization passed as key1=value1 key2=value2'
    )

    # Transforms
    parser.add_argument('--train_transform', choices=supported.transforms)
    parser.add_argument('--eval_transform', choices=supported.transforms)
    parser.add_argument(
        '--target_resolution',
        nargs='+',
        type=int,
        help=
        'The input resolution that images will be resized to before being passed into the model. For example, use --target_resolution 224 224 for a standard ResNet.'
    )
    parser.add_argument('--resize_scale', type=float)
    parser.add_argument('--max_token_length', type=int)

    # Objective
    parser.add_argument('--loss_function', choices=supported.losses)

    # Algorithm
    parser.add_argument('--groupby_fields', nargs='+')
    parser.add_argument('--group_dro_step_size', type=float)
    parser.add_argument('--coral_penalty_weight', type=float)
    parser.add_argument('--dann_lambda', type=float)
    parser.add_argument('--dann_domain_layers', type=int,
                        default=1)  # hidden layers
    parser.add_argument('--dann_label_layers', type=int,
                        default=1)  # hidden layers
    parser.add_argument('--domain_loss_function', choices=supported.losses)
    parser.add_argument('--irm_lambda', type=float)
    parser.add_argument('--irm_penalty_anneal_iters', type=int)
    parser.add_argument('--algo_log_metric')

    # Model selection
    parser.add_argument('--val_metric')
    parser.add_argument('--val_metric_decreasing',
                        type=parse_bool,
                        const=True,
                        nargs='?')

    # Optimization
    parser.add_argument('--n_epochs', type=int)
    parser.add_argument('--optimizer', choices=supported.optimizers)
    parser.add_argument('--lr', type=float)
    parser.add_argument('--weight_decay', type=float)
    parser.add_argument('--max_grad_norm', type=float)
    parser.add_argument('--optimizer_kwargs',
                        nargs='*',
                        action=ParseKwargs,
                        default={})

    # Scheduler
    parser.add_argument('--scheduler', choices=supported.schedulers)
    parser.add_argument('--scheduler_kwargs',
                        nargs='*',
                        action=ParseKwargs,
                        default={})
    parser.add_argument('--scheduler_metric_split',
                        choices=['train', 'val'],
                        default='val')
    parser.add_argument('--scheduler_metric_name')

    # Evaluation
    parser.add_argument('--process_outputs_function',
                        choices=supported.process_outputs_functions)
    parser.add_argument('--evaluate_all_splits',
                        type=parse_bool,
                        const=True,
                        nargs='?',
                        default=True)
    parser.add_argument('--eval_splits', nargs='+', default=[])
    parser.add_argument('--eval_only',
                        type=parse_bool,
                        const=True,
                        nargs='?',
                        default=False)
    parser.add_argument(
        '--eval_epoch',
        default=None,
        type=int,
        help=
        'If eval_only is set, then eval_epoch allows you to specify evaluating at a particular epoch. By default, it evaluates the best epoch by validation performance.'
    )

    # Misc
    parser.add_argument('--device', type=int, default=0)
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--log_dir', default='./logs')
    parser.add_argument('--log_every', default=50, type=int)
    parser.add_argument('--save_step', type=int)
    parser.add_argument('--save_best',
                        type=parse_bool,
                        const=True,
                        nargs='?',
                        default=True)
    parser.add_argument('--save_last',
                        type=parse_bool,
                        const=True,
                        nargs='?',
                        default=True)
    parser.add_argument('--save_pred',
                        type=parse_bool,
                        const=True,
                        nargs='?',
                        default=True)
    parser.add_argument('--no_group_logging',
                        type=parse_bool,
                        const=True,
                        nargs='?')
    parser.add_argument('--use_wandb',
                        type=parse_bool,
                        const=True,
                        nargs='?',
                        default=False)
    parser.add_argument('--progress_bar',
                        type=parse_bool,
                        const=True,
                        nargs='?',
                        default=False)
    parser.add_argument('--resume',
                        type=parse_bool,
                        const=True,
                        nargs='?',
                        default=False)

    config = parser.parse_args()
    config = populate_defaults(config)

    # set device
    config.device = torch.device("cuda:" + str(
        config.device)) if torch.cuda.is_available() else torch.device("cpu")

    ## Initialize logs
    if os.path.exists(config.log_dir) and config.resume:
        resume = True
        mode = 'a'
    elif os.path.exists(config.log_dir) and config.eval_only:
        resume = False
        mode = 'a'
    else:
        resume = False
        mode = 'w'

    if not os.path.exists(config.log_dir):
        os.makedirs(config.log_dir)
    logger = Logger(os.path.join(config.log_dir, 'log.txt'), mode)

    # Record config
    log_config(config, logger)

    # Set random seed
    set_seed(config.seed)

    # Data
    full_dataset = wilds.get_dataset(dataset=config.dataset,
                                     version=config.version,
                                     root_dir=config.root_dir,
                                     download=config.download,
                                     split_scheme=config.split_scheme,
                                     **config.dataset_kwargs)

    # To implement data augmentation (i.e., have different transforms
    # at training time vs. test time), modify these two lines:
    train_transform = initialize_transform(
        transform_name=config.train_transform,
        config=config,
        dataset=full_dataset)
    eval_transform = initialize_transform(transform_name=config.eval_transform,
                                          config=config,
                                          dataset=full_dataset)

    train_grouper = CombinatorialGrouper(dataset=full_dataset,
                                         groupby_fields=config.groupby_fields)

    datasets = defaultdict(dict)
    for split in full_dataset.split_dict.keys():
        if split == 'train':
            transform = train_transform
            verbose = True
        elif split == 'val':
            transform = eval_transform
            verbose = True
        else:
            transform = eval_transform
            verbose = False
        # Get subset
        datasets[split]['dataset'] = full_dataset.get_subset(
            split, frac=config.frac, transform=transform)

        if split == 'train':
            datasets[split]['loader'] = get_train_loader(
                loader=config.train_loader,
                dataset=datasets[split]['dataset'],
                batch_size=config.batch_size,
                uniform_over_groups=config.uniform_over_groups,
                grouper=train_grouper,
                distinct_groups=config.distinct_groups,
                n_groups_per_batch=config.n_groups_per_batch,
                **config.loader_kwargs)
        else:
            datasets[split]['loader'] = get_eval_loader(
                loader=config.eval_loader,
                dataset=datasets[split]['dataset'],
                grouper=train_grouper,
                batch_size=config.batch_size,
                **config.loader_kwargs)

        # Set fields
        datasets[split]['split'] = split
        datasets[split]['name'] = full_dataset.split_names[split]
        datasets[split]['verbose'] = verbose

        # Loggers
        datasets[split]['eval_logger'] = BatchLogger(
            os.path.join(config.log_dir, f'{split}_eval.csv'),
            mode=mode,
            use_wandb=(config.use_wandb and verbose))
        datasets[split]['algo_logger'] = BatchLogger(
            os.path.join(config.log_dir, f'{split}_algo.csv'),
            mode=mode,
            use_wandb=(config.use_wandb and verbose))

        if config.use_wandb:
            initialize_wandb(config)

    # Logging dataset info
    # Show class breakdown if feasible
    if config.no_group_logging and full_dataset.is_classification and full_dataset.y_size == 1 and full_dataset.n_classes <= 10:
        log_grouper = CombinatorialGrouper(dataset=full_dataset,
                                           groupby_fields=['y'])
    elif config.no_group_logging:
        log_grouper = None
    else:
        log_grouper = train_grouper
    log_group_data(datasets, log_grouper, logger)

    ## Initialize algorithm
    algorithm = initialize_algorithm(config=config,
                                     datasets=datasets,
                                     train_grouper=train_grouper)

    model_prefix = get_model_prefix(datasets['train'], config)
    if not config.eval_only:
        ## Load saved results if resuming
        resume_success = False
        if resume:
            save_path = model_prefix + 'epoch:last_model.pth'
            if not os.path.exists(save_path):
                epochs = [
                    int(file.split('epoch:')[1].split('_')[0])
                    for file in os.listdir(config.log_dir)
                    if file.endswith('.pth')
                ]
                if len(epochs) > 0:
                    latest_epoch = max(epochs)
                    save_path = model_prefix + f'epoch:{latest_epoch}_model.pth'
            try:
                prev_epoch, best_val_metric = load(algorithm, save_path)
                epoch_offset = prev_epoch + 1
                logger.write(
                    f'Resuming from epoch {epoch_offset} with best val metric {best_val_metric}'
                )
                resume_success = True
            except FileNotFoundError:
                pass

        if not resume_success:
            epoch_offset = 0
            best_val_metric = None

        train(algorithm=algorithm,
              datasets=datasets,
              general_logger=logger,
              config=config,
              epoch_offset=epoch_offset,
              best_val_metric=best_val_metric)
    else:
        if config.eval_epoch is None:
            eval_model_path = model_prefix + 'epoch:best_model.pth'
        else:
            eval_model_path = model_prefix + f'epoch:{config.eval_epoch}_model.pth'
        best_epoch, best_val_metric = load(algorithm, eval_model_path)
        if config.eval_epoch is None:
            epoch = best_epoch
        else:
            epoch = config.eval_epoch
        evaluate(algorithm=algorithm,
                 datasets=datasets,
                 epoch=epoch,
                 general_logger=logger,
                 config=config)

    logger.close()
    for split in datasets:
        datasets[split]['eval_logger'].close()
        datasets[split]['algo_logger'].close()
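ParseKwargs is the argparse action that lets flags such as --model_kwargs accept key1=value1 key2=value2 pairs. A minimal sketch of such an action (the project's version may also cast values to bool/int/float; here they stay strings):

import argparse

class ParseKwargs(argparse.Action):
    def __call__(self, parser, namespace, values, option_string=None):
        # e.g. --model_kwargs pretrained=True dropout=0.1
        #   -> {'pretrained': 'True', 'dropout': '0.1'}
        kwargs = {}
        for token in values:
            key, _, raw = token.partition('=')
            kwargs[key] = raw
        setattr(namespace, self.dest, kwargs)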
Example no. 12
import os
# import copy
import fire
import torch
import utils
import stats
import torch.nn.functional as F

utils.set_seed(2019)
logger = utils.setup_logger()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

out_path = os.getcwd() + '/out/'


def main(algorithm,
         optimizer,
         dataset,
         num_classes=10,
         optim_params={
             'lr': 0.05,
             'weight_decay': 5e-4,
             'momentum': 0.9
         }):

    filename = algorithm + '_' + optimizer + '_' + dataset

    # prepare dataset
    logger.info("====== Evaluation ======")
    logger.info("Preparing dataset...{}".format(dataset))
    db = utils.Datasets(dataset)
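One caveat in the signature above: the dict default for optim_params is created once and shared across calls, which can surprise if it is ever mutated inside main. A safer variant of the same signature (sketch, rest of the body omitted):

def main(algorithm, optimizer, dataset, num_classes=10, optim_params=None):
    if optim_params is None:
        optim_params = {'lr': 0.05, 'weight_decay': 5e-4, 'momentum': 0.9}
    ...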
Example no. 13
    utils.plot_loss(trainer.validation_history["loss"],
                    label="Validation loss")
    plt.legend()
    plt.subplot(1, 2, 2)
    plt.title("Accuracy")
    utils.plot_loss(trainer.validation_history["accuracy"],
                    label="Validation Accuracy")
    utils.plot_loss(trainer.train_history["accuracy"],
                    label="Training Accuracy")

    print(trainer.train_history["accuracy"].popitem(last=True), " train acc")
    print(trainer.train_history["loss"].popitem(last=True), " train loss")
    plt.legend()
    plt.savefig(plot_path.joinpath(f"{name}_final_.png"))
    plt.show()


if __name__ == "__main__":
    # Set the random generator seed (parameters, shuffling etc).
    # You can try to change this and check if you still get the same result!
    utils.set_seed(0)
    epochs = 10
    batch_size = 64
    learning_rate = 5e-4  # 5e-4?
    early_stop_count = 10
    dataloaders = load_cifar10(batch_size)
    model = ConvModel1(image_channels=3, num_classes=10)
    trainer = Trainer(batch_size, learning_rate, early_stop_count, epochs,
                      model, dataloaders)
    trainer.train()
    create_plots(trainer, "task2")
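utils.plot_loss and the Trainer class are not included in this excerpt. Judging from the popitem(last=True) calls, each history appears to be an OrderedDict mapping global step to value; a hypothetical plot_loss consistent with that could be:

import matplotlib.pyplot as plt

def plot_loss(history, label):
    # history: OrderedDict of global step -> metric value (an assumption here).
    steps = list(history.keys())
    values = list(history.values())
    plt.plot(steps, values, label=label)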
Example no. 14
def main():
    parser = argparse.ArgumentParser()
    arg = parser.add_argument
    arg('mode', choices=['train', 'validate', 'predict'])
    arg('run_root')
    arg('--batch-size', type=int, default=64)
    arg('--step', type=int, default=1)
    arg('--workers', type=int, default=2)
    arg('--lr', type=float, default=0.00001)
    arg('--patience', type=int, default=4)
    arg('--clean', action='store_true')
    arg('--n-epochs', type=int, default=5)
    arg('--limit', type=int)
    arg('--fold', type=int, default=0)
    arg('--multi-gpu', type=int, default=0)
    arg('--lr_layerdecay', type=float, default=0.95)
    args = parser.parse_args()

    set_seed()

    run_root = Path('../experiments/' + args.run_root)
    DATA_ROOT = Path('../byebyejuly/')

    folds = pd.read_pickle(DATA_ROOT / 'folds.pkl')
    train_fold = folds[folds['fold'] != args.fold]
    valid_fold = folds[folds['fold'] == args.fold]
    if args.limit:
        train_fold = train_fold[:args.limit]
        valid_fold = valid_fold[:args.limit]

    if args.mode == 'train':
        if run_root.exists() and args.clean:
            shutil.rmtree(run_root)
        run_root.mkdir(exist_ok=True, parents=True)
        (run_root / 'params.json').write_text(
            json.dumps(vars(args), indent=4, sort_keys=True))

        training_set = TrainDataset(train_fold, do_lower=True, shuffle=True)

        training_loader = DataLoader(training_set,
                                     collate_fn=collate_fn,
                                     shuffle=True,
                                     batch_size=args.batch_size,
                                     num_workers=args.workers)

        valid_set = TrainDataset(valid_fold)
        valid_loader = DataLoader(valid_set,
                                  batch_size=args.batch_size,
                                  shuffle=False,
                                  collate_fn=collate_fn,
                                  num_workers=args.workers)

        model = PairModel(BERT_PRETRAIN_PATH)
        model.cuda()

        # param_optimizer = list(model.named_parameters())
        # no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
        # optimizer_grouped_parameters = [
        #     {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay) and p.requires_grad],
        #      'weight_decay': 0.01},
        #     {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay) and p.requires_grad],
        #      'weight_decay': 0.0}
        # ]
        NUM_LAYERS = 12
        optimizer_grouped_parameters = [{
            'params':
            model.bert.bert.embeddings.parameters(),
            'lr':
            args.lr * (args.lr_layerdecay**NUM_LAYERS)
        }, {
            'params': model.head.parameters(),
            'lr': args.lr
        }, {
            'params':
            model.bert.bert.pooler.parameters(),
            'lr':
            args.lr
        }]

        for layer in range(NUM_LAYERS):
            optimizer_grouped_parameters.append(
                {
                    'params':
                    model.bert.bert.encoder.layer.__getattr__(
                        '%d' % (NUM_LAYERS - 1 - layer)).parameters(),
                    'lr':
                    args.lr * (args.lr_layerdecay**layer)
                }, )
        optimizer = BertAdam(optimizer_grouped_parameters,
                             lr=args.lr,
                             warmup=0.05,
                             t_total=len(training_loader) * args.n_epochs //
                             args.step)

        model, optimizer = amp.initialize(model,
                                          optimizer,
                                          opt_level="O2",
                                          verbosity=0)
        optimizer.zero_grad()

        if args.multi_gpu == 1:
            model = nn.DataParallel(model)

        train(args,
              model,
              optimizer,
              None,
              train_loader=training_loader,
              valid_df=valid_fold,
              valid_loader=valid_loader,
              epoch_length=len(training_set))

    elif args.mode == 'validate':
        valid_fold = pd.read_table('../byebyejuly/test.txt',
                                   names=['a', 'b', 'label'])

        valid_set = TrainDataset(valid_fold)
        valid_loader = DataLoader(valid_set,
                                  batch_size=args.batch_size,
                                  shuffle=False,
                                  collate_fn=collate_fn,
                                  num_workers=args.workers)

        model = PairModel(BERT_PRETRAIN_PATH)
        load_model(model,
                   run_root / ('best-model-%d.pt' % args.fold),
                   multi2single=False)
        model.cuda()
        if args.multi_gpu == 1:
            model = nn.DataParallel(model)
        validation(model, valid_fold, valid_loader, args, False, progress=True)
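The optimizer groups above implement layer-wise learning-rate decay: the top encoder layer trains at the base lr and each layer below it at lr_layerdecay times the layer above, with the embeddings lowest. A small self-contained illustration of the resulting schedule, using the script's defaults:

base_lr, decay, num_layers = 1e-5, 0.95, 12
for depth_from_top, layer in enumerate(range(num_layers - 1, -1, -1)):
    print(f"encoder.layer.{layer}: lr = {base_lr * decay ** depth_from_top:.2e}")
print(f"embeddings: lr = {base_lr * decay ** num_layers:.2e}")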
Example no. 15
                    help='resume from checkpoint')
parser.add_argument('--seed', default=0, type=int)
parser.add_argument('--mask_method', default="label-square", type=str)
parser.add_argument('--num_samples', default=1024, type=int)
parser.add_argument('--update_mask_epochs', default=500, type=int)
parser.add_argument('--save_file', default="default_accs.bin", type=str)
parser.add_argument('--max_epoch', default=200, type=int)
parser.add_argument('--pretrain_epoch', default=0, type=int)
parser.add_argument('--sparsity', default=0.005, type=float)

args = parser.parse_args()


sample_type, grad_type = args.mask_method.split("-")

set_seed(args)

device = 'cuda' if torch.cuda.is_available() else 'cpu'
best_acc = 0  # best test accuracy
start_epoch = 0  # start from epoch 0 or last checkpoint epoch

# Data
print('==> Preparing data..')
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

transform_test = transforms.Compose([
Example no. 16
    parser.add_argument('--eps', type=float)
    parser.add_argument('--nb_iter',
                        type=int,
                        default=40,
                        help='number of attack iterations')

    parser.add_argument('--resume', type=int, default=0)
    parser.add_argument('--save_model_loc', type=str, default=None)

    args = parser.parse_args()

    print(args)

    device = "cuda"

    set_seed(0)

    trainset, normalize, unnormalize = str2dataset(args.dataset, train=True)
    trainloader = torch.utils.data.DataLoader(trainset,
                                              batch_size=args.batch_size,
                                              shuffle=True,
                                              num_workers=2)

    net = str2model(path=args.save_model_loc,
                    dataset=args.dataset,
                    pretrained=args.resume).eval().to(device)

    if args.attack == "frank":
        attacker = FrankWolfe(predict=lambda x: net(normalize(x)),
                              loss_fn=nn.CrossEntropyLoss(reduction="sum"),
                              eps=args.eps,
from train import *
from test import *
from model import *
from tensorboard_logger import Logger
from thumos_features import *

if __name__ == "__main__":
    args = parse_args()
    if args.debug:
        pdb.set_trace()

    config = Config(args)
    worker_init_fn = None

    if config.seed >= 0:
        utils.set_seed(config.seed)
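        # Note: np.random.seed(...) returns None, so the next line seeds NumPy once
        # but leaves worker_init_fn set to None; if each DataLoader worker should be
        # seeded, a callable such as
        # `lambda worker_id: np.random.seed(config.seed + worker_id)` is the usual pattern.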
        worker_init_fn = np.random.seed(config.seed)

    utils.save_config(config, os.path.join(config.output_path, "config.txt"))

    net = Model(config.len_feature, config.num_classes, config.r_act,
                config.r_bkg)
    net = net.cuda()

    train_loader = data.DataLoader(ThumosFeature(
        data_path=config.data_path,
        mode='train',
        modal=config.modal,
        feature_fps=config.feature_fps,
        num_segments=config.num_segments,
        supervision='weak',
Example no. 18
from systems import *
from integrator import Integrator
from utils import set_seed
from lkf import LKF
from kf import KF

from typing import Callable
import numpy as np
import pandas as pd
import pdb
import scipy.stats as stats

import matplotlib.pyplot as plt

set_seed(9001)

dt = 1e-3
T = 60.

z = TimeVarying(dt, 0.0, 1.0, f=1 / 20)
F_hat = lambda t: z.F(0)
eta = lambda t: F_hat(t) - z.F(t)

print(F_hat(0))
f1 = KF(z.x0, F_hat, z.H, z.Q, z.R, dt)
f2 = LKF(z.x0, F_hat, z.H, z.Q, z.R, dt, tau=0.25, eps=3e-2, gamma=0.9)

max_err = 2.
max_eta_err = 100
max_zz = 100.
def train(args, train_dataset, model, dev_dataset):
    """ Train the model """
    if args.local_rank in [-1, 0]:
        tb_writer = SummaryWriter(log_dir=args.tensorboard_dir)

    args.train_batch_size = args.per_gpu_train_batch_size * max(1, args.n_gpu)
    train_sampler = RandomSampler(
        train_dataset) if args.local_rank == -1 else DistributedSampler(
            train_dataset)
    train_dataloader = DataLoader(train_dataset,
                                  sampler=train_sampler,
                                  batch_size=args.train_batch_size)

    if args.max_steps > 0:
        t_total = args.max_steps
        args.num_train_epochs = args.max_steps // (
            len(train_dataloader) // args.gradient_accumulation_steps) + 1
    else:
        t_total = len(
            train_dataloader
        ) // args.gradient_accumulation_steps * args.num_train_epochs

    # Prepare optimizer and schedule (linear warmup and decay)
    no_decay = ['bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [{
        'params': [
            p for n, p in model.named_parameters()
            if not any(nd in n for nd in no_decay)
        ],
        'weight_decay':
        args.weight_decay
    }, {
        'params': [
            p for n, p in model.named_parameters()
            if any(nd in n for nd in no_decay)
        ],
        'weight_decay':
        0.0
    }]
    optimizer = AdamW(optimizer_grouped_parameters,
                      lr=args.learning_rate,
                      eps=args.adam_epsilon)
    scheduler = WarmupLinearSchedule(optimizer,
                                     warmup_steps=args.warmup_steps,
                                     t_total=t_total)
    if args.fp16:
        try:
            from apex import amp
        except ImportError:
            raise ImportError(
                "Please install apex from https://www.github.com/nvidia/apex to use fp16 training."
            )
        model, optimizer = amp.initialize(model,
                                          optimizer,
                                          opt_level=args.fp16_opt_level)

    # multi-gpu training (should be after apex fp16 initialization)
    if args.n_gpu > 1:
        model = torch.nn.DataParallel(model)

    # Distributed training (should be after apex fp16 initialization)
    if args.local_rank != -1:
        model = torch.nn.parallel.DistributedDataParallel(
            model,
            device_ids=[args.local_rank],
            output_device=args.local_rank,
            find_unused_parameters=True)

    # Train!
    logger.info("***** Running training *****")
    logger.info("  Num examples = %d", len(train_dataset))
    logger.info("  Num Epochs = %d", args.num_train_epochs)
    logger.info("  Instantaneous batch size per GPU = %d",
                args.per_gpu_train_batch_size)
    logger.info(
        "  Total train batch size (w. parallel, distributed & accumulation) = %d",
        args.train_batch_size * args.gradient_accumulation_steps *
        (torch.distributed.get_world_size() if args.local_rank != -1 else 1))
    logger.info("  Gradient Accumulation steps = %d",
                args.gradient_accumulation_steps)
    logger.info("  Total optimization steps = %d", t_total)

    global_step = 0
    tr_loss, logging_loss = 0.0, 0.0
    model.zero_grad()
    train_iterator = trange(int(args.num_train_epochs),
                            desc="Epoch",
                            disable=args.local_rank not in [-1, 0])
    set_seed(
        args)  # Added here for reproducibility (even between Python 2 and 3)
    for epoch in train_iterator:
        set_seed(
            args, epoch + 10
        )  # Added here for reproducibility (even between Python 2 and 3)
        # logger.info("  seed = %d", torch.initial_seed())
        epoch_iterator = tqdm(train_dataloader,
                              desc="Iteration",
                              disable=args.local_rank not in [-1, 0])
        for step, batch in enumerate(epoch_iterator):
            model.train()
            batch = tuple(t.to(args.device) for t in batch)
            inputs = {
                'input_ids':
                batch[0],
                'entity_a':
                batch[1],
                'entity_b':
                batch[2],
                'attention_mask':
                batch[3],
                'token_type_ids':
                batch[4] if args.model_type in ['bert', 'xlnet'] else None,
                # XLM doesn't use segment_ids
                'labels':
                batch[5]
            }
            outputs = model(**inputs)
            loss = outputs[
                0]  # model outputs are always a tuple in pytorch-transformers (see doc)

            if args.n_gpu > 1:
                loss = loss.mean(
                )  # mean() to average on multi-gpu parallel training
            if args.gradient_accumulation_steps > 1:
                loss = loss / args.gradient_accumulation_steps

            if args.fp16:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
                torch.nn.utils.clip_grad_norm_(amp.master_params(optimizer),
                                               args.max_grad_norm)
            else:
                loss.backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(),
                                               args.max_grad_norm)

            tr_loss += loss.item()
            if (step + 1) % args.gradient_accumulation_steps == 0:
                scheduler.step()  # Update learning rate schedule
                optimizer.step()
                model.zero_grad()
                global_step += 1

                if args.local_rank in [
                        -1, 0
                ] and args.logging_steps > 0 and global_step % args.logging_steps == 0:
                    # Log metrics
                    if args.local_rank == -1 and args.evaluate_during_training:
                        # Only evaluate when single GPU otherwise metrics may not average well
                        results, eval_loss = evaluate(args, model, dev_dataset)
                        for key, value in results.items():
                            tb_writer.add_scalar('eval_{}'.format(key), value,
                                                 global_step)
                    logger.info(" global step = %d", global_step)
                    tb_writer.add_scalar('lr',
                                         scheduler.get_lr()[0], global_step)
                    tb_writer.add_scalar('loss', (tr_loss - logging_loss) /
                                         args.logging_steps, global_step)
                    logging_loss = tr_loss

                if args.local_rank in [
                        -1, 0
                ] and args.save_steps > 0 and global_step % args.save_steps == 0:
                    # Save model checkpoint
                    output_dir = os.path.join(
                        args.output_dir, 'checkpoint-{}'.format(global_step))
                    if not os.path.exists(output_dir):
                        os.makedirs(output_dir)
                    # Take care of distributed/parallel training
                    model_to_save = model.module if hasattr(
                        model, 'module') else model
                    model_to_save.save_pretrained(output_dir)
                    torch.save(args,
                               os.path.join(output_dir, 'training_args.bin'))
                    logger.info("Saving model checkpoint to %s", output_dir)

            if 0 < args.max_steps < global_step:
                epoch_iterator.close()
                break
        if 0 < args.max_steps < global_step:
            train_iterator.close()
            break

    if args.local_rank in [-1, 0]:
        tb_writer.close()

    return global_step, tr_loss / global_step
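One detail in the update block above: scheduler.step() is called before optimizer.step(), which matches older pytorch-transformers examples but triggers a warning on PyTorch >= 1.1, where the optimizer should step first. A self-contained illustration of the recommended order:

import torch

model = torch.nn.Linear(4, 2)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)
scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lambda step: 1.0)

loss = model(torch.randn(8, 4)).sum()
loss.backward()
optimizer.step()   # update the parameters first
scheduler.step()   # then advance the learning-rate schedule
optimizer.zero_grad()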
Example no. 20
    def train(self):
        train_sampler = RandomSampler(self.train_dataset)
        train_dataloader = DataLoader(self.train_dataset,
                                      sampler=train_sampler,
                                      batch_size=self.args.batch_size)

        t_total = len(
            train_dataloader
        ) // self.args.gradient_accumulation_steps * self.args.num_train_epochs

        # Prepare optimizer and schedule (linear warmup and decay)
        no_decay = ['bias', 'LayerNorm.weight']
        # n is the parameter name, e.g. embeddings.word_embeddings.weight or encoder.layer.5.output.LayerNorm.bias
        # The block below applies weight decay (L2 regularization) only to parameters whose name contains none of the
        # no_decay substrings; for the others (bias and LayerNorm parameters) weight decay is set to 0.
        optimizer_grouped_parameters = [{
            'params': [
                p for n, p in self.model.named_parameters()
                if not any(nd in n for nd in no_decay)
            ],
            'weight_decay':
            self.args.weight_decay
        }, {
            'params': [
                p for n, p in self.model.named_parameters()
                if any(nd in n for nd in no_decay)
            ],
            'weight_decay':
            0.0
        }]
        optimizer = AdamW(optimizer_grouped_parameters,
                          lr=self.args.learning_rate,
                          eps=self.args.adam_epsilon)
        # Schedule the learning rate to increase during warmup and decay afterwards
        scheduler = get_linear_schedule_with_warmup(
            optimizer,
            num_warmup_steps=self.args.warmup_steps,
            num_training_steps=t_total)

        # Train!
        logger.info("***** Running training *****")
        logger.info("  Num examples = %d", len(self.train_dataset))
        logger.info("  Num Epochs = %d", self.args.num_train_epochs)
        logger.info("  Total train batch size = %d", self.args.batch_size)
        logger.info("  Gradient Accumulation steps = %d",
                    self.args.gradient_accumulation_steps)
        logger.info("  Total optimization steps = %d", t_total)

        global_step = 0  # total number of optimization steps
        tr_loss = 0.0
        self.model.zero_grad()  # clear gradients

        train_iterator = trange(int(self.args.num_train_epochs), desc="Epoch")
        set_seed(self.args)

        for _ in train_iterator:  # one full pass over the dataset per epoch
            epoch_iterator = tqdm(train_dataloader, desc="Iteration")
            # fetch one batch: the dataset is a tuple of 5 tensors of length 4478,
            # so each batch is a tuple of 5 tensors of length 16
            for step, batch in enumerate(epoch_iterator):
                self.model.train()  # put the model in training mode, not inference
                batch = tuple(t.to(self.device) for t in batch)  # GPU or CPU
                inputs = {
                    'input_ids': batch[0],
                    'attention_mask': batch[1],
                    'segment_ids': batch[2],
                    'start_positions': batch[3],
                    'end_positions': batch[4]
                }
                # calling the model runs forward(); unlike calling forward()
                # directly, this also invokes any registered hooks
                outputs = self.model(**inputs)
                loss = outputs[0]  # the model is fed a whole batch, so the loss is the batch mean

                if self.args.gradient_accumulation_steps > 1:  # average the loss over the accumulation steps
                    loss = loss / self.args.gradient_accumulation_steps

                loss.backward()

                tr_loss += loss.item()
                # an optimizer update is due after gradient_accumulation_steps batches
                if (step + 1) % self.args.gradient_accumulation_steps == 0:
                    # clip gradients to max_grad_norm
                    torch.nn.utils.clip_grad_norm_(self.model.parameters(),
                                                   self.args.max_grad_norm)
                    optimizer.step()  # accumulation finished, update the parameters
                    scheduler.step()  # update the learning rate schedule
                    self.model.zero_grad()  # clear gradients
                    global_step += 1

                    # save a checkpoint every save_steps updates
                    if self.args.save_steps > 0 and global_step % self.args.save_steps == 0:
                        self.save_model()
            self.evaluate()
        return global_step, tr_loss / global_step
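
The decay / no-decay grouping above hinges on the `any(nd in n for nd in no_decay)` name test. Here is a small self-contained sketch of that test on a toy module; the `Toy` class and its attribute names are illustrative and chosen so the BERT-style filter matches.

import torch.nn as nn


class Toy(nn.Module):
    def __init__(self):
        super().__init__()
        self.dense = nn.Linear(4, 4)
        self.LayerNorm = nn.LayerNorm(4)  # named like in BERT so the filter matches


no_decay = ['bias', 'LayerNorm.weight']
decay_names, no_decay_names = [], []
for n, _ in Toy().named_parameters():
    # names: 'dense.weight', 'dense.bias', 'LayerNorm.weight', 'LayerNorm.bias'
    (no_decay_names if any(nd in n for nd in no_decay) else decay_names).append(n)

print(decay_names)     # ['dense.weight']  -> gets weight decay
print(no_decay_names)  # biases and the LayerNorm weight -> weight_decay = 0.0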
Esempio n. 21
0
def main():
    parser = argparse.ArgumentParser()

    # Settings
    parser.add_argument('-d',
                        '--dataset',
                        choices=dataset_attributes.keys(),
                        required=True)
    parser.add_argument('-s',
                        '--shift_type',
                        choices=shift_types,
                        required=True)
    # Confounders
    parser.add_argument('-t', '--target_name')
    parser.add_argument('-c', '--confounder_names', nargs='+')
    # Resume?
    parser.add_argument('--resume', default=False, action='store_true')
    # Label shifts
    parser.add_argument('--minority_fraction', type=float)
    parser.add_argument('--imbalance_ratio', type=float)
    # Data
    parser.add_argument('--fraction', type=float, default=1.0)
    parser.add_argument('--root_dir', default=None)
    parser.add_argument('--subsample_to_minority',
                        action='store_true',
                        default=False)
    parser.add_argument('--reweight_groups',
                        action='store_true',
                        default=False)
    parser.add_argument('--augment_data', action='store_true', default=False)
    parser.add_argument('--val_fraction', type=float, default=0.1)
    # Objective
    parser.add_argument('--robust', default=False, action='store_true')
    parser.add_argument('--alpha', type=float, default=0.2)
    parser.add_argument('--generalization_adjustment', default="0.0")
    parser.add_argument('--automatic_adjustment',
                        default=False,
                        action='store_true')
    parser.add_argument('--robust_step_size', default=0.01, type=float)
    parser.add_argument('--use_normalized_loss',
                        default=False,
                        action='store_true')
    parser.add_argument('--btl', default=False, action='store_true')
    parser.add_argument('--hinge', default=False, action='store_true')

    # Model
    parser.add_argument('--model',
                        choices=model_attributes.keys(),
                        default='resnet50')
    parser.add_argument('--train_from_scratch',
                        action='store_true',
                        default=False)
    parser.add_argument('--resnet_width', type=int, default=None)

    # Optimization
    parser.add_argument('--n_epochs', type=int, default=4)
    parser.add_argument('--batch_size', type=int, default=32)
    parser.add_argument('--lr', type=float, default=0.001)
    parser.add_argument('--scheduler', action='store_true', default=False)
    parser.add_argument('--weight_decay', type=float, default=5e-5)
    parser.add_argument('--gamma', type=float, default=0.1)
    parser.add_argument('--minimum_variational_weight', type=float, default=0)
    # Misc
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--show_progress', default=False, action='store_true')
    parser.add_argument('--log_dir', default='./logs')
    parser.add_argument('--log_every', default=50, type=int)
    parser.add_argument('--save_step', type=int, default=10)
    parser.add_argument('--save_best', action='store_true', default=False)
    parser.add_argument('--save_last', action='store_true', default=True)
    parser.add_argument('--student_width', type=int)
    parser.add_argument('--teacher_dir', type=str)
    parser.add_argument('--teacher_width', type=int)
    parser.add_argument('--gpu', type=str)
    parser.add_argument('--temp', type=str)

    args = parser.parse_args()
    gpu = args.gpu
    temp = args.temp
    check_args(args)
    teacher_dir = args.teacher_dir
    student_width = args.student_width
    teacher_width = args.teacher_width
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu

    def DistillationLoss(temperature):
        cross_entropy = torch.nn.CrossEntropyLoss()

        def loss(student_logits, teacher_logits, target):
            last_dim = len(student_logits.shape) - 1
            p_t = nn.functional.softmax(teacher_logits / temperature,
                                        dim=last_dim)
            log_p_s = nn.functional.log_softmax(student_logits / temperature,
                                                dim=last_dim)
            return cross_entropy(student_logits, target) - (p_t * log_p_s).sum(
                dim=last_dim).mean() * temperature**2

        return loss

    # BERT-specific configs copied over from run_glue.py
    if args.model == 'bert':
        args.max_grad_norm = 1.0
        args.adam_epsilon = 1e-8
        args.warmup_steps = 0

    if os.path.exists(args.log_dir) and args.resume:
        resume = True
        mode = 'a'
    else:
        resume = False
        mode = 'w'

    ## Initialize logs
    if not os.path.exists(args.log_dir):
        os.makedirs(args.log_dir)

    logger = Logger(os.path.join(args.log_dir, 'log.txt'), mode)
    # Record args
    log_args(args, logger)

    set_seed(args.seed)
    print("starting prep")
    # Data
    # Test data for label_shift_step is not implemented yet
    test_data = None
    test_loader = None
    if args.shift_type == 'confounder':
        train_data, val_data, test_data = prepare_data(args, train=True)
    elif args.shift_type == 'label_shift_step':
        train_data, val_data = prepare_data(args, train=True)
    print("done prep")
    loader_kwargs = {
        'batch_size': args.batch_size,
        'num_workers': 16,
        'pin_memory': True
    }
    train_loader = train_data.get_loader(train=True,
                                         reweight_groups=args.reweight_groups,
                                         **loader_kwargs)
    val_loader = val_data.get_loader(train=False,
                                     reweight_groups=None,
                                     **loader_kwargs)
    if test_data is not None:
        test_loader = test_data.get_loader(train=False,
                                           reweight_groups=None,
                                           **loader_kwargs)

    data = {}
    data['train_loader'] = train_loader
    data['val_loader'] = val_loader
    data['test_loader'] = test_loader
    data['train_data'] = train_data
    data['val_data'] = val_data
    data['test_data'] = test_data
    n_classes = train_data.n_classes

    log_data(data, logger)
    logger.flush()

    ## Define the objective
    if args.hinge:
        assert args.dataset in ['CelebA', 'CUB']  # Only supports binary

        def hinge_loss(yhat, y):
            # The torch loss takes in three arguments so we need to split yhat
            # It also expects classes in {+1.0, -1.0} whereas by default we give them in {0, 1}
            # Furthermore, if y = 1 it expects the first input to be higher instead of the second,
            # so we need to swap yhat[:, 0] and yhat[:, 1]...
            torch_loss = torch.nn.MarginRankingLoss(margin=1.0,
                                                    reduction='none')
            y = (y.float() * 2.0) - 1.0
            return torch_loss(yhat[:, 1], yhat[:, 0], y)

        criterion = hinge_loss
    else:
        criterion = torch.nn.CrossEntropyLoss(reduction='none')

    if resume:
        df = pd.read_csv(os.path.join(args.log_dir, 'test.csv'))
        epoch_offset = df.loc[len(df) - 1, 'epoch'] + 1
        logger.write(f'starting from epoch {epoch_offset}')
    else:
        epoch_offset = 0

    train_csv_logger = CSVBatchLogger(os.path.join(args.log_dir, 'train.csv'),
                                      train_data.n_groups,
                                      mode=mode)
    val_csv_logger = CSVBatchLogger(os.path.join(args.log_dir, 'val.csv'),
                                    train_data.n_groups,
                                    mode=mode)
    test_csv_logger = CSVBatchLogger(os.path.join(args.log_dir, 'test.csv'),
                                     train_data.n_groups,
                                     mode=mode)
    strain_csv_logger = CSVBatchLogger(os.path.join(args.log_dir,
                                                    'strain.csv'),
                                       train_data.n_groups,
                                       mode=mode)
    sval_csv_logger = CSVBatchLogger(os.path.join(args.log_dir, 'sval.csv'),
                                     train_data.n_groups,
                                     mode=mode)
    stest_csv_logger = CSVBatchLogger(os.path.join(args.log_dir, 'stest.csv'),
                                      train_data.n_groups,
                                      mode=mode)

    teacher = resnet10vw(teacher_width, num_classes=n_classes)
    teacher_old = torch.load(teacher_dir + "/10_model.pth")
    for k, m in teacher_old.named_modules():
        m._non_persistent_buffers_set = set()  # pytorch 1.6.0 compatibility
    teacher.load_state_dict(teacher_old.state_dict())
    teacher = teacher.to('cuda')

    distill_criterion = DistillationLoss(float(temp))
    student = resnet10vw(int(student_width), num_classes=n_classes).to('cuda')

    #student.to(device)
    train(teacher,
          student,
          criterion,
          distill_criterion,
          data,
          logger,
          train_csv_logger,
          val_csv_logger,
          test_csv_logger,
          strain_csv_logger,
          sval_csv_logger,
          stest_csv_logger,
          args,
          epoch_offset=epoch_offset)
    train_csv_logger.close()
    val_csv_logger.close()
    test_csv_logger.close()
    strain_csv_logger.close()
    sval_csv_logger.close()
    stest_csv_logger.close()
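
As a quick sanity check, the sketch below reuses the `DistillationLoss` closure from this example on random logits; the batch size, class count and temperature are illustrative.

import torch
import torch.nn as nn


def DistillationLoss(temperature):
    # Same closure as above: hard-label cross-entropy plus a soft-target term
    # between teacher and student distributions, scaled by temperature**2.
    cross_entropy = torch.nn.CrossEntropyLoss()

    def loss(student_logits, teacher_logits, target):
        last_dim = len(student_logits.shape) - 1
        p_t = nn.functional.softmax(teacher_logits / temperature, dim=last_dim)
        log_p_s = nn.functional.log_softmax(student_logits / temperature, dim=last_dim)
        return cross_entropy(student_logits, target) - (p_t * log_p_s).sum(
            dim=last_dim).mean() * temperature ** 2

    return loss


criterion = DistillationLoss(4.0)    # illustrative temperature
student_logits = torch.randn(8, 10)  # batch of 8, 10 classes
teacher_logits = torch.randn(8, 10)
targets = torch.randint(0, 10, (8,))
print(criterion(student_logits, teacher_logits, targets))  # scalar tensor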
Esempio n. 22
0
                        default=1.,
                        help="Learning rate")

    parser.add_argument("--train",
                        type=bool,
                        default=False,
                        help="train or test")
    parser.add_argument("--test",
                        type=bool,
                        default=False,
                        help="train or test")

    args = parser.parse_args()

    # Set the seed
    set_seed(args.seed)

    # Set the hyperparameters
    LR = args.lr
    WD = args.wd

    EPOCHS = args.epochs
    OFFSET = args.offset

    # pdb.set_trace()

    dataset_train = get_data_loader_list('./data/CUB_2011_train.txt',
                                         1,
                                         train=True)
    dataset_test = get_data_loader_list('./data/CUB_2011_test.txt',
                                        1,
Esempio n. 23
0
    def train(self):
        train_sampler = RandomSampler(self.train_dataset)
        train_dataloader = DataLoader(self.train_dataset, sampler=train_sampler, batch_size=self.args.batch_size)

        if self.args.max_steps > 0:
            t_total = self.args.max_steps
            self.args.num_train_epochs = self.args.max_steps // (len(train_dataloader) // self.args.gradient_accumulation_steps) + 1
        else:
            t_total = len(train_dataloader) // self.args.gradient_accumulation_steps * self.args.num_train_epochs

        # Prepare optimizer and schedule (linear warmup and decay)
        no_decay = ['bias', 'LayerNorm.weight']
        optimizer_grouped_parameters = [
            {'params': [p for n, p in self.model.named_parameters() if not any(nd in n for nd in no_decay)],
             'weight_decay': self.args.weight_decay},
            {'params': [p for n, p in self.model.named_parameters() if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
        ]
        optimizer = AdamW(optimizer_grouped_parameters, lr=self.args.learning_rate, eps=self.args.adam_epsilon)
        scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=self.args.warmup_steps, num_training_steps=t_total)

        # Train!
        logger.info("***** Running training *****")
        logger.info("  Num examples = %d", len(self.train_dataset))
        logger.info("  Num Epochs = %d", self.args.num_train_epochs)
        logger.info("  Total train batch size = %d", self.args.batch_size)
        logger.info("  Gradient Accumulation steps = %d", self.args.gradient_accumulation_steps)
        logger.info("  Total optimization steps = %d", t_total)

        global_step = 0
        tr_loss = 0.0
        self.model.zero_grad()

        train_iterator = trange(int(self.args.num_train_epochs), desc="Epoch")
        set_seed(self.args)

        for _ in train_iterator:
            epoch_iterator = tqdm(train_dataloader, desc="Iteration")
            for step, batch in enumerate(epoch_iterator):
                self.model.train()
                batch = tuple(t.to(self.device) for t in batch)  # GPU or CPU

                inputs = {'input_ids': batch[0],
                          'attention_mask': batch[1],
                          'intent_label_ids': batch[3],
                          'slot_labels_ids': batch[4]}
                if self.args.model_type != 'distilbert':
                    inputs['token_type_ids'] = batch[2]
                outputs = self.model(**inputs)
                loss = outputs[0]

                if self.args.gradient_accumulation_steps > 1:
                    loss = loss / self.args.gradient_accumulation_steps

                loss.backward()

                tr_loss += loss.item()
                if (step + 1) % self.args.gradient_accumulation_steps == 0:
                    torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.args.max_grad_norm)

                    optimizer.step()
                    scheduler.step()  # Update learning rate schedule
                    self.model.zero_grad()
                    global_step += 1

                    if self.args.logging_steps > 0 and global_step % self.args.logging_steps == 0:
                        self.evaluate("dev")

                    if self.args.save_steps > 0 and global_step % self.args.save_steps == 0:
                        self.save_model()

                if 0 < self.args.max_steps < global_step:
                    epoch_iterator.close()
                    break

            if 0 < self.args.max_steps < global_step:
                train_iterator.close()
                break

        return global_step, tr_loss / global_step
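
Both trainers above derive `t_total` (the number of optimizer updates) from the dataloader length, the accumulation factor and either `max_steps` or `num_train_epochs`; the sketch below replays that arithmetic with illustrative numbers.

# Illustrative numbers only.
batches_per_epoch = 1000
gradient_accumulation_steps = 4
num_train_epochs = 3
max_steps = 0  # <= 0 means "derive t_total from the epoch count", as above

updates_per_epoch = batches_per_epoch // gradient_accumulation_steps  # 250
if max_steps > 0:
    t_total = max_steps
    num_train_epochs = max_steps // updates_per_epoch + 1
else:
    t_total = updates_per_epoch * num_train_epochs  # 750 optimizer.step() calls

print(t_total, num_train_epochs)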
Esempio n. 24
0
import torch.optim as optim
from progressbar import ProgressBar
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
from torch.distributions.multivariate_normal import MultivariateNormal

from config import gen_args
from data import PhysicsDataset, load_data
from data_d4rl import D4RLDataset
from models_kp import KeyPointNet
from models_dy import DynaNetGNN, HLoss
from utils import rand_int, count_parameters, Tee, AverageMeter, get_lr, to_np, set_seed

args = gen_args()
set_seed(args.random_seed)

torch.manual_seed(args.random_seed)
np.random.seed(args.random_seed)

os.system('mkdir -p ' + args.outf_kp)
os.system('mkdir -p ' + args.dataf)

if args.stage == 'dy':
    os.system('mkdir -p ' + args.outf_dy)
    tee = Tee(os.path.join(args.outf_dy, 'train.log'), 'w')
else:
    raise AssertionError("Unsupported env %s" % args.stage)

print(args)
Esempio n. 25
0
def main():
    parser = argparse.ArgumentParser(description='selfplaying script')
    parser.add_argument('--alice_model_file',
                        type=str,
                        help='Alice model file')
    parser.add_argument('--bob_model_file', type=str, help='Bob model file')
    parser.add_argument('--context_file', type=str, help='context file')
    parser.add_argument('--temperature',
                        type=float,
                        default=1.0,
                        help='temperature')
    parser.add_argument('--verbose',
                        action='store_true',
                        default=False,
                        help='print out conversations')
    parser.add_argument('--seed', type=int, default=1, help='random seed')
    parser.add_argument(
        '--score_threshold',
        type=int,
        default=6,
        help='successful dialog should have more than score_threshold in score'
    )
    parser.add_argument('--max_turns',
                        type=int,
                        default=20,
                        help='maximum number of turns in a dialog')
    parser.add_argument('--log_file',
                        type=str,
                        default='',
                        help='log successful dialogs to file for training')
    parser.add_argument('--smart_alice',
                        action='store_true',
                        default=False,
                        help='make Alice smart again')
    parser.add_argument('--fast_rollout',
                        action='store_true',
                        default=False,
                        help='to use faster rollouts')
    parser.add_argument('--rollout_bsz',
                        type=int,
                        default=100,
                        help='rollout batch size')
    parser.add_argument('--rollout_count_threshold',
                        type=int,
                        default=3,
                        help='rollout count threshold')
    parser.add_argument('--smart_bob',
                        action='store_true',
                        default=False,
                        help='make Bob smart again')
    parser.add_argument('--ref_text',
                        type=str,
                        help='file with the reference text')
    parser.add_argument('--domain',
                        type=str,
                        default='object_division',
                        help='domain for the dialogue')
    parser.add_argument('--fixed_bob',
                        action='store_true',
                        default=False,
                        help='use a fixed Bob')
    args = parser.parse_args()

    utils.set_seed(args.seed)

    alice_model = utils.load_model(args.alice_model_file)
    alice_ty = get_agent_type(alice_model, args.smart_alice, args.fast_rollout)
    alice = alice_ty(alice_model, args, name='Alice')

    bob_model = utils.load_model(args.bob_model_file)
    bob_ty = get_agent_type(bob_model, args.smart_bob, args.fast_rollout)
    bob = bob_ty(bob_model, args, name='Bob')
    dialog = Dialog([alice, bob], args)
    logger = DialogLogger(verbose=args.verbose, log_file=args.log_file)
    ctx_gen = ContextGenerator(args.context_file)

    selfplay = SelfPlay(dialog, ctx_gen, args, logger)
    selfplay.run()
Esempio n. 26
0
def run():
    parser = ArgumentParser()
    parser.add_argument("--run_name",
                        type=str,
                        default='run1',
                        help="The name of the run (subdirectory in ./runs)")
    parser.add_argument(
        "--model",
        type=str,
        default="openai-gpt",
        help="Model type (openai-gpt or gpt2)",
        choices=['openai-gpt',
                 'gpt2'])  # anything besides gpt2 will load openai-gpt
    parser.add_argument(
        "--max_history",
        type=int,
        default=2,
        help="Number of previous utterances to keep in history")
    parser.add_argument("--device",
                        type=str,
                        default="cuda" if torch.cuda.is_available() else "cpu",
                        help="Device (cuda or cpu)")
    parser.add_argument("--no_sample",
                        action='store_true',
                        help="Set to use greedy decoding instead of sampling")
    parser.add_argument("--max_length",
                        type=int,
                        default=40,
                        help="Maximum length of the output utterances")
    parser.add_argument("--min_length",
                        type=int,
                        default=1,
                        help="Minimum length of the output utterances")
    parser.add_argument("--seed", type=int, default=0, help="Seed")
    parser.add_argument("--temperature",
                        type=float,
                        default=1.0,
                        help="Sampling softmax temperature")
    parser.add_argument(
        "--top_k",
        type=int,
        default=0,
        help="Filter top-k tokens before sampling (<=0: no filtering)")
    parser.add_argument(
        "--top_p",
        type=float,
        default=0.8,
        help="Nucleus filtering (top-p) before sampling (<=0.0: no filtering)")
    args = parser.parse_args()

    # set seed
    set_seed(args.seed)

    logger.info("Get pretrained model and tokenizer")
    model_path = os.path.join('runs', args.run_name)
    tokenizer_class, model_class = (
        GPT2Tokenizer,
        GPT2LMHeadModel) if args.model == 'gpt2' else (OpenAIGPTTokenizer,
                                                       OpenAIGPTLMHeadModel)
    tokenizer = tokenizer_class.from_pretrained(model_path)
    model = model_class.from_pretrained(model_path)
    model.to(args.device)
    add_special_tokens_(model, tokenizer)
    history = []
    while True:
        raw_text = input(">>> ")
        while not raw_text:
            print('Prompt should not be empty!')
            raw_text = input(">>> ")
        history.append(tokenizer.encode(raw_text))
        with torch.no_grad():
            out_ids = sample_sequence(history, tokenizer, model, args)
        history.append(out_ids)
        history = history[-(2 * args.max_history + 1):]
        out_text = tokenizer.decode(out_ids, skip_special_tokens=True)
        print(out_text)
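
The slice `history[-(2 * args.max_history + 1):]` above bounds the dialogue history; the small sketch below shows what that window keeps, using placeholder strings instead of token-id lists.

# One user turn and one model reply each append a single entry, so a full
# exchange costs two entries; keeping the last 2 * max_history + 1 entries
# therefore retains about max_history past exchanges plus the newest utterance.
max_history = 2
history = []
for turn in range(6):
    history.append(f'user_{turn}')  # stands in for tokenizer.encode(raw_text)
    history.append(f'bot_{turn}')   # stands in for the sampled out_ids
    history = history[-(2 * max_history + 1):]

print(history)  # ['bot_3', 'user_4', 'bot_4', 'user_5', 'bot_5']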
Esempio n. 27
0
import utils
import torch
import time
import Procedure
import numpy as np
from parse import args
import model
from pprint import pprint
import dataloader
from parse import para_dict
import torch.optim as optim

pprint(vars(args))
utils.set_seed(args.seed)

# dataset
dataset = dataloader.Loader(path=args.datadir + args.dataset)

# model
n_users = para_dict['user_num']
m_items = para_dict['item_num']
Recmodel = model.LightGCN(n_users, m_items).to(args.device)

weight_file = utils.getFileName()
print(f"model will be save in {weight_file}")

# loss
opt = optim.Adam(Recmodel.parameters(), lr=args.lr)

# result
best_val = {'recall': np.array([0.0]),
Esempio n. 28
0
def main(rank, dev_id, args):
    set_seed()
    # Removing the line below would cause problems for multiprocessing
    if args['num_devices'] > 1:
        torch.set_num_threads(1)
    if dev_id == -1:
        args['device'] = torch.device('cpu')
    else:
        args['device'] = torch.device('cuda:{}'.format(dev_id))
        # Set current device
        torch.cuda.set_device(args['device'])

    train_set, val_set = load_dataset(args)
    get_center_subset(train_set, rank, args['num_devices'])
    train_loader = DataLoader(train_set,
                              batch_size=args['batch_size'],
                              collate_fn=collate_center,
                              shuffle=True)
    val_loader = DataLoader(val_set,
                            batch_size=args['batch_size'],
                            collate_fn=collate_center,
                            shuffle=False)

    model = WLNReactionCenter(node_in_feats=args['node_in_feats'],
                              edge_in_feats=args['edge_in_feats'],
                              node_pair_in_feats=args['node_pair_in_feats'],
                              node_out_feats=args['node_out_feats'],
                              n_layers=args['n_layers'],
                              n_tasks=args['n_tasks']).to(args['device'])
    model.train()
    if rank == 0:
        print('# trainable parameters in the model: ', count_parameters(model))

    criterion = BCEWithLogitsLoss(reduction='sum')
    optimizer = Adam(model.parameters(), lr=args['lr'])
    if args['num_devices'] <= 1:
        from utils import Optimizer
        optimizer = Optimizer(model,
                              args['lr'],
                              optimizer,
                              max_grad_norm=args['max_norm'])
    else:
        from utils import MultiProcessOptimizer
        optimizer = MultiProcessOptimizer(args['num_devices'],
                                          model,
                                          args['lr'],
                                          optimizer,
                                          max_grad_norm=args['max_norm'])

    total_iter = 0
    rank_iter = 0
    grad_norm_sum = 0
    loss_sum = 0
    dur = []

    for epoch in range(args['num_epochs']):
        t0 = time.time()
        for batch_id, batch_data in enumerate(train_loader):
            total_iter += args['num_devices']
            rank_iter += 1

            batch_reactions, batch_graph_edits, batch_mol_graphs, \
            batch_complete_graphs, batch_atom_pair_labels = batch_data
            labels = batch_atom_pair_labels.to(args['device'])
            pred, biased_pred = reaction_center_prediction(
                args['device'], model, batch_mol_graphs, batch_complete_graphs)
            loss = criterion(pred, labels) / len(batch_reactions)
            loss_sum += loss.cpu().detach().data.item()
            grad_norm_sum += optimizer.backward_and_step(loss)

            if rank_iter % args['print_every'] == 0 and rank == 0:
                progress = 'Epoch {:d}/{:d}, iter {:d}/{:d} | ' \
                           'loss {:.4f} | grad norm {:.4f}'.format(
                    epoch + 1, args['num_epochs'], batch_id + 1, len(train_loader),
                    loss_sum / args['print_every'], grad_norm_sum / args['print_every'])
                print(progress)
                grad_norm_sum = 0
                loss_sum = 0

            if total_iter % args['decay_every'] == 0:
                optimizer.decay_lr(args['lr_decay_factor'])
            if total_iter % args['decay_every'] == 0 and rank == 0:
                if epoch >= 1:
                    dur.append(time.time() - t0)
                    print('Training time per {:d} iterations: {:.4f}'.format(
                        rank_iter, np.mean(dur)))
                total_samples = total_iter * args['batch_size']
                prediction_summary = 'total samples {:d}, (epoch {:d}/{:d}, iter {:d}/{:d}) '.format(
                    total_samples, epoch + 1, args['num_epochs'], batch_id + 1, len(train_loader)) + \
                      reaction_center_final_eval(args, args['top_ks_val'], model, val_loader, easy=True)
                print(prediction_summary)
                with open(args['result_path'] + '/val_eval.txt', 'a') as f:
                    f.write(prediction_summary)
                torch.save({'model_state_dict': model.state_dict()},
                           args['result_path'] +
                           '/model_{:d}.pkl'.format(total_samples))
                t0 = time.time()
                model.train()
        synchronize(args['num_devices'])
Esempio n. 29
0
def train(args):

    # Load the data
    trainset = IMDBDataset(is_training=True)
    testset = IMDBDataset(is_training=False)

    # Wrap the datasets as MapDataset objects
    train_ds = MapDataset(trainset, label_list=[0, 1])
    test_ds = MapDataset(testset, label_list=[0, 1])

    # Define the XLNet tokenizer
    tokenizer = XLNetTokenizer.from_pretrained(args.model_name_or_path)

    trans_func = partial(convert_example,
                         tokenizer=tokenizer,
                         label_list=train_ds.label_list,
                         max_seq_length=args.max_seq_length)

    # Build train_data_loader and dev_data_loader
    train_ds = train_ds.map(trans_func, lazy=True)
    train_batch_sampler = paddle.io.DistributedBatchSampler(
        train_ds, batch_size=args.batch_size, shuffle=True)

    batchify_fn = lambda samples, fn=Tuple(
        Pad(axis=0, pad_val=tokenizer.pad_token_id, pad_right=False),  # input
        Pad(axis=0, pad_val=tokenizer.pad_token_type_id, pad_right=False
            ),  # token_type
        Pad(axis=0, pad_val=0, pad_right=False),  # attention_mask
        Stack(dtype="int64" if train_ds.label_list else "float32"),  # label
    ): fn(samples)

    train_data_loader = DataLoader(dataset=train_ds,
                                   batch_sampler=train_batch_sampler,
                                   collate_fn=batchify_fn,
                                   num_workers=0,
                                   return_list=True)

    dev_ds = MapDataset(testset)
    dev_ds = dev_ds.map(trans_func, lazy=True)
    dev_batch_sampler = paddle.io.BatchSampler(dev_ds,
                                               batch_size=args.batch_size,
                                               shuffle=False)

    dev_data_loader = DataLoader(dataset=dev_ds,
                                 batch_sampler=dev_batch_sampler,
                                 collate_fn=batchify_fn,
                                 num_workers=0,
                                 return_list=True)

    # Training configuration
    # Fix the random seed
    set_seed(args)

    # Set up the runtime device
    use_gpu = True if paddle.get_device().startswith("gpu") else False
    if use_gpu:
        paddle.set_device('gpu:0')

    num_classes = len(train_ds.label_list)
    model = XLNetForSequenceClassification.from_pretrained(
        args.model_name_or_path, num_classes=num_classes)

    #paddle.set_device(args.device)
    if paddle.distributed.get_world_size() > 1:
        paddle.distributed.init_parallel_env()
        model = paddle.DataParallel(model)

    # Set up the lr_scheduler
    if args.max_steps > 0:
        num_training_steps = args.max_steps
        num_train_epochs = ceil(num_training_steps / len(train_data_loader))
    else:
        num_training_steps = len(train_data_loader) * args.num_train_epochs
        num_train_epochs = args.num_train_epochs

    warmup = args.warmup_steps if args.warmup_steps > 0 else args.warmup_proportion
    lr_scheduler = LinearDecayWithWarmup(args.learning_rate,
                                         num_training_steps, warmup)

    # Configure the optimizer
    clip = paddle.nn.ClipGradByGlobalNorm(clip_norm=args.max_grad_norm)
    decay_params = [
        p.name for n, p in model.named_parameters()
        if not any(nd in n for nd in ["bias", "layer_norm"])
    ]
    optimizer = paddle.optimizer.AdamW(
        learning_rate=lr_scheduler,
        beta1=0.9,
        beta2=0.999,
        epsilon=args.adam_epsilon,
        parameters=model.parameters(),
        grad_clip=clip,
        weight_decay=args.weight_decay,
        apply_decay_param_fun=lambda x: x in decay_params)

    # Model training
    metric = Accuracy()

    # Define the loss function
    loss_fct = paddle.nn.loss.CrossEntropyLoss(
    ) if train_ds.label_list else paddle.nn.loss.MSELoss()

    global_step = 0
    tic_train = time.time()
    model.train()
    for epoch in range(num_train_epochs):
        for step, batch in enumerate(train_data_loader):
            global_step += 1
            input_ids, token_type_ids, attention_mask, labels = batch
            logits = model(input_ids, token_type_ids, attention_mask)
            loss = loss_fct(logits, labels)
            loss.backward()
            optimizer.step()
            lr_scheduler.step()
            optimizer.clear_grad()

            if global_step % args.logging_steps == 0:
                print(
                    "global step %d/%d, epoch: %d, batch: %d, rank_id: %s, loss: %f, lr: %.10f, speed: %.4f step/s"
                    % (global_step, num_training_steps, epoch, step,
                       paddle.distributed.get_rank(), loss, optimizer.get_lr(),
                       args.logging_steps / (time.time() - tic_train)))
                tic_train = time.time()

            if global_step % args.save_steps == 0 or global_step == num_training_steps:
                tic_eval = time.time()
                evaluate(model, loss_fct, metric, dev_data_loader)
                print("eval done total : %s s" % (time.time() - tic_eval))

                if (not paddle.distributed.get_world_size() > 1
                    ) or paddle.distributed.get_rank() == 0:
                    output_dir = os.path.join(
                        args.output_dir,
                        "%s_ft_model_%d" % (args.task_name, global_step))
                    if not os.path.exists(output_dir):
                        os.makedirs(output_dir)
                    # Need a better way to get the inner model of DataParallel
                    model_to_save = model._layers if isinstance(
                        model, paddle.DataParallel) else model
                    model_to_save.save_pretrained(output_dir)
                    tokenizer.save_pretrained(output_dir)
                if global_step == num_training_steps:
                    exit(0)
                tic_train += time.time() - tic_eval
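
The `batchify_fn` above left-pads the inputs (`pad_right=False`), which matches the XLNet convention of reading the sequence summary from the last tokens; here is a minimal pure-Python sketch of left-padding, without the `paddlenlp.data` helpers (sequences and pad value are illustrative).

# Left padding keeps the final tokens aligned at the right edge of the batch,
# which matters for XLNet-style models that summarize from the last positions.
def pad_left(batch, pad_val=0):
    max_len = max(len(seq) for seq in batch)
    return [[pad_val] * (max_len - len(seq)) + list(seq) for seq in batch]


print(pad_left([[5, 6, 7], [8, 9]]))  # [[5, 6, 7], [0, 8, 9]]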
Esempio n. 30
0
utils.set_path(('..', '../gibbs'))
from Demo.Data.HMLN.GeneratorRobotMapping import generate_rel_graph, load_data
from RelationalGraph import *
from MLNPotential import *
from Potential import QuadraticPotential, TablePotential, HybridQuadraticPotential
from EPBPLogVersion import EPBP
from OneShot import OneShot, LiftedOneShot
from NPVI import NPVI, LiftedNPVI
from CompressedGraphSorted import CompressedGraphSorted
import numpy as np
import time
from copy import copy

seed = 0
utils.set_seed(seed)
from mixture_beliefs import joint_map
from utils import eval_joint_assignment_energy
# from hybrid_gaussian_mrf import HybridGaussianSampler
# from hybrid_gaussian_mrf import convert_to_bn, block_gibbs_sample, get_crv_marg, get_drv_marg, \
#     get_rv_marg_map_from_bn_params
# import sampling_utils

from utils import kl_continuous_logpdf

import argparse

parser = argparse.ArgumentParser()
# parser.add_argument('algo', type=str)  # any of OSI, LOSI, NPVI, LNPVI
parser.add_argument('K', type=int)
parser.add_argument('-n', '--num_tests', type=int, default=5)
Esempio n. 31
0
                                                        the dataset is structured.'''
    )
    args = parser.parse_args()

    # Fetch parameters
    parameters = read_yaml(args.yaml_file)
    check_folder(parameters['output_dir'])
    save_yaml(parameters, os.path.join(parameters['output_dir'], 'config.yml'))
    logging.basicConfig(filename=os.path.join(parameters['output_dir'],
                                              parameters['log_file']),
                        filemode='w+',
                        level=logging.INFO)
    logging.info("Parameters fetched.")

    logging.info("Setting seed for reproductibility...")
    set_seed(parameters['seed'])
    logging.info("\tDone.")

    logging.info("Set and retrieve the device on which to run...")
    device = get_device()
    task = parameters['task'].lower()
    logging.info("\tDone.")

    logging.info("Instanciating dataset and data processor...")
    if task in ['language_modeling']:
        data = LMDataset(task,
                         parameters['dataset_name'].lower(),
                         dataset_dir=parameters['dataset_dir'])
        processor = LMProcessor()
    logging.info("\tDone.")
Esempio n. 32
0
from __future__ import print_function
import warnings
import os
import torch
import numpy as np
from time import time
from termcolor import colored

from parameter import parse_arguments, net_args_are_same
from architectures import get_net
import utils as u
from data import extract_patches

warnings.filterwarnings("ignore")
u.set_seed()


class Training:
    def __init__(self, args, outpath, dtype=torch.cuda.FloatTensor):

        self.args = args
        self.dtype = dtype
        self.outpath = outpath
        if args.loss == 'mse':
            self.loss_fn = torch.nn.MSELoss().type(self.dtype)
        else:
            self.loss_fn = torch.nn.L1Loss().type(self.dtype)
        self.loss_reg_fn = torch.nn.MSELoss().type(self.dtype)
        self.elapsed = None
        self.iiter = 0
        self.iter_to_be_saved = list(range(0, self.args.epochs, int(self.args.save_every))) \
Esempio n. 33
0
        CustomArgs(['--percent', '--percent'],
                   type=float,
                   target=('trainer', 'percent')),
        CustomArgs(['--conv', '--conv_layer'],
                   type=str,
                   target=('arch', 'args', 'conv_layer_type')),
        CustomArgs(['--norm', '--norm_layer'],
                   type=str,
                   target=('arch', 'args', 'norm_layer_type')),
        CustomArgs(['--subset_percent', '--subset_percent'],
                   type=float,
                   target=('trainer', 'subset_percent')),
        CustomArgs(['--asym', '--asym'], type=bool,
                   target=('trainer', 'asym')),
        CustomArgs(['--sym', '--sym'], type=bool, target=('trainer', 'sym')),
        CustomArgs(['--name', '--exp_name'], type=str, target=('name', )),
        CustomArgs(['--key', '--comet_key'], type=str,
                   target=('comet', 'api')),
        CustomArgs(['--offline', '--comet_offline'],
                   type=str,
                   target=('comet', 'offline')),
        CustomArgs(['--seed', '--seed'], type=int, target=('seed', )),
        CustomArgs(['--wd', '--weight_decay'],
                   type=float,
                   target=('optimizer', 'args', 'weight_decay'))
    ]
    config = ConfigParser.get_instance(args, options)

    set_seed(manualSeed=config['seed'])
    main(config)
Esempio n. 34
0
def train(args):
    paddle.set_device(args.device)
    world_size = dist.get_world_size()
    if world_size > 1:
        dist.init_parallel_env()

    set_seed(args.seed)

    model = UnifiedTransformerLMHeadModel.from_pretrained(
        args.model_name_or_path)
    tokenizer = UnifiedTransformerTokenizer.from_pretrained(
        args.model_name_or_path)

    if world_size > 1:
        model = paddle.DataParallel(model)

    train_ds, dev_ds = load_dataset('duconv', splits=('train', 'dev'))
    train_ds, train_data_loader = create_data_loader(train_ds, tokenizer, args,
                                                     'train')
    dev_ds, dev_data_loader = create_data_loader(dev_ds, tokenizer, args,
                                                 'dev')

    lr_scheduler = NoamDecay(1 / (args.warmup_steps * (args.lr**2)),
                             args.warmup_steps)
    # Generate parameter names needed to perform weight decay.
    # All bias and LayerNorm parameters are excluded.
    decay_params = [
        p.name for n, p in model.named_parameters()
        if not any(nd in n for nd in ["bias", "norm"])
    ]
    optimizer = AdamW(learning_rate=lr_scheduler,
                      parameters=model.parameters(),
                      weight_decay=args.weight_decay,
                      apply_decay_param_fun=lambda x: x in decay_params,
                      grad_clip=nn.ClipGradByGlobalNorm(args.max_grad_norm))

    step = 0
    total_time = 0.0
    best_ppl = 1e9
    for epoch in range(args.epochs):
        print('\nEpoch %d/%d' % (epoch + 1, args.epochs))
        batch_start_time = time.time()
        for inputs in train_data_loader:
            step += 1
            labels = inputs[-1]

            logits = model(*inputs[:-1])
            loss = F.cross_entropy(logits, labels)
            loss.backward()
            optimizer.step()
            lr_scheduler.step()
            optimizer.clear_grad()

            total_time += (time.time() - batch_start_time)
            if step % args.logging_steps == 0:
                ppl = paddle.exp(loss)
                print(
                    'step %d - loss: %.4f - ppl: %.4f - lr: %.7f - %.3fs/step'
                    % (step, loss, ppl, optimizer.get_lr(),
                       total_time / args.logging_steps))
                total_time = 0.0
            if step % args.save_steps == 0:
                ppl = evaluation(model, dev_data_loader)
                if dist.get_rank() == 0:
                    save_ckpt(model, tokenizer, args.save_dir, step)
                    if ppl < best_ppl:
                        best_ppl = ppl
                        save_ckpt(model, tokenizer, args.save_dir, 'best')
                        print('Saved step {} as best model.\n'.format(step))
            batch_start_time = time.time()
    print('\nTraining completed.')
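
The `NoamDecay(1 / (args.warmup_steps * (args.lr**2)), args.warmup_steps)` call above picks the `d_model` argument so the schedule peaks at roughly `args.lr`; the sketch below checks that arithmetic under the standard Noam formula, which is an assumption here rather than something the snippet states.

# Standard Noam schedule (assumed form, not taken from the snippet):
#   lr(step) = d_model ** -0.5 * min(step ** -0.5, step * warmup ** -1.5)
# With d_model = 1 / (warmup * base_lr ** 2), the value at step == warmup is
#   (warmup * base_lr ** 2) ** 0.5 * warmup ** -0.5 == base_lr.
warmup, base_lr = 4000, 1e-4  # illustrative values


def noam_lr(step):
    d_model = 1 / (warmup * base_lr ** 2)
    return d_model ** -0.5 * min(step ** -0.5, step * warmup ** -1.5)


print(noam_lr(warmup))  # ~= base_lr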
Esempio n. 35
0
    t = perf_counter()
    for epoch in range(epochs):
        optimizer.step(closure)  # the closure argument is specific to LBFGS
    train_time = perf_counter() - t
    return model, train_time


def test_regression(model, test_features, test_labels):
    with torch.no_grad():
        model.eval()
        return f1(model(test_features), test_labels)


# Fix the random seed for reproducible results
set_seed(args.seed, args.cuda)

# Full adjacency matrix, features, labels, and train/val/test index splits
adj, features, labels, idx_train, idx_val, idx_test = \
    load_reddit_data(normalization=args.normalization, cuda=args.cuda)
print("Finished data loading.")

if args.model == 'SGC':
    model = SGC(features.size(1), labels.max().item() + 1)
    if args.cuda:
        model.cuda()
    # precompute
    processed_features, precompute_time = sgc_precompute(
        features, adj, args.degree)
    # train
    train_features = processed_features[idx_train]
Esempio n. 36
0
    train1 = [(x[0] + ' ' + x[1], x[2]) for x in train_data]
    train2 = [(x[1] + ' ' + x[0], x[2]) for x in train_data]
    train_data = train1 + train2
    train_dataset = BuildDataSet(train_data)
    train_sampler = torch.utils.data.distributed.DistributedSampler(
        train_dataset)
    train_load = DataLoader(dataset=train_dataset,
                            batch_size=config.batch_size,
                            shuffle=False,
                            collate_fn=collate_fn,
                            sampler=train_sampler)

    for model_name in config.model_name:
        if config.local_rank in [0, -1]:
            msg = 'model_name:{},train_nums:{},train_iter:{},batch_size:{}'
            print(
                msg.format(model_name, len(train_data), len(train_load),
                           config.batch_size))

        train_process(config, train_load, train_sampler, model_name)
        torch.distributed.barrier()


if __name__ == '__main__':
    config = roBerta_Config()
    config.local_rank = local_rank
    config.device = device
    config.nprocs = torch.cuda.device_count()
    set_seed(config)
    train(config)
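
The snippet above doubles the training set by feeding each sentence pair in both orders before building the `DistributedSampler`; a tiny illustration of that symmetric-pair augmentation (the sentences and label are made up).

train_data = [('how old are you', 'what is your age', 1)]
train1 = [(x[0] + ' ' + x[1], x[2]) for x in train_data]
train2 = [(x[1] + ' ' + x[0], x[2]) for x in train_data]
print(train1 + train2)
# [('how old are you what is your age', 1), ('what is your age how old are you', 1)]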
Esempio n. 37
0
                    help='Multiprocessing number.')
parser.add_argument('--Ks', nargs='?', default='[1,5,10]',
                    help='Output sizes of every layer')
parser.add_argument('--skip', type=int, default=0,
                    help='Skip epochs.')
parser.add_argument('--seed', type=int, default=42,
                    help='Random Seed.')
args, _ = parser.parse_known_args()
args.layers = eval(args.layers)
print('#' * 70)
if not args.if_stack:
    args.if_raw = True
if args.if_output:
    print('\n'.join([(str(_) + ':' + str(vars(args)[_])) for _ in vars(args).keys()]))
args.cuda = not args.no_cuda and torch.cuda.is_available()
utils.set_seed(args.seed, args.cuda)
args.device = torch.device("cuda:0" if args.cuda else "cpu")
print(args.device)
if args.dataset == 'wechat':
    args.out_epoch = 1
args.loss = 'bpr'

ndcg.init(args)

# In[4]:


para_dict = pickle.load(open(args.datadir + args.dataset + '/warm_dict.pkl', 'rb'))
uuid_code = str(uuid.uuid4())[:4]
root_path = os.getcwd() + '/'
save_path = root_path + 'model_save/'
Esempio n. 38
0
import os
import shutil

import click
import pandas as pd
from deepsense import neptune
from sklearn.metrics import roc_auc_score

import pipeline_config as cfg
from pipelines import PIPELINES
from utils import init_logger, read_params, create_submission, set_seed, save_evaluation_predictions, \
    read_csv_time_chunks, cut_data_in_time_chunks, data_hash_channel_send, get_submission_hours_index

set_seed(1234)
logger = init_logger()
ctx = neptune.Context()
params = read_params(ctx)


@click.group()
def action():
    pass


@action.command()
def prepare_data():
    logger.info('chunking train')
    train = pd.read_csv(params.raw_train_filepath)
    cut_data_in_time_chunks(train,
                            timestamp_column='click_time',
                            chunks_dir=params.train_chunks_dir,
Esempio n. 39
0
                        default=None,
                        help='Checkpoint location.')
    parser.add_argument('-save',
                        metavar='save',
                        type=utils.str2bool,
                        help='Boolean',
                        default=False)
    parser.add_argument(
        '-conf',
        metavar='config',
        default="./conf/ner/rnn.json",
        help='model configuration. JSON files defined in ./configs/')
    parser.add_argument('-ckpt', metavar='ckpt', help='Checkpoint location')
    args = parser.parse_args()

    # Set Seed for reproducibility
    utils.set_seed()

    if args.load is None:
        # Load Config file
        conf = json.load(open(args.conf, "r"))
        # Load Dataset object
        dset = Dataset(batch_size=conf["train"]["batch_size"])
        # Main training loop
        Main(dset, conf, save=args.save)

    else:
        # Load Config file
        # Load Model
        pass