Exemple #1
0
    def __init__(self, config, storage, replay_buffer, state=None):
        """Initialize the learner worker.

        Seeds all RNGs, resolves the compute device, builds the network,
        optimizer, LR scheduler and loss functions, and restores a saved
        state when one is provided.
        """
        set_all_seeds(config.seed)

        self.config = deepcopy(config)
        self.run_tag = config.run_tag
        self.group_tag = config.group_tag
        self.worker_id = 'learner'
        self.storage = storage
        self.replay_buffer = replay_buffer

        # Device selection: CPU unless a GPU was explicitly requested for
        # the learner; requesting a GPU without CUDA available is an error.
        if "learner" not in self.config.use_gpu_for:
            self.device = torch.device("cpu")
        elif not torch.cuda.is_available():
            raise RuntimeError(
                "GPU was requested but torch.cuda.is_available() is False."
            )
        elif self.config.learner_gpu_device_id is not None:
            self.device = torch.device(
                "cuda:{}".format(self.config.learner_gpu_device_id))
        else:
            self.device = torch.device("cuda")

        self.network = get_network(config, self.device)
        self.network.to(self.device)
        self.network.train()

        self.optimizer = get_optimizer(config, self.network.parameters())
        self.lr_scheduler = get_lr_scheduler(config, self.optimizer)
        self.scalar_loss_fn, self.policy_loss_fn = get_loss_functions(config)

        self.training_step = 0
        self.losses_to_log = {'reward': 0., 'value': 0., 'policy': 0.}

        # Throughput counters reported by the logger.
        self.throughput = {
            'total_frames': 0,
            'total_games': 0,
            'training_step': 0,
            'time': {
                'ups': 0,
                'fps': 0
            }
        }

        # Observation normalization bounds: the even-indexed entries of
        # config.obs_range are taken as minima, the odd-indexed as maxima.
        if self.config.norm_obs:
            self.obs_min = np.array(self.config.obs_range[::2],
                                    dtype=np.float32)
            self.obs_max = np.array(self.config.obs_range[1::2],
                                    dtype=np.float32)
            self.obs_range = self.obs_max - self.obs_min

        if state is not None:
            self.load_state(state)

        Logger.__init__(self)
Exemple #2
0
def main(args: Namespace) -> None:
    """Run the main program.

    Arguments:
        args: The object containing the commandline arguments
    """
    config = load_config(args.config)

    # Automatically implements frame skipping internally
    env = gym.make("Pong-v4", frameskip=config.frame_skips)

    if config.seed is not None:
        set_all_seeds(env, config.seed)

    model = get_model(env.action_space.n)

    # Second copy of the model whose weights are synced from `model`
    # below (fresh runs) or restored by the trainer (resumed runs).
    fixed = get_model(env.action_space.n)
    replay = ReplayBuffer[TransitionType](limit=config.replay_size)

    # Save each run into a directory by its timestamp.
    # Remove microseconds and convert to ISO 8601 YYYY-MM-DDThh:mm:ss format.
    time_stamp = datetime.now().replace(microsecond=0).isoformat()
    log_dir = args.log_dir / time_stamp

    for directory in log_dir, args.save_dir:
        # exist_ok avoids the check-then-create race of `exists()` + `mkdir()`.
        directory.mkdir(parents=True, exist_ok=True)
        with open(directory / CONFIG_NAME, "w") as conf:
            # NOTE(review): this dumps the raw CLI args; if any value is a
            # Path object, toml may fail to serialize it — verify.
            toml.dump(vars(args), conf)

    optimizer = tf.keras.optimizers.Adam(config.lr)
    # Fix: tf.summary.create_file_writer expects a string logdir, not a
    # pathlib.Path, so convert explicitly.
    writer = tf.summary.create_file_writer(str(log_dir))

    trainer = DQNTrainer(
        env,
        model,
        fixed,
        replay,
        optimizer,
        writer,
        config=config,
        log_steps=args.log_steps,
        video_eps=args.video_eps,
        log_dir=log_dir,
        save_dir=args.save_dir,
    )

    if args.resume:
        start = trainer.load_info()
    else:
        # Fresh run: initialize the fixed/target network from the model.
        fixed.set_weights(model.get_weights())
        start = 0

    trainer.train(args.save_eps, start=start)
Exemple #3
0
def run(evaluator, seed=None):
  """Play a single evaluation game and return a lightweight game object."""
  env = get_environment(evaluator.config)
  if seed is not None:
    env.seed(seed)
    set_all_seeds(seed)

  # No gradients are needed for evaluation play.
  with torch.inference_mode():
    game = evaluator.play_game(env)

  # Strip observations and the environment handle so the returned game
  # carries no heavy references.
  game.history.observations = []
  game.environment = None
  return game
Exemple #4
0
def init_environment(env_id, path_dir_output="{env_id}-results", seed=None):
    """Create a gym environment wrapped in a recording Monitor.

    Arguments:
        env_id: Gym environment id; also substituted into the output path.
        path_dir_output: Template for the monitor's output directory.
        seed: Optional seed applied to the wrapped env and global RNGs.

    Returns:
        (env, envx): the Monitor-wrapped environment and the raw
        underlying environment.
    """
    # Format the path of the output directory
    path_dir_output = path_dir_output.format(env_id=env_id)

    envx = gym.make(env_id)
    # Fix: removed the dead `env = envx` assignment that was immediately
    # overwritten by the Monitor wrapper below.
    env = wrappers.Monitor(envx,
                           directory=path_dir_output,
                           force=True,
                           video_callable=False)
    if seed is not None:
        set_all_seeds(env, seed)

    return env, envx
Exemple #5
0
def main(args: Namespace) -> None:
    """Run the main program.

    Arguments:
        args: The object containing the commandline arguments
    """
    config = load_config(args.config)

    # Frame skipping is handled inside the environment itself.
    env = gym.make("Pong-v4", frameskip=config.frame_skips)
    if config.seed is not None:
        set_all_seeds(env, config.seed)

    num_actions = env.action_space.n
    model = get_model(num_actions)
    weights_path = args.load_dir / DQNTrainer.MODEL_NAME
    model.load_weights(weights_path)
    print("Loaded model")

    log_dir = args.log_dir
    if not log_dir.exists():
        log_dir.mkdir(parents=True)

    test(env, model, log_dir=log_dir)
Exemple #6
0
    #     # Get All the values
    #     # Trajectories dimension [bs, sequence, (obs, a, r, n_obs, done)]
    #     obss, acts, rewards, next_obss, dones = np.split(trajectories, 5, axis=2)
    #
    #
    #
    #     input_obs, initial_shape = utils.get_obs_from_traj(trajectories)
    #     values = sess.run(self.val_pred, {
    #         self.state: input_obs
    #     }).reshape(initial_shape)
    #     # ex_rewards = utils.get_expected_rewards(self.gamma, trajectories)
    #     #
    #     # advantages = ex_rewards - values


rng, seed = utils.set_all_seeds()

env = gym.make('CartPole-v0')
env.seed(seed)
act_space = env.action_space

agent = PPO()
N = 2
K = 2
batch_size = 10
max_iter = 1;

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    while t < max_iter:
Exemple #7
0
def main():
    """
    Run Subtask D of GermEval 2017 using a pre-trained language model.

    Parameters are parsed with argparse. The language model should be
    suitable for German, e.g.:
        'bert-base-multilingual-uncased',
        'bert-base-multilingual-cased',
        'bert-base-german-cased',
        'bert-base-german-dbmdz-cased',
        'bert-base-german-dbmdz-uncased',
        'distilbert-base-german-cased',
        'distilbert-base-multilingual-cased'.
    """

    parser = argparse.ArgumentParser(description='Run Subtask D of GermEval 2017 Using Pre-Trained Language Model.')
    parser.add_argument('--seed', type=int, default=42, help='Random seed.')
    parser.add_argument('--lang_model', type=str, default='bert-base-german-dbmdz-uncased', help='The pre-trained language model.')
    parser.add_argument('--epochs', type=int, default=4, help='Number of epochs for training.')
    parser.add_argument('--lr', type=float, default=5e-5, help='The learning rate.')
    parser.add_argument('--max_len', type=int, default=256, help='The maximum sequence length of the input text.')
    parser.add_argument('--batch_size', type=int, default=32, help='Your train set batch size.')
    parser.add_argument('--df_path', type=str, default='./data/', help='The data directory.')
    parser.add_argument('--train_data', type=str, default='train_df_opinion.tsv', help='The filename of the input train data.')
    parser.add_argument('--dev_data', type=str, default='dev_df_opinion.tsv', help='The filename of the input development data.')
    parser.add_argument('--test_data1', type=str, default='test_syn_df_opinion.tsv', help='The filename of the first input test data (synchronic).')
    parser.add_argument('--test_data2', type=str, default='test_dia_df_opinion.tsv', help='The filename of the second input test data (diachronic).')
    parser.add_argument('--output_path', type=str, default='./output/subtaskD/', help='The output directory of the model and predictions.')
    # NOTE(review): with default=True, "--train" can never be switched off
    # from the command line (store_true only ever sets True) — confirm intent.
    parser.add_argument("--train", default=True, action="store_true", help="Flag for training.")
    parser.add_argument("--use_crf", default=False, action="store_true", help="Flag for CRF usage.")
    parser.add_argument("--save_cr", default=False, action="store_true", help="Flag for saving classification report.")
    args = parser.parse_args()

    #############################################################################
    # Settings
    set_all_seeds(args.seed)
    device, n_gpu = initialize_device_settings(use_cuda=True)

    # Model tag used in output file names; CRF runs are marked explicitly.
    lm = args.lang_model
    if args.use_crf:
        lm = args.lang_model + "_crf"

    #############################################################################
    # Load and prepare data by adding BIO tags
    train_df = bio_tagging_df(pd.read_csv(args.df_path + args.train_data, delimiter='\t'))
    dev_df = bio_tagging_df(pd.read_csv(args.df_path + args.dev_data, delimiter='\t'))
    test_syn_df = bio_tagging_df(pd.read_csv(args.df_path + args.test_data1, delimiter='\t'))
    test_dia_df = bio_tagging_df(pd.read_csv(args.df_path + args.test_data2, delimiter='\t'))

    # 1. Create a tokenizer
    lower_case = args.lang_model[-7:] == "uncased"

    if args.lang_model[:4] == "bert":
        model_class = "BERT"
        tokenizer = BertTokenizer.from_pretrained(args.lang_model, do_lower_case=lower_case, max_length=args.max_len)

    if args.lang_model[:10] == "distilbert":
        model_class = "DistilBERT"
        tokenizer = DistilBertTokenizer.from_pretrained(args.lang_model, do_lower_case=lower_case, max_length=args.max_len)

    # get training features (train and dev are tokenized together and
    # re-split below by split_train_dev)
    df = pd.concat([train_df, dev_df])
    sentences = df.text.values
    labels = df.bio_tags.values
    tokenized_texts, labels = get_sentences_biotags(tokenizer, sentences, labels, args.max_len)

    sentences_syn = test_syn_df.text.values
    labels_syn = test_syn_df.bio_tags
    tokenized_texts_syn, labels_syn = get_sentences_biotags(tokenizer, sentences_syn, labels_syn, args.max_len)

    sentences_dia = test_dia_df.text.values
    labels_dia = test_dia_df.bio_tags
    tokenized_texts_dia, labels_dia = get_sentences_biotags(tokenizer, sentences_dia, labels_dia, args.max_len)

    # get tag values and dictionary
    tag_values, tag2idx, entities = get_tags_list(args.df_path)

    # pad input_ids and tags to max_len
    input_ids = pad_sequences([tokenizer.convert_tokens_to_ids(txt) for txt in tokenized_texts],
                              maxlen=args.max_len, value=0.0, padding="post",
                              dtype="long", truncating="post")
    tags = pad_sequences([[tag2idx.get(l) for l in lab] for lab in labels],
                         maxlen=args.max_len, value=tag2idx["PAD"], padding="post",
                         dtype="long", truncating="post")

    input_ids_syn = pad_sequences([tokenizer.convert_tokens_to_ids(txt) for txt in tokenized_texts_syn],
                                  maxlen=args.max_len, value=0.0, padding="post",
                                  dtype="long", truncating="post")
    tags_syn = pad_sequences([[tag2idx.get(l) for l in lab] for lab in labels_syn],
                             maxlen=args.max_len, value=tag2idx["PAD"], padding="post",
                             dtype="long", truncating="post")

    input_ids_dia = pad_sequences([tokenizer.convert_tokens_to_ids(txt) for txt in tokenized_texts_dia],
                                  maxlen=args.max_len, value=0.0, padding="post",
                                  dtype="long", truncating="post")
    tags_dia = pad_sequences([[tag2idx.get(l) for l in lab] for lab in labels_dia],
                             maxlen=args.max_len, value=tag2idx["PAD"], padding="post",
                             dtype="long", truncating="post")

    # create attention masks (1 for real tokens, 0 for padding)
    attention_masks = [[int(token_id > 0) for token_id in sent] for sent in input_ids]
    attention_masks_syn = [[int(token_id > 0) for token_id in sent] for sent in input_ids_syn]
    attention_masks_dia = [[int(token_id > 0) for token_id in sent] for sent in input_ids_dia]

    # split train, dev
    train_inputs, train_labels, dev_inputs, dev_labels, train_masks, dev_masks = split_train_dev(
        train_df, dev_df, attention_masks, input_ids, tags)

    # transform to torch tensors
    train_inputs = torch.tensor(train_inputs, dtype=torch.long)
    dev_inputs = torch.tensor(dev_inputs, dtype=torch.long)

    train_labels = torch.tensor(train_labels, dtype=torch.long)
    dev_labels = torch.tensor(dev_labels, dtype=torch.long)

    train_masks = torch.tensor(train_masks, dtype=torch.uint8)
    dev_masks = torch.tensor(dev_masks, dtype=torch.uint8)

    test_syn_inputs = torch.tensor(input_ids_syn, dtype=torch.long)
    test_syn_labels = torch.tensor(tags_syn, dtype=torch.long)
    test_syn_masks = torch.tensor(attention_masks_syn, dtype=torch.uint8)

    test_dia_inputs = torch.tensor(input_ids_dia, dtype=torch.long)
    test_dia_labels = torch.tensor(tags_dia, dtype=torch.long)
    test_dia_masks = torch.tensor(attention_masks_dia, dtype=torch.uint8)

    # create DataLoaders
    train_dataloader = create_dataloader(train_inputs, train_masks, train_labels, args.batch_size, train=True)
    dev_dataloader = create_dataloader(dev_inputs, dev_masks, dev_labels, args.batch_size, train=False)

    test_syn_dataloader = create_dataloader(test_syn_inputs, test_syn_masks, test_syn_labels, args.batch_size, train=False)
    test_dia_dataloader = create_dataloader(test_dia_inputs, test_dia_masks, test_dia_labels, args.batch_size, train=False)

    #############################################################################
    # Training
    if args.train:
        # Load Config
        if model_class == "BERT":
            config = BertConfig.from_pretrained(args.lang_model, num_labels=len(tag2idx))
            # dropout probability for all fully connected layers in the
            # embeddings, encoder, and pooler; default = 0.1
            config.hidden_dropout_prob = 0.1
            model = TokenBERT(
                model_name=args.lang_model,
                num_labels=len(tag2idx),
                use_crf=args.use_crf)

        if model_class == "DistilBERT":
            config = DistilBertConfig.from_pretrained(args.lang_model, num_labels=len(tag2idx))
            config.hidden_dropout_prob = 0.1
            model = TokenDistilBERT(
                model_name=args.lang_model,
                num_labels=len(tag2idx),
                use_crf=args.use_crf)

        model.cuda()

        # Create an optimizer; bias/LayerNorm parameters get no weight decay.
        param_optimizer = list(model.named_parameters())
        no_decay = ['bias', 'LayerNorm.weight', 'gamma', 'beta']
        optimizer_grouped_parameters = [
            {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
                'weight_decay_rate': 0.01},
            {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
                'weight_decay_rate': 0.0}
        ]
        optimizer = AdamW(
            optimizer_grouped_parameters,
            lr=args.lr,
            eps=1e-8
        )
        # Total number of training steps = number of batches * number of epochs
        total_steps = len(train_dataloader) * args.epochs
        # Create the learning rate scheduler
        scheduler = get_linear_schedule_with_warmup(
            optimizer,
            num_warmup_steps=0,
            num_training_steps=total_steps
        )

        # Main Loop
        print("=================== Train ================")
        print("##### Language Model:", args.lang_model, ",", "use CRF:", args.use_crf, ",", "learning rate:", args.lr, ",", "DROPOUT:", config.hidden_dropout_prob)
        print()

        track_time = time.time()

        for epoch in trange(args.epochs, desc="Epoch"):
            print("Epoch: %4i"%epoch, dt.datetime.now())

            # TRAINING
            model, optimizer, scheduler, tr_loss = training(
                train_dataloader,
                model=model,
                device=device,
                optimizer=optimizer,
                scheduler=scheduler
                )

            # EVALUATION: TRAIN SET
            y_true_train, y_pred_train, f1s_train, f1s_overlap_train = evaluation(
                    train_dataloader, model=model, device=device, tag_values=tag_values)
            print("TRAIN: F1 Exact %.3f | F1 Overlap %.3f"%(f1s_train, f1s_overlap_train))

            # EVALUATION: DEV SET
            y_true_dev, y_pred_dev, f1s_dev, f1s_overlap_dev = evaluation(
                    dev_dataloader, model=model, device=device, tag_values=tag_values)
            print("EVAL: F1 Exact %.3f | F1 Overlap %.3f"%(f1s_dev, f1s_overlap_dev))

        print("  Training and validation took in total: {:}".format(format_time(time.time()-track_time)))

        # EVALUATION: TEST SYN SET
        y_true_test_syn, y_pred_test_syn, f1s_test_syn, f1s_overlap_test_syn = evaluation(
                test_syn_dataloader, model=model, device=device, tag_values=tag_values)
        print("TEST SYN: F1 Exact %.3f | F1 Overlap %.3f"%(f1s_test_syn, f1s_overlap_test_syn))

        # EVALUATION: TEST DIA SET
        y_true_test_dia, y_pred_test_dia, f1s_test_dia, f1s_overlap_test_dia = evaluation(
                test_dia_dataloader, model=model, device=device, tag_values=tag_values)
        print("TEST DIA: F1 Exact %.3f | F1 Overlap %.3f"%(f1s_test_dia, f1s_overlap_test_dia))

        # Classification reports (exact and overlap matching)
        cr_report_syn = seq_classification_report(y_true_test_syn, y_pred_test_syn, digits=4)
        cr_report_dia = seq_classification_report(y_true_test_dia, y_pred_test_dia, digits=4)
        cr_report_syn_overlap = seq_classification_report(y_true_test_syn, y_pred_test_syn, digits=4, overlap=True)
        cr_report_dia_overlap = seq_classification_report(y_true_test_dia, y_pred_test_dia, digits=4, overlap=True)

        # Bug fix: the original printed cr_report_dia under the
        # "TEST SYN (Overlap)" heading and cr_report_syn_overlap under
        # "TEST DIA (Exact)"; each heading now shows its matching report.
        print("Classification report for TEST SYN (Exact):", cr_report_syn)
        print("Classification report for TEST SYN (Overlap):", cr_report_syn_overlap)
        print("Classification report for TEST DIA (Exact):", cr_report_dia)
        print("Classification report for TEST DIA (Overlap):", cr_report_dia_overlap)

        if args.save_cr:
            # Bug fix: `batch_size` was an undefined name here (NameError at
            # runtime); use the parsed argument instead. Files are opened via
            # `with` so the handles are closed deterministically.
            prefix = args.output_path + 'classification_report_' + lm + str(args.batch_size)
            reports = {
                '_test_syn_exact.txt': cr_report_syn,
                '_test_dia_exact.txt': cr_report_dia,
                '_test_syn_overlap.txt': cr_report_syn_overlap,
                '_test_dia_overlap.txt': cr_report_dia_overlap,
            }
            for suffix, report in reports.items():
                with open(prefix + suffix, 'wb') as handle:
                    pickle.dump(report, handle)
Exemple #8
0
# Fix: `logging` is used below (getLogger/basicConfig) but was never
# imported in this module.
import logging

import numpy as np
import datetime as dt
from tqdm import tqdm, trange

from keras.preprocessing.sequence import pad_sequences
from transformers import (AdamW, get_linear_schedule_with_warmup,
                          BertTokenizer, BertConfig,
                          DistilBertTokenizer, DistilBertConfig)

from utils import set_all_seeds, initialize_device_settings, format_time
from data_prep import bio_tagging_df
from data_handler import (get_tags_list, get_sentences_biotags, split_train_dev, create_dataloader)
from modeling_token import TokenBERT, TokenDistilBERT
from seqeval_metrics import (seq_accuracy_score, seq_f1_score,
                             seq_classification_report)
set_all_seeds()

logger = logging.getLogger(__name__)
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(name)s -   %(message)s",
    datefmt="%m/%d/%Y %H:%M:%S",
    level=logging.INFO
)


def training(train_dataloader, model, device, optimizer, scheduler, max_grad_norm=1.0):
    model.train()
    tr_loss = 0
    nb_tr_examples, nb_tr_steps = 0, 0

    for step, batch in enumerate(train_dataloader):
Exemple #9
0
    def __init__(self, actor_key, config, storage, replay_buffer, state=None):
        """Initialize an actor worker with its own environment and MCTS."""
        # Derive a distinct per-actor seed from the base seed (None stays None).
        seed = None if config.seed is None else config.seed + actor_key
        set_all_seeds(seed)

        self.run_tag = config.run_tag
        self.group_tag = config.group_tag
        self.actor_key = actor_key
        self.config = deepcopy(config)
        self.storage = storage
        self.replay_buffer = replay_buffer

        self.environment = get_environment(config)
        self.environment.seed(config.seed)
        self.mcts = MCTS(config)

        # Device selection: CPU unless GPUs were requested for the actors;
        # requesting a GPU without CUDA available is an error.
        if "actors" not in self.config.use_gpu_for:
            self.device = torch.device("cpu")
        elif not torch.cuda.is_available():
            raise RuntimeError(
                "GPU was requested but torch.cuda.is_available() is False."
            )
        elif self.config.actors_gpu_device_ids is not None:
            gpu_id = self.config.actors_gpu_device_ids[self.actor_key]
            self.device = torch.device("cuda:{}".format(gpu_id))
        else:
            self.device = torch.device("cuda")

        self.network = get_network(config, self.device)
        self.network.to(self.device)
        self.network.eval()

        # Actors with a fixed exploration temperature get a descriptive id.
        if config.fixed_temperatures:
            self.temperature = config.fixed_temperatures[self.actor_key]
            self.worker_id = 'actors/temp={}'.format(round(self.temperature, 1))
        else:
            self.worker_id = 'actor-{}'.format(self.actor_key)

        # Observation normalization bounds: even-indexed entries of
        # config.obs_range are minima, odd-indexed entries are maxima.
        if self.config.norm_obs:
            self.obs_min = np.array(self.config.obs_range[::2],
                                    dtype=np.float32)
            self.obs_max = np.array(self.config.obs_range[1::2],
                                    dtype=np.float32)
            self.obs_range = self.obs_max - self.obs_min

        if self.config.two_players:
            self.stats_to_log = defaultdict(int)

        # Per-actor counters reported by the logger.
        self.experiences_collected = 0
        self.training_step = 0
        self.games_played = 0
        self.return_to_log = 0
        self.length_to_log = 0
        self.value_to_log = {'avg': 0, 'max': 0}

        if state is not None:
            self.load_state(state)

        Logger.__init__(self)
Exemple #10
0
def main():
    """
    main function for conducting Subtask A. Parameters are parsed with argparse.
    Language model should be suitable for German e.g.:
        'bert-base-multilingual-uncased', 
        'bert-base-multilingual-cased',              
        'bert-base-german-cased', 
        'bert-base-german-dbmdz-cased',
        'bert-base-german-dbmdz-uncased',
        'distilbert-base-german-cased',
        'distilbert-base-multilingual-cased'.
    """

    ############################ variable settings #################################
    parser = argparse.ArgumentParser(description='Run Subtask A or B of GermEval 2017 Using Pre-Trained Language Model.')
    parser.add_argument('--task', type=str, default='A', help="The task you want to conduct ('A' or 'B').")
    parser.add_argument('--seed', type=int, default=42, help='Random seed.')
    parser.add_argument('--lang_model', type=str, default='bert-base-german-dbmdz-uncased', help='The pre-trained language model.')
    parser.add_argument('--epochs', type=int, default=4, help='Number of epochs for training.')
    parser.add_argument('--lr', type=float, default=5e-5, help='The learning rate.')
    parser.add_argument('--max_len', type=int, default=256, help='The maximum sequence length of the input text.')
    parser.add_argument('--batch_size', type=int, default=32, help='Your train set batch size.')
    parser.add_argument('--df_path', type=str, default='./data/', help='The data directory.')    
    parser.add_argument('--train_data', type=str, default='train_df.tsv', help='The filename of the input train data.')
    parser.add_argument('--dev_data', type=str, default='dev_df.tsv', help='The filename of the input development data.')
    parser.add_argument('--test_data1', type=str, default='test_syn_df.tsv', help='The filename of the first input test data (synchronic).')
    parser.add_argument('--test_data2', type=str, default='test_dia_df.tsv', help='The filename of the second input test data (diachronic).')
    parser.add_argument('--output_path', type=str, default='./output/subtaskA/', help='The output directory of the model and predictions.')
    # NOTE(review): with default=True, store_true flags can never be turned
    # off from the command line — confirm these defaults are intentional.
    parser.add_argument("--train", default=True, action="store_true", help="Flag for training.")
    parser.add_argument("--save_prediction", default=True, action="store_true", help="Flag for saving predictions.")
    args = parser.parse_args()

    ################################################################################
    # Seed all RNGs and pick the compute device before touching any data.
    set_all_seeds(args.seed)
    device, n_gpu = initialize_device_settings(use_cuda=True)

    # Load data
    # NOTE(review): rows with missing text are dropped only from the
    # synchronic test set, not from the diachronic one — verify intent.
    train_df = pd.read_csv(args.df_path + args.train_data, delimiter = '\t')
    dev_df = pd.read_csv(args.df_path + args.dev_data, delimiter = '\t')
    test_syn_df = pd.read_csv(args.df_path + args.test_data1, delimiter = '\t')
    test_syn_df = test_syn_df.dropna(subset = ["text"])    
    test_dia_df = pd.read_csv(args.df_path + args.test_data2, delimiter = '\t')
    
    # Create a tokenizer
    # Lower-case the input only for "*uncased" checkpoints.
    lower_case = False
    if args.lang_model[-7:] == "uncased":
        lower_case = True

    # NOTE(review): if lang_model matches neither prefix, model_class and
    # tokenizer stay unbound and the code below raises NameError.
    if args.lang_model[:4] == "bert":
        model_class = "BERT"
        tokenizer = BertTokenizer.from_pretrained(args.lang_model, do_lower_case=lower_case, max_length=args.max_len)
    
    if args.lang_model[:10] == "distilbert":
        model_class = "DistilBERT"
        tokenizer = DistilBertTokenizer.from_pretrained(args.lang_model, do_lower_case=lower_case, max_length=args.max_len)
    
    # get training features
    # Train and dev are concatenated here and re-split by split_train_dev below.
    df = pd.concat([train_df, dev_df])
    sentences = df.text.values
    sentences_syn = test_syn_df.text.values    
    sentences_dia = test_dia_df.text.values
    
    # Map string labels to class indices; task A is binary relevance,
    # task B is three-way sentiment.
    if args.task == 'A':
        class_list = [False, True]
        df['relevance_label'] = df.apply(lambda x:  class_list.index(x['relevance']), axis = 1)
        labels = df.relevance_label.values
        test_syn_df['relevance_label'] = test_syn_df.apply(lambda x:  class_list.index(x['relevance']), axis = 1)
        labels_syn = test_syn_df.relevance_label.values
        test_dia_df['relevance_label'] = test_dia_df.apply(lambda x:  class_list.index(x['relevance']), axis = 1)
        labels_dia = test_dia_df.relevance_label.values

    if args.task == 'B':
        class_list = ["negative", "neutral", "positive"]
        df['sentiment_label'] = df.apply(lambda x:  class_list.index(x['sentiment']), axis = 1)
        labels = df.sentiment_label.values
        test_syn_df['sentiment_label'] = test_syn_df.apply(lambda x:  class_list.index(x['sentiment']), axis = 1)
        labels_syn = test_syn_df.sentiment_label.values
        test_dia_df['sentiment_label'] = test_dia_df.apply(lambda x:  class_list.index(x['sentiment']), axis = 1)
        labels_dia = test_dia_df.sentiment_label.values
    
    num_labels = len(set(labels))
    
    # Tokenize all of the sentences and map the tokens to their word IDs.
    input_ids = [tokenizer.encode(sent, add_special_tokens=True, truncation=True, 
                                  max_length=args.max_len) for sent in sentences]
    input_ids = pad_sequences(input_ids, maxlen=args.max_len, dtype="long", 
                          value=0.0, truncating="post", padding="post")
    # Create attention masks (1 for real tokens, 0 for padding)
    attention_masks = [[int(token_id > 0) for token_id in sent] for sent in input_ids]

    # synchronic test data
    input_ids_syn = [tokenizer.encode(sent, add_special_tokens=True, truncation=True) for sent in sentences_syn]
    input_ids_syn = pad_sequences(input_ids_syn, maxlen=args.max_len, dtype="long", 
                          value=0.0, truncating="post", padding="post")
    attention_masks_syn = [[int(token_id > 0) for token_id in sent] for sent in input_ids_syn]
    
    # diachronic test data
    input_ids_dia = [tokenizer.encode(sent, add_special_tokens=True, truncation=True) for sent in sentences_dia]
    input_ids_dia = pad_sequences(input_ids_dia, maxlen=args.max_len, dtype="long", 
                          value=0.0, truncating="post", padding="post")
    attention_masks_dia = [[int(token_id > 0) for token_id in sent] for sent in input_ids_dia]

    # split train, dev
    train_inputs, train_labels, dev_inputs, dev_labels, train_masks, dev_masks = split_train_dev(
        train_df, dev_df, attention_masks, input_ids, labels)

    # transform to torch tensor
    train_inputs = torch.tensor(train_inputs)
    dev_inputs = torch.tensor(dev_inputs)

    train_labels = torch.tensor(train_labels)
    dev_labels = torch.tensor(dev_labels)

    train_masks = torch.tensor(train_masks)
    dev_masks = torch.tensor(dev_masks)

    test_syn_inputs = torch.tensor(input_ids_syn)
    test_syn_labels = torch.tensor(labels_syn)
    test_syn_masks = torch.tensor(attention_masks_syn)

    test_dia_inputs = torch.tensor(input_ids_dia)
    test_dia_labels = torch.tensor(labels_dia)
    test_dia_masks = torch.tensor(attention_masks_dia)

    # Create the DataLoader
    train_dataloader = create_dataloader(train_inputs, train_masks, 
                                     train_labels, args.batch_size, train=True)

    dev_dataloader = create_dataloader(dev_inputs, dev_masks, 
                                   dev_labels, args.batch_size, train=False)

    test_syn_dataloader = create_dataloader(test_syn_inputs, test_syn_masks, 
                                        test_syn_labels, args.batch_size, 
                                        train=False)

    test_dia_dataloader = create_dataloader(test_dia_inputs, test_dia_masks, 
                                        test_dia_labels, args.batch_size, 
                                        train=False)

    # Create model
    if args.train:
        if model_class == "BERT":
            config = BertConfig.from_pretrained(args.lang_model, num_labels=num_labels)   
            # dropout probability for fully connected layers; default = 0.1
            config.hidden_dropout_prob = 0.1
            model = BertForSequenceClassification.from_pretrained(
                args.lang_model,
                num_labels = num_labels,
                output_attentions = False,
                output_hidden_states = False
            )

        if model_class == "DistilBERT":
            config = DistilBertConfig.from_pretrained(args.lang_model, num_labels=num_labels)   
            config.hidden_dropout_prob = 0.1 
            model = DistilBertForSequenceClassification.from_pretrained(
                args.lang_model,
                num_labels = num_labels,
                output_attentions = False,
                output_hidden_states = False
            )
        model.cuda()


        # Create an optimizer; bias/LayerNorm parameters get no weight decay.
        param_optimizer = list(model.named_parameters())
        no_decay = ['bias', 'LayerNorm.weight']
        optimizer_grouped_parameters = [
            {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
                'weight_decay_rate': 0.01},
            {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
                'weight_decay_rate': 0.0}
        ]
        optimizer = AdamW(
            optimizer_grouped_parameters,
            lr=args.lr,
            eps=1e-8
        )

        # Total number of training steps = number of batches * number of epochs
        total_steps = len(train_dataloader) * args.epochs
        # Create the learning rate scheduler
        scheduler = get_linear_schedule_with_warmup(
            optimizer,
            num_warmup_steps=0,
            num_training_steps=total_steps
        )
    
        # train model
        # Main Loop
        print("=================== Train ================")
        print("##### Language Model:", args.lang_model, ",", "learning rate:", args.lr)
        print()

        track_time = time.time()
        # trange is a tqdm wrapper around the normal python range
        for epoch in trange(args.epochs, desc="Epoch"):
            print("Epoch: %4i"%epoch, dt.datetime.now())

            # NOTE(review): `train` and `eval` here are presumably module-level
            # helpers defined elsewhere in this file; `eval` shadows the
            # builtin at the call site — verify both are defined.
            model, optimizer, scheduler, tr_loss = train(
                train_dataloader, 
                model=model, 
                device=device, 
                optimizer=optimizer, 
                scheduler=scheduler
            )
            # EVALUATION: TRAIN SET
            true_bools_train, pred_bools_train, f1_train = eval(
                train_dataloader, model=model, device=device)
            print("TRAIN: micro F1 %.4f"%(f1_train)) # here: same as accuracy
            print(confusion_matrix(true_bools_train,pred_bools_train))
            
            # EVALUATION: DEV SET
            true_bools_dev, pred_bools_dev, f1_dev = eval(
                dev_dataloader, model=model, device=device)
            print("EVAL: micro F1 %.4f"%(f1_dev))
            print(confusion_matrix(true_bools_dev,pred_bools_dev))
        

        print("  Training and validation took in total: {:}".format(format_time(time.time()-track_time)))

        # EVALUATION: TEST SYN SET
        true_bools_syn, pred_bools_syn, f1_test_syn = eval(
            test_syn_dataloader, model=model, device=device)
        print("TEST SYN: micro F1 %.4f"%(f1_test_syn))
        print(confusion_matrix(true_bools_syn,pred_bools_syn))

        # EVALUATION: TEST DIA SET
        true_bools_dia, pred_bools_dia, f1_test_dia = eval(
            test_dia_dataloader, model=model, device=device)
        print("TEST DIA: micro F1 %.4f"%(f1_test_dia))
        print(confusion_matrix(true_bools_dia, pred_bools_dia))

        # Persist per-row predictions next to the original test data.
        if args.save_prediction:
            if args.task == 'A':
                test_syn_df["relevance_pred"] = pred_bools_syn
                test_dia_df["relevance_pred"] = pred_bools_dia
            if args.task == 'B':                
                test_syn_df["sentiment_pred"] = pred_bools_syn
                test_dia_df["sentiment_pred"] = pred_bools_dia
            
            test_syn_df.to_csv(args.output_path+args.lang_model+"_eval_test_syn.tsv", sep="\t", index = False, 
                header = True, encoding = "utf-8-sig")
            test_dia_df.to_csv(args.output_path+args.lang_model+"_eval_test_dia.tsv", sep="\t", index = False, 
                header = True, encoding = "utf-8-sig")
Exemple #11
0
def main():
    """
    Conduct Subtask C of GermEval 2017. Parameters are parsed with argparse.
    Language model should be suitable for German e.g.:
        'bert-base-multilingual-uncased', 
        'bert-base-multilingual-cased',              
        'bert-base-german-cased', 
        'bert-base-german-dbmdz-cased',
        'bert-base-german-dbmdz-uncased',
        'distilbert-base-german-cased',
        'distilbert-base-multilingual-cased'.

    Workflow: load the train/dev/test TSV files, build one-hot multi-label
    targets from the category columns, tokenize and pad the texts, fine-tune
    the model, evaluate per epoch on train/dev, then on the synchronic and
    diachronic test sets; optionally persist classification reports and
    per-document predictions.
    """

    ############################ variable settings #################################
    parser = argparse.ArgumentParser(
        description=
        'Run Subtask C of GermEval 2017 Using Pre-Trained Language Model.')
    parser.add_argument('--seed', type=int, default=42, help='Random seed.')
    parser.add_argument('--lang_model',
                        type=str,
                        default='bert-base-german-dbmdz-uncased',
                        help='The pre-trained language model.')
    parser.add_argument('--epochs',
                        type=int,
                        default=4,
                        help='Number of epochs for training.')
    parser.add_argument('--lr',
                        type=float,
                        default=5e-5,
                        help='The learning rate.')
    parser.add_argument('--max_len',
                        type=int,
                        default=256,
                        help='The maximum sequence length of the input text.')
    parser.add_argument('--batch_size',
                        type=int,
                        default=32,
                        help='Your train set batch size.')
    parser.add_argument('--df_path',
                        type=str,
                        default='./data/',
                        help='The data directory.')
    parser.add_argument('--train_data',
                        type=str,
                        default='train_df_cat.tsv',
                        help='The filename of the input train data.')
    parser.add_argument('--dev_data',
                        type=str,
                        default='dev_df_cat.tsv',
                        help='The filename of the input development data.')
    parser.add_argument(
        '--test_data1',
        type=str,
        default='test_syn_df_cat.tsv',
        help='The filename of the first input test data (synchronic).')
    parser.add_argument(
        '--test_data2',
        type=str,
        default='test_dia_df_cat.tsv',
        help='The filename of the second input test data (diachronic).')
    parser.add_argument(
        '--output_path',
        type=str,
        default='./output/subtaskC/',
        help='The output directory of the model and predictions.')
    # NOTE(review): default=True combined with action="store_true" means this
    # flag is always True and can never be disabled from the CLI; kept as-is
    # to preserve the existing interface and behavior.
    parser.add_argument("--train",
                        default=True,
                        action="store_true",
                        help="Flag for training.")
    parser.add_argument("--save_prediction",
                        default=False,
                        action="store_true",
                        help="Flag for saving predictions.")
    parser.add_argument("--save_cr",
                        default=False,
                        action="store_true",
                        help="Flag for saving confusion matrix.")
    parser.add_argument("--exclude_general",
                        default=False,
                        action="store_true",
                        help="Flag for excluding category Allgemein.")
    parser.add_argument("--exclude_neutral",
                        default=False,
                        action="store_true",
                        help="Flag for excluding neutral polarity.")
    parser.add_argument("--exclude_general_neutral",
                        default=False,
                        action="store_true",
                        help="Flag for excluding category Allgemein:neutral.")
    args = parser.parse_args()
    ################################################################################
    set_all_seeds(args.seed)
    device, n_gpu = initialize_device_settings(use_cuda=True)

    # Load data
    train_df = pd.read_csv(args.df_path + args.train_data, delimiter='\t')
    dev_df = pd.read_csv(args.df_path + args.dev_data, delimiter='\t')
    test_syn_df = pd.read_csv(args.df_path + args.test_data1, delimiter='\t')
    test_dia_df = pd.read_csv(args.df_path + args.test_data2, delimiter='\t')

    # Create a tokenizer; "*uncased" models require lower-casing the input.
    lower_case = args.lang_model.endswith("uncased")

    if args.lang_model.startswith("distilbert"):
        model_class = "DistilBERT"
        tokenizer = DistilBertTokenizer.from_pretrained(
            args.lang_model, do_lower_case=lower_case, max_length=args.max_len)
    elif args.lang_model.startswith("bert"):
        model_class = "BERT"
        tokenizer = BertTokenizer.from_pretrained(args.lang_model,
                                                  do_lower_case=lower_case,
                                                  max_length=args.max_len)
    else:
        # Fail fast: the original code left `model_class`/`tokenizer` unbound
        # for unsupported models and crashed later with a NameError.
        raise ValueError("Unsupported language model: %s" % args.lang_model)

    # get training features: the category columns start at index 5
    cats = train_df.columns[5:]
    end = "full"
    # exclude categories if required; `end` tags the output files accordingly
    if (args.exclude_general):
        cats = [i for i in list(cats) if "Allgemein" not in i]
        end = "excl_gen"
    if (args.exclude_neutral):
        cats = [i for i in list(cats) if "neutral" not in i]
        end = "excl_neu"
    if (args.exclude_general_neutral):
        cats = [i for i in list(cats) if "Allgemein:neutral" not in i]
        end = "excl_genneu"

    num_labels = len(list(cats))

    # create one hot labels: one binary indicator vector per row
    train_df['one_hot_labels'] = list(train_df[list(cats)].values)
    dev_df['one_hot_labels'] = list(dev_df[list(cats)].values)
    test_syn_df['one_hot_labels'] = list(test_syn_df[list(cats)].values)
    test_dia_df['one_hot_labels'] = list(test_dia_df[list(cats)].values)

    # retrieve sentences and labels (train and dev are re-split further below)
    df = pd.concat([train_df, dev_df])
    sentences = df.text.values
    labels = list(df.one_hot_labels.values)

    sentences_syn = test_syn_df.text.values
    labels_syn = list(test_syn_df.one_hot_labels.values)

    sentences_dia = test_dia_df.text.values
    labels_dia = list(test_dia_df.one_hot_labels.values)

    print("number of categories:", len(list(cats)))

    # Tokenize all of the sentences and map the tokens to their word IDs.
    input_ids = [
        tokenizer.encode(sent,
                         add_special_tokens=True,
                         truncation=True,
                         max_length=args.max_len) for sent in sentences
    ]
    input_ids = pad_sequences(input_ids,
                              maxlen=args.max_len,
                              dtype="long",
                              value=0.0,
                              truncating="post",
                              padding="post")
    # Create attention masks: 1 for real tokens, 0 for padding (id 0)
    attention_masks = [[int(token_id > 0) for token_id in sent]
                       for sent in input_ids]

    # synchronic test data
    input_ids_syn = [
        tokenizer.encode(sent, add_special_tokens=True, truncation=True)
        for sent in sentences_syn
    ]
    input_ids_syn = pad_sequences(input_ids_syn,
                                  maxlen=args.max_len,
                                  dtype="long",
                                  value=0.0,
                                  truncating="post",
                                  padding="post")
    attention_masks_syn = [[int(token_id > 0) for token_id in sent]
                           for sent in input_ids_syn]

    # diachronic test data
    input_ids_dia = [
        tokenizer.encode(sent, add_special_tokens=True, truncation=True)
        for sent in sentences_dia
    ]
    input_ids_dia = pad_sequences(input_ids_dia,
                                  maxlen=args.max_len,
                                  dtype="long",
                                  value=0.0,
                                  truncating="post",
                                  padding="post")
    attention_masks_dia = [[int(token_id > 0) for token_id in sent]
                           for sent in input_ids_dia]

    # split train, dev
    train_inputs, train_labels, dev_inputs, dev_labels, train_masks, dev_masks = split_train_dev(
        train_df, dev_df, attention_masks, input_ids, labels)

    # transform to torch tensor
    train_inputs = torch.tensor(train_inputs)
    dev_inputs = torch.tensor(dev_inputs)

    train_labels = torch.tensor(train_labels)
    dev_labels = torch.tensor(dev_labels)

    train_masks = torch.tensor(train_masks)
    dev_masks = torch.tensor(dev_masks)

    test_syn_inputs = torch.tensor(input_ids_syn)
    test_syn_masks = torch.tensor(attention_masks_syn)
    test_syn_labels = torch.tensor(labels_syn)

    test_dia_inputs = torch.tensor(input_ids_dia)
    test_dia_masks = torch.tensor(attention_masks_dia)
    test_dia_labels = torch.tensor(labels_dia)

    # Create the DataLoader (shuffling only for the train set)
    train_dataloader = create_dataloader(train_inputs,
                                         train_masks,
                                         train_labels,
                                         args.batch_size,
                                         train=True)

    dev_dataloader = create_dataloader(dev_inputs,
                                       dev_masks,
                                       dev_labels,
                                       args.batch_size,
                                       train=False)

    test_syn_dataloader = create_dataloader(test_syn_inputs,
                                            test_syn_masks,
                                            test_syn_labels,
                                            args.batch_size,
                                            train=False)

    test_dia_dataloader = create_dataloader(test_dia_inputs,
                                            test_dia_masks,
                                            test_dia_labels,
                                            args.batch_size,
                                            train=False)

    # Create model
    if args.train:
        if model_class == "BERT":
            config = BertConfig.from_pretrained(args.lang_model,
                                                num_labels=num_labels)
            # NOTE(review): `config` (incl. hidden_dropout_prob) is never
            # passed to from_pretrained below, so the dropout override has no
            # effect — confirm whether `config=config` was intended.
            config.hidden_dropout_prob = 0.1
            model = BertForSequenceClassification.from_pretrained(
                args.lang_model,
                num_labels=num_labels,
                output_attentions=False,
                output_hidden_states=False)

        if model_class == "DistilBERT":
            config = DistilBertConfig.from_pretrained(args.lang_model,
                                                      num_labels=num_labels)
            config.hidden_dropout_prob = 0.1
            model = DistilBertForSequenceClassification.from_pretrained(
                args.lang_model,
                num_labels=num_labels,
                output_attentions=False,
                output_hidden_states=False)
        # Was model.cuda(): use the device detected above so the script also
        # runs on CPU-only machines instead of crashing.
        model.to(device)

        # Create an optimizer; no weight decay for biases and LayerNorm weights
        param_optimizer = list(model.named_parameters())
        no_decay = ['bias', 'LayerNorm.weight']
        optimizer_grouped_parameters = [{
            'params': [
                p for n, p in param_optimizer
                if not any(nd in n for nd in no_decay)
            ],
            'weight_decay_rate':
            0.01
        }, {
            'params':
            [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
            'weight_decay_rate':
            0.0
        }]
        optimizer = AdamW(optimizer_grouped_parameters, lr=args.lr, eps=1e-8)
        # Total number of training steps = number of batches * number of epochs
        total_steps = len(train_dataloader) * args.epochs
        # Create the learning rate scheduler
        scheduler = get_linear_schedule_with_warmup(
            optimizer, num_warmup_steps=0, num_training_steps=total_steps)

        # train model
        # Main Loop
        print("=================== Train ================")
        print("##### Language Model:", args.lang_model, ",", "learning rate:",
              args.lr)
        print()

        track_time = time.time()
        # trange is a tqdm wrapper around the normal python range
        for epoch in trange(args.epochs, desc="Epoch"):
            print("Epoch: %4i" % epoch, dt.datetime.now())

            model, optimizer, scheduler, tr_loss = train_multilabel(
                train_dataloader=train_dataloader,
                model=model,
                device=device,
                optimizer=optimizer,
                scheduler=scheduler,
                num_labels=num_labels)
            # EVALUATION: TRAIN SET
            pred_bools_train, true_bools_train, f1_train = eval_multilabel(
                train_dataloader, model=model, device=device)
            print("TRAIN: micro F1 %.3f" % (f1_train))

            # EVALUATION: DEV SET
            pred_bools_dev, true_bools_dev, f1_dev = eval_multilabel(
                dev_dataloader, model=model, device=device)
            print("EVAL: micro F1 %.3f" % (f1_dev))

        print("  Training and validation took in total: {:}".format(
            format_time(time.time() - track_time)))

        # EVALUATION: TEST SYN SET
        pred_bools_syn, true_bools_syn, f1_test_syn = eval_multilabel(
            test_syn_dataloader, model=model, device=device)
        print("TEST SYN: micro F1 %.4f" % (f1_test_syn))

        # classification report
        clf_report_syn = classification_report(true_bools_syn,
                                               pred_bools_syn,
                                               target_names=cats,
                                               digits=3)
        print(clf_report_syn)

        # EVALUATION: TEST DIA SET
        pred_bools_dia, true_bools_dia, f1_test_dia = eval_multilabel(
            test_dia_dataloader, model=model, device=device)
        print("TEST DIA: micro F1 %.4f" % (f1_test_dia))

        # classification report
        clf_report_dia = classification_report(true_bools_dia,
                                               pred_bools_dia,
                                               target_names=cats,
                                               digits=3)
        print(clf_report_dia)

        if args.save_cr:
            # classification_report returns a plain string: write it as
            # readable text. (The original pickled the string into .txt
            # files via unclosed binary handles, producing unreadable
            # artifacts and leaking the file objects.)
            report_base = args.output_path + 'clf_report_' + args.lang_model
            with open(report_base + '_test_syn_' + str(num_labels) + end +
                      '.txt', 'w', encoding='utf-8') as f:
                f.write(clf_report_syn)
            with open(report_base + '_test_dia_' + str(num_labels) + end +
                      '.txt', 'w', encoding='utf-8') as f:
                f.write(clf_report_dia)

        if args.save_prediction:
            test_syn_df["category_pred"] = pred_bools_syn
            test_dia_df["category_pred"] = pred_bools_dia
            test_syn_df.category_pred.to_csv(args.output_path +
                                             args.lang_model + '_test_syn_' +
                                             str(num_labels) + end + ".tsv",
                                             sep="\t",
                                             index=False,
                                             header=True,
                                             encoding="utf-8-sig")
            test_dia_df.category_pred.to_csv(args.output_path +
                                             args.lang_model + '_test_dia_' +
                                             str(num_labels) + end + ".tsv",
                                             sep="\t",
                                             index=False,
                                             header=True,
                                             encoding="utf-8-sig")
Exemple #12
0
    args = parser.parse_args()
    
    if args.dev:
        args.output = 'devs/'

    args.output = os.path.join(args.output, args.expr_name)
    utils.Config(vars(args)).dump(os.path.join(args.output, 'configs.txt'))
    return args


if __name__ == '__main__':
    args = main()

    torch.backends.cudnn.benckmark = True
    utils.set_all_seeds(args.seed)

    # Init data from SNLI or other data source
    data, itos = utils.obtain_data(args.data, args.vocab) 

    # Build Vocabulary
    print("Loading vocabulary and word embeddings...")
    vocab = Vocabulary(args.labels, itos)
    vocab.set_word_embedding(args.embed, args.vocab)
    numpy_data = vocab.process_data(data)
    print("Embedding shape: ")
    print(vocab.embeddings.shape)

    # Build network
    net = s2l_Net(h_size=args.h_size, 
                    v_size=vocab.embeddings.shape[0], 
def parse_args() -> Arguments:
    """
    Parse command-line arguments for few-shot (prototypical network) training.

    Returns:
        Arguments: the parsed options. If --run-path is given, the options
        are replaced wholesale by that run's saved config.yaml so a
        checkpoint resumes with identical settings. A missing --seed is
        randomized, a missing --eval-steps is derived from the batch size
        (enough steps for 600 episodes, to match the paper's tables), and
        the device falls back to 'cpu' when CUDA is unavailable.
    """

    def _parse_bool(value: str) -> bool:
        # Safe replacement for `type=eval`: accept only the documented
        # "True"/"False" choices instead of evaluating arbitrary CLI input.
        if value == 'True':
            return True
        if value == 'False':
            return False
        # argparse converts ValueError from a `type` callable into a clean
        # "invalid value" error.
        raise ValueError('expected True or False, got %r' % (value, ))

    argparser = ArgumentParser()
    argparser.add_argument('--dataset',
                           '-d',
                           choices=['omniglot', 'miniimagenet', 'cub'],
                           dest='dataset',
                           help='Specify train dataset')
    argparser.add_argument(
        '--classes',
        '--num-classes',
        '-c',
        default=5,
        type=int,
        dest='num_classes',
        help=
        'Number of classes for each task in meta learning i.e. the N in N-way with K shots'
    )
    argparser.add_argument(
        '--support-samples',
        '-s',
        default=1,
        type=int,
        dest='support_samples',
        help='Number of training samples for each class in meta learning '
        'i.e. the K in N-way with K shots')
    argparser.add_argument(
        '--query-samples',
        '-q',
        default=5,
        type=int,
        dest='query_samples',
        help='Number of test samples for each class in meta learning')
    argparser.add_argument(
        '--distance',
        '--dst',
        default='euclidean',
        type=str,
        choices=['euclidean', 'cosine'],
        help='Distance function to use inside PrototypicalNetwork')
    argparser.add_argument(
        '--epochs',
        '-e',
        default=500_000,
        type=int,  # was missing: a CLI-supplied value stayed a string
        help='Number of training epochs. Set by default to a very high value '
        'because paper specify that train continues until validation loss '
        'continues to decrease.')
    argparser.add_argument('--epoch-steps',
                           default=200,
                           type=int,
                           dest='epoch_steps')
    argparser.add_argument('--seed', default=13, type=int)
    argparser.add_argument('--device', type=str, default='cuda')
    argparser.add_argument('--batch-size', type=int, default=32)
    argparser.add_argument('--eval-steps',
                           type=int,
                           default=None,
                           help='Number of evaluation steps. '
                           'By default is set to the number '
                           'of steps to reach 600 episodes '
                           'considering batch size. This '
                           'is done to match paper results tables')
    argparser.add_argument('--run-path',
                           type=Path,
                           default=None,
                           help='Set this to resume a checkpoint '
                           'instead of start a new training.',
                           dest='run_path')
    argparser.add_argument(
        '--metadata-features',
        type=int,
        default=None,
        help='Number of metadata features. Must set only for zero shot learning '
        'i.e. when --support-samples=0',
        dest='metadata_features')
    argparser.add_argument(
        '--image-features',
        type=int,
        default=None,
        help=
        'Number of image encoded features. Must set only for zero shot learning '
        'i.e. when --support-samples=0',
        dest='image_features')
    argparser.add_argument('--lr',
                           default=1e-4,
                           type=float,
                           help='lr for optimizer(adam)',
                           dest='lr')
    argparser.add_argument('--weight-decay',
                           default=0.0,
                           type=float,
                           dest='weight_decay')
    argparser.add_argument('--lr-decay',
                           default=True,
                           type=_parse_bool,
                           choices=[True, False],
                           dest='use_lr_decay',
                           help='Set true to use multiplicative lr decay '
                           '(set also --lr-decay-gamma and --lr-decay-steps)')
    argparser.add_argument('--lr-decay-gamma',
                           default=None,
                           type=float,
                           dest='lr_decay_gamma',
                           help='Multiplicative factor to apply to lr decay')
    argparser.add_argument('--lr-decay-steps',
                           default=None,
                           type=int,
                           dest='lr_decay_steps',
                           help='Number of steps to apply lr decay')
    argparser.add_argument('--early-stop',
                           default=True,
                           type=_parse_bool,
                           choices=[True, False],
                           dest='use_early_stop',
                           help='Enable early stop based on validation loss')
    argparser.add_argument('--early-stop-patience',
                           '--es-patience',
                           dest='early_stop_patience',
                           default=3,
                           type=int)
    argparser.add_argument('--early-stop-delta',
                           dest='early_stop_delta',
                           default=0.0,
                           type=float)
    argparser.add_argument('--early-stop-metric',
                           default='accuracy',
                           type=str,
                           choices=['accuracy', 'loss'],
                           dest='early_stop_metric')
    args = argparser.parse_args(namespace=Arguments())

    if args.run_path is not None:
        # Resuming a run: the saved config replaces ALL command-line options.
        run_path = Path(args.run_path)
        with open(run_path / 'config.yaml') as f:
            # safe_load: the config is plain data; yaml.load without an
            # explicit Loader is deprecated and can construct arbitrary
            # Python objects.
            config = yaml.safe_load(f)
        print('loaded config from', repr(run_path))
        args = Arguments()
        for k in config:
            setattr(args, k, config[k])  # args.k = config[k]

    if args.seed is None:
        args.seed = randint(0, 1_000_000)
        print('set seed to %s' % args.seed)

    if args.eval_steps is None:
        from math import ceil
        args.eval_steps = int(ceil(600 / args.batch_size))
        print('set eval_steps to %s' % args.eval_steps)

    if args.device.startswith('cuda'):
        if not torch.cuda.is_available():
            print('Cuda not available, fall back to cpu')
            args.device = 'cpu'
    set_all_seeds(args.seed)
    return args
# Command-line options for a Pascal VOC 2010 object/part detection script.
# NOTE(review): `parser` itself is created above this excerpt; `args.device`
# (used below) must also come from an earlier add_argument call — confirm.
parser.add_argument('-dir', '--data_dir', type=str, default='data/VOCdevkit/VOC2010/')
parser.add_argument('-tr', '--train_split', type=str, default='train')
parser.add_argument('-val', '--val_split', type=str, default='val')
parser.add_argument('-cf', '--class2ind_file', type=str, default='object_class2ind')
parser.add_argument('-e', '--n_epochs', type=int, default=30)
parser.add_argument('-lr', '--learning_rate', type=float, default=1e-3)
parser.add_argument('-bs', '--batch_size', type=int, default=1)
parser.add_argument('-wd', '--weight_decay', type=float, default=1e-6)
parser.add_argument('--use_objects', dest='use_objects', action='store_true')
parser.add_argument('--use_parts', dest='use_parts', action='store_true')
parser.add_argument('-nw', '--num_workers', type=int, default=0)
parser.add_argument('-ms', '--max_samples', type=int, default=-1)
args = parser.parse_args()

# Select the GPU by index when CUDA is available, otherwise run on CPU.
device = torch.device('cuda:%d' % args.device if torch.cuda.is_available() else 'cpu')
set_all_seeds(123)  # fixed seed for reproducibility (not configurable here)

########## Parameters ##########
DATA_DIR = args.data_dir
TRAIN_SPLIT = args.train_split
VAL_SPLIT = args.val_split
CLASS2IND_FILE = args.class2ind_file
N_EPOCHS = args.n_epochs
USE_OBJECTS = bool(args.use_objects)
USE_PARTS = bool(args.use_parts)
NUM_WORKERS = args.num_workers
# The -1 default means "no cap"; translated to None for downstream code.
MAX_SAMPLES = args.max_samples if args.max_samples > 0 else None

if USE_OBJECTS and USE_PARTS:
    print('[WARNING]: If you are doing Object and Part Detection, make sure you are using the class2ind file that has both classes')
Exemple #15
0
        model_name.append('ddqn')

    if args.suffix:
        model_name.append(args.suffix)

    model_name = '_'.join(model_name)
    args.output = os.path.join(args.output, game_name, model_name)
    utils.Config(vars(args)).dump(os.path.join(args.output, 'configs.txt'))
    return args


if __name__ == '__main__':
    args = main()

    torch.backends.cudnn.benckmark = True
    utils.set_all_seeds(args.seed)

    train_env = Environment(
        args.rom,
        args.frame_skip,
        args.num_frames,
        args.frame_size,
        args.no_op_start + 1,
        utils.large_randint(),
        True)
    eval_env = Environment(
        args.rom,
        args.frame_skip,
        args.num_frames,
        args.frame_size,
        args.no_op_start + 1,