def __init__(self, config, storage, replay_buffer, state=None):
    """Initialize the learner worker.

    Args:
        config: Run configuration object (deep-copied so later mutation by
            the caller cannot affect this worker).
        storage: Shared storage handle (semantics defined elsewhere).
        replay_buffer: Replay buffer the learner samples from.
        state: Optional checkpoint/state dict; when given it is restored via
            `self.load_state(state)` after construction.
    """
    set_all_seeds(config.seed)

    self.run_tag = config.run_tag
    self.group_tag = config.group_tag
    self.worker_id = 'learner'
    self.replay_buffer = replay_buffer
    self.storage = storage
    self.config = deepcopy(config)

    # Device selection: GPU only when explicitly requested for the learner.
    # A missing GPU in that case is a hard error rather than a silent
    # CPU fallback.
    if "learner" in self.config.use_gpu_for:
        if torch.cuda.is_available():
            if self.config.learner_gpu_device_id is not None:
                device_id = self.config.learner_gpu_device_id
                self.device = torch.device("cuda:{}".format(device_id))
            else:
                self.device = torch.device("cuda")
        else:
            raise RuntimeError(
                "GPU was requested but torch.cuda.is_available() is False."
            )
    else:
        self.device = torch.device("cpu")

    # Network is kept in training mode on the chosen device.
    self.network = get_network(config, self.device)
    self.network.to(self.device)
    self.network.train()

    self.optimizer = get_optimizer(config, self.network.parameters())
    self.lr_scheduler = get_lr_scheduler(config, self.optimizer)
    self.scalar_loss_fn, self.policy_loss_fn = get_loss_functions(config)

    # Bookkeeping for logging: per-head loss accumulators and throughput
    # counters (ups = updates/s, fps = frames/s — TODO confirm in Logger).
    self.training_step = 0
    self.losses_to_log = {'reward': 0., 'value': 0., 'policy': 0.}
    self.throughput = {
        'total_frames': 0,
        'total_games': 0,
        'training_step': 0,
        'time': {
            'ups': 0,
            'fps': 0
        }
    }

    # Observation normalization: obs_range is interleaved as
    # [min0, max0, min1, max1, ...] — even indices are minima, odd maxima.
    if self.config.norm_obs:
        self.obs_min = np.array(self.config.obs_range[::2], dtype=np.float32)
        self.obs_max = np.array(self.config.obs_range[1::2], dtype=np.float32)
        self.obs_range = self.obs_max - self.obs_min

    if state is not None:
        self.load_state(state)

    Logger.__init__(self)
def main(args: Namespace) -> None:
    """Build environment, networks and trainer, then run DQN training.

    Args:
        args: Parsed command-line arguments.
    """
    config = load_config(args.config)

    # The environment applies frame skipping internally.
    env = gym.make("Pong-v4", frameskip=config.frame_skips)
    if config.seed is not None:
        set_all_seeds(env, config.seed)

    n_actions = env.action_space.n
    model = get_model(n_actions)
    fixed = get_model(n_actions)
    replay = ReplayBuffer[TransitionType](limit=config.replay_size)

    # One sub-directory per run, keyed by an ISO-8601 timestamp
    # (YYYY-MM-DDThh:mm:ss, microseconds stripped).
    time_stamp = datetime.now().replace(microsecond=0).isoformat()
    log_dir = args.log_dir / time_stamp

    # Make sure both output locations exist and drop a copy of the run
    # configuration into each of them.
    for target in (log_dir, args.save_dir):
        if not target.exists():
            target.mkdir(parents=True)
        with open(target / CONFIG_NAME, "w") as fh:
            toml.dump(vars(args), fh)

    optimizer = tf.keras.optimizers.Adam(config.lr)
    writer = tf.summary.create_file_writer(log_dir)
    trainer = DQNTrainer(
        env,
        model,
        fixed,
        replay,
        optimizer,
        writer,
        config=config,
        log_steps=args.log_steps,
        video_eps=args.video_eps,
        log_dir=log_dir,
        save_dir=args.save_dir,
    )

    if args.resume:
        start = trainer.load_info()
    else:
        # Fresh run: sync the target network with the online network.
        fixed.set_weights(model.get_weights())
        start = 0

    trainer.train(args.save_eps, start=start)
def run(evaluator, seed=None):
    """Play a single evaluation game and return it stripped of bulky state.

    Args:
        evaluator: Object exposing `.config` and `.play_game(env)`.
        seed: Optional seed applied to the environment and the global RNGs
            before the rollout.

    Returns:
        The finished game, with its observation history cleared and its
        environment reference dropped so the object stays lightweight.
    """
    env = get_environment(evaluator.config)
    if seed is not None:
        env.seed(seed)
        set_all_seeds(seed)

    # Evaluation rollouts never need gradient tracking.
    with torch.inference_mode():
        game = evaluator.play_game(env)

    game.history.observations = []
    game.environment = None
    return game
def init_environment(env_id, path_dir_output="{env_id}-results", seed=None):
    """Create a gym environment wrapped in a results Monitor.

    Args:
        env_id: Gym environment id, e.g. "CartPole-v0".
        path_dir_output: Template for the Monitor output directory; an
            "{env_id}" placeholder is substituted with `env_id`.
        seed: Optional seed forwarded to `set_all_seeds` together with the
            wrapped environment.

    Returns:
        Tuple `(env, envx)`: the Monitor-wrapped environment and the raw,
        unwrapped one.
    """
    # Resolve the "{env_id}" placeholder in the output path template.
    path_dir_output = path_dir_output.format(env_id=env_id)
    envx = gym.make(env_id)
    # FIX: removed a dead `env = envx` assignment that was immediately
    # overwritten by the Monitor wrapper below.
    env = wrappers.Monitor(envx, directory=path_dir_output, force=True,
                           video_callable=False)
    if seed is not None:
        set_all_seeds(env, seed)
    return env, envx
def main(args: Namespace) -> None:
    """Load a trained DQN model and run evaluation on Pong.

    Args:
        args: Parsed command-line arguments.
    """
    config = load_config(args.config)

    # Frame skipping is applied inside the environment.
    env = gym.make("Pong-v4", frameskip=config.frame_skips)
    if config.seed is not None:
        set_all_seeds(env, config.seed)

    # Restore the trained weights from the load directory.
    model = get_model(env.action_space.n)
    model.load_weights(args.load_dir / DQNTrainer.MODEL_NAME)
    print("Loaded model")

    # Make sure the log directory exists before evaluation writes into it.
    if not args.log_dir.exists():
        args.log_dir.mkdir(parents=True)

    test(env, model, log_dir=args.log_dir)
# # Get All the values # # Trajectories dimension [bs, sequence, (obs, a, r, n_obs, done)] # obss, acts, rewards, next_obss, dones = np.split(trajectories, 5, axis=2) # # # # input_obs, initial_shape = utils.get_obs_from_traj(trajectories) # values = sess.run(self.val_pred, { # self.state: input_obs # }).reshape(initial_shape) # # ex_rewards = utils.get_expected_rewards(self.gamma, trajectories) # # # # advantages = ex_rewards - values rng, seed = utils.set_all_seeds() env = gym.make('CartPole-v0') env.seed(seed) act_space = env.action_space agent = PPO() N = 2 K = 2 batch_size = 10 max_iter = 1; with tf.Session() as sess: sess.run(tf.global_variables_initializer()) while t < max_iter:
def main():
    """Run Subtask D of GermEval 2017 (opinion target extraction).

    Parses hyper-parameters with argparse, prepares BIO-tagged
    token-classification data, fine-tunes a (Distil)BERT model — optionally
    with a CRF head — and reports exact and overlap F1 on both test sets.

    Language model should be suitable for German, e.g.:
    'bert-base-multilingual-uncased', 'bert-base-multilingual-cased',
    'bert-base-german-cased', 'bert-base-german-dbmdz-cased',
    'bert-base-german-dbmdz-uncased', 'distilbert-base-german-cased',
    'distilbert-base-multilingual-cased'.
    """
    parser = argparse.ArgumentParser(description='Run Subtask D of GermEval 2017 Using Pre-Trained Language Model.')
    parser.add_argument('--seed', type=int, default=42, help='Random seed.')
    parser.add_argument('--lang_model', type=str, default='bert-base-german-dbmdz-uncased', help='The pre-trained language model.')
    parser.add_argument('--epochs', type=int, default=4, help='Number of epochs for training.')
    parser.add_argument('--lr', type=float, default=5e-5, help='The learning rate.')
    parser.add_argument('--max_len', type=int, default=256, help='The maximum sequence length of the input text.')
    parser.add_argument('--batch_size', type=int, default=32, help='Your train set batch size.')
    parser.add_argument('--df_path', type=str, default='./data/', help='The data directory.')
    parser.add_argument('--train_data', type=str, default='train_df_opinion.tsv', help='The filename of the input train data.')
    parser.add_argument('--dev_data', type=str, default='dev_df_opinion.tsv', help='The filename of the input development data.')
    parser.add_argument('--test_data1', type=str, default='test_syn_df_opinion.tsv', help='The filename of the first input test data (synchronic).')
    parser.add_argument('--test_data2', type=str, default='test_dia_df_opinion.tsv', help='The filename of the second input test data (diachronic).')
    parser.add_argument('--output_path', type=str, default='./output/subtaskD/', help='The output directory of the model and predictions.')
    # NOTE(review): default=True together with store_true means --train is
    # always True — confirm whether an opt-out flag was intended.
    parser.add_argument("--train", default=True, action="store_true", help="Flag for training.")
    parser.add_argument("--use_crf", default=False, action="store_true", help="Flag for CRF usage.")
    parser.add_argument("--save_cr", default=False, action="store_true", help="Flag for saving classification report.")
    args = parser.parse_args()

    #############################################################################
    # Settings
    set_all_seeds(args.seed)
    device, n_gpu = initialize_device_settings(use_cuda=True)
    # Tag output filenames with "_crf" when the CRF head is used.
    lm = args.lang_model
    if args.use_crf:
        lm = args.lang_model + "_crf"

    #############################################################################
    # Load and prepare data by adding BIO tags
    train_df = bio_tagging_df(pd.read_csv(args.df_path + args.train_data, delimiter='\t'))
    dev_df = bio_tagging_df(pd.read_csv(args.df_path + args.dev_data, delimiter='\t'))
    test_syn_df = bio_tagging_df(pd.read_csv(args.df_path + args.test_data1, delimiter='\t'))
    test_dia_df = bio_tagging_df(pd.read_csv(args.df_path + args.test_data2, delimiter='\t'))

    # 1. Create a tokenizer
    lower_case = args.lang_model[-7:] == "uncased"
    if args.lang_model[:4] == "bert":
        model_class = "BERT"
        tokenizer = BertTokenizer.from_pretrained(args.lang_model, do_lower_case=lower_case, max_length=args.max_len)
    elif args.lang_model[:10] == "distilbert":
        model_class = "DistilBERT"
        tokenizer = DistilBertTokenizer.from_pretrained(args.lang_model, do_lower_case=lower_case, max_length=args.max_len)
    else:
        # FIX: an unsupported model name previously fell through to a
        # NameError on first use of `tokenizer`; fail fast instead.
        raise ValueError("Unsupported language model: {}".format(args.lang_model))

    # get training features; train and dev are tokenized together and
    # re-split further below by split_train_dev.
    df = pd.concat([train_df, dev_df])
    sentences = df.text.values
    labels = df.bio_tags.values
    tokenized_texts, labels = get_sentences_biotags(tokenizer, sentences, labels, args.max_len)

    sentences_syn = test_syn_df.text.values
    labels_syn = test_syn_df.bio_tags
    tokenized_texts_syn, labels_syn = get_sentences_biotags(tokenizer, sentences_syn, labels_syn, args.max_len)

    sentences_dia = test_dia_df.text.values
    labels_dia = test_dia_df.bio_tags
    tokenized_texts_dia, labels_dia = get_sentences_biotags(tokenizer, sentences_dia, labels_dia, args.max_len)

    # get tag values and dictionary
    tag_values, tag2idx, entities = get_tags_list(args.df_path)

    # pad input_ids and tags
    input_ids = pad_sequences([tokenizer.convert_tokens_to_ids(txt) for txt in tokenized_texts],
                              maxlen=args.max_len, value=0.0, padding="post", dtype="long", truncating="post")
    tags = pad_sequences([[tag2idx.get(l) for l in lab] for lab in labels],
                         maxlen=args.max_len, value=tag2idx["PAD"], padding="post", dtype="long", truncating="post")
    input_ids_syn = pad_sequences([tokenizer.convert_tokens_to_ids(txt) for txt in tokenized_texts_syn],
                                  maxlen=args.max_len, value=0.0, padding="post", dtype="long", truncating="post")
    tags_syn = pad_sequences([[tag2idx.get(l) for l in lab] for lab in labels_syn],
                             maxlen=args.max_len, value=tag2idx["PAD"], padding="post", dtype="long", truncating="post")
    input_ids_dia = pad_sequences([tokenizer.convert_tokens_to_ids(txt) for txt in tokenized_texts_dia],
                                  maxlen=args.max_len, value=0.0, padding="post", dtype="long", truncating="post")
    tags_dia = pad_sequences([[tag2idx.get(l) for l in lab] for lab in labels_dia],
                             maxlen=args.max_len, value=tag2idx["PAD"], padding="post", dtype="long", truncating="post")

    # create attention masks (1 for real tokens, 0 for padding)
    attention_masks = [[int(token_id > 0) for token_id in sent] for sent in input_ids]
    attention_masks_syn = [[int(token_id > 0) for token_id in sent] for sent in input_ids_syn]
    attention_masks_dia = [[int(token_id > 0) for token_id in sent] for sent in input_ids_dia]

    # split train, dev
    train_inputs, train_labels, dev_inputs, dev_labels, train_masks, dev_masks = split_train_dev(
        train_df, dev_df, attention_masks, input_ids, tags)

    # transform to torch tensors
    train_inputs = torch.tensor(train_inputs, dtype=torch.long)
    dev_inputs = torch.tensor(dev_inputs, dtype=torch.long)
    train_labels = torch.tensor(train_labels, dtype=torch.long)
    dev_labels = torch.tensor(dev_labels, dtype=torch.long)
    train_masks = torch.tensor(train_masks, dtype=torch.uint8)
    dev_masks = torch.tensor(dev_masks, dtype=torch.uint8)
    test_syn_inputs = torch.tensor(input_ids_syn, dtype=torch.long)
    test_syn_labels = torch.tensor(tags_syn, dtype=torch.long)
    test_syn_masks = torch.tensor(attention_masks_syn, dtype=torch.uint8)
    test_dia_inputs = torch.tensor(input_ids_dia, dtype=torch.long)
    test_dia_labels = torch.tensor(tags_dia, dtype=torch.long)
    test_dia_masks = torch.tensor(attention_masks_dia, dtype=torch.uint8)

    # create DataLoaders
    train_dataloader = create_dataloader(train_inputs, train_masks, train_labels, args.batch_size, train=True)
    dev_dataloader = create_dataloader(dev_inputs, dev_masks, dev_labels, args.batch_size, train=False)
    test_syn_dataloader = create_dataloader(test_syn_inputs, test_syn_masks, test_syn_labels, args.batch_size, train=False)
    test_dia_dataloader = create_dataloader(test_dia_inputs, test_dia_masks, test_dia_labels, args.batch_size, train=False)

    #############################################################################
    # Training
    if args.train:
        # Load config and build the token-classification model.
        if model_class == "BERT":
            config = BertConfig.from_pretrained(args.lang_model, num_labels=len(tag2idx))
            # dropout probability for all fully connected layers in the
            # embeddings, encoder, and pooler; default = 0.1
            config.hidden_dropout_prob = 0.1
            model = TokenBERT(
                model_name=args.lang_model,
                num_labels=len(tag2idx),
                use_crf=args.use_crf)
        if model_class == "DistilBERT":
            config = DistilBertConfig.from_pretrained(args.lang_model, num_labels=len(tag2idx))
            config.hidden_dropout_prob = 0.1
            model = TokenDistilBERT(
                model_name=args.lang_model,
                num_labels=len(tag2idx),
                use_crf=args.use_crf)
        # FIX: use the device chosen by initialize_device_settings instead of
        # an unconditional model.cuda(), which crashed on CPU-only hosts.
        model.to(device)

        # Create an optimizer; no weight decay on biases / LayerNorm weights.
        param_optimizer = list(model.named_parameters())
        no_decay = ['bias', 'LayerNorm.weight', 'gamma', 'beta']
        optimizer_grouped_parameters = [
            {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
             'weight_decay_rate': 0.01},
            {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
             'weight_decay_rate': 0.0}
        ]
        optimizer = AdamW(
            optimizer_grouped_parameters,
            lr=args.lr,
            eps=1e-8
        )
        # Total number of training steps = number of batches * number of epochs
        total_steps = len(train_dataloader) * args.epochs
        # Create the learning rate scheduler
        scheduler = get_linear_schedule_with_warmup(
            optimizer,
            num_warmup_steps=0,
            num_training_steps=total_steps
        )

        # Main Loop
        print("=================== Train ================")
        print("##### Language Model:", args.lang_model, ",", "use CRF:", args.use_crf, ",",
              "learning rate:", args.lr, ",", "DROPOUT:", config.hidden_dropout_prob)
        print()
        track_time = time.time()
        for epoch in trange(args.epochs, desc="Epoch"):
            print("Epoch: %4i"%epoch, dt.datetime.now())
            # TRAINING
            model, optimizer, scheduler, tr_loss = training(
                train_dataloader, model=model, device=device, optimizer=optimizer, scheduler=scheduler)
            # EVALUATION: TRAIN SET
            y_true_train, y_pred_train, f1s_train, f1s_overlap_train = evaluation(
                train_dataloader, model=model, device=device, tag_values=tag_values)
            print("TRAIN: F1 Exact %.3f | F1 Overlap %.3f"%(f1s_train, f1s_overlap_train))
            # EVALUATION: DEV SET
            y_true_dev, y_pred_dev, f1s_dev, f1s_overlap_dev = evaluation(
                dev_dataloader, model=model, device=device, tag_values=tag_values)
            print("EVAL: F1 Exact %.3f | F1 Overlap %.3f"%(f1s_dev, f1s_overlap_dev))
        print(" Training and validation took in total: {:}".format(format_time(time.time()-track_time)))

        # EVALUATION: TEST SYN SET
        y_true_test_syn, y_pred_test_syn, f1s_test_syn, f1s_overlap_test_syn = evaluation(
            test_syn_dataloader, model=model, device=device, tag_values=tag_values)
        print("TEST SYN: F1 Exact %.3f | F1 Overlap %.3f"%(f1s_test_syn, f1s_overlap_test_syn))
        # EVALUATION: TEST DIA SET
        y_true_test_dia, y_pred_test_dia, f1s_test_dia, f1s_overlap_test_dia = evaluation(
            test_dia_dataloader, model=model, device=device, tag_values=tag_values)
        print("TEST DIA: F1 Exact %.3f | F1 Overlap %.3f"%(f1s_test_dia, f1s_overlap_test_dia))

        # Classification reports (exact and overlap) for both test sets.
        cr_report_syn = seq_classification_report(y_true_test_syn, y_pred_test_syn, digits=4)
        cr_report_dia = seq_classification_report(y_true_test_dia, y_pred_test_dia, digits=4)
        cr_report_syn_overlap = seq_classification_report(y_true_test_syn, y_pred_test_syn, digits=4, overlap=True)
        cr_report_dia_overlap = seq_classification_report(y_true_test_dia, y_pred_test_dia, digits=4, overlap=True)
        # FIX: the SYN-overlap and DIA-exact prints previously reported the
        # wrong variables (cr_report_dia and cr_report_syn_overlap were
        # swapped between the labels).
        print("Classification report for TEST SYN (Exact):", cr_report_syn)
        print("Classification report for TEST SYN (Overlap):", cr_report_syn_overlap)
        print("Classification report for TEST DIA (Exact):", cr_report_dia)
        print("Classification report for TEST DIA (Overlap):", cr_report_dia_overlap)

        if args.save_cr:
            # FIX: `batch_size` was an undefined name (NameError) — use
            # args.batch_size. Also close the files via context managers
            # instead of leaking handles opened inline in pickle.dump.
            reports = [
                (cr_report_syn, 'test_syn_exact'),
                (cr_report_dia, 'test_dia_exact'),
                (cr_report_syn_overlap, 'test_syn_overlap'),
                (cr_report_dia_overlap, 'test_dia_overlap'),
            ]
            for report, suffix in reports:
                path = (args.output_path + 'classification_report_' + lm
                        + str(args.batch_size) + '_' + suffix + '.txt')
                with open(path, 'wb') as handle:
                    pickle.dump(report, handle)
import numpy as np
import datetime as dt
from tqdm import tqdm, trange
from keras.preprocessing.sequence import pad_sequences
from transformers import (AdamW, get_linear_schedule_with_warmup,
                          BertTokenizer, BertConfig,
                          DistilBertTokenizer, DistilBertConfig)

from utils import set_all_seeds, initialize_device_settings, format_time
from data_prep import bio_tagging_df
from data_handler import (get_tags_list, get_sentences_biotags,
                          split_train_dev, create_dataloader)
from modeling_token import TokenBERT, TokenDistilBERT
from seqeval_metrics import (seq_accuracy_score, seq_f1_score,
                             seq_classification_report)

# NOTE(review): `logging` is used below but no `import logging` is visible in
# this chunk — confirm it is imported elsewhere in the file.
# NOTE(review): set_all_seeds() is called without a seed here — confirm the
# utils function has a default, since other call sites pass one explicitly.
set_all_seeds()

logger = logging.getLogger(__name__)
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
    datefmt="%m/%d/%Y %H:%M:%S",
    level=logging.INFO
)


def training(train_dataloader, model, device, optimizer, scheduler, max_grad_norm=1.0):
    """Run one training epoch over `train_dataloader`.

    The function body continues beyond this chunk; only the loop header is
    visible here.
    """
    model.train()
    # Running loss and step/example counters for this epoch.
    tr_loss = 0
    nb_tr_examples, nb_tr_steps = 0, 0
    for step, batch in enumerate(train_dataloader):
def __init__(self, actor_key, config, storage, replay_buffer, state=None):
    """Initialize an actor worker.

    Args:
        actor_key: Integer index of this actor; offsets the global seed and
            selects the per-actor GPU / temperature entries.
        config: Run configuration object (deep-copied below).
        storage: Shared storage handle (semantics defined elsewhere).
        replay_buffer: Replay buffer the actor feeds experiences into.
        state: Optional checkpoint/state; restored via `self.load_state`.
    """
    # Offset the seed by the actor index so each actor gets a distinct
    # random stream; pass None through unchanged when seeding is disabled.
    set_all_seeds(config.seed + actor_key if config.seed is not None else None)

    self.run_tag = config.run_tag
    self.group_tag = config.group_tag
    self.actor_key = actor_key
    self.config = deepcopy(config)
    self.storage = storage
    self.replay_buffer = replay_buffer

    self.environment = get_environment(config)
    self.environment.seed(config.seed)
    self.mcts = MCTS(config)

    # Device selection mirrors the learner: GPU only when requested for
    # actors; a missing GPU in that case is a hard error.
    if "actors" in self.config.use_gpu_for:
        if torch.cuda.is_available():
            if self.config.actors_gpu_device_ids is not None:
                device_id = self.config.actors_gpu_device_ids[self.actor_key]
                self.device = torch.device("cuda:{}".format(device_id))
            else:
                self.device = torch.device("cuda")
        else:
            raise RuntimeError(
                "GPU was requested but torch.cuda.is_available() is False."
            )
    else:
        self.device = torch.device("cpu")

    # Actors only run inference, so the network stays in eval mode.
    self.network = get_network(config, self.device)
    self.network.to(self.device)
    self.network.eval()

    # Worker id encodes the fixed temperature when one is assigned,
    # otherwise just the actor index.
    if config.fixed_temperatures:
        self.temperature = config.fixed_temperatures[self.actor_key]
        self.worker_id = 'actors/temp={}'.format(round(self.temperature, 1))
    else:
        self.worker_id = 'actor-{}'.format(self.actor_key)

    # Observation normalization: obs_range is interleaved as
    # [min0, max0, min1, max1, ...] — even indices minima, odd maxima.
    if self.config.norm_obs:
        self.obs_min = np.array(self.config.obs_range[::2], dtype=np.float32)
        self.obs_max = np.array(self.config.obs_range[1::2], dtype=np.float32)
        self.obs_range = self.obs_max - self.obs_min

    # Extra per-outcome counters only make sense for two-player games.
    if self.config.two_players:
        self.stats_to_log = defaultdict(int)

    # Logging accumulators, flushed by the Logger machinery.
    self.experiences_collected = 0
    self.training_step = 0
    self.games_played = 0
    self.return_to_log = 0
    self.length_to_log = 0
    self.value_to_log = {'avg': 0, 'max': 0}

    if state is not None:
        self.load_state(state)

    Logger.__init__(self)
def main():
    """Run Subtask A (relevance) or B (sentiment) of GermEval 2017.

    Parses hyper-parameters with argparse, prepares sequence-classification
    data, fine-tunes a (Distil)BERT classifier and evaluates it on the
    synchronic and diachronic test sets.

    Language model should be suitable for German, e.g.:
    'bert-base-multilingual-uncased', 'bert-base-multilingual-cased',
    'bert-base-german-cased', 'bert-base-german-dbmdz-cased',
    'bert-base-german-dbmdz-uncased', 'distilbert-base-german-cased',
    'distilbert-base-multilingual-cased'.
    """
    ############################ variable settings #################################
    parser = argparse.ArgumentParser(
        description=
        'Run Subtask A or B of GermEval 2017 Using Pre-Trained Language Model.')
    parser.add_argument('--task', type=str, default='A', help="The task you want to conduct ('A' or 'B').")
    parser.add_argument('--seed', type=int, default=42, help='Random seed.')
    parser.add_argument('--lang_model', type=str, default='bert-base-german-dbmdz-uncased', help='The pre-trained language model.')
    parser.add_argument('--epochs', type=int, default=4, help='Number of epochs for training.')
    parser.add_argument('--lr', type=float, default=5e-5, help='The learning rate.')
    parser.add_argument('--max_len', type=int, default=256, help='The maximum sequence length of the input text.')
    parser.add_argument('--batch_size', type=int, default=32, help='Your train set batch size.')
    parser.add_argument('--df_path', type=str, default='./data/', help='The data directory.')
    parser.add_argument('--train_data', type=str, default='train_df.tsv', help='The filename of the input train data.')
    parser.add_argument('--dev_data', type=str, default='dev_df.tsv', help='The filename of the input development data.')
    parser.add_argument('--test_data1', type=str, default='test_syn_df.tsv', help='The filename of the first input test data (synchronic).')
    parser.add_argument('--test_data2', type=str, default='test_dia_df.tsv', help='The filename of the second input test data (diachronic).')
    parser.add_argument('--output_path', type=str, default='./output/subtaskA/', help='The output directory of the model and predictions.')
    # NOTE(review): default=True with store_true means these flags are always
    # True — confirm whether opt-out variants were intended.
    parser.add_argument("--train", default=True, action="store_true", help="Flag for training.")
    parser.add_argument("--save_prediction", default=True, action="store_true", help="Flag for saving predictions.")
    args = parser.parse_args()
    ################################################################################
    set_all_seeds(args.seed)
    device, n_gpu = initialize_device_settings(use_cuda=True)

    # Load data
    train_df = pd.read_csv(args.df_path + args.train_data, delimiter='\t')
    dev_df = pd.read_csv(args.df_path + args.dev_data, delimiter='\t')
    test_syn_df = pd.read_csv(args.df_path + args.test_data1, delimiter='\t')
    # NOTE(review): only the synchronic test set drops rows with missing
    # text — confirm the diachronic file cannot contain NaN texts.
    test_syn_df = test_syn_df.dropna(subset=["text"])
    test_dia_df = pd.read_csv(args.df_path + args.test_data2, delimiter='\t')

    # Create a tokenizer
    lower_case = args.lang_model[-7:] == "uncased"
    if args.lang_model[:4] == "bert":
        model_class = "BERT"
        tokenizer = BertTokenizer.from_pretrained(args.lang_model,
                                                  do_lower_case=lower_case,
                                                  max_length=args.max_len)
    elif args.lang_model[:10] == "distilbert":
        model_class = "DistilBERT"
        tokenizer = DistilBertTokenizer.from_pretrained(args.lang_model,
                                                        do_lower_case=lower_case,
                                                        max_length=args.max_len)
    else:
        # FIX: an unsupported model name previously fell through to a
        # NameError on first use of `tokenizer`; fail fast instead.
        raise ValueError("Unsupported language model: {}".format(args.lang_model))

    # get training features; train and dev are encoded together and re-split
    # further below by split_train_dev.
    df = pd.concat([train_df, dev_df])
    sentences = df.text.values
    sentences_syn = test_syn_df.text.values
    sentences_dia = test_dia_df.text.values

    # Map the gold annotations to integer class labels.
    if args.task == 'A':
        class_list = [False, True]
        df['relevance_label'] = df.apply(lambda x: class_list.index(x['relevance']), axis=1)
        labels = df.relevance_label.values
        test_syn_df['relevance_label'] = test_syn_df.apply(lambda x: class_list.index(x['relevance']), axis=1)
        labels_syn = test_syn_df.relevance_label.values
        test_dia_df['relevance_label'] = test_dia_df.apply(lambda x: class_list.index(x['relevance']), axis=1)
        labels_dia = test_dia_df.relevance_label.values
    elif args.task == 'B':
        class_list = ["negative", "neutral", "positive"]
        df['sentiment_label'] = df.apply(lambda x: class_list.index(x['sentiment']), axis=1)
        labels = df.sentiment_label.values
        test_syn_df['sentiment_label'] = test_syn_df.apply(lambda x: class_list.index(x['sentiment']), axis=1)
        labels_syn = test_syn_df.sentiment_label.values
        test_dia_df['sentiment_label'] = test_dia_df.apply(lambda x: class_list.index(x['sentiment']), axis=1)
        labels_dia = test_dia_df.sentiment_label.values
    else:
        # FIX: an unknown task previously fell through to a NameError on
        # `labels`; fail fast with a clear message instead.
        raise ValueError("Unknown task: {} (expected 'A' or 'B')".format(args.task))
    num_labels = len(set(labels))

    # Tokenize all of the sentences and map the tokens to their word IDs.
    input_ids = [tokenizer.encode(sent, add_special_tokens=True, truncation=True, max_length=args.max_len)
                 for sent in sentences]
    input_ids = pad_sequences(input_ids, maxlen=args.max_len, dtype="long",
                              value=0.0, truncating="post", padding="post")
    # Create attention masks (1 for real tokens, 0 for padding)
    attention_masks = [[int(token_id > 0) for token_id in sent] for sent in input_ids]

    # synchronic test data
    # FIX: pass max_length here as for the training data; previously the test
    # encodes truncated at the model default and relied on pad_sequences to
    # chop the tail, which cuts off the final [SEP] token.
    input_ids_syn = [tokenizer.encode(sent, add_special_tokens=True, truncation=True, max_length=args.max_len)
                     for sent in sentences_syn]
    input_ids_syn = pad_sequences(input_ids_syn, maxlen=args.max_len, dtype="long",
                                  value=0.0, truncating="post", padding="post")
    attention_masks_syn = [[int(token_id > 0) for token_id in sent] for sent in input_ids_syn]

    # diachronic test data
    input_ids_dia = [tokenizer.encode(sent, add_special_tokens=True, truncation=True, max_length=args.max_len)
                     for sent in sentences_dia]
    input_ids_dia = pad_sequences(input_ids_dia, maxlen=args.max_len, dtype="long",
                                  value=0.0, truncating="post", padding="post")
    attention_masks_dia = [[int(token_id > 0) for token_id in sent] for sent in input_ids_dia]

    # split train, dev
    train_inputs, train_labels, dev_inputs, dev_labels, train_masks, dev_masks = split_train_dev(
        train_df, dev_df, attention_masks, input_ids, labels)

    # transform to torch tensors
    train_inputs = torch.tensor(train_inputs)
    dev_inputs = torch.tensor(dev_inputs)
    train_labels = torch.tensor(train_labels)
    dev_labels = torch.tensor(dev_labels)
    train_masks = torch.tensor(train_masks)
    dev_masks = torch.tensor(dev_masks)
    test_syn_inputs = torch.tensor(input_ids_syn)
    test_syn_labels = torch.tensor(labels_syn)
    test_syn_masks = torch.tensor(attention_masks_syn)
    test_dia_inputs = torch.tensor(input_ids_dia)
    test_dia_labels = torch.tensor(labels_dia)
    test_dia_masks = torch.tensor(attention_masks_dia)

    # Create the DataLoaders
    train_dataloader = create_dataloader(train_inputs, train_masks, train_labels, args.batch_size, train=True)
    dev_dataloader = create_dataloader(dev_inputs, dev_masks, dev_labels, args.batch_size, train=False)
    test_syn_dataloader = create_dataloader(test_syn_inputs, test_syn_masks, test_syn_labels, args.batch_size, train=False)
    test_dia_dataloader = create_dataloader(test_dia_inputs, test_dia_masks, test_dia_labels, args.batch_size, train=False)

    # Create model
    if args.train:
        if model_class == "BERT":
            config = BertConfig.from_pretrained(args.lang_model, num_labels=num_labels)
            config.hidden_dropout_prob = 0.1
            model = BertForSequenceClassification.from_pretrained(
                args.lang_model,
                num_labels=num_labels,
                output_attentions=False,
                output_hidden_states=False
            )
        if model_class == "DistilBERT":
            config = DistilBertConfig.from_pretrained(args.lang_model, num_labels=num_labels)
            config.hidden_dropout_prob = 0.1
            model = DistilBertForSequenceClassification.from_pretrained(
                args.lang_model,
                num_labels=num_labels,
                output_attentions=False,
                output_hidden_states=False
            )
        # FIX: respect the device chosen by initialize_device_settings instead
        # of unconditionally calling model.cuda() (crashed on CPU-only hosts).
        model.to(device)

        # Create an optimizer; no weight decay on biases / LayerNorm weights.
        param_optimizer = list(model.named_parameters())
        no_decay = ['bias', 'LayerNorm.weight']
        optimizer_grouped_parameters = [
            {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
             'weight_decay_rate': 0.01},
            {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
             'weight_decay_rate': 0.0}
        ]
        optimizer = AdamW(
            optimizer_grouped_parameters,
            lr=args.lr,
            eps=1e-8
        )
        # Total number of training steps = number of batches * number of epochs
        total_steps = len(train_dataloader) * args.epochs
        # Create the learning rate scheduler
        scheduler = get_linear_schedule_with_warmup(
            optimizer,
            num_warmup_steps=0,
            num_training_steps=total_steps
        )

        # Main Loop
        print("=================== Train ================")
        print("##### Language Model:", args.lang_model, ",", "learning rate:", args.lr)
        print()
        track_time = time.time()
        # trange is a tqdm wrapper around the normal python range
        for epoch in trange(args.epochs, desc="Epoch"):
            print("Epoch: %4i"%epoch, dt.datetime.now())
            # NOTE: `train` and `eval` below are project-level helpers (they
            # shadow no locals here), not the builtins.
            model, optimizer, scheduler, tr_loss = train(
                train_dataloader, model=model, device=device, optimizer=optimizer, scheduler=scheduler)
            # EVALUATION: TRAIN SET
            true_bools_train, pred_bools_train, f1_train = eval(
                train_dataloader, model=model, device=device)
            print("TRAIN: micro F1 %.4f"%(f1_train))  # here: same as accuracy
            print(confusion_matrix(true_bools_train, pred_bools_train))
            # EVALUATION: DEV SET
            true_bools_dev, pred_bools_dev, f1_dev = eval(
                dev_dataloader, model=model, device=device)
            print("EVAL: micro F1 %.4f"%(f1_dev))
            print(confusion_matrix(true_bools_dev, pred_bools_dev))
        print(" Training and validation took in total: {:}".format(format_time(time.time()-track_time)))

        # EVALUATION: TEST SYN SET
        true_bools_syn, pred_bools_syn, f1_test_syn = eval(
            test_syn_dataloader, model=model, device=device)
        print("TEST SYN: micro F1 %.4f"%(f1_test_syn))
        print(confusion_matrix(true_bools_syn, pred_bools_syn))
        # EVALUATION: TEST DIA SET
        true_bools_dia, pred_bools_dia, f1_test_dia = eval(
            test_dia_dataloader, model=model, device=device)
        print("TEST DIA: micro F1 %.4f"%(f1_test_dia))
        print(confusion_matrix(true_bools_dia, pred_bools_dia))

        if args.save_prediction:
            if args.task == 'A':
                test_syn_df["relevance_pred"] = pred_bools_syn
                test_dia_df["relevance_pred"] = pred_bools_dia
            if args.task == 'B':
                test_syn_df["sentiment_pred"] = pred_bools_syn
                test_dia_df["sentiment_pred"] = pred_bools_dia
            test_syn_df.to_csv(args.output_path+args.lang_model+"_eval_test_syn.tsv",
                               sep="\t", index=False, header=True, encoding="utf-8-sig")
            test_dia_df.to_csv(args.output_path+args.lang_model+"_eval_test_dia.tsv",
                               sep="\t", index=False, header=True, encoding="utf-8-sig")
def main():
    """Run Subtask C (multi-label category detection) of GermEval 2017.

    Parses commandline arguments, loads the train/dev/test TSV files,
    tokenizes them with a (Distil)BERT tokenizer, fine-tunes the language
    model, and reports micro F1 plus classification reports on the
    synchronic and diachronic test sets.

    Language model should be suitable for German e.g.:
    'bert-base-multilingual-uncased', 'bert-base-multilingual-cased',
    'bert-base-german-cased', 'bert-base-german-dbmdz-cased',
    'bert-base-german-dbmdz-uncased', 'distilbert-base-german-cased',
    'distilbert-base-multilingual-cased'.
    """
    ############################ variable settings #################################
    parser = argparse.ArgumentParser(
        description=
        'Run Subtask C of GermEval 2017 Using Pre-Trained Language Model.')
    parser.add_argument('--seed', type=int, default=42, help='Random seed.')
    parser.add_argument('--lang_model',
                        type=str,
                        default='bert-base-german-dbmdz-uncased',
                        help='The pre-trained language model.')
    parser.add_argument('--epochs',
                        type=int,
                        default=4,
                        help='Number of epochs for training.')
    parser.add_argument('--lr',
                        type=float,
                        default=5e-5,
                        help='The learning rate.')
    parser.add_argument('--max_len',
                        type=int,
                        default=256,
                        help='The maximum sequence length of the input text.')
    parser.add_argument('--batch_size',
                        type=int,
                        default=32,
                        help='Your train set batch size.')
    parser.add_argument('--df_path',
                        type=str,
                        default='./data/',
                        help='The data directory.')
    parser.add_argument('--train_data',
                        type=str,
                        default='train_df_cat.tsv',
                        help='The filename of the input train data.')
    parser.add_argument('--dev_data',
                        type=str,
                        default='dev_df_cat.tsv',
                        help='The filename of the input development data.')
    parser.add_argument(
        '--test_data1',
        type=str,
        default='test_syn_df_cat.tsv',
        help='The filename of the first input test data (synchronic).')
    parser.add_argument(
        '--test_data2',
        type=str,
        default='test_dia_df_cat.tsv',
        help='The filename of the second input test data (diachronic).')
    parser.add_argument(
        '--output_path',
        type=str,
        default='./output/subtaskC/',
        help='The output directory of the model and predictions.')
    # NOTE(review): default=True combined with action="store_true" means
    # args.train is ALWAYS True — the flag is a no-op. Kept as-is for
    # backward compatibility; change default to False to make it effective.
    parser.add_argument("--train",
                        default=True,
                        action="store_true",
                        help="Flag for training.")
    parser.add_argument("--save_prediction",
                        default=False,
                        action="store_true",
                        help="Flag for saving predictions.")
    parser.add_argument("--save_cr",
                        default=False,
                        action="store_true",
                        help="Flag for saving confusion matrix.")
    parser.add_argument("--exclude_general",
                        default=False,
                        action="store_true",
                        help="Flag for excluding category Allgemein.")
    parser.add_argument("--exclude_neutral",
                        default=False,
                        action="store_true",
                        help="Flag for excluding neutral polarity.")
    parser.add_argument("--exclude_general_neutral",
                        default=False,
                        action="store_true",
                        help="Flag for excluding category Allgemein:neutral.")
    args = parser.parse_args()
    ################################################################################
    set_all_seeds(args.seed)
    device, n_gpu = initialize_device_settings(use_cuda=True)

    # Load data
    train_df = pd.read_csv(args.df_path + args.train_data, delimiter='\t')
    dev_df = pd.read_csv(args.df_path + args.dev_data, delimiter='\t')
    test_syn_df = pd.read_csv(args.df_path + args.test_data1, delimiter='\t')
    test_dia_df = pd.read_csv(args.df_path + args.test_data2, delimiter='\t')

    # Create a tokenizer; lower-casing is inferred from the model name suffix.
    lower_case = args.lang_model[-7:] == "uncased"
    if args.lang_model[:4] == "bert":
        model_class = "BERT"
        tokenizer = BertTokenizer.from_pretrained(args.lang_model,
                                                  do_lower_case=lower_case,
                                                  max_length=args.max_len)
    elif args.lang_model[:10] == "distilbert":
        model_class = "DistilBERT"
        tokenizer = DistilBertTokenizer.from_pretrained(
            args.lang_model, do_lower_case=lower_case, max_length=args.max_len)
    else:
        # Fail fast: previously an unsupported model name left model_class and
        # tokenizer unbound, producing a confusing NameError much later.
        raise ValueError(
            "Unsupported language model %r: expected a name starting with "
            "'bert' or 'distilbert'." % args.lang_model)

    # Get training features; category columns start at index 5 of the train frame.
    cats = train_df.columns[5:]
    end = "full"
    # Exclude categories if required; `end` tags the output filenames.
    if args.exclude_general:
        cats = [i for i in list(cats) if "Allgemein" not in i]
        end = "excl_gen"
    if args.exclude_neutral:
        cats = [i for i in list(cats) if "neutral" not in i]
        end = "excl_neu"
    if args.exclude_general_neutral:
        cats = [i for i in list(cats) if "Allgemein:neutral" not in i]
        end = "excl_genneu"
    num_labels = len(list(cats))

    # Create one-hot labels (one binary vector over `cats` per row).
    train_df['one_hot_labels'] = list(train_df[list(cats)].values)
    dev_df['one_hot_labels'] = list(dev_df[list(cats)].values)
    test_syn_df['one_hot_labels'] = list(test_syn_df[list(cats)].values)
    test_dia_df['one_hot_labels'] = list(test_dia_df[list(cats)].values)

    # Retrieve sentences and labels; train and dev are concatenated here and
    # re-split after tokenization by split_train_dev.
    df = pd.concat([train_df, dev_df])
    sentences = df.text.values
    labels = list(df.one_hot_labels.values)
    sentences_syn = test_syn_df.text.values
    labels_syn = list(test_syn_df.one_hot_labels.values)
    sentences_dia = test_dia_df.text.values
    labels_dia = list(test_dia_df.one_hot_labels.values)

    print("number of categories:", len(list(cats)))

    # Tokenize all of the sentences and map the tokens to their word IDs.
    input_ids = [
        tokenizer.encode(sent,
                         add_special_tokens=True,
                         truncation=True,
                         max_length=args.max_len) for sent in sentences
    ]
    input_ids = pad_sequences(input_ids,
                              maxlen=args.max_len,
                              dtype="long",
                              value=0.0,
                              truncating="post",
                              padding="post")
    # Attention masks: 1 for real tokens, 0 for padding.
    attention_masks = [[int(token_id > 0) for token_id in sent]
                       for sent in input_ids]

    # Synchronic test data (max_length added for consistency with train/dev;
    # pad_sequences already truncated "post" to the same length).
    input_ids_syn = [
        tokenizer.encode(sent,
                         add_special_tokens=True,
                         truncation=True,
                         max_length=args.max_len) for sent in sentences_syn
    ]
    input_ids_syn = pad_sequences(input_ids_syn,
                                  maxlen=args.max_len,
                                  dtype="long",
                                  value=0.0,
                                  truncating="post",
                                  padding="post")
    attention_masks_syn = [[int(token_id > 0) for token_id in sent]
                           for sent in input_ids_syn]

    # Diachronic test data.
    input_ids_dia = [
        tokenizer.encode(sent,
                         add_special_tokens=True,
                         truncation=True,
                         max_length=args.max_len) for sent in sentences_dia
    ]
    input_ids_dia = pad_sequences(input_ids_dia,
                                  maxlen=args.max_len,
                                  dtype="long",
                                  value=0.0,
                                  truncating="post",
                                  padding="post")
    attention_masks_dia = [[int(token_id > 0) for token_id in sent]
                           for sent in input_ids_dia]

    # Split train, dev
    train_inputs, train_labels, dev_inputs, dev_labels, train_masks, dev_masks = split_train_dev(
        train_df, dev_df, attention_masks, input_ids, labels)

    # Transform to torch tensors.
    train_inputs = torch.tensor(train_inputs)
    dev_inputs = torch.tensor(dev_inputs)
    train_labels = torch.tensor(train_labels)
    dev_labels = torch.tensor(dev_labels)
    train_masks = torch.tensor(train_masks)
    dev_masks = torch.tensor(dev_masks)
    test_syn_inputs = torch.tensor(input_ids_syn)
    test_syn_masks = torch.tensor(attention_masks_syn)
    test_syn_labels = torch.tensor(labels_syn)
    test_dia_inputs = torch.tensor(input_ids_dia)
    test_dia_masks = torch.tensor(attention_masks_dia)
    test_dia_labels = torch.tensor(labels_dia)

    # Create the DataLoaders (only the train loader shuffles).
    train_dataloader = create_dataloader(train_inputs,
                                         train_masks,
                                         train_labels,
                                         args.batch_size,
                                         train=True)
    dev_dataloader = create_dataloader(dev_inputs,
                                       dev_masks,
                                       dev_labels,
                                       args.batch_size,
                                       train=False)
    test_syn_dataloader = create_dataloader(test_syn_inputs,
                                            test_syn_masks,
                                            test_syn_labels,
                                            args.batch_size,
                                            train=False)
    test_dia_dataloader = create_dataloader(test_dia_inputs,
                                            test_dia_masks,
                                            test_dia_labels,
                                            args.batch_size,
                                            train=False)

    # Create model
    if args.train:
        # NOTE: the *Config objects previously built here were never passed to
        # from_pretrained (dead code); hidden_dropout_prob=0.1 is the model
        # default anyway, so dropping them does not change behavior.
        if model_class == "BERT":
            model = BertForSequenceClassification.from_pretrained(
                args.lang_model,
                num_labels=num_labels,
                output_attentions=False,
                output_hidden_states=False)
        if model_class == "DistilBERT":
            model = DistilBertForSequenceClassification.from_pretrained(
                args.lang_model,
                num_labels=num_labels,
                output_attentions=False,
                output_hidden_states=False)
        model.cuda()

        # Create an optimizer: no weight decay on biases and LayerNorm weights.
        param_optimizer = list(model.named_parameters())
        no_decay = ['bias', 'LayerNorm.weight']
        optimizer_grouped_parameters = [{
            'params': [
                p for n, p in param_optimizer
                if not any(nd in n for nd in no_decay)
            ],
            'weight_decay_rate': 0.01
        }, {
            'params':
            [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
            'weight_decay_rate': 0.0
        }]
        optimizer = AdamW(optimizer_grouped_parameters, lr=args.lr, eps=1e-8)
        # Total number of training steps = number of batches * number of epochs
        total_steps = len(train_dataloader) * args.epochs
        # Create the learning rate scheduler (linear decay, no warmup).
        scheduler = get_linear_schedule_with_warmup(
            optimizer, num_warmup_steps=0, num_training_steps=total_steps)

        # Train model - Main Loop
        print("=================== Train ================")
        print("##### Language Model:", args.lang_model, ",", "learning rate:",
              args.lr)
        print()
        track_time = time.time()
        # trange is a tqdm wrapper around the normal python range
        for epoch in trange(args.epochs, desc="Epoch"):
            print("Epoch: %4i" % epoch, dt.datetime.now())
            model, optimizer, scheduler, tr_loss = train_multilabel(
                train_dataloader=train_dataloader,
                model=model,
                device=device,
                optimizer=optimizer,
                scheduler=scheduler,
                num_labels=num_labels)

            # EVALUATION: TRAIN SET
            pred_bools_train, true_bools_train, f1_train = eval_multilabel(
                train_dataloader, model=model, device=device)
            print("TRAIN: micro F1 %.3f" % (f1_train))

            # EVALUATION: DEV SET
            pred_bools_dev, true_bools_dev, f1_dev = eval_multilabel(
                dev_dataloader, model=model, device=device)
            print("EVAL: micro F1 %.3f" % (f1_dev))

        print(" Training and validation took in total: {:}".format(
            format_time(time.time() - track_time)))

        # EVALUATION: TEST SYN SET
        pred_bools_syn, true_bools_syn, f1_test_syn = eval_multilabel(
            test_syn_dataloader, model=model, device=device)
        print("TEST SYN: micro F1 %.4f" % (f1_test_syn))
        # classification report
        clf_report_syn = classification_report(true_bools_syn,
                                               pred_bools_syn,
                                               target_names=cats,
                                               digits=3)
        print(clf_report_syn)

        # EVALUATION: TEST DIA SET
        pred_bools_dia, true_bools_dia, f1_test_dia = eval_multilabel(
            test_dia_dataloader, model=model, device=device)
        print("TEST DIA: micro F1 %.4f" % (f1_test_dia))
        # classification report
        clf_report_dia = classification_report(true_bools_dia,
                                               pred_bools_dia,
                                               target_names=cats,
                                               digits=3)
        print(clf_report_dia)

        if args.save_cr:
            # Context managers so the (previously leaked) report file handles
            # are flushed and closed deterministically.
            syn_path = (args.output_path + 'clf_report_' + args.lang_model +
                        '_test_syn_' + str(num_labels) + end + '.txt')
            with open(syn_path, 'wb') as f:
                pickle.dump(clf_report_syn, f)
            dia_path = (args.output_path + 'clf_report_' + args.lang_model +
                        '_test_dia_' + str(num_labels) + end + '.txt')
            with open(dia_path, 'wb') as f:
                pickle.dump(clf_report_dia, f)

        if args.save_prediction:
            test_syn_df["category_pred"] = pred_bools_syn
            test_dia_df["category_pred"] = pred_bools_dia
            test_syn_df.category_pred.to_csv(args.output_path +
                                             args.lang_model + '_test_syn_' +
                                             str(num_labels) + end + ".tsv",
                                             sep="\t",
                                             index=False,
                                             header=True,
                                             encoding="utf-8-sig")
            test_dia_df.category_pred.to_csv(args.output_path +
                                             args.lang_model + '_test_dia_' +
                                             str(num_labels) + end + ".tsv",
                                             sep="\t",
                                             index=False,
                                             header=True,
                                             encoding="utf-8-sig")
args = parser.parse_args() if args.dev: args.output = 'devs/' args.output = os.path.join(args.output, args.expr_name) utils.Config(vars(args)).dump(os.path.join(args.output, 'configs.txt')) return args if __name__ == '__main__': args = main() torch.backends.cudnn.benckmark = True utils.set_all_seeds(args.seed) # Init data from SNLI or other data source data, itos = utils.obtain_data(args.data, args.vocab) # Build Vocabulary print("Loading vocabulary and word embeddings...") vocab = Vocabulary(args.labels, itos) vocab.set_word_embedding(args.embed, args.vocab) numpy_data = vocab.process_data(data) print("Embedding shape: ") print(vocab.embeddings.shape) # Build network net = s2l_Net(h_size=args.h_size, v_size=vocab.embeddings.shape[0],
def parse_args() -> Arguments:
    """Parse commandline arguments into an :class:`Arguments` namespace.

    Also resolves derived defaults (random seed, eval steps, device
    fallback), optionally reloads a saved ``config.yaml`` when resuming
    from ``--run-path``, and seeds all RNGs before returning.

    Returns:
        The fully-resolved Arguments namespace.
    """
    argparser = ArgumentParser()
    argparser.add_argument('--dataset',
                           '-d',
                           choices=['omniglot', 'miniimagenet', 'cub'],
                           dest='dataset',
                           help='Specify train dataset')
    argparser.add_argument(
        '--classes',
        '--num-classes',
        '-c',
        default=5,
        type=int,
        dest='num_classes',
        help=
        'Number of classes for each task in meta learning i.e. the N in N-way with K shots'
    )
    argparser.add_argument(
        '--support-samples',
        '-s',
        default=1,
        type=int,
        dest='support_samples',
        help='Number of training samples for each class in meta learning '
        'i.e. the K in N-way with K shots')
    argparser.add_argument(
        '--query-samples',
        '-q',
        default=5,
        type=int,
        dest='query_samples',
        help='Number of test samples for each class in meta learning')
    argparser.add_argument(
        '--distance',
        '--dst',
        default='euclidean',
        type=str,
        choices=['euclidean', 'cosine'],
        help='Distance function to use inside PrototypicalNetwork')
    # type=int added: without it a CLI-supplied value stayed a str and broke
    # range()/trange() consumers downstream (the default alone was an int).
    argparser.add_argument(
        '--epochs',
        '-e',
        default=500_000,
        type=int,
        help='Number of training epochs. Set by default to a very high value '
        'because paper specify that train continues until validation loss '
        'continues to decrease.')
    argparser.add_argument('--epoch-steps',
                           default=200,
                           type=int,
                           dest='epoch_steps')
    argparser.add_argument('--seed', default=13, type=int)
    argparser.add_argument('--device', type=str, default='cuda')
    argparser.add_argument('--batch-size', type=int, default=32)
    argparser.add_argument('--eval-steps',
                           type=int,
                           default=None,
                           help='Number of evaluation steps. '
                           'By default is set to the number '
                           'of steps to reach 600 episodes '
                           'considering batch size. This '
                           'is done to match paper results tables')
    argparser.add_argument('--run-path',
                           type=Path,
                           default=None,
                           help='Set this to resume a checkpoint '
                           'instead of start a new training.',
                           dest='run_path')
    argparser.add_argument(
        '--metadata-features',
        type=int,
        default=None,
        help='Number of metadata features. Must set only for zero shot learning '
        'i.e. when --support-samples=0',
        dest='metadata_features')
    argparser.add_argument(
        '--image-features',
        type=int,
        default=None,
        help=
        'Number of image encoded features. Must set only for zero shot learning '
        'i.e. when --support-samples=0',
        dest='image_features')
    argparser.add_argument('--lr',
                           default=1e-4,
                           type=float,
                           help='lr for optimizer(adam)',
                           dest='lr')
    argparser.add_argument('--weight-decay',
                           default=0.0,
                           type=float,
                           dest='weight_decay')
    # NOTE(review): type=eval executes arbitrary commandline input; tolerable
    # for a local research script, but a str->bool converter would be safer.
    argparser.add_argument('--lr-decay',
                           default=True,
                           type=eval,
                           choices=[True, False],
                           dest='use_lr_decay',
                           help='Set true to use multiplicative lr decay '
                           '(set also --lr-decay-gamma and --lr-decay-steps)')
    argparser.add_argument('--lr-decay-gamma',
                           default=None,
                           type=float,
                           dest='lr_decay_gamma',
                           help='Multiplicative factor to apply to lr decay')
    argparser.add_argument('--lr-decay-steps',
                           default=None,
                           type=int,
                           dest='lr_decay_steps',
                           help='Number of steps to apply lr decay')
    argparser.add_argument('--early-stop',
                           default=True,
                           type=eval,
                           choices=[True, False],
                           dest='use_early_stop',
                           help='Enable early stop based on validation loss')
    argparser.add_argument('--early-stop-patience',
                           '--es-patience',
                           dest='early_stop_patience',
                           default=3,
                           type=int)
    argparser.add_argument('--early-stop-delta',
                           dest='early_stop_delta',
                           default=0.0,
                           type=float)
    argparser.add_argument('--early-stop-metric',
                           default='accuracy',
                           type=str,
                           choices=['accuracy', 'loss'],
                           dest='early_stop_metric')
    args = argparser.parse_args(namespace=Arguments())

    if args.run_path is not None:
        # Resuming: replace ALL commandline values with the saved run config.
        run_path = Path(args.run_path)
        with open(run_path / 'config.yaml') as f:
            # safe_load: yaml.load without an explicit Loader is unsafe and
            # raises TypeError on PyYAML >= 6.
            config = yaml.safe_load(f)
        print('loaded config from', repr(run_path))
        args = Arguments()
        for k in config:
            setattr(args, k, config[k])

    if args.seed is None:
        args.seed = randint(0, 1_000_000)
        print('set seed to %s' % args.seed)

    if args.eval_steps is None:
        # Default eval length: enough batches to cover 600 episodes, to match
        # the result tables of the paper.
        from math import ceil
        args.eval_steps = int(ceil(600 / args.batch_size))
        print('set eval_steps to %s' % args.eval_steps)

    if args.device.startswith('cuda'):
        if not torch.cuda.is_available():
            print('Cuda not available, fall back to cpu')
            args.device = 'cpu'

    set_all_seeds(args.seed)
    return args
# Commandline options for PASCAL VOC 2010 object/part detection training.
# (`parser` itself is created earlier, outside this span.)
parser.add_argument('-dir', '--data_dir', type=str, default='data/VOCdevkit/VOC2010/')
parser.add_argument('-tr', '--train_split', type=str, default='train')
parser.add_argument('-val', '--val_split', type=str, default='val')
parser.add_argument('-cf', '--class2ind_file', type=str, default='object_class2ind')
parser.add_argument('-e', '--n_epochs', type=int, default=30)
parser.add_argument('-lr', '--learning_rate', type=float, default=1e-3)
parser.add_argument('-bs', '--batch_size', type=int, default=1)
parser.add_argument('-wd', '--weight_decay', type=float, default=1e-6)
parser.add_argument('--use_objects', dest='use_objects', action='store_true')
parser.add_argument('--use_parts', dest='use_parts', action='store_true')
parser.add_argument('-nw', '--num_workers', type=int, default=0)
parser.add_argument('-ms', '--max_samples', type=int, default=-1)
args = parser.parse_args()
# NOTE(review): args.device is read here but no --device option is added in
# this span — presumably defined with the parser earlier; confirm it is an
# int GPU index, since it is formatted with %d.
device = torch.device('cuda:%d' % args.device if torch.cuda.is_available() else 'cpu')
# NOTE(review): seed is hard-coded to 123 rather than taken from the CLI,
# unlike the sibling scripts that seed from args — confirm this is intended.
set_all_seeds(123)
########## Parameters ##########
# Bind parsed options to module-level constants used by the rest of the script.
DATA_DIR = args.data_dir
TRAIN_SPLIT = args.train_split
VAL_SPLIT = args.val_split
CLASS2IND_FILE = args.class2ind_file
N_EPOCHS = args.n_epochs
USE_OBJECTS = bool(args.use_objects)
USE_PARTS = bool(args.use_parts)
NUM_WORKERS = args.num_workers
# -1 (the default) means "no cap"; any positive value limits the sample count.
MAX_SAMPLES = args.max_samples if args.max_samples > 0 else None
if USE_OBJECTS and USE_PARTS:
    print('[WARNING]: If you are doing Object and Part Detection, make sure you are using the class2ind file that has both classes')
model_name.append('ddqn') if args.suffix: model_name.append(args.suffix) model_name = '_'.join(model_name) args.output = os.path.join(args.output, game_name, model_name) utils.Config(vars(args)).dump(os.path.join(args.output, 'configs.txt')) return args if __name__ == '__main__': args = main() torch.backends.cudnn.benckmark = True utils.set_all_seeds(args.seed) train_env = Environment( args.rom, args.frame_skip, args.num_frames, args.frame_size, args.no_op_start + 1, utils.large_randint(), True) eval_env = Environment( args.rom, args.frame_skip, args.num_frames, args.frame_size, args.no_op_start + 1,