def generator_from_data(dataset, generator_type='regress', features_list=None, n_epochs=100, n_layers=3, n_hiddens=200, p_dropout=0, num_bins=100, training_args=None):
    """Build (and, unless using the oracle, train) a feature generator from `dataset`.

    NOTE: Training epochs `n_epochs` should scale with the number of features.

    Returns a `(generator, tr_loss)` pair: `tr_loss` is None for the oracle
    generator, otherwise a list with one training-loss entry per epoch.

    NOTE(review): `p_dropout` is accepted but never used in this function —
    confirm whether it should be forwarded to the generator constructors.
    """
    if generator_type == 'oracle':
        # The oracle needs no training: it reads the generating distribution's
        # parameters straight off the dataset object.
        n_features = dataset[0][0].shape[-1]
        generator = GeneratorOracle(n_features, gaussian=dataset.gaussian, rho=dataset.rho, normalize=dataset.normalize)
        return generator, None
    else:
        # Generator needs to be trained
        # All default training arguments are hidden here
        default_args = DDICT(
            optimizer='Adam',
            batch_size=128,
            lr=0.003,
            lr_step_size=20,
            lr_decay=0.5,
            # NOTE(review): this default appears dead — GeneratorClassify below
            # receives the function parameter `num_bins` (default 100), not
            # `args.num_bins`. Confirm which one is intended.
            num_bins=10,
        )
        # Custom training arguments: caller-supplied keys override the defaults above.
        args = default_args
        if training_args is not None:
            for k in training_args:
                args[k] = training_args[k]

        # Data
        n_features = dataset[0][0].shape[-1]
        dataloader = DataLoader(dataset, batch_size=args.batch_size, shuffle=True)

        if generator_type == 'classify':
            generator = GeneratorClassify(n_features, n_layers, n_hiddens, num_bins=num_bins, init_dataset=dataset)
        elif generator_type == 'regress':
            generator = GeneratorRegress(n_features, n_layers, n_hiddens)
        else:
            raise ValueError('generator_type has to be classify or regress')

        # Optimizer is looked up by name on torch.optim; StepLR decays the lr
        # by `lr_decay` every `lr_step_size` epochs.
        optimizer = getattr(optim, args.optimizer)(generator.parameters(), lr=args.lr)
        scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=args.lr_step_size, gamma=args.lr_decay)

        tr_loss = []
        for epoch in range(n_epochs):
            # One entry per epoch; log_times=0 silences intra-epoch logging.
            tr_loss += [train_generator(generator, dataloader, optimizer, features_list, log_times=0)]
            scheduler.step()
        return generator, tr_loss
# Split images/labels into train and validation subsets using the index
# arrays computed earlier in the notebook (trn_ndx / vld_ndx).
trn_imgs = imgs[trn_ndx]
trn_lbls = lbls[trn_ndx]
vld_imgs = imgs[vld_ndx]
vld_lbls = lbls[vld_ndx]

# In[5]:

# Augmentations (`augs`) are applied to the training split only; validation
# data is left untransformed.
training_set = Bengaliai_DS(trn_imgs, trn_lbls, transform=augs)
validation_set = Bengaliai_DS(vld_imgs, vld_lbls)

batch_size = 96

# Shuffle only the training loader; both use 6 worker processes.
training_loader = DataLoader(training_set, batch_size=batch_size, num_workers=6, shuffle=True)
validation_loader = DataLoader(validation_set, batch_size=batch_size, num_workers=6, shuffle=False)

# ---
# ### model

# In[6]:

N_EPOCHS = 120
reduction = 'mean'  # loss reduction mode, presumably passed to the criterion later -- TODO confirm
# Checkpoint filename template; the {:d} slot is presumably filled with a
# fold or run index by later code -- TODO confirm against the save call.
checkpoint_name = 'notebook_purepytorch_from_embedding_SomeAugs_Mu1_{:d}.pth'
def main(): word_embd_dim = 100 # if using pre-trained choose word_embd_dim from [50, 100, 200, 300] pos_embd_dim = 15 hidden_dim = 125 MLP_inner_dim = 100 epochs = 30 learning_rate = 0.01 dropout_layers_probability = 0.0 weight_decay = 0.0 # TODO have to be 0.0 if using training on some vector (aka some trained and some no) alpha = 0.25 # 0.0 means no word dropout # TODO if using pre-trained require min_freq=1 min_freq = 1 # minimum term-frequency to include in vocabulary, use 1 if you wish to use all words BiLSTM_layers = 3 use_pre_trained = True vectors = f'glove.6B.{word_embd_dim}d' if use_pre_trained else '' path_train = "train.labeled" path_test = "test.labeled" run_description = f"KiperwasserDependencyParser\n" \ f"-------------------------------------------------------------------------------------------\n" \ f"word_embd_dim = {word_embd_dim}\n" \ f"pos_embd_dim = {pos_embd_dim}\n" \ f"hidden_dim = {hidden_dim}\n" \ f"MLP_inner_dim = {MLP_inner_dim}\n" \ f"epochs = {epochs}\n" \ f"learning_rate = {learning_rate}\n" \ f"dropout_layers_probability = {dropout_layers_probability}\n" \ f"weight_decay = {weight_decay}\n" \ f"alpha = {alpha}\n" \ f"min_freq = {min_freq}\n" \ f"BiLSTM_layers = {BiLSTM_layers}\n" \ f"use_pre_trained = {use_pre_trained}\n" \ f"vectors = {vectors}\n" \ f"path_train = {path_train}\n" \ f"path_test = {path_test}\n" current_machine_date_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(int(time.time()))) print(f"{current_machine_date_time}\n" f"{run_description}") path_to_save_model = os.path.join('saved_models', f'model {current_machine_date_time}.pt') """TRAIN DATA""" # TODO add path test if using pre=trained with freeze? # TODO change to test when needed!!!! 
train_word_dict, train_pos_dict = get_vocabs_counts( [path_train, path_test, 'comp.unlabeled']) train = DependencyDataset(path=path_train, word_dict=train_word_dict, pos_dict=train_pos_dict, word_embd_dim=word_embd_dim, pos_embd_dim=pos_embd_dim, test=False, use_pre_trained=use_pre_trained, pre_trained_vectors_name=vectors, min_freq=min_freq) train_dataloader = DataLoader(train, shuffle=True) model = KiperwasserDependencyParser(train, hidden_dim, MLP_inner_dim, BiLSTM_layers, dropout_layers_probability) """TEST DATA""" test = DependencyDataset(path=path_test, word_dict=train_word_dict, pos_dict=train_pos_dict, test=[ train.word_idx_mappings, train.pos_idx_mappings, train.word_vectors ]) test_dataloader = DataLoader(test, shuffle=False) """TRAIN THE PARSER ON TRAIN DATA""" train_accuracy_list, train_loss_list, test_accuracy_list, test_loss_list = \ train_kiperwasser_parser(model, train_dataloader, test_dataloader, epochs, learning_rate, weight_decay, alpha) print(f'\ntrain_accuracy_list = {train_accuracy_list}' f'\ntrain_loss_list = {train_loss_list}' f'\ntest_accuracy_list = {test_accuracy_list}' f'\ntest_loss_list = {test_loss_list}') """SAVE MODEL""" torch.save(model.state_dict(), path_to_save_model.replace(':', '-')) """PLOT GRAPHS"""
import os import argparse from itertools import product import pandas as pd from torchvision import models import pandas as pd from minicifar import minicifar_train, minicifar_test, train_sampler, valid_sampler from torch.utils.data.dataloader import DataLoader import numpy as np import random trainloader = DataLoader(minicifar_train, batch_size=32, sampler=train_sampler) validloader = DataLoader(minicifar_train, batch_size=32, sampler=valid_sampler) full_trainloader = DataLoader(minicifar_train, batch_size=32) testloader = DataLoader(minicifar_test, batch_size=32) cfg = { 'VGG11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], 'VGG13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], 'VGG16': [ 64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M' ], 'VGG19': [ 64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'
def tng_dataloader(self):
    """Build the training DataLoader over the 'train' split.

    Patches are 96px crops from 4x-downscaled images, pre-upsampled back
    to the target resolution; batches of 16.
    """
    train_set = DatasetFromFolder(
        data_dir=self.dataroot / 'train',
        scale_factor=4,
        patch_size=96,
        preupsample=True,
    )
    loader = DataLoader(train_set, batch_size=16)
    return loader
def val_dataloader(self) -> DataLoader:
    """Wrap the validation fold in a DataLoader with default settings
    (batch_size=1, no shuffling)."""
    fold = self.val_fold
    loader = DataLoader(fold)
    return loader
def train_dataloader(self) -> DataLoader:
    """Wrap the training fold in a DataLoader with default settings
    (batch_size=1, no shuffling)."""
    fold = self.train_fold
    loader = DataLoader(fold)
    return loader
from functions import*
import os
'''
BBVI without Rao-Blackwellization and Control Variates
'''
# Hyper-parameters
num_epochs=30
batchSize=12223
num_S=5  # number of Monte-Carlo samples during training
dim=28*28+1  # image pixels plus one bias term
eta=0.00001  # step size
num_St=2000  # number of Monte-Carlo samples at test time

# Load the data
transform=transforms.ToTensor()
train_data=DatasetFromCSV('./dataset/train_images_csv.csv','./dataset/train_labels_csv.csv',transforms=transform)
test_data=DatasetFromCSV('./dataset/test_images_csv.csv','./dataset/test_labels_csv.csv',transforms=transform)
train_loader=DataLoader(train_data,batch_size=batchSize,shuffle=True)

# Variational distribution parameters; presumably the first `dim` entries are
# means and the last `dim` are (log-)variances -- TODO confirm with usage below.
para=torch.zeros(dim*2,requires_grad=True)
#para[dim:]=torch.ones(dim)*(-1)

# Storage for results
elbo_list=[]
# AdaGrad accumulator
G=torch.zeros((dim*2,dim*2))

# Start iterating (the loop body continues beyond this chunk)
for epoch in range(num_epochs):
    for i ,data in enumerate(train_loader):
transforms.Resize(configuration.image_size), transforms.CenterCrop(configuration.image_size), transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) ])) classes = dataset.classes for class_name in classes: if not os.path.isdir(Path(configuration.predicted_dir, class_name)): os.makedirs(Path(configuration.predicted_dir, class_name), exist_ok=False) # create dataloader device = torch.device("cuda:0" if ( torch.cuda.is_available() and configuration.num_gpus > 0) else "cpu") dataloader = DataLoader(dataset, batch_size=configuration.batch_size, shuffle=True, num_workers=configuration.num_workers) # X_tr, Y_tr, X_te, Y_te = dataset.get_split() optimizer = optim.SGD(net.parameters(), **configuration.optimizer_args) net = net.to(device) # train losses = [] for epoch in range(configuration.epochs): net.train() for batch_idx, (x, y) in enumerate(dataloader): x, y = x.to(device), y.to(device) optimizer.zero_grad() # print('x', x.shape)
def main(): args = parse_args() # Initialize the accelerator. We will let the accelerator handle device placement for us in this example. accelerator = Accelerator() # Make one log on every process with the configuration for debugging. logging.basicConfig( format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", datefmt="%m/%d/%Y %H:%M:%S", level=logging.INFO, ) logger.info(accelerator.state) # Setup logging, we only want one process per machine to log things on the screen. # accelerator.is_local_main_process is only True for one process per machine. logger.setLevel(logging.INFO if accelerator.is_local_main_process else logging.ERROR) if accelerator.is_local_main_process: datasets.utils.logging.set_verbosity_warning() transformers.utils.logging.set_verbosity_info() else: datasets.utils.logging.set_verbosity_error() transformers.utils.logging.set_verbosity_error() # If passed along, set the training seed now. if args.seed is not None: set_seed(args.seed) # Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below) # or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/ # (the dataset will be downloaded automatically from the datasets Hub). # # For CSV/JSON files, this script will use the column called 'text' or the first column if no column called # 'text' is found. You can easily tweak this behavior (see below). # # In distributed training, the load_dataset function guarantee that only one local process can concurrently # download the dataset. if args.dataset_name is not None: # Downloading and loading a dataset from the hub. 
raw_datasets = load_dataset(args.dataset_name, args.dataset_config_name) else: data_files = {} if args.train_file is not None: data_files["train"] = args.train_file if args.validation_file is not None: data_files["validation"] = args.validation_file if args.test_file is not None: data_files["test"] = args.test_file extension = args.train_file.split(".")[-1] raw_datasets = load_dataset(extension, data_files=data_files, field="data") # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at # https://huggingface.co/docs/datasets/loading_datasets.html. # Load pretrained model and tokenizer # # In distributed training, the .from_pretrained methods guarantee that only one local process can concurrently # download model & vocab. if args.config_name: config = AutoConfig.from_pretrained(args.config_name) elif args.model_name_or_path: config = AutoConfig.from_pretrained(args.model_name_or_path) else: config = CONFIG_MAPPING[args.model_type]() logger.warning("You are instantiating a new config instance from scratch.") if args.tokenizer_name: tokenizer = AutoTokenizer.from_pretrained(args.tokenizer_name, use_fast=True) elif args.model_name_or_path: tokenizer = AutoTokenizer.from_pretrained(args.model_name_or_path, use_fast=True) else: raise ValueError( "You are instantiating a new tokenizer from scratch. This is not supported by this script." "You can do it from another script, save it, and load it from here, using --tokenizer_name." ) if args.model_name_or_path: model = AutoModelForQuestionAnswering.from_pretrained( args.model_name_or_path, from_tf=bool(".ckpt" in args.model_name_or_path), config=config, ) else: logger.info("Training new model from scratch") model = AutoModelForQuestionAnswering.from_config(config) # Preprocessing the datasets. # Preprocessing is slighlty different for training and evaluation. 
column_names = raw_datasets["train"].column_names question_column_name = "question" if "question" in column_names else column_names[0] context_column_name = "context" if "context" in column_names else column_names[1] answer_column_name = "answers" if "answers" in column_names else column_names[2] # Padding side determines if we do (question|context) or (context|question). pad_on_right = tokenizer.padding_side == "right" if args.max_seq_length > tokenizer.model_max_length: logger.warning( f"The max_seq_length passed ({args.max_seq_length}) is larger than the maximum length for the" f"model ({tokenizer.model_max_length}). Using max_seq_length={tokenizer.model_max_length}." ) max_seq_length = min(args.max_seq_length, tokenizer.model_max_length) # Training preprocessing def prepare_train_features(examples): # Tokenize our examples with truncation and maybe padding, but keep the overflows using a stride. This results # in one example possible giving several features when a context is long, each of those features having a # context that overlaps a bit the context of the previous feature. tokenized_examples = tokenizer( examples[question_column_name if pad_on_right else context_column_name], examples[context_column_name if pad_on_right else question_column_name], truncation="only_second" if pad_on_right else "only_first", max_length=max_seq_length, stride=args.doc_stride, return_overflowing_tokens=True, return_offsets_mapping=True, padding="max_length" if args.pad_to_max_length else False, ) # Since one example might give us several features if it has a long context, we need a map from a feature to # its corresponding example. This key gives us just that. sample_mapping = tokenized_examples.pop("overflow_to_sample_mapping") # The offset mappings will give us a map from token to character position in the original context. This will # help us compute the start_positions and end_positions. 
offset_mapping = tokenized_examples.pop("offset_mapping") # Let's label those examples! tokenized_examples["start_positions"] = [] tokenized_examples["end_positions"] = [] for i, offsets in enumerate(offset_mapping): # We will label impossible answers with the index of the CLS token. input_ids = tokenized_examples["input_ids"][i] cls_index = input_ids.index(tokenizer.cls_token_id) # Grab the sequence corresponding to that example (to know what is the context and what is the question). sequence_ids = tokenized_examples.sequence_ids(i) # One example can give several spans, this is the index of the example containing this span of text. sample_index = sample_mapping[i] answers = examples[answer_column_name][sample_index] # If no answers are given, set the cls_index as answer. if len(answers["answer_start"]) == 0: tokenized_examples["start_positions"].append(cls_index) tokenized_examples["end_positions"].append(cls_index) else: # Start/end character index of the answer in the text. start_char = answers["answer_start"][0] end_char = start_char + len(answers["text"][0]) # Start token index of the current span in the text. token_start_index = 0 while sequence_ids[token_start_index] != (1 if pad_on_right else 0): token_start_index += 1 # End token index of the current span in the text. token_end_index = len(input_ids) - 1 while sequence_ids[token_end_index] != (1 if pad_on_right else 0): token_end_index -= 1 # Detect if the answer is out of the span (in which case this feature is labeled with the CLS index). if not (offsets[token_start_index][0] <= start_char and offsets[token_end_index][1] >= end_char): tokenized_examples["start_positions"].append(cls_index) tokenized_examples["end_positions"].append(cls_index) else: # Otherwise move the token_start_index and token_end_index to the two ends of the answer. # Note: we could go after the last offset if the answer is the last word (edge case). 
while token_start_index < len(offsets) and offsets[token_start_index][0] <= start_char: token_start_index += 1 tokenized_examples["start_positions"].append(token_start_index - 1) while offsets[token_end_index][1] >= end_char: token_end_index -= 1 tokenized_examples["end_positions"].append(token_end_index + 1) return tokenized_examples if "train" not in raw_datasets: raise ValueError("--do_train requires a train dataset") train_dataset = raw_datasets["train"] if args.max_train_samples is not None: # We will select sample from whole data if agument is specified train_dataset = train_dataset.select(range(args.max_train_samples)) # Create train feature from dataset train_dataset = train_dataset.map( prepare_train_features, batched=True, num_proc=args.preprocessing_num_workers, remove_columns=column_names, load_from_cache_file=not args.overwrite_cache, desc="Running tokenizer on train dataset", ) if args.max_train_samples is not None: # Number of samples might increase during Feature Creation, We select only specified max samples train_dataset = train_dataset.select(range(args.max_train_samples)) # Validation preprocessing def prepare_validation_features(examples): # Tokenize our examples with truncation and maybe padding, but keep the overflows using a stride. This results # in one example possible giving several features when a context is long, each of those features having a # context that overlaps a bit the context of the previous feature. 
tokenized_examples = tokenizer( examples[question_column_name if pad_on_right else context_column_name], examples[context_column_name if pad_on_right else question_column_name], truncation="only_second" if pad_on_right else "only_first", max_length=max_seq_length, stride=args.doc_stride, return_overflowing_tokens=True, return_offsets_mapping=True, padding="max_length" if args.pad_to_max_length else False, ) # Since one example might give us several features if it has a long context, we need a map from a feature to # its corresponding example. This key gives us just that. sample_mapping = tokenized_examples.pop("overflow_to_sample_mapping") # For evaluation, we will need to convert our predictions to substrings of the context, so we keep the # corresponding example_id and we will store the offset mappings. tokenized_examples["example_id"] = [] for i in range(len(tokenized_examples["input_ids"])): # Grab the sequence corresponding to that example (to know what is the context and what is the question). sequence_ids = tokenized_examples.sequence_ids(i) context_index = 1 if pad_on_right else 0 # One example can give several spans, this is the index of the example containing this span of text. sample_index = sample_mapping[i] tokenized_examples["example_id"].append(examples["id"][sample_index]) # Set to None the offset_mapping that are not part of the context so it's easy to determine if a token # position is part of the context or not. 
tokenized_examples["offset_mapping"][i] = [ (o if sequence_ids[k] == context_index else None) for k, o in enumerate(tokenized_examples["offset_mapping"][i]) ] return tokenized_examples if "validation" not in raw_datasets: raise ValueError("--do_eval requires a validation dataset") eval_examples = raw_datasets["validation"] if args.max_eval_samples is not None: # We will select sample from whole data eval_examples = eval_examples.select(range(args.max_eval_samples)) # Validation Feature Creation eval_dataset = eval_examples.map( prepare_validation_features, batched=True, num_proc=args.preprocessing_num_workers, remove_columns=column_names, load_from_cache_file=not args.overwrite_cache, desc="Running tokenizer on validation dataset", ) if args.max_eval_samples is not None: # During Feature creation dataset samples might increase, we will select required samples again eval_dataset = eval_dataset.select(range(args.max_eval_samples)) if args.do_predict: if "test" not in raw_datasets: raise ValueError("--do_predict requires a test dataset") predict_examples = raw_datasets["test"] if args.max_predict_samples is not None: # We will select sample from whole data predict_examples = predict_examples.select(range(args.max_predict_samples)) # Predict Feature Creation predict_dataset = predict_examples.map( prepare_validation_features, batched=True, num_proc=args.preprocessing_num_workers, remove_columns=column_names, load_from_cache_file=not args.overwrite_cache, desc="Running tokenizer on prediction dataset", ) if args.max_predict_samples is not None: # During Feature creation dataset samples might increase, we will select required samples again predict_dataset = predict_dataset.select(range(args.max_predict_samples)) # Log a few random samples from the training set: for index in random.sample(range(len(train_dataset)), 3): logger.info(f"Sample {index} of the training set: {train_dataset[index]}.") # DataLoaders creation: if args.pad_to_max_length: # If padding was already 
done ot max length, we use the default data collator that will just convert everything # to tensors. data_collator = default_data_collator else: # Otherwise, `DataCollatorWithPadding` will apply dynamic padding for us (by padding to the maximum length of # the samples passed). When using mixed precision, we add `pad_to_multiple_of=8` to pad all tensors to multiple # of 8s, which will enable the use of Tensor Cores on NVIDIA hardware with compute capability >= 7.5 (Volta). data_collator = DataCollatorWithPadding(tokenizer, pad_to_multiple_of=(8 if accelerator.use_fp16 else None)) train_dataloader = DataLoader( train_dataset, shuffle=True, collate_fn=data_collator, batch_size=args.per_device_train_batch_size ) eval_dataset_for_model = eval_dataset.remove_columns(["example_id", "offset_mapping"]) eval_dataloader = DataLoader( eval_dataset_for_model, collate_fn=data_collator, batch_size=args.per_device_eval_batch_size ) if args.do_predict: predict_dataset_for_model = predict_dataset.remove_columns(["example_id", "offset_mapping"]) predict_dataloader = DataLoader( predict_dataset_for_model, collate_fn=data_collator, batch_size=args.per_device_eval_batch_size ) # Post-processing: def post_processing_function(examples, features, predictions, stage="eval"): # Post-processing: we match the start logits and end logits to answers in the original context. predictions = postprocess_qa_predictions( examples=examples, features=features, predictions=predictions, version_2_with_negative=args.version_2_with_negative, n_best_size=args.n_best_size, max_answer_length=args.max_answer_length, null_score_diff_threshold=args.null_score_diff_threshold, output_dir=args.output_dir, prefix=stage, ) # Format the result to the format the metric expects. 
if args.version_2_with_negative: formatted_predictions = [ {"id": k, "prediction_text": v, "no_answer_probability": 0.0} for k, v in predictions.items() ] else: formatted_predictions = [{"id": k, "prediction_text": v} for k, v in predictions.items()] references = [{"id": ex["id"], "answers": ex[answer_column_name]} for ex in examples] return EvalPrediction(predictions=formatted_predictions, label_ids=references) metric = load_metric("squad_v2" if args.version_2_with_negative else "squad") # Create and fill numpy array of size len_of_validation_data * max_length_of_output_tensor def create_and_fill_np_array(start_or_end_logits, dataset, max_len): """ Create and fill numpy array of size len_of_validation_data * max_length_of_output_tensor Args: start_or_end_logits(:obj:`tensor`): This is the output predictions of the model. We can only enter either start or end logits. eval_dataset: Evaluation dataset max_len(:obj:`int`): The maximum length of the output tensor. ( See the model.eval() part for more details ) """ step = 0 # create a numpy array and fill it with -100. logits_concat = np.full((len(dataset), max_len), -100, dtype=np.float64) # Now since we have create an array now we will populate it with the outputs gathered using accelerator.gather for i, output_logit in enumerate(start_or_end_logits): # populate columns # We have to fill it such that we have to take the whole tensor and replace it on the newly created array # And after every iteration we have to change the step batch_size = output_logit.shape[0] cols = output_logit.shape[1] if step + batch_size < len(dataset): logits_concat[step : step + batch_size, :cols] = output_logit else: logits_concat[step:, :cols] = output_logit[: len(dataset) - step] step += batch_size return logits_concat # Optimizer # Split weights in two groups, one with weight decay and the other not. 
no_decay = ["bias", "LayerNorm.weight"] optimizer_grouped_parameters = [ { "params": [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)], "weight_decay": args.weight_decay, }, { "params": [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)], "weight_decay": 0.0, }, ] optimizer = AdamW(optimizer_grouped_parameters, lr=args.learning_rate) # Prepare everything with our `accelerator`. model, optimizer, train_dataloader, eval_dataloader = accelerator.prepare( model, optimizer, train_dataloader, eval_dataloader ) # Note -> the training dataloader needs to be prepared before we grab his length below (cause its length will be # shorter in multiprocess) # Scheduler and math around the number of training steps. num_update_steps_per_epoch = math.ceil(len(train_dataloader) / args.gradient_accumulation_steps) if args.max_train_steps is None: args.max_train_steps = args.num_train_epochs * num_update_steps_per_epoch else: args.num_train_epochs = math.ceil(args.max_train_steps / num_update_steps_per_epoch) lr_scheduler = get_scheduler( name=args.lr_scheduler_type, optimizer=optimizer, num_warmup_steps=args.num_warmup_steps, num_training_steps=args.max_train_steps, ) # Train! total_batch_size = args.per_device_train_batch_size * accelerator.num_processes * args.gradient_accumulation_steps logger.info("***** Running training *****") logger.info(f" Num examples = {len(train_dataset)}") logger.info(f" Num Epochs = {args.num_train_epochs}") logger.info(f" Instantaneous batch size per device = {args.per_device_train_batch_size}") logger.info(f" Total train batch size (w. parallel, distributed & accumulation) = {total_batch_size}") logger.info(f" Gradient Accumulation steps = {args.gradient_accumulation_steps}") logger.info(f" Total optimization steps = {args.max_train_steps}") # Only show the progress bar once on each machine. 
# --- Training loop --------------------------------------------------------
progress_bar = tqdm(range(args.max_train_steps), disable=not accelerator.is_local_main_process)
completed_steps = 0

for epoch in range(args.num_train_epochs):
    model.train()
    for step, batch in enumerate(train_dataloader):
        outputs = model(**batch)
        loss = outputs.loss
        # Scale so gradients accumulated over several micro-steps average out.
        loss = loss / args.gradient_accumulation_steps
        accelerator.backward(loss)
        if step % args.gradient_accumulation_steps == 0 or step == len(train_dataloader) - 1:
            optimizer.step()
            lr_scheduler.step()
            optimizer.zero_grad()
            progress_bar.update(1)
            completed_steps += 1
        if completed_steps >= args.max_train_steps:
            break

# --- Evaluation -----------------------------------------------------------
logger.info("***** Running Evaluation *****")
logger.info(f"  Num examples = {len(eval_dataset)}")
logger.info(f"  Batch size = {args.per_device_eval_batch_size}")

all_start_logits = []
all_end_logits = []
for step, batch in enumerate(eval_dataloader):
    with torch.no_grad():
        outputs = model(**batch)
        start_logits = outputs.start_logits
        end_logits = outputs.end_logits
        if not args.pad_to_max_length:  # necessary to pad predictions and labels for being gathered
            start_logits = accelerator.pad_across_processes(start_logits, dim=1, pad_index=-100)
            end_logits = accelerator.pad_across_processes(end_logits, dim=1, pad_index=-100)
        all_start_logits.append(accelerator.gather(start_logits).cpu().numpy())
        all_end_logits.append(accelerator.gather(end_logits).cpu().numpy())

# Max sequence length across the gathered batches.
max_len = max(x.shape[1] for x in all_start_logits)

# Concatenate the per-batch arrays into (num_examples, max_len) arrays.
start_logits_concat = create_and_fill_np_array(all_start_logits, eval_dataset, max_len)
end_logits_concat = create_and_fill_np_array(all_end_logits, eval_dataset, max_len)

# Free the per-batch numpy arrays before post-processing.
del all_start_logits
del all_end_logits

outputs_numpy = (start_logits_concat, end_logits_concat)
prediction = post_processing_function(eval_examples, eval_dataset, outputs_numpy)
eval_metric = metric.compute(predictions=prediction.predictions, references=prediction.label_ids)
logger.info(f"Evaluation metrics: {eval_metric}")

# --- Prediction -----------------------------------------------------------
if args.do_predict:
    logger.info("***** Running Prediction *****")
    logger.info(f"  Num examples = {len(predict_dataset)}")
    logger.info(f"  Batch size = {args.per_device_eval_batch_size}")

    all_start_logits = []
    all_end_logits = []
    for step, batch in enumerate(predict_dataloader):
        with torch.no_grad():
            outputs = model(**batch)
            start_logits = outputs.start_logits
            end_logits = outputs.end_logits
            if not args.pad_to_max_length:  # necessary to pad predictions and labels for being gathered
                start_logits = accelerator.pad_across_processes(start_logits, dim=1, pad_index=-100)
                # BUG FIX: this line previously padded `start_logits` a second time
                # and assigned the result to `end_logits`, so predict-time end
                # positions were computed from start logits whenever dynamic
                # padding was in use. Pad `end_logits`, mirroring the evaluation
                # loop above.
                end_logits = accelerator.pad_across_processes(end_logits, dim=1, pad_index=-100)
            all_start_logits.append(accelerator.gather(start_logits).cpu().numpy())
            all_end_logits.append(accelerator.gather(end_logits).cpu().numpy())

    max_len = max(x.shape[1] for x in all_start_logits)
    start_logits_concat = create_and_fill_np_array(all_start_logits, predict_dataset, max_len)
    end_logits_concat = create_and_fill_np_array(all_end_logits, predict_dataset, max_len)
    del all_start_logits
    del all_end_logits

    outputs_numpy = (start_logits_concat, end_logits_concat)
    prediction = post_processing_function(predict_examples, predict_dataset, outputs_numpy)
    predict_metric = metric.compute(predictions=prediction.predictions, references=prediction.label_ids)
    logger.info(f"Predict metrics: {predict_metric}")

# --- Save -----------------------------------------------------------------
if args.output_dir is not None:
    accelerator.wait_for_everyone()
    unwrapped_model = accelerator.unwrap_model(model)
    unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save)
def __init__(self):
    """Set up the full CycleGAN training rig: two generators, two
    discriminators, their optimizers and LR schedulers, replay buffers,
    loss criteria, the dataset/dataloader, and a TensorBoard writer."""
    # Use the gpu
    self.device = torch.device('cuda')

    # Create the generators and discriminators (3 in/out channels, 64 base filters)
    self.generator_A = CycleGANGenerator(3, 3, 64).to(self.device)
    self.generator_B = CycleGANGenerator(3, 3, 64).to(self.device)
    self.discriminator_A = CycleGANDiscriminator(3, 64).to(self.device)
    self.discriminator_B = CycleGANDiscriminator(3, 64).to(self.device)

    # Print the networks
    print(self.generator_A)
    print(self.generator_B)
    print(self.discriminator_A)
    print(self.discriminator_B)

    # Initialize the weights of all networks
    self.generator_A.apply(self.init_weights)
    self.generator_B.apply(self.init_weights)
    self.discriminator_A.apply(self.init_weights)
    self.discriminator_B.apply(self.init_weights)

    # Create the optimizers for all the networks
    self.generator_A_optimizer = optim.Adam(self.generator_A.parameters(), lr=0.0002, betas=(0.5, 0.999))
    self.generator_B_optimizer = optim.Adam(self.generator_B.parameters(), lr=0.0002, betas=(0.5, 0.999))
    self.discriminator_A_optimizer = optim.Adam(
        self.discriminator_A.parameters(), lr=0.0002, betas=(0.5, 0.999))
    self.discriminator_B_optimizer = optim.Adam(
        self.discriminator_B.parameters(), lr=0.0002, betas=(0.5, 0.999))

    # Create learning rate schedulers for all the optimizers
    self.generator_A_scheduler = optim.lr_scheduler.LambdaLR(
        self.generator_A_optimizer, lr_lambda=self.schedule_rate)
    self.generator_B_scheduler = optim.lr_scheduler.LambdaLR(
        self.generator_B_optimizer, lr_lambda=self.schedule_rate)
    self.discriminator_A_scheduler = optim.lr_scheduler.LambdaLR(
        self.discriminator_A_optimizer, lr_lambda=self.schedule_rate)
    self.discriminator_B_scheduler = optim.lr_scheduler.LambdaLR(
        self.discriminator_B_optimizer, lr_lambda=self.schedule_rate)

    # Create the buffers to store history of images generated by the generators
    self.generator_A_buffer = ImageBuffer(buffer_size=50)
    self.generator_B_buffer = ImageBuffer(buffer_size=50)

    # Define the loss criterions
    self.cycle_loss = nn.L1Loss()
    self.gan_loss = nn.MSELoss()

    # Get the dataset and dataloaders
    self.dataset = CycleGANDataset(
        base_dir='/home/paurvi/CycleGAN/datasets/summer2winter_yosemite')
    self.dataloader = DataLoader(self.dataset, batch_size=1, num_workers=2)

    # Writers to tensorboard
    # NOTE(review): the run comment below says "cityscapes" while the dataset
    # above is summer2winter_yosemite — confirm which run this actually is.
    self.steps = 0
    self.writer = SummaryWriter(
        comment=
        'cyclegan_cityscapes- 001 changed CycleGANDiscriminator leaakyRelu slope from 0.2 to 0.5'
    )
# Pin GPU numbering to the PCI bus order and expose two devices.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"

writer = SummaryWriter("SRNet/25/2/")

# dataset = SRDataSet("/home/haibao637/xdata/vimeo90k/vimeo_septuplet/",'train','sep_trainlist.txt')
# Training data from LMDB (GT and 7-frame low-res pairs); Vid4 for validation.
dataset = Vimeo90KDataset(
    "/home/haibao637/xdata/vimeo90k/vimeo90k_train_GT.lmdb",
    "/home/haibao637/xdata/vimeo90k/vimeo90k_train_LR7frames.lmdb")
val_dataset = SRDataSet("/home/haibao637/xdata/Vid4//", 'val')

logdir = "/home/haibao637/xdata/srnet_25.2"
# FIX: replaces `if os.path.exists(logdir) == False: os.makedirs(logdir)` —
# exist_ok avoids both the non-idiomatic `== False` and the check-then-create race.
os.makedirs(logdir, exist_ok=True)

print(len(dataset))

# NOTE(review): shuffle=False on the *training* loader and shuffle=True on the
# validation loader looks inverted — confirm intent before swapping.
dataloader = DataLoader(dataset, batch_size=16, num_workers=16, shuffle=False, drop_last=True)
val_dataloader = DataLoader(val_dataset, batch_size=1, shuffle=True, drop_last=True)

device = torch.device("cuda")
# FIX: the original called `.cuda()` twice in a row (`SRNet(3).cuda()` then
# `model = model.cuda()`); the second call was a no-op and is removed.
model = SRNet(3).cuda()
# writer.add_graph(model,torch.rand(1,3,3,64,64).cuda())
# output_pad = torch.nn.ReplicationPad2d(1)
optimizer = torch.optim.Adam(model.parameters(), lr=4e-4, betas=(0.9, 0.99))
# optimizer = torch.optim.Adam([{"params":model.LapPyrNet.parameters(),"lr":1e-4},
x1 = self.downConv1(x) x2 = self.downConv2(x) x = x1 + x2 x = self.downConv(x) return x if __name__ == "__main__": if torch.cuda.is_available() == False: raise RuntimeError("Cuda is not available.") tf = transforms.ToTensor() batch_sz = 50 # batch大小 in_size = 128 # 生成网络的输入向量大小 train_iter = 12 # 生成器训练迭代次数 origin = datasets.MNIST("..\\data\\", True, transform=tf) origin_set = DataLoader(origin, batch_sz, shuffle=True) disc = Disc() generator = Gen(in_size, out_sz=28) disc.cuda() generator.cuda() dopt = optim.Adam(disc.parameters(), lr=2e-5) gopt = optim.Adam(generator.parameters(), lr=2e-2) loss_func = nn.BCELoss() real_labels = Var(torch.ones((batch_sz, 1))).cuda() fake_labels = Var(torch.zeros((batch_sz, 1))).cuda() for k, (bx, _) in enumerate(origin_set): bx = bx.cuda() gen_loss = 0 dis_loss = 0 out = disc(bx) loss = loss_func(out, real_labels)
def main(args):
    """Train, checkpoint and evaluate a TCT detection model.

    Resumes from the newest checkpoint recorded by ModelPathManager when one
    exists; otherwise optionally warm-starts from args.pretrain_model.
    """
    # Two views of the training data: augmented (for optimization) and
    # un-augmented (for evaluating on the training set), plus val and test.
    dataset = TCTDataset(args.image_root, "dataset/train.json",
                         get_transforms(True))
    dataset_train = TCTDataset(args.image_root, "dataset/train.json",
                               get_transforms(False))
    dataset_val = TCTDataset(args.image_root, "dataset/val.json",
                             get_transforms(False))
    dataset_test = TCTDataset(args.image_root, "dataset/test.json",
                              get_transforms(False))

    # NOTE(review): every split is truncated to its first 50 samples —
    # presumably a debugging limit; confirm before a real training run.
    dataset = torch.utils.data.Subset(dataset, [i for i in range(50)])
    data_loader = DataLoader(dataset,
                             batch_size=args.batch_size,
                             shuffle=True,
                             num_workers=4,
                             collate_fn=utils.collate_fn)
    dataset_train = torch.utils.data.Subset(dataset_train,
                                            [i for i in range(50)])
    data_loader_train = DataLoader(dataset_train,
                                   batch_size=args.batch_size,
                                   shuffle=True,
                                   num_workers=4,
                                   collate_fn=utils.collate_fn)
    dataset_val = torch.utils.data.Subset(dataset_val, [i for i in range(50)])
    data_loader_val = DataLoader(dataset_val,
                                 batch_size=args.batch_size,
                                 shuffle=False,
                                 num_workers=4,
                                 collate_fn=utils.collate_fn)
    dataset_test = torch.utils.data.Subset(dataset_test,
                                           [i for i in range(50)])
    data_loader_test = DataLoader(dataset_test,
                                  batch_size=args.batch_size,
                                  shuffle=False,
                                  num_workers=4,
                                  collate_fn=utils.collate_fn)

    # Class count derived from the COCO-style category table (+1 background).
    coco_api = coco_utils.get_coco_api_from_dataset(dataset)

    # NOTE(review): setting CUDA_VISIBLE_DEVICES this late has no effect if
    # CUDA was already initialized earlier in the process — verify.
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus
    model = get_model_instance(num_classes=len(coco_api.cats) + 1)
    device = torch.device(
        "cuda:0") if torch.cuda.is_available() else torch.device("cpu")
    print("device: {}".format(device.type))

    # NOTE: "optimzier" is a typo, kept consistently throughout this function.
    optimzier = torch.optim.SGD(model.parameters(),
                                lr=args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimzier, milestones=args.lr_steps, gamma=args.lr_gamma)
    epoch = 0
    log = Log()
    log_train = Log(log_dir="log/train")
    log_eval = Log(log_dir="log/eval")
    model_path_manager = ModelPathManager(max_file_path_size=0)

    # Resume from the newest recorded checkpoint when one exists.
    latest_model_path = model_path_manager.latest_model_path()
    if latest_model_path:
        checkpoint = torch.load(latest_model_path)
        model.load_state_dict(checkpoint["model"], strict=False)
        optimzier.load_state_dict(checkpoint["optimizer"])
        lr_scheduler.load_state_dict(checkpoint["lr_scheduler"])
        epoch = checkpoint["epoch"] + 1  # continue after the saved epoch
    elif args.pretrain_model:
        # Warm start from a standalone pre-trained weights file; accepts both
        # {"model": state_dict} wrappers and bare state_dicts.
        print("loading model from", args.pretrain_model)
        checkpoint = torch.load(args.pretrain_model)
        if "model" in checkpoint:
            model.load_state_dict(checkpoint["model"], strict=False)
        else:
            model.load_state_dict(checkpoint, strict=False)
    # Module.to() moves parameters in place, so the optimizer created above
    # keeps referencing the same (now on-device) tensors.
    model.to(device)
    print("Start training")
    # Zero-padded epoch header whose width matches the total epoch count.
    train_head = "Epoch : [{:0" + str(len(str(args.epochs))) + "d}]"
    start_time = time.time()
    for epoch in range(epoch, args.epochs):
        train_one_epoch(model,
                        optimzier,
                        data_loader,
                        device,
                        epoch,
                        log,
                        head=train_head.format(epoch))
        lr_scheduler.step()
        # Checkpoint every epoch and register the file with the path manager.
        save_path = model_path_manager.new_model_path(
            "train_epoch{:02d}.pth".format(epoch))
        torch.save(
            {
                "model": model.state_dict(),
                "optimizer": optimzier.state_dict(),
                "lr_scheduler": lr_scheduler.state_dict(),
                "epoch": epoch
            }, save_path)
        model_path_manager.record_path(save_path)
        evaluate(model,
                 data_loader_train,
                 device,
                 epoch,
                 log_train,
                 head="Train:")
        evaluate(model,
                 data_loader_val,
                 device,
                 epoch,
                 log_eval,
                 head="Evaluate:")
        # engine.evaluate(model, data_loader_val, device)
    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=int(total_time)))
    print("Training time {}".format(total_time_str))
    evaluate(model, data_loader_test, device, None, None, head="Test:")
from torch.autograd import Variable
import torch.nn.functional as F
from torch.autograd import Variable  # NOTE(review): duplicate import of Variable
from model_unet import UNet
from torch.utils.data.dataloader import DataLoader
from dataset import Dataset

# Restrict the process to the first GPU.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# CamVid data: raw stills and their per-pixel label images.
dir_inp = '/home/soroush/codes/test/camvid-master/701_StillsRaw_full/'
dir_lbl = '/home/soroush/codes/test/camvid-master/LabeledApproved_full/'
image_dataset = Dataset(dir_inp, dir_lbl)
saved_model_path = '/home/soroush/codes/test/unet_adam.pth'
data_loader = DataLoader(image_dataset, batch_size=80, shuffle=True)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_unet = UNet().to(device)
# Per-pixel cross entropy for semantic segmentation.
criterion = nn.CrossEntropyLoss().to(device)
learning_rate = 0.0001
optimizer = optim.Adam(model_unet.parameters(), lr=learning_rate)
num_epochs = 100
# model_unet.load_state_dict(torch.load(PATH))

# Training loop (the loop body continues beyond this fragment).
for epoch in range(num_epochs):
    print(epoch)
    train_epoch_loss = []
def main():
    """Compare a plain classifier against one trained on VAE-imputed data.

    For each corruption percentage: corrupt the chosen columns, train a VAE,
    train an MLP classifier on the corrupted data and on the VAE-imputed
    data, record validation accuracies, and plot both accuracy curves.
    """
    arg_parser = argparse.ArgumentParser()
    # NOTE(review): --data-path is parsed but never used in this function.
    arg_parser.add_argument("--data-path", default=None)
    arg_parser.add_argument("--dataset")
    arg_parser.add_argument("--encoder-size", default=5, type=int)
    arg_parser.add_argument("--z-size", default=5, type=int)
    arg_parser.add_argument("--nb-epochs", default=10, type=int)
    arg_parser.add_argument("--columns",
                            default="age,hours-per-week",
                            type=parse_list(str))
    arg_parser.add_argument("--percentages",
                            default="0.1,0.2,0.3,0.4",
                            type=parse_list(float))
    args = arg_parser.parse_args()

    accuracies = []      # classifier trained on corrupted data
    vae_accuracies = []  # classifier trained on VAE-imputed data
    # (Removed the never-used f1_scores / vae_f1_scores accumulators.)

    # Map column name -> index; the last column is the label and is excluded.
    columns = {
        c: i
        for i, c in enumerate(
            ADULT_DATASET_COLUMNS[:len(ADULT_DATASET_COLUMNS) - 1])
    }
    for percent in args.percentages:
        print("Training on data with {} corruption".format(percent))
        train_dataset, valid_dataset = load_dataset(args.dataset)(
            columns=[columns[c] for c in args.columns], percent=percent)
        train_params = m(learning_rate=0.00001,
                         minibatch_size=64,
                         nb_epochs=args.nb_epochs)
        vae = VAE(train_dataset.nb_features, args.encoder_size, args.z_size)
        network_optimizer = optim.Adam(vae.parameters(),
                                       lr=train_params.learning_rate)
        train_data_loader = DataLoader(train_dataset,
                                       batch_size=train_params.minibatch_size)
        valid_data_loader = DataLoader(valid_dataset,
                                       batch_size=train_params.minibatch_size)
        print("Training VAE...")
        print("=" * 100)
        vae.fit(train_params, network_optimizer, train_data_loader,
                valid_data_loader)

        print("\nTraining classifier on regular data...")
        classifier = MLP(train_dataset.nb_features,
                         train_dataset.nb_classes,
                         hidden_layer_sizes=None)
        classifier_optimizer = optim.Adam(classifier.parameters(),
                                          lr=train_params.learning_rate)
        # NOTE(review): the optimizer above is built with the *old* (1e-5)
        # learning rate — train_params is only re-created with 1e-4 after.
        # Preserved as-is to avoid changing training behavior; confirm intent.
        train_params = m(learning_rate=0.0001,
                         minibatch_size=64,
                         nb_epochs=args.nb_epochs)
        classifier.fit(train_params, classifier_optimizer, train_data_loader)
        preds_classifier = classifier.predict(valid_data_loader)
        accuracy = accuracy_score(valid_dataset.y,
                                  preds_classifier.argmax(axis=1))
        accuracies.append(accuracy)
        print("Accuracy: {}%".format(accuracy * 100))
        print("=" * 100)

        # (Fixed "classifer" typo in the user-facing message.)
        print("Training classifier on VAE transformed data...")
        train_vae_data_loader = impute(train_data_loader, vae)
        classifier = MLP(train_vae_data_loader.dataset.nb_features,
                         train_vae_data_loader.dataset.nb_classes,
                         hidden_layer_sizes=None)
        classifier_optimizer = optim.Adam(classifier.parameters(),
                                          lr=train_params.learning_rate)
        train_params = m(learning_rate=0.0001,
                         minibatch_size=64,
                         nb_epochs=args.nb_epochs)
        classifier.fit(train_params, classifier_optimizer,
                       train_vae_data_loader)
        valid_vae_data_loader = impute(valid_data_loader, vae)
        preds_classifier = classifier.predict(valid_vae_data_loader)
        accuracy = accuracy_score(valid_vae_data_loader.dataset.y,
                                  preds_classifier.argmax(axis=1))
        vae_accuracies.append(accuracy)
        print("Accuracy: {}%".format(accuracy * 100))
        print("=" * 100)
        print("*" * 100)

    # Accuracy-vs-corruption curves for both training regimes.
    plt.plot(args.percentages, accuracies, label="corrupt")
    plt.plot(args.percentages, vae_accuracies, label="vae")
    plt.legend()
    plt.show()
def as_pytorch_dataloader(self, split=TRAIN, **kwargs):
    """Wrap the requested split of this dataset in a torch DataLoader.

    Args:
        split: which split to load (defaults to TRAIN).
        **kwargs: forwarded verbatim to DataLoader (batch_size, shuffle, ...).

    Returns:
        DataLoader over the requested split.
    """
    # Bug fix: the `split` argument was ignored — the dataset was always
    # built with split=TRAIN regardless of what the caller asked for.
    dataset = self.as_pytorch_dataset(split=split)
    return DataLoader(dataset, **kwargs)
if self.transform is not None:
    # The transform receives and returns the (image, annotation) pair together.
    img, target = self.transform(img, target)
return img, target

def __len__(self):
    # One sample per image discovered at construction time.
    return len(self.imgs)

if __name__ == "__main__":
    # this little example script can be used to visualize the first image
    # loaded from the dataset.
    from torch.utils.data.dataloader import DataLoader
    import matplotlib.pyplot as plt
    from torchvision import transforms
    import torch
    train_data = PennFudanDataset(
        transform=lambda im, ann: (transforms.ToTensor()(im), ann))
    dataloader = DataLoader(train_data, batch_size=1)
    for batch_data in dataloader:
        x, y = batch_data
        # squeeze drops the batch dimension so ToPILImage receives (C, H, W)
        plt.imshow(transforms.ToPILImage()(torch.squeeze(x)))
        plt.show()
        print(x.shape)
        print(y)
        break  # only the first sample is visualized

__all__ = ["PennFudanDataset"]
def test_dataloader(self) -> DataLoader: return DataLoader(self.test_dataset)
hidden_size = 400 out_size = 10 epochs = 10 batch_size = 100 lr = 0.1 train_dataset = datasets.MNIST(root='./data', train=True, transform=transforms.ToTensor(), download=True) test_dataset = datasets.MNIST(root='./data', train=False, transform=transforms.ToTensor()) train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True) test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False) print(train_loader) net = Net(input_size, hidden_size, out_size) criterion = nn.CrossEntropyLoss() optmizer = torch.optim.Adam(net.parameters(), lr=lr) correct_train = 0 total_train = 0 for i, (images, lables) in enumerate(train_loader):
def test_dataloader(self, *args, **kwargs) -> DataLoader: return DataLoader(self.dataset, batch_size=self.batch_size, shuffle=False, num_workers=4, pin_memory=True)
def GetTestLoader(self):
    """Build a DataLoader over a freshly constructed VOC dataset.

    Yields shuffled batches of 1000 samples, loaded by two worker processes.
    """
    test_set = VOCDataset()
    return DataLoader(test_set, batch_size=1000, shuffle=True, num_workers=2)
# (continuation of a CORe50 scenario-builder call that begins above this fragment)
root_img, train_failists_paths, root + filelists_bp + "test_filelist.txt",
    [0 for _ in range(nbatch[scenario])],
    complete_test_set_only=True,
    train_transform=train_transform,
    eval_transform=eval_transform)
return scenario_obj

__all__ = ['CORe50']

if __name__ == "__main__":
    # this below can be taken as a usage example or a simple test script
    import sys
    from torch.utils.data.dataloader import DataLoader
    scenario = CORe50(scenario="nicv2_79")
    for i, batch in enumerate(scenario.train_stream):
        print(i, batch)
        dataset, t = batch.dataset, batch.task_label
        dl = DataLoader(dataset, batch_size=300)
        for mb in dl:
            x, y = mb
            print(x.shape)
            print(y.shape)
        sys.exit(0)  # stop after inspecting the first experience
def make_dataloader(dataset, batch_size=16, shuffle=True, key="input_ids"):
    """Create a DataLoader whose batching is driven by OrderedBatchSampler.

    Args:
        dataset: examples indexable by the sampler.
        batch_size: examples per batch.
        shuffle: whether the sampler shuffles batch order.
        key: field whose length orders the examples; a falsy key disables
            length-based ordering (length_func=None).
    """
    # Bug fix: `lambda x: len(x[key]) if key else None` parsed as
    # `lambda x: (len(x[key]) if key else None)`, so a callable (returning
    # None) was passed even when `key` was falsy. The conditional must choose
    # between the lambda and None, not live inside the lambda body.
    length_func = (lambda example: len(example[key])) if key else None
    sampler = OrderedBatchSampler(dataset,
                                  batch_size=batch_size,
                                  length_func=length_func,
                                  shuffle=shuffle)
    return DataLoader(dataset, collate_fn=collate_fn, batch_sampler=sampler)
def test_dataloader(self):
    """One-image-per-batch loader over the pre-upsampled x4 eval split."""
    eval_set = DatasetFromFolder(data_dir=self.dataroot / 'test',
                                 scale_factor=4,
                                 mode='eval',
                                 preupsample=True)
    return DataLoader(eval_set, batch_size=1)
def main():
    """Generate plot completions with a POS model plus an XLNet-style LM.

    Loads a part-of-speech model and a pretrained language model, runs the
    test set through both, samples completions, and writes one result file
    per batch under <results_path>/<model_name>/.
    """
    global tokenizer
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_type", default=None, type=str, required=True,
                        help="Model type selected in the list: " +
                        ", ".join(MODEL_CLASSES.keys()))
    parser.add_argument("--model_name_or_path", default=None, type=str,
                        required=True,
                        help="Path to pre-trained model or shortcut name selected in the list: " +
                        ", ".join(ALL_MODELS))
    parser.add_argument("--pos_model_name_or_path", default=None, type=str,
                        required=True,
                        help="Path to pre-trained model or shortcut name selected in the list: " +
                        ", ".join(ALL_MODELS))
    parser.add_argument("--prompt", type=str, default="")
    parser.add_argument("--padding_text", type=str, default="")
    parser.add_argument("--temperature", type=float, default=1.0)
    parser.add_argument("--top_k", type=int, default=0)
    parser.add_argument("--top_p", type=float, default=0.9)
    parser.add_argument(
        "--overwrite_cache", action="store_true",
        help="Overwrite the cached training and evaluation sets"
    )
    parser.add_argument("--no_cuda", action='store_true',
                        help="Avoid using CUDA when available")
    parser.add_argument('--seed', type=int, default=42,
                        help="random seed for initialization")
    parser.add_argument('--test_batch_size', type=int, default=1)
    parser.add_argument('--test_file_path', type=str, default=None,
                        help="path of parsed plots to generate completion")
    parser.add_argument(
        "--cache_dir", default=None, type=str,
        help="Optional directory to store the pre-trained models downloaded from s3 (instead of the default one)",
    )
    parser.add_argument('--results_path', type=str, default=RESULTS_OUT_PATH,
                        help="path for generated results")
    parser.add_argument("--ngenres", type=int, default=0,
                        help="Number of genres for embedding.")
    parser.add_argument("--nfacts", type=int, default=0,
                        help="Number of genres for embedding.")
    args = parser.parse_args()

    # NOTE(review): the device index "cuda:2" is hard-coded — confirm this
    # matches the intended GPU on the target machine.
    args.device = torch.device("cuda:2" if torch.cuda.is_available()
                               and not args.no_cuda else "cpu")
    print("device is " + str(args.device))
    args.n_gpu = torch.cuda.device_count()
    set_seed(args)
    args.model_type = args.model_type.lower()
    pos_model = get_pos_model(args)

    def count_parameters(model):
        # Trainable parameters only.
        return sum(p.numel() for p in model.parameters() if p.requires_grad)

    print("num parameres pos: " + str(count_parameters(pos_model)))
    model_class, tokenizer_class = MODEL_CLASSES[args.model_type]
    tokenizer = tokenizer_class.from_pretrained(args.model_name_or_path)
    model = model_class.from_pretrained(args.model_name_or_path)
    model.to(args.device)
    model.eval()  # inference only
    print("num parameres xlnet: " + str(count_parameters(model)))
    args.length = model.config.max_position_embeddings  # No generation bigger than model size
    if args.length < 0:
        args.length = MAX_LENGTH  # avoid infinite loop
    print(args)

    def collate(examples: List[torch.Tensor]):
        # Pad each field of the (seq, facts, mask, genre) tuples to a common
        # length, then re-zip back into per-example tuples.
        if tokenizer._pad_token is None:
            return pad_sequence(examples, batch_first=True)
        seqs, facts, masks, genres = zip(*examples)
        pad_facts = pad_sequence(facts, batch_first=True,
                                 padding_value=tokenizer.pad_token_id)
        pad_mapping = pad_sequence(masks, batch_first=True,
                                   padding_value=tokenizer.pad_token_id)
        pad_originals = pad_sequence(seqs, batch_first=True,
                                     padding_value=tokenizer.pad_token_id)
        # NOTE(review): result discarded — dead statement? Verify intent.
        torch.stack(genres)
        return list(zip(pad_facts, pad_mapping, genres, pad_originals))

    test_dataset = PlotFactsOnlyDataset(tokenizer, args, args.test_file_path,
                                        block_size=512)
    test_sampler = SequentialSampler(test_dataset)
    test_dataloader = DataLoader(
        test_dataset, sampler=test_sampler,
        batch_size=args.test_batch_size, collate_fn=collate
    )
    batch_counter = 0
    # Output directory is named after the last non-empty path component of
    # the model path (handles trailing "/").
    spltarr = args.model_name_or_path.split("/")
    model_name = spltarr[-1] if spltarr[-1] != "" else spltarr[-2]
    out_path = os.path.join(args.results_path, model_name)
    for batch in test_dataloader:
        batch_counter += 1
        with torch.no_grad():
            pad_facts, pad_mapping, genres, pad_originals = zip(*batch)
            tpad_mapping = torch.stack(pad_mapping).to(args.device)
            tpad_facts = torch.stack(pad_facts).to(args.device)
            tgenres = torch.stack(genres).to(args.device)
            # 1 where the mapping equals the pad token, 0 elsewhere.
            padding_masks = torch.where(
                tpad_mapping == tokenizer.pad_token_id,
                torch.ones_like(tpad_mapping),
                torch.zeros_like(tpad_mapping)).to(args.device)
            pos_outputs = pos_model(tpad_facts, fact_embeds=tpad_mapping,
                                    genre_idxs=tgenres,
                                    input_mask=padding_masks, labels=None)
            # NOTE(review): only element [0] of the batch is used below,
            # effectively assuming test_batch_size == 1 — confirm.
            seqs, masks = get_full_seqs_and_masks(
                pos_outputs[0][0].squeeze(-1).long(), tpad_facts[0], tokenizer)
            inputs_raw = seqs.unsqueeze(0)
            masks_raw = masks.unsqueeze(0)
            genres = tgenres
            # Prepend the fixed priming text to every sequence and its mask.
            prefix_tensor = tokenizer.encode(
                PADDING_TEXT, add_special_tokens=False,
                return_tensors="pt").to(args.device).long()
            prefix_mask = torch.ones(prefix_tensor.size()).to(args.device)
            prefix_tensor = prefix_tensor.expand(
                (inputs_raw.size(0), prefix_tensor.size(-1)))
            prefix_mask = prefix_mask.expand(
                (masks_raw.size(0), prefix_mask.size(-1)))
            inputs = torch.cat([prefix_tensor, inputs_raw], dim=1)
            masks = torch.cat([prefix_mask, masks_raw], dim=1)
            padding_masks = torch.where(masks == tokenizer.pad_token_id,
                                        torch.ones_like(masks),
                                        torch.zeros_like(masks))
            perm_masks = get_perm_masks(masks, order="L2R")
            target_map = get_target_mapping(masks, args.device)
            out = sample_sequence(
                model=model,
                context=inputs,
                perm_masks=perm_masks,
                padding_masks=padding_masks,
                target_mappings=target_map,
                temperature=args.temperature,
                top_k=args.top_k,
                top_p=args.top_p,
                device=str(args.device),
                genre_idxs=genres,
                tokenizer=tokenizer
            )
            from PlotFactsOnlyDataset import GENRES_LIST
            genre_text = "unused"  # GENRES_LIST[genres[0]]
            print(genre_text + ":\n")
            # Strip the priming text back off everything that is reported.
            text = tokenizer.decode(
                out[0].tolist(),
                clean_up_tokenization_spaces=True)[len(PADDING_TEXT):]
            print(text)
            original_text = tokenizer.decode(pad_originals[0])[len(PADDING_TEXT):]
            masked_text = get_text_with_blanks(inputs, target_map)[len(PADDING_TEXT):]
            os.makedirs(out_path, exist_ok=True)
            # One result file per batch: generated, masked and original text.
            with open(out_path + "/result" + str(batch_counter), "w") as f:
                f.writelines(f"\ngenre: {genre_text}\n")
                f.writelines("\ntext:\n\n")
                f.writelines(text)
                f.writelines("\n--------masked-------\n")
                f.writelines(masked_text)
                f.writelines("\n--------original-------\n")
                f.writelines(original_text)
def run_epoch(split):
    """Run one full pass over the train or test split.

    Trains (backprop, gradient clipping, LR schedule) when split == 'train';
    otherwise runs forward-only and returns the mean test loss.
    """
    is_train = split == 'train'
    model.train(is_train)
    data = self.train_dataset if is_train else self.test_dataset
    # NOTE(review): shuffle=True is applied to the *test* loader as well —
    # harmless for a mean loss, but confirm it is intended.
    loader = DataLoader(data,
                        shuffle=True,
                        pin_memory=True,
                        batch_size=config.batch_size,
                        num_workers=config.num_workers)

    losses = []
    # Progress bar only when training (and only if config.tqdm is on).
    if self.config.tqdm:
        pbar = tqdm(
            enumerate(loader),
            total=len(loader)) if is_train else enumerate(loader)
    else:
        pbar = enumerate(loader)
    for it, (x, y) in pbar:

        # place data on the correct device
        x = x.to(self.device)
        y = y.to(self.device)

        # forward the model
        with torch.set_grad_enabled(is_train):
            logits, loss = model(x, y)
            loss = loss.mean(
            )  # collapse all losses if they are scattered on multiple gpus
            losses.append(loss.item())

        if is_train:

            # backprop and update the parameters
            model.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(),
                                           config.grad_norm_clip)
            optimizer.step()

            # decay the learning rate based on our progress
            if config.lr_decay:
                self.tokens += (y >= 0).sum(
                )  # number of tokens processed this step (i.e. label is not -100)
                if self.tokens < config.warmup_tokens:
                    # linear warmup
                    lr_mult = float(self.tokens) / float(
                        max(1, config.warmup_tokens))
                else:
                    # cosine learning rate decay
                    progress = float(self.tokens - config.warmup_tokens) / float(
                        max(
                            1, config.final_tokens - config.warmup_tokens))
                    lr_mult = max(
                        0.1, 0.5 * (1.0 + math.cos(math.pi * progress)))
                lr = config.learning_rate * lr_mult
                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr
            else:
                lr = config.learning_rate

            # report progress
            if self.config.tqdm:
                pbar.set_description(
                    f"epoch {epoch+1} iter {it}: train loss {loss.item():.5f}. lr {lr:e}"
                )

    if not is_train:
        test_loss = float(np.mean(losses))
        logger.info("test loss: %f", test_loss)
        return test_loss
import torch
from torch.utils.data.dataloader import DataLoader
from torch.utils.data.sampler import BatchSampler
from mlbaselines.sampler import RandomSampler
from torch.utils.data.dataset import TensorDataset

# Column vector [[0.], [1.], ..., [99.]] built with one vectorized call
# instead of a per-row Python loop over a tensor of ones.
data = torch.arange(100, dtype=torch.float32).unsqueeze(1)
dataset = TensorDataset(data)

# Seeded random ordering grouped into fixed-size pairs; an incomplete
# trailing batch is dropped.
sampler = RandomSampler(dataset, seed=1)
batch_sampler = BatchSampler(sampler, batch_size=2, drop_last=True)

# Fix: do not pass batch_size together with batch_sampler — DataLoader
# rejects any non-default value, and the batching is already fully defined
# by the batch sampler. The redundant batch_size=1 was misleading.
loader = DataLoader(dataset, batch_sampler=batch_sampler, num_workers=2)

for b in loader:
    print(b[0])
def main():
    """Set up the model, optimizer and data pipeline, then train."""
    model = initialize_model(args.model_name, args.num_classes)
    print(args.device)
    model.to(args.device)

    optimizer = torch.optim.Adam(params=model.parameters(), lr=args.lr_start)
    #optimizer = torch.optim.SGD(params=model.parameters(), lr=args.lr_start, momentum=0.9, weight_decay=0.01)

    # Both splits share identical preprocessing: bilinear resize + ImageNet
    # normalization for images, nearest-neighbour resize for masks.
    splits = ('train', 'val')
    img_transforms = {
        split: tf.Compose([
            tf.Resize(size=(args.size, args.size),
                      interpolation=Image.BILINEAR),
            tf.ToTensor(),
            tf.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
        ])
        for split in splits
    }
    msk_transforms = {
        split: tf.Compose([
            tf.Resize(size=(args.size, args.size),
                      interpolation=Image.NEAREST)
        ])
        for split in splits
    }

    # One dataset per split, built from the same image/mask roots.
    datasets = {
        split: T_dataset(args.image_dir,
                         args.mask_dir,
                         split,
                         img_transforms=img_transforms[split],
                         msk_transforms=msk_transforms[split])
        for split in splits
    }

    # Shuffling and incomplete-batch dropping only apply while training.
    dataloaders = {
        'train': DataLoader(dataset=datasets['train'],
                            batch_size=args.batch_size,
                            shuffle=True,
                            num_workers=args.num_workers,
                            pin_memory=True,
                            drop_last=True),
        'val': DataLoader(dataset=datasets['val'],
                          batch_size=args.batch_size,
                          shuffle=False,
                          num_workers=args.num_workers,
                          pin_memory=True,
                          drop_last=False),
    }

    # Per-pixel multi-class loss for segmentation.
    criterion = nn.CrossEntropyLoss()

    train_model(model, dataloaders, criterion, optimizer, args.epochs)
def fit_diffeomorphism_model(self,
                             X,
                             t,
                             X_d,
                             learning_rate=1e-2,
                             learning_decay=0.95,
                             n_epochs=50,
                             train_frac=0.8,
                             l2=1e1,
                             batch_size=64,
                             initialize=True,
                             verbose=True,
                             X_val=None,
                             t_val=None,
                             Xd_val=None):
    """Fit the diffeomorphism network to the closed-loop residual dynamics.

    Arguments:
        X {numpy array [Ntraj,Nt,Ns]} -- state
        t {numpy array [Ntraj,Nt]} -- time vector
        X_d {numpy array [Ntraj,Nt,Ns]} -- desired state

    Keyword Arguments:
        learning_rate {float} -- Adam step size (default: {1e-2})
        learning_decay {float} -- per-epoch LR multiplier (default: {0.95})
        n_epochs {int} -- maximum training epochs (default: {50})
        train_frac {float} -- train/validation ratio, used only when no
            explicit validation set is supplied (default: {0.8})
        l2 {float} -- L2 penalty, applied as Adam weight_decay (default: {1e1})
        batch_size {int} -- (default: {64})
        initialize {bool} -- re-initialize linear-layer weights (Xavier)
            before training (default: {True})
        verbose {bool} -- print per-epoch losses (default: {True})
        X_val, t_val, Xd_val -- optional explicit validation data; if any is
            None the training data is split randomly instead (default: {None})

    Returns:
        float -- validation loss of the last completed epoch
    """
    device = 'cuda' if cuda.is_available() else 'cpu'
    X, X_dot, X_d, X_d_dot, t = self.process(X=X, t=t, X_d=X_d)

    manual_seed(42)  # fix seed for reproducibility (split, shuffling, init)

    def make_input_tensor(x, x_d, x_dot, x_d_dot):
        """Stack network inputs: [x, (x_d,) x_dot, (x_d_dot,) zeros]."""
        if self.traj_input:
            return from_numpy(
                npconcatenate((x, x_d, x_dot, x_d_dot, np.zeros_like(x)),
                              axis=1))
        return from_numpy(npconcatenate((x, x_dot, np.zeros_like(x)), axis=1))

    def make_target(x, x_dot, x_d):
        """Residual to be learned: x_dot - (A_cl @ x + BK @ x_d)."""
        return x_dot - (dot(self.A_cl, x.T) + dot(self.BK, x_d.T)).T

    X_tensor = make_input_tensor(X, X_d, X_dot, X_d_dot)
    y_tensor = from_numpy(make_target(X, X_dot, X_d))
    X_tensor.requires_grad_(True)

    # Builds dataset with all data
    dataset = TensorDataset(X_tensor, y_tensor)

    if X_val is None or t_val is None or Xd_val is None:
        # No explicit validation data: split randomly into train/validation.
        n_train = int(train_frac * X.shape[0])
        n_val = X.shape[0] - n_train
        train_dataset, val_dataset = random_split(dataset, [n_train, n_val])
        train_loader = DataLoader(dataset=train_dataset,
                                  batch_size=batch_size,
                                  shuffle=True)
        val_loader = DataLoader(dataset=val_dataset, batch_size=batch_size)
    else:
        # Explicit validation data: train on everything, validate on X_val.
        X_val, X_dot_val, Xd_val, Xd_dot_val, t_val = self.process(
            X=X_val, t=t_val, X_d=Xd_val)
        X_val_tensor = make_input_tensor(X_val, Xd_val, X_dot_val, Xd_dot_val)
        # Bug fix: the validation target was previously computed as
        #   X_dot_val - dot(A_cl, X_val.T + dot(BK, Xd_val.T)).T
        # i.e. the BK term was added *inside* the A_cl product — inconsistent
        # with the training target. Both paths now share make_target().
        y_val_tensor = from_numpy(make_target(X_val, X_dot_val, Xd_val))
        X_val_tensor.requires_grad_(True)
        val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
        train_loader = DataLoader(dataset=dataset,
                                  batch_size=int(batch_size),
                                  shuffle=True)
        val_loader = DataLoader(dataset=val_dataset,
                                batch_size=int(batch_size))

    # Optimizer and exponentially decaying learning-rate schedule.
    optimizer = optim.Adam(self.diffeomorphism_model.parameters(),
                           lr=learning_rate,
                           weight_decay=l2)
    scheduler = optim.lr_scheduler.LambdaLR(
        optimizer, lr_lambda=lambda epoch: learning_decay**epoch)

    def make_train_step(model, loss_fn, optimizer):
        def train_step(x, y):
            model.train()  # Set model to training mode
            optimizer.zero_grad()  # clear grads before, not after, the step
            y_pred = model(x)
            loss = loss_fn(y, y_pred, model.training)
            loss.backward()
            optimizer.step()
            return loss.item()

        return train_step

    train_step = make_train_step(
        self.diffeomorphism_model,
        self.diffeomorphism_model.diffeomorphism_loss, optimizer)

    # Optional warm start: re-initialize all linear layers (Xavier).
    def init_normal(m):
        if type(m) == nn.Linear:
            nn.init.xavier_normal_(m.weight)

    if initialize:
        self.diffeomorphism_model.apply(init_normal)

    losses = []
    val_losses = []

    def improvement_metric():
        """Early-stopping statistic: normalized mean distance of the last
        (up to) 3 validation losses from the latest one; small values mean
        the validation loss has plateaued."""
        window = min(3, len(val_losses))
        return sum(abs(array(val_losses[-window:]) - val_losses[-1])) / (
            3 * val_losses[-window])

    # Training loop
    for i in range(n_epochs):
        # Mini-batch training pass.
        batch_loss = []
        for x_batch, y_batch in train_loader:
            # Send mini batch data to same location as model:
            x_batch = x_batch.to(device)
            y_batch = y_batch.to(device)
            batch_loss.append(train_step(x_batch, y_batch))
        losses.append(sum(batch_loss) / len(batch_loss))

        # Validation pass (no gradients, eval mode).
        batch_val_loss = []
        with no_grad():
            self.diffeomorphism_model.eval()
            for x_val, y_val in val_loader:
                x_val = x_val.to(device)
                y_val = y_val.to(device)
                y_pred = self.diffeomorphism_model(x_val)
                batch_val_loss.append(
                    float(
                        self.diffeomorphism_model.diffeomorphism_loss(
                            y_val, y_pred,
                            self.diffeomorphism_model.training)))
            val_losses.append(sum(batch_val_loss) / len(batch_val_loss))

        # The explicit-epoch form of step() is deprecated, but it is kept
        # deliberately: plain step() would shift the decay schedule by one
        # epoch relative to the original behavior.
        scheduler.step(i)

        if verbose:
            print(' - Epoch: ', i, ' Training loss:',
                  format(losses[-1], '08f'), ' Validation loss:',
                  format(val_losses[-1], '08f'))
            print('Improvement metric (for early stopping): ',
                  improvement_metric())
        # Early stop once past a quarter of the epoch budget and plateaued.
        if i > n_epochs / 4 and improvement_metric() < 0.01:
            break

    return val_losses[-1]