Example #1
def generator_from_data(dataset,
                        generator_type='regress',
                        features_list=None,
                        n_epochs=100,
                        n_layers=3,
                        n_hiddens=200,
                        p_dropout=0,
                        num_bins=100,
                        training_args=None):
    """NOTE: Training epochs `n_epochs` should scale with the number of features.
    """
    if generator_type == 'oracle':
        n_features = dataset[0][0].shape[-1]
        generator = GeneratorOracle(n_features,
                                    gaussian=dataset.gaussian,
                                    rho=dataset.rho,
                                    normalize=dataset.normalize)

        return generator, None

    else:  # Generator needs to be trained

        # All default training arguments are hidden here
        default_args = DDICT(
            optimizer='Adam',
            batch_size=128,
            lr=0.003,
            lr_step_size=20,
            lr_decay=0.5,
            num_bins=10,
        )

        # Custom training arguments
        args = default_args
        if training_args is not None:
            for k in training_args:
                args[k] = training_args[k]

        # Data
        n_features = dataset[0][0].shape[-1]
        dataloader = DataLoader(dataset,
                                batch_size=args.batch_size,
                                shuffle=True)

        if generator_type == 'classify':
            generator = GeneratorClassify(n_features,
                                          n_layers,
                                          n_hiddens,
                                          num_bins=num_bins,
                                          init_dataset=dataset)
        elif generator_type == 'regress':
            generator = GeneratorRegress(n_features, n_layers, n_hiddens)
        else:
            raise ValueError('generator_type has to be classify or regress')

        optimizer = getattr(optim, args.optimizer)(generator.parameters(),
                                                   lr=args.lr)
        scheduler = optim.lr_scheduler.StepLR(optimizer,
                                              step_size=args.lr_step_size,
                                              gamma=args.lr_decay)

        tr_loss = []
        for epoch in range(n_epochs):
            tr_loss += [
                train_generator(generator,
                                dataloader,
                                optimizer,
                                features_list,
                                log_times=0)
            ]
            scheduler.step()

        return generator, tr_loss
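A brief usage sketch for the function above; `dataset` and the override keys are assumptions that mirror the conventions visible in the snippet, and the concrete values are illustrative rather than taken from the original code:

# Hypothetical usage of generator_from_data; `dataset` comes from the
# surrounding project, and the override keys mirror the defaults above.
custom_args = {'batch_size': 64, 'lr': 1e-3, 'lr_step_size': 10}
generator, tr_loss = generator_from_data(dataset,
                                         generator_type='regress',
                                         n_epochs=50,   # scale with n_features (see docstring)
                                         training_args=custom_args)
if tr_loss is not None:  # tr_loss is None only for the 'oracle' generator
    print('final epoch loss:', tr_loss[-1])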
Example #2
trn_imgs = imgs[trn_ndx]
trn_lbls = lbls[trn_ndx]
vld_imgs = imgs[vld_ndx]
vld_lbls = lbls[vld_ndx]


# In[5]:


training_set = Bengaliai_DS(trn_imgs, trn_lbls, transform=augs)
validation_set = Bengaliai_DS(vld_imgs, vld_lbls)

batch_size = 96

training_loader = DataLoader(training_set, batch_size=batch_size, num_workers=6, shuffle=True)
validation_loader = DataLoader(validation_set, batch_size=batch_size, num_workers=6, shuffle=False)


# ---
# ### model

# In[6]:


N_EPOCHS = 120

reduction = 'mean'

checkpoint_name = 'notebook_purepytorch_from_embedding_SomeAugs_Mu1_{:d}.pth'
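The cells above only set up the loaders, epoch count, and checkpoint filename template; a minimal sketch of how these pieces are typically wired together, assuming a `model`, `optimizer`, and `criterion` are defined in later cells (the real Bengali.AI task has three label heads, so an actual loop would combine several losses), is:

# Hypothetical training loop; `model`, `optimizer`, and `criterion` are
# assumed to be defined elsewhere in the notebook.
for epoch in range(N_EPOCHS):
    model.train()
    for imgs, lbls in training_loader:
        optimizer.zero_grad()
        loss = criterion(model(imgs), lbls)
        loss.backward()
        optimizer.step()

    model.eval()
    with torch.no_grad():
        val_loss = sum(criterion(model(imgs), lbls).item()
                       for imgs, lbls in validation_loader)

    # checkpoint_name is a format string taking the epoch index
    torch.save(model.state_dict(), checkpoint_name.format(epoch))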
Example #3
def main():
    word_embd_dim = 100  # if using pre-trained choose word_embd_dim from [50, 100, 200, 300]
    pos_embd_dim = 15
    hidden_dim = 125
    MLP_inner_dim = 100
    epochs = 30
    learning_rate = 0.01
    dropout_layers_probability = 0.0
    weight_decay = 0.0  # TODO: must be 0.0 when only some of the embedding vectors are trained (i.e. some pre-trained, some not)
    alpha = 0.25  # 0.0 means no word dropout
    # TODO: when using pre-trained vectors, min_freq must be 1
    min_freq = 1  # minimum term-frequency to include in vocabulary, use 1 if you wish to use all words
    BiLSTM_layers = 3
    use_pre_trained = True
    vectors = f'glove.6B.{word_embd_dim}d' if use_pre_trained else ''
    path_train = "train.labeled"
    path_test = "test.labeled"

    run_description = f"KiperwasserDependencyParser\n" \
                      f"-------------------------------------------------------------------------------------------\n" \
                      f"word_embd_dim = {word_embd_dim}\n" \
                      f"pos_embd_dim = {pos_embd_dim}\n" \
                      f"hidden_dim = {hidden_dim}\n" \
                      f"MLP_inner_dim = {MLP_inner_dim}\n" \
                      f"epochs = {epochs}\n" \
                      f"learning_rate = {learning_rate}\n" \
                      f"dropout_layers_probability = {dropout_layers_probability}\n" \
                      f"weight_decay = {weight_decay}\n" \
                      f"alpha = {alpha}\n" \
                      f"min_freq = {min_freq}\n" \
                      f"BiLSTM_layers = {BiLSTM_layers}\n" \
                      f"use_pre_trained = {use_pre_trained}\n" \
                      f"vectors = {vectors}\n" \
                      f"path_train = {path_train}\n" \
                      f"path_test = {path_test}\n"

    current_machine_date_time = time.strftime('%Y-%m-%d %H:%M:%S',
                                              time.localtime(int(time.time())))
    print(f"{current_machine_date_time}\n" f"{run_description}")

    path_to_save_model = os.path.join('saved_models',
                                      f'model {current_machine_date_time}.pt')
    """TRAIN DATA"""
    # TODO: add path_test when using pre-trained embeddings with freeze?
    # TODO change to test when needed!!!!
    train_word_dict, train_pos_dict = get_vocabs_counts(
        [path_train, path_test, 'comp.unlabeled'])
    train = DependencyDataset(path=path_train,
                              word_dict=train_word_dict,
                              pos_dict=train_pos_dict,
                              word_embd_dim=word_embd_dim,
                              pos_embd_dim=pos_embd_dim,
                              test=False,
                              use_pre_trained=use_pre_trained,
                              pre_trained_vectors_name=vectors,
                              min_freq=min_freq)
    train_dataloader = DataLoader(train, shuffle=True)
    model = KiperwasserDependencyParser(train, hidden_dim, MLP_inner_dim,
                                        BiLSTM_layers,
                                        dropout_layers_probability)
    """TEST DATA"""

    test = DependencyDataset(path=path_test,
                             word_dict=train_word_dict,
                             pos_dict=train_pos_dict,
                             test=[
                                 train.word_idx_mappings,
                                 train.pos_idx_mappings, train.word_vectors
                             ])
    test_dataloader = DataLoader(test, shuffle=False)
    """TRAIN THE PARSER ON TRAIN DATA"""
    train_accuracy_list, train_loss_list, test_accuracy_list, test_loss_list = \
        train_kiperwasser_parser(model, train_dataloader, test_dataloader, epochs, learning_rate, weight_decay, alpha)

    print(f'\ntrain_accuracy_list = {train_accuracy_list}'
          f'\ntrain_loss_list = {train_loss_list}'
          f'\ntest_accuracy_list = {test_accuracy_list}'
          f'\ntest_loss_list = {test_loss_list}')
    """SAVE MODEL"""
    torch.save(model.state_dict(), path_to_save_model.replace(':', '-'))
    """PLOT GRAPHS"""
Example #4
import os
import argparse

from itertools import product
import pandas as pd
from torchvision import models

from minicifar import minicifar_train, minicifar_test, train_sampler, valid_sampler
from torch.utils.data.dataloader import DataLoader

import numpy as np
import random

trainloader = DataLoader(minicifar_train, batch_size=32, sampler=train_sampler)
validloader = DataLoader(minicifar_train, batch_size=32, sampler=valid_sampler)
full_trainloader = DataLoader(minicifar_train, batch_size=32)
testloader = DataLoader(minicifar_test, batch_size=32)

cfg = {
    'VGG11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'VGG13':
    [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'VGG16': [
        64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M',
        512, 512, 512, 'M'
    ],
    'VGG19': [
        64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512,
        512, 'M', 512, 512, 512, 512, 'M'
    ],
}
Example #5
 def tng_dataloader(self):
     dataset = DatasetFromFolder(data_dir=self.dataroot / 'train',
                                 scale_factor=4,
                                 patch_size=96,
                                 preupsample=True)
     return DataLoader(dataset, batch_size=16)
Example #6
 def val_dataloader(self) -> DataLoader:
     return DataLoader(self.val_fold)
Example #7
 def train_dataloader(self) -> DataLoader:
     return DataLoader(self.train_fold)
Example #8
from functions import *
import os
'''
BBVI without Rao-Blackwellization and control variates
'''
num_epochs = 30
batchSize = 12223
num_S = 5  # number of samples used during training
dim = 28 * 28 + 1
eta = 0.00001  # step size
num_St = 2000  # number of samples used during testing
# load the data
transform = transforms.ToTensor()
train_data = DatasetFromCSV('./dataset/train_images_csv.csv', './dataset/train_labels_csv.csv', transforms=transform)
test_data = DatasetFromCSV('./dataset/test_images_csv.csv', './dataset/test_labels_csv.csv', transforms=transform)
train_loader = DataLoader(train_data, batch_size=batchSize, shuffle=True)

# define the variational distribution parameters
para = torch.zeros(dim * 2, requires_grad=True)
# para[dim:] = torch.ones(dim) * (-1)


# storage for results
elbo_list = []

# AdaGrad accumulator
G = torch.zeros((dim * 2, dim * 2))

# start iterating
for epoch in range(num_epochs):
    for i, data in enumerate(train_loader):
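        # NOTE: the original example is truncated here. The lines below are a
        # hypothetical sketch of one plain BBVI step (score-function gradient,
        # mean-field Gaussian q, AdaGrad update), not the original code;
        # log_joint(w, x, y) is an assumed helper from `functions`.
        x, y = data
        x = x.view(x.shape[0], -1)

        mu, log_sigma = para[:dim].detach(), para[dim:].detach()
        sigma = torch.exp(log_sigma)

        grad_est = torch.zeros(dim * 2)
        elbo = 0.0
        for _ in range(num_S):
            w = mu + sigma * torch.randn(dim)  # sample w ~ q
            logq = torch.distributions.Normal(mu, sigma).log_prob(w).sum()
            f = log_joint(w, x, y) - logq  # ELBO integrand

            # closed-form score of the diagonal Gaussian w.r.t. (mu, log_sigma)
            score_mu = (w - mu) / sigma ** 2
            score_logsig = (w - mu) ** 2 / sigma ** 2 - 1.0
            grad_est += torch.cat([score_mu, score_logsig]) * f / num_S
            elbo += float(f) / num_S

        elbo_list.append(elbo)

        # AdaGrad ascent step on the variational parameters
        G += torch.diag(grad_est ** 2)
        with torch.no_grad():
            para += eta * grad_est / (torch.sqrt(torch.diag(G)) + 1e-8)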
Example #9
        transforms.Resize(configuration.image_size),
        transforms.CenterCrop(configuration.image_size),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ]))
classes = dataset.classes
for class_name in classes:
    if not os.path.isdir(Path(configuration.predicted_dir, class_name)):
        os.makedirs(Path(configuration.predicted_dir, class_name),
                    exist_ok=False)
# create dataloader
device = torch.device("cuda:0" if (
    torch.cuda.is_available() and configuration.num_gpus > 0) else "cpu")

dataloader = DataLoader(dataset,
                        batch_size=configuration.batch_size,
                        shuffle=True,
                        num_workers=configuration.num_workers)

# X_tr, Y_tr, X_te, Y_te = dataset.get_split()

optimizer = optim.SGD(net.parameters(), **configuration.optimizer_args)
net = net.to(device)

# train
losses = []
for epoch in range(configuration.epochs):
    net.train()
    for batch_idx, (x, y) in enumerate(dataloader):
        x, y = x.to(device), y.to(device)
        optimizer.zero_grad()
        # print('x', x.shape)
Example #10
def main():
    args = parse_args()

    # Initialize the accelerator. We will let the accelerator handle device placement for us in this example.
    accelerator = Accelerator()
    # Make one log on every process with the configuration for debugging.
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s -   %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO,
    )
    logger.info(accelerator.state)

    # Setup logging, we only want one process per machine to log things on the screen.
    # accelerator.is_local_main_process is only True for one process per machine.
    logger.setLevel(logging.INFO if accelerator.is_local_main_process else logging.ERROR)
    if accelerator.is_local_main_process:
        datasets.utils.logging.set_verbosity_warning()
        transformers.utils.logging.set_verbosity_info()
    else:
        datasets.utils.logging.set_verbosity_error()
        transformers.utils.logging.set_verbosity_error()

    # If passed along, set the training seed now.
    if args.seed is not None:
        set_seed(args.seed)

    # Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below)
    # or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/
    # (the dataset will be downloaded automatically from the datasets Hub).
    #
    # For CSV/JSON files, this script will use the column called 'text' or the first column if no column called
    # 'text' is found. You can easily tweak this behavior (see below).
    #
    # In distributed training, the load_dataset function guarantees that only one local process can concurrently
    # download the dataset.
    if args.dataset_name is not None:
        # Downloading and loading a dataset from the hub.
        raw_datasets = load_dataset(args.dataset_name, args.dataset_config_name)
    else:
        data_files = {}
        if args.train_file is not None:
            data_files["train"] = args.train_file
        if args.validation_file is not None:
            data_files["validation"] = args.validation_file
        if args.test_file is not None:
            data_files["test"] = args.test_file
        extension = args.train_file.split(".")[-1]
        raw_datasets = load_dataset(extension, data_files=data_files, field="data")
    # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
    # https://huggingface.co/docs/datasets/loading_datasets.html.

    # Load pretrained model and tokenizer
    #
    # In distributed training, the .from_pretrained methods guarantee that only one local process can concurrently
    # download model & vocab.

    if args.config_name:
        config = AutoConfig.from_pretrained(args.config_name)
    elif args.model_name_or_path:
        config = AutoConfig.from_pretrained(args.model_name_or_path)
    else:
        config = CONFIG_MAPPING[args.model_type]()
        logger.warning("You are instantiating a new config instance from scratch.")

    if args.tokenizer_name:
        tokenizer = AutoTokenizer.from_pretrained(args.tokenizer_name, use_fast=True)
    elif args.model_name_or_path:
        tokenizer = AutoTokenizer.from_pretrained(args.model_name_or_path, use_fast=True)
    else:
        raise ValueError(
            "You are instantiating a new tokenizer from scratch. This is not supported by this script."
            "You can do it from another script, save it, and load it from here, using --tokenizer_name."
        )

    if args.model_name_or_path:
        model = AutoModelForQuestionAnswering.from_pretrained(
            args.model_name_or_path,
            from_tf=bool(".ckpt" in args.model_name_or_path),
            config=config,
        )
    else:
        logger.info("Training new model from scratch")
        model = AutoModelForQuestionAnswering.from_config(config)

    # Preprocessing the datasets.
    # Preprocessing is slightly different for training and evaluation.

    column_names = raw_datasets["train"].column_names

    question_column_name = "question" if "question" in column_names else column_names[0]
    context_column_name = "context" if "context" in column_names else column_names[1]
    answer_column_name = "answers" if "answers" in column_names else column_names[2]

    # Padding side determines if we do (question|context) or (context|question).
    pad_on_right = tokenizer.padding_side == "right"

    if args.max_seq_length > tokenizer.model_max_length:
        logger.warning(
            f"The max_seq_length passed ({args.max_seq_length}) is larger than the maximum length for the"
            f"model ({tokenizer.model_max_length}). Using max_seq_length={tokenizer.model_max_length}."
        )

    max_seq_length = min(args.max_seq_length, tokenizer.model_max_length)

    # Training preprocessing
    def prepare_train_features(examples):
        # Tokenize our examples with truncation and maybe padding, but keep the overflows using a stride. This results
        # in one example possibly giving several features when a context is long, each of those features having a
        # context that overlaps a bit the context of the previous feature.
        tokenized_examples = tokenizer(
            examples[question_column_name if pad_on_right else context_column_name],
            examples[context_column_name if pad_on_right else question_column_name],
            truncation="only_second" if pad_on_right else "only_first",
            max_length=max_seq_length,
            stride=args.doc_stride,
            return_overflowing_tokens=True,
            return_offsets_mapping=True,
            padding="max_length" if args.pad_to_max_length else False,
        )

        # Since one example might give us several features if it has a long context, we need a map from a feature to
        # its corresponding example. This key gives us just that.
        sample_mapping = tokenized_examples.pop("overflow_to_sample_mapping")
        # The offset mappings will give us a map from token to character position in the original context. This will
        # help us compute the start_positions and end_positions.
        offset_mapping = tokenized_examples.pop("offset_mapping")

        # Let's label those examples!
        tokenized_examples["start_positions"] = []
        tokenized_examples["end_positions"] = []

        for i, offsets in enumerate(offset_mapping):
            # We will label impossible answers with the index of the CLS token.
            input_ids = tokenized_examples["input_ids"][i]
            cls_index = input_ids.index(tokenizer.cls_token_id)

            # Grab the sequence corresponding to that example (to know what is the context and what is the question).
            sequence_ids = tokenized_examples.sequence_ids(i)

            # One example can give several spans, this is the index of the example containing this span of text.
            sample_index = sample_mapping[i]
            answers = examples[answer_column_name][sample_index]
            # If no answers are given, set the cls_index as answer.
            if len(answers["answer_start"]) == 0:
                tokenized_examples["start_positions"].append(cls_index)
                tokenized_examples["end_positions"].append(cls_index)
            else:
                # Start/end character index of the answer in the text.
                start_char = answers["answer_start"][0]
                end_char = start_char + len(answers["text"][0])

                # Start token index of the current span in the text.
                token_start_index = 0
                while sequence_ids[token_start_index] != (1 if pad_on_right else 0):
                    token_start_index += 1

                # End token index of the current span in the text.
                token_end_index = len(input_ids) - 1
                while sequence_ids[token_end_index] != (1 if pad_on_right else 0):
                    token_end_index -= 1

                # Detect if the answer is out of the span (in which case this feature is labeled with the CLS index).
                if not (offsets[token_start_index][0] <= start_char and offsets[token_end_index][1] >= end_char):
                    tokenized_examples["start_positions"].append(cls_index)
                    tokenized_examples["end_positions"].append(cls_index)
                else:
                    # Otherwise move the token_start_index and token_end_index to the two ends of the answer.
                    # Note: we could go after the last offset if the answer is the last word (edge case).
                    while token_start_index < len(offsets) and offsets[token_start_index][0] <= start_char:
                        token_start_index += 1
                    tokenized_examples["start_positions"].append(token_start_index - 1)
                    while offsets[token_end_index][1] >= end_char:
                        token_end_index -= 1
                    tokenized_examples["end_positions"].append(token_end_index + 1)

        return tokenized_examples

    if "train" not in raw_datasets:
        raise ValueError("--do_train requires a train dataset")
    train_dataset = raw_datasets["train"]
    if args.max_train_samples is not None:
        # We will select samples from the whole data if the argument is specified
        train_dataset = train_dataset.select(range(args.max_train_samples))
    # Create train feature from dataset
    train_dataset = train_dataset.map(
        prepare_train_features,
        batched=True,
        num_proc=args.preprocessing_num_workers,
        remove_columns=column_names,
        load_from_cache_file=not args.overwrite_cache,
        desc="Running tokenizer on train dataset",
    )
    if args.max_train_samples is not None:
        # The number of samples might increase during feature creation, so we select only the specified max samples
        train_dataset = train_dataset.select(range(args.max_train_samples))

    # Validation preprocessing
    def prepare_validation_features(examples):
        # Tokenize our examples with truncation and maybe padding, but keep the overflows using a stride. This results
        # in one example possibly giving several features when a context is long, each of those features having a
        # context that overlaps a bit the context of the previous feature.
        tokenized_examples = tokenizer(
            examples[question_column_name if pad_on_right else context_column_name],
            examples[context_column_name if pad_on_right else question_column_name],
            truncation="only_second" if pad_on_right else "only_first",
            max_length=max_seq_length,
            stride=args.doc_stride,
            return_overflowing_tokens=True,
            return_offsets_mapping=True,
            padding="max_length" if args.pad_to_max_length else False,
        )

        # Since one example might give us several features if it has a long context, we need a map from a feature to
        # its corresponding example. This key gives us just that.
        sample_mapping = tokenized_examples.pop("overflow_to_sample_mapping")

        # For evaluation, we will need to convert our predictions to substrings of the context, so we keep the
        # corresponding example_id and we will store the offset mappings.
        tokenized_examples["example_id"] = []

        for i in range(len(tokenized_examples["input_ids"])):
            # Grab the sequence corresponding to that example (to know what is the context and what is the question).
            sequence_ids = tokenized_examples.sequence_ids(i)
            context_index = 1 if pad_on_right else 0

            # One example can give several spans, this is the index of the example containing this span of text.
            sample_index = sample_mapping[i]
            tokenized_examples["example_id"].append(examples["id"][sample_index])

            # Set to None the offset_mapping that are not part of the context so it's easy to determine if a token
            # position is part of the context or not.
            tokenized_examples["offset_mapping"][i] = [
                (o if sequence_ids[k] == context_index else None)
                for k, o in enumerate(tokenized_examples["offset_mapping"][i])
            ]

        return tokenized_examples

    if "validation" not in raw_datasets:
        raise ValueError("--do_eval requires a validation dataset")
    eval_examples = raw_datasets["validation"]
    if args.max_eval_samples is not None:
        # We will select samples from the whole data
        eval_examples = eval_examples.select(range(args.max_eval_samples))
    # Validation Feature Creation
    eval_dataset = eval_examples.map(
        prepare_validation_features,
        batched=True,
        num_proc=args.preprocessing_num_workers,
        remove_columns=column_names,
        load_from_cache_file=not args.overwrite_cache,
        desc="Running tokenizer on validation dataset",
    )

    if args.max_eval_samples is not None:
        # During feature creation the number of samples might increase, so we select the required samples again
        eval_dataset = eval_dataset.select(range(args.max_eval_samples))

    if args.do_predict:
        if "test" not in raw_datasets:
            raise ValueError("--do_predict requires a test dataset")
        predict_examples = raw_datasets["test"]
        if args.max_predict_samples is not None:
            # We will select samples from the whole data
            predict_examples = predict_examples.select(range(args.max_predict_samples))
        # Predict Feature Creation
        predict_dataset = predict_examples.map(
            prepare_validation_features,
            batched=True,
            num_proc=args.preprocessing_num_workers,
            remove_columns=column_names,
            load_from_cache_file=not args.overwrite_cache,
            desc="Running tokenizer on prediction dataset",
        )
        if args.max_predict_samples is not None:
            # During feature creation the number of samples might increase, so we select the required samples again
            predict_dataset = predict_dataset.select(range(args.max_predict_samples))

    # Log a few random samples from the training set:
    for index in random.sample(range(len(train_dataset)), 3):
        logger.info(f"Sample {index} of the training set: {train_dataset[index]}.")

    # DataLoaders creation:
    if args.pad_to_max_length:
        # If padding was already done to max length, we use the default data collator that will just convert everything
        # to tensors.
        data_collator = default_data_collator
    else:
        # Otherwise, `DataCollatorWithPadding` will apply dynamic padding for us (by padding to the maximum length of
        # the samples passed). When using mixed precision, we add `pad_to_multiple_of=8` to pad all tensors to multiple
        # of 8s, which will enable the use of Tensor Cores on NVIDIA hardware with compute capability >= 7.5 (Volta).
        data_collator = DataCollatorWithPadding(tokenizer, pad_to_multiple_of=(8 if accelerator.use_fp16 else None))

    train_dataloader = DataLoader(
        train_dataset, shuffle=True, collate_fn=data_collator, batch_size=args.per_device_train_batch_size
    )

    eval_dataset_for_model = eval_dataset.remove_columns(["example_id", "offset_mapping"])
    eval_dataloader = DataLoader(
        eval_dataset_for_model, collate_fn=data_collator, batch_size=args.per_device_eval_batch_size
    )

    if args.do_predict:
        predict_dataset_for_model = predict_dataset.remove_columns(["example_id", "offset_mapping"])
        predict_dataloader = DataLoader(
            predict_dataset_for_model, collate_fn=data_collator, batch_size=args.per_device_eval_batch_size
        )

    # Post-processing:
    def post_processing_function(examples, features, predictions, stage="eval"):
        # Post-processing: we match the start logits and end logits to answers in the original context.
        predictions = postprocess_qa_predictions(
            examples=examples,
            features=features,
            predictions=predictions,
            version_2_with_negative=args.version_2_with_negative,
            n_best_size=args.n_best_size,
            max_answer_length=args.max_answer_length,
            null_score_diff_threshold=args.null_score_diff_threshold,
            output_dir=args.output_dir,
            prefix=stage,
        )
        # Format the result to the format the metric expects.
        if args.version_2_with_negative:
            formatted_predictions = [
                {"id": k, "prediction_text": v, "no_answer_probability": 0.0} for k, v in predictions.items()
            ]
        else:
            formatted_predictions = [{"id": k, "prediction_text": v} for k, v in predictions.items()]

        references = [{"id": ex["id"], "answers": ex[answer_column_name]} for ex in examples]
        return EvalPrediction(predictions=formatted_predictions, label_ids=references)

    metric = load_metric("squad_v2" if args.version_2_with_negative else "squad")

    # Create and fill numpy array of size len_of_validation_data * max_length_of_output_tensor
    def create_and_fill_np_array(start_or_end_logits, dataset, max_len):
        """
        Create and fill numpy array of size len_of_validation_data * max_length_of_output_tensor

        Args:
            start_or_end_logits(:obj:`tensor`):
                This is the output predictions of the model. We can only enter either start or end logits.
            dataset: The evaluation dataset
            max_len(:obj:`int`):
                The maximum length of the output tensor. ( See the model.eval() part for more details )
        """

        step = 0
        # create a numpy array and fill it with -100.
        logits_concat = np.full((len(dataset), max_len), -100, dtype=np.float64)
        # Now that we have created the array, we populate it with the outputs gathered using accelerator.gather
        for i, output_logit in enumerate(start_or_end_logits):  # populate columns
            # Copy each gathered batch of logits into the array,
            # advancing `step` after every iteration.

            batch_size = output_logit.shape[0]
            cols = output_logit.shape[1]

            if step + batch_size < len(dataset):
                logits_concat[step : step + batch_size, :cols] = output_logit
            else:
                logits_concat[step:, :cols] = output_logit[: len(dataset) - step]

            step += batch_size

        return logits_concat

    # Optimizer
    # Split weights in two groups, one with weight decay and the other not.
    no_decay = ["bias", "LayerNorm.weight"]
    optimizer_grouped_parameters = [
        {
            "params": [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)],
            "weight_decay": args.weight_decay,
        },
        {
            "params": [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)],
            "weight_decay": 0.0,
        },
    ]
    optimizer = AdamW(optimizer_grouped_parameters, lr=args.learning_rate)

    # Prepare everything with our `accelerator`.
    model, optimizer, train_dataloader, eval_dataloader = accelerator.prepare(
        model, optimizer, train_dataloader, eval_dataloader
    )

    # Note: the training dataloader needs to be prepared before we grab its length below (because its length will be
    # shorter when running on multiple processes)

    # Scheduler and math around the number of training steps.
    num_update_steps_per_epoch = math.ceil(len(train_dataloader) / args.gradient_accumulation_steps)
    if args.max_train_steps is None:
        args.max_train_steps = args.num_train_epochs * num_update_steps_per_epoch
    else:
        args.num_train_epochs = math.ceil(args.max_train_steps / num_update_steps_per_epoch)

    lr_scheduler = get_scheduler(
        name=args.lr_scheduler_type,
        optimizer=optimizer,
        num_warmup_steps=args.num_warmup_steps,
        num_training_steps=args.max_train_steps,
    )

    # Train!
    total_batch_size = args.per_device_train_batch_size * accelerator.num_processes * args.gradient_accumulation_steps

    logger.info("***** Running training *****")
    logger.info(f"  Num examples = {len(train_dataset)}")
    logger.info(f"  Num Epochs = {args.num_train_epochs}")
    logger.info(f"  Instantaneous batch size per device = {args.per_device_train_batch_size}")
    logger.info(f"  Total train batch size (w. parallel, distributed & accumulation) = {total_batch_size}")
    logger.info(f"  Gradient Accumulation steps = {args.gradient_accumulation_steps}")
    logger.info(f"  Total optimization steps = {args.max_train_steps}")

    # Only show the progress bar once on each machine.
    progress_bar = tqdm(range(args.max_train_steps), disable=not accelerator.is_local_main_process)
    completed_steps = 0

    for epoch in range(args.num_train_epochs):
        model.train()
        for step, batch in enumerate(train_dataloader):
            outputs = model(**batch)
            loss = outputs.loss
            loss = loss / args.gradient_accumulation_steps
            accelerator.backward(loss)
            if step % args.gradient_accumulation_steps == 0 or step == len(train_dataloader) - 1:
                optimizer.step()
                lr_scheduler.step()
                optimizer.zero_grad()
                progress_bar.update(1)
                completed_steps += 1

            if completed_steps >= args.max_train_steps:
                break

    # Evaluation
    logger.info("***** Running Evaluation *****")
    logger.info(f"  Num examples = {len(eval_dataset)}")
    logger.info(f"  Batch size = {args.per_device_eval_batch_size}")

    all_start_logits = []
    all_end_logits = []
    for step, batch in enumerate(eval_dataloader):
        with torch.no_grad():
            outputs = model(**batch)
            start_logits = outputs.start_logits
            end_logits = outputs.end_logits

            if not args.pad_to_max_length:  # necessary to pad predictions and labels for being gathered
                start_logits = accelerator.pad_across_processes(start_logits, dim=1, pad_index=-100)
                end_logits = accelerator.pad_across_processes(end_logits, dim=1, pad_index=-100)

            all_start_logits.append(accelerator.gather(start_logits).cpu().numpy())
            all_end_logits.append(accelerator.gather(end_logits).cpu().numpy())

    max_len = max([x.shape[1] for x in all_start_logits])  # Get the max_length of the tensor

    # concatenate the numpy array
    start_logits_concat = create_and_fill_np_array(all_start_logits, eval_dataset, max_len)
    end_logits_concat = create_and_fill_np_array(all_end_logits, eval_dataset, max_len)

    # delete the list of numpy arrays
    del all_start_logits
    del all_end_logits

    outputs_numpy = (start_logits_concat, end_logits_concat)
    prediction = post_processing_function(eval_examples, eval_dataset, outputs_numpy)
    eval_metric = metric.compute(predictions=prediction.predictions, references=prediction.label_ids)
    logger.info(f"Evaluation metrics: {eval_metric}")

    # Prediction
    if args.do_predict:
        logger.info("***** Running Prediction *****")
        logger.info(f"  Num examples = {len(predict_dataset)}")
        logger.info(f"  Batch size = {args.per_device_eval_batch_size}")

        all_start_logits = []
        all_end_logits = []
        for step, batch in enumerate(predict_dataloader):
            with torch.no_grad():
                outputs = model(**batch)
                start_logits = outputs.start_logits
                end_logits = outputs.end_logits

                if not args.pad_to_max_length:  # necessary to pad predictions and labels for being gathered
                    start_logits = accelerator.pad_across_processes(start_logits, dim=1, pad_index=-100)
                    end_logits = accelerator.pad_across_processes(end_logits, dim=1, pad_index=-100)

                all_start_logits.append(accelerator.gather(start_logits).cpu().numpy())
                all_end_logits.append(accelerator.gather(end_logits).cpu().numpy())

        max_len = max([x.shape[1] for x in all_start_logits])  # Get the max_length of the tensor
        # concatenate the numpy array
        start_logits_concat = create_and_fill_np_array(all_start_logits, predict_dataset, max_len)
        end_logits_concat = create_and_fill_np_array(all_end_logits, predict_dataset, max_len)

        # delete the list of numpy arrays
        del all_start_logits
        del all_end_logits

        outputs_numpy = (start_logits_concat, end_logits_concat)
        prediction = post_processing_function(predict_examples, predict_dataset, outputs_numpy)
        predict_metric = metric.compute(predictions=prediction.predictions, references=prediction.label_ids)
        logger.info(f"Predict metrics: {predict_metric}")

    if args.output_dir is not None:
        accelerator.wait_for_everyone()
        unwrapped_model = accelerator.unwrap_model(model)
        unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save)
Example #11
    def __init__(self):
        # Use the gpu
        self.device = torch.device('cuda')

        # Create the generators and discriminators
        self.generator_A = CycleGANGenerator(3, 3, 64).to(self.device)
        self.generator_B = CycleGANGenerator(3, 3, 64).to(self.device)
        self.discriminator_A = CycleGANDiscriminator(3, 64).to(self.device)
        self.discriminator_B = CycleGANDiscriminator(3, 64).to(self.device)

        # Print the networks
        print(self.generator_A)
        print(self.generator_B)
        print(self.discriminator_A)
        print(self.discriminator_B)

        # Initialize the weights of all networks
        self.generator_A.apply(self.init_weights)
        self.generator_B.apply(self.init_weights)
        self.discriminator_A.apply(self.init_weights)
        self.discriminator_B.apply(self.init_weights)

        # Create the optimizers for all the networks
        self.generator_A_optimizer = optim.Adam(self.generator_A.parameters(),
                                                lr=0.0002,
                                                betas=(0.5, 0.999))
        self.generator_B_optimizer = optim.Adam(self.generator_B.parameters(),
                                                lr=0.0002,
                                                betas=(0.5, 0.999))
        self.discriminator_A_optimizer = optim.Adam(
            self.discriminator_A.parameters(), lr=0.0002, betas=(0.5, 0.999))
        self.discriminator_B_optimizer = optim.Adam(
            self.discriminator_B.parameters(), lr=0.0002, betas=(0.5, 0.999))

        # Create learning rate schedulers for all the optimizers
        self.generator_A_scheduler = optim.lr_scheduler.LambdaLR(
            self.generator_A_optimizer, lr_lambda=self.schedule_rate)
        self.generator_B_scheduler = optim.lr_scheduler.LambdaLR(
            self.generator_B_optimizer, lr_lambda=self.schedule_rate)
        self.discriminator_A_scheduler = optim.lr_scheduler.LambdaLR(
            self.discriminator_A_optimizer, lr_lambda=self.schedule_rate)
        self.discriminator_B_scheduler = optim.lr_scheduler.LambdaLR(
            self.discriminator_B_optimizer, lr_lambda=self.schedule_rate)

        # Create the buffers to store history of images generated by the generators
        self.generator_A_buffer = ImageBuffer(buffer_size=50)
        self.generator_B_buffer = ImageBuffer(buffer_size=50)

        # Define the loss criterions
        self.cycle_loss = nn.L1Loss()
        self.gan_loss = nn.MSELoss()

        # Get the dataset and dataloaders
        self.dataset = CycleGANDataset(
            base_dir='/home/paurvi/CycleGAN/datasets/summer2winter_yosemite')
        self.dataloader = DataLoader(self.dataset, batch_size=1, num_workers=2)

        # Writers to tensorboard
        self.steps = 0
        self.writer = SummaryWriter(
            comment=
            'cyclegan_cityscapes- 001 changed CycleGANDiscriminator leaakyRelu slope from 0.2 to 0.5'
        )
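The LambdaLR schedulers above reference `self.schedule_rate`, which is not part of this snippet. A plausible implementation, following the common CycleGAN policy of a constant rate followed by a linear decay to zero, could look like the sketch below; the epoch counts are assumptions, not values from the original code:

    # Hypothetical lr_lambda for the schedulers above; epoch counts are assumed.
    def schedule_rate(self, epoch, total_epochs=200, decay_start=100):
        # Keep the base learning rate for `decay_start` epochs,
        # then decay linearly to zero by `total_epochs`.
        if epoch < decay_start:
            return 1.0
        return 1.0 - (epoch - decay_start) / float(total_epochs - decay_start)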
Example #12
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"

writer = SummaryWriter("SRNet/25/2/")
# dataset = SRDataSet("/home/haibao637/xdata/vimeo90k/vimeo_septuplet/",'train','sep_trainlist.txt')
dataset = Vimeo90KDataset(
    "/home/haibao637/xdata/vimeo90k/vimeo90k_train_GT.lmdb",
    "/home/haibao637/xdata/vimeo90k/vimeo90k_train_LR7frames.lmdb")
val_dataset = SRDataSet("/home/haibao637/xdata/Vid4//", 'val')
logdir = "/home/haibao637/xdata/srnet_25.2"
if not os.path.exists(logdir):
    os.makedirs(logdir)
print(len(dataset))
dataloader = DataLoader(dataset,
                        batch_size=16,
                        num_workers=16,
                        shuffle=False,
                        drop_last=True)
val_dataloader = DataLoader(val_dataset,
                            batch_size=1,
                            shuffle=True,
                            drop_last=True)
device = torch.device("cuda")
model = SRNet(3).cuda()

# writer.add_graph(model,torch.rand(1,3,3,64,64).cuda())
model = model.cuda()

# output_pad = torch.nn.ReplicationPad2d(1)
optimizer = torch.optim.Adam(model.parameters(), lr=4e-4, betas=(0.9, 0.99))
# optimizer = torch.optim.Adam([{"params":model.LapPyrNet.parameters(),"lr":1e-4},
Example #13
        x1 = self.downConv1(x)
        x2 = self.downConv2(x)
        x = x1 + x2
        x = self.downConv(x)
        return x


if __name__ == "__main__":
    if not torch.cuda.is_available():
        raise RuntimeError("Cuda is not available.")
    tf = transforms.ToTensor()
    batch_sz = 50  # batch size
    in_size = 128  # size of the generator's input vector
    train_iter = 12  # number of generator training iterations
    origin = datasets.MNIST("..\\data\\", True, transform=tf)
    origin_set = DataLoader(origin, batch_sz, shuffle=True)
    disc = Disc()
    generator = Gen(in_size, out_sz=28)
    disc.cuda()
    generator.cuda()
    dopt = optim.Adam(disc.parameters(), lr=2e-5)
    gopt = optim.Adam(generator.parameters(), lr=2e-2)
    loss_func = nn.BCELoss()
    real_labels = Var(torch.ones((batch_sz, 1))).cuda()
    fake_labels = Var(torch.zeros((batch_sz, 1))).cuda()
    for k, (bx, _) in enumerate(origin_set):
        bx = bx.cuda()
        gen_loss = 0
        dis_loss = 0
        out = disc(bx)
        loss = loss_func(out, real_labels)
Example #14
def main(args):
    dataset = TCTDataset(args.image_root, "dataset/train.json",
                         get_transforms(True))
    dataset_train = TCTDataset(args.image_root, "dataset/train.json",
                               get_transforms(False))
    dataset_val = TCTDataset(args.image_root, "dataset/val.json",
                             get_transforms(False))
    dataset_test = TCTDataset(args.image_root, "dataset/test.json",
                              get_transforms(False))
    dataset = torch.utils.data.Subset(dataset, [i for i in range(50)])
    data_loader = DataLoader(dataset,
                             batch_size=args.batch_size,
                             shuffle=True,
                             num_workers=4,
                             collate_fn=utils.collate_fn)
    dataset_train = torch.utils.data.Subset(dataset_train,
                                            [i for i in range(50)])
    data_loader_train = DataLoader(dataset_train,
                                   batch_size=args.batch_size,
                                   shuffle=True,
                                   num_workers=4,
                                   collate_fn=utils.collate_fn)
    dataset_val = torch.utils.data.Subset(dataset_val, [i for i in range(50)])
    data_loader_val = DataLoader(dataset_val,
                                 batch_size=args.batch_size,
                                 shuffle=False,
                                 num_workers=4,
                                 collate_fn=utils.collate_fn)
    dataset_test = torch.utils.data.Subset(dataset_test,
                                           [i for i in range(50)])
    data_loader_test = DataLoader(dataset_test,
                                  batch_size=args.batch_size,
                                  shuffle=False,
                                  num_workers=4,
                                  collate_fn=utils.collate_fn)

    coco_api = coco_utils.get_coco_api_from_dataset(dataset)
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus
    model = get_model_instance(num_classes=len(coco_api.cats) + 1)
    device = torch.device(
        "cuda:0") if torch.cuda.is_available() else torch.device("cpu")
    print("device: {}".format(device.type))

    optimizer = torch.optim.SGD(model.parameters(),
                                lr=args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=args.lr_steps, gamma=args.lr_gamma)
    epoch = 0

    log = Log()
    log_train = Log(log_dir="log/train")
    log_eval = Log(log_dir="log/eval")
    model_path_manager = ModelPathManager(max_file_path_size=0)

    latest_model_path = model_path_manager.latest_model_path()
    if latest_model_path:
        checkpoint = torch.load(latest_model_path)
        model.load_state_dict(checkpoint["model"], strict=False)
        optimizer.load_state_dict(checkpoint["optimizer"])
        lr_scheduler.load_state_dict(checkpoint["lr_scheduler"])
        epoch = checkpoint["epoch"] + 1
    elif args.pretrain_model:
        print("loading model from", args.pretrain_model)
        checkpoint = torch.load(args.pretrain_model)
        if "model" in checkpoint:
            model.load_state_dict(checkpoint["model"], strict=False)
        else:
            model.load_state_dict(checkpoint, strict=False)

    model.to(device)

    print("Start training")
    train_head = "Epoch : [{:0" + str(len(str(args.epochs))) + "d}]"
    start_time = time.time()
    for epoch in range(epoch, args.epochs):
        train_one_epoch(model,
                        optimizer,
                        data_loader,
                        device,
                        epoch,
                        log,
                        head=train_head.format(epoch))
        lr_scheduler.step()

        save_path = model_path_manager.new_model_path(
            "train_epoch{:02d}.pth".format(epoch))
        torch.save(
            {
                "model": model.state_dict(),
                "optimizer": optimzier.state_dict(),
                "lr_scheduler": lr_scheduler.state_dict(),
                "epoch": epoch
            }, save_path)
        model_path_manager.record_path(save_path)

        evaluate(model,
                 data_loader_train,
                 device,
                 epoch,
                 log_train,
                 head="Train:")
        evaluate(model,
                 data_loader_val,
                 device,
                 epoch,
                 log_eval,
                 head="Evaluate:")
        # engine.evaluate(model, data_loader_val, device)

    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=int(total_time)))
    print("Training time {}".format(total_time_str))

    evaluate(model, data_loader_test, device, None, None, head="Test:")
Example #15
import os

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data.dataloader import DataLoader

from model_unet import UNet
from dataset import Dataset


os.environ["CUDA_VISIBLE_DEVICES"] = "0"

dir_inp = '/home/soroush/codes/test/camvid-master/701_StillsRaw_full/'
dir_lbl = '/home/soroush/codes/test/camvid-master/LabeledApproved_full/'

image_dataset = Dataset(dir_inp, dir_lbl)
saved_model_path = '/home/soroush/codes/test/unet_adam.pth'
data_loader = DataLoader(image_dataset, batch_size=80, shuffle=True)


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model_unet = UNet().to(device)
criterion = nn.CrossEntropyLoss().to(device)
learning_rate = 0.0001
optimizer = optim.Adam(model_unet.parameters(), lr=learning_rate)
num_epochs = 100

# model_unet.load_state_dict(torch.load(PATH))

for epoch in range(num_epochs):
    print(epoch)
    train_epoch_loss = []
Example #16
def main():
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("--data-path", default=None)
    arg_parser.add_argument("--dataset")
    arg_parser.add_argument("--encoder-size", default=5, type=int)
    arg_parser.add_argument("--z-size", default=5, type=int)
    arg_parser.add_argument("--nb-epochs", default=10, type=int)
    arg_parser.add_argument("--columns",
                            default="age,hours-per-week",
                            type=parse_list(str))
    arg_parser.add_argument("--percentages",
                            default="0.1,0.2,0.3,0.4",
                            type=parse_list(float))
    args = arg_parser.parse_args()

    accuracies, f1_scores = [], []
    vae_accuracies, vae_f1_scores = [], []

    columns = {
        c: i
        for i, c in enumerate(
            ADULT_DATASET_COLUMNS[:len(ADULT_DATASET_COLUMNS) - 1])
    }

    for percent in args.percentages:
        print("Training on data with {} corruption".format(percent))
        train_dataset, valid_dataset = load_dataset(args.dataset)(
            columns=[columns[c] for c in args.columns], percent=percent)

        train_params = m(learning_rate=0.00001,
                         minibatch_size=64,
                         nb_epochs=args.nb_epochs)
        vae = VAE(train_dataset.nb_features, args.encoder_size, args.z_size)
        network_optimizer = optim.Adam(vae.parameters(),
                                       lr=train_params.learning_rate)

        train_data_loader = DataLoader(train_dataset,
                                       batch_size=train_params.minibatch_size)
        valid_data_loader = DataLoader(valid_dataset,
                                       batch_size=train_params.minibatch_size)

        print("Training VAE...")
        print("=" * 100)
        vae.fit(train_params, network_optimizer, train_data_loader,
                valid_data_loader)

        print("\nTraining classifier on regular data...")
        classifier = MLP(train_dataset.nb_features,
                         train_dataset.nb_classes,
                         hidden_layer_sizes=None)
        classifier_optimizer = optim.Adam(classifier.parameters(),
                                          lr=train_params.learning_rate)

        train_params = m(learning_rate=0.0001,
                         minibatch_size=64,
                         nb_epochs=args.nb_epochs)

        classifier.fit(train_params, classifier_optimizer, train_data_loader)
        preds_classifier = classifier.predict(valid_data_loader)
        accuracy = accuracy_score(valid_dataset.y,
                                  preds_classifier.argmax(axis=1))
        accuracies.append(accuracy)

        print("Accuracy: {}%".format(accuracy * 100))
        print("=" * 100)
        print("Training classifer on VAE transformed data...")
        train_vae_data_loader = impute(train_data_loader, vae)
        classifier = MLP(train_vae_data_loader.dataset.nb_features,
                         train_vae_data_loader.dataset.nb_classes,
                         hidden_layer_sizes=None)
        classifier_optimizer = optim.Adam(classifier.parameters(),
                                          lr=train_params.learning_rate)

        train_params = m(learning_rate=0.0001,
                         minibatch_size=64,
                         nb_epochs=args.nb_epochs)

        classifier.fit(train_params, classifier_optimizer,
                       train_vae_data_loader)

        valid_vae_data_loader = impute(valid_data_loader, vae)

        # valid_vae_dataset = deepcopy(valid_dataset)
        # valid_vae_dataset.x = vae.predict(valid_data_loader)
        # valid_vae_data_loader = DataLoader(valid_vae_dataset)
        preds_classifier = classifier.predict(valid_vae_data_loader)

        accuracy = accuracy_score(valid_vae_data_loader.dataset.y,
                                  preds_classifier.argmax(axis=1))
        vae_accuracies.append(accuracy)

        print("Accuracy: {}%".format(accuracy * 100))
        print("=" * 100)
        print("*" * 100)

    plt.plot(args.percentages, accuracies, label="corrupt")
    plt.plot(args.percentages, vae_accuracies, label="vae")
    plt.legend()
    plt.show()
Example #17
 def as_pytorch_dataloader(self, split=TRAIN, **kwargs):
     dataset = self.as_pytorch_dataset(split=split)
     return DataLoader(dataset, **kwargs)
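A hedged usage sketch for the helper above; the `TRAIN`/`VALID` split constants and the `corpus` object are assumptions about the surrounding project, and the keyword arguments are forwarded unchanged to `DataLoader`:

# Hypothetical call site; `corpus`, TRAIN and VALID are assumed names.
train_loader = corpus.as_pytorch_dataloader(split=TRAIN, batch_size=32, shuffle=True)
valid_loader = corpus.as_pytorch_dataloader(split=VALID, batch_size=64)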
Example #18
        if self.transform is not None:
            img, target = self.transform(img, target)

        return img, target

    def __len__(self):
        return len(self.imgs)


if __name__ == "__main__":
    # this little example script can be used to visualize the first image
    # loaded from the dataset.
    from torch.utils.data.dataloader import DataLoader
    import matplotlib.pyplot as plt
    from torchvision import transforms
    import torch

    train_data = PennFudanDataset(
        transform=lambda im, ann: (transforms.ToTensor()(im), ann))
    dataloader = DataLoader(train_data, batch_size=1)

    for batch_data in dataloader:
        x, y = batch_data
        plt.imshow(transforms.ToPILImage()(torch.squeeze(x)))
        plt.show()
        print(x.shape)
        print(y)
        break

__all__ = ["PennFudanDataset"]
Example #19
 def test_dataloader(self) -> DataLoader:
     return DataLoader(self.test_dataset)
Example #20
hidden_size = 400
out_size = 10
epochs = 10
batch_size = 100
lr = 0.1

train_dataset = datasets.MNIST(root='./data',
                               train=True,
                               transform=transforms.ToTensor(),
                               download=True)
test_dataset = datasets.MNIST(root='./data',
                              train=False,
                              transform=transforms.ToTensor())

train_loader = DataLoader(dataset=train_dataset,
                          batch_size=batch_size,
                          shuffle=True)
test_loader = DataLoader(dataset=test_dataset,
                         batch_size=batch_size,
                         shuffle=False)

print(train_loader)

net = Net(input_size, hidden_size, out_size)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=lr)

correct_train = 0
total_train = 0

for i, (images, labels) in enumerate(train_loader):
Example #21
 def test_dataloader(self, *args, **kwargs) -> DataLoader:
     return DataLoader(self.dataset,
                       batch_size=self.batch_size,
                       shuffle=False,
                       num_workers=4,
                       pin_memory=True)
Example #22
 def GetTestLoader(self):
     return DataLoader(VOCDataset(),
                       batch_size=1000,
                       shuffle=True,
                       num_workers=2)
Example #23
        root_img,
        train_failists_paths,
        root + filelists_bp + "test_filelist.txt",
        [0 for _ in range(nbatch[scenario])],
        complete_test_set_only=True,
        train_transform=train_transform,
        eval_transform=eval_transform)

    return scenario_obj


__all__ = ['CORe50']

if __name__ == "__main__":

    # this below can be taken as a usage example or a simple test script
    import sys
    from torch.utils.data.dataloader import DataLoader

    scenario = CORe50(scenario="nicv2_79")
    for i, batch in enumerate(scenario.train_stream):
        print(i, batch)
        dataset, t = batch.dataset, batch.task_label
        dl = DataLoader(dataset, batch_size=300)

        for mb in dl:
            x, y = mb
            print(x.shape)
            print(y.shape)
        sys.exit(0)
Example #24
def make_dataloader(dataset, batch_size=16, shuffle=True, key="input_ids"):
    length_func = lambda x: len(x[key]) if key else None
    sampler = OrderedBatchSampler(dataset, batch_size=batch_size, 
                                  length_func=length_func, shuffle=shuffle)
    return DataLoader(dataset, collate_fn=collate_fn, batch_sampler=sampler)
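The snippet relies on two names it does not define, `OrderedBatchSampler` and `collate_fn`. The sketch below is one plausible stand-in for each (a length-ordered batch sampler plus a padding collate function), written as an assumption rather than the original implementation:

# Hypothetical stand-ins for OrderedBatchSampler and collate_fn; not the
# original implementations.
import torch
from torch.utils.data import Sampler
from torch.nn.utils.rnn import pad_sequence


class OrderedBatchSampler(Sampler):
    """Yield batches of indices, ordered by length_func and optionally shuffled."""

    def __init__(self, dataset, batch_size, length_func=None, shuffle=True):
        order = list(range(len(dataset)))
        if length_func is not None:
            order.sort(key=lambda i: length_func(dataset[i]))
        self.batches = [order[i:i + batch_size]
                        for i in range(0, len(order), batch_size)]
        self.shuffle = shuffle

    def __iter__(self):
        batches = self.batches
        if self.shuffle:
            batches = [batches[int(i)] for i in torch.randperm(len(batches))]
        return iter(batches)

    def __len__(self):
        return len(self.batches)


def collate_fn(batch, key="input_ids"):
    # Pad the token id sequences in the batch to a common length.
    seqs = [torch.as_tensor(example[key]) for example in batch]
    return pad_sequence(seqs, batch_first=True, padding_value=0)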
Example #25
 def test_dataloader(self):
     dataset = DatasetFromFolder(data_dir=self.dataroot / 'test',
                                 scale_factor=4,
                                 mode='eval',
                                 preupsample=True)
     return DataLoader(dataset, batch_size=1)
Example #26
def main():
    global tokenizer
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_type", default=None, type=str, required=True,
                        help="Model type selected in the list: " + ", ".join(MODEL_CLASSES.keys()))
    parser.add_argument("--model_name_or_path", default=None, type=str, required=True,
                        help="Path to pre-trained model or shortcut name selected in the list: " + ", ".join(ALL_MODELS))
    parser.add_argument("--pos_model_name_or_path", default=None, type=str, required=True,
                        help="Path to pre-trained model or shortcut name selected in the list: " + ", ".join(ALL_MODELS))
    parser.add_argument("--prompt", type=str, default="")
    parser.add_argument("--padding_text", type=str, default="")
    parser.add_argument("--temperature", type=float, default=1.0)
    parser.add_argument("--top_k", type=int, default=0)
    parser.add_argument("--top_p", type=float, default=0.9)
    parser.add_argument(
        "--overwrite_cache", action="store_true", help="Overwrite the cached training and evaluation sets"
    )
    parser.add_argument("--no_cuda", action='store_true',
                        help="Avoid using CUDA when available")
    parser.add_argument('--seed', type=int, default=42,
                        help="random seed for initialization")

    parser.add_argument('--test_batch_size', type=int, default=1)

    parser.add_argument('--test_file_path', type=str, default=None,
                        help="path of parsed plots to generate completion")
    parser.add_argument(
        "--cache_dir",
        default=None,
        type=str,
        help="Optional directory to store the pre-trained models downloaded from s3 (instead of the default one)",
    )

    parser.add_argument('--results_path', type=str, default=RESULTS_OUT_PATH,
                        help="path for generated results")
    parser.add_argument("--ngenres", type=int, default=0, help="Number of genres for embedding.")
    parser.add_argument("--nfacts", type=int, default=0, help="Number of genres for embedding.")
    args = parser.parse_args()

    args.device = torch.device("cuda:2" if torch.cuda.is_available() and not args.no_cuda else "cpu")
    print("device is " + str(args.device))
    args.n_gpu = torch.cuda.device_count()

    set_seed(args)

    args.model_type = args.model_type.lower()

    pos_model = get_pos_model(args)

    def count_parameters(model):
        return sum(p.numel() for p in model.parameters() if p.requires_grad)

    print("num parameters pos: " + str(count_parameters(pos_model)))

    model_class, tokenizer_class = MODEL_CLASSES[args.model_type]
    tokenizer = tokenizer_class.from_pretrained(args.model_name_or_path)
    model = model_class.from_pretrained(args.model_name_or_path)
    model.to(args.device)
    model.eval()
    print("num parameres xlnet: "+ str(count_parameters(model)))

    args.length = model.config.max_position_embeddings  # No generation bigger than model size
    if args.length < 0:
        args.length = MAX_LENGTH  # avoid infinite loop

    print(args)

    def collate(examples: List[torch.Tensor]):
        if tokenizer._pad_token is None:
            return pad_sequence(examples, batch_first=True)

        seqs, facts, masks, genres = zip(*examples)

        pad_facts = pad_sequence(facts, batch_first=True, padding_value=tokenizer.pad_token_id)
        pad_mapping = pad_sequence(masks, batch_first=True, padding_value=tokenizer.pad_token_id)
        pad_originals = pad_sequence(seqs, batch_first=True, padding_value=tokenizer.pad_token_id)
        # genres are returned unstacked; they are stacked per batch in the loop below
        return list(zip(pad_facts, pad_mapping, genres, pad_originals))

    test_dataset = PlotFactsOnlyDataset(tokenizer, args, args.test_file_path, block_size=512)
    test_sampler = SequentialSampler(test_dataset)
    test_dataloader = DataLoader(
        test_dataset, sampler=test_sampler, batch_size=args.test_batch_size, collate_fn=collate
    )

    batch_counter = 0
    spltarr = args.model_name_or_path.split("/")
    model_name = spltarr[-1] if spltarr[-1] != "" else spltarr[-2]
    out_path = os.path.join(args.results_path, model_name)
    for batch in test_dataloader:
        batch_counter += 1
        with torch.no_grad():
            pad_facts, pad_mapping, genres, pad_originals = zip(*batch)
            tpad_mapping = torch.stack(pad_mapping).to(args.device)
            tpad_facts = torch.stack(pad_facts).to(args.device)
            tgenres = torch.stack(genres).to(args.device)
            padding_masks = torch.where(tpad_mapping == tokenizer.pad_token_id,
                                        torch.ones_like(tpad_mapping), torch.zeros_like(tpad_mapping)).to(args.device)

            pos_outputs = pos_model(tpad_facts, fact_embeds=tpad_mapping, genre_idxs=tgenres, input_mask=padding_masks,labels=None)
            seqs, masks = get_full_seqs_and_masks(pos_outputs[0][0].squeeze(-1).long(), tpad_facts[0], tokenizer)


            inputs_raw = seqs.unsqueeze(0)
            masks_raw = masks.unsqueeze(0)
            genres = tgenres


            prefix_tensor = tokenizer.encode(PADDING_TEXT, add_special_tokens=False, return_tensors="pt").to(args.device).long()
            prefix_mask = torch.ones(prefix_tensor.size()).to(args.device)
            prefix_tensor = prefix_tensor.expand((inputs_raw.size(0), prefix_tensor.size(-1)))
            prefix_mask = prefix_mask.expand((masks_raw.size(0), prefix_mask.size(-1)))
            inputs = torch.cat([prefix_tensor, inputs_raw], dim=1)
            masks = torch.cat([prefix_mask, masks_raw], dim=1)

            padding_masks = torch.where(masks == tokenizer.pad_token_id,
                                        torch.ones_like(masks), torch.zeros_like(masks))

            perm_masks = get_perm_masks(masks, order="L2R")
            target_map = get_target_mapping(masks, args.device)

            out = sample_sequence(
                model=model,
                context=inputs,
                perm_masks = perm_masks,
                padding_masks=padding_masks,
                target_mappings=target_map,
                temperature=args.temperature,
                top_k=args.top_k,
                top_p=args.top_p,
                device=str(args.device),
                genre_idxs=genres,
                tokenizer=tokenizer
            )

            from PlotFactsOnlyDataset import GENRES_LIST
            genre_text = "unused"  # GENRES_LIST[genres[0]]
            print(genre_text + ":\n")
            text = tokenizer.decode(out[0].tolist(), clean_up_tokenization_spaces=True)[len(PADDING_TEXT):]
            print(text)
            original_text = tokenizer.decode(pad_originals[0])[len(PADDING_TEXT):]
            masked_text = get_text_with_blanks(inputs, target_map)[len(PADDING_TEXT):]
            os.makedirs(out_path, exist_ok=True)
            with open(out_path + "/result" + str(batch_counter), "w") as f:
                f.writelines(f"\ngenre:  {genre_text}\n")
                f.writelines("\ntext:\n\n")
                f.writelines(text)
                f.writelines("\n--------masked-------\n")
                f.writelines(masked_text)
                f.writelines("\n--------original-------\n")
                f.writelines(original_text)
Example #27
0
        def run_epoch(split):
            is_train = split == 'train'
            model.train(is_train)
            data = self.train_dataset if is_train else self.test_dataset
            loader = DataLoader(data,
                                shuffle=True,
                                pin_memory=True,
                                batch_size=config.batch_size,
                                num_workers=config.num_workers)

            losses = []
            if self.config.tqdm:
                pbar = tqdm(
                    enumerate(loader),
                    total=len(loader)) if is_train else enumerate(loader)
            else:
                pbar = enumerate(loader)
            for it, (x, y) in pbar:

                # place data on the correct device
                x = x.to(self.device)
                y = y.to(self.device)

                # forward the model
                with torch.set_grad_enabled(is_train):
                    logits, loss = model(x, y)
                    # collapse all losses if they are scattered on multiple gpus
                    loss = loss.mean()
                    losses.append(loss.item())

                if is_train:

                    # backprop and update the parameters
                    model.zero_grad()
                    loss.backward()
                    torch.nn.utils.clip_grad_norm_(model.parameters(),
                                                   config.grad_norm_clip)
                    optimizer.step()

                    # decay the learning rate based on our progress
                    if config.lr_decay:
                        # number of tokens processed this step (i.e. label is not -100)
                        self.tokens += (y >= 0).sum()
                        if self.tokens < config.warmup_tokens:
                            # linear warmup
                            lr_mult = float(self.tokens) / float(
                                max(1, config.warmup_tokens))
                        else:
                            # cosine learning rate decay
                            progress = float(self.tokens -
                                             config.warmup_tokens) / float(
                                                 max(
                                                     1, config.final_tokens -
                                                     config.warmup_tokens))
                            lr_mult = max(
                                0.1,
                                0.5 * (1.0 + math.cos(math.pi * progress)))
                        lr = config.learning_rate * lr_mult
                        for param_group in optimizer.param_groups:
                            param_group['lr'] = lr
                    else:
                        lr = config.learning_rate

                    # report progress
                    if self.config.tqdm:
                        pbar.set_description(
                            f"epoch {epoch+1} iter {it}: train loss {loss.item():.5f}. lr {lr:e}"
                        )

            if not is_train:
                test_loss = float(np.mean(losses))
                logger.info("test loss: %f", test_loss)
                return test_loss
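
The warmup-then-cosine learning-rate logic inside `run_epoch` above can be read in isolation. Below is a minimal sketch of the same schedule as a standalone helper; the names `tokens_processed`, `warmup_tokens`, `final_tokens`, and `base_lr` are illustrative stand-ins for the `self.tokens` / `config.*` values used in the snippet, not part of the original code.

import math

def warmup_cosine_lr(tokens_processed, warmup_tokens, final_tokens, base_lr):
    # Linear warmup until `warmup_tokens` have been seen, then cosine decay
    # down to 10% of the base learning rate, mirroring the logic above.
    if tokens_processed < warmup_tokens:
        lr_mult = float(tokens_processed) / float(max(1, warmup_tokens))
    else:
        progress = float(tokens_processed - warmup_tokens) / float(
            max(1, final_tokens - warmup_tokens))
        lr_mult = max(0.1, 0.5 * (1.0 + math.cos(math.pi * progress)))
    return base_lr * lr_mult

# e.g. warmup_cosine_lr(500, warmup_tokens=1000, final_tokens=10000, base_lr=3e-4)
# returns half the base rate while still in the warmup phase.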
Example #28
0
import torch

from torch.utils.data.dataloader import DataLoader
from torch.utils.data.sampler import BatchSampler
from mlbaselines.sampler import RandomSampler
from torch.utils.data.dataset import TensorDataset

data = torch.ones((100, 1))

for i in range(100):
    data[i] = data[i] * i

dataset = TensorDataset(data)

sampler = RandomSampler(dataset, seed=1)

batch_sampler = BatchSampler(sampler, batch_size=2, drop_last=True)

# The batch size is determined by the BatchSampler; DataLoader must be left at
# its defaults for batch_size/shuffle/drop_last when batch_sampler is given.
loader = DataLoader(dataset,
                    batch_sampler=batch_sampler,
                    num_workers=2)

for b in loader:
    print(b[0])
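
As a side note on the snippet above: PyTorch treats `batch_sampler` as mutually exclusive with a non-default `batch_size`, `shuffle`, `sampler`, or `drop_last`. A minimal sketch of the error a conflicting combination raises (the standard `RandomSampler` from `torch.utils.data` is used here instead of the project-specific `mlbaselines` one):

import torch
from torch.utils.data import DataLoader, TensorDataset
from torch.utils.data.sampler import BatchSampler, RandomSampler

dataset = TensorDataset(torch.arange(10).float().unsqueeze(1))
batch_sampler = BatchSampler(RandomSampler(dataset), batch_size=2, drop_last=True)

try:
    # Conflicts with batch_sampler: batch_size other than the default of 1
    DataLoader(dataset, batch_sampler=batch_sampler, batch_size=2)
except ValueError as e:
    print("DataLoader rejected the conflicting arguments:", e)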
def main():

    model = initialize_model(args.model_name, args.num_classes)  # Initialize model

    print(args.device)

    model.to(args.device)  # Send to device

    # Set the optimizer
    optimizer = torch.optim.Adam(params=model.parameters(), lr=args.lr_start)
    #optimizer = torch.optim.SGD(params=model.parameters(), lr=args.lr_start, momentum=0.9, weight_decay=0.01)

    # Set the required transforms
    img_transforms = {
        'train': tf.Compose([
            tf.Resize(size=(args.size, args.size), interpolation=Image.BILINEAR),
            tf.ToTensor(),
            tf.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ]),
        'val': tf.Compose([
            tf.Resize(size=(args.size, args.size), interpolation=Image.BILINEAR),
            tf.ToTensor(),
            tf.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ]),
    }

    msk_transforms = {
        'train': tf.Compose([
            tf.Resize(size=(args.size, args.size), interpolation=Image.NEAREST)
        ]),
        'val': tf.Compose([
            tf.Resize(size=(args.size, args.size), interpolation=Image.NEAREST)
        ]),
    }

    # Load the data using dataset creator
    train_dataset = T_dataset(args.image_dir, args.mask_dir, 'train', img_transforms=img_transforms['train'], msk_transforms=msk_transforms['train'])
    val_dataset = T_dataset(args.image_dir, args.mask_dir, 'val', img_transforms=img_transforms['val'], msk_transforms=msk_transforms['val'])

    # Create data loaders
    train_loader = DataLoader(
        dataset=train_dataset,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.num_workers,
        pin_memory=True,
        drop_last=True
    )
    val_loader = DataLoader(
        dataset=val_dataset,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.num_workers,
        pin_memory=True,
        drop_last=False
    )

    dataloaders = {"train": train_loader, "val": val_loader}  # Create dataloader dictionary for ease of use

    # Set up the loss function
    criterion = nn.CrossEntropyLoss()

    # Train the model
    train_model(model, dataloaders, criterion, optimizer, args.epochs)
    def fit_diffeomorphism_model(self,
                                 X,
                                 t,
                                 X_d,
                                 learning_rate=1e-2,
                                 learning_decay=0.95,
                                 n_epochs=50,
                                 train_frac=0.8,
                                 l2=1e1,
                                 batch_size=64,
                                 initialize=True,
                                 verbose=True,
                                 X_val=None,
                                 t_val=None,
                                 Xd_val=None):
        """fit_diffeomorphism_model 
        
        Arguments:
            X {numpy array [Ntraj,Nt,Ns]} -- state
            t {numpy array [Ntraj,Nt]} -- time vector
            X_d {numpy array [Ntraj,Nt,Ns]} -- desired state
        
        Keyword Arguments:
            learning_rate {[type]} --  (default: {1e-2})
            learning_decay {float} --  (default: {0.95})
            n_epochs {int} --  (default: {50})
            train_frac {float} -- ratio of training and testing (default: {0.8})
            l2 {[type]} -- L2 penalty term (default: {1e1})
            jacobian_penalty {[type]} --  (default: {1.})
            batch_size {int} --  (default: {64})
            initialize {bool} -- flag to warm start (default: {True})
            verbose {bool} --  (default: {True})
            X_val {numpy array [Ntraj,Nt,Ns]} -- state in validation set (default: {None})
            t_val {numpy array [Ntraj,Nt]} -- time in validation set (default: {None})
            Xd_val {numpy array [Ntraj,Nt,Ns]} -- desired state in validation set (default: {None})
        
        Returns:
            float -- val_losses[-1]
        """
        device = 'cuda' if cuda.is_available() else 'cpu'
        X, X_dot, X_d, X_d_dot, t = self.process(X=X, t=t, X_d=X_d)

        # Prepare data for pytorch:
        manual_seed(42)  # Fix seed for reproducibility
        if self.traj_input:
            X_tensor = from_numpy(
                npconcatenate(
                    (X, X_d, X_dot, X_d_dot, np.zeros_like(X)),
                    axis=1))  # [x, x_d, x_dot, x_d_dot, zeros], each (1, n)
        else:
            X_tensor = from_numpy(
                npconcatenate(
                    (X, X_dot, np.zeros_like(X)),
                    axis=1))  # [x, x_dot, zeros], each (1, n)
        y_target = X_dot - (dot(self.A_cl, X.T) + dot(self.BK, X_d.T)).T
        y_tensor = from_numpy(y_target)
        X_tensor.requires_grad_(True)

        # Builds dataset with all data
        dataset = TensorDataset(X_tensor, y_tensor)

        if X_val is None or t_val is None or Xd_val is None:
            # Splits randomly into train and validation datasets
            n_train = int(train_frac * X.shape[0])
            n_val = X.shape[0] - n_train
            train_dataset, val_dataset = random_split(dataset,
                                                      [n_train, n_val])
            # Builds a loader for each dataset to perform mini-batch gradient descent
            train_loader = DataLoader(dataset=train_dataset,
                                      batch_size=batch_size,
                                      shuffle=True)
            val_loader = DataLoader(dataset=val_dataset, batch_size=batch_size)
        else:
            #Uses X,... as training data and X_val,... as validation data
            X_val, X_dot_val, Xd_val, Xd_dot_val, t_val = self.process(
                X=X_val, t=t_val, X_d=Xd_val)
            if self.traj_input:
                X_val_tensor = from_numpy(
                    npconcatenate((X_val, Xd_val, X_dot_val, Xd_dot_val,
                                   np.zeros_like(X_val)),
                                  axis=1)
                )  # [x, x_d, x_dot, x_d_dot, zeros], each (1, n)
            else:
                X_val_tensor = from_numpy(
                    npconcatenate(
                        (X_val, X_dot_val, np.zeros_like(X_val)),
                        axis=1))  # [x, x_dot, zeros], each (1, n)
            # Same target construction as for the training data above
            y_target_val = X_dot_val - (dot(self.A_cl, X_val.T) +
                                        dot(self.BK, Xd_val.T)).T
            y_val_tensor = from_numpy(y_target_val)
            X_val_tensor.requires_grad_(True)
            val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
            # Builds a loader for each dataset to perform mini-batch gradient descent
            train_loader = DataLoader(dataset=dataset,
                                      batch_size=int(batch_size),
                                      shuffle=True)
            val_loader = DataLoader(dataset=val_dataset,
                                    batch_size=int(batch_size))

        # Set up optimizer and learning rate scheduler:
        optimizer = optim.Adam(self.diffeomorphism_model.parameters(),
                               lr=learning_rate,
                               weight_decay=l2)
        lambda1 = lambda epoch: learning_decay**epoch
        scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda1)

        def make_train_step(model, loss_fn, optimizer):
            def train_step(x, y):
                model.train()  # Set model to training mode
                y_pred = model(x)
                loss = loss_fn(y, y_pred, model.training)
                loss.backward()
                optimizer.step()
                return loss.item()

            return train_step

        batch_loss = []
        losses = []
        batch_val_loss = []
        val_losses = []
        train_step = make_train_step(
            self.diffeomorphism_model,
            self.diffeomorphism_model.diffeomorphism_loss, optimizer)

        # Initialize model weights:
        def init_normal(m):
            if type(m) == nn.Linear:
                nn.init.xavier_normal_(m.weight)

        if initialize:
            self.diffeomorphism_model.apply(init_normal)

        # Training loop
        for i in range(n_epochs):
            # Uses loader to fetch one mini-batch for training
            #print('Training epoch ', i)
            for x_batch, y_batch in train_loader:
                # Send mini batch data to same location as model:
                x_batch = x_batch.to(device)
                y_batch = y_batch.to(device)
                #print('Training: ', x_batch.shape, y_batch.shape)
                # Train based on current batch:
                batch_loss.append(train_step(x_batch, y_batch))
                optimizer.zero_grad()
            losses.append(sum(batch_loss) / len(batch_loss))
            batch_loss = []

            #print('Validating epoch ', i)
            with no_grad():
                for x_val, y_val in val_loader:
                    # Sends data to same device as model
                    x_val = x_val.to(device)
                    y_val = y_val.to(device)

                    #print('Validation: ', x_val.shape, y_val.shape)

                    self.diffeomorphism_model.eval()  # Switch model to evaluation mode
                    #xt_val = x_val[:, :2*self.n]  # [x, x_d]
                    #xdot_val = x_val[:, 2*self.n:]  # [xdot]
                    y_pred = self.diffeomorphism_model(x_val)  # Predict
                    #jacobian_xdot_val, zero_jacobian_val = calc_gradients(xt_val, xdot_val, yhat, None, None, self.diffeomorphism_model.training)
                    # Compute validation loss for this mini-batch
                    batch_val_loss.append(float(
                        self.diffeomorphism_model.diffeomorphism_loss(
                            y_val, y_pred,
                            self.diffeomorphism_model.training)))
                # Save the mean validation loss for this epoch
                val_losses.append(sum(batch_val_loss) / len(batch_val_loss))
                batch_val_loss = []

            scheduler.step(i)
            if verbose:
                print(' - Epoch: ', i, ' Training loss:',
                      format(losses[-1], '08f'), ' Validation loss:',
                      format(val_losses[-1], '08f'))
                print(
                    'Improvement metric (for early stopping): ',
                    sum(
                        abs(
                            array(val_losses[-min(3, len(val_losses)):]) -
                            val_losses[-1])) /
                    (3 * val_losses[-min(3, len(val_losses))]))
            if i > n_epochs / 4 and sum(
                    abs(
                        array(val_losses[-min(3, len(val_losses)):]) -
                        val_losses[-1])) / (
                            3 * val_losses[-min(3, len(val_losses))]) < 0.01:
                #print('Early stopping activated')
                break

        return val_losses[-1]
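
The early-stopping criterion at the end of the training loop above is easier to read when pulled out into a helper. A minimal sketch of the same relative-improvement measure follows; the function name `relative_improvement` and the `window` parameter are illustrative, not part of the original code.

import numpy as np

def relative_improvement(val_losses, window=3):
    # Mean absolute change of the last `window` validation losses w.r.t. the
    # most recent one, normalized by the oldest loss in the window, mirroring
    # the early-stopping expression used in fit_diffeomorphism_model.
    w = min(window, len(val_losses))
    recent = np.array(val_losses[-w:])
    return np.sum(np.abs(recent - val_losses[-1])) / (window * val_losses[-w])

# e.g. relative_improvement([0.105, 0.101, 0.100]) is roughly 0.019, so training
# continues; once the value drops below 0.01 (after n_epochs / 4), the loop breaks.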