def collapse_channels(dataset: datasets.ClassificationDataset):
    # Average over the channel axis (NHWC -> axis 3, NCHW -> axis 1),
    # keeping a singleton channel so the array ranks are preserved.
    axis = 3 if dataset.dataformat == "NHWC" else 1
    dataset.x_train = dataset.x_train.mean(axis=axis, keepdims=True)
    dataset.x_test = dataset.x_test.mean(axis=axis, keepdims=True)


def expand_channels(dataset: datasets.ClassificationDataset, c: int):
    # Replicate the (single) channel c times along the channel axis,
    # e.g. grayscale -> RGB when c == 3.
    axis = 3 if dataset.dataformat == "NHWC" else 1
    dataset.x_train = np.repeat(dataset.x_train, c, axis=axis)
    dataset.x_test = np.repeat(dataset.x_test, c, axis=axis)
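
# Shape sanity check for the two helpers above (illustrative only; the
# arrays here are hypothetical, not part of the original module):
#
#   x = np.random.rand(2, 28, 28, 3)       # NHWC batch
#   gray = x.mean(axis=3, keepdims=True)   # (2, 28, 28, 1), as in collapse_channels
#   rgb = np.repeat(gray, 3, axis=3)       # (2, 28, 28, 3), as in expand_channels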


def resize(dataset: datasets.ClassificationDataset, h: int, w: int, c: int):
    # Work in NHWC internally; convert NCHW data first and convert back at the end.
    if dataset.dataformat == "NCHW":
        dataset.x_train = np.transpose(dataset.x_train, axes=(0, 2, 3, 1))
        dataset.x_test = np.transpose(dataset.x_test, axes=(0, 2, 3, 1))

    subsets = [dataset.x_train, dataset.x_test]
    new_subsets = [np.zeros((s.shape[0], h, w, c)) for s in subsets]

    for subset, new_subset in zip(subsets, new_subsets):
        for i in range(subset.shape[0]):
            img = subset[i, :]
            if c == 1:
                # cv2.resize drops a trailing singleton channel:
                # remove it, resize, then restore it.
                img = img[:, :, 0]
                img = cv2.resize(img, dsize=(w, h))  # dsize is (width, height)
                img = img[:, :, np.newaxis]
            else:
                img = cv2.resize(img, dsize=(w, h))  # dsize is (width, height)

            new_subset[i, :] = img

    dataset.x_train = new_subsets[0]
    dataset.x_test = new_subsets[1]

    if dataset.dataformat == "NCHW":
        dataset.x_train = np.transpose(dataset.x_train, axes=(0, 3, 1, 2))
        dataset.x_test = np.transpose(dataset.x_test, axes=(0, 3, 1, 2))
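
# Quick standalone check of cv2's dsize convention (illustrative only):
# cv2.resize takes dsize=(width, height), the reverse of NumPy's
# (rows, cols) shape order.
#
#   demo = np.zeros((10, 20), dtype=np.uint8)  # 10 rows (h), 20 cols (w)
#   out = cv2.resize(demo, dsize=(8, 4))       # request width=8, height=4
#   assert out.shape == (4, 8)                 # result shape is (h, w)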


def print_summary(dataset: datasets.ClassificationDataset,
                  p: training.Parameters, o: training.Options,
                  min_accuracy: float):
    print("Parameters: ", p)
    print("Options: ", o)
    print("Min accuracy: ", min_accuracy)
    print(f"Dataset {p.dataset}.")
    print(dataset.summary())
    print(f"Model {p.model}.")

    if p.savepoints:
        epochs_str = ", ".join(str(sp) for sp in p.savepoints)
        print(f"Savepoints at epochs {epochs_str}.")


def adapt_dataset(dataset: datasets.ClassificationDataset, dataset_template: str):
    dataset_template = datasets.get_classification(dataset_template)
    h, w, c = dataset_template.input_shape
    del dataset_template
    oh, ow, oc = dataset.input_shape

    # Fix channels. If the counts already match there is nothing to do;
    # otherwise we can only expand from one channel or collapse down to one.
    if c != oc:
        if oc == 1:
            expand_channels(dataset, c)
        elif c == 1:
            collapse_channels(dataset)
        else:
            raise ValueError(
                f"Cannot transform image with {oc} channels "
                f"into image with {c} channels.")

    # Fix spatial size.
    if h != oh or w != ow:
        resize(dataset, h, w, c)

    dataset.input_shape = (h, w, c)
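
# Usage sketch (hypothetical dataset keys; use whatever names the local
# datasets.get_classification accepts): adapt grayscale 28x28x1 images to
# a 32x32x3 template so a single model architecture fits both datasets.
#
#   mnist = datasets.get_classification("mnist")
#   adapt_dataset(mnist, "cifar10")
#   assert mnist.input_shape == (32, 32, 3)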
Example no. 6
def main():
    parser = HfArgumentParser(
        (ModelArguments, DataTrainingArguments, TrainingArguments))
    model_args, data_args, training_args = parser.parse_args_into_dataclasses()

    if (os.path.exists(training_args.output_dir)
            and os.listdir(training_args.output_dir) and training_args.do_train
            and not training_args.overwrite_output_dir):
        raise ValueError(
            f"Output directory ({training_args.output_dir}) already exists and is not empty. Use --overwrite_output_dir to overcome."
        )

    # Setup logging
    logger = logging.getLogger(__name__)
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s -   %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO
        if training_args.local_rank in [-1, 0] else logging.WARN,
        handlers=[
            logging.FileHandler(os.path.join(training_args.logging_dir,
                                             "logging.log"),
                                'w',
                                encoding='utf-8'),
            logging.StreamHandler()
        ])
    logger.warning(
        "Process rank: %s, device: %s, n_gpu: %s, distributed training: %s, 16-bits training: %s",
        training_args.local_rank,
        training_args.device,
        training_args.n_gpu,
        bool(training_args.local_rank != -1),
        training_args.fp16,
    )
    logger.info("Training/evaluation parameters %s", training_args)

    # Get model name: prefer an explicit path, then the MODEL alias table,
    # then fall back to the raw --model value.
    if model_args.model_name_or_path is not None:
        model_name = model_args.model_name_or_path
    elif model_args.model.lower() in MODEL:
        model_name = MODEL[model_args.model.lower()]
    else:
        model_name = model_args.model

    # Set seed
    set_seed(training_args.seed)

    # Set model
    config = AutoConfig.from_pretrained(
        model_args.config_name if model_args.config_name else model_name,
        cache_dir=model_args.cache_dir,
        num_labels=data_args.num_labels)
    tokenizer = AutoTokenizer.from_pretrained(
        model_args.tokenizer_name if model_args.tokenizer_name else model_name,
        cache_dir=model_args.cache_dir)
    model = AutoModelForSequenceClassification.from_pretrained(
        model_name, config=config, cache_dir=model_args.cache_dir)

    # Set dataset
    train = ClassificationDataset(
        data_args.data_dir, tokenizer, data_args.task_name,
        data_args.max_seq_length, data_args.overwrite_cache,
        "train") if training_args.do_train else None
    dev = ClassificationDataset(data_args.data_dir, tokenizer,
                                data_args.task_name, data_args.max_seq_length,
                                data_args.overwrite_cache,
                                "dev") if training_args.do_eval else None
    test = ClassificationDataset(data_args.data_dir, tokenizer,
                                 data_args.task_name, data_args.max_seq_length,
                                 data_args.overwrite_cache,
                                 "test") if training_args.do_predict else None

    # Set trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train,
        eval_dataset=dev,
        compute_metrics=metrics_fn,
    )

    # Training
    if training_args.do_train:
        trainer.train(
            model_path=model_name if os.path.isdir(model_name) else None)
        trainer.save_model()
        if trainer.is_world_master():
            tokenizer.save_pretrained(training_args.output_dir)

    # Validation
    if training_args.do_eval:
        logger.info("*** Evaluate ***")
        result = trainer.evaluate()
        output_eval_file = os.path.join(training_args.output_dir,
                                        "eval_results.txt")
        if trainer.is_world_master():
            with open(output_eval_file, "w") as writer:
                logger.info("***** Eval results *****")
                for key, value in result.items():
                    logger.info("  %s = %s", key, value)
                    writer.write("%s = %s\n" % (key, value))

        logger.info("Validation set result : {}".format(result))

    # Test prediction
    if training_args.do_predict:
        logger.info("*** Test ***")
        predictions = trainer.predict(test_dataset=test)
        output_test_file = os.path.join(training_args.output_dir,
                                        "test_results.txt")
        if trainer.is_world_master():
            with open(output_test_file, "w") as writer:
                logger.info("***** Test results *****")
                logger.info("{}".format(predictions))
                writer.write("prediction : \n{}\n\n".format(
                    prediction(predictions.predictions).tolist()))
                if predictions.label_ids is not None:
                    writer.write("ground truth : \n{}\n\n".format(
                        predictions.label_ids.tolist()))
                    writer.write("metrics : \n{}\n\n".format(
                        predictions.metrics))
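
# Invocation sketch (hypothetical script name; the TrainingArguments flags
# are standard, the others are inferred from the dataclass fields used above):
#
#   python run_classification.py \
#       --model bert --data_dir ./data --task_name my_task \
#       --max_seq_length 128 --num_labels 2 \
#       --output_dir ./out --do_train --do_eval --do_predict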
Example no. 7
import argparse
import time

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision

parser = argparse.ArgumentParser(description='inf')
parser.add_argument('--train-path', type=str, default='data/class_train.csv')
parser.add_argument('--test-path', type=str, default='data/class_test.csv')
parser.add_argument('--data-path', type=str, default='data/class_images/')
parser.add_argument('--batch-size', type=int, default=64)
parser.add_argument('--use-cuda', dest='use_cuda', action='store_true')
parser.set_defaults(use_cuda=False)
args = parser.parse_args()

gpu = args.use_cuda

train_data = ClassificationDataset(args.train_path, args.data_path, train=True)
test_data = ClassificationDataset(args.test_path, args.data_path, train=False)

train_loader = torch.utils.data.DataLoader(train_data,
                                           batch_size=args.batch_size,
                                           shuffle=True,
                                           num_workers=6)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=16)

model = torchvision.models.resnet18(pretrained=True)
model.fc = nn.Linear(512, 1)
if gpu:
    model = model.to('cuda')

optimizer = optim.Adam(model.parameters(), lr=1e-3)
bce_loss = nn.BCEWithLogitsLoss()
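

# The code above only builds the loaders, model, optimizer and loss; a
# minimal one-epoch training-loop sketch follows. It assumes train_loader
# yields (image, label) batches with 0/1 labels, matching the single-logit
# fc head and BCEWithLogitsLoss.
def train_one_epoch():
    model.train()
    for images, labels in train_loader:
        if gpu:
            images, labels = images.to('cuda'), labels.to('cuda')
        optimizer.zero_grad()
        logits = model(images).squeeze(1)        # (B, 1) -> (B,)
        loss = bce_loss(logits, labels.float())
        loss.backward()
        optimizer.step()
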
def main():

    # Get arguments
    parser = HfArgumentParser(
        (ModelArguments, DataTrainingArguments, TrainingArguments))
    model_args, data_args, training_args = parser.parse_args_into_dataclasses()

    # Path check and set logger
    path_checker(training_args)
    set_logger(training_args)

    # Get model name: prefer an explicit path, then the MODEL alias table,
    # then fall back to the raw --model value.
    if model_args.model_name_or_path is not None:
        model_name = model_args.model_name_or_path
    elif model_args.model.lower() in MODEL:
        model_name = MODEL[model_args.model.lower()]
    else:
        model_name = model_args.model

    # Set seed
    set_seed(training_args.seed)

    # Set model
    config = AutoConfig.from_pretrained(
        model_args.config_name if model_args.config_name else model_name,
        cache_dir=model_args.cache_dir,
        num_labels=data_args.num_labels)
    tokenizer = AutoTokenizer.from_pretrained(
        model_args.tokenizer_name if model_args.tokenizer_name else model_name,
        cache_dir=model_args.cache_dir)
    model = AutoModelForSequenceClassification.from_pretrained(
        model_name, config=config, cache_dir=model_args.cache_dir)

    # Set dataset
    train = ClassificationDataset(
        data_args.data_dir,
        tokenizer,
        data_args.task_name,
        data_args.max_seq_length,
        data_args.overwrite_cache,
        mode="train") if training_args.do_train else None
    dev = ClassificationDataset(data_args.data_dir,
                                tokenizer,
                                data_args.task_name,
                                data_args.max_seq_length,
                                data_args.overwrite_cache,
                                mode="dev") if training_args.do_eval else None
    test = ClassificationDataset(
        data_args.data_dir,
        tokenizer,
        data_args.task_name,
        data_args.max_seq_length,
        data_args.overwrite_cache,
        mode="test") if training_args.do_predict else None

    # Set trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train,
        eval_dataset=dev,
        compute_metrics=metrics_fn,
    )

    # Set runner
    runner = Runner(model_name=model_name,
                    trainer=trainer,
                    tokenizer=tokenizer,
                    training_args=training_args,
                    test=test)

    # Start
    runner()