def main():
    """Train a visual CNN jointly with a frozen, pre-trained text CNN.

    All hyper-parameters come from a JSON config file (``--config_file``).
    The function sets up checkpoint/log directories, builds the visual and
    text models, optionally resumes the visual model from a checkpoint, then
    runs the train/validate loop, saving a checkpoint every epoch and
    tracking the best top-1 precision.
    """

    import argparse
    parser = argparse.ArgumentParser(description="Pytorch Image CNN training from Configure Files")
    parser.add_argument('--config_file', required=True, help="This scripts only accepts parameters from Json files")
    input_args = parser.parse_args()

    config_file = input_args.config_file

    # Every remaining option is read from the JSON config, not the CLI.
    args = parse_config(config_file)
    if args.name is None:
        args.name = get_stem(config_file)

    torch.set_default_tensor_type('torch.FloatTensor')
    best_prec1 = 0

    args.script_name = get_stem(__file__)
    current_time_str = get_date_str()
    # if args.resume is None:
    # Checkpoints go to <project_root>/ckpts2/<name>/<ID>-<timestamp> unless
    # an explicit save_directory is configured.
    if args.save_directory is None:
        save_directory = get_dir(os.path.join(project_root, 'ckpts2', '{:s}'.format(args.name), '{:s}-{:s}'.format(args.ID, current_time_str)))
    else:
        save_directory = get_dir(os.path.join(project_root, 'ckpts2', args.save_directory))
    # else:
    #     save_directory = os.path.dirname(args.resume)
    print("Save to {}".format(save_directory))
    log_file = os.path.join(save_directory, 'log-{0}.txt'.format(current_time_str))
    logger = log_utils.get_logger(log_file)
    log_utils.print_config(vars(args), logger)


    # Route all subsequent status messages through the file logger.
    print_func = logger.info
    print_func('ConfigFile: {}'.format(config_file))
    args.log_file = log_file

    # Must be set before any CUDA context is created to take effect.
    if args.device:
        os.environ["CUDA_VISIBLE_DEVICES"]=args.device


    if args.seed is not None:
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        cudnn.deterministic = True
        warnings.warn('You have chosen to seed training. '
                      'This will turn on the CUDNN deterministic setting, '
                      'which can slow down your training considerably! '
                      'You may see unexpected behavior when restarting '
                      'from checkpoints.')

    if args.gpu is not None:
        warnings.warn('You have chosen a specific GPU. This will completely '
                      'disable data parallelism.')

    args.distributed = args.world_size > 1

    if args.distributed:
        dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                                world_size=args.world_size)

    # Visual backbone is looked up by name in the project-local `models` registry.
    if args.pretrained:
        print_func("=> using pre-trained model '{}'".format(args.arch))
        visual_model = models.__dict__[args.arch](pretrained=True, num_classes=args.num_classes)
    else:
        print_func("=> creating model '{}'".format(args.arch))
        visual_model = models.__dict__[args.arch](pretrained=False, num_classes=args.num_classes)

    if args.freeze:
        visual_model = CNN_utils.freeze_all_except_fc(visual_model)



    # The text model is mandatory: abort if its checkpoint is missing.
    if os.path.isfile(args.text_ckpt):
        print_func("=> loading checkpoint '{}'".format(args.text_ckpt))
        # map_location keeps the load on CPU regardless of where it was saved.
        text_data = torch.load(args.text_ckpt, map_location=lambda storage, loc:storage)
        text_model = TextCNN(text_data['args_model'])
        # load_state_dict(text_model, text_data['state_dict'])
        text_model.load_state_dict(text_data['state_dict'], strict=True)
        text_model.eval()  # text model stays frozen in eval mode
        print_func("=> loaded checkpoint '{}' for text classification"
              .format(args.text_ckpt))
        args.vocab_size = text_data['args_model'].vocab_size
    else:
        print_func("=> no checkpoint found at '{}'".format(args.text_ckpt))
        return


    args.tag2clsidx = text_data['args_data'].tag2idx
    # NOTE: this overwrites the vocab_size read from the checkpoint above.
    args.vocab_size = len(args.tag2clsidx)

    args.text_embed = loadpickle(args.text_embed)
    args.idx2tag = loadpickle(args.idx2tag)['idx2tag']



    # Device placement: single GPU, distributed, or DataParallel fallback.
    if args.gpu is not None:
        visual_model = visual_model.cuda(args.gpu)
        text_model = text_model.cuda((args.gpu))
    elif args.distributed:
        # NOTE(review): text_model is never moved to GPU or wrapped in this
        # branch -- confirm it is unused (or intentionally CPU-only) when
        # running distributed.
        visual_model.cuda()
        visual_model = torch.nn.parallel.DistributedDataParallel(visual_model)
    else:
        if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
            # NOTE(review): text_model is also left on CPU in this branch.
            visual_model.features = torch.nn.DataParallel(visual_model.features)
            visual_model.cuda()
        else:
            visual_model = torch.nn.DataParallel(visual_model).cuda()
            text_model = torch.nn.DataParallel(text_model).cuda()


    # ignore_index=-1: samples labeled -1 contribute no loss.
    criterion = nn.CrossEntropyLoss(ignore_index=-1).cuda(args.gpu)

    # Only parameters left trainable (e.g. after freezing) are optimized.
    optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad, visual_model.parameters()), lr=args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if args.lr_schedule:
        print_func("Using scheduled learning rate")
        # Comma-separated milestone epochs; LR decays by 10x at each.
        scheduler = lr_scheduler.MultiStepLR(
            optimizer, [int(i) for i in args.lr_schedule.split(',')], gamma=0.1)
    else:
        scheduler = lr_scheduler.ReduceLROnPlateau(
            optimizer, 'min', patience=args.lr_patience)

    # optimizer = torch.optim.SGD(model.parameters(), args.lr,
    #                             momentum=args.momentum,
    #                             weight_decay=args.weight_decay)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print_func("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)

            import collections
            # A raw OrderedDict is a bare state_dict; otherwise expect the
            # full checkpoint dict written by CNN_utils.save_checkpoint.
            if isinstance(checkpoint, collections.OrderedDict):
                load_state_dict(visual_model, checkpoint)


            else:
                load_state_dict(visual_model, checkpoint['state_dict'])
                print_func("=> loaded checkpoint '{}' (epoch {})"
                      .format(args.resume, checkpoint['epoch']))

        else:
            print_func("=> no checkpoint found at '{}'".format(args.resume))



    cudnn.benchmark = True

    model_total_params = sum(p.numel() for p in visual_model.parameters())
    model_grad_params = sum(p.numel() for p in visual_model.parameters() if p.requires_grad)
    print_func("Total Parameters: {0}\t Gradient Parameters: {1}".format(model_total_params, model_grad_params))

    # Data loading code
    val_dataset = get_instance(custom_datasets, '{0}'.format(args.valloader), args)
    if val_dataset is None:
        val_loader = None
    else:
        val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=args.batch_size, shuffle=False,
                                             num_workers=args.workers, pin_memory=True, collate_fn=none_collate)

    if args.evaluate:
        print_func('Validation Only')
        validate(val_loader, visual_model, criterion, args, print_func)
        return
    else:

        train_dataset = get_instance(custom_datasets, '{0}'.format(args.trainloader), args)

        if args.distributed:
            train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
        else:
            train_sampler = None

        train_loader = torch.utils.data.DataLoader(
            train_dataset, batch_size=args.batch_size, shuffle=(train_sampler is None),
            num_workers=args.workers, pin_memory=True, sampler=train_sampler, collate_fn=none_collate)




    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            # Reshuffle the distributed shards differently each epoch.
            train_sampler.set_epoch(epoch)
        if args.lr_schedule:
            # CNN_utils.adjust_learning_rate(optimizer, epoch, args.lr)
            # NOTE(review): stepping before train() matches pre-1.1 PyTorch
            # semantics; on PyTorch >= 1.1 the scheduler should step after
            # the optimizer -- confirm against the pinned torch version.
            scheduler.step()
        current_lr = optimizer.param_groups[0]['lr']

        print_func("Epoch: [{}], learning rate: {}".format(epoch, current_lr))

        # train for one epoch
        train(train_loader, visual_model, text_model, criterion, optimizer, epoch, args, print_func)

        # evaluate on validation set
        if val_loader:
            prec1, val_loss = validate(val_loader, visual_model, criterion, args, print_func)
        else:
            prec1 = 0
            val_loss = 0
        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        CNN_utils.save_checkpoint({
            'epoch': epoch + 1,
            'arch': args.arch,
            'state_dict': visual_model.state_dict(),
            'best_prec1': best_prec1,
            'optimizer' : optimizer.state_dict(),
        }, is_best, file_directory=save_directory, epoch=epoch)

        if not args.lr_schedule:
            # The plateau scheduler keys off the validation loss.
            scheduler.step(val_loss)
# ---- Esempio n. 2 ("Example no. 2" -- separator left over from the scraped
# code listing; commented out so the file remains parseable) ----
def main():
    """Evaluate an embedding CNN on Deepemotion via cosine similarity.

    Loads a CNN whose second output is an embedding, compares each validation
    image's embedding against word vectors of the emotion labels (to compute
    accuracy) and against the full tag vocabulary (to report the 10 closest
    tags per image).
    """

    import argparse
    parser = argparse.ArgumentParser(
        description="Pytorch Image CNN training from Configure Files")
    parser.add_argument(
        '--config_file',
        required=True,
        help="This scripts only accepts parameters from Json files")
    input_args = parser.parse_args()

    config_file = input_args.config_file

    args = parse_config(config_file)
    if args.name is None:
        args.name = get_stem(config_file)

    torch.set_default_tensor_type('torch.FloatTensor')
    best_prec1 = 0  # unused in this evaluation-only script

    args.script_name = get_stem(__file__)
    current_time_str = get_date_str()  # unused here (no log file is written)

    print_func = print

    # Must be set before any CUDA context is created to take effect.
    if args.device:
        os.environ["CUDA_VISIBLE_DEVICES"] = args.device

    if args.seed is not None:
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        cudnn.deterministic = True
        warnings.warn('You have chosen to seed training. '
                      'This will turn on the CUDNN deterministic setting, '
                      'which can slow down your training considerably! '
                      'You may see unexpected behavior when restarting '
                      'from checkpoints.')

    if args.gpu is not None:
        warnings.warn('You have chosen a specific GPU. This will completely '
                      'disable data parallelism.')

    args.distributed = args.world_size > 1

    if args.distributed:
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size)

    # Backbone is looked up by name in the project-local `models` registry.
    if args.pretrained:
        print_func("=> using pre-trained model '{}'".format(args.arch))
        model = models.__dict__[args.arch](pretrained=True,
                                           num_classes=args.num_classes)
    else:
        print_func("=> creating model '{}'".format(args.arch))
        model = models.__dict__[args.arch](pretrained=False,
                                           num_classes=args.num_classes)

    if args.gpu is not None:
        model = model.cuda(args.gpu)
    elif args.distributed:
        model.cuda()
        model = torch.nn.parallel.DistributedDataParallel(model)
    else:
        if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
            model.features = torch.nn.DataParallel(model.features)
            model.cuda()
        else:
            # model = torch.nn.DataParallel(model).cuda()
            # Single-process, non-parallel placement (batch size 1 inference).
            model = model.cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print_func("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            import collections
            # A raw OrderedDict is a bare state_dict; the fc layers are
            # excluded because the fine-tuned head differs from the checkpoint.
            if isinstance(checkpoint, collections.OrderedDict):
                load_state_dict(model,
                                checkpoint,
                                exclude_layers=['fc.weight', 'fc.bias'])

            else:
                load_state_dict(
                    model,
                    checkpoint['state_dict'],
                    exclude_layers=['module.fc.weight', 'module.fc.bias'])
                print_func("=> loaded checkpoint '{}' (epoch {})".format(
                    args.resume, checkpoint['epoch']))
        else:
            print_func("=> no checkpoint found at '{}'".format(args.resume))
            return
    else:
        print_func(
            "=> This script is for fine-tuning only, please double check '{}'".
            format(args.resume))
        print_func("Now using randomly initialized parameters!")

    cudnn.benchmark = True

    from PyUtils.pickle_utils import loadpickle
    from PublicEmotionDatasets.Deepemotion.constants import emotion2idx, idx2emotion
    from PyUtils.dict_utils import string_list2dict
    import numpy as np
    from torchvision.datasets.folder import default_loader
    # Build two lookup matrices from the word-vector dict: one for the whole
    # tag vocabulary and one restricted to the emotion label words.
    tag_wordvectors = loadpickle(args.tag_embeddings)
    tag_words = []
    tag_matrix = []
    label_words = []
    label_matrix = []

    for x_tag in tag_wordvectors:
        tag_words.append(x_tag)
        tag_matrix.append(tag_wordvectors[x_tag])
        if x_tag in emotion2idx:
            label_words.append(x_tag)
            label_matrix.append(tag_wordvectors[x_tag])
    idx2tag, tag2idx = string_list2dict(tag_words)
    idx2label, label2idx = string_list2dict(label_words)
    tag_matrix = np.array(tag_matrix)
    label_matrix = np.array(label_matrix)
    # assumes each word vector is stored with shape (1, dim) -- TODO confirm
    label_matrix = label_matrix.squeeze(1)
    tag_matrix = tag_matrix.squeeze(1)
    val_list = loadpickle(args.val_file)
    from CNNs.datasets.multilabel import get_val_simple_transform
    val_transform = get_val_simple_transform()
    model.eval()

    correct = 0
    total = len(val_list) * 1.0  # float so the final division is exact
    for i, (input_image_file, target, _) in enumerate(val_list):
        # measure data loading time

        image_path = os.path.join(args.data_dir, input_image_file)
        input_image = default_loader(image_path)
        input_image = val_transform(input_image)

        if args.gpu is not None:
            input_image = input_image.cuda(args.gpu, non_blocking=True)
        input_image = input_image.unsqueeze(0).cuda()

        # target_idx = target.nonzero() [:,1]

        # compute output
        output, output_proj = model(input_image)

        output_proj = output_proj.cpu().data.numpy()

        # Nearest emotion-label word vector is the prediction.
        dot_product_label = cosine_similarity(output_proj, label_matrix)[0]
        output_label = np.argmax(dot_product_label)
        # NOTE(review): output_label indexes label_words (dict-iteration
        # order of tag_wordvectors) while target is used with idx2emotion
        # below -- confirm both index spaces use the same ordering.
        if output_label == target:
            correct += 1

        # Also report the 10 tags closest to the image embedding.
        dot_product_tag = cosine_similarity(output_proj, tag_matrix)[0]
        out_tags = np.argsort(dot_product_tag)[::-1][:10]

        print("* {} Image: {} GT label: {}, predicted label: {}".format(
            i, input_image_file, idx2emotion[target], idx2label[output_label]))
        print(" == closest tags: {}".format(', '.join([
            '{}({:.02f})'.format(idx2tag[x], dot_product_tag[x])
            for x in out_tags
        ])))
    print("Accuracy {:.4f}".format(correct / total))
# --- EMOTIC annotation pre-processing (top-level script fragment) ---
# Loads the raw EMOTIC annotation list for one split and normalizes each
# entry so that 'person' and 'annotations_categories' are always lists.
# The body of the innermost loop continues beyond this excerpt.
dataset_directory = '/home/zwei/datasets/PublicEmotion/EMOTIC'





data_split = 'test'
text_annotation_file = os.path.join(dataset_directory, 'annotations/samples', '{}.txt'.format(data_split))

# "annotaitons" (sic) -- existing name kept as-is; renaming is out of scope here.
annotaitons_person_crop = []
annotaitons_whole_image = []
# load_json_list returns a list; element 0 holds the annotation records.
raw_annotation_list = load_json_list(text_annotation_file)[0]

for s_annotation in tqdm.tqdm(raw_annotation_list, desc="Processing data"):
    s_file_name = os.path.join(s_annotation['folder'],  s_annotation['filename'])
    s_file_name_stem = get_stem(s_file_name)
    s_file_extension = s_file_name.split('.')[-1]
    # A single-person image is stored as a bare dict; wrap it in a list so
    # the loop below can treat both cases uniformly.
    if isinstance(s_annotation['person'], list):
        pass
    else:
        s_annotation['person'] = [s_annotation['person']]


    for s_person_idx, s_person in enumerate(s_annotation['person']):
        s_bbox = s_person['body_bbox']

        annotated_categories = []
        # Same normalization for the per-person category annotations.
        if not isinstance(s_person['annotations_categories'], list):
            s_person['annotations_categories'] = [s_person['annotations_categories']]
            s_person['combined_continuous'] = s_person['annotations_continuous']
        for s_single_annotation in s_person['annotations_categories']:  # body continues beyond this excerpt
def main():
    """Extract CNN features for every image in a dataset and save them.

    Uses a feature-extractor model (``resnet50_feature_extractor``) on a
    single GPU with batch size 1, writing either one ``.npy`` file per image
    (``args.individual_feat``) or a single pickled ``{rel_path: feature}``
    dict.
    """
    best_prec1 = 0  # unused in this extraction-only script

    # NOTE(review): `parser.parser.parse_args()` implies `parser` here is a
    # module exposing a `parser` object (unlike the sibling scripts that
    # build argparse locally) -- confirm against the file's imports.
    args = parser.parser.parse_args()
    if args.config is not None:
        args = parse_config(args.config)

    script_name_stem = get_stem(__file__)  # currently unused
    current_time_str = get_date_str()

    # Features must go somewhere explicit; refuse to guess a directory.
    if args.save_directory is None:
        raise FileNotFoundError(
            "Saving directory should be specified for feature extraction tasks"
        )
    save_directory = get_dir(args.save_directory)

    print("Save to {}".format(save_directory))
    log_file = os.path.join(save_directory,
                            'log-{0}.txt'.format(current_time_str))
    logger = log_utils.get_logger(log_file)
    log_utils.print_config(vars(args), logger)
    print_func = logger.info
    args.log_file = log_file

    # Must be set before any CUDA context is created to take effect.
    if args.device:
        os.environ["CUDA_VISIBLE_DEVICES"] = args.device

    if args.seed is not None:
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        cudnn.deterministic = True
        warnings.warn('You have chosen to seed training. '
                      'This will turn on the CUDNN deterministic setting, '
                      'which can slow down your training considerably! '
                      'You may see unexpected behavior when restarting '
                      'from checkpoints.')

    if args.gpu is not None:
        warnings.warn(
            'You have chosen a specific GPU. This will completely disable data parallelism.'
        )

    args.distributed = args.world_size > 1

    if args.distributed:
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size)

    # Only the dedicated feature-extractor architecture is supported.
    if args.arch == 'resnet50_feature_extractor':
        print_func("=> using pre-trained model '{}' to LOAD FEATURES".format(
            args.arch))
        model = models.__dict__[args.arch](pretrained=True,
                                           num_classes=args.num_classes,
                                           param_name=args.paramname)

    else:
        print_func(
            "This is only for feature extractors!, Please double check the parameters!"
        )
        return

    # if args.freeze:
    #     model = CNN_utils.freeze_all_except_fc(model)

    # Single-GPU only: the loop below runs the dataset one sample at a time.
    if args.gpu is not None:
        model = model.cuda(args.gpu)
    else:
        print_func(
            'Please only specify one GPU since we are working in batch size 1 model'
        )
        return

    cudnn.benchmark = True

    model_total_params = sum(p.numel() for p in model.parameters())
    model_grad_params = sum(p.numel() for p in model.parameters()
                            if p.requires_grad)
    print_func("Total Parameters: {0}\t Gradient Parameters: {1}".format(
        model_total_params, model_grad_params))

    # Data loading code
    val_dataset = get_instance(custom_datasets,
                               '{0}'.format(args.dataset.name), args,
                               **args.dataset.args)
    import tqdm
    import numpy as np

    # Output layout: per-image .npy files, or one big pickled dict.
    if args.individual_feat:
        feature_save_directory = get_dir(
            os.path.join(save_directory, 'individual-features'))
        created_paths = set()  # directories already created under the feature root
    else:
        data_dict = {}
        feature_save_directory = os.path.join(save_directory, 'feature.pkl')

    model.eval()
    for s_data in tqdm.tqdm(val_dataset, desc="Extracting Features"):
        # The dataset yields None for unreadable samples; skip them.
        if s_data is None:
            continue
        s_image_name = s_data[1]
        s_image_data = s_data[0]
        if args.gpu is not None:
            s_image_data = s_image_data.cuda(args.gpu, non_blocking=True)

        # unsqueeze_(0): add the batch dimension in place (batch size 1).
        output = model(s_image_data.unsqueeze_(0))
        output = output.cpu().data.numpy()
        # Keep only the trailing rel_path_depth path components as the key.
        image_rel_path = os.path.join(
            *(s_image_name.split(os.sep)[-args.rel_path_depth:]))

        if args.individual_feat:
            image_directory = os.path.dirname(image_rel_path)
            # Create each output subdirectory only once, then reuse it.
            if image_directory in created_paths:
                np.save(
                    os.path.join(feature_save_directory,
                                 '{}.npy'.format(image_rel_path)), output)
            else:
                get_dir(os.path.join(feature_save_directory, image_directory))
                np.save(
                    os.path.join(feature_save_directory,
                                 '{}.npy'.format(image_rel_path)), output)
                created_paths.add(image_directory)
        else:
            data_dict[image_rel_path] = output
        # image_name = os.path.basename(s_image_name)
        #
        # if args.individual_feat:
        #         # image_name = os.path.basename(s_image_name)
        #
        #         np.save(os.path.join(feature_save_directory, '{}.npy'.format(image_name)), output)
        #         # created_paths.add(image_directory)
        # else:
        #         data_dict[get_stem(image_name)] = output

    if args.individual_feat:
        print_func("Done")
    else:
        from PyUtils.pickle_utils import save2pickle
        print_func("Saving to a single big file!")

        save2pickle(feature_save_directory, data_dict)
        print_func("Done")
# ---- Esempio n. 5 ("Example no. 5" -- separator left over from the scraped
# code listing; commented out so the file remains parseable) ----
def main():
    """Evaluate an embedding CNN on Emotic with per-emotion word lists.

    Each Emotic emotion is described by a comma-separated list of words
    (``emotion_full_words_690``); an image's score for an emotion is the
    mean cosine similarity between the image embedding and that emotion's
    word vectors. Reports per-emotion and mean average precision.
    """

    import argparse
    parser = argparse.ArgumentParser(
        description="Pytorch Image CNN training from Configure Files")
    parser.add_argument(
        '--config_file',
        required=True,
        help="This scripts only accepts parameters from Json files")
    input_args = parser.parse_args()

    config_file = input_args.config_file

    args = parse_config(config_file)
    if args.name is None:
        args.name = get_stem(config_file)

    torch.set_default_tensor_type('torch.FloatTensor')
    # best_prec1 = 0

    args.script_name = get_stem(__file__)
    # current_time_str = get_date_str()

    print_func = print

    # Must be set before any CUDA context is created to take effect.
    if args.device:
        os.environ["CUDA_VISIBLE_DEVICES"] = args.device

    if args.seed is not None:
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        cudnn.deterministic = True
        warnings.warn('You have chosen to seed training. '
                      'This will turn on the CUDNN deterministic setting, '
                      'which can slow down your training considerably! '
                      'You may see unexpected behavior when restarting '
                      'from checkpoints.')

    if args.gpu is not None:
        warnings.warn('You have chosen a specific GPU. This will completely '
                      'disable data parallelism.')

    args.distributed = args.world_size > 1

    if args.distributed:
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size)

    # Backbone is looked up by name in the project-local `models` registry.
    if args.pretrained:
        print_func("=> using pre-trained model '{}'".format(args.arch))
        model = models.__dict__[args.arch](pretrained=True,
                                           num_classes=args.num_classes)
    else:
        print_func("=> creating model '{}'".format(args.arch))
        model = models.__dict__[args.arch](pretrained=False,
                                           num_classes=args.num_classes)

    if args.gpu is not None:
        model = model.cuda(args.gpu)
    elif args.distributed:
        model.cuda()
        model = torch.nn.parallel.DistributedDataParallel(model)
    else:
        if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
            model.features = torch.nn.DataParallel(model.features)
            model.cuda()
        else:
            # DataParallel wrapping: checkpoint keys below carry 'module.' prefixes.
            model = torch.nn.DataParallel(model).cuda()
            # model = model.cuda()

    # A trained visual checkpoint is required; strict load (keys must match).
    if args.visual_model:
        if os.path.isfile(args.visual_model):
            print_func("=> loading checkpoint '{}'".format(args.visual_model))
            checkpoint = torch.load(args.visual_model)
            model.load_state_dict(checkpoint['state_dict'], strict=True)
            # import collections
            # if isinstance(checkpoint, collections.OrderedDict):
            #     load_state_dict(model, checkpoint, exclude_layers=['fc.weight', 'fc.bias'])
            #
            #
            # else:
            #     load_state_dict(model, checkpoint['state_dict'], exclude_layers=['module.fc.weight', 'module.fc.bias'])
            #     print_func("=> loaded checkpoint '{}' (epoch {})"
            #           .format(args.visual_model, checkpoint['epoch']))
        else:
            print_func("=> no checkpoint found at '{}'".format(
                args.visual_model))
            return
    else:
        print_func(
            "=> This script is for fine-tuning only, please double check '{}'".
            format(args.visual_model))
        print_func("Now using randomly initialized parameters!")

    cudnn.benchmark = True

    from PyUtils.pickle_utils import loadpickle

    import numpy as np
    from PublicEmotionDatasets.Emotic.constants import emotion_full_words_690 as emotion_self_words

    from torchvision.datasets.folder import default_loader
    tag_wordvectors = loadpickle(args.text_embed)

    print_func(" => loading word2vec parameters: {}".format(args.text_embed))

    # Per-emotion record: word-vector matrix for its description words plus
    # accumulators for predicted scores and binary ground-truth labels.
    emotic_emotion_explaintations = {}

    for x_key in emotion_self_words:
        x_words = emotion_self_words[x_key].split(',')
        # Every description word must exist in the word-vector dict.
        x_feature = [tag_wordvectors[x] for x in x_words]

        item = {}
        item['pred'] = []
        item['label'] = []
        item['target_matrix'] = np.array(x_feature)
        item['description'] = x_words
        emotic_emotion_explaintations[x_key] = item

    val_list = loadpickle(args.val_file)
    image_directory = args.data_dir
    from CNNs.datasets.multilabel import get_val_simple_transform
    val_transform = get_val_simple_transform()
    model.eval()

    import tqdm
    # Each val record is (image_file, target, _, _); target supports
    # .most_common(), i.e. it behaves like a collections.Counter.
    for i, (input_image_file, target, _,
            _) in tqdm.tqdm(enumerate(val_list),
                            desc="Evaluating Peace",
                            total=len(val_list)):
        # measure data loading time

        image_path = os.path.join(image_directory, input_image_file)
        input_image = default_loader(image_path)
        input_image = val_transform(input_image)

        if args.gpu is not None:
            input_image = input_image.cuda(args.gpu, non_blocking=True)
        input_image = input_image.unsqueeze(0).cuda()

        # target_idx = target.nonzero() [:,1]

        # compute output
        output, output_proj = model(input_image)

        output_proj = output_proj.cpu().data.numpy()
        target_labels = set([x[0] for x in target.most_common()])

        # Score every emotion for this image and record the binary label.
        for x_key in emotic_emotion_explaintations:

            dot_product_label = cosine_similarity(
                output_proj,
                emotic_emotion_explaintations[x_key]['target_matrix'])[0]
            # Emotion score = mean similarity over its description words.
            pred_score = np.average(dot_product_label)
            emotic_emotion_explaintations[x_key]['pred'].append(pred_score)
            if x_key in target_labels:
                emotic_emotion_explaintations[x_key]['label'].append(1)
            else:
                emotic_emotion_explaintations[x_key]['label'].append(0)

    from sklearn.metrics import average_precision_score
    full_AP = []
    for x_key in emotic_emotion_explaintations:
        full_pred = np.array(emotic_emotion_explaintations[x_key]['pred'])
        full_label = np.array(emotic_emotion_explaintations[x_key]['label'])
        AP = average_precision_score(full_label, full_pred)
        # AP is NaN when an emotion has no positive samples; skip it so the
        # mean below stays finite.
        if np.isnan(AP):
            print("{} is Nan".format(x_key))
            continue
        full_AP.append(AP)
        print("{}\t{:.4f}".format(x_key, AP * 100))
    AvgAP = np.mean(full_AP)
    print("Avg AP: {:.2f}".format(AvgAP * 100))
def main():
    """Extract CNN features from a pickled file list and save them to disk.

    Variant of the feature-extraction script that reads samples from a
    pickled list (``args.val_file``) and loads images itself, writing either
    one ``.npy`` per image (``args.individual_feat``) or a single pickled
    ``{rel_path: feature}`` dict under the checkpoint directory.
    """

    import argparse
    parser = argparse.ArgumentParser(
        description="Pytorch Image CNN training from Configure Files")
    parser.add_argument(
        '--config_file',
        required=True,
        help="This scripts only accepts parameters from Json files")
    input_args = parser.parse_args()

    config_file = input_args.config_file

    args = parse_config(config_file)
    if args.name is None:
        args.name = get_stem(config_file)

    torch.set_default_tensor_type('torch.FloatTensor')
    best_prec1 = 0  # unused in this extraction-only script

    args.script_name = get_stem(__file__)
    current_time_str = get_date_str()
    # Output goes under <project_root>/<ckpts_dir>/<name>/<ID>-<timestamp>
    # unless an explicit save_directory is configured.
    if args.save_directory is None:
        save_directory = get_dir(
            os.path.join(project_root, args.ckpts_dir,
                         '{:s}'.format(args.name),
                         '{:s}-{:s}'.format(args.ID, current_time_str)))
    else:
        save_directory = get_dir(
            os.path.join(project_root, args.ckpts_dir, args.save_directory))

    print("Save to {}".format(save_directory))
    log_file = os.path.join(save_directory,
                            'log-{0}.txt'.format(current_time_str))
    logger = log_utils.get_logger(log_file)
    log_utils.print_config(vars(args), logger)

    # Route all subsequent status messages through the file logger.
    print_func = logger.info
    print_func('ConfigFile: {}'.format(config_file))
    args.log_file = log_file

    # Must be set before any CUDA context is created to take effect.
    if args.device:
        os.environ["CUDA_VISIBLE_DEVICES"] = args.device

    if args.seed is not None:
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        cudnn.deterministic = True
        warnings.warn('You have chosen to seed training. '
                      'This will turn on the CUDNN deterministic setting, '
                      'which can slow down your training considerably! '
                      'You may see unexpected behavior when restarting '
                      'from checkpoints.')

    if args.gpu is not None:
        warnings.warn('You have chosen a specific GPU. This will completely '
                      'disable data parallelism.')

    args.distributed = args.world_size > 1

    if args.distributed:
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size)

    # Backbone is looked up by name in the project-local `models` registry.
    if args.pretrained:
        print_func("=> using pre-trained model '{}'".format(args.arch))
        model = models.__dict__[args.arch](pretrained=True,
                                           num_classes=args.num_classes)
    else:
        print_func("=> creating model '{}'".format(args.arch))
        model = models.__dict__[args.arch](pretrained=False,
                                           num_classes=args.num_classes)

    if args.freeze:
        model = CNN_utils.freeze_all_except_fc(model)

    # Single-GPU or distributed only; the plain multi-GPU path is rejected
    # because extraction below runs with batch size 1.
    if args.gpu is not None:
        model = model.cuda(args.gpu)
    elif args.distributed:
        model.cuda()
        model = torch.nn.parallel.DistributedDataParallel(model)
    else:
        print_func(
            'Please only specify one GPU since we are working in batch size 1 model'
        )
        return

    if args.resume:
        if os.path.isfile(args.resume):
            print_func("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            import collections
            # Fine-tuning: drop the fc head; evaluation: strict full load.
            # A raw OrderedDict is a bare state_dict; otherwise expect the
            # full checkpoint dict with 'state_dict' / 'epoch' keys.
            if not args.evaluate:
                if isinstance(checkpoint, collections.OrderedDict):
                    load_state_dict(model,
                                    checkpoint,
                                    exclude_layers=['fc.weight', 'fc.bias'])

                else:
                    load_state_dict(
                        model,
                        checkpoint['state_dict'],
                        exclude_layers=['module.fc.weight', 'module.fc.bias'])
                    print_func("=> loaded checkpoint '{}' (epoch {})".format(
                        args.resume, checkpoint['epoch']))
            else:
                if isinstance(checkpoint, collections.OrderedDict):
                    load_state_dict(model, checkpoint, strict=True)

                else:
                    load_state_dict(model,
                                    checkpoint['state_dict'],
                                    strict=True)
                    print_func("=> loaded checkpoint '{}' (epoch {})".format(
                        args.resume, checkpoint['epoch']))
        else:
            print_func("=> no checkpoint found at '{}'".format(args.resume))
            return
    else:
        print_func(
            "=> This script is for fine-tuning only, please double check '{}'".
            format(args.resume))
        print_func("Now using randomly initialized parameters!")

    cudnn.benchmark = True

    model_total_params = sum(p.numel() for p in model.parameters())
    model_grad_params = sum(p.numel() for p in model.parameters()
                            if p.requires_grad)
    print_func("Total Parameters: {0}\t Gradient Parameters: {1}".format(
        model_total_params, model_grad_params))

    # Data loading code
    # val_dataset = get_instance(custom_datasets, '{0}'.format(args.valloader), args)
    from PyUtils.pickle_utils import loadpickle
    from torchvision.datasets.folder import default_loader

    # Plain pickled list of records; element 0 of each record is the
    # image path relative to args.data_dir.
    val_dataset = loadpickle(args.val_file)
    image_directory = args.data_dir
    from CNNs.datasets.multilabel import get_val_simple_transform
    val_transform = get_val_simple_transform()
    import tqdm
    import numpy as np

    # Output layout: per-image .npy files, or one big pickled dict.
    if args.individual_feat:
        feature_save_directory = get_dir(
            os.path.join(save_directory, 'individual-features'))
        created_paths = set()
    else:
        data_dict = {}
        feature_save_directory = os.path.join(save_directory, 'feature.pkl')

    model.eval()

    for s_data in tqdm.tqdm(val_dataset, desc="Extracting Features"):
        if s_data is None:
            continue

        image_path = os.path.join(image_directory, s_data[0])

        # NOTE(review): bare except deliberately skips unreadable images,
        # but it also masks unrelated errors (e.g. KeyboardInterrupt on
        # py2) -- consider narrowing to OSError/IOError.
        try:
            input_image = default_loader(image_path)
        except:
            print("WARN: {} Problematic!, Skip!".format(image_path))

            continue

        input_image = val_transform(input_image)

        if args.gpu is not None:
            input_image = input_image.cuda(args.gpu, non_blocking=True)

        # unsqueeze_(0): add the batch dimension in place (batch size 1).
        output = model(input_image.unsqueeze_(0))
        output = output.cpu().data.numpy()
        # image_rel_path = os.path.join(*(s_image_name.split(os.sep)[-int(args.rel_path_depth):]))

        if args.individual_feat:
            # NOTE(review): unlike the sibling extractor, `image_directory`
            # here is the constant dataset root, so only one directory is
            # ever created; if s_data[0] contains sub-directories their
            # output folders are never created -- confirm np.save targets
            # are always flat file names.
            if image_directory in created_paths:
                np.save(
                    os.path.join(feature_save_directory,
                                 '{}.npy'.format(s_data[0])), output)
            else:
                get_dir(os.path.join(feature_save_directory, image_directory))
                np.save(
                    os.path.join(feature_save_directory,
                                 '{}.npy'.format(s_data[0])), output)
                created_paths.add(image_directory)
        else:
            data_dict[s_data[0]] = output
        # image_name = os.path.basename(s_image_name)
        #
        # if args.individual_feat:
        #         # image_name = os.path.basename(s_image_name)
        #
        #         np.save(os.path.join(feature_save_directory, '{}.npy'.format(image_name)), output)
        #         # created_paths.add(image_directory)
        # else:
        #         data_dict[get_stem(image_name)] = output

    if args.individual_feat:
        print_func("Done")
    else:
        from PyUtils.pickle_utils import save2pickle
        print_func("Saving to a single big file!")

        save2pickle(feature_save_directory, data_dict)
        print_func("Done")
def main():
    """Evaluate a fine-tuned image CNN on the EMOTIC test split.

    Reads all parameters from a JSON config (``--config_file``), restores
    the image model from ``args.resume``, loads a separately trained text
    CNN plus tag word-vectors, and scores each test image by the maximum
    cosine similarity between the image's projected feature and the word
    vectors of a hand-picked keyword list (the 'Peace' category).  Prints
    the average precision against the ground-truth labels.

    Side effects: console output and CUDA usage only; nothing is written
    to disk.
    """

    import argparse
    parser = argparse.ArgumentParser(
        description="Pytorch Image CNN training from Configure Files")
    parser.add_argument(
        '--config_file',
        required=True,
        help="This scripts only accepts parameters from Json files")
    input_args = parser.parse_args()

    config_file = input_args.config_file

    args = parse_config(config_file)
    if args.name is None:
        args.name = get_stem(config_file)

    torch.set_default_tensor_type('torch.FloatTensor')

    args.script_name = get_stem(__file__)
    current_time_str = get_date_str()

    print_func = print

    if args.device:
        os.environ["CUDA_VISIBLE_DEVICES"] = args.device

    if args.seed is not None:
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        cudnn.deterministic = True
        warnings.warn('You have chosen to seed training. '
                      'This will turn on the CUDNN deterministic setting, '
                      'which can slow down your training considerably! '
                      'You may see unexpected behavior when restarting '
                      'from checkpoints.')

    if args.gpu is not None:
        warnings.warn('You have chosen a specific GPU. This will completely '
                      'disable data parallelism.')

    args.distributed = args.world_size > 1

    if args.distributed:
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size)

    # Build the image model; head size comes from the config.
    if args.pretrained:
        print_func("=> using pre-trained model '{}'".format(args.arch))
        model = models.__dict__[args.arch](pretrained=True,
                                           num_classes=args.num_classes)
    else:
        print_func("=> creating model '{}'".format(args.arch))
        model = models.__dict__[args.arch](pretrained=False,
                                           num_classes=args.num_classes)

    if args.gpu is not None:
        model = model.cuda(args.gpu)
    elif args.distributed:
        model.cuda()
        model = torch.nn.parallel.DistributedDataParallel(model)
    else:
        if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
            model.features = torch.nn.DataParallel(model.features)
            model.cuda()
        else:
            # model = torch.nn.DataParallel(model).cuda()
            model = model.cuda()

    # Restore fine-tuned weights.  The fc layers are excluded because their
    # shape may differ between checkpoint and this config.
    if args.resume:
        if os.path.isfile(args.resume):
            print_func("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            import collections
            if isinstance(checkpoint, collections.OrderedDict):
                # Raw state_dict checkpoint.
                load_state_dict(model,
                                checkpoint,
                                exclude_layers=['fc.weight', 'fc.bias'])
            else:
                # Full training checkpoint: {'state_dict': ..., 'epoch': ...}.
                load_state_dict(
                    model,
                    checkpoint['state_dict'],
                    exclude_layers=['module.fc.weight', 'module.fc.bias'])
                print_func("=> loaded checkpoint '{}' (epoch {})".format(
                    args.resume, checkpoint['epoch']))
        else:
            print_func("=> no checkpoint found at '{}'".format(args.resume))
            return
    else:
        print_func(
            "=> This script is for fine-tuning only, please double check '{}'".
            format(args.resume))
        print_func("Now using randomly initialized parameters!")

    cudnn.benchmark = True

    from PyUtils.pickle_utils import loadpickle
    import numpy as np
    from torchvision.datasets.folder import default_loader

    # Word vectors for each tag, produced by the text-classification run.
    tag_wordvectors = loadpickle(
        '/home/zwei/Dev/AttributeNet3/TextClassification/visualizations/Embeddings/FullVocab_BN_transformed_l2_regularization.pkl'
    )
    tag_matrix = []

    # Restore the text CNN (used to embed the keyword "sentence").
    from TextClassification.model_DAN_2constraints import CNN
    text_ckpt = torch.load(
        '/home/zwei/Dev/AttributeNet3/TextClassification/models/model_feature_regularization.pth.tar'
    )
    text_saved_model = text_ckpt['model']
    params = {
        "MAX_SENT_LEN": text_saved_model['MAX_SENT_LEN'],
        "BATCH_SIZE": text_saved_model['BATCH_SIZE'],
        "WORD_DIM": text_saved_model['WORD_DIM'],
        "FILTER_NUM": text_saved_model['FILTER_NUM'],
        "VOCAB_SIZE": text_saved_model['VOCAB_SIZE'],
        "CLASS_SIZE": text_saved_model['CLASS_SIZE'],
        "DROPOUT_PROB": 0.5,
    }

    text_model = CNN(**params).cuda()

    text_model.load_state_dict(text_saved_model.state_dict(), strict=True)
    embedding_tag2idx = text_ckpt['tag2idx']
    text_model.eval()

    from torch.autograd import Variable

    target_keywords_list = ['peace', 'relaxed', 'satisfied']  # peace

    emotion_category = 'Peace'

    # BUG FIX: the original padded the raw keyword *strings* with
    # len(text_saved_model['VOCAB_SIZE']) -- a TypeError on an int -- and
    # strings cannot go into a LongTensor anyway.  Map keywords to embedding
    # indices and pad with the VOCAB_SIZE slot (assumed to be the padding
    # index for this text CNN -- TODO confirm against the training code).
    target_padded_list = [embedding_tag2idx[x] for x in target_keywords_list]
    target_padded_list += [text_saved_model['VOCAB_SIZE']] * (
        text_saved_model['MAX_SENT_LEN'] - len(target_keywords_list))
    target_vector = Variable(
        torch.LongTensor(target_padded_list).unsqueeze(0)).cuda()

    # BUG FIX: the token vector must go through the *text* model, not the
    # image CNN ('model'): the image model returns 2 values (see the eval
    # loop below), while 3 are unpacked here.
    target_feature, _, _ = text_model(target_vector)
    target_feature = target_feature.squeeze(0)
    # NOTE(review): target_feature is currently unused; scoring below relies
    # on the tag word-vectors instead.

    for x_tag in target_keywords_list:
        tag_matrix.append(tag_wordvectors[x_tag])

    tag_matrix = np.array(tag_matrix)
    tag_matrix = tag_matrix.squeeze(1)
    val_list = loadpickle(
        '/home/zwei/datasets/PublicEmotion/EMOTIC/z_data/test_image_based_single_person_only.pkl'
    )
    image_directory = '/home/zwei/datasets/PublicEmotion/EMOTIC/images'
    from CNNs.datasets.multilabel import get_val_simple_transform
    val_transform = get_val_simple_transform()
    model.eval()

    full_label = []
    full_pred = []
    import tqdm
    for i, (input_image_file, target, _,
            _) in tqdm.tqdm(enumerate(val_list),
                            desc="Evaluating Peace",
                            total=len(val_list)):
        image_path = os.path.join(image_directory, input_image_file)
        input_image = default_loader(image_path)
        input_image = val_transform(input_image)

        if args.gpu is not None:
            input_image = input_image.cuda(args.gpu, non_blocking=True)
        input_image = input_image.unsqueeze(0).cuda()

        # The image model returns (logits, projected feature).
        output, output_proj = model(input_image)

        output_proj = output_proj.cpu().data.numpy()

        # Score = best cosine similarity to any keyword word-vector.
        dot_product_label = cosine_similarity(output_proj, tag_matrix)[0]
        pred_score = np.max(dot_product_label)
        full_pred.append(pred_score)
        # 'target' looks like a Counter-style multi-label annotation --
        # presumably (label, count) pairs; verify against the dataset pickle.
        target_labels = set([x[0] for x in target.most_common()])
        if emotion_category in target_labels:
            full_label.append(1)
        else:
            full_label.append(0)

    from sklearn.metrics import average_precision_score
    full_pred = np.array(full_pred)
    full_label = np.array(full_label)
    AP = average_precision_score(full_label, full_pred)
    # BUG FIX: the computed AP was discarded (only a debug print remained).
    print_func("Average precision for '{}': {:.4f}".format(
        emotion_category, AP))
# Esempio n. 8  (Italian: "Example No. 8" -- separator left over from a
# scraped code collection; commented out so this file parses as Python)
# 0
def main():
    """Joint fine-tuning of one shared CNN over ``args.num_datasets`` datasets.

    All hyper-parameters are read from a JSON config (``--config_file``).
    Per-dataset train/val/test loaders are built in one pass (indexed via
    the ``args.ind`` side channel consumed by ``get_instance``); each epoch
    trains on the interleaved loaders, then validates, checkpoints, and
    tests once per dataset.
    """

    import argparse
    parser = argparse.ArgumentParser(
        description="Pytorch Image CNN training from Configure Files")
    parser.add_argument(
        '--config_file',
        required=True,
        help="This scripts only accepts parameters from Json files")
    input_args = parser.parse_args()

    config_file = input_args.config_file

    args = parse_config(config_file)
    if args.name is None:
        args.name = get_stem(config_file)

    torch.set_default_tensor_type('torch.FloatTensor')

    args.script_name = get_stem(__file__)
    current_time_str = get_date_str()
    # Checkpoint directory: fresh run -> ckpts/<name>/<ID-timestamp>;
    # resumed run -> alongside the checkpoint unless overridden.
    if args.resume is None:
        if args.save_directory is None:
            save_directory = get_dir(
                os.path.join(project_root, 'ckpts', '{:s}'.format(args.name),
                             '{:s}-{:s}'.format(args.ID, current_time_str)))
        else:
            save_directory = get_dir(
                os.path.join(project_root, 'ckpts', args.save_directory))
    else:
        if args.save_directory is None:
            save_directory = os.path.dirname(args.resume)
        else:
            current_time_str = get_date_str()
            save_directory = get_dir(
                os.path.join(args.save_directory, '{:s}'.format(args.name),
                             '{:s}-{:s}'.format(args.ID, current_time_str)))
    print("Save to {}".format(save_directory))
    log_file = os.path.join(save_directory,
                            'log-{0}.txt'.format(current_time_str))
    logger = log_utils.get_logger(log_file)
    log_utils.print_config(vars(args), logger)

    print_func = logger.info
    print_func('ConfigFile: {}'.format(config_file))
    args.log_file = log_file

    if args.device:
        os.environ["CUDA_VISIBLE_DEVICES"] = args.device

    if args.seed is not None:
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        cudnn.deterministic = True
        warnings.warn('You have chosen to seed training. '
                      'This will turn on the CUDNN deterministic setting, '
                      'which can slow down your training considerably! '
                      'You may see unexpected behavior when restarting '
                      'from checkpoints.')

    if args.gpu is not None:
        warnings.warn('You have chosen a specific GPU. This will completely '
                      'disable data parallelism.')

    #args.distributed = args.world_size > 1
    # Distributed mode is deliberately disabled for this script.
    args.distributed = False

    if args.distributed:
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size)

    num_datasets = args.num_datasets
    # model_list = [None for x in range(num_datasets)]
    # for j in range(num_datasets):
    # One shared backbone for all datasets; head size is args.class_len.
    if args.pretrained:
        print_func("=> using pre-trained model '{}'".format(args.arch))
        model = models.__dict__[args.arch](pretrained=True,
                                           num_classes=args.class_len)
    else:
        print_func("=> creating model '{}'".format(args.arch))
        model = models.__dict__[args.arch](pretrained=False,
                                           num_classes=args.class_len)

    if args.freeze:
        model = CNN_utils.freeze_all_except_fc(model)

    if args.gpu is not None:
        model = model.cuda(args.gpu)
    elif args.distributed:
        model.cuda()
        model = torch.nn.parallel.DistributedDataParallel(model)
    else:
        if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
            model.features = torch.nn.DataParallel(model.features)
            model.cuda()
        else:
            model = torch.nn.DataParallel(model).cuda()

    # # define loss function (criterion) and optimizer
    # # # Update: here
    # # config = {'loss': {'type': 'simpleCrossEntropyLoss', 'args': {'param': None}}}
    # # criterion = get_instance(loss_funcs, 'loss', config)
    # # criterion = criterion.cuda(args.gpu)
    #
    # ignore_index=-1 lets unlabeled samples be skipped by the loss.
    criterion = nn.CrossEntropyLoss(ignore_index=-1).cuda(args.gpu)
    # criterion = MclassCrossEntropyLoss().cuda(args.gpu)

    # params = list()
    # for j in range(num_datasets):
    #     params += list(model_list[j].parameters())

    # Optimize only the parameters left trainable (see args.freeze above).
    optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad,
                                       model.parameters()),
                                lr=args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if args.lr_schedule:
        print_func("Using scheduled learning rate")
        scheduler = lr_scheduler.MultiStepLR(
            optimizer, [int(i) for i in args.lr_schedule.split(',')],
            gamma=0.1)
    else:
        # NOTE(review): this ReduceLROnPlateau scheduler is never stepped in
        # the epoch loop below -- confirm whether that is intentional.
        scheduler = lr_scheduler.ReduceLROnPlateau(optimizer,
                                                   'min',
                                                   patience=args.lr_patience)
    '''
    if args.resume:
        if os.path.isfile(args.resume):
            print_func("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            import collections
            if not args.evaluate:
                if isinstance(checkpoint, collections.OrderedDict):
                    load_state_dict(model, checkpoint, exclude_layers=['fc.weight', 'fc.bias'])


                else:
                    load_state_dict(model, checkpoint['state_dict'], exclude_layers=['module.fc.weight', 'module.fc.bias'])
                    print_func("=> loaded checkpoint '{}' (epoch {})"
                          .format(args.resume, checkpoint['epoch']))
            else:
                if isinstance(checkpoint, collections.OrderedDict):
                    load_state_dict(model, checkpoint, strict=True)
                else:
                    load_state_dict(model, checkpoint['state_dict'], strict=True)
                    print_func("=> loaded checkpoint '{}' (epoch {})"
                               .format(args.resume, checkpoint['epoch']))
        else:
            print_func("=> no checkpoint found at '{}'".format(args.resume))
            return
    '''

    cudnn.benchmark = True

    model_total_params = sum(p.numel() for p in model.parameters())
    model_grad_params = sum(p.numel() for p in model.parameters()
                            if p.requires_grad)
    print_func("Total Parameters: {0}\t Gradient Parameters: {1}".format(
        model_total_params, model_grad_params))

    # Data loading code
    val_loaders = [None for x in range(num_datasets)]
    test_loaders = [None for x in range(num_datasets)]
    train_loaders = [None for x in range(num_datasets)]
    num_iter = 0
    for k in range(num_datasets):
        # args.ind is a side channel read by get_instance / the loaders.
        args.ind = k

        val_dataset = get_instance(custom_datasets, args.val_loader, args)
        # NOTE(review): the *last* dataset's validation loader is skipped
        # (k == num_datasets - 1) -- confirm this special case is intended.
        if val_dataset is None or k == num_datasets - 1:
            val_loaders[args.ind] = None
        else:
            val_loaders[args.ind] = torch.utils.data.DataLoader(
                val_dataset,
                batch_size=args.batch_size,
                shuffle=False,
                num_workers=args.workers,
                pin_memory=True,
                collate_fn=none_collate)

        if hasattr(args, 'test_files') and hasattr(args, 'test_loader'):
            test_dataset = get_instance(custom_datasets, args.test_loader,
                                        args)
            test_loaders[args.ind] = torch.utils.data.DataLoader(
                test_dataset,
                batch_size=args.batch_size,
                shuffle=False,
                num_workers=args.workers,
                pin_memory=True,
                collate_fn=none_collate)
        else:
            # test_dataset = None
            test_loaders[args.ind] = None

        #if args.evaluate:
        #    validate(test_loaders[args.ind], model_list[k], criterion, args, print_func)
        #    return
        # if not args.evaluate: #else:
        #     train_samplers = [None for x in range(num_datasets)]
        #     train_dataset = get_instance(custom_datasets, args.train_loader, args)
        #
        #     if args.distributed:
        #         train_samplers[args.ind] = torch.utils.data.distributed.DistributedSampler(train_dataset)
        #     else:
        #         train_samplers[args.ind] = None
        #
        #     train_loaders[args.ind] = torch.utils.data.DataLoader(
        #         train_dataset, batch_size=args.batch_size, shuffle=(train_samplers[args.ind] is None),
        #         num_workers=args.workers, pin_memory=True, sampler=train_samplers[args.ind], collate_fn=none_collate)
        if not args.evaluate:  #else:
            # train_samplers = [None for x in range(num_datasets)]
            train_dataset = get_instance(custom_datasets, args.train_loader,
                                         args)

            # num_iter tracks the largest dataset so training can cycle the
            # shorter loaders to match it.
            num_iter = max(num_iter, len(train_dataset.samples))
            if args.distributed:
                train_samplers = torch.utils.data.distributed.DistributedSampler(
                    train_dataset)
            else:
                train_samplers = None

            train_loaders[args.ind] = torch.utils.data.DataLoader(
                train_dataset,
                batch_size=args.batch_size,
                shuffle=train_samplers is None,
                num_workers=args.workers,
                pin_memory=True,
                sampler=train_samplers,
                collate_fn=none_collate)
    setattr(args, 'num_iter', num_iter)

    # TRAINING
    best_prec1 = [-1 for _ in range(num_datasets)]
    is_best = [None for _ in range(num_datasets)]
    # Loss-mixing weight consumed by train() -- presumably; verify there.
    setattr(args, 'lam', 0.5)

    start_data_time = time.time()
    train_loads_iters = [iter(train_loaders[x]) for x in range(num_datasets)]
    print_func("Loaded data in {:.3f} s".format(time.time() - start_data_time))
    for epoch in range(args.start_epoch, args.epochs):

        if args.distributed:
            # NOTE(review): train_samplers is a single sampler (or None) left
            # over from the last loop iteration above, yet it is indexed like
            # a list here.  Unreachable while args.distributed is hard-coded
            # False, but broken if distributed mode is re-enabled.
            for x in range(num_datasets):
                train_samplers[x].set_epoch(epoch)
        if args.lr_schedule:
            # CNN_utils.adjust_learning_rate(optimizer, epoch, args.lr)
            scheduler.step()
        current_lr = optimizer.param_groups[0]['lr']

        print_func("Epoch: [{}], learning rate: {}".format(epoch, current_lr))

        # train for one epoch
        train(train_loads_iters, train_loaders, model, criterion, optimizer,
              epoch, args, print_func)

        # evaluate and save
        val_prec1 = [None for x in range(num_datasets)]
        test_prec1 = [None for x in range(num_datasets)]
        for j in range(num_datasets):
            # if j != args.ind:
            #     load_state_dict(model_list[j], model_list[args.ind].state_dict())
            # evaluate on validation set
            if val_loaders[j]:
                val_prec1[j], _ = validate(val_loaders[j], model, criterion,
                                           args, print_func, j)
            else:
                val_prec1[j] = 0
            # remember best prec@1 and save checkpoint
            is_best[j] = val_prec1[j] > best_prec1[j]
            best_prec1[j] = max(val_prec1[j], best_prec1[j])

            # '#' marks checkpoints that did not improve on dataset j.
            if is_best[j]:
                save_ind = j
            else:
                save_ind = "#"
            CNN_utils.save_checkpoint(
                {
                    'epoch': epoch,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1[j],
                    'optimizer': optimizer.state_dict(),
                },
                is_best[j],
                file_directory=save_directory,
                epoch=epoch,
                save_best_only=args.save_best_only,
                ind=save_ind)

            test_prec1[j], _ = validate(test_loaders[j],
                                        model,
                                        criterion,
                                        args,
                                        print_func,
                                        j,
                                        phase='Test')

        print_func("Val precisions: {}".format(val_prec1))
        print_func("Test precisions: {}".format(test_prec1))
def main():
    """Single-dataset CNN training entry point.

    Parameters come either from the command line or, when ``--config`` is
    given, from a JSON config file.  Builds the model (several special
    resnet50 variants supported), optionally resumes from a checkpoint,
    then runs the standard train/validate/checkpoint loop.
    """
    best_prec1 = 0

    # NOTE(review): 'parser' here is a module-level object (parser.parser),
    # unlike the sibling mains that build a local argparse parser -- confirm
    # the intended import.
    args = parser.parser.parse_args()
    config_file = None
    if args.config is not None:
        config_file = args.config
        args = parse_config(args.config)

    script_name_stem = get_stem(__file__)
    current_time_str = get_date_str()
    # Checkpoint directory: fresh run -> ckpts/<name>/<script-ID-timestamp>;
    # resumed run -> alongside the checkpoint.
    if args.resume is None:
        if args.save_directory is None:
            save_directory = get_dir(
                os.path.join(
                    project_root, 'ckpts', '{:s}'.format(args.name),
                    '{:s}-{:s}-{:s}'.format(script_name_stem, args.ID,
                                            current_time_str)))
        else:
            save_directory = get_dir(
                os.path.join(project_root, 'ckpts', args.save_directory))
    else:
        save_directory = os.path.dirname(args.resume)
    print("Save to {}".format(save_directory))
    log_file = os.path.join(save_directory,
                            'log-{0}.txt'.format(current_time_str))
    logger = log_utils.get_logger(log_file)
    log_utils.print_config(vars(args), logger)

    print_func = logger.info
    if config_file is not None:
        print_func('ConfigFile: {}'.format(config_file))
    else:
        print_func('ConfigFile: None, params from argparse')

    args.log_file = log_file

    if args.device:
        os.environ["CUDA_VISIBLE_DEVICES"] = args.device

    if args.seed is not None:
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        cudnn.deterministic = True
        warnings.warn('You have chosen to seed training. '
                      'This will turn on the CUDNN deterministic setting, '
                      'which can slow down your training considerably! '
                      'You may see unexpected behavior when restarting '
                      'from checkpoints.')

    if args.gpu is not None:
        warnings.warn('You have chosen a specific GPU. This will completely '
                      'disable data parallelism.')

    args.distributed = args.world_size > 1

    if args.distributed:
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size)

    # create model
    # Two special resnet50 variants take an extra param_name argument.
    if args.arch == 'resnet50otherinits':
        print_func("=> using pre-trained model '{}'".format(args.arch))
        model = models.__dict__[args.arch](pretrained=True,
                                           num_classes=args.num_classes,
                                           param_name=args.paramname)
    elif args.arch == 'resnet50_feature_extractor':
        print_func("=> using pre-trained model '{}' to LOAD FEATURES".format(
            args.arch))
        model = models.__dict__[args.arch](pretrained=True,
                                           num_classes=args.num_classes,
                                           param_name=args.paramname)

    else:
        if args.pretrained:
            print_func("=> using pre-trained model '{}'".format(args.arch))
            model = models.__dict__[args.arch](pretrained=True,
                                               num_classes=args.num_classes)
        else:
            print_func("=> creating model '{}'".format(args.arch))
            model = models.__dict__[args.arch](pretrained=False,
                                               num_classes=args.num_classes)

    if args.freeze:
        model = CNN_utils.freeze_all_except_fc(model)

    if args.gpu is not None:
        model = model.cuda(args.gpu)
    elif args.distributed:
        model.cuda()
        model = torch.nn.parallel.DistributedDataParallel(model)
    else:
        if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
            model.features = torch.nn.DataParallel(model.features)
            model.cuda()
        else:
            model = torch.nn.DataParallel(model).cuda()

    # define loss function (criterion) and optimizer
    # # Update: here
    # config = {'loss': {'type': 'simpleCrossEntropyLoss', 'args': {'param': None}}}
    # criterion = get_instance(loss_funcs, 'loss', config)
    # criterion = criterion.cuda(args.gpu)

    # ignore_index=-1 lets unlabeled samples be skipped by the loss.
    criterion = nn.CrossEntropyLoss(ignore_index=-1).cuda(args.gpu)

    # Optimize only the parameters left trainable (see args.freeze above).
    optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad,
                                       model.parameters()),
                                lr=args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if args.lr_schedule:
        print_func("Using scheduled learning rate")
        scheduler = lr_scheduler.MultiStepLR(
            optimizer, [int(i) for i in args.lr_schedule.split(',')],
            gamma=0.1)
    else:
        scheduler = lr_scheduler.ReduceLROnPlateau(optimizer,
                                                   'min',
                                                   patience=args.lr_patience)

    # optimizer = torch.optim.SGD(model.parameters(), args.lr,
    #                             momentum=args.momentum,
    #                             weight_decay=args.weight_decay)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print_func("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print_func("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            # NOTE(review): unlike the sibling mains, a missing checkpoint
            # does not abort here; training continues from scratch -- confirm
            # this best-effort behavior is intended.
            print_func("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    model_total_params = sum(p.numel() for p in model.parameters())
    model_grad_params = sum(p.numel() for p in model.parameters()
                            if p.requires_grad)
    print_func("Total Parameters: {0}\t Gradient Parameters: {1}".format(
        model_total_params, model_grad_params))

    # Data loading code
    # Dataset factory is looked up by name ('<dataset>_val' / '<dataset>_train').
    val_dataset = get_instance(custom_datasets,
                               '{0}_val'.format(args.dataset.name), args,
                               **args.dataset.args)
    if val_dataset is None:
        val_loader = None
    else:
        val_loader = torch.utils.data.DataLoader(val_dataset,
                                                 batch_size=args.batch_size,
                                                 shuffle=False,
                                                 num_workers=args.workers,
                                                 pin_memory=True,
                                                 collate_fn=none_collate)

    if args.evaluate:
        validate(val_loader, model, criterion, args, print_func)
        return
    else:

        train_dataset = get_instance(custom_datasets,
                                     '{0}_train'.format(args.dataset.name),
                                     args, **args.dataset.args)

        if args.distributed:
            train_sampler = torch.utils.data.distributed.DistributedSampler(
                train_dataset)
        else:
            train_sampler = None

        train_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=args.batch_size,
            shuffle=(train_sampler is None),
            num_workers=args.workers,
            pin_memory=True,
            sampler=train_sampler,
            collate_fn=none_collate)

    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)
        if args.lr_schedule:
            # CNN_utils.adjust_learning_rate(optimizer, epoch, args.lr)
            scheduler.step()
        current_lr = optimizer.param_groups[0]['lr']

        print_func("Epoch: [{}], learning rate: {}".format(epoch, current_lr))

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, args,
              print_func)

        # evaluate on validation set
        if val_loader:
            prec1, val_loss = validate(val_loader, model, criterion, args,
                                       print_func)
        else:
            prec1 = 0
            val_loss = 0
        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        CNN_utils.save_checkpoint(
            {
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
                'optimizer': optimizer.state_dict(),
            },
            is_best,
            file_directory=save_directory,
            epoch=epoch)

        # ReduceLROnPlateau needs the validation loss each epoch.
        if not args.lr_schedule:
            scheduler.step(val_loss)
def main():
    """Evaluate a fine-tuned visual-emotion CNN on the EMOTIC single-person
    test split using text-embedding targets.

    Pipeline (all data paths are hard-coded to the author's machine):
      1. Parse a JSON config file given via ``--config_file``.
      2. Build the visual CNN (optionally distributed / DataParallel).
      3. Rebuild a DAN text encoder from a checkpoint and a
         Text_Transformation projector; restore the projector and the visual
         backbone from ``args.resume`` (final fc layers excluded).
      4. Encode each EMOTIC emotion's explanation words into a target
         embedding matrix.
      5. Score every test image against every emotion embedding via cosine
         similarity and report per-class and mean average precision.
    """

    import argparse
    parser = argparse.ArgumentParser(
        description="Pytorch Image CNN training from Configure Files")
    parser.add_argument(
        '--config_file',
        required=True,
        help="This scripts only accepts parameters from Json files")
    input_args = parser.parse_args()

    config_file = input_args.config_file

    args = parse_config(config_file)
    if args.name is None:
        # fall back to the config file's stem as the experiment name
        args.name = get_stem(config_file)

    torch.set_default_tensor_type('torch.FloatTensor')

    args.script_name = get_stem(__file__)
    # NOTE(review): removed dead locals the original carried over from the
    # training script (`best_prec1`, `current_time_str`, an unused
    # `tag_wordvectors` pickle load and four never-read tag/label lists).

    print_func = print

    if args.device:
        # restrict visible GPUs before any CUDA context is created
        os.environ["CUDA_VISIBLE_DEVICES"] = args.device

    if args.seed is not None:
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        cudnn.deterministic = True
        warnings.warn('You have chosen to seed training. '
                      'This will turn on the CUDNN deterministic setting, '
                      'which can slow down your training considerably! '
                      'You may see unexpected behavior when restarting '
                      'from checkpoints.')

    if args.gpu is not None:
        warnings.warn('You have chosen a specific GPU. This will completely '
                      'disable data parallelism.')

    args.distributed = args.world_size > 1

    if args.distributed:
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size)

    # Build the visual backbone, optionally with ImageNet-pretrained weights.
    if args.pretrained:
        print_func("=> using pre-trained model '{}'".format(args.arch))
        visual_model = models.__dict__[args.arch](pretrained=True,
                                                  num_classes=args.num_classes)
    else:
        print_func("=> creating model '{}'".format(args.arch))
        visual_model = models.__dict__[args.arch](pretrained=False,
                                                  num_classes=args.num_classes)

    # Device placement: single GPU > distributed > (DataParallel | plain cuda).
    if args.gpu is not None:
        visual_model = visual_model.cuda(args.gpu)
    elif args.distributed:
        visual_model.cuda()
        visual_model = torch.nn.parallel.DistributedDataParallel(visual_model)
    else:
        if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
            # AlexNet/VGG: parallelize only the conv features; their large
            # classifier heads stay on a single device.
            visual_model.features = torch.nn.DataParallel(
                visual_model.features)
            visual_model.cuda()
        else:
            visual_model = visual_model.cuda()

    from PyUtils.pickle_utils import loadpickle

    import numpy as np
    from PublicEmotionDatasets.Emotic.constants import emotion_explainations_words_690 as emotion_self_words

    from torchvision.datasets.folder import default_loader

    # Rebuild the text encoder from its checkpointed hyper-parameters and
    # copy over the saved weights.
    from TextClassification.model_DAN_2constraints import CNN_Embed_v2 as CNN
    text_ckpt = torch.load(
        '/home/zwei/Dev/AttributeNet3/TextClassification/models/model_feature_regularization.pth.tar'
    )
    text_saved_model = text_ckpt['model']
    params = {
        "MAX_SENT_LEN": text_saved_model.MAX_SENT_LEN,
        "BATCH_SIZE": text_saved_model.BATCH_SIZE,
        "WORD_DIM": text_saved_model.WORD_DIM,
        "FILTER_NUM": text_saved_model.FILTER_NUM,
        "VOCAB_SIZE": text_saved_model.VOCAB_SIZE,
        "CLASS_SIZE": text_saved_model.CLASS_SIZE,
        "DROPOUT_PROB": 0.5,
    }

    text_generator = CNN(**params).cuda()

    text_generator.load_state_dict(text_saved_model.state_dict(), strict=True)
    embedding_tag2idx = text_ckpt['tag2idx']
    text_generator.eval()

    # Projector from word embeddings (300-d) to the joint space.
    text_model = Text_Transformation(300, 300, 8)
    text_model = text_model.cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print_func("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            load_state_dict(text_model, checkpoint['text_state_dict'])

            import collections
            # A bare OrderedDict is a raw state_dict; otherwise the checkpoint
            # is a dict wrapping 'state_dict'. The classifier fc weights are
            # excluded because num_classes may differ from the checkpoint.
            if isinstance(checkpoint, collections.OrderedDict):
                load_state_dict(visual_model,
                                checkpoint,
                                exclude_layers=['fc.weight', 'fc.bias'])

            else:
                load_state_dict(
                    visual_model,
                    checkpoint['state_dict'],
                    exclude_layers=['module.fc.weight', 'module.fc.bias'])
                print_func("=> loaded checkpoint '{}' (epoch {})".format(
                    args.resume, checkpoint['epoch']))
        else:
            print_func("=> no checkpoint found at '{}'".format(args.resume))
            return
    else:
        # No checkpoint given: warn loudly but continue with random weights.
        print_func(
            "=> This script is for fine-tuning only, please double check '{}'".
            format(args.resume))
        print_func("Now using randomly initialized parameters!")

    cudnn.benchmark = True

    from torch.autograd import Variable

    # For each emotion, encode its comma-separated explanation words into a
    # target matrix in the joint space.
    emotic_emotion_explaintations = {}

    for x_key in emotion_self_words:
        x_words = emotion_self_words[x_key].split(',')
        # pad the token-id sequence with VOCAB_SIZE+1 up to MAX_SENT_LEN
        x_feature = [embedding_tag2idx[x] for x in x_words] + \
                    [text_saved_model.VOCAB_SIZE+1]*(text_saved_model.MAX_SENT_LEN - len(x_words))
        x_feature = Variable(torch.LongTensor(x_feature).unsqueeze(0)).cuda()

        tag_matrix = text_generator(x_feature)
        _, tag_feature = text_model(tag_matrix)
        item = {}
        item['pred'] = []
        item['label'] = []
        item['target_matrix'] = tag_feature.cpu().data.numpy()
        item['description'] = x_words
        emotic_emotion_explaintations[x_key] = item

    val_list = loadpickle(
        '/home/zwei/datasets/PublicEmotion/EMOTIC/z_data/test_image_based_single_person_only.pkl'
    )
    image_directory = '/home/zwei/datasets/PublicEmotion/EMOTIC/images'
    from CNNs.datasets.multilabel import get_val_simple_transform
    val_transform = get_val_simple_transform()
    visual_model.eval()

    import tqdm
    for i, (input_image_file, target, _,
            _) in tqdm.tqdm(enumerate(val_list),
                            desc="Evaluating Peace",
                            total=len(val_list)):
        # load and preprocess a single image (batch of 1)
        image_path = os.path.join(image_directory, input_image_file)
        input_image = default_loader(image_path)
        input_image = val_transform(input_image)

        if args.gpu is not None:
            input_image = input_image.cuda(args.gpu, non_blocking=True)
        input_image = input_image.unsqueeze(0).cuda()

        # compute the image's projection into the joint embedding space
        output, output_proj = visual_model(input_image)

        output_proj = output_proj.cpu().data.numpy()
        # `target` is Counter-like; its keys are the ground-truth emotions
        target_labels = set([x[0] for x in target.most_common()])

        # score the image against every emotion's word matrix
        for x_key in emotic_emotion_explaintations:

            dot_product_label = cosine_similarity(
                output_proj,
                emotic_emotion_explaintations[x_key]['target_matrix'])[0]
            pred_score = np.average(dot_product_label)
            emotic_emotion_explaintations[x_key]['pred'].append(pred_score)
            if x_key in target_labels:
                emotic_emotion_explaintations[x_key]['label'].append(1)
            else:
                emotic_emotion_explaintations[x_key]['label'].append(0)

    # Per-class average precision; classes with no positive labels yield NaN
    # and are excluded from the mean.
    from sklearn.metrics import average_precision_score
    full_AP = []
    for x_key in emotic_emotion_explaintations:
        full_pred = np.array(emotic_emotion_explaintations[x_key]['pred'])
        full_label = np.array(emotic_emotion_explaintations[x_key]['label'])
        AP = average_precision_score(full_label, full_pred)
        if np.isnan(AP):
            print("{} is Nan".format(x_key))
            continue
        full_AP.append(AP)
        print("{}\t{:.4f}".format(x_key, AP * 100))
    AvgAP = np.mean(full_AP)
    print("Avg AP: {:.2f}".format(AvgAP * 100))
# ---- snippet boundary (scraping artifact; was: "Esempio n. 11" / "0") ----
# Convert "path label" image-list text files into pickled [name, category]
# lists and mirror the referenced images into a target directory tree.
# NOTE(review): relies on `image_list_files`, `src_image_directory`,
# `target_image_directory`, `save2pickle` and `get_stem` defined earlier
# in this file.
create_directories = set()
copy_image_list = []
for s_file in image_list_files:
    image_list = []

    with open(s_file, 'r') as of_:
        for s_line in of_:
            s_line_parts = s_line.strip().split(' ')
            s_image_fullpath = s_line_parts[0]
            s_image_category = int(s_line_parts[1])
            # keep only the last two path components (class_dir/image_name)
            s_image_name_path = os.path.join(
                *s_image_fullpath.split(os.sep)[-2:])
            s_target_full_path = os.path.join(target_image_directory,
                                              s_image_name_path)
            image_list.append([s_image_name_path, s_image_category])
            s_src_full_path = os.path.join(src_image_directory,
                                           s_image_name_path)
            copy_image_list.append([s_src_full_path, s_target_full_path])
            create_directories.add(os.path.dirname(s_target_full_path))
    save2pickle('{}.pkl'.format(get_stem(s_file)), image_list)

for s_created_directory in create_directories:
    # BUG FIX: bare os.mkdir fails when intermediate directories are missing,
    # and the exists()-then-mkdir pattern races; makedirs(exist_ok=True)
    # handles both.
    os.makedirs(s_created_directory, exist_ok=True)

import tqdm
import shutil

for s_copy_item in tqdm.tqdm(copy_image_list):
    shutil.copyfile(s_copy_item[0], s_copy_item[1])
# Created: 14/Mar/2019 17:20

import tqdm
import os
from PyUtils.pickle_utils import loadpickle
from PublicEmotionDatasets.Advertisement.constants import idx2emotion
from PyUtils.file_utils import get_stem
import shutil

# Sample every 500th Advertisement training image, copy it into an examples
# directory under a human-readable "<class_dir>-<stem>-<emotion>.<ext>" name,
# and log the (path, label, emotion) mapping.
image_directory = '/home/zwei/datasets/PublicEmotion/Advertisement/images'

image_annotations = loadpickle(
    '/home/zwei/datasets/PublicEmotion/Advertisement/train_list.pkl')
target_directories = '/home/zwei/Dev/AttributeNet3/PublicEmotionDatasets/Advertisement/convert_stuff/examples'

for idx, s_image_annotation in enumerate(tqdm.tqdm(image_annotations)):
    if idx % 500 == 0:
        s_image_path = os.path.join(image_directory, s_image_annotation[0])
        if os.path.exists(s_image_path):
            # BUG FIX: split('.')[1] picked the wrong segment whenever the
            # relative path contained more than one dot; the extension is the
            # LAST dot-separated segment.
            extension_name = s_image_annotation[0].strip().split('.')[-1]
            # labels are 0-based in the pickle; idx2emotion is keyed 1-based
            target_name = '{}-{}-{}.{}'.format(
                os.path.dirname(s_image_annotation[0]),
                get_stem(s_image_annotation[0]),
                idx2emotion[s_image_annotation[1] + 1], extension_name)
            target_path = os.path.join(target_directories, target_name)
            shutil.copyfile(s_image_path, target_path)
            print("{}\t{}\t{}".format(s_image_annotation[0],
                                      s_image_annotation[1],
                                      idx2emotion[s_image_annotation[1] + 1]))
        else:
            print("!!!! {} Not Found!".format(s_image_path))