def main():
    text_ckpt = torch.load(
        '/home/zwei/Dev/AttributeNet3/TextClassificationV2/ckpts/TextCNN_googlenews_690Removed_NLTDAN_1M_Static.pth.tar',
        map_location=lambda storage, loc: storage)
    args_model = text_ckpt['args_model']
    args_data = text_ckpt['args_data']
    text_model = TextCNN(args_model)
    model_tag2idx = args_data.tag2idx
    text_model.load_state_dict(text_ckpt['state_dict'], strict=True)
    vocab_idx2tag = loadpickle(
        '/home/zwei/Dev/AttributeNet3/AdobeStockSelection/EmotionNetFinal/tag2idx.pkl'
    )['idx2tag']
    dataset = loadpickle(
        '/home/zwei/Dev/AttributeNet3/AdobeStockSelection/EmotionNetFinal/CNNsplit_tag_labels+full_tagidx_train+face.pkl'
    )
    text_model.eval()
    emotion_tags = loadpickle(
        '/home/zwei/Dev/AttributeNet3/AdobeStockSelection/EmotionNetFinal/etag2idx.pkl'
    )['key2idx']
    idx2emotion = loadpickle(
        '/home/zwei/Dev/AttributeNet3/AdobeStockSelection/EmotionNetFinal/etag2idx.pkl'
    )['idx2key']
    image_url_dict = loadpickle(
        '/home/zwei/Dev/AttributeNet3/AdobeStockSelection/RetrieveSelected778/data_v2/dataset_image_urls.pkl'
    )

    for data_idx, s_data in enumerate(dataset):
        if data_idx % 10000 != 0:
            continue
        x_tags = [vocab_idx2tag[x] for x in s_data[2]]
        x_tag_ids = []
        x_tag_names = []
        x_emotion_tags = []
        for x_tag in x_tags:
            if x_tag in model_tag2idx:
                x_tag_names.append(x_tag)

                x_tag_ids.append(model_tag2idx[x_tag])

            else:
                pass
                # x_tag_ids.append(args_model.vocab_size)

            if x_tag in emotion_tags:
                x_emotion_tags.append(x_tag)

        x_tag_ids = pad_sentences(x_tag_ids, args_model.max_len,
                                  args_model.vocab_size + 1)
        x_tag_ids = torch.LongTensor(x_tag_ids).unsqueeze(0)
        predicts = F.softmax(text_model(x_tag_ids)[0],
                             dim=1).squeeze(0).cpu().data.numpy()
        image_cid = int(get_image_cid_from_url(s_data[0], location=1))
        arg_max_predict = np.argsort(predicts)[::-1][:10]
        if image_cid in image_url_dict:
            print("{}".format(image_url_dict[image_cid]))
            print(", ".join(x_emotion_tags))
            print(", ".join(x_tag_names))
            print(', '.join('{}({:.2f})'.format(idx2emotion[i], predicts[i])
                            for i in arg_max_predict))
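
# pad_sentences is an external helper in the snippet above; a minimal sketch,
# assuming it truncates a tag-id list to max_len and right-pads shorter lists
# with the padding index vocab_size + 1 (the project's real implementation is
# not shown and may differ):
def pad_sentences(token_ids, max_len, pad_idx):
    padded = token_ids[:max_len]
    # Right-pad up to max_len with the padding index.
    return padded + [pad_idx] * (max_len - len(padded))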
Example #2
def main():
    text_ckpt = torch.load(
        '/home/zwei/Dev/AttributeNet3/TextClassificationV2/ckpts/TextCNN_googlenews_NLT_Static.pth.tar'
    )
    args_model = text_ckpt['args_model']
    args_data = text_ckpt['args_data']
    text_model = TextCNN(args_model)
    model_tag2idx = args_data.tag2idx
    text_model.load_state_dict(text_ckpt['state_dict'], strict=True)
    vocab_idx2tag = loadpickle(
        '/home/zwei/Dev/AttributeNet3/AdobeStockSelection/EmotionNetFinal/tag2idx.pkl'
    )['idx2tag']
    dataset = loadpickle(
        '/home/zwei/Dev/AttributeNet3/AdobeStockSelection/EmotionNetFinal/CNNsplit_tagidx_36534_test.pkl'
    )
    text_model.eval()
    emotion_tags = loadpickle(
        '/home/zwei/Dev/AttributeNet3/AdobeStockSelection/EmotionNetFinal/etag2idx.pkl'
    )['key2idx']
    idx2emotion = loadpickle(
        '/home/zwei/Dev/AttributeNet3/AdobeStockSelection/EmotionNetFinal/etag2idx.pkl'
    )['idx2key']

    image_url_dict = loadpickle(
        '/home/zwei/Dev/AttributeNet3/AdobeStockSelection/RetrieveSelected778/data_v2/dataset_image_urls.pkl'
    )
    new_dataset = []

    for data_idx, s_data in tqdm.tqdm(enumerate(dataset), total=len(dataset)):

        x_tags = [vocab_idx2tag[x] for x in s_data[1]]
        x_tag_ids = []
        x_tag_names = []
        x_emotion_tags = []
        for x_tag in x_tags:
            if x_tag in model_tag2idx:
                x_tag_ids.append(model_tag2idx[x_tag])
                x_tag_names.append(x_tag)
                if x_tag in emotion_tags:
                    x_emotion_tags.append(x_tag)
        x_tag_ids = pad_sentences(x_tag_ids, args_model.max_len,
                                  args_model.vocab_size + 1)
        x_tag_ids = torch.LongTensor(x_tag_ids).unsqueeze(0)
        predicts = F.softmax(text_model(x_tag_ids)[0],
                             dim=1).squeeze(0).cpu().data.numpy()
        new_dataset.append([s_data[0], predicts.tolist()])

        if data_idx % 50000 == 0:
            image_cid = int(get_image_cid_from_url(s_data[0], location=1))

            if image_cid in image_url_dict:
                print("{}".format(image_url_dict[image_cid]))
                print(", ".join(x_emotion_tags))
                print(", ".join(x_tag_names))
                print(', '.join(
                    '{}({:.2f})'.format(idx2emotion[i], predicts[i])
                    for i in range(len(predicts))))

    save2pickle(
        '/home/zwei/Dev/AttributeNet3/AdobeStockSelection/EmotionNetFinal/CNNsplit_distill8_test.pkl',
        new_dataset)
Example #3
def read_AMT_complete_mtrain_mtest(data_path=None):

    if data_path is None:
        data_path = '/home/zwei/Dev/AttributeNet3/MturkCollectedData/data/mturk_annotations.pkl'
    annotated_data = loadpickle(data_path)

    predefined_vocabularies = loadpickle(
        '/home/zwei/Dev/AttributeNet3/TextClassification/pre_extract_w2v/params/selftrained_extracted_w2v_wordnet_synsets_py3.pl'
    )

    data = []
    for s_image_cid in tqdm.tqdm(annotated_data,
                                 desc="Processing Annotated Data"):
        s_data = annotated_data[s_image_cid]
        s_image_emotions = []
        for x in s_data['image_emotion']:
            s_image_emotions.extend(x)
        s_image_emotions = Counter(s_image_emotions)

        goodtags = []
        raw_tags = s_data['tags']
        for s_raw_tag in raw_tags:
            if s_raw_tag in predefined_vocabularies:
                goodtags.append(s_raw_tag)
        if len(goodtags) < 1:
            continue
        data.append([goodtags, s_image_emotions, s_image_cid])

    random.seed(0)
    random.shuffle(data)

    dev_idx = len(data) // 10
    val_data = data[:dev_idx]
    train_data = data[dev_idx:]

    updated_train_data = []
    for s_data in train_data:
        goodtags, s_emotion_counter, s_image_cid = s_data

        s_emotion_label = counter2multilabel(s_emotion_counter, emotion2idx)

        updated_train_data.append([goodtags, s_emotion_label, s_image_cid])

    updated_val_data = []
    for s_data in val_data:
        goodtags, s_emotion_counter, s_image_cid = s_data
        s_emotion_label = counter2multilabel(s_emotion_counter, emotion2idx)
        updated_val_data.append([goodtags, s_emotion_label, s_image_cid])

    print("Train: {}\tVal: {}".format(len(updated_train_data),
                                      len(updated_val_data)))

    return updated_train_data, updated_val_data
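
# counter2multilabel (and the module-level emotion2idx it relies on) is not
# shown in this snippet; a minimal sketch, assuming a multi-hot label over
# every emotion that received at least one annotator vote (the real helper
# may keep or normalize the vote counts instead):
def counter2multilabel(emotion_counter, emotion2idx):
    label = [0] * len(emotion2idx)
    for s_emotion in emotion_counter:
        if s_emotion in emotion2idx:
            # Mark any emotion with at least one vote.
            label[emotion2idx[s_emotion]] = 1
    return label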
def deepsentiment_s_test(args):
    image_information = loadpickle(args.test_file)
    dataset = ImageRelLists(image_paths=image_information,
                            image_root=args.data_dir,
                            transform=get_val_simple_transform(),
                            target_transform=None)
    return dataset
def main():

    import argparse
    parser = argparse.ArgumentParser(
        description="Pytorch Image CNN training from Configure Files")
    parser.add_argument(
        '--config_file',
        required=True,
        help="This scripts only accepts parameters from Json files")
    input_args = parser.parse_args()

    config_file = input_args.config_file

    args = parse_config(config_file)
    class_lens = args.class_len

    for ind in range(len(class_lens)):
        print("-------------------------------")
        train_dataset = loadpickle(args.train_files[ind])
        print(len(train_dataset))
        image_directory = args.data_dirs[ind]

        for s_data in tqdm.tqdm(train_dataset, desc="Extracting Features"):
            if s_data is None:
                continue

            image_path = os.path.join(image_directory,
                                      s_data[0]).replace("\\", "/")
def multilabel_idxcount_v2_val(args):
    image_information = loadpickle(args.val_file)
    dataset = ImageRelLists(image_paths=image_information,
                            image_root=args.data_dir,
                            transform=get_val_simple_transform(),
                            target_transform=multilabelidxcount2KL(
                                args.num_classes))
    return dataset
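
# multilabelidxcount2KL is defined elsewhere in the project; a minimal sketch,
# assuming from its name that it turns (label index, count) pairs into a
# normalized probability target for a KL-divergence loss (an assumption, not
# confirmed behavior):
import torch


def multilabelidxcount2KL(num_classes):
    def transform(idx_counts):
        target = torch.zeros(num_classes)
        for s_idx, s_count in idx_counts:
            target[s_idx] = float(s_count)
        total = target.sum()
        # Normalize the counts into a distribution; keep zeros if no labels.
        return target / total if total > 0 else target
    return transform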


# if __name__ == '__main__':
#     # x_transform = multilabel2multihot(500)
#     # x = x_transform([4, 10])
#     # print("DEB")
#     from argparse import Namespace
#     from CNNs.dataloaders.utils import none_collate
#
#     args = Namespace(num_classes=742)
#     annotation_file = '/home/zwei/Dev/AttributeNet3/AdobeStockSelection/RetrieveSelected778/data_v2/CNNsplit_{}.pkl'
#     data_dir = '/home/zwei/datasets/stockimage_742/images-256'
#     dataset = multilabel_val(args, annotation_file, data_dir)
#     val_loader = torch.utils.data.DataLoader(dataset,
#                                              batch_size=10, shuffle=False,
#                                              num_workers=4, pin_memory=True, collate_fn=none_collate)
#     import tqdm
#
#     for s_images, s_labels in tqdm.tqdm(val_loader):
#         pass

# print("Done")
def main():
    """
    img_id_kws = {}
    print("Reading keyword files")
    for file in glob.glob(kw_folder + "*.json"):
        print(file)
        with open(file, 'r') as of_:
            # keyword file
            lines = of_.readlines()
            for l in lines:
                # image
                d = json.loads(l)
                if d['cid'] not in img_id_kws:
                    tags = []
                    for t in d['tags']:
                        words = t.split('^')[0].split()
                        for w in words:
                            tags.append(w)
                    img_id_kws[d['cid']] = tags
    save2pickle(os.path.join(adobe_folder, "img_id_kws.pkl"), img_id_kws)
    """

    img_id_kws = loadpickle(os.path.join(adobe_folder, "img_id_kws.pkl"))
    sentences = list(img_id_kws.values())
    #save2pickle(os.path.join(adobe_folder, "sentences.pkl"), sentences)
    # train word2vec model
    model_folder = "/nfs/bigfovea/add_disk0/eugenia/Emotion/wordembedding_models/"
    model_file = "w2v_adobe.model"

    print("Training Word2Vec model")
    model = Word2Vec(sentences, min_count=1, size=50, workers=16, window=3, sg=1)
    model.save(os.path.join(model_folder, model_file))
def categorical_train(args):
    image_information = loadpickle(args.train_file)
    category_counts = {}
    for s_idx in image_information:
        category_counts[s_idx] = len(image_information[s_idx])
    dataset = SampleLoader(categories=image_information,
                           categories_counts=category_counts,
                           root=args.data_dir,
                           transform=get_train_simple_transform(),
                           target_transform=None,
                           sample_size=args.sample_size)
    return dataset
def multilabel_idxcount_v2_train(args):
    image_information = loadpickle(args.train_file)
    dataset = ImageRelLists(image_paths=image_information,
                            image_root=args.data_dir,
                            transform=get_train_fix_size_transform(),
                            target_transform=multilabelidxcount2KL(
                                args.num_classes))
    return dataset
def categorical_train(args):
    image_categorical_dict = loadpickle(args.train_file)
    dataset = SampleLoader(category_dict=image_categorical_dict,
                           root=args.data_dir,
                           transform=get_train_fix_size_transform(),
                           target_transform=None,
                           sample_size=args.sample_size)
    return dataset
Example #11
def multilabel_idxcount_v2_val(args):
    image_information = loadpickle(args.val_file)
    dataset = ImageRelLists(image_paths=image_information,
                            image_root=args.data_dir,
                            transform=get_val_simple_transform(),
                            target_transform=multilabelidxcount2multihot(
                                args.num_classes))
    return dataset
Example #12
def feature_list(args, annotation_file, data_dir, rel_path_h=None):
    image_paths = loadpickle(annotation_file)

    dataset = ImageNamesRelLists(image_paths=image_paths,
                                 image_root=data_dir,
                                 transform=get_val_simple_transform())

    return dataset
Example #13
def singlelabel_v2_val(args):
    #FIXME:
    image_information = loadpickle(args.val_file)
    dataset = ImageRelLists(image_paths=image_information,
                            image_root=args.data_dir,
                            transform=get_val_simple_transform(),
                            target_transform=None)
    return dataset
def deepsentiment_m_val(args):
    image_information = loadpickle(args.val_files[args.ind])
    dataset = ImageRelLists(
        image_paths=image_information,  #[:n_samples],
        image_root=args.data_dirs[args.ind],
        transform=get_val_simple_transform(),
        target_transform=None)
    return dataset
Example #15
def singlelabel_test(args, annotation_file, data_dir):
    #FIXME:
    annotation_file = annotation_file.format('test')
    image_information = loadpickle(annotation_file)
    dataset = ImageRelLists(image_paths=image_information,
                            image_root=data_dir,
                            transform=get_val_simple_transform(),
                            target_transform=None)
    return dataset
Example #16
def simple_multilabel_val(args):
    #FIXME:
    # annotation_file = annotation_file.format('train')
    image_information = loadpickle(args.val_file)
    dataset = ImageRelLists(image_paths=image_information,
                            image_root=args.data_dir,
                            transform=get_val_simple_transform(),
                            target_transform=simple_multitrans())
    return dataset
Example #17
def multilabel_BCE_test(args, annotation_file, data_dir):
    #FIXME:
    annotation_file = annotation_file.format('test')
    image_information = loadpickle(annotation_file)
    dataset = ImageRelLists(image_paths=image_information,
                            image_root=data_dir,
                            transform=get_val_simple_transform(),
                            target_transform=multilabel2multi1(
                                args.num_classes))
    return dataset
Example #18
def multilabel_v2_val(args):
    #FIXME:
    # annotation_file = annotation_file.format('val')
    image_information = loadpickle(args.val_file)
    dataset = ImageRelLists(image_paths=image_information,
                            image_root=args.data_dir,
                            transform=get_val_simple_transform(),
                            target_transform=multilabel2multihot(
                                args.num_classes))
    return dataset
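
# multilabel2multihot is external, but the commented-out test earlier in this
# file calls multilabel2multihot(500)([4, 10]); a minimal sketch consistent
# with that usage (the tensor type is an assumption):
import torch


def multilabel2multihot(num_classes):
    def transform(label_idxs):
        target = torch.zeros(num_classes)
        for s_idx in label_idxs:
            # Place a one at every labeled class index.
            target[s_idx] = 1.0
        return target
    return transform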
Example #19
def read_690_complete_mtrain_mtest_wo_emotion(data_path=None, subset_N=None):

    if data_path is None:
        data_path = os.path.join(
            project_root,
            'AdobeStockSelection/EmotionNetFinal/CNNsplit_tag_labels+full_tagidx_train+face.pkl'
        )
    annotated_data = loadpickle(data_path)
    emotion2idx = loadpickle(
        os.path.join(
            project_root,
            'AdobeStockSelection/EmotionNetFinal/etag2idx.pkl'))['key2idx']
    idx2tag = loadpickle(
        os.path.join(
            project_root,
            'AdobeStockSelection/EmotionNetFinal/tag2idx.pkl'))['idx2tag']
    # predefined_vocabularies = loadpickle('/home/zwei/Dev/AttributeNet3/TextClassification/pre_extract_w2v/params/selftrained_extracted_w2v_wordnet_synsets_py3.pl')

    data = []
    if subset_N is None:
        subset = annotated_data
    else:
        subset = annotated_data[:subset_N]

    for s_data in tqdm.tqdm(subset, desc="Processing Annotated Data"):

        s_image_cid = int(get_image_cid_from_url(s_data[0], location=1))
        raw_tags = [idx2tag[x] for x in s_data[2]]
        updated_tags = []
        for s_tag in raw_tags:
            if s_tag not in emotion2idx:
                updated_tags.append(s_tag)

        data.append([updated_tags, s_data[1], s_image_cid])

    random.seed(0)
    random.shuffle(data)

    dev_idx = 2000
    val_data = data[:dev_idx]
    train_data = data[dev_idx:]

    return train_data, val_data
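
# get_image_cid_from_url comes from AdobeStockTools and is used throughout
# these snippets; a minimal sketch, assuming the content id is a
# '-'-separated field of the URL's file name selected by `location` (the
# real parsing rule is not shown and this is only an illustration):
def get_image_cid_from_url(image_url, location=1):
    file_name = image_url.split('/')[-1].split('.')[0]
    # e.g. 'photo-123456789' with location=1 -> '123456789' (hypothetical).
    return file_name.split('-')[location]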
def test_690_contain(tag2idx):
    emotion690_vocabulary = loadpickle(
        '/home/zwei/Dev/AttributeNet3/AdobeStockSelection/RetrieveSelected778/data_v6_690_xmas/etag2idx.pkl'
    )
    selected_emotion690 = emotion690_vocabulary['key2idx']

    all_found = True
    for x in selected_emotion690:
        if x not in tag2idx:
            print("{} Not Found".format(x))
            all_found = False
    if all_found:
        print("All the 690 words can be found in this dict")
def ifContainCoreWords(tag2idx):
    emotion690_vocabulary = loadpickle(
        os.path.join(
            project_root,
            'AdobeStockSelection/RetrieveSelected778/data_v6_690_xmas/etag2idx.pkl'))
    selected_emotion690 = emotion690_vocabulary['key2idx']

    all_found = True
    for x in selected_emotion690:
        if x not in tag2idx:
            print("{} Not Found".format(x))
            all_found = False
    if all_found:
        print("All the 690 words can be found in this dict")
Example #22
def main():

    import argparse
    parser = argparse.ArgumentParser(description="Pytorch Image CNN training from Configure Files")
    parser.add_argument('--config_file', required=True, help="This scripts only accepts parameters from Json files")
    input_args = parser.parse_args()

    config_file = input_args.config_file

    args = parse_config(config_file)
    class_lens = args.class_len


    for ind in range(len(class_lens)):
        print("-------------------------------")
        train_dataset = loadpickle(args.train_files[ind])
        add_dataset = loadpickle(args.train_files[ind].replace(".pkl", "_try.pkl"))
        print(len(train_dataset), len(add_dataset))

        file_name = args.train_files[ind]
        save2pickle(file_name.replace(".pkl", "_new.pkl"), train_dataset + add_dataset)
def read_AMT_complete_mtrain_mtest(data_path=None):

    if data_path is None:
        data_path = '/home/zwei/Dev/AttributeNet3/MturkCollectedData/data/mturk_annotations.pkl'
    annotated_data = loadpickle(data_path)

    for idx, s_image_cid in enumerate(annotated_data):
        if idx > 1000:
            break
        s_data = annotated_data[s_image_cid]
        print("{}\t{}\t{}".format(idx, len(annotated_data), len(s_data['emotion-tags'])))
        print("{}".format(s_data['image_url']))
        print(', '.join(s_data['emotion-tags']))
Example #24
# Usage(TODO): create the vocabulary
# Email: [email protected]
# Created: 15/Feb/2019 12:46

import glob
import os
import tqdm
from PyUtils.pickle_utils import loadpickle, save2pickle
from PyUtils.json_utils import load_json_list
from PyUtils.dict_utils import string_list2dict
from nltk.corpus import wordnet
from AdobeStockTools.TagUtils import remove_hat, has_digits
from AdobeStockTools.AdobeStockUnitls import get_image_cid_from_url
raw_annotation_files = glob.glob(os.path.join('/home/zwei/Dev/AttributeNet3/AdobeStockSelection/RetrieveSelected778/selected_keywords_retrieve_v2', '*.json'))

predefined_vocabularies = set(loadpickle('/home/zwei/Dev/AttributeNet3/AdobeStockSelection/RetrieveSelected778/data_v2/tag_frequencies_selected.pkl').keys())

valid_annotation_list = loadpickle('/home/zwei/Dev/AttributeNet3/AdobeStockSelection/RetrieveSelected778/data_v2/CNNsplit_train.pkl')
train_cid_list = []
for s_item in tqdm.tqdm(valid_annotation_list, desc="Processing image cids"):
    train_cid_list.append(int(get_image_cid_from_url(s_item[0], location=1)))

train_cid_set = set(train_cid_list)

processedCIDs = set()
vocabularies = set()
bad_vocabularies = set()
for s_file in tqdm.tqdm(raw_annotation_files):
    keyword_raw_annotations = load_json_list(s_file)
    for s_annotation in keyword_raw_annotations:
        s_cid = s_annotation['cid']
def main():

    import argparse
    parser = argparse.ArgumentParser(description="Pytorch Image CNN training from Configure Files")
    parser.add_argument('--config_file', required=True, help="This scripts only accepts parameters from Json files")
    input_args = parser.parse_args()

    config_file = input_args.config_file

    args = parse_config(config_file)
    if args.name is None:
        args.name = get_stem(config_file)

    torch.set_default_tensor_type('torch.FloatTensor')
    best_prec1 = 0

    args.script_name = get_stem(__file__)
    current_time_str = get_date_str()
    # if args.resume is None:
    if args.save_directory is None:
        save_directory = get_dir(os.path.join(project_root, 'ckpts2', '{:s}'.format(args.name), '{:s}-{:s}'.format(args.ID, current_time_str)))
    else:
        save_directory = get_dir(os.path.join(project_root, 'ckpts2', args.save_directory))
    # else:
    #     save_directory = os.path.dirname(args.resume)
    print("Save to {}".format(save_directory))
    log_file = os.path.join(save_directory, 'log-{0}.txt'.format(current_time_str))
    logger = log_utils.get_logger(log_file)
    log_utils.print_config(vars(args), logger)


    print_func = logger.info
    print_func('ConfigFile: {}'.format(config_file))
    args.log_file = log_file

    if args.device:
        os.environ["CUDA_VISIBLE_DEVICES"] = args.device


    if args.seed is not None:
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        cudnn.deterministic = True
        warnings.warn('You have chosen to seed training. '
                      'This will turn on the CUDNN deterministic setting, '
                      'which can slow down your training considerably! '
                      'You may see unexpected behavior when restarting '
                      'from checkpoints.')

    if args.gpu is not None:
        warnings.warn('You have chosen a specific GPU. This will completely '
                      'disable data parallelism.')

    args.distributed = args.world_size > 1

    if args.distributed:
        dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                                world_size=args.world_size)

    if args.pretrained:
        print_func("=> using pre-trained model '{}'".format(args.arch))
        visual_model = models.__dict__[args.arch](pretrained=True, num_classes=args.num_classes)
    else:
        print_func("=> creating model '{}'".format(args.arch))
        visual_model = models.__dict__[args.arch](pretrained=False, num_classes=args.num_classes)

    if args.freeze:
        visual_model = CNN_utils.freeze_all_except_fc(visual_model)



    if os.path.isfile(args.text_ckpt):
        print_func("=> loading checkpoint '{}'".format(args.text_ckpt))
        text_data = torch.load(args.text_ckpt, map_location=lambda storage, loc:storage)
        text_model = TextCNN(text_data['args_model'])
        # load_state_dict(text_model, text_data['state_dict'])
        text_model.load_state_dict(text_data['state_dict'], strict=True)
        text_model.eval()
        print_func("=> loaded checkpoint '{}' for text classification"
              .format(args.text_ckpt))
        args.vocab_size = text_data['args_model'].vocab_size
    else:
        print_func("=> no checkpoint found at '{}'".format(args.text_ckpt))
        return


    args.tag2clsidx = text_data['args_data'].tag2idx
    args.vocab_size = len(args.tag2clsidx)

    args.text_embed = loadpickle(args.text_embed)
    args.idx2tag = loadpickle(args.idx2tag)['idx2tag']



    if args.gpu is not None:
        visual_model = visual_model.cuda(args.gpu)
        text_model = text_model.cuda(args.gpu)
    elif args.distributed:
        visual_model.cuda()
        visual_model = torch.nn.parallel.DistributedDataParallel(visual_model)
    else:
        if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
            visual_model.features = torch.nn.DataParallel(visual_model.features)
            visual_model.cuda()
        else:
            visual_model = torch.nn.DataParallel(visual_model).cuda()
            text_model = torch.nn.DataParallel(text_model).cuda()


    criterion = nn.CrossEntropyLoss(ignore_index=-1).cuda(args.gpu)

    optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad, visual_model.parameters()), lr=args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if args.lr_schedule:
        print_func("Using scheduled learning rate")
        scheduler = lr_scheduler.MultiStepLR(
            optimizer, [int(i) for i in args.lr_schedule.split(',')], gamma=0.1)
    else:
        scheduler = lr_scheduler.ReduceLROnPlateau(
            optimizer, 'min', patience=args.lr_patience)

    # optimizer = torch.optim.SGD(model.parameters(), args.lr,
    #                             momentum=args.momentum,
    #                             weight_decay=args.weight_decay)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print_func("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)

            import collections
            if isinstance(checkpoint, collections.OrderedDict):
                load_state_dict(visual_model, checkpoint)


            else:
                load_state_dict(visual_model, checkpoint['state_dict'])
                print_func("=> loaded checkpoint '{}' (epoch {})"
                      .format(args.resume, checkpoint['epoch']))

        else:
            print_func("=> no checkpoint found at '{}'".format(args.resume))



    cudnn.benchmark = True

    model_total_params = sum(p.numel() for p in visual_model.parameters())
    model_grad_params = sum(p.numel() for p in visual_model.parameters() if p.requires_grad)
    print_func("Total Parameters: {0}\t Gradient Parameters: {1}".format(model_total_params, model_grad_params))

    # Data loading code
    val_dataset = get_instance(custom_datasets, '{0}'.format(args.valloader), args)
    if val_dataset is None:
        val_loader = None
    else:
        val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=args.batch_size, shuffle=False,
                                             num_workers=args.workers, pin_memory=True, collate_fn=none_collate)

    if args.evaluate:
        print_func('Validation Only')
        validate(val_loader, visual_model, criterion, args, print_func)
        return
    else:

        train_dataset = get_instance(custom_datasets, '{0}'.format(args.trainloader), args)

        if args.distributed:
            train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
        else:
            train_sampler = None

        train_loader = torch.utils.data.DataLoader(
            train_dataset, batch_size=args.batch_size, shuffle=(train_sampler is None),
            num_workers=args.workers, pin_memory=True, sampler=train_sampler, collate_fn=none_collate)




    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)
        if args.lr_schedule:
            # CNN_utils.adjust_learning_rate(optimizer, epoch, args.lr)
            scheduler.step()
        current_lr = optimizer.param_groups[0]['lr']

        print_func("Epoch: [{}], learning rate: {}".format(epoch, current_lr))

        # train for one epoch
        train(train_loader, visual_model, text_model, criterion, optimizer, epoch, args, print_func)

        # evaluate on validation set
        if val_loader:
            prec1, val_loss = validate(val_loader, visual_model, criterion, args, print_func)
        else:
            prec1 = 0
            val_loss = 0
        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        CNN_utils.save_checkpoint({
            'epoch': epoch + 1,
            'arch': args.arch,
            'state_dict': visual_model.state_dict(),
            'best_prec1': best_prec1,
            'optimizer' : optimizer.state_dict(),
        }, is_best, file_directory=save_directory, epoch=epoch)

        if not args.lr_schedule:
            scheduler.step(val_loss)
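
# The resume branch above calls a project-level load_state_dict(model, state)
# helper rather than the nn.Module method; a minimal sketch, assuming it
# tolerates checkpoints saved from a DataParallel-wrapped model:
def load_state_dict(model, state_dict):
    # Strip the 'module.' prefix that DataParallel adds, then load leniently.
    cleaned = {k.replace('module.', '', 1): v for k, v in state_dict.items()}
    model.load_state_dict(cleaned, strict=False)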
Example #26
# Copyright (c) 2019 Zijun Wei.
# Licensed under the MIT License.
# Author: Zijun Wei
# Usage(TODO):
# Email: [email protected]
# Created: 26/Mar/2019 11:42

from PyUtils.pickle_utils import loadpickle

keydicts = loadpickle(
    '/home/zwei/Dev/AttributeNet3/AdobeStockSelection/EmotionNetFinal/etag2idx.pkl'
)

key2idx = keydicts['key2idx']

keys = []
for idx, s_key in enumerate(key2idx):
    keys.append('{}: {}'.format(idx + 1, s_key))

print(", ".join(keys))
Example #27
# Copyright (c) 2019 Zijun Wei.
# Licensed under the MIT License.
# Author: Zijun Wei
# Usage(TODO):
# Email: [email protected]
# Created: 16/Mar/2019 11:15

from PyUtils.pickle_utils import loadpickle, save2pickle
import random

random.seed(0)

train_data = loadpickle(
    '/home/zwei/datasets/PublicEmotion/Deepsentiment/z_data/train_3.pkl')

split = len(train_data) // 10
random.shuffle(train_data)
train_val_data = train_data[:split]
train_train_data = train_data[split:]

save2pickle(
    '/home/zwei/datasets/PublicEmotion/Deepsentiment/z_data/train_3_90_list.pkl',
    train_train_data)
save2pickle(
    '/home/zwei/datasets/PublicEmotion/Deepsentiment/z_data/train_3_10_list.pkl',
    train_val_data)
print("DB")
Example #28
import os
import sys

# project_root is assumed to be defined in the part of the original file not shown here
sys.path.append(project_root)

import logging
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s',
                    level=logging.INFO)
import tqdm
from PyUtils.file_utils import get_dir
from PyUtils.pickle_utils import loadpickle
from gensim import models
import random
random.seed(0)

user_root = os.path.expanduser('~')

training_data = loadpickle(
    os.path.join(user_root,
                 'Dev/AttributeNet3/LanguageData/raw_tag_sentences.pkl'))
save_directory = get_dir(
    os.path.join(user_root, 'Dev/AttributeNet3/LanguageData/word2vec_models'))

training_sentences = training_data['data']
max_len = training_data['max_len']

embedding_dim = 300
window_size = max_len

texts = []
shuffle_times = 10


def random_drop(s_text, drop_rate=0.1):
    # The body is cut off in the source; a minimal sketch (assumption):
    # randomly drop roughly drop_rate of the tokens in a tag sentence.
    return [s_word for s_word in s_text if random.random() > drop_rate]
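
# A minimal sketch (assumption) of how texts and shuffle_times above might be
# combined into an augmented corpus before word2vec training:
for s_sentence in tqdm.tqdm(training_sentences, desc="Augmenting sentences"):
    for _ in range(shuffle_times):
        s_copy = list(s_sentence)
        random.shuffle(s_copy)
        texts.append(random_drop(s_copy))
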
#
# This file is part of the AttributeNet3 project.
#
# @author Zijun Wei <*****@*****.**>
# @copyright (c) Adobe Inc.
# 2020-Jun-15.
# 08: 51
# All Rights Reserved
#

from PyUtils.pickle_utils import loadpickle, save2pickle
import tqdm

annotation_file = '/Dataset_release/SE30K8/annotations/mturk_annotations_240.pkl.keep'
raw_annotations = loadpickle(annotation_file)

# updated_annotations = {}
for s_idx, (s_key, s_item) in enumerate(tqdm.tqdm(raw_annotations.items())):
    # if s_idx % 500 == 0:
    #     print(s_item['image_url'])
    s_emotion_annotations = s_item['image_emotion']
    for s_emotion in s_emotion_annotations:
        if len(s_emotion) > 1:
            print(s_key)
print("DB")
Example #30
# Copyright (c) 2019 Zijun Wei.
# Licensed under the MIT License.
# Author: Zijun Wei
# Usage(TODO):
# Email: [email protected]
# Created: 27/Feb/2019 16:19

import os
from PyUtils.pickle_utils import loadpickle, save2pickle
from PyUtils.dict_utils import string_list2dict
import tqdm

user_root = os.path.expanduser('~')
dataset_dir = os.path.join(user_root, 'datasets/PublicEmotion', 'Deepemotion')
z_data_dir = os.path.join(dataset_dir, 'z_data')
emotion_categories = sorted(['fear', 'sadness', 'excitement', 'amusement', 'anger', 'awe', 'contentment', 'disgust'])
idx2emotion, emotion2idx = string_list2dict(emotion_categories)
data_split = 'train_sample'
dataset = loadpickle(os.path.join(dataset_dir, '{}.pkl'.format(data_split)))
dataset_8 = []
for s_data in tqdm.tqdm(dataset):
    s_data_category = os.path.dirname(s_data[0])
    emotion_idx = emotion2idx[s_data_category]
    dataset_8.append([s_data[0], emotion_idx, s_data[1]])

save2pickle(os.path.join(z_data_dir, '{}_8.pkl'.format(data_split)), dataset_8)
print("DB")
# train = loadpickle(os.path.join(dataset_dir, 'train.pkl'))
# train_sample = loadpickle(os.path.join(dataset_dir, 'train_sample.pkl'))
# test = loadpickle(os.path.join(dataset_dir, 'test.pkl'))
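
# string_list2dict (from PyUtils.dict_utils) is called above as
# idx2emotion, emotion2idx = string_list2dict(emotion_categories); a minimal
# sketch consistent with that call (an assumption about the real helper):
def string_list2dict(string_list):
    idx2string = {s_idx: s for s_idx, s in enumerate(string_list)}
    string2idx = {s: s_idx for s_idx, s in enumerate(string_list)}
    return idx2string, string2idx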