Example #1
            continue
        processedCIDs.add(s_cid)
        s_tags = remove_hat(s_annotation['tags'])

        for s_tag in s_tags:
            s_tag = s_tag.lower()
            if s_tag in vocabularies or s_tag in bad_vocabularies:
                continue
            if s_tag in predefined_vocabularies:
                vocabularies.add(s_tag)
            elif len(wordnet.synsets(s_tag)) >= 1:
                # the tag exists in WordNet, but drop very short or digit-containing tags
                if len(s_tag) < 3 or has_digits(s_tag):
                    bad_vocabularies.add(s_tag)
                else:
                    vocabularies.add(s_tag)

vocabularies_list = sorted(list(vocabularies))

# assert all the words are in
for s_word in predefined_vocabularies:
    if s_word not in vocabularies:
        print("{} was not found in dictionary".format(s_word))

idx2word, word2idx = string_list2dict(vocabularies_list)


save2pickle('/home/zwei/Dev/AttributeNet3/AdobeStockSelection/RetrieveSelected778/data_v2/Word2Vecs/vocabularies_complete_dict_from_train.pkl', {'idx2word': idx2word, 'word2idx': word2idx})
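
Every example on this page builds its lookup tables with string_list2dict. Judging only from the call sites (a list of strings in, an idx2word/word2idx pair out), a minimal sketch of the behavior it appears to provide could look like the following; the name string_list2dict_sketch is hypothetical and the real PyUtils.dict_utils implementation may differ.

def string_list2dict_sketch(strings):
    # index -> string and string -> index lookups, matching how the
    # (idx2word, word2idx) pair is unpacked throughout these examples
    idx2word = {i: w for i, w in enumerate(strings)}
    word2idx = {w: i for i, w in enumerate(strings)}
    return idx2word, word2idx


idx2word, word2idx = string_list2dict_sketch(['anger', 'fear', 'joy'])
assert idx2word[1] == 'fear' and word2idx['fear'] == 1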
Example #2
# Copyright (c) 2019 Zijun Wei.
# Licensed under the MIT License.
# Author: Zijun Wei
# Usage(TODO):
# Email: [email protected]
# Created: 27/Feb/2019 16:19

import os
from PyUtils.pickle_utils import loadpickle, save2pickle
from PyUtils.dict_utils import string_list2dict
import tqdm

user_root = os.path.expanduser('~')
dataset_dir = os.path.join(user_root, 'datasets/PublicEmotion', 'Deepemotion')
z_data_dir = os.path.join(dataset_dir, 'z_data')
emotion_categories = sorted(['fear', 'sadness', 'excitement', 'amusement', 'anger', 'awe', 'contentment', 'disgust'])
idx2emotion, emotion2idx = string_list2dict(emotion_categories)
data_split = 'train_sample'
dataset = loadpickle(os.path.join(dataset_dir, '{}.pkl'.format(data_split)))
dataset_8 = []
for s_data in tqdm.tqdm(dataset):
    s_data_category = os.path.dirname(s_data[0])
    emotion_idx = emotion2idx[s_data_category]
    dataset_8.append([s_data[0], emotion_idx, s_data[1]])

save2pickle(os.path.join(z_data_dir, '{}_8.pkl'.format(data_split)), dataset_8)
print("DB")
# train = loadpickle(os.path.join(dataset_dir, 'train.pkl'))
# train_sample = loadpickle(os.path.join(dataset_dir, 'train_sample.pkl'))
# test = loadpickle(os.path.join(dataset_dir, 'test.pkl'))
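
The loop above takes each record's relative image path, reads the emotion class name from its top-level directory, and maps it to an integer with emotion2idx (built via string_list2dict). A small illustration with a made-up record; the exact record layout is an assumption, and the second field is simply passed through unchanged.

import os

from PyUtils.dict_utils import string_list2dict

emotion_categories = sorted(['fear', 'sadness', 'excitement', 'amusement', 'anger', 'awe', 'contentment', 'disgust'])
idx2emotion, emotion2idx = string_list2dict(emotion_categories)

record = ['amusement/000123.jpg', 0.87]      # hypothetical [relative_path, extra_field] record
category = os.path.dirname(record[0])        # 'amusement' -- the class is the top-level directory
label = emotion2idx[category]                # 0, if indices follow the sorted list order
dataset_8_entry = [record[0], label, record[1]]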
Example #3
def main():

    import argparse
    parser = argparse.ArgumentParser(
        description="Pytorch Image CNN training from Configure Files")
    parser.add_argument(
        '--config_file',
        required=True,
        help="This scripts only accepts parameters from Json files")
    input_args = parser.parse_args()

    config_file = input_args.config_file

    args = parse_config(config_file)
    if args.name is None:
        args.name = get_stem(config_file)

    torch.set_default_tensor_type('torch.FloatTensor')
    best_prec1 = 0

    args.script_name = get_stem(__file__)
    current_time_str = get_date_str()

    print_func = print

    if args.device:
        os.environ["CUDA_VISIBLE_DEVICES"] = args.device

    if args.seed is not None:
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        cudnn.deterministic = True
        warnings.warn('You have chosen to seed training. '
                      'This will turn on the CUDNN deterministic setting, '
                      'which can slow down your training considerably! '
                      'You may see unexpected behavior when restarting '
                      'from checkpoints.')

    if args.gpu is not None:
        warnings.warn('You have chosen a specific GPU. This will completely '
                      'disable data parallelism.')

    args.distributed = args.world_size > 1

    if args.distributed:
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size)

    if args.pretrained:
        print_func("=> using pre-trained model '{}'".format(args.arch))
        model = models.__dict__[args.arch](pretrained=True,
                                           num_classes=args.num_classes)
    else:
        print_func("=> creating model '{}'".format(args.arch))
        model = models.__dict__[args.arch](pretrained=False,
                                           num_classes=args.num_classes)

    if args.gpu is not None:
        model = model.cuda(args.gpu)
    elif args.distributed:
        model.cuda()
        model = torch.nn.parallel.DistributedDataParallel(model)
    else:
        if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
            model.features = torch.nn.DataParallel(model.features)
            model.cuda()
        else:
            # model = torch.nn.DataParallel(model).cuda()
            model = model.cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print_func("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            import collections
            if isinstance(checkpoint, collections.OrderedDict):
                load_state_dict(model,
                                checkpoint,
                                exclude_layers=['fc.weight', 'fc.bias'])

            else:
                load_state_dict(
                    model,
                    checkpoint['state_dict'],
                    exclude_layers=['module.fc.weight', 'module.fc.bias'])
                print_func("=> loaded checkpoint '{}' (epoch {})".format(
                    args.resume, checkpoint['epoch']))
        else:
            print_func("=> no checkpoint found at '{}'".format(args.resume))
            return
    else:
        print_func(
            "=> This script is for fine-tuning only, please double check '{}'".
            format(args.resume))
        print_func("Now using randomly initialized parameters!")

    cudnn.benchmark = True

    from PyUtils.pickle_utils import loadpickle
    from PublicEmotionDatasets.Deepemotion.constants import emotion2idx, idx2emotion
    from PyUtils.dict_utils import string_list2dict
    import numpy as np
    from torchvision.datasets.folder import default_loader
    tag_wordvectors = loadpickle(args.tag_embeddings)
    tag_words = []
    tag_matrix = []
    label_words = []
    label_matrix = []

    for x_tag in tag_wordvectors:
        tag_words.append(x_tag)
        tag_matrix.append(tag_wordvectors[x_tag])
        if x_tag in emotion2idx:
            label_words.append(x_tag)
            label_matrix.append(tag_wordvectors[x_tag])
    idx2tag, tag2idx = string_list2dict(tag_words)
    idx2label, label2idx = string_list2dict(label_words)
    tag_matrix = np.array(tag_matrix)
    label_matrix = np.array(label_matrix)
    label_matrix = label_matrix.squeeze(1)
    tag_matrix = tag_matrix.squeeze(1)
    val_list = loadpickle(args.val_file)
    from CNNs.datasets.multilabel import get_val_simple_transform
    val_transform = get_val_simple_transform()
    model.eval()

    correct = 0
    total = len(val_list) * 1.0
    for i, (input_image_file, target, _) in enumerate(val_list):
        # measure data loading time

        image_path = os.path.join(args.data_dir, input_image_file)
        input_image = default_loader(image_path)
        input_image = val_transform(input_image)

        if args.gpu is not None:
            input_image = input_image.cuda(args.gpu, non_blocking=True)
        input_image = input_image.unsqueeze(0).cuda()

        # target_idx = target.nonzero() [:,1]

        # compute output
        output, output_proj = model(input_image)

        output_proj = output_proj.cpu().data.numpy()

        dot_product_label = cosine_similarity(output_proj, label_matrix)[0]
        output_label = np.argmax(dot_product_label)
        if output_label == target:
            correct += 1

        dot_product_tag = cosine_similarity(output_proj, tag_matrix)[0]
        out_tags = np.argsort(dot_product_tag)[::-1][:10]

        print("* {} Image: {} GT label: {}, predicted label: {}".format(
            i, input_image_file, idx2emotion[target], idx2label[output_label]))
        print(" == closest tags: {}".format(', '.join([
            '{}({:.02f})'.format(idx2tag[x], dot_product_tag[x])
            for x in out_tags
        ])))
    print("Accuracy {:.4f}".format(correct / total))
Example #4
)
anchor_data = anchor_annotations['key2idx']
anchor_words = list(anchor_data.keys())
anchor_word_set = set(anchor_words)
# model_path = '/home/zwei/Dev/AttributeNet3/TextClassification/pre_extract_w2v/params/googlenews_extracted_w2v_wordnet_synsets_py3.pl'
model_path = '/home/zwei/Dev/AttributeNet3/TextClassificationV2/ckpts/selftrained.pkl'

word2vec_model = loadpickle(model_path)

word2vec_matrix = []
word_list = []
for s_word in word2vec_model:
    word_list.append(s_word)
    word2vec_matrix.append(word2vec_model[s_word])

idx2word, word2idx = string_list2dict(word_list)
word2vec_matrix = np.array(word2vec_matrix)

for word_idx, s_word in enumerate(anchor_words):
    # if s_word not in word2idx:
    #     print("Cannot find {}".format(s_word))
    #     continue
    # s_word_id = str(word2idx[s_word])
    if s_word in word2idx:
        s_word_vec = word2vec_matrix[word2idx[s_word]]
    else:
        print("{} Not found in the learned dict".format(s_word))
        continue

    knns = cosine_similarity(s_word_vec.reshape(1, -1), word2vec_matrix)[0]
    topK = np.argsort(knns)[::-1][:20]
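    # topK holds the indices of the 20 nearest neighbours (most similar first) into
    # word2vec_matrix / idx2word; they could be inspected with, for example:
    #     print(s_word, [idx2word[k] for k in topK])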

Example #5

from PyUtils.pickle_utils import loadpickle
from PyUtils.dict_utils import string_list2dict, get_key_sorted_dict
from AdobeStockTools.TagUtils import has_digits, keepGoodTags
from collections import Counter
import tqdm
import numpy as np
import random


random.seed(0)

predefined_emotions = sorted(['surprise(negative)', 'happiness', 'sadness', 'disgust', 'surprise(positive)', 'neutral',
                              'anger', 'fear'])
idx2emotion, emotion2idx = string_list2dict(predefined_emotions)


def counter2multilabel(counter, emotion2idx):
    emotion_vector = np.zeros(len(emotion2idx))
    for x in counter.most_common():
        emotion_vector[emotion2idx[x[0]]] = x[1]

    emotion_vector /= sum(emotion_vector)*1.
    return emotion_vector


def counter2singlelabel(counter, emotion2idx, thres=3):
    if counter.most_common()[0][1] < thres:
        return None
    else:
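
counter2multilabel above converts per-image annotator votes into a normalized soft-label vector over the eight predefined emotions. A short usage sketch, assuming the definitions above are in scope; the vote counts are made up.

votes = Counter({'happiness': 3, 'surprise(positive)': 1})   # made-up annotator votes
soft_label = counter2multilabel(votes, emotion2idx)
# length-8 vector that sums to 1: 0.75 at the 'happiness' index, 0.25 at 'surprise(positive)'
print(soft_label)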
Example #6
# Author: Zijun Wei
# Usage(TODO):
# Email: [email protected]
# Created: 08/Mar/2019 09:19

from PyUtils.dict_utils import string_list2dict

emotion_list = [
    'Affection', 'Anger', 'Annoyance', 'Anticipation', 'Aversion',
    'Confidence', 'Disapproval', 'Disconnection', 'Disquietment',
    'Doubt/Confusion', 'Embarrassment', 'Engagement', 'Esteem', 'Excitement',
    'Fatigue', 'Fear', 'Happiness', 'Pain', 'Peace', 'Pleasure', 'Sadness',
    'Sensitivity', 'Suffering', 'Surprise', 'Sympathy', 'Yearning'
]

idx2emorion, emorion2idx = string_list2dict(emotion_list)

emotion_explainations_orig = {
    'Peace':
    'well being and relaxed; no worry; having positive thoughts or sensations; satisfied',
    'Affection': 'fond feelings; love; tenderness',
    'Esteem':
    'feelings of favorable opinion or judgment; respect; admiration; gratefulness',
    'Anticipation':
    'state of looking forward; hoping on or getting prepared for possible future events',
    'Engagement':
    'paying attention to something; absorbed into something; curious; interested',
    'Confidence':
    'feeling of being certain; conviction that an outcome will be favorable; encouraged; proud',
    'Happiness': 'feeling delighted; feeling enjoyment or amusement',
    'Pleasure': 'feeling of delight in the senses',