continue processedCIDs.add(s_cid) s_tags = remove_hat(s_annotation['tags']) for s_tag in s_tags: s_tag = s_tag.lower() if s_tag in vocabularies or s_tag in bad_vocabularies: continue else: if s_tag in predefined_vocabularies: vocabularies.add(s_tag) elif len(wordnet.synsets(s_tag))>=1: if len(s_tag) < 3 or has_digits(s_tag): bad_vocabularies.add(s_tag) else: vocabularies.add(s_tag) else: continue vocabularies_list = sorted(list(vocabularies)) # assert all the words are in for s_word in predefined_vocabularies: if s_word not in vocabularies: print("{} was not found in dictionary".format(s_word)) idx2word, word2idx = string_list2dict(vocabularies_list) save2pickle('/home/zwei/Dev/AttributeNet3/AdobeStockSelection/RetrieveSelected778/data_v2/Word2Vecs/vocabularies_complete_dict_from_train.pkl', {'idx2word': idx2word, 'word2idx': word2idx})
# Copyright (c) 2019 Zijun Wei.
# Licensed under the MIT License.
# Author: Zijun Wei
# Usage(TODO):
# Email: [email protected]
# Created: 27/Feb/2019 16:19
#
# Converts one pickled Deepemotion split into an "8-way" list in which every
# record carries the integer index of its emotion category, and stores the
# result under the dataset's ``z_data`` directory.
import os
from PyUtils.pickle_utils import loadpickle, save2pickle
from PyUtils.dict_utils import string_list2dict
import tqdm

# Dataset locations, all relative to the current user's home directory.
user_root = os.path.expanduser('~')
dataset_dir = os.path.join(user_root, 'datasets/PublicEmotion', 'Deepemotion')
z_data_dir = os.path.join(dataset_dir, 'z_data')

# Category names are sorted before the index maps are built.
emotion_categories = sorted(['fear', 'sadness', 'excitement', 'amusement',
                             'anger', 'awe', 'contentment', 'disgust'])
idx2emotion, emotion2idx = string_list2dict(emotion_categories)

# Name of the split to convert; also used for the input and output file names.
data_split = 'train_sample'
_source_pickle = os.path.join(dataset_dir, '{}.pkl'.format(data_split))
_target_pickle = os.path.join(z_data_dir, '{}_8.pkl'.format(data_split))

dataset = loadpickle(_source_pickle)

# The emotion category of a record is the directory part of its first field;
# each output row is [first field, emotion index, second field].
dataset_8 = [
    [record[0], emotion2idx[os.path.dirname(record[0])], record[1]]
    for record in tqdm.tqdm(dataset)
]

save2pickle(_target_pickle, dataset_8)
print("DB")

# train = loadpickle(os.path.join(dataset_dir, 'train.pkl'))
# train_sample = loadpickle(os.path.join(dataset_dir, 'train_sample.pkl'))
# test = loadpickle(os.path.join(dataset_dir, 'test.pkl'))
def main():
    """Evaluate an image model against word embeddings of emotion labels.

    Steps performed:

    1. Parse ``--config_file`` and turn it into ``args`` via ``parse_config``.
    2. Build the model registered under ``args.arch`` in ``models`` and place
       it on the GPU(s) selected by ``args.gpu`` / ``args.distributed``.
    3. Load weights from ``args.resume`` (the final ``fc`` layer is excluded).
       Returns early if a checkpoint path is given but the file is missing.
    4. For every ``(image_file, target, _)`` entry of ``args.val_file``, run
       the model, take its second output as the projected embedding, and use
       cosine similarity to pick the closest emotion label and the ten
       closest tags from ``args.tag_embeddings``.
    5. Print per-image results and the overall accuracy.

    Returns:
        None. All results are written with ``print``.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description="Pytorch Image CNN training from Configure Files")
    parser.add_argument(
        '--config_file',
        required=True,
        help="This scripts only accepts parameters from Json files")
    input_args = parser.parse_args()

    config_file = input_args.config_file
    args = parse_config(config_file)
    if args.name is None:
        # Fall back to the config file's stem as the run name.
        args.name = get_stem(config_file)

    torch.set_default_tensor_type('torch.FloatTensor')
    args.script_name = get_stem(__file__)
    print_func = print

    if args.device:
        os.environ["CUDA_VISIBLE_DEVICES"] = args.device

    if args.seed is not None:
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        cudnn.deterministic = True
        warnings.warn('You have chosen to seed training. '
                      'This will turn on the CUDNN deterministic setting, '
                      'which can slow down your training considerably! '
                      'You may see unexpected behavior when restarting '
                      'from checkpoints.')

    if args.gpu is not None:
        warnings.warn('You have chosen a specific GPU. This will completely '
                      'disable data parallelism.')

    args.distributed = args.world_size > 1
    if args.distributed:
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size)

    # ------------------------------------------------------------------
    # Model construction and device placement
    # ------------------------------------------------------------------
    if args.pretrained:
        print_func("=> using pre-trained model '{}'".format(args.arch))
        model = models.__dict__[args.arch](pretrained=True,
                                           num_classes=args.num_classes)
    else:
        print_func("=> creating model '{}'".format(args.arch))
        model = models.__dict__[args.arch](pretrained=False,
                                           num_classes=args.num_classes)

    if args.gpu is not None:
        model = model.cuda(args.gpu)
    elif args.distributed:
        model.cuda()
        model = torch.nn.parallel.DistributedDataParallel(model)
    else:
        if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
            model.features = torch.nn.DataParallel(model.features)
            model.cuda()
        else:
            # model = torch.nn.DataParallel(model).cuda()
            model = model.cuda()

    # ------------------------------------------------------------------
    # Checkpoint loading
    # ------------------------------------------------------------------
    if args.resume:
        if os.path.isfile(args.resume):
            print_func("=> loading checkpoint '{}'".format(args.resume))
            # NOTE: torch.load unpickles the file; only load checkpoints
            # that come from a trusted source.
            checkpoint = torch.load(args.resume)
            import collections
            if isinstance(checkpoint, collections.OrderedDict):
                # The file holds a bare state dict.
                load_state_dict(model, checkpoint,
                                exclude_layers=['fc.weight', 'fc.bias'])
            else:
                # The file holds a dict with the weights under 'state_dict'.
                load_state_dict(
                    model,
                    checkpoint['state_dict'],
                    exclude_layers=['module.fc.weight', 'module.fc.bias'])
            # A bare state dict has no 'epoch' entry, so look it up
            # defensively instead of indexing.
            print_func("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint.get('epoch', 'unknown')))
        else:
            print_func("=> no checkpoint found at '{}'".format(args.resume))
            return
    else:
        print_func(
            "=> This script is for fine-tuning only, please double check '{}'".
            format(args.resume))
        print_func("Now using randomly initialized parameters!")

    cudnn.benchmark = True

    from PyUtils.pickle_utils import loadpickle
    from PublicEmotionDatasets.Deepemotion.constants import emotion2idx, idx2emotion
    from PyUtils.dict_utils import string_list2dict
    import numpy as np
    from torchvision.datasets.folder import default_loader

    # ------------------------------------------------------------------
    # Embedding matrices: one for all tags, one for the emotion labels only
    # ------------------------------------------------------------------
    tag_wordvectors = loadpickle(args.tag_embeddings)
    tag_words = []
    tag_matrix = []
    label_words = []
    label_matrix = []
    for x_tag in tag_wordvectors:
        tag_words.append(x_tag)
        tag_matrix.append(tag_wordvectors[x_tag])
        if x_tag in emotion2idx:
            label_words.append(x_tag)
            label_matrix.append(tag_wordvectors[x_tag])

    idx2tag, _ = string_list2dict(tag_words)
    idx2label, _ = string_list2dict(label_words)

    # squeeze(1) drops a singleton second axis from the stacked vectors.
    tag_matrix = np.array(tag_matrix).squeeze(1)
    label_matrix = np.array(label_matrix).squeeze(1)

    val_list = loadpickle(args.val_file)
    from CNNs.datasets.multilabel import get_val_simple_transform
    val_transform = get_val_simple_transform()

    # ------------------------------------------------------------------
    # Evaluation
    # ------------------------------------------------------------------
    model.eval()
    correct = 0
    total = len(val_list)

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        for i, (input_image_file, target, _) in enumerate(val_list):
            image_path = os.path.join(args.data_dir, input_image_file)
            input_image = val_transform(default_loader(image_path))
            input_image = input_image.unsqueeze(0)

            # Send the batch to the same device the model was placed on; a
            # bare .cuda() would move it to the *current* device, which is
            # not necessarily args.gpu.
            if args.gpu is not None:
                input_image = input_image.cuda(args.gpu, non_blocking=True)
            else:
                input_image = input_image.cuda()

            # Only the second model output (the projection) is used here.
            _, output_proj = model(input_image)
            output_proj = output_proj.detach().cpu().numpy()

            dot_product_label = cosine_similarity(output_proj, label_matrix)[0]
            output_label = np.argmax(dot_product_label)

            # output_label indexes label_words (embedding-file order) while
            # target indexes idx2emotion, so compare the names rather than
            # the raw indices.
            predicted_name = idx2label[output_label]
            target_name = idx2emotion[target]
            if predicted_name == target_name:
                correct += 1

            dot_product_tag = cosine_similarity(output_proj, tag_matrix)[0]
            out_tags = np.argsort(dot_product_tag)[::-1][:10]

            print("* {} Image: {} GT label: {}, predicted label: {}".format(
                i, input_image_file, target_name, predicted_name))
            print(" == closest tags: {}".format(', '.join([
                '{}({:.02f})'.format(idx2tag[x], dot_product_tag[x])
                for x in out_tags
            ])))

    # Guard against an empty validation list.
    accuracy = correct / float(total) if total else 0.0
    print("Accuracy {:.4f}".format(accuracy))
) anchor_data = anchor_annotations['key2idx'] anchor_words = list(anchor_data.keys()) anchor_word_set = set(anchor_words) # model_path = '/home/zwei/Dev/AttributeNet3/TextClassification/pre_extract_w2v/params/googlenews_extracted_w2v_wordnet_synsets_py3.pl' model_path = '/home/zwei/Dev/AttributeNet3/TextClassificationV2/ckpts/selftrained.pkl' word2vec_model = loadpickle(model_path) word2vec_matrix = [] word_list = [] for s_word in word2vec_model: word_list.append(s_word) word2vec_matrix.append(word2vec_model[s_word]) idx2word, word2idx = string_list2dict(word_list) word2vec_matrix = np.array(word2vec_matrix) for word_idx, s_word in enumerate(anchor_words): # if s_word not in word2idx: # print("Cannot find {}".format(s_word)) # continue # s_word_id = str(word2idx[s_word]) if s_word in word2idx: s_word_vec = word2vec_matrix[word2idx[s_word]] else: print("{} Not found in the learned dict".format(s_word)) continue knns = cosine_similarity(s_word_vec.reshape(1, -1), word2vec_matrix)[0] topK = np.argsort(knns)[::-1][:20]
from PyUtils.pickle_utils import loadpickle from PyUtils.dict_utils import string_list2dict, get_key_sorted_dict from AdobeStockTools.TagUtils import has_digits, keepGoodTags from collections import Counter import tqdm import numpy as np import random random.seed(0) predefined_emotions = sorted(['surprise(negative)', 'happiness', 'sadness', 'disgust', 'surprise(positive)', 'neutral', 'anger', 'fear']) idx2emotion, emotion2idx = string_list2dict(predefined_emotions) def counter2multilabel(counter, emotion2idx): emotion_vector = np.zeros(len(emotion2idx)) for x in counter.most_common(): emotion_vector[emotion2idx[x[0]]] = x[1] emotion_vector /= sum(emotion_vector)*1. return emotion_vector def counter2singlelabel(counter, emotion2idx, thres=3): if counter.most_common()[0][1] < thres: return None else:
# Author: Zijun Wei # Usage(TODO): # Email: [email protected] # Created: 08/Mar/2019 09:19 from PyUtils.dict_utils import string_list2dict emotion_list = [ 'Affection', 'Anger', 'Annoyance', 'Anticipation', 'Aversion', 'Confidence', 'Disapproval', 'Disconnection', 'Disquietment', 'Doubt/Confusion', 'Embarrassment', 'Engagement', 'Esteem', 'Excitement', 'Fatigue', 'Fear', 'Happiness', 'Pain', 'Peace', 'Pleasure', 'Sadness', 'Sensitivity', 'Suffering', 'Surprise', 'Sympathy', 'Yearning' ] idx2emorion, emorion2idx = string_list2dict(emotion_list) emotion_explainations_orig = { 'Peace': 'well being and relaxed; no worry; having positive thoughts or sensations; satisfied', 'Affection': 'fond feelings; love; tenderness', 'Esteem': 'feelings of favorable opinion or judgment; respect; admiration; gratefulness', 'Anticipation': 'state of looking forward; hoping on or getting prepared for possible future events', 'Engagement': 'paying attention to something; absorbed into something; curious; interested', 'Confidence': 'feeling of being certain; conviction that an outcome will be favorable; encouraged; proud', 'Happiness': 'feeling delighted; feeling enjoyment or amusement', 'Pleasure': 'feeling of delight in the senses',