def main():
    """Qualitative spot-check of a trained TextCNN tag->emotion classifier.

    Walks every 10,000th record of the 690-removed training split and, when
    the image URL is known, prints the URL, its emotion tags, the tags the
    model vocabulary covers, and the top-10 predicted emotion scores.
    """
    # Load checkpoint onto CPU regardless of where it was trained.
    text_ckpt = torch.load(
        '/home/zwei/Dev/AttributeNet3/TextClassificationV2/ckpts/TextCNN_googlenews_690Removed_NLTDAN_1M_Static.pth.tar',
        map_location=lambda storage, loc: storage)
    args_model = text_ckpt['args_model']
    args_data = text_ckpt['args_data']
    text_model = TextCNN(args_model)
    model_tag2idx = args_data.tag2idx  # tag -> input id used at train time
    text_model.load_state_dict(text_ckpt['state_dict'], strict=True)
    # Dataset-wide tag-index decoder (distinct from the model's vocabulary).
    vocab_idx2tag = loadpickle(
        '/home/zwei/Dev/AttributeNet3/AdobeStockSelection/EmotionNetFinal/tag2idx.pkl'
    )['idx2tag']
    dataset = loadpickle(
        '/home/zwei/Dev/AttributeNet3/AdobeStockSelection/EmotionNetFinal/CNNsplit_tag_labels+full_tagidx_train+face.pkl'
    )
    text_model.eval()
    emotion_tags = loadpickle(
        '/home/zwei/Dev/AttributeNet3/AdobeStockSelection/EmotionNetFinal/etag2idx.pkl'
    )['key2idx']
    idx2emotion = loadpickle(
        '/home/zwei/Dev/AttributeNet3/AdobeStockSelection/EmotionNetFinal/etag2idx.pkl'
    )['idx2key']
    image_url_dict = loadpickle(
        '/home/zwei/Dev/AttributeNet3/AdobeStockSelection/RetrieveSelected778/data_v2/dataset_image_urls.pkl'
    )
    for data_idx, s_data in enumerate(dataset):
        # Sample sparsely: only every 10,000th record is inspected.
        if data_idx % 10000 != 0:
            continue
        # s_data layout: [image_url, ?, tag_idx_list] — indices 0 and 2 are
        # the only ones used here; confirm layout against the pickle producer.
        x_tags = [vocab_idx2tag[x] for x in s_data[2]]
        x_tag_ids = []
        x_tag_names = []
        x_emotion_tags = []
        for x_tag in x_tags:
            if x_tag in model_tag2idx:
                x_tag_names.append(x_tag)
                x_tag_ids.append(model_tag2idx[x_tag])
            else:
                pass
                # x_tag_ids.append(args_model.vocab_size)
            if x_tag in emotion_tags:
                x_emotion_tags.append(x_tag)
        # Pad/truncate to the fixed model input length; vocab_size+1 is the
        # padding id.
        x_tag_ids = pad_sentences(x_tag_ids, args_model.max_len,
                                  args_model.vocab_size + 1)
        x_tag_ids = torch.LongTensor(x_tag_ids).unsqueeze(0)  # add batch dim
        predicts = F.softmax(text_model(x_tag_ids)[0],
                             dim=1).squeeze(0).cpu().data.numpy()
        image_cid = int(get_image_cid_from_url(s_data[0], location=1))
        arg_max_predict = np.argsort(predicts)[::-1][:10]  # top-10 emotions
        if image_cid in image_url_dict:
            print("{}".format(image_url_dict[image_cid]))
            print(", ".join(x_emotion_tags))
            print(", ".join(x_tag_names))
            print(', '.join('{}({:.2f})'.format(idx2emotion[i], predicts[i])
                            for i in arg_max_predict))
def main():
    """Distill TextCNN emotion predictions into soft labels for the test split.

    Runs the trained tag->emotion TextCNN over every record of the test set,
    stores ``[image_url, softmax_scores]`` pairs, and saves them as
    ``CNNsplit_distill8_test.pkl``. Every 50,000th record is also printed as a
    sanity check.

    Fixes vs. the original:
      * ``idx2emotion`` was referenced in the preview branch but never
        defined, causing a NameError; it is now loaded from ``etag2idx.pkl``
        exactly as the sibling inspection script does.
      * ``torch.load`` now passes ``map_location`` so the checkpoint loads on
        CPU-only machines, consistent with the sibling script.
    """
    text_ckpt = torch.load(
        '/home/zwei/Dev/AttributeNet3/TextClassificationV2/ckpts/TextCNN_googlenews_NLT_Static.pth.tar',
        map_location=lambda storage, loc: storage)
    args_model = text_ckpt['args_model']
    args_data = text_ckpt['args_data']
    text_model = TextCNN(args_model)
    model_tag2idx = args_data.tag2idx  # tag -> model input id
    text_model.load_state_dict(text_ckpt['state_dict'], strict=True)
    vocab_idx2tag = loadpickle(
        '/home/zwei/Dev/AttributeNet3/AdobeStockSelection/EmotionNetFinal/tag2idx.pkl'
    )['idx2tag']
    dataset = loadpickle(
        '/home/zwei/Dev/AttributeNet3/AdobeStockSelection/EmotionNetFinal/CNNsplit_tagidx_36534_test.pkl'
    )
    text_model.eval()
    emotion_tags = loadpickle(
        '/home/zwei/Dev/AttributeNet3/AdobeStockSelection/EmotionNetFinal/etag2idx.pkl'
    )['key2idx']
    # FIX: previously missing — needed by the preview print below.
    idx2emotion = loadpickle(
        '/home/zwei/Dev/AttributeNet3/AdobeStockSelection/EmotionNetFinal/etag2idx.pkl'
    )['idx2key']
    image_url_dict = loadpickle(
        '/home/zwei/Dev/AttributeNet3/AdobeStockSelection/RetrieveSelected778/data_v2/dataset_image_urls.pkl'
    )
    new_dataset = []
    for data_idx, s_data in tqdm.tqdm(enumerate(dataset), total=len(dataset)):
        # s_data layout here: [image_url, tag_idx_list].
        x_tags = [vocab_idx2tag[x] for x in s_data[1]]
        x_tag_ids = []
        x_tag_names = []
        x_emotion_tags = []
        for x_tag in x_tags:
            if x_tag in model_tag2idx:
                x_tag_ids.append(model_tag2idx[x_tag])
                x_tag_names.append(x_tag)
            if x_tag in emotion_tags:
                x_emotion_tags.append(x_tag)
        # Pad/truncate to fixed model length; vocab_size+1 is the padding id.
        x_tag_ids = pad_sentences(x_tag_ids, args_model.max_len,
                                  args_model.vocab_size + 1)
        x_tag_ids = torch.LongTensor(x_tag_ids).unsqueeze(0)  # batch of one
        predicts = F.softmax(text_model(x_tag_ids)[0],
                             dim=1).squeeze(0).cpu().data.numpy()
        new_dataset.append([s_data[0], predicts.tolist()])
        if data_idx % 50000 == 0:
            image_cid = int(get_image_cid_from_url(s_data[0], location=1))
            if image_cid in image_url_dict:
                print("{}".format(image_url_dict[image_cid]))
                print(", ".join(x_emotion_tags))
                print(", ".join(x_tag_names))
                print(', '.join(
                    '{}({:.2f})'.format(idx2emotion[i], predicts[i])
                    for i in range(len(predicts))))
    save2pickle(
        '/home/zwei/Dev/AttributeNet3/AdobeStockSelection/EmotionNetFinal/CNNsplit_distill8_test.pkl',
        new_dataset)
def read_AMT_complete_mtrain_mtest(data_path=None):
    """Build multi-label train/val splits from the MTurk emotion annotations.

    Keeps only tags present in the pre-extracted w2v vocabulary, converts the
    per-image emotion votes into multi-label targets, and returns a
    (train, val) pair split 90/10 after a seeded shuffle.

    NOTE(review): relies on module-level ``emotion2idx`` and
    ``counter2multilabel`` that are not visible in this chunk — confirm they
    are defined in the enclosing file.
    """
    if data_path is None:
        data_path = '/home/zwei/Dev/AttributeNet3/MturkCollectedData/data/mturk_annotations.pkl'
    annotated_data = loadpickle(data_path)
    # Vocabulary of tags that have pre-extracted word2vec embeddings.
    predefined_vocabularies = loadpickle(
        '/home/zwei/Dev/AttributeNet3/TextClassification/pre_extract_w2v/params/selftrained_extracted_w2v_wordnet_synsets_py3.pl'
    )
    data = []
    for s_image_cid in tqdm.tqdm(annotated_data, desc="Processing Annotated Data"):
        s_data = annotated_data[s_image_cid]
        # Flatten the per-worker emotion lists into one vote counter.
        s_image_emotions = []
        for x in s_data['image_emotion']:
            s_image_emotions.extend(x)
        s_image_emotions = Counter(s_image_emotions)
        # Keep only tags covered by the embedding vocabulary.
        goodtags = []
        raw_tags = s_data['tags']
        for s_raw_tag in raw_tags:
            if s_raw_tag in predefined_vocabularies:
                goodtags.append(s_raw_tag)
        if len(goodtags) < 1:
            continue  # skip images with no usable tags
        data.append([goodtags, s_image_emotions, s_image_cid])
    # Deterministic 90/10 split.
    random.seed(0)
    random.shuffle(data)
    dev_idx = len(data) // 10
    val_data = data[:dev_idx]
    train_data = data[dev_idx:]
    # Convert vote counters to multi-label targets for both splits.
    updated_train_data = []
    for s_data in train_data:
        goodtags, s_emotion_counter, s_image_cid = s_data
        s_emotion_label = counter2multilabel(s_emotion_counter, emotion2idx)
        updated_train_data.append([goodtags, s_emotion_label, s_image_cid])
    updated_val_data = []
    for s_data in val_data:
        goodtags, s_emotion_counter, s_image_cid = s_data
        s_emotion_label = counter2multilabel(s_emotion_counter, emotion2idx)
        updated_val_data.append([goodtags, s_emotion_label, s_image_cid])
    print("Train: {}\tVal: {}".format(len(updated_train_data),
                                      len(updated_val_data)))
    return updated_train_data, updated_val_data
def deepsentiment_s_test(args):
    """Build the DeepSentiment test dataset from ``args.test_file``.

    Images are resolved relative to ``args.data_dir``; labels pass through
    untransformed.
    """
    test_records = loadpickle(args.test_file)
    return ImageRelLists(image_paths=test_records,
                         image_root=args.data_dir,
                         transform=get_val_simple_transform(),
                         target_transform=None)
def main():
    """Walk each per-class training pickle and build absolute image paths.

    NOTE(review): the inner loop ends right after computing ``image_path`` —
    the feature-extraction work the tqdm description promises appears to be
    truncated in this chunk; confirm against the full file.
    """
    import argparse
    parser = argparse.ArgumentParser(
        description="Pytorch Image CNN training from Configure Files")
    parser.add_argument(
        '--config_file',
        required=True,
        help="This scripts only accepts parameters from Json files")
    input_args = parser.parse_args()
    config_file = input_args.config_file
    args = parse_config(config_file)
    class_lens = args.class_len
    # One training pickle + image directory per class group.
    for ind in range(len(class_lens)):
        print("-------------------------------")
        train_dataset = loadpickle(args.train_files[ind])
        print(len(train_dataset))
        image_directory = args.data_dirs[ind]
        for s_data in tqdm.tqdm(train_dataset, desc="Extracting Features"):
            if s_data is None:
                continue
            # Normalize Windows-style separators to forward slashes.
            image_path = os.path.join(image_directory,
                                      s_data[0]).replace("\\", "/")
def multilabel_idxcount_v2_val(args):
    """Validation dataset whose (idx, count) labels become KL soft targets."""
    val_records = loadpickle(args.val_file)
    label_transform = multilabelidxcount2KL(args.num_classes)
    return ImageRelLists(image_paths=val_records,
                         image_root=args.data_dir,
                         transform=get_val_simple_transform(),
                         target_transform=label_transform)


# if __name__ == '__main__':
#     # x_transform = multilabel2multihot(500)
#     # x = x_transform([4, 10])
#     # print("DEB")
#     from argparse import Namespace
#     from CNNs.dataloaders.utils import none_collate
#
#     args = Namespace(num_classes=742)
#     annotation_file = '/home/zwei/Dev/AttributeNet3/AdobeStockSelection/RetrieveSelected778/data_v2/CNNsplit_{}.pkl'
#     data_dir = '/home/zwei/datasets/stockimage_742/images-256'
#     dataset = multilabel_val(args, annotation_file, data_dir)
#     val_loader = torch.utils.data.DataLoader(dataset,
#                                              batch_size=10, shuffle=False,
#                                              num_workers=4, pin_memory=True, collate_fn=none_collate)
#     import tqdm
#
#     for s_images, s_labels in tqdm.tqdm(val_loader):
#         pass
#     print("Done")
def main():
    """ img_id_kws = {}
    print("Reading keyword files")
    for file in glob.glob(kw_folder + "*.json"):
        print(file)
        with open(file, 'r') as of_:  # keyword file
            lines = of_.readlines()
            for l in lines:  # image
                d = json.loads(l)
                if d['cid'] not in img_id_kws:
                    tags = []
                    for t in d['tags']:
                        words = t.split('^')[0].split()
                        for w in words:
                            tags.append(w)
                    img_id_kws[d['cid']] = tags
    save2pickle(os.path.join(adobe_folder, "img_id_kws.pkl"), img_id_kws)
    """
    # The triple-quoted block above is the (disabled) one-time extraction of
    # per-image keyword lists; the pickle it produced is loaded instead.
    # NOTE(review): relies on module-level ``adobe_folder`` and ``Word2Vec``
    # (gensim) not visible in this chunk; the function also appears truncated
    # — the trained model is never saved here.
    img_id_kws = loadpickle(os.path.join(adobe_folder, "img_id_kws.pkl"))
    sentences = list(img_id_kws.values())  # one "sentence" of tags per image
    #save2pickle(os.path.join(adobe_folder, "sentences.pkl"), sentences)
    # train word2vec model
    model_folder = "/nfs/bigfovea/add_disk0/eugenia/Emotion/wordembedding_models/"
    model_file = "w2v_adobe.model"
    print("Training Word2Vec model")
    # skip-gram (sg=1), 50-dim vectors, window 3, keep every word (min_count=1)
    model = Word2Vec(sentences, min_count=1, size= 50, workers=16, window=3,
                     sg=1)
def categorical_train(args):
    """Category-balanced training dataset drawing ``args.sample_size`` items.

    Counts the items available per category so the sampler can draw from each
    category proportionally.
    """
    category_dict = loadpickle(args.train_file)
    per_category_sizes = {key: len(category_dict[key]) for key in category_dict}
    return SampleLoader(categories=category_dict,
                        categories_counts=per_category_sizes,
                        root=args.data_dir,
                        transform=get_train_simple_transform(),
                        target_transform=None,
                        sample_size=args.sample_size)
def multilabel_idxcount_v2_train(args):
    """Training dataset whose (idx, count) labels become KL soft targets."""
    train_records = loadpickle(args.train_file)
    label_transform = multilabelidxcount2KL(args.num_classes)
    return ImageRelLists(image_paths=train_records,
                         image_root=args.data_dir,
                         transform=get_train_fix_size_transform(),
                         target_transform=label_transform)
def categorical_train(args):
    """Category-sampled training dataset (fixed-size image transform)."""
    categories = loadpickle(args.train_file)
    return SampleLoader(category_dict=categories,
                        root=args.data_dir,
                        transform=get_train_fix_size_transform(),
                        target_transform=None,
                        sample_size=args.sample_size)
def multilabel_idxcount_v2_val(args):
    """Validation dataset whose (idx, count) labels become multi-hot vectors."""
    val_records = loadpickle(args.val_file)
    label_transform = multilabelidxcount2multihot(args.num_classes)
    return ImageRelLists(image_paths=val_records,
                         image_root=args.data_dir,
                         transform=get_val_simple_transform(),
                         target_transform=label_transform)
def feature_list(args, annotation_file, data_dir, rel_path_h=None):
    """Name-preserving dataset for feature extraction over *annotation_file*.

    ``args`` and ``rel_path_h`` are accepted for interface compatibility but
    are not used here.
    """
    records = loadpickle(annotation_file)
    return ImageNamesRelLists(image_paths=records,
                              image_root=data_dir,
                              transform=get_val_simple_transform())
def singlelabel_v2_val(args):
    #FIXME:
    """Single-label validation dataset; labels pass through unchanged."""
    val_records = loadpickle(args.val_file)
    return ImageRelLists(image_paths=val_records,
                         image_root=args.data_dir,
                         transform=get_val_simple_transform(),
                         target_transform=None)
def deepsentiment_m_val(args):
    """DeepSentiment validation dataset for the split selected by ``args.ind``."""
    split_records = loadpickle(args.val_files[args.ind])
    return ImageRelLists(image_paths=split_records,  # [:n_samples]
                         image_root=args.data_dirs[args.ind],
                         transform=get_val_simple_transform(),
                         target_transform=None)
def singlelabel_test(args, annotation_file, data_dir):
    #FIXME:
    """Single-label test dataset; ``annotation_file`` is a '{}' template
    instantiated with 'test'."""
    test_annotations = annotation_file.format('test')
    test_records = loadpickle(test_annotations)
    return ImageRelLists(image_paths=test_records,
                         image_root=data_dir,
                         transform=get_val_simple_transform(),
                         target_transform=None)
def simple_multilabel_val(args):
    #FIXME:
    """Multi-label validation dataset using the simple label transform."""
    val_records = loadpickle(args.val_file)
    return ImageRelLists(image_paths=val_records,
                         image_root=args.data_dir,
                         transform=get_val_simple_transform(),
                         target_transform=simple_multitrans())
def multilabel_BCE_test(args, annotation_file, data_dir):
    #FIXME:
    """BCE-style multi-label test dataset; ``annotation_file`` is a '{}'
    template instantiated with 'test'."""
    test_annotations = annotation_file.format('test')
    test_records = loadpickle(test_annotations)
    label_transform = multilabel2multi1(args.num_classes)
    return ImageRelLists(image_paths=test_records,
                         image_root=data_dir,
                         transform=get_val_simple_transform(),
                         target_transform=label_transform)
def multilabel_v2_val(args):
    #FIXME:
    """Multi-label validation dataset with multi-hot targets."""
    val_records = loadpickle(args.val_file)
    label_transform = multilabel2multihot(args.num_classes)
    return ImageRelLists(image_paths=val_records,
                         image_root=args.data_dir,
                         transform=get_val_simple_transform(),
                         target_transform=label_transform)
def read_690_complete_mtrain_mtest_wo_emotion(data_path=None, subset_N=None):
    """Build train/val splits with explicit emotion words stripped from tags.

    Decodes each record's tag indices, removes any tag that is itself one of
    the 690 emotion words, and returns (train, val) with a fixed 2000-sample
    validation split after a seeded shuffle.

    :param data_path: annotation pickle; defaults to the EmotionNetFinal
        training split.
    :param subset_N: if given, only the first N records are processed.
    """
    if data_path is None:
        data_path = os.path.join(
            project_root,
            'AdobeStockSelection/EmotionNetFinal/CNNsplit_tag_labels+full_tagidx_train+face.pkl'
        )
    annotated_data = loadpickle(data_path)
    # The 690 emotion-word vocabulary used for filtering.
    emotion2idx = loadpickle(
        os.path.join(
            project_root,
            'AdobeStockSelection/EmotionNetFinal/etag2idx.pkl'))['key2idx']
    idx2tag = loadpickle(
        os.path.join(
            project_root,
            'AdobeStockSelection/EmotionNetFinal/tag2idx.pkl'))['idx2tag']
    # predefined_vocabularies = loadpickle('/home/zwei/Dev/AttributeNet3/TextClassification/pre_extract_w2v/params/selftrained_extracted_w2v_wordnet_synsets_py3.pl')
    data = []
    if subset_N is None:
        subset = annotated_data
    else:
        subset = annotated_data[:subset_N]
    for s_data in tqdm.tqdm(subset, desc="Processing Annotated Data"):
        # s_data layout: [image_url, label, tag_idx_list] — indices 0..2 used.
        s_image_cid = int(get_image_cid_from_url(s_data[0], location=1))
        raw_tags = [idx2tag[x] for x in s_data[2]]
        # Drop tags that are themselves emotion words.
        updated_tags = []
        for s_tag in raw_tags:
            if s_tag not in emotion2idx:
                updated_tags.append(s_tag)
        data.append([updated_tags, s_data[1], s_image_cid])
    # Deterministic shuffle, then a fixed-size validation head.
    random.seed(0)
    random.shuffle(data)
    dev_idx = 2000
    val_data = data[:dev_idx]
    train_data = data[dev_idx:]
    return train_data, val_data
def test_690_contain(tag2idx):
    """Report whether every one of the 690 core emotion words is in *tag2idx*.

    Prints one line per missing word, or a confirmation message when all are
    present.
    """
    vocabulary_path = '/home/zwei/Dev/AttributeNet3/AdobeStockSelection/RetrieveSelected778/data_v6_690_xmas/etag2idx.pkl'
    core_words = loadpickle(vocabulary_path)['key2idx']
    missing = [word for word in core_words if word not in tag2idx]
    for word in missing:
        print("{} Not Found".format(word))
    if not missing:
        print("All the 690 words can be found in this dict")
def ifContainCoreWords(tag2idx):
    """Check that every one of the 690 core emotion words appears in *tag2idx*.

    Prints each missing word, or a confirmation when all are present.

    Bug fix: the vocabulary path previously started with '/', and
    ``os.path.join`` discards all preceding components when it meets an
    absolute path — so ``project_root`` was silently ignored. The leading
    slash is removed so the path resolves under ``project_root`` as intended.
    """
    emotion690_vocabulary = loadpickle(os.path.join(
        project_root,
        'AdobeStockSelection/RetrieveSelected778/data_v6_690_xmas/etag2idx.pkl'))
    selected_emotion690 = emotion690_vocabulary['key2idx']
    all_found = True
    for x in selected_emotion690:
        if x not in tag2idx:
            print("{} Not Found".format(x))
            all_found = False
    if all_found:
        print("All the 690 words can be found in this dict")
def main():
    """Merge each training pickle with its '_try' companion into a '_new' file.

    Reads the JSON config named on the command line, and for every per-class
    training file concatenates it with the matching ``*_try.pkl`` data,
    saving the result as ``*_new.pkl``.
    """
    import argparse
    parser = argparse.ArgumentParser(description="Pytorch Image CNN training from Configure Files")
    parser.add_argument('--config_file', required=True, help="This scripts only accepts parameters from Json files")
    cli_args = parser.parse_args()
    config_file = cli_args.config_file
    args = parse_config(config_file)
    for ind in range(len(args.class_len)):
        print("-------------------------------")
        file_name = args.train_files[ind]
        base_records = loadpickle(file_name)
        extra_records = loadpickle(file_name.replace(".pkl", "_try.pkl"))
        print(len(base_records), len(extra_records))
        save2pickle(file_name.replace(".pkl", "_new.pkl"),
                    base_records + extra_records)
def read_AMT_complete_mtrain_mtest(data_path=None):
    """Print a preview of the first ~1000 MTurk emotion annotations.

    For each image: its index, the total annotation count, the number of
    emotion tags, the image URL, and the emotion tags themselves.
    """
    if data_path is None:
        data_path = '/home/zwei/Dev/AttributeNet3/MturkCollectedData/data/mturk_annotations.pkl'
    annotated_data = loadpickle(data_path)
    for idx, s_image_cid in enumerate(annotated_data):
        if idx > 1000:
            break
        record = annotated_data[s_image_cid]
        print("{}\t{}\t{}".format(idx, len(annotated_data),
                                  len(record['emotion-tags'])))
        print("{}".format(record['image_url']))
        print(', '.join(record['emotion-tags']))
# Usage(TODO): create the vocabulary # Email: [email protected] # Created: 15/Feb/2019 12:46 import glob import os import tqdm from PyUtils.pickle_utils import loadpickle, save2pickle from PyUtils.json_utils import load_json_list from PyUtils.dict_utils import string_list2dict from nltk.corpus import wordnet from AdobeStockTools.TagUtils import remove_hat, has_digits from AdobeStockTools.AdobeStockUnitls import get_image_cid_from_url raw_annotation_files = glob.glob(os.path.join('/home/zwei/Dev/AttributeNet3/AdobeStockSelection/RetrieveSelected778/selected_keywords_retrieve_v2', '*.json')) predefined_vocabularies = set(loadpickle('/home/zwei/Dev/AttributeNet3/AdobeStockSelection/RetrieveSelected778/data_v2/tag_frequencies_selected.pkl').keys()) valid_annotation_list = loadpickle('/home/zwei/Dev/AttributeNet3/AdobeStockSelection/RetrieveSelected778/data_v2/CNNsplit_train.pkl') train_cid_list = [] for s_item in tqdm.tqdm(valid_annotation_list, desc="Processing image cids"): train_cid_list.append(int(get_image_cid_from_url(s_item[0], location=1))) train_cid_set = set(train_cid_list) processedCIDs = set() vocabularies = set() bad_vocabularies = set() for s_file in tqdm.tqdm(raw_annotation_files): keyword_raw_annotations = load_json_list(s_file) for s_annotation in keyword_raw_annotations: s_cid = s_annotation['cid']
def main():
    """Train an image CNN with a frozen text CNN providing tag supervision.

    Parses a JSON config, builds the visual model (optionally pre-trained /
    frozen), loads the text-classification checkpoint, sets up (distributed)
    data loading, and runs the train/validate/checkpoint loop.
    """
    import argparse
    parser = argparse.ArgumentParser(description="Pytorch Image CNN training from Configure Files")
    parser.add_argument('--config_file', required=True, help="This scripts only accepts parameters from Json files")
    input_args = parser.parse_args()
    config_file = input_args.config_file
    args = parse_config(config_file)
    if args.name is None:
        args.name = get_stem(config_file)
    torch.set_default_tensor_type('torch.FloatTensor')
    best_prec1 = 0
    args.script_name = get_stem(__file__)
    current_time_str = get_date_str()
    # Checkpoints go under ckpts2/<name>/<ID>-<timestamp> unless overridden.
    # if args.resume is None:
    if args.save_directory is None:
        save_directory = get_dir(os.path.join(project_root, 'ckpts2', '{:s}'.format(args.name), '{:s}-{:s}'.format(args.ID, current_time_str)))
    else:
        save_directory = get_dir(os.path.join(project_root, 'ckpts2', args.save_directory))
    # else:
    #     save_directory = os.path.dirname(args.resume)
    print("Save to {}".format(save_directory))
    log_file = os.path.join(save_directory, 'log-{0}.txt'.format(current_time_str))
    logger = log_utils.get_logger(log_file)
    log_utils.print_config(vars(args), logger)
    print_func = logger.info  # all subsequent status output goes to the log
    print_func('ConfigFile: {}'.format(config_file))
    args.log_file = log_file
    if args.device:
        os.environ["CUDA_VISIBLE_DEVICES"] = args.device
    if args.seed is not None:
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        cudnn.deterministic = True
        warnings.warn('You have chosen to seed training. '
                      'This will turn on the CUDNN deterministic setting, '
                      'which can slow down your training considerably! '
                      'You may see unexpected behavior when restarting '
                      'from checkpoints.')
    if args.gpu is not None:
        warnings.warn('You have chosen a specific GPU. This will completely '
                      'disable data parallelism.')
    args.distributed = args.world_size > 1
    if args.distributed:
        dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url, world_size=args.world_size)
    # Build the visual backbone.
    if args.pretrained:
        print_func("=> using pre-trained model '{}'".format(args.arch))
        visual_model = models.__dict__[args.arch](pretrained=True, num_classes=args.num_classes)
    else:
        print_func("=> creating model '{}'".format(args.arch))
        visual_model = models.__dict__[args.arch](pretrained=False, num_classes=args.num_classes)
    if args.freeze:
        visual_model = CNN_utils.freeze_all_except_fc(visual_model)
    # Load the (frozen) text classifier used as the supervision source.
    if os.path.isfile(args.text_ckpt):
        print_func("=> loading checkpoint '{}'".format(args.text_ckpt))
        text_data = torch.load(args.text_ckpt, map_location=lambda storage, loc: storage)
        text_model = TextCNN(text_data['args_model'])
        # load_state_dict(text_model, text_data['state_dict'])
        text_model.load_state_dict(text_data['state_dict'], strict=True)
        text_model.eval()
        print_func("=> loaded checkpoint '{}' for text classification".format(args.text_ckpt))
        args.vocab_size = text_data['args_model'].vocab_size
    else:
        print_func("=> no checkpoint found at '{}'".format(args.text_ckpt))
        return
    # NOTE(review): vocab_size is immediately overwritten below with the
    # tag2idx length — presumably intentional, but worth confirming.
    args.tag2clsidx = text_data['args_data'].tag2idx
    args.vocab_size = len(args.tag2clsidx)
    args.text_embed = loadpickle(args.text_embed)
    args.idx2tag = loadpickle(args.idx2tag)['idx2tag']
    # Device placement: single GPU, distributed, or DataParallel fallback.
    if args.gpu is not None:
        visual_model = visual_model.cuda(args.gpu)
        text_model = text_model.cuda((args.gpu))
    elif args.distributed:
        visual_model.cuda()
        visual_model = torch.nn.parallel.DistributedDataParallel(visual_model)
    else:
        if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
            visual_model.features = torch.nn.DataParallel(visual_model.features)
            visual_model.cuda()
        else:
            visual_model = torch.nn.DataParallel(visual_model).cuda()
        text_model = torch.nn.DataParallel(text_model).cuda()
    criterion = nn.CrossEntropyLoss(ignore_index=-1).cuda(args.gpu)
    # Only optimize parameters that still require gradients (freeze-aware).
    optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad, visual_model.parameters()),
                                lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)
    if args.lr_schedule:
        print_func("Using scheduled learning rate")
        scheduler = lr_scheduler.MultiStepLR(
            optimizer, [int(i) for i in args.lr_schedule.split(',')], gamma=0.1)
    else:
        scheduler = lr_scheduler.ReduceLROnPlateau(
            optimizer, 'min', patience=args.lr_patience)
    # optimizer = torch.optim.SGD(model.parameters(), args.lr,
    #                             momentum=args.momentum,
    #                             weight_decay=args.weight_decay)
    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print_func("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            import collections
            # A bare OrderedDict is a raw state_dict; otherwise it is a full
            # checkpoint dict with metadata.
            if isinstance(checkpoint, collections.OrderedDict):
                load_state_dict(visual_model, checkpoint)
            else:
                load_state_dict(visual_model, checkpoint['state_dict'])
                print_func("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch']))
        else:
            print_func("=> no checkpoint found at '{}'".format(args.resume))
    cudnn.benchmark = True
    model_total_params = sum(p.numel() for p in visual_model.parameters())
    model_grad_params = sum(p.numel() for p in visual_model.parameters() if p.requires_grad)
    print_func("Total Parameters: {0}\t Gradient Parameters: {1}".format(model_total_params, model_grad_params))
    # Data loading code
    val_dataset = get_instance(custom_datasets, '{0}'.format(args.valloader), args)
    if val_dataset is None:
        val_loader = None
    else:
        val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=args.batch_size, shuffle=False,
                                                 num_workers=args.workers, pin_memory=True, collate_fn=none_collate)
    if args.evaluate:
        print_func('Validation Only')
        validate(val_loader, visual_model, criterion, args, print_func)
        return
    else:
        train_dataset = get_instance(custom_datasets, '{0}'.format(args.trainloader), args)
        if args.distributed:
            train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
        else:
            train_sampler = None
        train_loader = torch.utils.data.DataLoader(
            train_dataset, batch_size=args.batch_size, shuffle=(train_sampler is None),
            num_workers=args.workers, pin_memory=True, sampler=train_sampler, collate_fn=none_collate)
        for epoch in range(args.start_epoch, args.epochs):
            if args.distributed:
                train_sampler.set_epoch(epoch)
            if args.lr_schedule:
                # CNN_utils.adjust_learning_rate(optimizer, epoch, args.lr)
                scheduler.step()
            current_lr = optimizer.param_groups[0]['lr']
            print_func("Epoch: [{}], learning rate: {}".format(epoch, current_lr))
            # train for one epoch
            train(train_loader, visual_model, text_model, criterion, optimizer, epoch, args, print_func)
            # evaluate on validation set
            if val_loader:
                prec1, val_loss = validate(val_loader, visual_model, criterion, args, print_func)
            else:
                prec1 = 0
                val_loss = 0
            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            CNN_utils.save_checkpoint({
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': visual_model.state_dict(),
                'best_prec1': best_prec1,
                'optimizer': optimizer.state_dict(),
            }, is_best, file_directory=save_directory, epoch=epoch)
            # ReduceLROnPlateau steps on the validation loss, after the epoch.
            if not args.lr_schedule:
                scheduler.step(val_loss)
# Copyright (c) 2019 Zijun Wei. # Licensed under the MIT License. # Author: Zijun Wei # Usage(TODO): # Email: [email protected] # Created: 26/Mar/2019 11:42 from PyUtils.pickle_utils import loadpickle keydicts = loadpickle( '/home/zwei/Dev/AttributeNet3/AdobeStockSelection/EmotionNetFinal/etag2idx.pkl' ) key2idx = keydicts['key2idx'] keys = [] for idx, s_key in enumerate(key2idx): keys.append('{}: {}'.format(idx + 1, s_key)) print(", ".join(keys))
# Copyright (c) 2019 Zijun Wei. # Licensed under the MIT License. # Author: Zijun Wei # Usage(TODO): # Email: [email protected] # Created: 16/Mar/2019 11:15 from PyUtils.pickle_utils import loadpickle, save2pickle import random random.seed(0) train_data = loadpickle( '/home/zwei/datasets/PublicEmotion/Deepsentiment/z_data/train_3.pkl') split = len(train_data) // 10 random.shuffle(train_data) train_val_data = train_data[:split] train_train_data = train_data[split:] save2pickle( '/home/zwei/datasets/PublicEmotion/Deepsentiment/z_data/train_3_90_list.pkl', train_train_data) save2pickle( '/home/zwei/datasets/PublicEmotion/Deepsentiment/z_data/train_3_10_list.pkl', train_val_data) print("DB")
sys.path.append(project_root)
import logging
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s',
                    level=logging.INFO)
import tqdm
from PyUtils.file_utils import get_dir
from PyUtils.pickle_utils import loadpickle
from gensim import models
import random
random.seed(0)

# Load the raw tag sentences and prepare an output directory for the
# word2vec models trained from them.
user_root = os.path.expanduser('~')
training_data = loadpickle(
    os.path.join(user_root, 'Dev/AttributeNet3/LanguageData/raw_tag_sentences.pkl'))
save_directory = get_dir(
    os.path.join(user_root, 'Dev/AttributeNet3/LanguageData/word2vec_models'))
training_sentences = training_data['data']
max_len = training_data['max_len']
embedding_dim = 300
window_size = max_len  # window spans the longest tag sentence
texts = []
shuffle_times = 10  # presumably each sentence is re-shuffled this many times
# NOTE(review): the body of random_drop continues beyond this chunk.
def random_drop(s_text, drop_rate=0.1):
# # This file is part of the AttributeNet3 project. # # @author Zijun Wei <*****@*****.**> # @copyright (c) Adobe Inc. # 2020-Jun-15. # 08: 51 # All Rights Reserved # from PyUtils.pickle_utils import loadpickle, save2pickle import tqdm annotation_file = '/Dataset_release/SE30K8/annotations/mturk_annotations_240.pkl.keep' raw_annotations = loadpickle(annotation_file) # updated_annotations = {} for s_idx, (s_key, s_item) in enumerate(tqdm.tqdm(raw_annotations.items())): # if s_idx % 500 == 0: # print(s_item['image_url']) s_emotion_annotations = s_item['image_emotion'] for s_emotion in s_emotion_annotations: if len(s_emotion) > 1: print(s_key) print("DB")
# Copyright (c) 2019 Zijun Wei. # Licensed under the MIT License. # Author: Zijun Wei # Usage(TODO): # Email: [email protected] # Created: 27/Feb/2019 16:19 import os from PyUtils.pickle_utils import loadpickle, save2pickle from PyUtils.dict_utils import string_list2dict import tqdm user_root = os.path.expanduser('~') dataset_dir = os.path.join(user_root, 'datasets/PublicEmotion', 'Deepemotion') z_data_dir = os.path.join(dataset_dir, 'z_data') emotion_categories = sorted(['fear', 'sadness', 'excitement', 'amusement', 'anger', 'awe', 'contentment', 'disgust']) idx2emotion, emotion2idx = string_list2dict(emotion_categories) data_split = 'train_sample' dataset = loadpickle(os.path.join(dataset_dir, '{}.pkl'.format(data_split))) dataset_8 = [] for s_data in tqdm.tqdm(dataset): s_data_category = os.path.dirname(s_data[0]) emotion_idx = emotion2idx[s_data_category] dataset_8.append([s_data[0], emotion_idx, s_data[1]]) save2pickle(os.path.join(z_data_dir, '{}_8.pkl'.format(data_split)), dataset_8) print("DB") # train = loadpickle(os.path.join(dataset_dir, 'train.pkl')) # train_sample = loadpickle(os.path.join(dataset_dir, 'train_sample.pkl')) # test = loadpickle(os.path.join(dataset_dir, 'test.pkl'))