Example #1
0
import os
import glob

import tqdm
from nltk.corpus import wordnet

from PyUtils.pickle_utils import loadpickle
from PyUtils.json_utils import load_json_list
# remove_hat and get_image_cid_from_url are project helpers whose module
# paths are not shown in this snippet.

raw_annotation_files = glob.glob(os.path.join('/home/zwei/Dev/AttributeNet3/AdobeStockSelection/RetrieveSelected778/selected_keywords_retrieve_v2', '*.json'))

predefined_vocabularies = set(loadpickle('/home/zwei/Dev/AttributeNet3/AdobeStockSelection/RetrieveSelected778/data_v2/tag_frequencies_selected.pkl').keys())

valid_annotation_list = loadpickle('/home/zwei/Dev/AttributeNet3/AdobeStockSelection/RetrieveSelected778/data_v2/CNNsplit_train.pkl')
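
# Pull the numeric content id (cid) out of each training annotation's URL
# so membership can be checked against a set below.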
train_cid_list = []
for s_item in tqdm.tqdm(valid_annotation_list, desc="Processing image cids"):
    train_cid_list.append(int(get_image_cid_from_url(s_item[0], location=1)))

train_cid_set = set(train_cid_list)

processedCIDs = set()
vocabularies = set()
bad_vocabularies = set()
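
# One pass over the raw annotation files: process each training cid once,
# then route each lower-cased tag into the known vocabulary, the
# WordNet-backed vocabulary, or the bad-tag set.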
for s_file in tqdm.tqdm(raw_annotation_files):
    keyword_raw_annotations = load_json_list(s_file)
    for s_annotation in keyword_raw_annotations:
        s_cid = s_annotation['cid']
        if s_cid not in train_cid_set or s_cid in processedCIDs:
            continue
        processedCIDs.add(s_cid)
        s_tags = remove_hat(s_annotation['tags'])

        for s_tag in s_tags:
            s_tag = s_tag.lower()
            if s_tag in vocabularies or s_tag in bad_vocabularies:
                continue
            else:
                if s_tag in predefined_vocabularies:
                    vocabularies.add(s_tag)
                elif len(wordnet.synsets(s_tag)) >= 1:
                    # The original snippet is truncated here; keeping
                    # WordNet-backed tags and discarding the rest is an
                    # inferred completion.
                    vocabularies.add(s_tag)
                else:
                    bad_vocabularies.add(s_tag)

Example #2
0
# image_cid_list is referenced but never defined in this snippet; it is
# assumed to be loaded beforehand (e.g., from a CNNsplit pickle as in
# Example #1).
image_cids = []
for s_image_annotation in tqdm.tqdm(image_cid_list, desc="Create CID set"):
    image_cids.append(
        int(get_image_cid_from_url(s_image_annotation[0], location=1)))

image_cids = set(image_cids)

raw_annotation_files = glob.glob(
    os.path.join(
        '/home/zwei/Dev/AttributeNet3/AdobeStockSelection/RetrieveSelected778/selected_keywords_retrieve_v2',
        '*.json'))
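# Second pass over the same files: for every cid in image_cids, record its
# tag list once, count per-tag frequencies, and track the longest tag list.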
processed_images = {}  # cid -> tag list (after remove_hat)
tag_counts = {}        # tag -> occurrence count
max_len = 0            # longest tag list seen so far
for s_raw_annotation_file in tqdm.tqdm(raw_annotation_files):
    s_raw_annotations = load_json_list(s_raw_annotation_file)
    for s_raw_annotation in s_raw_annotations:
        s_cid = s_raw_annotation['cid']
        if s_cid in processed_images or s_cid not in image_cids:
            continue
        s_tags = remove_hat(s_raw_annotation['tags'])
        for s_tag in s_tags:
            tag_counts[s_tag] = tag_counts.get(s_tag, 0) + 1
        if len(s_tags) > max_len:
            max_len = len(s_tags)
        processed_images[s_cid] = s_tags
Example #3
0
import os

import tqdm
from collections import Counter

from PyUtils.json_utils import load_json_list
from PyUtils.file_utils import get_stem

dataset_directory = '/home/zwei/datasets/PublicEmotion/EMOTIC'

data_split = 'test'
text_annotation_file = os.path.join(dataset_directory, 'annotations/samples', '{}.txt'.format(data_split))

annotations_person_crop = []
annotations_whole_image = []
raw_annotation_list = load_json_list(text_annotation_file)[0]
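
# Each record names a folder/filename plus one or more 'person' entries;
# single-person records store 'person' as a bare dict rather than a list.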

for s_annotation in tqdm.tqdm(raw_annotation_list, desc="Processing data"):
    s_file_name = os.path.join(s_annotation['folder'], s_annotation['filename'])
    s_file_name_stem = get_stem(s_file_name)
    s_file_extension = s_file_name.split('.')[-1]
    # Normalize 'person' to a list so the loop below handles single-person
    # records (stored as a bare dict) and multi-person records uniformly.
    if not isinstance(s_annotation['person'], list):
        s_annotation['person'] = [s_annotation['person']]

    for s_person_idx, s_person in enumerate(s_annotation['person']):
        s_bbox = s_person['body_bbox']

        annotated_categories = []
Example #4
0
# A main guard is restored here; the snippet's 4-space indentation implies
# its body originally ran under one.
if __name__ == '__main__':
    import os
    import json
    import argparse
    import glob
    # from PyUtils.pickle_utils import loadpickle
    from PyUtils.json_utils import load_json_list
    input_file = '/home/zwei/Dev/PastDatasets/EmotionStock/Urls/list_for_download.json'
    parser = argparse.ArgumentParser()
    parser.add_argument('--start_idx', '-b', default=0, type=int, help="Start Idx")
    parser.add_argument('--end_idx', '-e', default=None, type=int)

    parser.add_argument('--input_file', '-i', default=input_file, type=str, help='Annotation File directory')
    parser.add_argument('--output_directory', '-t', default='tmp', type=str, help='output filename')
    parser.add_argument('--nworkers', '-n', default=2, type=int, help="Number of workers")
    args = parser.parse_args()
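
    # Typical invocation (script name hypothetical):
    #   python download_images.py -b 0 -e 1000 -t tmp -n 8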

    annotations = load_json_list(args.input_file)
    assert 'url' in annotations[0], "'url' is not an attribute of the json records"
    start_idx = args.start_idx or 0
    end_idx = min(args.end_idx or len(annotations), len(annotations))

    selected_annotations = annotations[start_idx:end_idx]
    print("Downloading {0} Json File Records".format(len(selected_annotations)))
    MAX_attempt = 3  # maximum download attempts per record

    if not os.path.exists(args.output_directory):
        os.makedirs(args.output_directory)

    log_file = os.path.join(args.output_directory,'errlog-{0}-{1}-256.txt'.format(start_idx, end_idx))
    download_tuples = []