def __init__(self, config: object, session):
    """Read the FOLLOW section of *config*, store the settings, and start
    the appropriate follow job (by tags, by locations, or plain follow)."""
    if key_exists(FOLLOW, config):
        follow = config[FOLLOW]
        # Optional settings: apply each setter only when its key is present.
        option_setters = (
            (ENABLE, self.__set_enable),
            (USERNAMES, self.__set_usernames),
            (AMOUNT, self.__set_amount),
            (RANDOMIZE, self.__set_randomize),
            (INTERACT, self.__set_interact),
            (SLEEP_DELAY, self.__set_sleep_delay),
        )
        for option_key, setter in option_setters:
            if key_exists(option_key, follow):
                setter(follow[option_key])
        self.__session = session
        if self.__enable:
            # Dedicated sub-jobs take precedence over the plain follow run.
            if key_exists(BY_TAGS, follow):
                FollowByTags(follow[BY_TAGS], session)
            elif key_exists(BY_LOCATIONS, follow):
                FollowByLocations(follow[BY_LOCATIONS], session)
            else:
                self.__follow()
def __init__(self, config: object, session):
    """Copy the location-follow options out of *config*, then start the
    follow-by-locations run. Does nothing when *config* is empty/falsy."""
    if not config:
        return
    if key_exists(LOCATIONS, config):
        self.__locations = config[LOCATIONS]
    if key_exists(AMOUNT, config):
        self.__amount = config[AMOUNT]
    if key_exists(MEDIA, config):
        self.__media = config[MEDIA]
    if key_exists(SKIP_TOP_POSTS, config):
        self.__skip_top_posts = config[SKIP_TOP_POSTS]
    self.__session = session
    self.__follow_by_locations()
def process_label_dict(filt_label_dict, label_to_titles):
    """Build per-label display rows for labels that occur at least twice.

    Returns a dict mapping label -> row with resolved title, link, image
    URL, tweet data, and a 1-based ordering key.
    """
    min_count = 2  # labels seen only once are dropped
    label_data = {}
    next_key = 1
    for label, group in filt_label_dict.items():
        if group["count"] < min_count:
            continue
        # Twitter-id labels carry their own metadata; everything else is
        # resolved through the external label/title lookup.
        if (utils.key_exists(["label_info", "type"], group)
                and group["label_info"]["type"] == "twitter_id"):
            label_info = group["label_info"]
            link, title = consume_twitter_label(label_info)
        else:
            label_info, title = consume_external_label(label, group,
                                                       label_to_titles)
            link = label
        final_image_url = pick_image(label_info)
        tweet_data = make_tweet_data(group)
        label_data[label] = {
            "label_info": label_info,
            "title": title,
            "count": group["count"],
            'key': next_key,
            'tag': link,
            'final_image_url': final_image_url,
            'tweet_data': tweet_data,
        }
        next_key += 1
    return label_data
def featurize_test_images(bucket, prefix, batch_size):
    """Featurize every ImageNet test image whose feature file is missing.

    For each test filename without an existing ``<prefix>/<name>.npy`` key
    in *bucket*, the image is loaded, resized to FEATURIZE_SIZE, and
    accumulated; full batches of *batch_size* are featurized and uploaded
    via featurize_and_upload_batch, with a final flush for the remainder.
    """
    imgnt = imagenet.ImageNetData()
    to_featurize = []
    to_featurize_keys = []
    client = utils.get_s3_client()
    start = timer()
    num_batches = 0
    for k in imgnt.test_filenames:
        key_name = os.path.join(prefix, f"{k}.npy")
        # Skip images whose features were already uploaded.
        if not utils.key_exists(bucket, key_name):
            img = imgnt.load_image(k, size='scaled_256', force_rgb=True,
                                   verbose=False)
            img = skimage.transform.resize(img, FEATURIZE_SIZE,
                                           preserve_range=True)
            to_featurize.append(img)
            to_featurize_keys.append(k)
            if len(to_featurize) >= batch_size:
                num_batches += 1
                featurize_and_upload_batch(to_featurize, to_featurize_keys,
                                           batch_size, bucket, prefix, client)
                end = timer()
                # BUG FIX: message previously read "processing bach".
                print('processing batch {} (size {}) took {} seconds'.format(
                    num_batches, len(to_featurize), end - start))
                start = timer()
                to_featurize = []
                to_featurize_keys = []
    # Flush the final partial batch, if any.
    if len(to_featurize) > 0:
        featurize_and_upload_batch(to_featurize, to_featurize_keys,
                                   batch_size, bucket, prefix, client)
def return_not_exists(lst):
    """Return the ids from *lst* whose scaled candidate JPEG is missing in S3."""
    missing = []
    for candidate_id in lst:
        s3_key = "{0}/{1}.jpg".format("imagenet2candidates_scaled",
                                      candidate_id)
        present = utils.key_exists(bucket="imagenet2datav2", key=s3_key)
        print(present, s3_key)
        if not present:
            missing.append(candidate_id)
    return missing
def quick_image_check(img_dict, bucket, prefix):
    """Return True when the mturk-sized copy of this image is already in S3.

    NOTE(review): a miss returns None rather than False — callers appear to
    rely on truthiness only; preserved as-is to avoid changing behavior.
    """
    file_ext = img_dict['url'].split(".")[-1]
    mturk_key = "{2}_mturk/{0}.{1}".format(img_dict["id_ours"], file_ext,
                                           prefix)
    return True if utils.key_exists(bucket, mturk_key) else None
def return_not_exists_encrypted(lst):
    """Return encrypted ids from *lst* whose encrypted JPEG is missing in S3.

    Note: each element is encrypted first, and the *encrypted* name is what
    gets collected in the result list.
    """
    missing = []
    for entry in lst:
        entry = utils.encrypt_string_with_magic(entry)
        s3_key = "{0}/{1}.jpg".format("encrypted", entry)
        present = utils.key_exists(bucket="imagenet2datav2", key=s3_key)
        print(present, s3_key)
        if not present:
            missing.append(entry)
    return missing
def featurize_candidates(bucket, prefix, batch_size, source_filename):
    """Compute VGG16 fc7 features for candidate images missing from S3.

    Reads the candidate id list from ../data/metadata/fc7_candidates.json,
    loads every candidate whose ``<prefix>/<id>.npy`` key is absent from
    *bucket*, featurizes them in one vgg16_features call, and uploads each
    feature vector as an .npy object.

    NOTE(review): *source_filename* is accepted but never used in this body
    — confirm whether it can be dropped at the call sites.
    """
    imgnt = imagenet.ImageNetData()
    cds = candidate_data.CandidateData(verbose=False)
    # MTurk assignment files deliberately excluded from the MTurk data load.
    filenames_to_ignore = [
        '2018-08-06_17:33_vaishaal.json',
        '2018-08-17_17:24_vaishaal.json',
        'vaishaal_hits_submitted_2018-08-17-18:28:33-PDT.json',
        'vaishaal_hits_submitted_2018-08-17-18:50:38-PDT.json',
        'vaishaal_hits_submitted_2018-08-17-19:28:24-PDT.json',
        'vaishaal_hits_submitted_2018-08-17-19:56:28-PDT.json',
        'vaishaal_hits_submitted_2018-08-25-09:47:26-PDT.json']
    mturk = mturk_data.MTurkData(live=True,
                                 load_assignments=True,
                                 source_filenames_to_ignore=filenames_to_ignore,
                                 verbose=False)
    to_featurize = []
    to_featurize_keys = []
    client = utils.get_s3_client()
    i = 0
    #candidate_list = dataset_sampling.get_histogram_sampling_ndc_candidates(imgnet=imgnt, cds=cds, mturk=mturk)
    start = timer()
    with open('../data/metadata/fc7_candidates.json', 'r') as f:
        candidate_list = json.load(f)
    for k in candidate_list:
        key_name = os.path.join(prefix, str(k)+".npy")
        # Only featurize candidates whose features are not yet uploaded.
        key_exists = utils.key_exists(bucket, key_name)
        if not key_exists:
            img = cds.load_image(k, size='original', verbose=False)
            img = skimage.transform.resize(img, FEATURIZE_SIZE,
                                           preserve_range=True)
            to_featurize.append(img)
            to_featurize_keys.append(k)
        #if i > 250:
        #    break;
        i = i + 1
        print('Got candidate {}'.format(i))
    end = timer()
    print(f"Took {end-start} seconds to get remaining candidates.")
    print('Beginning featurization of {} items'.format(len(to_featurize_keys)))
    if len(to_featurize) > 0:
        to_featurize = np.stack(to_featurize, axis=0)
        print(f"input shape {to_featurize.shape}")
        # Never ask the featurizer for a batch larger than the input.
        batch_size = min(len(to_featurize), batch_size)
        features = featurize.vgg16_features(to_featurize, batch_size=batch_size)
        print(f"features shape {features.shape}")
        for i, f in enumerate(features):
            key_name = os.path.join(prefix, to_featurize_keys[i]+".npy")
            bio = io.BytesIO()
            np.save(bio, f)
            print("writing key {0}".format(key_name))
            utils.put_s3_object_bytes_with_backoff(bio.getvalue(),
                                                   key_name)
    # NOTE(review): this repeats the earlier timing line with stale
    # end/start values — looks like leftover copy-paste; confirm intent.
    print(f"Took {end-start} seconds to get remaining candidates.")
def __init__(self, config: object, session): if config: if key_exists(TAGS, config): self.__tags = config[TAGS] if key_exists(AMOUNT, config): self.__amount = config[AMOUNT] if key_exists(SKIP_TOP_POSTS, config): self.__skip_top_posts = config[SKIP_TOP_POSTS] if key_exists(RANDOMIZE, config): self.__randomize = config[RANDOMIZE] if key_exists(INTERACT, config): self.__interact = config[INTERACT] if key_exists(MEDIA, config): self.__media = config[MEDIA] if key_exists(USE_SMART_HASHTAGS, config): self.__use_smart_hashtags = config[USE_SMART_HASHTAGS] if key_exists(USE_SMART_LOCATION_HASHTAGS, config): self.__use_smart_location_hashtags = config[ USE_SMART_LOCATION_HASHTAGS] if key_not_exists(TAGS, config): sys.exit('[ERROR]: tags not found.') self.__session = session self.__follow_by_tags()
def compute_nearest_neighbors(distance_measures, candidate_filenames,
                              reference_filenames, top_k, window_size,
                              cache, cache_root):
    """Compute top-k nearest references for each candidate, with S3 caching.

    Supports the 'l2', 'dssim', and 'fc7' distance measures. Returns
    ``(result, timing_info)`` where timing_info records load/compute
    timestamps and whether the result came from the cache.

    Raises ValueError for an unknown distance measure.
    """
    # Cache key covers every input that affects the result.
    cache_key = compute_hash(distance_measures, candidate_filenames,
                             reference_filenames, top_k, window_size)
    full_key = f"{cache_root}/{cache_key}"
    timing_info = {}
    if cache:
        if utils.key_exists(BUCKET, full_key):
            # Cache hit: return the unpickled stored result immediately.
            load_start = timer()
            ret_value = pickle.loads(
                utils.get_s3_object_bytes_with_backoff(full_key)[0])
            load_end = timer()
            # No computation happened; start == end marks a zero interval.
            compute_start = compute_end = timer()
            timing_info['load_start'] = load_start
            timing_info['load_end'] = load_end
            timing_info['compute_start'] = compute_start
            timing_info['compute_end'] = compute_end
            timing_info['cached'] = True
            return ret_value, timing_info
    imgnt = imagenet.ImageNetData(cache_on_local_disk=True, verbose=False,
                                  cache_root_path='/tmp/imagenet2_cache')
    cds = candidate_data.CandidateData(cache_on_local_disk=True,
                                       load_metadata_from_s3=True,
                                       verbose=False,
                                       cache_root_path='/tmp/imagenet2_cache')
    loader = image_loader.ImageLoader(imgnt, cds, cache_on_local_disk=True,
                                      num_tries=4,
                                      cache_root_path='/tmp/imagenet2_cache')
    load_start = timer()
    # Pixel-based measures need the images; fc7 needs precomputed features.
    if ('l2' in distance_measures) or ('dssim' in distance_measures):
        candidate_image_dict = loader.load_image_batch(candidate_filenames,
                                                       size='scaled_256',
                                                       force_rgb=True,
                                                       verbose=False)
        reference_image_dict = loader.load_image_batch(reference_filenames,
                                                       size='scaled_256',
                                                       force_rgb=True,
                                                       verbose=False)
    if 'fc7' in distance_measures:
        candidate_feature_dict = loader.load_features_batch(
            candidate_filenames, verbose=False)
        reference_feature_dict = loader.load_features_batch(
            reference_filenames, verbose=False)
    load_end = timer()
    compute_start = timer()
    result = {}
    for distance_measure in distance_measures:
        if distance_measure == 'l2':
            # 196608 — presumably 256*256*3 flattened pixels (scaled_256
            # RGB); confirm against compute_l2_distances.
            result['l2'] = compute_l2_distances(candidate_image_dict,
                                                reference_image_dict, 196608)
        elif distance_measure == 'dssim':
            result['dssim'] = compute_dssim_distances(candidate_image_dict,
                                                      reference_image_dict,
                                                      window_size)
        elif distance_measure == 'fc7':
            # 4096 — presumably the fc7 feature dimensionality; confirm.
            result['fc7'] = compute_l2_distances(candidate_feature_dict,
                                                 reference_feature_dict, 4096)
        else:
            raise ValueError('Unknown distance measure')
    compute_end = timer()
    timing_info = {}
    timing_info['load_start'] = load_start
    timing_info['load_end'] = load_end
    timing_info['compute_start'] = compute_start
    timing_info['compute_end'] = compute_end
    timing_info['cached'] = False
    res = compute_top_k(result, top_k)
    if cache:
        # Store the freshly computed result for the next call.
        utils.put_s3_object_bytes_with_backoff(pickle.dumps(res), full_key)
    return res, timing_info
def __init__(self, config: object, session):
    """Read the UNFOLLOW section of *config*, apply each present setting
    through its setter, and start the unfollow job.

    BUG FIX: removed a stray debug ``print(unfollow)`` that dumped the raw
    unfollow config (including the whitelist) to stdout on every run.
    """
    if key_exists(UNFOLLOW, config):
        unfollow = config[UNFOLLOW]
        if key_exists(WHITE_LIST, unfollow):
            self.__set_white_list(unfollow[WHITE_LIST])
        if key_exists(ENABLE, unfollow):
            self.__enable = unfollow[ENABLE]
        if key_exists(AMOUNT, unfollow):
            self.__set_amount(unfollow[AMOUNT])
        if key_exists(CUSTOM_LIST_ENABLED, unfollow):
            self.__set_custom_list_enabled(unfollow[CUSTOM_LIST_ENABLED])
        if key_exists(CUSTOM_LIST, unfollow):
            self.__set_custom_list(unfollow[CUSTOM_LIST])
        if key_exists(CUSTOM_LIST_PARAM, unfollow):
            self.__set_custom_list_param(unfollow[CUSTOM_LIST_PARAM])
        if key_exists(INSTAPY_FOLLOWED_ENABLED, unfollow):
            self.__set_instapy_followed_enabled(
                unfollow[INSTAPY_FOLLOWED_ENABLED])
        if key_exists(INSTAPY_FOLLOWED_PARAM, unfollow):
            self.__set_instapy_followed_param(
                unfollow[INSTAPY_FOLLOWED_PARAM])
        if key_exists(NON_FOLLOWERS, unfollow):
            self.__set_non_followers(unfollow[NON_FOLLOWERS])
        if key_exists(ALL_FOLLOWING, unfollow):
            self.__set_all_following(unfollow[ALL_FOLLOWING])
        if key_exists(STYLE, unfollow):
            self.__set_style(unfollow[STYLE])
        if key_exists(UNFOLLOW_AFTER, unfollow):
            self.__set_unfollow_after(unfollow[UNFOLLOW_AFTER])
        if key_exists(DELAY_FOLLOWBACKERS, unfollow):
            self.__set_delay_followbackers(unfollow[DELAY_FOLLOWBACKERS])
        if key_exists(SLEEP_DELAY, unfollow):
            self.__set_sleep_delay(unfollow[SLEEP_DELAY])
        self.__session = session
        self.__unfollow()
def make_tweet_data(group):
    """Flatten the statuses of *group* into tweet rows sorted by date.

    Each row carries engagement count, a millisecond timestamp in
    America/Los_Angeles, cleaned text, and user/profile metadata.
    """
    rows = []
    for status in group["statuses"]:
        engagement = status["retweet_count"] + status["favorite_count"]
        ts = pd.to_datetime(status["created_at"], errors='coerce') \
               .tz_localize('UTC').tz_convert('America/Los_Angeles')
        millis = int(round(ts.timestamp() * 1000))
        enhanced = status["satellite_enhanced"]
        # Prefer the enhanced top-name screen name; fall back to the user id.
        if "name_top" in enhanced:
            screen_name = enhanced["name_top"]["screen_name"]
        else:
            screen_name = status["user"]["id_str"]
        if utils.key_exists(
                ["satellite_enhanced", "name_top", "profile_image_url"],
                status):
            profile_image_url = enhanced["name_top"]["profile_image_url"]
        else:
            profile_image_url = None
        text = utils.clean_title(utils.clean_one(status))
        rows.append({
            "count": engagement,
            'date': millis,
            'text': text,
            'user_id': status["user"]["id_str"],
            'tweet_id': status['id_str'],
            'screen_name': screen_name,
            'profile_image_url': profile_image_url,
        })
    return sorted(rows, key=lambda row: row['date'])
def write_image_to_s3(img_dict, bucket, prefix):
    """Download the image at img_dict['url'], derive scaled / original /
    mturk JPEGs, and upload all three to S3 with exponential-backoff retry.

    Returns *img_dict* augmented with "width" and "height". Skips all work
    when the mturk-sized copy already exists in the bucket.

    Raises Exception on MemoryError during resize or on a failed mturk
    JPEG encode (original cause chained).
    """
    t = time.time()
    url = img_dict['url']
    ext = url.split(".")[-1]
    key = img_dict["id_ours"]
    mturk_key = "{2}_mturk/{0}.{1}".format(key, ext, prefix)
    if utils.key_exists(bucket, mturk_key):
        # Already uploaded: nothing to do.
        return img_dict
    # NOTE(review): TLSv1 is deprecated and insecure — consider
    # ssl.create_default_context(); kept as-is to avoid changing which
    # hosts this can download from.
    gcontext = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
    img_bytes = urllib.request.urlopen(url, context=gcontext).read()
    pil_image = Image.open(io.BytesIO(img_bytes))
    rotated_img, _ = fix_orientation(pil_image)
    np_image = np.array(rotated_img)
    np_image = utils.make_rgb(np_image)
    try:
        image_ndc = skimage.transform.resize(np_image, (NDC_SIZE, NDC_SIZE),
                                             preserve_range=True)
    except MemoryError:
        raise Exception(f"Image {img_dict} memory error")
    # Rescale so the longer side equals MTURK_RESCALE_SIZE.
    bigger_side = max(np_image.shape)
    scale_fac = MTURK_RESCALE_SIZE / bigger_side
    image_mturk = skimage.transform.rescale(np_image, scale=scale_fac,
                                            preserve_range=True)
    bio_mturk = io.BytesIO()
    bio_orig = io.BytesIO()
    bio_ndc = io.BytesIO()
    imageio.imwrite(uri=bio_orig, im=np_image, format="jpg", quality=90)
    try:
        imageio.imwrite(uri=bio_mturk, im=image_mturk, format="jpg",
                        quality=90)
    except Exception as err:
        # BUG FIX: was a bare `except:` that discarded the real error;
        # chain the cause so debugging information survives.
        raise Exception(f"Image {img_dict} error") from err
    imageio.imwrite(uri=bio_ndc, im=image_ndc, format="jpg", quality=90)
    client = utils.get_s3_client()
    ext = "jpg"
    backoff = 1
    while True:
        try:
            client.put_object(Key="{2}_scaled/{0}.{1}".format(
                key, ext, prefix), Bucket=bucket, Body=bio_ndc.getvalue())
            client.put_object(Key="{2}_original/{0}.{1}".format(
                key, ext, prefix), Bucket=bucket, Body=bio_orig.getvalue())
            client.put_object(Key="{2}_mturk/{0}.{1}".format(key, ext, prefix),
                              Bucket=bucket, Body=bio_mturk.getvalue())
            break
        except Exception:
            # BUG FIX: bare `except:` also trapped KeyboardInterrupt and
            # SystemExit, making the retry loop un-interruptible.
            time.sleep(backoff)
            backoff *= 2
    e = time.time()
    print("One image took ", e - t)
    img_dict["width"] = np_image.shape[1]
    img_dict["height"] = np_image.shape[0]
    return img_dict