import io
import json
import random

import requests
from google.cloud import datastore, storage

import utils


def create_evaluation_desc_datastore(orb, kind):
    """Compute ORB descriptors for every sampled image and store them in Datastore."""
    storage_client = storage.Client('adina-image-analysis')
    bucket = storage_client.get_bucket('sampled_social_media_images')
    datastore_client = datastore.Client('adina-image-analysis')
    for blob in bucket.list_blobs():
        X = []
        # Only process JPEG/PNG images.
        if utils.ext(blob.public_url) not in ('.jpeg', '.png', '.jpg'):
            continue
        try:
            img = utils.url_to_img(blob.public_url)
        except Exception:
            continue
        # Reuse the existing entity for this image if one is already stored.
        k = datastore_client.key(kind, utils.del_ext(blob.name))
        entity = datastore_client.get(k)
        if not entity:
            entity = datastore.Entity(key=k, exclude_from_indexes=['ORB Descriptors'])
        try:
            des = utils.compute_ORB(img, orb, show_image=False)
            if des is None:
                continue
            # Store each descriptor row as a bit string so the list can be JSON-serialized.
            for row in range(des.shape[0]):
                X.append(utils.convert_intList_to_bit(list(des[row])))
            des_json = json.dumps(X)
            entity.update({
                'ORB Descriptors': des_json,
                'Indexed(ORB)': "No"
            })
            datastore_client.put(entity)
        except Exception:
            continue
    return

def sample_social_media_storage(quantity):
    """Copy a random sample of social-media images into the sampled_social_media_images bucket."""
    storage_client = storage.Client('adina-image-analysis')
    bucket = storage_client.get_bucket('adina-images')
    dest_bucket = storage_client.get_bucket('sampled_social_media_images')
    # p = utils.count_storage('adina-images')
    # Sample blob indices from the 300000-400000 range of the source bucket.
    l = list(range(300000, 400000))
    chosen = random.sample(l, quantity)
    i = 300000
    s = 36443  # next sequence number for the copied images
    for blob in bucket.list_blobs():
        if s >= 65000:
            return
        i += 1
        if i not in chosen:
            continue
        if utils.ext(blob.public_url) not in ('.jpeg', '.png', '.jpg'):
            continue
        if not blob.name.startswith(('4C', 'RE', 'R')):
            continue
        try:
            # Download the image and re-upload it under a sequential name.
            img = io.BytesIO(requests.get(blob.public_url).content)
            name = '333_' + str(s) + '.jpg'
            s += 1
            dest_blob = storage.Blob(name, dest_bucket)
            dest_blob.upload_from_file(img)
            if s % 5000 == 0:
                print(s)
        except Exception:
            continue
    return

def sample_social_media_datastore(orb, quantity, kind):
    """Compute ORB descriptors for a random sample of images and write them to Datastore."""
    storage_client = storage.Client('adina-image-analysis')
    bucket = storage_client.get_bucket('adina-images')
    datastore_client = datastore.Client('adina-image-analysis')
    p = utils.count_storage('adina-images')
    chosen = random.sample(list(range(p)), quantity)
    i = 0
    s = 31  # next sequence number for the sampled entity names
    for blob in bucket.list_blobs():
        i += 1
        X = []
        if i not in chosen:
            continue
        if utils.ext(blob.public_url) not in ('.jpeg', '.png', '.jpg'):
            continue
        if not blob.name.startswith(('4C', 'RE', 'R')):
            continue
        try:
            img = utils.url_to_img(blob.public_url)
        except Exception:
            continue
        try:
            des = utils.compute_ORB(img, orb, show_image=False)
            if des is None:
                continue
            # Use a separate loop variable so the blob counter `i` is not clobbered.
            for row in range(des.shape[0]):
                X.append(utils.convert_intList_to_bit(list(des[row])))
            des_json = json.dumps(X)
            name = '333_' + str(s)
            s += 1
            k = datastore_client.key(kind, name)
            entity = datastore.Entity(key=k, exclude_from_indexes=[
                'ORB Descriptors', 'VGG16 Descriptors'
            ])
            entity.update({
                'ORB Descriptors': des_json,
                'Indexed(ORB)': "No"
            })
            datastore_client.put(entity)
            if s % 1000 == 0:
                print(s, 'images processed')
        except Exception:
            continue
    return

def create_vgg_desc_datastore(vgg16_model):
    """Compute VGG16 descriptors for the Reddit images and store them in Datastore.

    vgg16_preprocess, cumpute_vgg_desc and convert_nparray_to_List are assumed to be
    defined elsewhere in this module.
    """
    storage_client = storage.Client('adina-image-analysis')
    bucket = storage_client.get_bucket('adina-images')
    datastore_client = datastore.Client('adina-image-analysis')
    kind = "Evaluation_Reddit_Index_VGG"
    n = 0
    for blob in bucket.list_blobs():
        if n % 500 == 0:
            print(n)
        # Skip blobs that were already processed in a previous run.
        if n <= 1767000:
            n += 1
            continue
        n += 1
        X = []
        if utils.ext(blob.public_url) not in ('.jpeg', '.png', '.jpg'):
            continue
        if not blob.name.startswith(('RE', 'R')):
            continue
        # Reuse the existing entity for this image if one is already stored.
        k = datastore_client.key(kind, utils.del_ext(blob.name))
        entity = datastore_client.get(k)
        if not entity:
            entity = datastore.Entity(key=k, exclude_from_indexes=['VGG16 Descriptors'])
        try:
            img_preprocessed = vgg16_preprocess(blob.public_url)
        except AssertionError as e0:
            print("e0", e0)
            print(blob.public_url)
            continue
        if img_preprocessed is None:
            print("None")
            print(blob.public_url)
            continue
        try:
            features = cumpute_vgg_desc(vgg16_model, img_preprocessed)
        except AssertionError as e1:
            print("e1", e1)
            print(blob.public_url)
            continue
        try:
            # Serialize the feature rows as plain lists so they fit in a JSON property.
            for i in range(features.shape[0]):
                X.append(convert_nparray_to_List(features[i]))
            des_json = json.dumps(X)
            entity.update({
                'VGG16 Descriptors': des_json,
                'Indexed(VGG16)': "No"
            })
            datastore_client.put(entity)
        except AssertionError as e2:
            print("e2", e2)
            print(blob.public_url)
            continue
    return
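

# A minimal usage sketch, not part of the original pipeline: it assumes the ORB detector
# is created with OpenCV and the VGG16 model with Keras before the functions above are
# run. The sample sizes, the kind name, and the include_top choice are hypothetical.
if __name__ == '__main__':
    import cv2
    from tensorflow.keras.applications import VGG16

    orb = cv2.ORB_create(nfeatures=500)                          # ORB keypoint detector/descriptor
    vgg16_model = VGG16(weights='imagenet', include_top=False)   # convolutional features only

    sample_social_media_storage(quantity=1000)
    sample_social_media_datastore(orb, quantity=1000, kind='Evaluation_Reddit_Index_ORB')
    create_evaluation_desc_datastore(orb, kind='Evaluation_Reddit_Index_ORB')
    create_vgg_desc_datastore(vgg16_model)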