Ejemplo n.º 1
0
def create_evaluation_desc_datastore(orb, kind):
    """Store ORB descriptors for sampled images that have no entity yet.

    Walks every blob of the ``sampled_social_media_images`` bucket. For each
    ``.jpeg`` / ``.png`` / ``.jpg`` blob whose Datastore entity (kind ``kind``,
    key name ``utils.del_ext(blob.name)``) is missing, the image is fetched,
    its ORB descriptors are computed with ``utils.compute_ORB`` and written
    to Datastore as a JSON string under ``'ORB Descriptors'`` together with
    ``'Indexed(ORB)': "No"``.

    Processing is best-effort: an image that cannot be downloaded or
    processed is reported on stdout and skipped.

    Args:
        orb: Handed unchanged to ``utils.compute_ORB`` (presumably an OpenCV
            ORB detector -- confirm against ``utils``).
        kind: Datastore kind under which the entities are stored.
    """
    storage_client = storage.Client('adina-image-analysis')
    bucket = storage_client.get_bucket('sampled_social_media_images')
    datastore_client = datastore.Client('adina-image-analysis')

    image_exts = ('.jpeg', '.png', '.jpg')

    for blob in bucket.list_blobs():
        if utils.ext(blob.public_url) not in image_exts:
            continue

        # Look the entity up before downloading anything: images that were
        # already processed no longer cost an image fetch.
        key = datastore_client.key(kind, utils.del_ext(blob.name))
        if datastore_client.get(key):
            continue

        try:
            img = utils.url_to_img(blob.public_url)
        except Exception as exc:
            print("download failed", blob.public_url, exc)
            continue

        try:
            des = utils.compute_ORB(img, orb, show_image=False)

            # The previous ``des.any() != None`` test was always true for an
            # array and only skipped a missing result through a swallowed
            # AttributeError; check for None explicitly instead.
            if des is None:
                continue

            descriptors = [
                utils.convert_intList_to_bit(list(des[row]))
                for row in range(des.shape[0])
            ]

            entity = datastore.Entity(
                key=key, exclude_from_indexes=['ORB Descriptors'])
            entity.update({
                'ORB Descriptors': json.dumps(descriptors),
                'Indexed(ORB)': "No"
            })
            datastore_client.put(entity)

        except Exception as exc:
            # Keep the batch going, but do not hide what went wrong.
            print("ORB processing failed", blob.public_url, exc)
            continue
    return
Ejemplo n.º 2
0
def sample_social_media_storage(quantity):
    """Copy a random sample of images into the sampled-images bucket.

    Blobs of the ``adina-images`` bucket are numbered in listing order
    starting at 300001. ``quantity`` numbers are drawn at random from
    ``range(300000, 400000)``; a blob whose number was drawn, whose URL ends
    in ``.jpeg`` / ``.png`` / ``.jpg`` and whose name starts with ``4C`` or
    ``R`` is downloaded and uploaded to ``sampled_social_media_images`` as
    ``333_<serial>.jpg``. Serials start at 36443 and the function returns
    once the serial reaches 65000.

    Failures for a single blob are reported on stdout and skipped.

    Args:
        quantity: How many numbers to draw; ``random.sample`` raises
            ``ValueError`` if it exceeds the 100000 available numbers.
    """
    storage_client = storage.Client('adina-image-analysis')
    source_bucket = storage_client.get_bucket('adina-images')
    target_bucket = storage_client.get_bucket('sampled_social_media_images')

    # A set gives O(1) membership tests; the former list was scanned
    # linearly once per listed blob.
    chosen = set(random.sample(range(300000, 400000), quantity))

    image_exts = ('.jpeg', '.png', '.jpg')
    serial = 36443

    # start=300001 reproduces the original counter, which was initialised to
    # 300000 and incremented before each membership test.
    for number, source_blob in enumerate(source_bucket.list_blobs(),
                                         start=300001):

        if serial >= 65000:
            return

        if number not in chosen:
            continue

        if utils.ext(source_blob.public_url) not in image_exts:
            continue

        # 'R' already covers the former separate 'RE' prefix test.
        if not source_blob.name.startswith(('4C', 'R')):
            continue

        try:
            response = requests.get(source_blob.public_url)
            # Without this an HTTP error body would be stored as an image.
            response.raise_for_status()
            payload = io.BytesIO(response.content)

            target_name = '333_' + str(serial) + '.jpg'
            # As before, the serial is consumed even if the upload fails.
            serial += 1
            target_blob = storage.Blob(target_name, target_bucket)
            target_blob.upload_from_file(payload)

            if serial % 5000 == 0:
                print(serial)

        except Exception as exc:
            print("copy failed", source_blob.public_url, exc)
            continue
    return
Ejemplo n.º 3
0
def sample_social_media_datastore(orb, quantity, kind):
    """Store ORB descriptors for a random sample of images in Datastore.

    ``quantity`` zero-based listing positions are drawn at random from
    ``range(utils.count_storage('adina-images'))``. Each blob of the
    ``adina-images`` bucket at a drawn position, whose URL ends in
    ``.jpeg`` / ``.png`` / ``.jpg`` and whose name starts with ``4C`` or
    ``R``, is fetched and its ORB descriptors are written to Datastore under
    the key name ``333_<serial>`` (serials start at 31) as a JSON string in
    ``'ORB Descriptors'`` together with ``'Indexed(ORB)': "No"``.

    Failures for a single blob are reported on stdout and skipped.

    Args:
        orb: Handed unchanged to ``utils.compute_ORB`` (presumably an OpenCV
            ORB detector -- confirm against ``utils``).
        quantity: How many listing positions to draw.
        kind: Datastore kind under which the entities are stored.
    """
    storage_client = storage.Client('adina-image-analysis')
    bucket = storage_client.get_bucket('adina-images')
    datastore_client = datastore.Client('adina-image-analysis')

    # Assumes count_storage returns the number of blobs in the bucket --
    # TODO confirm against utils.
    total = utils.count_storage('adina-images')

    # A set gives O(1) membership tests instead of scanning a list per blob.
    chosen = set(random.sample(range(total), quantity))

    image_exts = ('.jpeg', '.png', '.jpg')
    serial = 31

    # Positions are zero-based so they line up with range(total). The old
    # counter ran from 1 and was also overwritten by the inner descriptor
    # loop, which corrupted the sampling after the first processed image.
    for position, blob in enumerate(bucket.list_blobs()):

        if position not in chosen:
            continue

        if utils.ext(blob.public_url) not in image_exts:
            continue

        # 'R' already covers the former separate 'RE' prefix test.
        if not blob.name.startswith(('4C', 'R')):
            continue

        try:
            img = utils.url_to_img(blob.public_url)
        except Exception as exc:
            print("download failed", blob.public_url, exc)
            continue

        try:
            des = utils.compute_ORB(img, orb, show_image=False)

            # The previous ``des.any() != None`` test was always true for an
            # array and only skipped a missing result through a swallowed
            # AttributeError; check for None explicitly instead.
            if des is None:
                continue

            descriptors = [
                utils.convert_intList_to_bit(list(des[row]))
                for row in range(des.shape[0])
            ]
            des_json = json.dumps(descriptors)

            name = '333_' + str(serial)
            serial += 1

            key = datastore_client.key(kind, name)
            entity = datastore.Entity(key=key,
                                      exclude_from_indexes=[
                                          'ORB Descriptors',
                                          'VGG16 Descriptors'
                                      ])
            entity.update({
                'ORB Descriptors': des_json,
                'Indexed(ORB)': "No"
            })
            datastore_client.put(entity)

            if serial % 1000 == 0:
                print(serial, 'images processed')

        except Exception as exc:
            # Keep the batch going, but do not hide what went wrong.
            print("ORB processing failed", blob.public_url, exc)
            continue
    return
Ejemplo n.º 4
0
def create_vgg_desc_datastore(vgg16_model,
                              kind="Evaluation_Reddit_Index_VGG",
                              resume_after=1767000):
    """Store VGG16 descriptors for images that have no entity yet.

    Walks the ``adina-images`` bucket in listing order and prints the
    zero-based position every 500 blobs. Blobs at positions
    ``0 .. resume_after`` are skipped outright. Every later blob whose URL
    ends in ``.jpeg`` / ``.png`` / ``.jpg``, whose name starts with ``R`` and
    whose Datastore entity (key name ``utils.del_ext(blob.name)``) is
    missing is run through ``vgg16_preprocess`` and ``cumpute_vgg_desc``;
    the result is written as a JSON string under ``'VGG16 Descriptors'``
    together with ``'Indexed(VGG16)': "No"``.

    Only ``AssertionError`` from the processing steps is caught (reported on
    stdout, blob skipped); any other exception propagates to the caller.

    Args:
        vgg16_model: Handed unchanged to ``cumpute_vgg_desc``.
        kind: Datastore kind to read and write. Defaults to the value that
            used to be hard-coded.
        resume_after: Last zero-based listing position to skip. Defaults to
            the previously hard-coded offset; pass ``-1`` to process the
            bucket from the beginning.
    """
    storage_client = storage.Client('adina-image-analysis')
    bucket = storage_client.get_bucket('adina-images')
    datastore_client = datastore.Client('adina-image-analysis')

    image_exts = ('.jpeg', '.png', '.jpg')

    for position, blob in enumerate(bucket.list_blobs()):

        # Progress is printed for skipped positions as well.
        if position % 500 == 0:
            print(position)

        if position <= resume_after:
            continue

        if utils.ext(blob.public_url) not in image_exts:
            continue

        # 'R' already covers the former separate 'RE' prefix test.
        if not blob.name.startswith('R'):
            continue

        key = datastore_client.key(kind, utils.del_ext(blob.name))
        if datastore_client.get(key):
            continue

        try:
            img_preprocessed = vgg16_preprocess(blob.public_url)
        except AssertionError as e0:
            print("e0", e0)
            print(blob.public_url)
            continue

        if img_preprocessed is None:
            print("None")
            print(blob.public_url)
            continue

        try:
            features = cumpute_vgg_desc(vgg16_model, img_preprocessed)
        except AssertionError as e1:
            print("e1", e1)
            print(blob.public_url)
            continue

        entity = datastore.Entity(
            key=key, exclude_from_indexes=['VGG16 Descriptors'])

        try:
            descriptors = [
                convert_nparray_to_List(features[row])
                for row in range(features.shape[0])
            ]

            entity.update({
                'VGG16 Descriptors': json.dumps(descriptors),
                'Indexed(VGG16)': "No"
            })
            datastore_client.put(entity)

        except AssertionError as e2:
            print("e2", e2)
            print(blob.public_url)
            continue
    return