def vvic_tranf_hash():
    print("Three steps; do not close this window or shut down the computer")
    print('-------------------step1--------------------------------')
    print('Loading new records from vvic_daily_product_info')
    t1 = time.time()
    results = mysql_select('id,images','vvic_daily_product_info','hashcode is null')
    t5 = time.time()
    print('New records: {}  time: {:.4f}'.format(len(results), t5 - t1))
    gis = ImageSignature()
    for item in results:
        t2 = time.time()
        hashcode = []
        print('New record ID:', item[0], ' converting to vvic_matching...', end='')
        urls = json.loads(item[1])
        for url in urls:
            try:
                feature = gis.generate_signature('https:' + url).tolist()
            except Exception:
                print(' err...',end='')
                continue
            feature_str = json.dumps(feature)
            feature_md5 = hashlib.new('md5', feature_str.encode('utf-8')).hexdigest()
            mysql_insert('vvic_matching', 'table_id, hashcode', "'{}','{}'".format('D'+str(item[0]),feature_md5))
            print(' ok...',end='')
            hashcode.append(feature)
        print('Backing up hashcode to vvic_daily_product_info...')
        hashcode = json.dumps(hashcode)
        mysql_update('vvic_daily_product_info', "vvic_daily_product_info.hashcode = '{}'".format(hashcode), 'id= {}'.format(item[0]))
        t3 = time.time()
        print('ok...time:{:.4f}'.format(t3-t2))
    t4 = time.time()
    print('step 1 elapsed: {:.4f}'.format(t4 - t1))
def show_pic_link():
    product_ids = [577692000305,578191881353,578483657631,578677944652,578685252450,578687528582,578694732869,578701700138,578711868562,578839220054,578839332873,578841944462,578844188644,578845872826,578852456665,578854044763,578858704746,578864384899,578864404812,578871157099,578878057304,578882361059,578897537047,578908656165,578917632308,578924864350,578925904502,578930968926,578962313677,578964000190,578964844758,578986812746,578987764472,578987772099,578990680955,578991852646,578998934664,579003068170,579006508759,579009502144,579010232424,579011121818,579012089936,579012594962,579015793796,579016321903,579017682463,579020034338,579020945345,579026477272,579038105094,579040288537,579042944439,579043365863,579044453895,579045357367,579057188432,579059280146,579062600070,579073560581,579079084357,579079728725,579080084495,579080264865,579084680222,579098177648,579108665184,579113085387,579113477027,579113848321,579114913040,579121024934,579123399154,579124542365,579130499434,579135110276,579135502744,579136318705,579137402720,579142042161,579142413790,579146182250,579146438526,579146726978,579157958231,579162950328,579164486610,579164957525,579166333233,579167425040,579170054870,579174325276,579174420029,579182668407,579187540993,579191149073,579191713613,579197252214,579198116008,579199427996,579200876144,579210544302,579220073061,579220222669,579231438188,579232926829,579234814080,579239574605,579240145913,579243065356,579243221071,579243607519,579244499968,579244637472,579245609821,579246544998,579246953709,579247559069,579248267801,579248515279,579255815473,579258751145,579259116925,579262399043,579264911960,579270867502,579283639645,579283703168,579284534999,579287187271,579290719064,579293798898,579294854085,579296306512,579303749600,579313725833,579317386009,579317674187,579318525853,579319711576,579333265468,579345278635,579357809313,579361946277,579363703276,579366958175,579369118242,579374334530,579375238521,579378637587,579382726785,579383352473,579392979839,579407901663,579408967399,579411795959,579417531914]
    # product_ids = product_ids[:3]
    gis = ImageSignature()
    feature_all_id = []

    for i, product_id in enumerate(product_ids):
        cursor.execute("select images_url  from tb_product_detail where product_id ={}".format(product_id))
        product_select = cursor.fetchall()
        urls = json.loads(product_select[0][0])
        t1 = time.time()
        with open('img_md/{}.md'.format(product_id),'w') as f:
            feature_per_id = []
            for url in urls:
                try:
                    feature = gis.generate_signature(url).tolist()
                    feature_str = json.dumps(feature)
                    feature_md5 = hashlib.new('md5', feature_str.encode('utf-8')).hexdigest()
                except Exception:
                    feature_md5 = 'failed to generate a signature'
                line = feature_md5+'<img  src="{}">\n'.format(url)
                f.write(line)
                feature_per_id.append(feature_md5)
        feature_all_id.append(feature_per_id)
        t2 = time.time()
        print('[{}/{}] {} {}'.format(i+1,len(product_ids),len(feature_per_id),t2-t1))
    inner_features = set(feature_all_id[0]).intersection(*feature_all_id[1:])
    with open('img_md/inner.md', 'w') as f:
        # Note: feature_per_id and urls still refer to the last product processed in the loop above.
        indexes = [feature_per_id.index(x) for x in inner_features]
        inner_urls = [urls[x] for x in indexes]
        for url in inner_urls:
            line = '<img  src="{}">\n'.format(url)
            f.write(line)
Example #3
def image_duplication(m1, m2, threshold=0.4):
    result = 0
    match_score = 0
    score_type = ''
    info = {}

    gis = ImageSignature()
    try:
        a = gis.generate_signature(m1, True)
    except Exception:
        info['m1'] = 'Invalid image data!'
    try:
        b = gis.generate_signature(m2, True)
    except Exception:
        info['m2'] = 'Invalid image data!'

    if not info:
        match_score = gis.normalized_distance(a, b)
        score_type = 'distance'
        result = int(match_score < threshold)

    return (result, match_score, score_type, threshold, info)
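A hedged usage sketch for image_duplication, assuming two local image files (placeholder names) read as raw bytes, since the function hands its inputs to generate_signature with bytestream=True:

# Minimal usage sketch; 'photo_a.jpg' and 'photo_b.jpg' are placeholders, not files from the original source.
with open('photo_a.jpg', 'rb') as f1, open('photo_b.jpg', 'rb') as f2:
    result, score, score_type, cutoff, info = image_duplication(f1.read(), f2.read())

if info:
    print('comparison skipped:', info)  # one or both inputs were not valid image data
else:
    print('match' if result else 'no match', '| normalized distance:', score, '| cutoff:', cutoff)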
Example #4
def add_imgs():
    gis = ImageSignature()
    a = gis.generate_signature(
        'https://upload.wikimedia.org/wikipedia/commons/thumb/e/ec/Mona_Lisa,_by_Leonardo_da_Vinci,_from_C2RMF_retouched.jpg/687px-Mona_Lisa,_by_Leonardo_da_Vinci,_from_C2RMF_retouched.jpg'
    )
    b = gis.generate_signature(
        'https://upload.wikimedia.org/wikipedia/commons/thumb/9/99/Gioconda_%28copia_del_Museo_del_Prado_restaurada%29.jpg/800px-Gioconda_%28copia_del_Museo_del_Prado_restaurada%29.jpg'
    )
    res = gis.normalized_distance(a, b)
    print(res)

    es = Elasticsearch()
    ses = SignatureES(es)

    mypath = '/var/www/html/boots-market/image/catalog/product'

    ses.add_image(mypath + '/' +
                  'almcdnruimg389x562frfr030awdzpc579240581v1.jpg')
    #ses.add_image('/var/www/html/boots-market/image/catalog/almcdnruimg389x562frfr030awdzpc579240581v1.jpg')
    #ses.add_image('/var/www/html/boots-market/image/catalog/12616562_12123107_800.jpg')

    return  # NOTE: everything below is unreachable; drop this return to index every file in mypath

    onlyfiles = [f for f in listdir(mypath) if isfile(join(mypath, f))]

    for file in onlyfiles:
        filedir = mypath + '/' + str(file)
        print('add: ' + filedir)
        ses.add_image(filedir)
Example #5
def extract_signature():
    img_paths = os.listdir(IMG_DIR)
    img_paths.sort()
    img_paths = [
        os.path.join(IMG_DIR, filename) for filename in img_paths
        if filename.endswith(IMG_EXT)
    ]
    with open(os.path.join(DATA_DIR, IMG_PATHS), 'w') as f:
        f.writelines([line + "\n" for line in img_paths])

    # init a signature generator
    gis = ImageSignature()

    # process images
    num_processed_images = 0
    signatures = np.ndarray(shape=[0, gis.sig_length])

    for img_path in tqdm(img_paths):
        sig = gis.generate_signature(img_path)
        signatures = np.concatenate(
            (signatures, np.reshape(sig, (1, gis.sig_length))))

    # save signatures to npy file
    if os.path.exists(DATA_DIR) is False:
        os.makedirs(DATA_DIR)
    np.save(os.path.join(DATA_DIR, SIGNATURES), signatures)
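As a follow-up, a minimal sketch (not part of the original) that loads the saved signature matrix and ranks the stored images against a query image using the Goldberg normalized distance ||b - a|| / (||b|| + ||a||). The DATA_DIR, SIGNATURES, and IMG_PATHS values below are placeholder assumptions for the module-level constants used above.

import os

import numpy as np
from image_match.goldberg import ImageSignature

DATA_DIR = 'data'                 # assumed value of the module constant used above
SIGNATURES = 'signatures.npy'     # assumed value
IMG_PATHS = 'img_paths.txt'       # assumed value


def rank_by_similarity(query_image, top_k=5):
    # Load the signature matrix and image paths written by extract_signature().
    signatures = np.load(os.path.join(DATA_DIR, SIGNATURES))
    with open(os.path.join(DATA_DIR, IMG_PATHS)) as f:
        paths = [line.strip() for line in f]

    gis = ImageSignature()
    query_sig = gis.generate_signature(query_image)

    # Normalized distance ||b - a|| / (||b|| + ||a||), computed row-wise against every stored signature.
    diff_norm = np.linalg.norm(signatures - query_sig, axis=1)
    denom = np.linalg.norm(signatures, axis=1) + np.linalg.norm(query_sig)
    distances = diff_norm / denom

    order = np.argsort(distances)[:top_k]
    return [(paths[i], float(distances[i])) for i in order]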
Example #6
    def add_candidates(self):
        if not self.new_target_list():
            return ()
        dirsize = 0
        start = datetime.datetime.now()
        logger.info(f"Walking target list: {self.state.dirlist}")
        gis = ImageSignature()
        for top in self.state.dirlist:
            message = f"Traversing tree at {top} and adding to queue."
            logger.info(message)
            self.status(message)
            top_path = Path(top)
            for path in top_path.rglob("**/*"):
                ext = path.suffix.lower()
                if ext in cfg.settings.image_filetypes:
                    size = path.stat().st_size
                    dirsize += size
                    photo_b = self.get_bytes(path)
                    md5sum = hashlib.md5(photo_b).hexdigest()
                    # TODO: skip this file if its MD5 sum is already in the database
                    im = Image.open(io.BytesIO(photo_b))
                    tags = {
                        "cameraMake": im.info['parsed_exif'].get(0x010f, ""),
                        "cameraModel": im.info['parsed_exif'].get(0x0110, ""),
                        "creationTime": im.info['parsed_exif'].get(0x9003, ""),
                        "width": im.width,
                        "height": im.height,
                    }
                    image_md5 = hashlib.md5(im.tobytes()).hexdigest()
                    signature = gis.generate_signature(
                        photo_b, bytestream=True
                    ).tolist()
                    record = {
                        "src_path": str(path),
                        "size": size,
                        "md5sum": md5sum,
                        "image_md5": image_md5,
                        "signature": signature,
                        "mediaMetadata": tags,
                    }
                    photos.add(record)
                    logger.info(f"Added: {path}")
                else:
                    ext = ext.replace(
                        ".", ""
                    )  # Database can't handle keys starting with a dot
                    excluded = self.state.excluded_ext_dict
                    if ext in excluded:
                        excluded[ext] += 1
                    else:
                        excluded[ext] = 1
                    self.state.update(excluded_ext_dict=excluded)
        self.state.save()
        elapsed = datetime.datetime.now() - start
        self.state.modify(
            dirsize=self.state.dirsize + dirsize,
            dirtime=elapsed.seconds + elapsed.microseconds / 1e6,
        )
        return
Example #8
    def __init__(self, n_levels=5, crop_percentiles=(10, 90), equalize=False):
        Hasher.__init__(self, name=self.__class__.__name__ + (
            '(n_levels=%d,low_percentile=%d,equalize=%s)' % (
                n_levels, crop_percentiles[0], str(equalize))))

        self.gis = ImageSignature(n_levels=n_levels, crop_percentiles=crop_percentiles)
        self.equalize = equalize
        self.n_levels = n_levels
Example #9
def compare_photos(photo_1, photo_2):
    gis = ImageSignature()
    a = gis.generate_signature(photo_1)
    b = gis.generate_signature(photo_2)
    normalized_distance = gis.normalized_distance(a, b)

    return normalized_distance < 0.4
Example #10
def test_load_from_unicode_path():
    try:
        path = u'test.jpg'
    except NameError:
        return
    gis = ImageSignature()
    sig = gis.generate_signature(path)
    assert type(sig) is ndarray
    assert sig.shape == (648,)
Example #11
def test_all_inputs_same_sig():
    gis = ImageSignature()
    sig1 = gis.generate_signature(test_img_url)
    sig2 = gis.generate_signature('test.jpg')
    with open('test.jpg', 'rb') as f:
        sig3 = gis.generate_signature(f.read(), bytestream=True)

    assert array_equal(sig1, sig2)
    assert array_equal(sig2, sig3)
Example #14
def calc_accuracy(path1, path2):
    print(path1, path2)
    path1 = str(path1)
    path2 = str(path2)
    gis = ImageSignature()
    a = gis.generate_signature(path1)
    b = gis.generate_signature(path2)
    dist = gis.normalized_distance(a, b)
    return dist
Example #15
def CPIMS(file1, file2):
    gis = ImageSignature()

    a = gis.generate_signature(file1)
    b = gis.generate_signature(file2)
    c = gis.normalized_distance(a,b)

    data = [[c]]
    result_data=pandas.DataFrame(data, columns=['  IMS normalized_distance'])

    return result_data
Example #16
def get_image_sign_processed(path_image):
    """
    Get the signature of the given image and process it so that it has the same format as the
    signatures in the enriched log.
    """
    image_signature = ImageSignature()
    sign_image = image_signature.generate_signature(path_image)
    sign_image_processed = str(sign_image).replace('\n', '*').replace(
        '[ ', '').replace(']', '')
    # Process the signature so that it comes out in the same form as in the enriched log
    return sign_image_processed
Example #17
def gen_hd5(images_url):
    gis = ImageSignature()
    feature_list = []
    for i, url in enumerate(images_url):
        try:
            feature = gis.generate_signature(url).tolist()
            feature_str = json.dumps(feature)
            feature_md5 = hashlib.new('md5', feature_str.encode('utf-8')).hexdigest()
            feature_list.append(feature_md5)
            print('{}_ok...'.format(i))
        except Exception:
            print('{}_err...'.format(i))
            continue
    return feature_list
Example #18
def detect_callback():
    os.chdir(caliberate_path)
    gis = ImageSignature()

    caliberate_files = ["cal10.jpg", "cal100.jpg", "cal500.jpg", "cal2000.jpg"]
    signature_values = []

    for x in range(4):
        signature_values.append(cv2.imread(caliberate_files[x]))
        signature_values[x] = gis.generate_signature(signature_values[x])

    cam = cv2.VideoCapture(0)
    s, im = cam.read()
    cv2.imwrite('output.jpg', im)
    cam_out = cv2.imread("output.jpg")

    img = ImageTk.PhotoImage(Image.open(caliberate_path + '/' + 'output.jpg'))
    panel = tk.Label(root, image=img)
    panel.pack(side="bottom", fill="both", expand="yes")

    cam_out = gis.generate_signature(cam_out)

    distance_values = []
    for x in range(4):
        distance_values.append(
            gis.normalized_distance(cam_out, signature_values[x]))

    min_d = distance_values.index(min(distance_values))

    if min_d == 0:
        return_string = ("Rs 10 ")
        a = 10
    elif min_d == 1:
        return_string = ("Rs 100")
        a = 100
    elif min_d == 2:
        return_string = ("Rs 500")
        a = 500
    elif min_d == 3:
        return_string = ("Rs 2000")
        a = 2000

    tkMessageBox.showinfo("DETECTION ALERT!", return_string+'\nUSD:'+str(convert_currency(a,'usd'))+\
     '\nGBP:'+str(convert_currency(a,'GBP'))+\
     '\nEUR:'+str(convert_currency(a,'EUR'))\
     )
Example #19
def searchSimilarImages(dbFilePath, testImagePath, topSearch):
    print('Starting Search')
    distancemap = []
    img = cv2.imread(testImagePath, cv2.IMREAD_COLOR)
    dst = cv2.fastNlMeansDenoisingColored(img, None, 10, 10, 7, 21)
    median = cv2.medianBlur(dst, 3)
    gray = cv2.cvtColor(median, cv2.COLOR_BGR2GRAY)
    dim = (gray.shape)
    minsize = 0
    if dim[0] > 200:
        minsize = 200
    else:
        minsize = 100
    th, img_bw = cv2.threshold(gray, 0, 255,
                               cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    se1 = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
    se2 = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    mask = cv2.morphologyEx(img_bw, cv2.MORPH_CLOSE, se1)
    binary_cleaned = cv2.morphologyEx(mask, cv2.MORPH_OPEN, se2)
    binary_mask = cv2.bitwise_not(binary_cleaned)
    imglab = morphology.label(binary_mask)
    cleaned = morphology.remove_small_objects(imglab,
                                              min_size=minsize,
                                              connectivity=8)
    img3 = np.zeros((cleaned.shape))
    img3[cleaned > 0] = 255
    img3 = np.uint8(img3)
    clean_image = cv2.bitwise_not(img3)
    testdata = np.array(clean_image)

    gis = ImageSignature()
    a = gis.generate_signature(testdata)  # compute the query signature once, outside the loop

    with open(dbFilePath, 'rb') as filehandle:
        # read the stored records as a binary data stream
        distances = pickle.load(filehandle)
        for i in range(len(distances)):
            imgname = distances[i].get('name')
            imgpath = distances[i].get('path')
            localdata = distances[i].get('data')
            b = gis.generate_signature(localdata)
            dis = gis.normalized_distance(a, b)
            ar = {'name': imgname + '.bmp', 'path': imgpath, 'distance': dis}
            distancemap.append(ar)

    newlist = sorted(distancemap, key=lambda k: k['distance'])
    fixeddistances = []
    for i in range(topSearch):
        fixeddistances.append(newlist[i])
    return fixeddistances
Example #20
class ImageSignatureService(object):
    def __init__(self):
        self._gis = ImageSignature()
        self._logger = Logger(self.__class__.__name__)

    def get_image_signature_from_bytes(self, image_bytes):
        base64_signature = image_signature_array_to_base64(
            self._gis.generate_signature(image_bytes, bytestream=True))
        base64_image = b64encode(image_bytes).decode()
        return base64_signature, base64_image

    def get_image_signature_from_file_path(self, image_file_path):
        with open(image_file_path, 'rb') as image_file:
            return self.get_image_signature_from_bytes(image_file.read())

    def get_image_signature_from_url(self, image_url):
        return self._get_image_signature_from_url(image_url)

    def _get_image_signature_from_url(self, image_url, retry_num=0):
        try:
            response = requests.get(image_url)
            response.raise_for_status()
            return self.get_image_signature_from_bytes(response.content)

        except HTTPError as e:
            if retry_num >= MAX_RETRIES or e.response.status_code == 404:
                return

            else:
                retry_num += 1
                return self._get_image_signature_from_url(image_url,
                                                          retry_num=retry_num)
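A hypothetical usage sketch for the service above; the image URL is the Mona Lisa file already used in these examples, and the surrounding module is assumed to define MAX_RETRIES and the base64 helper that the class imports.

service = ImageSignatureService()
result = service.get_image_signature_from_url(
    'https://upload.wikimedia.org/wikipedia/commons/thumb/e/ec/Mona_Lisa,_by_Leonardo_da_Vinci,_from_C2RMF_retouched.jpg/687px-Mona_Lisa,_by_Leonardo_da_Vinci,_from_C2RMF_retouched.jpg'
)
if result is None:
    # _get_image_signature_from_url returns None after MAX_RETRIES failed attempts or a 404
    print('could not fetch the image')
else:
    base64_signature, base64_image = result
    print('signature length:', len(base64_signature), 'image length:', len(base64_image))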
Example #21
def find_distances(image):
    gis = ImageSignature()
    im_sig = gis.generate_signature(
        image)  # get signature for image you're finding match for
    # http://www.cs.cmu.edu/~hcwong/Pdfs/icip02.ps for info on how signatures are made
    global all_signatures
    all_signatures = session.query(Audio.image_signature).all()
    global distances
    distances = []
    for signature in all_signatures:
        listy = [int(char) for char in signature[0][1:-1].split(',')]
        key_sig = np.array(listy)
        distance = gis.normalized_distance(
            key_sig, im_sig)  # compute normalized distance between two points.
        # computes || b - a || / ( ||b|| + ||a||)
        distances.append(distance)
    return distances
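A minimal NumPy sketch (an illustration, not the library's own code) of the formula referenced in the comment above, ||b - a|| / (||b|| + ||a||), handy for sanity-checking gis.normalized_distance:

import numpy as np


def normalized_distance_sketch(a, b):
    # ||b - a|| / (||b|| + ||a||), as noted in find_distances above.
    a = np.asarray(a, dtype=float)
    b = np.asarray(b, dtype=float)
    return np.linalg.norm(b - a) / (np.linalg.norm(b) + np.linalg.norm(a))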
Example #22
def searchSimilarImages(dbFilePath, testImagePath, topSearch):
    print('Starting Search')
    distancemap = []
    img = cv2.imread(testImagePath)
    dst = cv2.fastNlMeansDenoisingColored(img, None, 10, 10, 7, 21)
    gaussian_3 = cv2.GaussianBlur(dst, (9, 9), 10.0)
    sharpened = cv2.addWeighted(dst, 1.5, gaussian_3, -0.5, 0, dst)
    gray = cv2.cvtColor(sharpened, cv2.COLOR_BGR2GRAY)
    th, im_thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    kernel = np.ones((3, 3), dtype=np.uint8)
    binary_cleaned = cv2.morphologyEx(im_thresh, cv2.MORPH_OPEN, kernel)
    binary_mask = cv2.bitwise_not(binary_cleaned)
    imglab = morphology.label(binary_mask)
    cleaned = morphology.remove_small_objects(imglab, min_size=500, connectivity=8)
    img3 = np.zeros((cleaned.shape))
    img3[cleaned > 0] = 255
    img3 = np.uint8(img3)
    clean_image = cv2.bitwise_not(img3)
    #clean_imagecol = cv2.cvtColor(clean_image, cv2.COLOR_GRAY2RGB)
    testdata = np.array(clean_image)

    gis = ImageSignature()
    a = gis.generate_signature(testdata)  # compute the query signature once, outside the loop

    with open(dbFilePath, 'rb') as filehandle:
        # read the stored records as a binary data stream
        distances = pickle.load(filehandle)
        for i in range(len(distances)):
            imgname = distances[i].get('name')
            imgpath = distances[i].get('path')
            localdata = distances[i].get('data')
            b = gis.generate_signature(localdata)
            dis = gis.normalized_distance(a, b)
            ar = {'name': imgname + '.bmp', 'path': imgpath, 'distance': dis}
            distancemap.append(ar)

    newlist = sorted(distancemap, key=lambda k: k['distance'])
    fixeddistances = []
    for i in range(topSearch):
        fixeddistances.append(newlist[i])
    return fixeddistances
Example #23
def image2db():
    # Add the image signature, corresponding audio bytes, and parameters to the database.
    name = fname
    gis = ImageSignature()  # instantiate
    image_key = gis.generate_signature('key.png')  # get signature for the tattooed image
    image_sig = str(list(image_key))  # convert to string to be stored in the database
    audio_b = signal_byte  # signal_bytes was globally defined above
    parameters = audio.getparams()  # get the parameters of audio
    nchannels = parameters[0]
    sampwidth = parameters[1]
    framerate = parameters[2]
    nframes = parameters[3]
    comptype = parameters[4]
    compname = parameters[5]
    instance = Audio(name=name, image_signature=image_sig,
                     audio_bytes=audio_b, nchannels=nchannels, sampwidth=sampwidth,
                     framerate=framerate, nframes=nframes, comptype=comptype,
                     compname=compname)  # create an Audio instance

    session.add(instance)  # add to database
    session.commit()
Example #24
    def __init__(self, size=32, angle=45):
        """
        Create a size x size x (350/angle) block.
        Pick one SIFT descriptor per block.
        At the moment, the pick is first come, first served.
        However, there may be value in adding a qualifier such as smallest magnitude,
        as bigger blobs are less interesting.
        :param size:
        :param angle:
        """
        Hasher.__init__(self, name=self.__class__.__name__ + ('(size=%d, angle=%d)' % (size, angle)))
        self.size = size
        self.angle = angle
        self.gis = ImageSignature(n_levels=5, crop_percentiles=(5, 95))
Example #25
class ImageMatchHasher(Hasher):
    def __init__(self, n_levels=5, crop_percentiles=(10, 90), equalize=False):
        Hasher.__init__(self, name=self.__class__.__name__ + (
            '(n_levels=%d,low_percentile=%d,equalize=%s)' % (
                n_levels, crop_percentiles[0], str(equalize))))

        self.gis = ImageSignature(n_levels=n_levels, crop_percentiles=crop_percentiles)
        self.equalize = equalize
        self.n_levels = n_levels

    def hash(self, id, img):
        sig = self.gis.generate_signature(img)
        return IndexItem(id, [sig], [1])

    def dims(self):
        return 648
Example #26
class SignatureDatabaseBase(object):
    """Base class for storing and searching image signatures in a database

    Note:
        You must implement the methods search_single_record and insert_single_record
        in a derived class

    """
    def search_single_record(self, rec, pre_filter=None):
        """Search for a matching image record.

        Must be implemented by derived class.

        Args:
            rec (dict): an image record. Will be in the format returned by
                make_record

                For example, rec could have the form:

                {'path': 'https://pixabay.com/static/uploads/photo/2012/11/28/08/56/mona-lisa-67506_960_720.jpg',
                 'signature': [0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 2, 2, 2, 0, 0, 0, 0, 2, 2, 2, 2, 0 ... ]
                 'simple_word_0': 42252475,
                 'simple_word_1': 23885671,
                 'simple_word_10': 9967839,
                 'simple_word_11': 4257902,
                 'simple_word_12': 28651959,
                 'simple_word_13': 33773597,
                 'simple_word_14': 39331441,
                 'simple_word_15': 39327300,
                 'simple_word_16': 11337345,
                 'simple_word_17': 9571961,
                 'simple_word_18': 28697868,
                 'simple_word_19': 14834907,
                 'simple_word_2': 7434746,
                 'simple_word_20': 37985525,
                 'simple_word_21': 10753207,
                 'simple_word_22': 9566120,
                 ...
                 'metadata': {'category': 'art'},
                 }

                 The number of simple words corresponds to the attribute N

            pre_filter (dict): a filter to be applied by the concrete implementation
                   before applying the matching strategy

                For example:
                    { "term": {  "metadata.category": "art" } }

        Returns:
            a formatted list of dicts representing matches.

            For example, if three matches are found:

            [
             {'dist': 0.069116439263706961,
              'id': u'AVM37oZq0osmmAxpPvx7',
              'path': u'https://pixabay.com/static/uploads/photo/2012/11/28/08/56/mona-lisa-67506_960_720.jpg'},
             {'dist': 0.22484320805049718,
              'id': u'AVM37nMg0osmmAxpPvx6',
              'path': u'https://upload.wikimedia.org/wikipedia/commons/thumb/e/ec/Mona_Lisa,_by_Leonardo_da_Vinci,_from_C2RMF_retouched.jpg/687px-Mona_Lisa,_by_Leonardo_da_Vinci,_from_C2RMF_retouched.jpg'},
             {'dist': 0.42529792112113302,
              'id': u'AVM37p530osmmAxpPvx9',
              'metadata': {...},
              'path': u'https://c2.staticflickr.com/8/7158/6814444991_08d82de57e_z.jpg'}
            ]

            You can return any fields you like, but must include at least dist and id. Duplicate entries are ok,
            and they do not need to be sorted

        """
        raise NotImplementedError

    def insert_single_record(self, rec, refresh_after=False):
        """Insert an image record.

        Must be implemented by derived class.

        Args:
            rec (dict): an image record. Will be in the format returned by
                make_record

                For example, rec could have the form:

                {'path': 'https://pixabay.com/static/uploads/photo/2012/11/28/08/56/mona-lisa-67506_960_720.jpg',
                 'signature': [0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 2, 2, 2, 0, 0, 0, 0, 2, 2, 2, 2, 0 ... ]
                 'simple_word_0': 42252475,
                 'simple_word_1': 23885671,
                 'simple_word_10': 9967839,
                 'simple_word_11': 4257902,
                 'simple_word_12': 28651959,
                 'simple_word_13': 33773597,
                 'simple_word_14': 39331441,
                 'simple_word_15': 39327300,
                 'simple_word_16': 11337345,
                 'simple_word_17': 9571961,
                 'simple_word_18': 28697868,
                 'simple_word_19': 14834907,
                 'simple_word_2': 7434746,
                 'simple_word_20': 37985525,
                 'simple_word_21': 10753207,
                 'simple_word_22': 9566120,
                 ...
                 'metadata': {...}
                 }

                 The number of simple words corresponds to the attribute N

        """
        raise NotImplementedError

    def __init__(self,
                 k=16,
                 N=63,
                 n_grid=9,
                 crop_percentile=(5, 95),
                 distance_cutoff=0.45,
                 *signature_args,
                 **signature_kwargs):
        """Set up storage scheme for images

        Central to the speed of this approach is transforming the image
        signature into something that can be speedily indexed and matched.
        In our case, that means splitting the image signature into N words
        of length k, then encoding those words as integers. The idea here is
        that integer indices are more efficient than array indices.

        For example, say your image signature is [0, 1, 2, 0, -1, -2, 0, 1] and
        k=3 and N=4. That means we want 4 words of length 3. For this signature,
        that gives us:

        [0, 1, 2]
        [2, 0, -1]
        [-1, -2, 0]
        [0, 1]

        Note that signature elements can be repeated, and any mismatch in length
        is chopped off in the last word (which will be padded with zeros). Since
        these numbers run from -2..2, there are 5 possibilities. Adding 2 to each
        word makes its elements strictly non-negative, and transforming each word
        to base-5 yields a unique integer. For the first word:

        [0, 1, 2] + 2 = [2, 3, 4]
        [5**0, 5**1, 5**2] = [1, 5, 25]
        dot([2, 3, 4], [1, 5, 25]) = 2 + 15 + 100 = 117

        So the integer word is 117.  Storing all the integer words as different
        database columns or fields gives us the speedy lookup. In practice, word
        arrays are 'squeezed' to between -1..1 before encoding.

        Args:
            k (Optional[int]): the width of a word (default 16)
            N (Optional[int]): the number of words (default 63)
            n_grid (Optional[int]): the n_grid x n_grid size to use in determining
                the image signature (default 9)
            crop_percentiles (Optional[Tuple[int]]): lower and upper bounds when
                considering how much variance to keep in the image (default (5, 95))
            distance_cutoff (Optional [float]): maximum image signature distance to
                be considered a match (default 0.45)
            *signature_args: Variable length argument list to pass to ImageSignature
            **signature_kwargs: Arbitrary keyword arguments to pass to ImageSignature

        """
        # Check integer inputs
        if type(k) is not int:
            raise TypeError('k should be an integer')
        if type(N) is not int:
            raise TypeError('N should be an integer')
        if type(n_grid) is not int:
            raise TypeError('n_grid should be an integer')

        self.k = k
        self.N = N
        self.n_grid = n_grid

        # Check float input
        if type(distance_cutoff) is not float:
            raise TypeError('distance_cutoff should be a float')
        if distance_cutoff < 0.:
            raise ValueError('distance_cutoff should be > 0 (got %r)' %
                             distance_cutoff)

        self.distance_cutoff = distance_cutoff

        self.crop_percentile = crop_percentile

        self.gis = ImageSignature(n=n_grid,
                                  crop_percentiles=crop_percentile,
                                  *signature_args,
                                  **signature_kwargs)

    def add_image(self,
                  path,
                  img=None,
                  bytestream=False,
                  metadata=None,
                  refresh_after=False):
        """Add a single image to the database

        Args:
            path (string): path or identifier for image. If img=None, then path is assumed to be
                a URL or filesystem path
            img (Optional[string]): usually raw image data. In this case, path will still be stored, but
                a signature will be generated from data in img. If bytestream is False, but img is
                not None, then img is assumed to be the URL or filesystem path. Thus, you can store
                image records with a different 'path' than the actual image location (default None)
            bytestream (Optional[boolean]): will the image be passed as raw bytes?
                That is, is the 'path_or_image' argument an in-memory image? If img is None, this
                argument will be ignored. If img is not None and bytestream is False, then the behavior
                is as described in the explanation for the img argument
                (default False)
            metadata (Optional): any other information you want to include, can be nested (default None)

        """
        rec = make_record(path,
                          self.gis,
                          self.k,
                          self.N,
                          img=img,
                          bytestream=bytestream,
                          metadata=metadata)
        self.insert_single_record(rec, refresh_after=refresh_after)

    def search_image(self,
                     path,
                     all_orientations=False,
                     bytestream=False,
                     pre_filter=None):
        """Search for matches

        Args:
            path (string): path or image data. If bytestream=False, then path is assumed to be
                a URL or filesystem path. Otherwise, it's assumed to be raw image data
            all_orientations (Optional[boolean]): if True, search for all combinations of mirror
                images, rotations, and color inversions (default False)
            bytestream (Optional[boolean]): will the image be passed as raw bytes?
                That is, is the 'path_or_image' argument an in-memory image?
                (default False)
            pre_filter (Optional[dict]): filters list before applying the matching algorithm
                (default None)
        Returns:
            a formatted list of dicts representing unique matches, sorted by dist

            For example, if three matches are found:

            [
             {'dist': 0.069116439263706961,
              'id': u'AVM37oZq0osmmAxpPvx7',
              'path': u'https://pixabay.com/static/uploads/photo/2012/11/28/08/56/mona-lisa-67506_960_720.jpg'},
             {'dist': 0.22484320805049718,
              'id': u'AVM37nMg0osmmAxpPvx6',
              'path': u'https://upload.wikimedia.org/wikipedia/commons/thumb/e/ec/Mona_Lisa,_by_Leonardo_da_Vinci,_from_C2RMF_retouched.jpg/687px-Mona_Lisa,_by_Leonardo_da_Vinci,_from_C2RMF_retouched.jpg'},
             {'dist': 0.42529792112113302,
              'id': u'AVM37p530osmmAxpPvx9',
              'path': u'https://c2.staticflickr.com/8/7158/6814444991_08d82de57e_z.jpg'}
            ]

        """
        img = self.gis.preprocess_image(path, bytestream)

        if all_orientations:
            # initialize an iterator of composed transformations
            inversions = [lambda x: x, lambda x: -x]

            mirrors = [lambda x: x, np.fliplr]

            # an ugly solution for function composition
            rotations = [
                lambda x: x, np.rot90, lambda x: np.rot90(x, 2),
                lambda x: np.rot90(x, 3)
            ]

            # cartesian product of all possible orientations
            orientations = product(inversions, rotations, mirrors)

        else:
            # otherwise just use the identity transformation
            orientations = [lambda x: x]

        # try for every possible combination of transformations; if all_orientations=False,
        # this will only take one iteration
        result = []

        orientations = set(np.ravel(list(orientations)))
        for transform in orientations:
            # compose all functions and apply on signature
            transformed_img = transform(img)

            # generate the signature
            transformed_record = make_record(transformed_img, self.gis, self.k,
                                             self.N)

            l = self.search_single_record(transformed_record,
                                          pre_filter=pre_filter)
            result.extend(l)

        ids = set()
        unique = []
        for item in result:
            if item['id'] not in ids:
                unique.append(item)
                ids.add(item['id'])

        r = sorted(unique, key=itemgetter('dist'))
        return r
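The base class requires insert_single_record and search_single_record in a derived class. Below is a minimal in-memory sketch of such a backend written against the interface shown above (pre_filter and refresh_after are accepted but ignored); it scans records linearly with the documented normalized distance instead of indexing the simple_word_* fields, so it is illustrative only.

import numpy as np


class InMemorySignatureDatabase(SignatureDatabaseBase):
    # Toy backend: keeps records in a Python list and scans them linearly.
    # A real backend would index the simple_word_* fields for fast candidate lookup.
    def __init__(self, *args, **kwargs):
        SignatureDatabaseBase.__init__(self, *args, **kwargs)
        self._records = []

    def insert_single_record(self, rec, refresh_after=False):
        rec['id'] = len(self._records)
        self._records.append(rec)

    def search_single_record(self, rec, pre_filter=None):
        a = np.asarray(rec['signature'], dtype=float)
        matches = []
        for stored in self._records:
            b = np.asarray(stored['signature'], dtype=float)
            dist = np.linalg.norm(b - a) / (np.linalg.norm(b) + np.linalg.norm(a))
            if dist < self.distance_cutoff:
                matches.append({'id': stored['id'], 'dist': dist, 'path': stored['path']})
        return matches

# usage sketch: db = InMemorySignatureDatabase(); db.add_image('MonaLisa_1.jpg'); db.search_image('MonaLisa_2.jpg')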
Example #27
def test_load_from_file():
    gis = ImageSignature()
    sig = gis.generate_signature('test.jpg')
    assert type(sig) is ndarray
    assert sig.shape == (648,)
Example #28
from PIL import Image

#import random

helmet_model_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                 "yolov5_m_hat.pt")
mask_model_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                               "mask_classification_model.pth")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

helmet_model = YOLOv5(helmet_model_path, device)  # helmet detection network
mtcnn = MTCNN(keep_all=True, device=device)  # face detection network
mask_model = torch.load(
    mask_model_path, map_location=device)  # classifies faces as masked or unmasked
mask_model.eval()
gis = ImageSignature()


def add_new_face_frame(frame):
    """
    Function for adding faces frame by frame.
    :param frame:
    :return:
    """
    _, width = frame.shape[1], frame.shape[0]
    k = 900 / width
    dim = (int(frame.shape[1] * k), int(frame.shape[0] * k))
    image = cv2.resize(frame, dim)
    return add_new_face_video(image)

Example #30
def test_load_from_url():
    gis = ImageSignature()
    sig = gis.generate_signature(test_img_url)
    assert type(sig) is ndarray
    assert sig.shape == (648,)
Example #31
from image_match.goldberg import ImageSignature
gis = ImageSignature()
a = gis.generate_signature('./MonaLisa_1.jpg')
b = gis.generate_signature('./MonaLisa_2.jpg')
c = gis.generate_signature('./Other.jpg')
dist = gis.normalized_distance(a, b)
dist2 = gis.normalized_distance(a, c)
# normalized distance < 0.4 likely to be a match
print(dist)
print(dist2)
Example #32
def test_load_from_corrupt_stream():
    gis = ImageSignature()
    with pytest.raises(CorruptImageError):
        gis.generate_signature(b'corrupt', bytestream=True)
Example #33
class ImageMatchCalculator(QThread):
    """
    This is a threaded class, which will find matches for images
    based on the image_match library.
    """

    # Emitted when a distinct image file is found
    distinct_image_found = pyqtSignal(
        ImageFile, arguments=["imagePath"], name="distinctImageFileFound"
    )
    # Emitted when a duplicate image is found for another image
    # The first argument will be the original image and the second
    # argument will be the duplicate image.
    duplicate_image_found = pyqtSignal(
        ImageFile,
        ImageFile,
        arguments=["imageFile", "duplicateImageFile"],
        name="imageDuplicatesFound",
    )

    # Emitted when all the duplicate matches are found
    finished = pyqtSignal(object)

    def __init__(self, starting_directory):
        QObject.__init__(self)

        self.starting_directory = starting_directory
        self.glob_path = os.path.join(starting_directory, "*.jpg")
        self.signature_generator = ImageSignature()
        self.image_files = []
        self.distinct_image_files = []

    def __del__(self):
        self.wait()

    def run(self):
        self.calculate()

    def calculate(self):
        for image_path in glob.iglob(self.glob_path):
            new_image_file = ImageFile(
                path=image_path,
                signature=self._get_image_signatures(image_path),
            )
            original_image = self._find_original_image(new_image_file)
            if original_image:
                self.duplicate_image_found.emit(original_image, new_image_file)
            else:
                self.distinct_image_files.append(new_image_file)
                self.distinct_image_found.emit(new_image_file)

        self.finished.emit(self.distinct_image_files)

    def _find_original_image(self, image):
        for distinct_image in self.distinct_image_files:
            if self._matches(distinct_image, image):
                return distinct_image

    def _matches(self, image_file1, image_file2):
        distance = self.signature_generator.normalized_distance(
            image_file1.signature, image_file2.signature
        )
        return distance <= 0.3

    def _get_image_signatures(self, image_path):
        return self.signature_generator.generate_signature(image_path)

    def _compute_matches(self):
        for i in range(len(self.image_files)):
            image_file = self.image_files[i]

            if image_file is not None:
                for j in range(i + 1, len(self.image_files)):
                    another_image_file = self.image_files[j]

                    if another_image_file is not None:
                        if self._matches(image_file, another_image_file):
                            image_file.add_duplicate(another_image_file)
                            self.image_files[j] = None
            # If this element is None and the previous element is not None,
            # then it must be an ImageFile with all of its duplicates
            # identified
            elif self.image_files[i - 1] is not None:
                self.duplicate_image_found.emit(self.image_files[i - 1])

        self.image_files = [
            image_file
            for image_file in self.image_files
            if image_file is not None
        ]
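A hypothetical wiring sketch for the class above, assuming a running PyQt application and that the project's ImageFile type exposes a path attribute (both are assumptions, not shown in the snippet):

def on_distinct(image_file):
    print('distinct image:', image_file.path)      # assumes ImageFile has a .path attribute

def on_duplicate(original, duplicate):
    print('duplicate of', original.path, '->', duplicate.path)

calculator = ImageMatchCalculator('/path/to/photos')   # placeholder directory
calculator.distinct_image_found.connect(on_distinct)
calculator.duplicate_image_found.connect(on_duplicate)
calculator.finished.connect(lambda distinct: print(len(distinct), 'distinct images'))
calculator.start()  # QThread.start() invokes run(), which calls calculate()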
Example #34
def test_identity():
    gis = ImageSignature()
    sig = gis.generate_signature('test.jpg')
    dist = gis.normalized_distance(sig, sig)
    assert dist == 0.0
Example #35
def test_difference():
    gis = ImageSignature()
    sig1 = gis.generate_signature('test.jpg')
    sig2 = gis.generate_signature(test_diff_img_url)
    dist = gis.normalized_distance(sig1, sig2)
    assert 0.42 < dist < 0.43
Example #36
def test_load_from_stream():
    gis = ImageSignature()
    with open('test.jpg', 'rb') as f:
        sig = gis.generate_signature(f.read(), bytestream=True)
        assert type(sig) is ndarray
        assert sig.shape == (648,)
Example #38
class SignatureDatabaseBase(object):
    """Base class for storing and searching image signatures in a database

    Note:
        You must implement the methods search_single_record and insert_single_record
        in a derived class

    """

    def search_single_record(self, rec):
        """Search for a matching image record.

        Must be implemented by derived class.

        Args:
            rec (dict): an image record. Will be in the format returned by
                make_record

                For example, rec could have the form:

                {'path': 'https://pixabay.com/static/uploads/photo/2012/11/28/08/56/mona-lisa-67506_960_720.jpg',
                 'signature': [0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 2, 2, 2, 0, 0, 0, 0, 2, 2, 2, 2, 0 ... ]
                 'simple_word_0': 42252475,
                 'simple_word_1': 23885671,
                 'simple_word_10': 9967839,
                 'simple_word_11': 4257902,
                 'simple_word_12': 28651959,
                 'simple_word_13': 33773597,
                 'simple_word_14': 39331441,
                 'simple_word_15': 39327300,
                 'simple_word_16': 11337345,
                 'simple_word_17': 9571961,
                 'simple_word_18': 28697868,
                 'simple_word_19': 14834907,
                 'simple_word_2': 7434746,
                 'simple_word_20': 37985525,
                 'simple_word_21': 10753207,
                 'simple_word_22': 9566120,
                 ...
                 'metadata': {'category': 'art'},
                 }

                 The number of simple words corresponds to the attribute N

        Returns:
            a formatted list of dicts representing matches.

            For example, if three matches are found:

            [
             {'dist': 0.069116439263706961,
              'id': u'AVM37oZq0osmmAxpPvx7',
              'path': u'https://pixabay.com/static/uploads/photo/2012/11/28/08/56/mona-lisa-67506_960_720.jpg'},
             {'dist': 0.22484320805049718,
              'id': u'AVM37nMg0osmmAxpPvx6',
              'path': u'https://upload.wikimedia.org/wikipedia/commons/thumb/e/ec/Mona_Lisa,_by_Leonardo_da_Vinci,_from_C2RMF_retouched.jpg/687px-Mona_Lisa,_by_Leonardo_da_Vinci,_from_C2RMF_retouched.jpg'},
             {'dist': 0.42529792112113302,
              'id': u'AVM37p530osmmAxpPvx9',
              'metadata': {...},
              'path': u'https://c2.staticflickr.com/8/7158/6814444991_08d82de57e_z.jpg'}
            ]

            You can return any fields you like, but must include at least dist and id. Duplicate entries are ok,
            and they do not need to be sorted

        """
        raise NotImplementedError

    def insert_single_record(self, rec):
        """Insert an image record.

        Must be implemented by derived class.

        Args:
            rec (dict): an image record. Will be in the format returned by
                make_record

                For example, rec could have the form:

                {'path': 'https://pixabay.com/static/uploads/photo/2012/11/28/08/56/mona-lisa-67506_960_720.jpg',
                 'signature': [0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 2, 2, 2, 0, 0, 0, 0, 2, 2, 2, 2, 0 ... ]
                 'simple_word_0': 42252475,
                 'simple_word_1': 23885671,
                 'simple_word_10': 9967839,
                 'simple_word_11': 4257902,
                 'simple_word_12': 28651959,
                 'simple_word_13': 33773597,
                 'simple_word_14': 39331441,
                 'simple_word_15': 39327300,
                 'simple_word_16': 11337345,
                 'simple_word_17': 9571961,
                 'simple_word_18': 28697868,
                 'simple_word_19': 14834907,
                 'simple_word_2': 7434746,
                 'simple_word_20': 37985525,
                 'simple_word_21': 10753207,
                 'simple_word_22': 9566120,
                 ...
                 'metadata': {...}
                 }

                 The number of simple words corresponds to the attribute N

        """
        raise NotImplementedError

    def __init__(self, k=16, N=63, n_grid=9,
                 crop_percentile=(5, 95), distance_cutoff=0.45,
                 *signature_args, **signature_kwargs):
        """Set up storage scheme for images

        Central to the speed of this approach is transforming the image
        signature into something that can be speedily indexed and matched.
        In our case, that means splitting the image signature into N words
        of length k, then encoding those words as integers. The idea here is
        that integer indices are more efficient than array indices.

        For example, say your image signature is [0, 1, 2, 0, -1, -2, 0, 1] and
        k=3 and N=4. That means we want 4 words of length 3. For this signature,
        that gives us:

        [0, 1, 2]
        [2, 0, -1]
        [-1, -2, 0]
        [0, 1]

        Note that signature elements can be repeated, and any mismatch in length
        is chopped off in the last word (which will be padded with zeros). Since
        these numbers run from -2..2, there are 5 possibilities. Adding 2 to each
        word makes its elements strictly non-negative, and transforming each word
        to base-5 yields a unique integer. For the first word:

        [0, 1, 2] + 2 = [2, 3, 4]
        [5**0, 5**1, 5**2] = [1, 5, 25]
        dot([2, 3, 4], [1, 5, 25]) = 2 + 15 + 100 = 117

        So the integer word is 117.  Storing all the integer words as different
        database columns or fields gives us the speedy lookup. In practice, word
        arrays are 'squeezed' to between -1..1 before encoding.

        Args:
            k (Optional[int]): the width of a word (default 16)
            N (Optional[int]): the number of words (default 63)
            n_grid (Optional[int]): the n_grid x n_grid size to use in determining
                the image signature (default 9)
            crop_percentiles (Optional[Tuple[int]]): lower and upper bounds when
                considering how much variance to keep in the image (default (5, 95))
            distance_cutoff (Optional [float]): maximum image signature distance to
                be considered a match (default 0.45)
            *signature_args: Variable length argument list to pass to ImageSignature
            **signature_kwargs: Arbitrary keyword arguments to pass to ImageSignature

        """
        # Check integer inputs
        if type(k) is not int:
            raise TypeError('k should be an integer')
        if type(N) is not int:
            raise TypeError('N should be an integer')
        if type(n_grid) is not int:
            raise TypeError('n_grid should be an integer')

        self.k = k
        self.N = N
        self.n_grid = n_grid

        # Check float input
        if type(distance_cutoff) is not float:
            raise TypeError('distance_cutoff should be a float')
        if distance_cutoff < 0.:
            raise ValueError('distance_cutoff should be > 0 (got %r)' % distance_cutoff)

        self.distance_cutoff = distance_cutoff

        self.crop_percentile = crop_percentile

        self.gis = ImageSignature(n=n_grid, crop_percentiles=crop_percentile, *signature_args, **signature_kwargs)

    def add_image(self, path, img=None, bytestream=False, metadata=None):
        """Add a single image to the database

        Args:
            path (string): path or identifier for image. If img=None, then path is assumed to be
                a URL or filesystem path
            img (Optional[string]): usually raw image data. In this case, path will still be stored, but
                a signature will be generated from data in img. If bytestream is False, but img is
                not None, then img is assumed to be the URL or filesystem path. Thus, you can store
                image records with a different 'path' than the actual image location (default None)
            bytestream (Optional[boolean]): will the image be passed as raw bytes?
                That is, is the 'path_or_image' argument an in-memory image? If img is None, this
                argument will be ignored. If img is not None and bytestream is False, then the behavior
                is as described in the explanation for the img argument
                (default False)
            metadata (Optional): any other information you want to include, can be nested (default None)

        """
        rec = make_record(path, self.gis, self.k, self.N, img=img, bytestream=bytestream, metadata=metadata)
        self.insert_single_record(rec)

    def search_image(self, path, all_orientations=False, bytestream=False):
        """Search for matches

        Args:
            path (string): path or image data. If bytestream=False, then path is assumed to be
                a URL or filesystem path. Otherwise, it's assumed to be raw image data
            all_orientations (Optional[boolean]): if True, search for all combinations of mirror
                images, rotations, and color inversions (default False)
            bytestream (Optional[boolean]): will the image be passed as raw bytes?
                That is, is the 'path_or_image' argument an in-memory image?
                (default False)

        Returns:
            a formatted list of dicts representing unique matches, sorted by dist

            For example, if three matches are found:

            [
             {'dist': 0.069116439263706961,
              'id': u'AVM37oZq0osmmAxpPvx7',
              'path': u'https://pixabay.com/static/uploads/photo/2012/11/28/08/56/mona-lisa-67506_960_720.jpg'},
             {'dist': 0.22484320805049718,
              'id': u'AVM37nMg0osmmAxpPvx6',
              'path': u'https://upload.wikimedia.org/wikipedia/commons/thumb/e/ec/Mona_Lisa,_by_Leonardo_da_Vinci,_from_C2RMF_retouched.jpg/687px-Mona_Lisa,_by_Leonardo_da_Vinci,_from_C2RMF_retouched.jpg'},
             {'dist': 0.42529792112113302,
              'id': u'AVM37p530osmmAxpPvx9',
              'path': u'https://c2.staticflickr.com/8/7158/6814444991_08d82de57e_z.jpg'}
            ]

        """
        img = self.gis.preprocess_image(path, bytestream)

        if all_orientations:
            # initialize an iterator of composed transformations
            inversions = [lambda x: x, lambda x: -x]

            mirrors = [lambda x: x, np.fliplr]

            # an ugly solution for function composition
            rotations = [lambda x: x,
                         np.rot90,
                         lambda x: np.rot90(x, 2),
                         lambda x: np.rot90(x, 3)]

            # cartesian product of all possible orientations
            orientations = product(inversions, rotations, mirrors)

        else:
            # otherwise just use the identity transformation
            orientations = [lambda x: x]

        # try for every possible combination of transformations; if all_orientations=False,
        # this will only take one iteration
        result = []

        orientations = np.unique(np.ravel(list(orientations)))
        for transform in orientations:
            # compose all functions and apply on signature
            transformed_img = transform(img)

            # generate the signature
            transformed_record = make_record(transformed_img, self.gis, self.k, self.N)

            l = self.search_single_record(transformed_record)
            result.extend(l)
        r = sorted(np.unique(result).tolist(), key=itemgetter('dist'))
        s = set([x['id'] for x in r])
        for i, x in enumerate(r):
            if x['id'] not in s:
                r.pop(i)
            else:
                s.remove(x['id'])
        return r
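A small standalone sketch (an illustration, not the library's make_record) that reproduces the word-encoding walk-through from the docstring above: split a signature into N words of length k, shift by +2, and read each word as a base-5 integer, so the docstring's first word [0, 1, 2] encodes to 117. The fixed-stride split and the omission of the 'squeeze to -1..1' step are simplifying assumptions.

import numpy as np


def words_from_signature(sig, k=3, N=4):
    # Split the signature into N words of length k at a fixed stride, zero-padding the tail,
    # mirroring the docstring example ([0, 1, 2, 0, -1, -2, 0, 1] with k=3, N=4).
    sig = np.asarray(sig)
    stride = max(1, int(round((len(sig) - k) / float(N - 1))))
    words = np.zeros((N, k), dtype=int)
    for i in range(N):
        chunk = sig[i * stride:i * stride + k]
        words[i, :len(chunk)] = chunk
    return words


def words_to_ints(words):
    # Shift to non-negative (+2) and treat each word as a base-5 number:
    # [0, 1, 2] + 2 = [2, 3, 4]; dot([2, 3, 4], [1, 5, 25]) = 117.
    words = np.asarray(words) + 2
    coefficients = 5 ** np.arange(words.shape[1])
    return words.dot(coefficients)


print(words_to_ints(words_from_signature([0, 1, 2, 0, -1, -2, 0, 1])))  # first entry is 117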
Example #40
def test_difference():
    gis = ImageSignature()
    sig1 = gis.generate_signature('test.jpg')
    sig2 = gis.generate_signature(test_diff_img_url)
    dist = gis.normalized_distance(sig1, sig2)
    assert dist == 0.42672771706789686