コード例 #1
0
    def sample(self, image, cls1, cls2, feature_object):
        """Return the feature sample for one image, using a pickle cache.

        The image id is the file name of ``image`` up to its first dot.  The
        sample is cached under ``<engine cache path>/single`` in a file named
        ``<feature name>-based-<id>-<cls1>-<cls2>``.

        Args:
            image: Path of the image; split on '/' to obtain the file name.
            cls1: First class label, stored in the sample and the cache key.
            cls2: Second class label, stored in the sample and the cache key.
            feature_object: Object exposing ``name`` and ``fire(image)``.

        Returns:
            The cached object when the cache file can be loaded; otherwise a
            freshly built one-element list holding a dict with the keys
            ``img``, ``cls1``, ``cls2`` and ``hist``.
        """
        idx = image.split('/')[-1].split('.')[0]
        sample_cache = '{}-{}-{}-{}'.format(feature_object.name + '-based',
                                            idx, cls1, cls2)
        cache_file = os.path.join(Config.engine_cache_path() + '/single',
                                  sample_cache)

        try:
            # ``with`` closes the handle even when unpickling fails.
            # NOTE(review): unpickling is only safe while the cache directory
            # is writable by trusted code alone.
            with open(cache_file, "rb") as fp:
                return cPickle.load(fp)
        except Exception:
            # Missing, unreadable or corrupt cache entry: rebuild it below.
            # ``Exception`` (not a bare except) lets Ctrl-C through.
            pass

        samples = [{
            'img': idx,
            'cls1': cls1,
            'cls2': cls2,
            'hist': feature_object.fire(image)
        }]

        with open(cache_file, "wb") as fp:
            cPickle.dump(samples, fp)

        return samples
コード例 #2
0
    def getImagePaths(self, create=True):
        """Build the mapping of category id to its image sub-directory.

        Rows of ``categories`` are read in ascending id order.  A row whose
        ``category_id`` is 0 maps to its own alias; any other row maps to
        ``<path of category_id>/<alias>``, so the referenced category must
        already have been seen (otherwise the lookup fails and ``False`` is
        returned).

        Args:
            create: When true, the cache is bypassed and every category
                directory missing under the product image path is created.
                When false, a cached mapping is returned if one exists.

        Returns:
            dict mapping category id to relative path, or ``False`` when
            anything goes wrong while querying or creating directories.
        """
        # NOTE(review): a new SimpleCache is created on every call; if it is
        # an in-process cache the lookup below can never hit - confirm.
        cache = SimpleCache()
        if not create:
            rv = cache.get('image-paths')
            if rv is not None:
                return rv
        try:
            with MySQL.conn(MySQL) as cursor:
                categories = {}
                sql = "SELECT id, category_id, alias FROM `categories` WHERE id > 0 ORDER BY id ASC"
                cursor.execute(sql)
                for row in cursor:
                    idx = row['id']
                    category_id = row['category_id']
                    alias = row['alias']
                    if category_id == 0:
                        categories[idx] = alias
                    else:
                        categories[idx] = categories[category_id] + '/' + alias
                    if create:
                        # Build the directory path once for the test and the
                        # creation.
                        directory = (Config.product_image_path() + '/' +
                                     categories[idx])
                        if not os.path.exists(directory):
                            os.makedirs(directory)

                cache.set('image-paths',
                          categories,
                          timeout=60 * 60 * 24 * 365)
                return categories
        except Exception:
            # Callers receive False on failure; KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            return False
コード例 #3
0
    def conn(self):
        """ Open the MySQL connection on first use and return a new cursor """
        if self.connection:
            return self.connection.cursor()

        # No connection yet: gather the settings from Config and connect.
        settings = dict(
            host=Config.databaseHost(),
            unix_socket=Config.databaseSocket(),
            user=Config.databaseUser(),
            passwd=Config.databasePassword(),
            db=Config.database(),
            autocommit=True,
            cursorclass=pymysql.cursors.DictCursor,
        )
        self.connection = pymysql.connect(**settings)
        return self.connection.cursor()
コード例 #4
0
    def samples(self, feature_object):
        """Return the feature samples of the whole collection, cached on disk.

        The cache file is ``<engine cache path>/<feature name>-based-all-products``.
        On a cache miss every row of the collection is turned into an image
        path (``<product image path>[/cls1][/cls2]/<img>``) and passed to
        ``self.sample``; rows that fail are skipped.

        Args:
            feature_object: Object exposing ``name``; forwarded to
                ``self.sample``.

        Returns:
            The cached object when it can be loaded, otherwise a list with
            one ``self.sample`` result per successfully processed row.
        """
        db = ImageCollection()
        sample_cache = "{}-{}".format(feature_object.name + '-based',
                                      'all-products')
        # One path for both reading and writing the cache file.
        cache_file = os.path.join(Config.engine_cache_path(), sample_cache)

        try:
            # NOTE(review): unpickling is only safe while the cache directory
            # is writable by trusted code alone.
            with open(cache_file, "rb") as fp:
                return cPickle.load(fp)
        except Exception:
            # Missing, unreadable or corrupt cache: rebuild it below.
            pass

        samples = []

        i = 0
        data = db.get_collection()
        for d in data.itertuples():
            try:
                cls1, cls2, img = getattr(d, "cls1"), getattr(
                    d, "cls2"), getattr(d, "img")
                image = Config.product_image_path()
                if len(cls1) > 0:
                    image = image + '/' + cls1
                if len(cls2) > 0:
                    image = image + '/' + cls2
                image = image + '/' + img
                samples.append(
                    self.sample(image, cls1, cls2, feature_object))
                i = i + 1
                # Progress output: number of samples built so far.
                print(i)
            except Exception:
                # Best effort: skip a bad row, but let Ctrl-C stop the loop.
                continue

        with open(cache_file, 'wb') as fp:
            dill.dump(samples, fp)

        return samples
コード例 #5
0
    def find(self, img, cls1, cls2, feature_object):
        """ Rank the collection samples by distance to the query image """
        limit = Config.compareDepth()

        extractor = Feature()
        # sample() returns a one-element list; the query is its only entry.
        reference = extractor.sample(img, cls1, cls2, feature_object)[0]

        # Each collection entry is itself a one-element list.
        ranked = sorted(
            ({
                'idx': entry[0]['img'],
                'cls1': entry[0]['cls1'],
                'cls2': entry[0]['cls2'],
                'dis': self.distance(reference['hist'], entry[0]['hist'])
            } for entry in extractor.samples(feature_object)),
            key=lambda match: match['dis'])

        # Truncate only when a depth is configured and enough results exist.
        if limit and limit <= len(ranked):
            return ranked[:limit]
        return ranked
コード例 #6
0
    def category(self, category, url, page, image_paths, mysql):
        """Fetch every product listed at ``url`` and insert it into the database.

        Each ``li.product-card-wrapper`` element of the page is parsed for its
        title, link, photo, prices and discount.  The product is stored via
        ``mysql.insertProduct`` and, when an id comes back, its 200x200 photo
        is downloaded to ``<product image path>/<category path>/<id>.jpg``.
        A product that fails at any step is skipped so that one bad card does
        not abort the page.

        Args:
            category: Section being crawled; only used to pick a fallback
                category id when ``self.product(title)`` returns <= 0.
            url: Address of the listing page to fetch.
            page: Page number, used for progress output only.
            image_paths: Mapping of category id to image sub-directory.
            mysql: Object providing ``insertProduct``.
        """

        def _price_text(node):
            # "1.234,50 TL" -> "1234.50": drop the unit, drop the '.'
            # separators, then turn the ',' into a decimal point.
            return node.text.encode('utf-8').strip().decode('utf8') \
                .replace(" TL", "").replace(".", "").replace(",", ".")

        print('Page: ' + str(page))
        print()
        print()
        print()

        curl = Curl()
        body = curl.fetch(url)
        soup = BeautifulSoup(body, "html.parser")
        products = soup.findAll('li', attrs={'class': 'product-card-wrapper'})
        for product in products:
            try:
                soup = BeautifulSoup(product.encode('utf-8'), "html.parser")
                # title
                title = soup.find('img', attrs={'class': 'product-image'})
                title = title.get('title').encode('utf-8').strip()
                title = title.decode('utf-8').split('/////')
                title = title[0]
                # link
                link = soup.find('a', attrs={'class': 'product-detail-link'})
                link = link.get('href').encode('utf-8').strip()
                link = link.decode('utf-8')
                # photo: a placeholder src means the real address is in the
                # data-original attribute
                photo = soup.find('img', attrs={'class': 'product-image'})
                photo = photo.get('src').encode('utf-8').strip()
                if photo.decode('utf-8') == '/Content/images/defaultThumb.jpg':
                    photo = soup.find('img', attrs={'class': 'product-image'})
                    photo = photo.get('data-original').encode('utf-8').strip()
                photo = photo.decode('utf-8').split(
                    "https://img-trendyol.mncdn.com/")
                photo = "https://img-trendyol.mncdn.com/mnresize/200/200/" + photo[
                    1]
                # old price (0 when the element is missing)
                try:
                    old_price = _price_text(
                        soup.find('span',
                                  attrs={'class': 'product-market-price'}))
                except Exception:
                    old_price = 0

                # new price (0 when the element is missing)
                try:
                    new_price = _price_text(
                        soup.find('span',
                                  attrs={'class': 'product-sale-price'}))
                except Exception:
                    new_price = 0
                # discount (0 when the element is missing)
                try:
                    discount = soup.find('div', attrs={'class': 'discountBox'})
                    soup = BeautifulSoup(discount.encode('utf-8'),
                                         "html.parser")
                    discount = soup.find('span')
                    discount = discount.text.encode('utf-8').strip().decode(
                        'utf8').replace(" TL", "").replace("%", "")
                except Exception:
                    discount = 0
                # currency
                currency = 'TRY'
                # category: fall back to a fixed id per section when the
                # title cannot be classified
                category_id = self.product(title)
                if category_id <= 0:
                    if category == 'women':
                        category_id = 11
                    elif category == 'men':
                        category_id = 16
                    elif category == 'baby':
                        category_id = 32
                    else:
                        category_id = 27

                # image path
                image_path = image_paths[category_id]

                # insert
                product_id = mysql.insertProduct(self.site_id, category_id,
                                                 title, new_price, old_price,
                                                 discount, currency, link)
                if product_id:
                    image_file = (Config.product_image_path() + '/' +
                                  image_path + '/' + str(product_id) + '.jpg')
                    curl.download(photo, image_file)

                    print('Added: ' + str(product_id))
                    print('Category: ' + str(category_id))
                    print('Image Path: ' + str(image_path))
                    if os.path.exists(image_file):
                        print('Image is added.')
                    else:
                        print('Image is not added.')
                    print('Url: ' + link)
                    print()
                    print()
                    print()

                # sleep
                time.sleep(self.sleep)
            except Exception:
                # Skip the broken product; unlike a bare except this lets
                # Ctrl-C (raised during the sleep) stop the crawl.
                continue
コード例 #7
0
    def category(self, url, page, image_paths, mysql):
        """Fetch every product listed at ``url`` and insert it into the database.

        Each ``li.search-item`` element of the page is parsed for its title,
        link, photo, prices and discount.  Products for which
        ``self.product(link)`` returns <= 0 are skipped.  The others are stored
        via ``mysql.insertProduct`` and, when an id comes back, the photo is
        downloaded to ``<product image path>/<category path>/<id>.jpg``.
        A product that fails at any step is skipped as well.

        Args:
            url: Address of the listing page to fetch.
            page: Page number, used for progress output only.
            image_paths: Mapping of category id to image sub-directory.
            mysql: Object providing ``insertProduct``.
        """

        def _price_text(node):
            # "1.234,50 TL" -> "1234.50": drop the unit, drop the '.'
            # separators, then turn the ',' into a decimal point.
            return node.text.encode('utf-8').strip().decode('utf8') \
                .replace(" TL", "").replace(".", "").replace(",", ".")

        print('Page: ' + str(page))
        print()
        print()
        print()

        curl = Curl()
        body = curl.fetch(url)
        soup = BeautifulSoup(body, "html.parser")
        products = soup.findAll('li', attrs={'class': 'search-item'})
        for product in products:
            try:
                soup = BeautifulSoup(product.encode('utf-8'), "html.parser")
                # title
                title = soup.find('h3', attrs={'class': 'product-title'})
                title = title.get('title').encode('utf-8').strip()
                title = title.decode('utf-8').split('/////')
                title = title[0]
                # link
                link = soup.find('a')
                link = link.get('href').encode('utf-8').strip()
                link = link.decode('utf-8')
                # photo: rebuilt from segments 4 and 6 of the original src so
                # that the 200px variant is requested
                photo = soup.find('img', attrs={'class': 'product-image'})
                photo = photo.get('src').encode('utf-8').strip()
                photo = photo.decode('utf-8').split("/")
                photo = "https://productimages.hepsiburada.net/s/" + photo[
                    4] + "/200/" + photo[6]
                # old price (0 when the element is missing)
                try:
                    old_price = _price_text(
                        soup.find('del', attrs={'class': 'product-old-price'}))
                except Exception:
                    old_price = 0
                # new price: try span.product-old-price first, then
                # span.product-price; a failure of the second lookup skips
                # the product through the outer handler
                try:
                    new_price = _price_text(
                        soup.find('span',
                                  attrs={'class': 'product-old-price'}))
                except Exception:
                    new_price = _price_text(
                        soup.find('span', attrs={'class': 'product-price'}))
                # discount (0 when the element is missing)
                try:
                    discount = soup.find('div',
                                         attrs={'class': 'discount-badge'})
                    soup = BeautifulSoup(discount.encode('utf-8'),
                                         "html.parser")
                    discount = soup.find('span')
                    discount = discount.text.encode('utf-8').strip().decode(
                        'utf8').replace(" TL", "")
                except Exception:
                    discount = 0
                # currency
                currency = 'TRY'
                # category
                category_id = self.product(link)
                if category_id <= 0:
                    continue
                # image path
                image_path = image_paths[category_id]

                # insert
                product_id = mysql.insertProduct(self.site_id, category_id,
                                                 title, new_price, old_price,
                                                 discount, currency, link)
                if product_id:
                    image_file = (Config.product_image_path() + '/' +
                                  image_path + '/' + str(product_id) + '.jpg')
                    curl.download(photo, image_file)

                    print('Added: ' + str(product_id))
                    print('Category: ' + str(category_id))
                    print('Image Path: ' + str(image_path))
                    if os.path.exists(image_file):
                        print('Image is added.')
                    else:
                        print('Image is not added.')
                    print('Url: ' + link)
                    print()
                    print()
                    print()

                # sleep
                time.sleep(1)
            except Exception:
                # Skip the broken product; unlike a bare except this lets
                # Ctrl-C (raised during the sleep) stop the crawl.
                continue
コード例 #8
0
    def category(self, category, url, page, image_paths, mysql):
        """Fetch every product listed at ``url`` and insert it into the database.

        Each ``div.pro-product`` element of the page is parsed for its title,
        link, photo and prices; the discount percentage is computed from the
        two prices.  The product is stored via ``mysql.insertProduct`` and,
        when an id comes back, its photo is downloaded to
        ``<product image path>/<category path>/<id>.jpg``.  A product that
        fails at any step is skipped so that one bad card does not abort the
        page.

        Args:
            category: Section being crawled; only used to pick a fallback
                category id when ``self.product(title)`` returns <= 0.
            url: Address of the listing page to fetch.
            page: Page number, used for progress output only.
            image_paths: Mapping of category id to image sub-directory.
            mysql: Object providing ``insertProduct``.
        """

        def _price_text(node):
            # "1.234,50 TL" -> "1234.50": drop the unit, drop the '.'
            # separators, then turn the ',' into a decimal point.
            return node.text.encode('utf-8').strip().decode('utf8') \
                .replace(" TL", "").replace(".", "").replace(",", ".")

        print('Page: ' + str(page))
        print()
        print()
        print()

        curl = Curl()
        body = curl.fetch(url)
        soup = BeautifulSoup(body, "html.parser")
        products = soup.findAll('div', attrs={'class': 'pro-product'})
        for product in products:
            try:
                soup = BeautifulSoup(product.encode('utf-8'), "html.parser")
                # title
                title = soup.find('img', attrs={'class': 'visible'})
                title = title.get('alt').encode('utf-8').strip()
                title = title.decode('utf-8').split('/////')
                title = title[0]
                # link
                link = soup.find('a', attrs={'class': 'pro-product-title'})
                link = link.get('href').encode('utf-8').strip()
                link = link.decode('utf-8')
                # photo: request the 200x200 variant instead of 480x640
                photo = soup.find('img', attrs={'class': 'visible'})
                photo = photo.get('data-original').encode('utf-8').strip()
                photo = photo.decode('utf-8').replace("480/640", "200/200")
                # old price (0 when the element is missing)
                try:
                    old_price = _price_text(
                        soup.find(
                            'div',
                            attrs={'data-pro-product-info': 'actual_price'}))
                except Exception:
                    old_price = 0

                # new price (0 when the element is missing)
                try:
                    new_price = _price_text(
                        soup.find(
                            'div',
                            attrs={'data-pro-product-info': 'sale_price'}))
                except Exception:
                    new_price = 0
                # discount: percentage saved, 0 when there is no old price or
                # a price cannot be parsed as a number
                try:
                    if float(old_price) == 0:
                        discount = 0
                    else:
                        discount = round(
                            (1 - (float(new_price) / float(old_price))) * 100)
                except Exception:
                    discount = 0
                # currency
                currency = 'TRY'
                # category: fall back to a fixed id per section when the
                # title cannot be classified
                category_id = self.product(title)
                if category_id <= 0:
                    if category == 'women':
                        category_id = 11
                    elif category == 'men':
                        category_id = 16
                    elif category == 'baby':
                        category_id = 32
                    elif category == 'cosmetic':
                        category_id = 26
                    else:
                        category_id = 27

                # image path
                image_path = image_paths[category_id]

                # insert
                product_id = mysql.insertProduct(self.site_id, category_id,
                                                 title, new_price, old_price,
                                                 discount, currency, link)
                if product_id:
                    image_file = (Config.product_image_path() + '/' +
                                  image_path + '/' + str(product_id) + '.jpg')
                    curl.download(photo, image_file)

                    print('Added: ' + str(product_id))
                    print('Category: ' + str(category_id))
                    print('Image Path: ' + str(image_path))
                    if os.path.exists(image_file):
                        print('Image is added.')
                    else:
                        print('Image is not added.')
                    print('Url: ' + link)
                    print()
                    print()
                    print()
                # sleep
                time.sleep(self.sleep)
            except Exception:
                # Skip the broken product; unlike a bare except this lets
                # Ctrl-C (raised during the sleep) stop the crawl.
                continue
コード例 #9
0
    def post(self):
        """Search the collection for products that look like an uploaded photo.

        Request arguments:
            unique_id: Required; names the stored search image.
            photo: Optional uploaded file, saved as
                ``<search image path>/<unique_id>.jpg``.

        The stored image is shrunk to a width of 128 pixels, compared against
        the collection with ``Compare.find`` using ``DeepFeature``, and the
        matching products are looked up in MySQL.

        Returns:
            ``({'unique_id': ..., 'products': [...]}, 201)`` on success, or
            ``({'success': False}, 400)`` when ``unique_id`` contains a path
            separator or the image cannot be opened, resized or saved.
        """

        parser = reqparse.RequestParser(bundle_errors=True)
        parser.add_argument('unique_id',
                            required=True,
                            help="Unique ID is required.")
        parser.add_argument('photo',
                            type=werkzeug.FileStorage,
                            location='files')
        args = parser.parse_args()
        unique_id = args['unique_id']

        # unique_id is client-supplied and becomes part of a file name; refuse
        # anything that could point outside the search image directory.
        if any(token in unique_id for token in ('/', '\\', '\x00')):
            return {'success': False}, 400

        # Save image
        image_name = Config.search_image_path() + '/' + unique_id + '.jpg'
        if args['photo']:
            photo = args['photo']
            photo.save(image_name)

        # Resize image
        try:
            im = Image.open(image_name)
            width, height = im.size
            new_width = 128
            new_height = new_width * height / width
            size = new_width, new_height
            # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same
            # filter under its current name.
            resample = getattr(Image, 'LANCZOS', None)
            if resample is None:
                resample = Image.ANTIALIAS
            im.thumbnail(size, resample)
            im.save(image_name, "JPEG")
        except Exception:
            # Covers a missing upload (no file to open) and unreadable images.
            return {'success': False}, 400

        # Image engine
        compare = Compare()
        result = compare.find(image_name, 'search', '', DeepFeature())

        # Products: the id is the image file name without its extension
        product_id_list = [
            image['idx'].split('/')[-1].split('.')[0] for image in result
        ]

        mysql = MySQL()
        image_paths = mysql.getImagePaths(False)
        products = mysql.getProducts(product_id_list)
        response = []
        for row in products:
            response.append({
                'url':
                row['ecommerce_url'] + '/' + row['link'],
                'image':
                Config.base_url() + '/' + Config.product_image_path() + '/' +
                image_paths[row['category_id']] + '/' + str(row['id']) +
                '.jpg',
                'name':
                mysql.latinToUnicode(row['name']),
                'price':
                str(row['price']),
                'currency':
                str(row['currency']),
                'discount':
                str(row['discount']),
                'merchant':
                row['ecommerce_name'],
            })

        register = {'unique_id': args['unique_id'], 'products': response}
        return register, 201
コード例 #10
0
 def __init__(self):
     """Read the image paths from Config and load the collection CSV."""
     self.product_path = Config.product_image_path()
     self.collection_csv_path = Config.image_collection_path()
     # Runs before the CSV is read; presumably it (re)builds the CSV at
     # collection_csv_path - confirm against set_collection().
     self.set_collection()
     # DataFrame with the contents of the collection CSV.
     self.collection = pd.read_csv(self.collection_csv_path)