def save_published_id(self, published_offer_list):
        """Record the published id on every locally stored offer, per category.

        For each entry of the module-level ``categories`` the stored offers are
        loaded from ``offers.json`` and matched against ``published_offer_list``
        by their ``link`` field. A matched offer receives the ``published_id``
        of its published counterpart; an unmatched one is handed to
        ``process_delete`` and dropped from the stored list. The updated list
        is written back to ``offers.json``.

        Args:
            published_offer_list: Offers as currently published; each entry
                must expose ``link`` and ``id``.
        """
        print( "Salving Ids")

        for cat in categories:
            print("\t{0}".format(cat['name']))

            # Offers stored locally for this category
            stored_offers = IOUtil.json_to_dic('offers.json', cat['id'])
            removed = 0

            # Walk backwards so removing the current element does not shift
            # the elements that are still to be visited.
            for stored_offer in tqdm(reversed(stored_offers)):
                match = self.find_offer(stored_offer, published_offer_list, field='link')

                if match is not None:
                    stored_offer['published_id'] = match['id']
                    continue

                # Not published: in theory it was refused, so it leaves the
                # local base as well.
                self.process_delete([stored_offer])
                stored_offers.remove(stored_offer)
                removed += 1

            # Persist the changes
            print("Amount of removed offers:", removed)
            IOUtil.dic_to_json('offers.json', cat['id'], stored_offers)
    def fill_gtin(self, offers, category):
        """Attach a GTIN to each offer and drop the offers left without one.

        The GTIN of an offer is looked up, in order: in the local
        ``gtin.json`` store by offer id; on the web through ``request_gtin``;
        and finally under any stored key that contains the product id (the
        part of the offer id before the first ``_``). Every GTIN found is
        written back to ``gtin.json``.

        Args:
            offers: Offers to process; each needs ``id``, ``names`` and
                ``brand``.
            category: Category identifier passed to ``IOUtil`` for
                ``gtin.json``.

        Returns:
            The processed offers that carry a ``gtin`` key.
        """
        gtin_store = IOUtil.json_to_dic('gtin.json', category)
        print( "Requesting GTIN by offer...")

        def resolve_gtin(offer, known_gtins):
            """Return ``offer`` with ``gtin`` set when one can be found."""
            if offer['id'] in known_gtins:
                # Already stored locally for this offer
                gtin = known_gtins[offer['id']]
            else:
                # Try the web lookup first
                gtin = request_gtin(offer['names'], offer['brand'])

                if gtin is None:
                    # Fall back to another stored offer of the same product
                    product_id = offer['id'].split('_')[0]
                    sibling_id = next(
                        (key for key in known_gtins if product_id in key),
                        None,
                    )
                    if sibling_id is not None:
                        gtin = known_gtins[sibling_id]

            if gtin is not None:
                offer['gtin'] = gtin

            return offer

        # One job per offer, spread over all CPU cores
        workers = mp.cpu_count()
        offers = Parallel(n_jobs=workers)(
            delayed(resolve_gtin)(offer, gtin_store) for offer in tqdm(offers)
        )

        offers_without_gtin = []
        # Reverse walk so removing the current offer is safe
        for offer in reversed(offers):
            if 'gtin' not in offer:
                # Kept aside for a manual search and withheld from publishing
                offers_without_gtin.append(offer)
                offers.remove(offer)
                continue

            # Remember the GTIN locally for future runs
            gtin_store[offer['id']] = offer['gtin']

        # Persist the GTIN store
        IOUtil.dic_to_json('gtin.json', category, gtin_store)

        print("Amount of offers without GTIN: ", len(offers_without_gtin))
        return offers
Beispiel #3
0
def request_gtin(descriptions, brand):
    """Search cosmos.bluesoft.com.br for a GTIN matching one of the descriptions.

    The descriptions are tried in order. For each one the search page is
    requested (up to 5 attempts until a 200 response) and the first result is
    inspected: when its title contains ``brand`` (case-insensitive) or the
    substring ``'moto'``, the last link of that result is taken as the GTIN,
    left-padded with zeros to 14 characters, and the search stops.

    Args:
        descriptions: Sequence of product description strings to search for.
        brand: Brand name expected in the title of the first search result.

    Returns:
        The zero-padded GTIN string, or ``None`` when no description produced
        a match or the lookup failed.
    """
    response = None
    url = None
    # Bound up-front: with an empty ``descriptions`` or a request that raises
    # on the first attempt the loop never assigns it, and the checks below
    # would otherwise fail with UnboundLocalError.
    gtin = None

    try:
        for description in descriptions:

            url = "https://cosmos.bluesoft.com.br/pesquisar?utf8=%E2%9C%93&q={0}"
            url = url.format(description)

            # Repeat the same request at most 5 times
            count_request = 0
            while count_request < 5:
                response = requests.get(url)
                if response.status_code == 200:
                    break
                count_request += 1

            gtin = None
            try:

                soup = BeautifulSoup(response.content,
                                     "lxml",
                                     from_encoding="utf8")
                r = soup.find('body').find('div', id='container-principal')
                r = r.find('section',
                           class_='col-md-6 col-lg-6 col-xs-12 main')
                r = r.find('div',
                           id='results').find('div',
                                              class_='list-group').find('ul')
                r = r.find('li', class_='product-list-item item').find(
                    'div', class_='content')

                # Only accept the result when it is from the same brand
                title = r.find('h5').find('a').get_text().lower()
                if brand.lower() in title or 'moto' in title:
                    gtin = r.find('ul').find_all('a')[-1].get_text()
                    gtin = gtin.zfill(14)
                    break

            except Exception:
                # Best effort: a page without the expected structure simply
                # means "no GTIN for this description"; try the next one.
                pass
    except Exception:
        # Best effort: a failed request ends the search with whatever was
        # found so far. KeyboardInterrupt/SystemExit are no longer swallowed.
        pass

    # Guarded so the diagnostics below cannot raise IndexError on empty input
    first_description = descriptions[0] if descriptions else None

    if response is not None and response.status_code != 200:
        print("WARNING: GTIN site status_code = {0}".format(
            response.status_code))
        print("URL: ", url, "Product description", first_description)

    if gtin is None:
        IOUtil.save_log("WARNING: GTIN not found = ({0}, {1})".format(
            brand, first_description))

    return gtin
Beispiel #4
0
def main(sellerApi, category):
    """Publish the offers queued in ``to_add.json`` for one category.

    Steps: load the queue, optionally fill GTINs (when ``category['gtin']``
    is truthy), run ``sellerApi.process_add``, upload the site data, wait
    120 seconds and send the offers through ``insert_batch.do``.

    Args:
        sellerApi: Object providing ``fill_gtin`` and ``process_add``.
        category: Mapping with at least ``id`` and ``gtin``.
    """
    category_id = category['id']

    # Offers waiting to be added
    pending = IOUtil.json_to_dic('to_add.json', category_id)

    # Nothing queued: stop here
    if len(pending) == 0:
        print("No offer to add.")
        return

    # Look up and fill in the GTIN when the category asks for it
    if category['gtin']:
        pending = sellerApi.fill_gtin(pending, category_id)

    print("Processing to sent...")
    pending = sellerApi.process_add(pending)

    # Push the updated data to the website
    print("Updating the website...")
    upload_to_site()

    # Pause for two minutes (120 s) before sending the batch
    print("Waiting for 2 minute to upload to Google...")
    time.sleep(120)

    # Send to Shopping
    insert_batch.do(pending)
    def get_offers_by_product_id(self, id, results=1):
        """Fetch the offers of one product and enrich them with product data.

        The offers are requested from the ``findOfferList`` service, parsed by
        ``self._get_offers_from_json`` and then each offer gets the product
        names appended to its ``names`` (sorted longest first) and a
        ``specification`` text built from the product's specification items.

        Args:
            id: Product id sent as the ``productid`` query parameter.
            results: Value of the ``results`` query parameter.

        Returns:
            The list produced by ``_get_offers_from_json``, enriched in place.
        """
        # TEMP: workaround that goes through the v1 API
        # http://bws.buscape.com.br/service/findOfferList/lomadee/65717751673178504d42633d/BR/?sourceId=36029361&productid=657637&format=json&encode=utf-8&sort=price&results=10
        endpoint = "http://bws.buscape.com.br/service/findOfferList/lomadee/{0}/BR/?".format(
            "65717751673178504d42633d")
        query = "{0}&{1}&".format("sourceId=36029361", "productid=" + str(id))
        query += "format=json&encode=utf-8&sort=price&results={0}".format(results)

        # Request the offers
        payload = IOUtil.request_json_from_url(endpoint + query)

        # Technical specifications, one "label: value" line per item
        product = payload['product'][0]['product']
        spec_lines = []
        if 'item' in product['specification']:
            for entry in product['specification']['item']:
                spec_lines.append(
                    entry['item']['label'] + ": " + entry['item']['value'][0] + "\n")
        specification = "".join(spec_lines)

        product_names = [product['productname']]
        if 'productshortname' in product:
            product_names.append(product['productshortname'])

        found = self._get_offers_from_json(payload)
        for offer in found:
            offer['names'].extend(product_names)
            offer['names'].sort(key=len, reverse=True)
            offer['specification'] = specification

        return found
 def get_products_id(self, url, category=None, page=None, max_pages=None, return_cat_id=False):
     """Collect product ids from a paginated listing.

     Args:
         url: URL template with a ``{page}`` placeholder.
         category: Forwarded to ``_get_product_ids_from_json``.
         page: When given, only that page is fetched and returned.
         max_pages: Optional upper bound on the number of pages read.
         return_cat_id: Forwarded to ``_get_product_ids_from_json``.

     Returns:
         The ids of the requested page, or of page 1 followed by every
         further page up to the (possibly capped) total.
     """
     # First request: the requested page, or page 1 when none was given
     first_page = page if page is not None else 1
     first_json = IOUtil.request_json_from_url(url.format(page=first_page))
     products = self._get_product_ids_from_json(first_json, category, return_cat_id)

     # A specific page was asked for: nothing else to fetch
     if page is not None:
         return products

     # Number of pages to read, optionally capped
     last_page = int(first_json['pagination']['totalPage'])
     if max_pages is not None:
         last_page = min(last_page, max_pages)

     def fetch_page(page_url, page_number):
         """Download one page and return the product ids found on it."""
         page_json = IOUtil.request_json_from_url(page_url.format(page=page_number))
         return self._get_product_ids_from_json(page_json, category, return_cat_id)

     # Remaining pages (2..last_page) are fetched in parallel
     workers = mp.cpu_count()
     per_page = Parallel(n_jobs=workers)(
         delayed(fetch_page)(url, number) for number in range(2, last_page + 1)
     )

     # Flatten the per-page results into the return list
     for page_products in per_page:
         products.extend(page_products)

     return products
    def process_offers(self, new_offer_list, category):
        """Diff freshly fetched offers against the locally stored ones.

        Stored offers that no longer appear in ``new_offer_list`` are removed
        from the local base (their ``published_id``, when present, is queued
        for deletion). New offers are queued for adding; offers for which
        ``has_updates`` is true replace the stored version and, when the
        stored one had a ``published_id``, are queued for updating. The local
        base is saved back to ``offers.json``.

        Args:
            new_offer_list: Offers just fetched for the category.
            category: Mapping whose ``id`` selects the ``offers.json`` data.

        Returns:
            Tuple ``(offer_to_add, offer_to_delete, offer_to_update)``;
            ``offer_to_delete`` holds published ids, the other two hold offers.
        """
        print("Processing the offers...")

        # Stored offers of this category plus the three result buckets
        stored = IOUtil.json_to_dic('offers.json', category['id'])
        to_add, to_delete, to_update = [], [], []

        print("\tStep 1/2")
        # Reverse walk so the current element can be removed safely
        for stored_offer in tqdm(reversed(stored)):
            if self.find_offer(stored_offer, new_offer_list) is not None:
                continue

            # No longer available: unpublish when it had been published
            if 'published_id' in stored_offer:
                to_delete.append(stored_offer['published_id'])
            stored.remove(stored_offer)

        print("\tStep 2/2")
        for new_offer in tqdm(new_offer_list):
            known = self.find_offer(new_offer, stored)

            # Unknown offer: publish it and keep it locally
            if known is None:
                to_add.append(new_offer)
                stored.append(new_offer)
                continue

            if not self.has_updates(known, new_offer):
                continue

            # Known offer with changes: swap the stored version for the new one
            stored.remove(known)
            if 'published_id' in known:
                # Only an already published offer can be updated remotely
                new_offer['published_id'] = known['published_id']
                to_update.append(new_offer)
            stored.append(new_offer)

        # Save the local base
        IOUtil.dic_to_json('offers.json', category['id'], stored)
        print("offer_to_add: ", len(to_add))

        return to_add, to_delete, to_update
Beispiel #8
0
def main(sellerApi, category):
    """Re-process and send the offers queued in ``to_update.json``.

    Args:
        sellerApi: Object providing ``process_delete`` and ``process_add``.
        category: Mapping whose ``id`` selects the queued data.
    """
    queued = IOUtil.json_to_dic('to_update.json', category['id'])

    if len(queued) > 0:
        # Drop the old local artefacts, rebuild them, then push the update
        sellerApi.process_delete(queued)
        refreshed = sellerApi.process_add(queued)

        print("Updating offers....")
        update.do(refreshed)
    else:
        print( "No offer to update.")
Beispiel #9
0
def get_categories_by_store(page=None, max_pages=None):
    """Collect categories from a paginated listing.

    The first page (or ``page`` when given) is requested and handed to
    ``process_json``, which fills the ``categories`` dict. Without an explicit
    ``page`` every further page up to and including the reported total is
    processed the same way.

    NOTE(review): ``url`` and ``process_json`` are module-level names that are
    not visible in this snippet; ``url`` is expected to contain a ``{page}``
    placeholder.

    Args:
        page: When given, only that page is fetched.
        max_pages: Optional upper bound on the number of pages read.

    Returns:
        The dict populated by ``process_json``.
    """
    categories = {}

    # Fetch one page: the requested one, or page 1
    json_data = IOUtil.request_json_from_url(url.format(page=1 if page is None else page))
    process_json(json_data, categories)

    # Only the requested page was wanted
    if page is not None:
        return categories

    # Number of pages to read, optionally capped
    total_pages = int(json_data['pagination']['totalPage'])
    if max_pages is not None:
        total_pages = min(total_pages, max_pages)

    # Pages are numbered from 1 and page 1 is already processed, so continue
    # at 2; the upper bound is inclusive so the last page is not skipped.
    for i in tqdm(range(2, total_pages + 1)):
        json_data = IOUtil.request_json_from_url(url.format(page=i))
        process_json(json_data, categories)

    return categories
def main(category):
    """Delete the offers queued in ``to_delete.json`` for one category.

    Args:
        category: Mapping whose ``id`` selects the queued data.
    """
    queued = IOUtil.json_to_dic('to_delete.json', category['id'])
    total = len(queued)

    if total == 0:
        print("No offer to delete.")
        return

    # Push the updated data to the website first
    print("Updating the website...")
    upload_to_site()

    print("Deleting {0} offers...".format(total))
    delete_batch.do(queued)
    def find_offer(self, item, list_of_item, field="id"):
        """Return the first entry of ``list_of_item`` matching ``item`` on ``field``.

        Two values match when, after stripping and lower-casing, one is a
        substring of the other. Every match is logged, and a further log line
        is written when more than one entry matches.

        Args:
            item: Mapping being searched for; must contain ``field`` (and
                ``id`` when several entries match).
            list_of_item: Candidate mappings, each containing ``field``.
            field: Key compared on both sides.

        Returns:
            The first matching entry, or ``None`` when nothing matches.
        """
        def _normalize(text):
            return text.strip().lower()

        # Every candidate that matches, in list order
        matches = []

        for candidate in list_of_item:
            candidate_value = _normalize(candidate[field])
            wanted_value = _normalize(item[field])
            if candidate_value in wanted_value or wanted_value in candidate_value:
                matches.append(candidate)
                IOUtil.save_log("Olha isso: " + candidate_value + ", " + wanted_value)

        # More than one hit means something is wrong with the data
        if len(matches) > 1:
            IOUtil.save_log("BUG: more than one product {0} with the same {1}: ".format( len(matches), field ) + item['id'])

        return matches[0] if matches else None
    def brands_updates(self, _categories=None, to_json=True):
        """Build the mapping of category id to the brand options of that category.

        For every category the category endpoint is requested and the
        ``options`` of its first filter are stored under ``str(category id)``.
        Categories without filters are skipped. The mapping is passed through
        ``self.remove_brands`` before being returned.

        Args:
            _categories: Categories to inspect; defaults to the module-level
                ``categories``.
            to_json: When true, the result is also written to ``brands.json``.

        Returns:
            The value returned by ``remove_brands``.
        """
        url = self._get_url('category/_id/{0}')
        brands_by_category = defaultdict(list)
        selected = categories if _categories is None else _categories

        # Go through every listed category
        print("Searching brands by category")
        for category in tqdm(selected):
            # Fetch the category information
            category_json = IOUtil.request_json_from_url(url.format(category['id']))

            # Only the filters are of interest
            filters = category_json['categories'][0]['filters']
            if len(filters) == 0:
                continue

            # The first filter carries the brand options; keep a copy of them
            brand_options = filters[0]['options']
            brands_by_category[str(category['id'])] = [brand for brand in brand_options]

        # Clean up and optionally dump every brand to JSON
        brands_by_category = self.remove_brands(brands_by_category)
        if to_json:
            IOUtil.dic_to_json('brands.json', None, brands_by_category)

        return brands_by_category
    def get_offers(self, category):
        """Collect the offers of every product of every brand in a category.

        The brands come from ``brands.json`` (entry ``str(category['id'])``).
        For each brand the product ids are listed with ``get_products_id`` and
        the offers of each product are fetched in parallel with
        ``get_offers_by_product_id``; every offer is tagged with the brand name.

        Args:
            category: Mapping with ``id`` and ``name``.

        Returns:
            Flat list with all offers found.
        """
        print("Searching Offers...")

        offers = []
        request_params = ["filters={filters}", "page={page}"]
        url = self._get_url('product/_category/{cat}', size=100, request_params=request_params)

        brands_by_category = IOUtil.json_to_dic('brands.json', None)

        print("\tGetting Offers from Category: ", category['name'])
        count_before = len(offers)

        brands = brands_by_category[str(category['id'])]
        print("\tLooking in {0} Brands.".format(len(brands)))

        def offers_for_product(product_id, brand_name):
            """Fetch the offers of one product and stamp the brand on them."""
            product_offers = self.get_offers_by_product_id(product_id)
            for product_offer in product_offers:
                product_offer['brand'] = brand_name
            return product_offers

        # One pass per brand of this category
        brand_total = len(brands)
        for position, brand in enumerate(brands):
            print("\t\tBrand[{0}/{1}]: {2}".format(position, brand_total, brand['name']))
            print( "\t\tGetting products ids...")

            # Keep the page placeholder so get_products_id can fill it in
            brand_url = url.format(cat=category['id'], filters=brand['id'], page="{page}")
            product_ids = self.get_products_id(brand_url, category)

            # Offers of each product of this brand, fetched in parallel
            print( "\t\tGetting offers from products ids...")

            workers = mp.cpu_count()
            offers_per_product = Parallel(n_jobs=workers)(
                delayed(offers_for_product)(product_id, brand['name'])
                for product_id in tqdm(product_ids)
            )

            for product_offers in offers_per_product:
                offers.extend(product_offers)

        print("\tAumont of '{0}' found: {1}.".format( category['name'], len(offers) - count_before))
        print("\tAll found offers: {0}.\n".format(len(offers)))

        return offers
def main(sellerApi, category):
    """Fetch the offers of a category and queue the resulting changes.

    When ``category['id']`` is ``None`` the top offers are fetched instead.
    The offers are diffed with ``sellerApi.process_offers`` and the three
    resulting lists are written to ``to_add.json``, ``to_update.json`` and
    ``to_delete.json``.

    Args:
        sellerApi: Object providing ``get_top_offers``, ``get_offers`` and
            ``process_offers``.
        category: Mapping with at least ``id``.
    """
    timer = Timer()

    timer.start()
    if category['id'] is not None:
        offers = sellerApi.get_offers(category)
    else:
        print("Top offers...")
        offers = sellerApi.get_top_offers()
    fetch_label = "Time elapsed to get {0} produts: ".format(len(offers))
    print(fetch_label, timer.diff())

    timer.start()
    adds, deletes, updates = sellerApi.process_offers(offers, category)
    print("Time elapsed to process the products: ", timer.diff())

    print("Produts to add: ", len(adds))
    print("Produts to update: ", len(updates))
    print("Produts to delete: ", len(deletes))

    # Persist each queue for the follow-up scripts
    queues = (
        ('to_add.json', adds),
        ('to_update.json', updates),
        ('to_delete.json', deletes),
    )
    for filename, queue in queues:
        IOUtil.dic_to_json(filename, category['id'], queue)
 def process_batch(url, page):
     """Download one page of ``url`` and return the product ids found on it.

     NOTE(review): ``self``, ``category`` and ``return_cat_id`` are free
     names here; this fragment presumably lives inside a method that
     defines them.
     """
     return self._get_product_ids_from_json(
         IOUtil.request_json_from_url(url.format(page=page)),
         category,
         return_cat_id,
     )
Beispiel #16
0
from io_util import IOUtil

# Load the song list through IOUtil and print it.
song_list = IOUtil.read_song_list()
print(song_list)
Beispiel #17
0
 def save_music(music, music_name):
     """Write ``music`` to a file named ``music_name`` under ``IOUtil.get_path()``.

     Args:
         music: Bytes-like content to store (the file is opened in binary mode).
         music_name: File name appended to the target directory.
     """
     path = IOUtil.get_path()
     # The context manager closes the handle even when the write fails
     with open(path + '/' + music_name, "wb") as f:
         f.write(music)
     print('保存完毕:' + music_name + ' ' + path)
 def process_delete(self, offers):
     """Remove the locally stored ``.jpg`` image of every offer in ``offers``.

     The image path is ``local_folder + image_folder + "/" + id + ".jpg"``.
     A file that cannot be removed is logged and skipped, so one missing
     image does not stop the rest of the batch.

     Args:
         offers: Iterable of mappings, each with an ``id``.
     """
     for offer in offers:
         image_name = str(offer['id']) + ".jpg"
         try:
             os.remove(local_folder + image_folder + "/" + image_name)
         except OSError:
             # Only file-system failures are tolerated; programming errors
             # and KeyboardInterrupt are no longer reported as "not found".
             IOUtil.save_log("WARNING: " + image_name + " not found.")
from joblib import Parallel, delayed
import multiprocessing as mp
from tqdm import tqdm

from site_api import image_folder, site_url, local_folder
from util import download_file

from io_util import IOUtil

import os 

from GTIN import request_gtin
from util import str_utf8


# Category list read from categories.json when this module is imported.
categories = IOUtil.json_to_dic('categories.json', None)

# NOTE(review): these look like API credentials hard-coded in source -
# confirm whether they are secrets and, if so, load them from configuration.
APP_TOKEN = '1547052034937b19ab87d'
SOURCE_ID = '36029361'


class SellerAPI:
    def __init__(self):
        """Create the API wrapper with ``env`` set to ``"api"``."""
        self.env = "api"


    def set_test_env(self):
        """Switch ``env`` to ``"sandbox-api"``."""
        # NOTE(review): presumably ``env`` selects the API host used when
        # URLs are built - confirm against ``_get_url``.
        self.env = "sandbox-api"


    def set_production_env(self):
from io_util import IOUtil
from tqdm import tqdm
import os
from site_api import upload_to_site

# Master switch: when true, the local data, the product images and
# (further down) the published offers are wiped.
DELETE_ALL = True

if DELETE_ALL:

    # NOTE(review): ``glob`` is not imported in the visible lines - confirm
    # it is imported elsewhere in this script.
    files = glob("./Data/*")
    print("Number of files data to clean: ", len(files))
    for file in tqdm(files):
        # Paths containing 'pem' or 'gtin' are left untouched
        if 'pem' not in file and 'gtin' not in file:
            if '.json' in file:
                # JSON files are kept but emptied: a list for the offers
                # files, a dict for every other one
                if 'offers' in file:
                    IOUtil.dic_to_json(file, None, [], change_path=False)
                else:
                    IOUtil.dic_to_json(file, None, {}, change_path=False)
            else:
                # Anything that is not JSON is deleted
                os.remove(file)

    # Remove every stored product image
    files = glob("./woow/ImagesProducts/*")
    print("Number of images to remove: ", len(files))
    for file in tqdm(files):
        os.remove(file)

    # NOTE(review): ``list`` must be a project module shadowing the builtin
    # (it exposes ``do``); its import is not visible here - confirm.
    list_products = list.do()
    print("Number of published offers: ", len(list_products))

    delete_list = []
    for product in list_products:
Beispiel #21
0
from seller_api import SellerAPI
from io_util import IOUtil

# Fetch the top offers through SellerAPI and dump them to test.json.
api = SellerAPI()
offers = api.get_top_offers()
IOUtil.dic_to_json('test.json', None, offers)