def get_products(cat, subcat, url):
    """Scrape product cards from *url* and store each as a Product.

    For every ``div.company_pic`` on the page, the ``<img>`` title becomes
    the product name; entries matching STOP_WORDS or placeholder images
    ("no_image") are skipped. The picture is downloaded to a temp file and
    attached to the Product's ``image`` field.

    :param cat: Category instance assigned to every created product.
    :param subcat: SubCategory instance assigned to every created product.
    :param url: page URL to scrape (fetched with ``verify=False``).
    """
    print('Downloading from %s' % url)
    rez = requests.get(url, verify=False)
    soup = BeautifulSoup(rez.text, 'html.parser')
    # Fix: write and read the SAME path. The original wrote 'tmp.png'
    # relative to the current working directory but read it back from
    # BASE_DIR, which breaks whenever cwd != BASE_DIR.
    tmp_path = '%s/tmp.png' % BASE_DIR
    for item in soup.findAll('div', {'class': 'company_pic'}):
        img = item.find('img')
        # Guard against missing attributes so .find() never hits None.
        title = img.get('title') or ''
        src = img.get('src') or ''
        # Filter out unwanted entries (stop words / placeholder image).
        in_stop = any(title.find(w) > -1 for w in STOP_WORDS)
        if src.find('no_image') > -1:
            in_stop = True
        if in_stop:
            continue
        print(title)
        pr = Product()
        pr.category = cat
        pr.name = title
        pr.subcategory = subcat
        img_url = 'https://gastronoma.net/%s' % src
        img_response = requests.get(img_url, stream=True, verify=False)
        # Save the image payload to the temp file...
        with open(tmp_path, 'wb') as out_file:
            shutil.copyfileobj(img_response.raw, out_file)
        # ...then re-read it and attach it to the model programmatically.
        with open(tmp_path, 'rb') as img_file:
            pr.image.save('cat.png', File(img_file), save=True)
        pr.save()
def handle(self, *args, **options):
    """Rebuild Category/Product tables from ``DATA_DIR/price.xlsx``.

    Wipes both tables, then walks the first worksheet row by row:
    a row with an empty column 2 starts a new category (name in column 3);
    any other row is a product attached to the most recent category.
    """
    print('Clearing DB')
    Category.objects.all().delete()
    Product.objects.all().delete()
    print('Start importing from excel %s' % DATA_DIR)
    wb = load_workbook(DATA_DIR + '/price.xlsx')
    # Modern openpyxl API; get_sheet_by_name/get_sheet_names are removed
    # in openpyxl 3.x.
    sheet = wb[wb.sheetnames[0]]
    cat = None
    # Fix off-by-one: range(1, max_row) skipped the last row; the sibling
    # importer correctly iterates through max_row inclusive.
    for cnt in range(1, sheet.max_row + 1):
        item = sheet.cell(row=cnt, column=3).value
        row_id = sheet.cell(row=cnt, column=2).value  # don't shadow builtin id
        if row_id is None:
            print('Create a new category')
            cat = Category()
            cat.name = item
            cat.save()
        else:
            print('Create a new good')
            # Products before the first category row are silently dropped.
            if cat:
                p = Product()
                p.name = item
                p.category = cat
                p.save()
def post(self, request, format=None):
    """Create a Product from POSTed data and return its serialized form.

    Expects ``cat`` and ``subcat`` primary keys and a ``name`` in
    ``request.data``; an image may arrive either as an uploaded file
    (``image``) or as a data-URI string (``image_base64``).

    :returns: DRF ``Response`` with the serialized product.
    :raises Category.DoesNotExist / SubCategory.DoesNotExist: on bad pks.
    """
    cat = Category.objects.get(pk=request.data.get('cat'))
    subcat = SubCategory.objects.get(pk=request.data.get('subcat'))
    p = Product()
    p.category = cat
    p.user = request.user.userprofile
    p.subcategory = subcat
    p.name = request.data.get('name')
    if "image" in request.data:
        p.image = request.data['image']
    # Fix: persist first so p.id has a value; the original built the
    # base64 file name before save(), producing "None_user.<ext>".
    p.save()
    if "image_base64" in request.data:
        try:
            # Data URI looks like "data:image/png;base64,<payload>".
            # Renamed from `format` — it shadowed the view's parameter.
            header, imgstr = request.data.get('image_base64').split(
                ';base64,')
            ext = header.split('/')[-1]
            data = ContentFile(base64.b64decode(imgstr))
            file_name = '%s_user.%s' % (p.id, ext)
            p.image.save(file_name, data, save=True)
        except (ValueError, TypeError, AttributeError):
            # Malformed data URI or invalid base64: keep the product,
            # just without an image (original best-effort behavior).
            pass
    return Response(ProductSerializer(p).data)
def get_product_description(link, category):
    """Scrape one product page from tainabox.com.ua into Product rows.

    For each ``div.product__big-item`` on the page, extracts image,
    composition, price and name; products whose name or composition hits
    a STOP_WORDS entry are skipped.

    :param link: path portion of the product URL (joined to the site root).
    :param category: Category instance assigned to each created product.
    """
    URL = 'https://tainabox.com.ua'
    print('Start importing from %s' % URL + link)
    rez = requests.get(URL + link)
    soup = BeautifulSoup(rez.text, 'html.parser')
    for desc in soup.findAll('div', {'class': 'product__big-item'}):
        image = desc.find('div', {
            'class': 'product__big-item_right'
        }).find('img')
        consist = desc.find('div',
                            {'class': 'product__item__composition__value'})
        price = desc.find('div', {'class': 'to-order__value'})
        # Fix: use the explicit {'class': ...} attrs form like the other
        # lookups; the original passed a bare set as the attrs argument.
        name = desc.find('div', {'class': 'product__big-item__name'})
        # Skip products whose name or composition matches a stop word.
        in_stop = any(
            name.text.find(w) > -1 or consist.text.find(w) > -1
            for w in STOP_WORDS)
        if in_stop:
            continue
        p = Product()
        p.name = re.sub(r'\n', '', name.text)  # raw string for regex
        p.price = re.split(r'\n', price.text)[0]  # first line only
        p.consist = consist.text
        p.category = category
        img_url = URL + image['src']
        img_temp = NamedTemporaryFile(delete=True)
        # Spoof a browser UA — the site rejects the default urllib agent.
        req = urllib.request.Request(
            img_url,
            data=None,
            headers={
                'User-Agent':
                'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36'
            })
        img_temp.write(urllib.request.urlopen(req).read())
        img_temp.flush()
        # Keep the source file's extension for the stored image name.
        img_res = re.split(r'\.', image['src'])
        p.image.save("image_.{}".format(img_res[-1]), File(img_temp))
        p.save()
def get_products(cat, subcat, url):
    """Scrape product cards from *url* and store each as a Product.

    Clears the previously downloaded picture directory first, then for
    every ``div.company_pic`` creates a Product from the ``<img>`` title,
    skipping STOP_WORDS matches and "no_image" placeholders.

    :param cat: Category instance assigned to every created product.
    :param subcat: SubCategory instance assigned to every created product.
    :param url: page URL to scrape (fetched with ``verify=False``).
    """
    try:
        # Clear the image directory from previous runs.
        shutil.rmtree('%s/media/category/subcategory' % BASE_DIR)
    except OSError:
        # Directory may not exist yet — narrowed from a bare except.
        pass
    print('Downloading fm %s' % url)
    # Suppress the InsecureRequestWarning triggered by verify=False;
    # one call covers all requests below (the original repeated it
    # inside the loop).
    requests.packages.urllib3.disable_warnings(category=InsecureRequestWarning)
    rez = requests.get(url, verify=False)
    soup = BeautifulSoup(rez.text, 'html.parser')
    # Fix: write and read the SAME path. The original wrote 'tmp.png'
    # relative to cwd but read from BASE_DIR — broken when cwd != BASE_DIR.
    tmp_path = '%s/tmp.png' % BASE_DIR
    for item in soup.findAll('div', {'class': 'company_pic'}):
        img = item.find('img')
        title = img.get('title') or ''
        src = img.get('src') or ''
        # Filter out unwanted entries.
        in_stop = any(title.find(w) > -1 for w in STOP_WORDS)
        if src.find('no_image') > -1:
            in_stop = True
        if in_stop:
            continue
        print(title)
        pr = Product()
        pr.category = cat
        pr.name = title
        pr.subcategory = subcat
        img_url = 'https://gastronoma.net/%s' % src
        img_response = requests.get(img_url, stream=True, verify=False)
        # Save the download to the temp file ('wb' = binary write)...
        with open(tmp_path, 'wb') as out_file:
            shutil.copyfileobj(img_response.raw, out_file)
        # ...then re-read it and attach it to the model programmatically.
        with open(tmp_path, 'rb') as img_file:
            pr.image.save('product.png', File(img_file), save=True)
        pr.save()
def handle(self, *args, **options):
    """Rebuild Category/Product tables from ``DATA_DIR\\price.xlsx``.

    Wipes both tables, then walks the first worksheet row by row:
    a row with an empty column 5 starts a new category (name in column 1);
    any other row is a product attached to the most recent category.

    NOTE(review): the backslash path separator assumes Windows — confirm
    before running elsewhere.
    """
    print('Clear DB')
    Category.objects.all().delete()
    Product.objects.all().delete()
    print('Start import from excel %s' % DATA_DIR)
    wb = load_workbook(DATA_DIR + '\\price.xlsx')
    # Modern openpyxl API; get_sheet_by_name/get_sheet_names are removed
    # in openpyxl 3.x.
    worksheet = wb[wb.sheetnames[0]]
    category = None
    for cnt in range(1, worksheet.max_row + 1):
        item = worksheet.cell(row=cnt, column=5).value
        cat = worksheet.cell(row=cnt, column=1).value
        if item is None:  # identity check, not ==, per PEP 8
            category = Category()
            category.name = cat
            category.save()
            print('Create category')
        else:
            # Products before the first category row are silently dropped.
            if category:
                product = Product()
                product.name = item
                product.category = category
                product.save()
                print('Create item')