def getCampaign(self):
    """Download BIM brochure images, OCR them and write thumbnails.

    Parses ``self.response.content`` as HTML and treats every ``div`` whose
    class equals ``self.group`` as one campaign.  For each campaign the
    images referenced by the ``data-bigimg`` attribute of its small-area
    anchors are downloaded from ``http://www.bim.com.tr/`` into
    ``<static.appPath>img/bros-img/bim/<campaign number>/<image number>.jpg``,
    passed to ``Ocr.read`` and saved again as ``<image number>thumb.jpg``
    bounded to 810x810.

    Side effects:
        Performs network and file I/O, and leaves the process working
        directory at ``<static.appPath>img/bros-img/bim``.

    Returns:
        list[dict]: one ``{"title": ..., "text": [...]}`` entry per campaign
        group in page order, where ``text`` holds the ``Ocr.read`` result of
        every downloaded image.

    Raises:
        FileNotFoundError: if ``<static.appPath>img/bros-img`` does not exist.
        IndexError: if a campaign group has no ``a>span`` title element.
    """
    soup = BeautifulSoup(self.response.content, "html.parser")
    groups = soup.findAll("div", attrs={"class": self.group})

    brand_dir = static.appPath + "img/bros-img/bim"
    # Deliberately no ``parents=True``: a missing ``img/bros-img`` root keeps
    # raising FileNotFoundError instead of silently creating a tree under a
    # misconfigured ``static.appPath``.
    Path(brand_dir).mkdir(exist_ok=True)
    # Kept so the working directory callers observe afterwards is unchanged;
    # every path below is absolute and does not rely on it.
    os.chdir(brand_dir)

    ocr = Ocr()
    campaigns = []
    for number, group in enumerate(groups, start=1):
        title = group.select("a>span")[0].text
        anchors = group.select(
            "div.row>div.imageArea>div.row>div.smallArea>a")
        # Anchors without a ``data-bigimg`` attribute have nothing to
        # download; dropping them before numbering keeps the file names
        # contiguous and avoids ``quote(None)`` raising TypeError.
        image_refs = [
            ref for ref in (a.get('data-bigimg') for a in anchors) if ref
        ]

        group_dir = brand_dir + "/" + str(number)
        Path(group_dir).mkdir(parents=True, exist_ok=True)

        texts = []
        for imgx, ref in enumerate(image_refs, start=1):
            image_path = group_dir + "/" + str(imgx) + ".jpg"
            thumb_path = group_dir + "/" + str(imgx) + "thumb.jpg"

            urllib.request.urlretrieve(
                "http://www.bim.com.tr/" + urllib.parse.quote(ref),
                image_path)
            texts.append(ocr.read(image_path))

            # ``thumbnail`` resizes in place, so only the thumb file gets the
            # reduced image; the downloaded original stays untouched.
            with Image.open(image_path) as picture:
                picture.thumbnail([810, 810])
                picture.save(thumb_path)

        campaigns.append({"title": title, "text": texts})
    return campaigns
def getCampaign(self, campaignNo):
    """Download the A101 brochure images of one campaign, OCR and thumbnail them.

    Parses ``self.response.content`` as HTML.  The title is the
    ``data-date`` attribute of the first ``div.dates`` inside the link of the
    ``campaignNo``-th entry of ``ul.brochures-actions-list``.  Every
    ``img.image0`` in the first ``div.content`` of the brochure tabs is
    downloaded to
    ``<static.appPath>img/bros-img/a101/<campaignNo>/<image number>.jpg``,
    passed to ``Ocr.read`` and saved again as ``<image number>thumb.jpg``
    bounded to 810x810.

    NOTE(review): images are always read from the first ``div.content``
    regardless of ``campaignNo`` -- confirm that ``self.response`` is already
    the page of the requested campaign.

    Args:
        campaignNo: 1-based position of the campaign in the brochure list;
            also used as the output directory name.

    Side effects:
        Performs network and file I/O, and leaves the process working
        directory at ``<static.appPath>img/bros-img/a101``.

    Returns:
        list[dict]: a single-element list ``[{"title": ..., "text": [...]}]``
        where ``text`` holds the ``Ocr.read`` result of every downloaded
        image.

    Raises:
        FileNotFoundError: if ``<static.appPath>img/bros-img`` does not exist.
        IndexError: if the campaign entry, its ``div.dates`` or the brochure
            content block is missing from the page.
    """
    soup = BeautifulSoup(self.response.content, "html.parser")

    campaign_item = soup.select("ul.brochures-actions-list>li")[campaignNo - 1]
    title = campaign_item.find('a').select('div.dates')[0].get('data-date')

    first_content = soup.select(
        "div.brochure-tabs>div.contents>div.content")[0]
    image_tags = first_content.findAll('img', attrs={"class": "image0"})
    # Tags without a ``src`` cannot be downloaded; filtering before numbering
    # keeps the file names contiguous and avoids ``urlretrieve(None, ...)``.
    image_urls = [
        src for src in (tag.get('src') for tag in image_tags) if src
    ]

    ocr = Ocr()

    brand_dir = static.appPath + "img/bros-img/a101"
    # Deliberately no ``parents=True``: a missing ``img/bros-img`` root keeps
    # raising FileNotFoundError instead of creating a tree under a
    # misconfigured ``static.appPath``.
    Path(brand_dir).mkdir(exist_ok=True)
    # Kept so the working directory callers observe afterwards is unchanged;
    # every path below is absolute and does not rely on it.
    os.chdir(brand_dir)

    campaign_dir = brand_dir + "/" + str(campaignNo)
    if image_urls:
        # Created once instead of on every iteration; still skipped when
        # there is nothing to store, so no empty directory appears.
        Path(campaign_dir).mkdir(parents=True, exist_ok=True)

    texts = []
    for number, url in enumerate(image_urls, start=1):
        image_path = campaign_dir + "/" + str(number) + ".jpg"
        thumb_path = campaign_dir + "/" + str(number) + "thumb.jpg"

        urllib.request.urlretrieve(url, image_path)
        texts.append(ocr.read(image_path))

        # ``thumbnail`` resizes in place, so only the thumb file gets the
        # reduced image; the downloaded original stays untouched.
        with Image.open(image_path) as picture:
            picture.thumbnail([810, 810])
            picture.save(thumb_path)

    return [{"title": title, "text": texts}]
def getCampaign(self, poppler_path='C:\\Program Files\\poppler\\Library\\bin'):
    """Download the two ŞOK brochure PDFs and render them to JPEG pages.

    Both brochure URLs are fetched into ``<static.appPath>img/bros-img/sok/temp``
    and rasterised with ``convert_from_path``.  Every page is written to the
    campaign folder as ``<page>.jpg``, passed to ``Ocr.read``, then re-saved
    bounded to 1010x1010 and additionally saved as ``<page>thumb.jpg``
    bounded to 810x810.

    Args:
        poppler_path: directory forwarded to ``convert_from_path`` as
            ``poppler_path``.  Defaults to the Windows location that was
            previously hard-coded.

    Side effects:
        Performs network and file I/O, and leaves the process working
        directory at ``<static.appPath>img/bros-img/sok``.

    Returns:
        dict: ``{'Brand': 'ŞOK', 'Code': 'sok', 'Campaigns': [...]}`` with one
        entry per brochure.  ``texts`` is the ``Ocr.read`` result of the LAST
        page of that brochure only, or ``''`` when the PDF has no pages.
        NOTE(review): earlier pages are OCR'd but their result is dropped,
        exactly as before; this looks unintended, but collecting every page
        would change the return shape, so confirm with the callers first.

    Raises:
        FileNotFoundError: if ``<static.appPath>img/bros-img`` does not exist.
    """
    sok_dir = static.appPath + "img/bros-img/sok"
    # Deliberately no ``parents=True``: a missing ``img/bros-img`` root keeps
    # raising FileNotFoundError instead of creating a tree under a
    # misconfigured ``static.appPath``.
    Path(sok_dir).mkdir(exist_ok=True)
    # Kept so the working directory callers observe afterwards is unchanged;
    # every path below is absolute and does not rely on it.
    os.chdir(sok_dir)

    haftaUrl = "https://kurumsal.sokmarket.com.tr/firsatlar/carsamba/"
    haftaSonuUrl = "https://kurumsal.sokmarket.com.tr/firsatlar/hafta-sonu/"

    for folder in ('haftanin-firsatlari', 'haftasonu-firsatlari', 'temp'):
        Path(sok_dir + '/' + folder).mkdir(parents=True, exist_ok=True)

    hafta_pdf = sok_dir + '/temp/hafta.pdf'
    hafta_sonu_pdf = sok_dir + '/temp/haftaSonu.pdf'
    urllib.request.urlretrieve(haftaUrl, hafta_pdf)
    urllib.request.urlretrieve(haftaSonuUrl, hafta_sonu_pdf)

    ocr = Ocr()

    def render_pages(pdf_path, folder):
        """Write every page of *pdf_path* into *folder*; return the last OCR result."""
        # Bound up front so a PDF without pages yields '' instead of the
        # UnboundLocalError the original hit at the ``return``.
        last_text = ''
        pages = convert_from_path(pdf_path, poppler_path=poppler_path)
        for i, page in enumerate(pages, start=1):
            page_path = sok_dir + '/' + folder + '/' + str(i) + '.jpg'
            thumb_path = sok_dir + '/' + folder + '/' + str(i) + 'thumb.jpg'

            # OCR runs on the full-resolution render, before it is shrunk.
            page.save(page_path)
            last_text = ocr.read(page_path)

            with Image.open(page_path) as picture:
                # ``thumbnail`` resizes in place: the page file is replaced
                # by the 1010 px version, and the 810 px thumb is derived
                # from that already reduced image.
                picture.thumbnail(size=[1010, 1010])
                picture.save(page_path)
                picture.thumbnail(size=[810, 810])
                picture.save(thumb_path)
        return last_text

    haftaText = render_pages(hafta_pdf, 'haftanin-firsatlari')
    haftaSonuText = render_pages(hafta_sonu_pdf, 'haftasonu-firsatlari')

    return {
        'Brand': 'ŞOK',
        'Code': 'sok',
        'Campaigns': [{
            'title': 'Haftanın Fırsatları',
            'contents': 'haftanin-firsatlari',
            'texts': haftaText
        }, {
            'title': 'Haftasonu Fırsatları',
            'contents': 'haftasonu-firsatlari',
            'texts': haftaSonuText
        }]
    }