def __init__(self, input_file, output_file):
    self.logger = LogManager(__name__)
    self.spider = Spider()
    self.regex = Regex()
    self.utils = Utils()
    self.input_file = input_file
    self.output_file = output_file
class GoogleFinanceScrapper:
    isFinished = False

    def __init__(self, filename):
        self.logger = LogManager(__name__)
        self.spider = Spider()
        self.regex = Regex()
        self.utils = Utils()
        self.filename = filename
        self.url = 'https://www.google.com/finance?'
        self.main_url = 'https://www.google.com'
        self.csvWriter = Csv('google_finance.csv')
        csvDataHeader = ['Ticker Symbol', 'Quarter End', 'Revenue', 'Total Revenue', 'Date of Scrape']
        self.csvWriter.writeCsvRow(csvDataHeader)

    def run(self):
        self.scrapData()
        self.csvWriter.closeWriter()

    def scrapData(self):
        try:
            file = open(self.filename, 'rb')
            for line in file.readlines():
                if self.isFinished: return
                line = self.regex.replaceData('\r+', '', line)
                line = self.regex.reduceNewLine(line)
                line = self.regex.reduceBlankSpace(line)
                line = line.strip()
                params = urllib.urlencode({'q': line})
                url = self.url + params
                self.scrapBykeyword(url, line)
        except Exception, x:
            print x
            self.logger.error('Error: ' + x.message)
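# Minimal usage sketch (hypothetical): assumes the project's LogManager, Spider, Regex,
# Utils and Csv helpers are importable and that 'tickers.txt' lists one ticker per line.
scrapper = GoogleFinanceScrapper('tickers.txt')
scrapper.run()  # scrapes each keyword into google_finance.csv and closes the writer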
class TopsyScrapper:
    isFinished = False

    def __init__(self, filename):
        self.logger = LogManager(__name__)
        self.spider = Spider()
        self.regex = Regex()
        self.utils = Utils()
        self.filename = filename
        self.url = 'http://topsy.com/s?'
        self.csvWriter = Csv('topsy.csv')
        csvDataHeader = ['Keyword', 'Tweets in last 30 days', 'Topsy Sentiment Score', ' Date of scrape']
        self.csvWriter.writeCsvRow(csvDataHeader)

    def run(self):
        self.scrapData()
        self.csvWriter.closeWriter()

    def scrapData(self):
        try:
            file = open(self.filename, 'rb')
            for line in file.readlines():
                if self.isFinished: return
                line = self.regex.replaceData('\r+', '', line)
                line = self.regex.reduceNewLine(line)
                line = self.regex.reduceBlankSpace(line)
                line = line.strip()
                if len(line) > 0:
                    params = urllib.urlencode({'q': line, 'window': 'm', 'type': 'tweet'})
                    url = self.url + params
                    self.scrapBrowserData(url, line)
        except Exception, x:
            print x
def __init__(self, spider, memberList, subject, message):
    QThread.__init__(self)
    # self.spider = Spider()
    self.spider = spider
    self.regex = Regex()
    self.memberList = memberList
    self.subject = unicode(subject)
    self.message = unicode(message)
class NisbetProduct(QtCore.QThread):
    scrapProductData = QtCore.pyqtSignal(object)
    stopThread = QtCore.pyqtSignal(int)

    def __init__(self):
        QtCore.QThread.__init__(self)
        self.isExiting = False
        self.totalProducts = 0
        self.logger = LogManager(__name__)
        self.spider = Spider()
        self.regex = Regex()
        dupCsvReader = Csv()
        self.dupCsvRows = dupCsvReader.readCsvRow('nisbets.csv', 0)
        self.csvWriter = Csv('nisbets.csv')
        self.mainUrl = 'http://www.nisbets.co.uk'
        csvHeaderList = ['URL', 'Product Code', 'Product Technical Specifications', 'Product Name', 'Brand',
                         'Product Price', 'Product Short Description', 'Product Long Description',
                         'Image File Name', 'User Manual File Name', 'Exploded View File Name', 'Spares Code',
                         'Accessories', 'Product Status', 'Category1', 'Category2', 'Category3', 'Category4']
        if 'URL' not in self.dupCsvRows:
            self.csvWriter.writeCsvRow(csvHeaderList)
            self.dupCsvRows.append(csvHeaderList[0])
        self.utils = Utils()

    def run(self):
        self.scrapData()

    def stop(self):
        self.isExiting = True

    def scrapData(self):
        if self.isExiting: return
        self.scrapProductData.emit('<font color=green><b>Main URL: </b>%s</font>' % self.mainUrl)
        self.logger.debug('===== URL [' + self.mainUrl + '] =====')
        data = self.spider.fetchData(self.mainUrl)
        if data and len(str(data).strip()) > 0:
            data = self.regex.reduceNewLine(data)
            data = self.regex.reduceBlankSpace(data)
            category1Chunk = self.regex.getAllSearchedData('(?i)<li id="li-id-\d+">(.*?)</ul> </li>', data)
            if category1Chunk and len(str(category1Chunk).strip()) > 0:
                i = 0
                for category1Data in category1Chunk:
                    category1 = self.regex.getSearchedData('(?i)<a href="[^"]*">([^<]*)</a>', category1Data)
                    category2Chunk = self.regex.getAllSearchedData('(?i)<li><a href="([^"]*)">([^<]*)</a>',
                                                                   category1Data)
                    if category2Chunk and len(str(category2Chunk).strip()) > 0:
                        for category2Data in category2Chunk:
                            try:
                                self.scrapCategory2Data(self.mainUrl + category2Data[0], category1, category2Data[1])
                            except Exception, x:
                                self.logger.error(x)
        self.scrapProductData.emit('<font color=red><b>Finish Scraping Product data from %s</b></font>' % self.mainUrl)
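# Hedged wiring sketch for the thread above: assumes a running QApplication and a
# caller-side `log` slot for the HTML status messages (both names are illustrative).
nisbet = NisbetProduct()
nisbet.scrapProductData.connect(log)  # receives the emitted status strings
nisbet.start()                        # runs scrapData() in the worker thread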
def downloadFile(self, url, downloadPath, proxyHandler=None, notifier=None, retry=0):
    try:
        if os.path.exists(downloadPath) and os.path.getsize(downloadPath):
            if notifier is not None:
                notifier.emit('<font color=red><b>Image file already exists. Skip downloading file.</b></font>')
            return

        notifier.emit(('<font color=blue><b>Image URL: %s</b></font>' % url))
        regex = Regex()
        opener = urllib2.build_opener(urllib2.HTTPRedirectHandler(),
                                      urllib2.HTTPHandler(debuglevel=0),
                                      urllib2.HTTPSHandler(debuglevel=0))
        opener.addheaders = [
            config.USER_AGENT,
            ('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'),
            ('Connection', 'keep-alive')]
        if proxyHandler is not None:
            opener.add_handler(proxyHandler)

        resp = urllib2.urlopen(url, timeout=30)
        contentLength = resp.info()['Content-Length']
        contentLength = regex.getSearchedData('(?i)^(\d+)', contentLength)
        totalSize = float(contentLength)

        directory = os.path.dirname(downloadPath)
        if not os.path.exists(directory):
            os.makedirs(directory)

        dl_file = open(downloadPath, 'wb')
        currentSize = 0
        CHUNK_SIZE = 32768
        while True:
            data = resp.read(CHUNK_SIZE)
            if not data:
                break
            currentSize += len(data)
            dl_file.write(data)

            msg = '=====> ' + str(round(float(currentSize * 100) / totalSize, 2)) + \
                  '% of ' + str(totalSize / (1024)) + ' KB'
            print('=====> ' + str(round(float(currentSize * 100) / totalSize, 2)) + '% of ' + str(
                totalSize) + ' bytes')
            if notifier is not None:
                notifier.emit('<font color=blue><b>%s</b></font>' % msg)
            if currentSize >= totalSize:
                dl_file.close()
                return True
    except Exception, x:
        print x
        notifier.emit(('<font color=red><b>Error Download Image URL: %s</b></font>' % url))
        if retry < 1:
            notifier.emit('<font color=black><b>Will retry after 5 seconds.</b></font>')
            time.sleep(5)
            notifier.emit('<font color=black><b>Retry...</b></font>')
            self.downloadFile(url, downloadPath, proxyHandler, notifier, retry + 1)
        else:
            notifier.emit('<font color=red><b>Failed to download after maximum retry.</b></font>')
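# Usage sketch for the method above (names are illustrative): `statusSignal` would be a
# pyqtSignal(object) used as the notifier, and the image URL/path come from the caller.
#
#   self.downloadFile('http://example.com/images/item.jpg', 'images/item.jpg',
#                     proxyHandler=None, notifier=statusSignal)
#
# On failure the method sleeps 5 seconds and retries once before reporting a final failure.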
def __init__(self):
    QObject.__init__(self)
    self.regex = Regex()
    self.title = ''
    self.webView = QWebView()
    self.webView.settings().setAttribute(QWebSettings.AutoLoadImages, True)
    self.webView.settings().setAttribute(QWebSettings.JavascriptEnabled, True)
    self.webView.settings().setAttribute(QWebSettings.PluginsEnabled, True)
    self.webView.settings().setAttribute(QWebSettings.DeveloperExtrasEnabled, True)
    self.pdfPrinter = QPrinter()
    self.webView.loadFinished.connect(self.convertToPdf)
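# Hedged sketch of what the convertToPdf slot connected above might do; the original
# implementation is not shown here, and the output file name is illustrative.
def convertToPdf(self, ok):
    if ok:
        self.pdfPrinter.setOutputFormat(QPrinter.PdfFormat)
        self.pdfPrinter.setOutputFileName('page.pdf')
        self.webView.print_(self.pdfPrinter)  # render the loaded page to PDF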
def __init__(self, filename):
    self.logger = LogManager(__name__)
    self.spider = Spider()
    self.regex = Regex()
    self.utils = Utils()
    self.filename = filename
    self.url = 'http://topsy.com/s?'
    self.csvWriter = Csv('topsy.csv')
    csvDataHeader = ['Keyword', 'Tweets in last 30 days', 'Topsy Sentiment Score', ' Date of scrape']
    self.csvWriter.writeCsvRow(csvDataHeader)
def __init__(self):
    self.logger = LogManager(__name__)
    self.spider = Spider()
    self.browser = BrowserUtil()
    self.regex = Regex()
    self.utils = Utils()
    self.csvHeader = ['Category', 'Sub Category 1', 'Sub Category 2', 'Product Code', 'Product Name',
                      'Product ShortName', 'Product Description', 'List Price', 'Vendor Price', 'Availability',
                      'Power', 'Size', 'KW', 'Weight(kg)', 'Other Tech', 'Pdf File', 'Image File']
    self.totalProducts = 0
def __init__(self, filename):
    self.logger = LogManager(__name__)
    self.spider = Spider()
    self.regex = Regex()
    self.utils = Utils()
    self.filename = filename
    self.url = 'https://www.google.com/finance?'
    self.main_url = 'https://www.google.com'
    self.csvWriter = Csv('google_finance.csv')
    csvDataHeader = ['Ticker Symbol', 'Quarter End', 'Revenue', 'Total Revenue', 'Date of Scrape']
    self.csvWriter.writeCsvRow(csvDataHeader)
class SaraivaScrapper(QThread):
    notifySaraiva = pyqtSignal(object)

    def __init__(self, urlList, category, htmlTag, replaceTag):
        QThread.__init__(self)
        self.logger = LogManager(__name__)
        self.spider = Spider()
        self.regex = Regex()
        self.utils = Utils()
        self.urlList = urlList
        self.category = category
        self.htmlTag = self.regex.replaceData('\r+', '', htmlTag)
        self.htmlTag = self.regex.replaceData('\n+', ' ', self.htmlTag)
        self.htmlTag = self.regex.replaceData('\s+', ' ', self.htmlTag)
        self.htmlTag = self.regex.replaceData(r'\"+', '\"', self.htmlTag)
        self.replaceTag = replaceTag
        self.csvWriter = Csv(category + '.csv')
        csvDataHeader = ['Link', 'Name', 'Subtitle', 'Price', 'Synopsis and Characteristics', 'Picture']
        self.csvWriter.writeCsvRow(csvDataHeader)
        self.mainUrl = 'http://busca.livrariasaraiva.com.br'
        self.scrapUrl = None
        self.dbHelper = DbHelper('saraiva.db')
        self.dbHelper.createTable(category)
        self.total = self.dbHelper.getTotalProduct(category)

    def run(self, retry=0):
        try:
            if self.urlList is not None and len(self.urlList):
                for url in self.urlList:
                    if len(url) > 0:
                        url = self.regex.replaceData('(?i)\r', '', url)
                        url = self.regex.replaceData('(?i)\n', '', url)
                        self.notifySaraiva.emit('<font color=green><b>Saraiva Main URL: %s</b></font>' % url)
                        paginationUrl, self.maxRecords = self.reformatUrl(url)
                        self.notifySaraiva.emit(
                            '<font color=black><b>Total Records: %s</b></font>' % str(self.maxRecords))
                        print 'Max records: ', self.maxRecords
                        print 'URL: ' + str(paginationUrl)
                        sortList = ['&isort=globalpop', '&isort=best', '&isort=title', '&isort=title+rev',
                                    '&isort=price+rev', '&isort=price', '&isort=date+rev']
                        for sort in sortList:
                            self.scrapResults(paginationUrl, sort)
            self.notifySaraiva.emit('<font color=red><b>Saraiva Data Scraping finished.</b></font>')
        except Exception, x:
            print x.message
            self.logger.error('Exception at run: ', x.message)
            if retry < 5:
                self.run(retry + 1)
class PaodeacucarScrapper(QThread):
    notifyPaode = pyqtSignal(object)

    def __init__(self):
        QThread.__init__(self)
        self.logger = LogManager(__name__)
        self.spider = Spider()
        self.regex = Regex()
        self.utils = Utils()
        self.mainUrl = 'http://www.paodeacucar.com.br/'
        self.url = 'http://www.paodeacucar.com.br/'
        dupCsvReader = Csv()
        self.dupCsvRows = dupCsvReader.readCsvRow('paodeacucar.csv', 4)
        self.csvWriter = Csv('paodeacucar.csv')
        csvDataHeader = ['SKU', 'Category', 'Subcategory', 'Name', 'URL', 'URL Image', 'Details',
                         'Nutrients Table html code', 'Price from, 28/abr/14', '28/abr/14']
        if 'URL' not in self.dupCsvRows:
            self.dupCsvRows.append(csvDataHeader)
            self.csvWriter.writeCsvRow(csvDataHeader)

    def run(self):
        self.scrapData()

    def scrapData(self):
        try:
            print 'Main URL: ', self.url
            self.notifyPaode.emit(('<font color=green><b>Main URL: %s</b></font>' % self.url))
            data = self.spider.fetchData(self.url)
            if data and len(data) > 0:
                data = self.regex.reduceNewLine(data)
                data = self.regex.reduceBlankSpace(data)
                soup = BeautifulSoup(data)
                categories = soup.find('nav', class_='items-wrapper').find_all('li', class_=re.compile('\s*item\s*'))
                print 'Total Categories: ', len(categories)
                self.notifyPaode.emit(('<font color=black><b>Total Categories: %s</b></font>' % str(len(categories))))
                for category in categories:
                    if category.a is not None:
                        submenu_target = self.regex.replaceData('#', '', category.a.get('data-target'))
                        sub_categories = soup.find('ul', id=submenu_target).find_all('li', class_='item')
                        print 'Total Sub Categories: ', len(sub_categories)
                        self.notifyPaode.emit(('<font color=black><b>Total Subcategories: %s</b></font>' % str(len(sub_categories))))
                        for sub_category in sub_categories:
                            sub_category_label = sub_category.find('span', class_='label').text
                            sub_category_url = sub_category.a.get('href') if sub_category.a is not None else 'N/A'
                            self.scrapItems(sub_category_url, category.text, sub_category_label)
        except Exception, x:
            self.logger.error(x.message)
            print x
def __init__(self, spider, url, pageRange=None):
    QThread.__init__(self)
    # self.spider = Spider()
    self.spider = spider
    self.regex = Regex()
    self.url = url
    self.startPage = None
    self.endPage = None
    if self.regex.isFoundPattern('(?i)(\d+)-(\d+)', str(pageRange).strip()):
        pageRangeFormat = self.regex.getSearchedDataGroups('(?i)(\d+)-(\d+)', str(pageRange).strip())
        self.startPage = int(pageRangeFormat.group(1))
        self.endPage = int(pageRangeFormat.group(2))
    elif self.regex.isFoundPattern('(?i)(\d+)', str(pageRange).strip()):
        pageRangeFormat = self.regex.getSearchedDataGroups('(?i)(\d+)', str(pageRange).strip())
        self.startPage = int(pageRangeFormat.group(1))
        self.endPage = self.startPage
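# pageRange formats accepted by the constructor above (values are illustrative):
#   pageRange='3-7' -> startPage=3, endPage=7
#   pageRange='5'   -> startPage=endPage=5
#   pageRange=None  -> both stay None, i.e. no page limit is applied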
def __init__(self):
    self.logger = LogManager(__name__)
    self.spider = Spider()
    self.regex = Regex()
    self.utils = Utils()
    self.mainUrl = 'http://www.ebags.com'
    self.url = 'http://www.ebags.com/brands'
def __init__(self):
    QtCore.QThread.__init__(self)
    self.isExiting = False
    self.logger = LogManager(__name__)
    self.spider = Spider()
    self.regex = Regex()
    dupCsvReader = Csv()
    self.dupCsvRows = dupCsvReader.readCsvRow("nisbets.csv", 0)
    self.csvWriter = Csv("nisbets.csv")
    self.mainUrl = "http://www.nisbets.co.uk"
    csvHeaderList = [
        "URL", "Product Code", "Product Technical Specifications", "Product Name", "Brand", "Product Price",
        "Product Short Description", "Product Long Description", "Image File Name", "User Manual File Name",
        "Exploded View File Name", "Spares Code", "Accessories", "Product Status",
        "Category1", "Category2", "Category3", "Category4",
    ]
    if "URL" not in self.dupCsvRows:
        self.csvWriter.writeCsvRow(csvHeaderList)
        self.dupCsvRows.append(csvHeaderList[0])
    self.utils = Utils()
def __init__(self):
    QThread.__init__(self)
    self.logger = LogManager(__name__)
    self.spider = Spider()
    self.regex = Regex()
    self.utils = Utils()
    self.mainUrl = 'http://www.paodeacucar.com.br/'
    self.url = 'http://www.paodeacucar.com.br/'
    dupCsvReader = Csv()
    self.dupCsvRows = dupCsvReader.readCsvRow('paodeacucar.csv', 4)
    self.csvWriter = Csv('paodeacucar.csv')
    csvDataHeader = ['SKU', 'Category', 'Subcategory', 'Name', 'URL', 'URL Image', 'Details',
                     'Nutrients Table html code', 'Price from, 28/abr/14', '28/abr/14']
    if 'URL' not in self.dupCsvRows:
        self.dupCsvRows.append(csvDataHeader)
        self.csvWriter.writeCsvRow(csvDataHeader)
class CsBrands(QThread):
    notifyBrand = pyqtSignal(object)

    def __init__(self):
        QThread.__init__(self)
        self.logger = LogManager(__name__)
        self.spider = Spider()
        self.regex = Regex()
        self.utils = Utils()
        dupCsvReader = Csv()
        self.dupCsvRows = dupCsvReader.readCsvRow("cs_Brands.csv")
        self.csvWriter = Csv("cs_Brands.csv")
        self.mainUrl = "http://www.cs-catering-equipment.co.uk/brands"
        self.isExiting = False
        headerData = [
            "URL", "Parent Category", "Brand Category", "Brand Description", "Image File",
            "Product Codes in this category",
        ]
        if headerData not in self.dupCsvRows:
            self.csvWriter.writeCsvRow(headerData)

    def run(self):
        self.scrapBrands()
        self.notifyBrand.emit("<font color=red><b>Finished Scraping All Brands.</b></font>")

    def scrapBrands(self):
        self.notifyBrand.emit("<font color=green><b>Main URL: %s<b></font>" % self.mainUrl)
        self.notifyBrand.emit("<b>Try To scrap All Brands.<b>")
        data = self.spider.fetchData(self.mainUrl)
        if data and len(data) > 0:
            data = self.regex.reduceNewLine(data)
            data = self.regex.reduceBlankSpace(data)
            brandChunks = self.regex.getAllSearchedData('(?i)<div class="man-group man-group-[a-z]">(.*?)</div>', data)
            if brandChunks and len(brandChunks) > 0:
                for brandChunk in brandChunks:
                    brands = self.regex.getAllSearchedData('(?i)<a href="([^"]*)"[^>]*?>([^<]*)</a>', brandChunk)
                    self.notifyBrand.emit("<b>Total Brands Found: %s<b>" % str(len(brands)))
                    if brands and len(brands) > 0:
                        for brand in brands:
                            try:
                                self.scrapBrandInfo(brand[0], "Shop By Brand", brand[1])
                            except Exception, x:
                                self.logger.error(x)
class CsTest(QThread):
    notifyProduct = pyqtSignal(object)

    def __init__(self):
        QThread.__init__(self)
        self.logger = LogManager(__name__)
        self.spider = Spider()
        self.regex = Regex()
        dupCsvReader = Csv()
        self.dupCsvRows0 = dupCsvReader.readCsvRow('cs_product.csv', 0)
        self.dupCsvRows = dupCsvReader.readCsvRow('cs_product.csv', 1)
        self.csvWriter = Csv('cs_product.csv')
        # self.mainUrl = 'http://www.cs-catering-equipment.co.uk/'
        self.mainUrl = 'http://www.cs-catering-equipment.co.uk/brands'
        self.utils = Utils()
        if 'Product Code' not in self.dupCsvRows:
            self.csvWriter.writeCsvRow(
                ['URL', 'Product Code', 'Product Name', 'Manufacturer', 'List Price', 'Product Price', 'Discount',
                 'Product Short Description', 'Product Long Description', 'Product Technical Specifications',
                 'Warranty', 'Delivery', 'Product Image', 'Category 1', 'Category 2', 'Category 3', 'Category 4',
                 'Brand Image'])
        self.totalProducts = len(self.dupCsvRows)

    def run(self):
        self.scrapBrands()
        self.notifyProduct.emit('<font color=red><b>Finished Scraping All Brands.</b></font>')

    def scrapBrands(self):
        self.notifyProduct.emit('<font color=green><b>Main URL: %s<b></font>' % self.mainUrl)
        self.notifyProduct.emit('<b>Try To scrap All Brands.<b>')
        data = self.spider.fetchData(self.mainUrl)
        if data and len(data) > 0:
            data = self.regex.reduceNewLine(data)
            data = self.regex.reduceBlankSpace(data)
            brandChunks = self.regex.getAllSearchedData('(?i)<div class="man-group man-group-[a-z]">(.*?)</div>', data)
            if brandChunks and len(brandChunks) > 0:
                for brandChunk in brandChunks:
                    brands = self.regex.getAllSearchedData('(?i)<a href="([^"]*)"[^>]*?>([^<]*)</a>', brandChunk)
                    self.notifyProduct.emit('<b>Total Brands Found: %s<b>' % str(len(brands)))
                    if brands and len(brands) > 0:
                        for brand in brands:
                            try:
                                self.scrapBrandInfo(brand[0], 'Shop By Brand', brand[1])
                            except Exception, x:
                                self.logger.error(x)
def __init__(self):
    self.logger = LogManager(__name__)
    self.spider = Spider()
    self.regex = Regex()
    self.csvWriter = Csv('nisbets.csv')
    self.mainUrl = 'http://www.nisbets.co.uk'
    csvHeaderList = ['Category', 'Product Image Url', 'Product Code', 'Product Name', 'Price']
    self.csvWriter.writeCsvRow(csvHeaderList)
def __init__(self, parent=None):
    super(MainForm, self).__init__(parent)
    self.regex = Regex()
    self.alreadyClickedA = False
    self.alreadyClickedB = False
    self.fileDir = None
    self.fileDirB = None
    self.fileName = None
    self.fileNameB = None
    self.totalUrlA = 0
    self.totalUrlB = 0
    self.currentUrlA = 0
    self.currentUrlB = 0
    self.pdfCounter = 1
    self.pdfCounterB = 1
    self.typeName = 'B'
    self.setupUI()
class AmazonScrapper():
    def __init__(self, url):
        self.logger = LogManager(__name__)
        self.spider = Spider()
        self.regex = Regex()
        self.utils = Utils()
        self.url = url
        self.base_product_url = 'http://www.amazon.com/dp/'
        self.base_image_url = 'http://ecx.images-amazon.com/images/I/'
        self.csvWriter = Csv('amazon.csv')
        csvDataHeader = ['URL', 'HTML Path', 'Image URLS']
        self.csvWriter.writeCsvRow(csvDataHeader)

    def scrapData(self):
        try:
            host = ('Host', 'www.amazon.com')
            data = self.spider.fetchData(self.url, host=host)
            if data:
                data = self.regex.reduceNewLine(data)
                data = self.regex.reduceBlankSpace(data)
                searchParams = self.regex.getSearchedData('(?i)var searchParams = {([^\}]*)}', data)
                searchParams = searchParams.split(',')
                seller = ''
                marketPlaceId = ''
                useMYI = ''
                for searchParam in searchParams:
                    searchParam = self.regex.reduceBlankSpace(searchParam)
                    searchParam = self.regex.replaceData('\'', '', searchParam)
                    if searchParam.startswith('seller'):
                        seller = searchParam.split(':')[1].strip()
                        seller = seller.decode('string-escape')
                    if searchParam.startswith('marketplaceID'):
                        marketPlaceId = searchParam.split(':')[1].strip()
                        marketPlaceId = marketPlaceId.decode('string-escape')
                    if searchParam.startswith('useMYI'):
                        useMYI = searchParam.split(':')[1].strip()
                        useMYI = useMYI.decode('string-escape')
                params = {'seller': seller, 'marketPlaceId': marketPlaceId, 'useMYI': useMYI}
                ajax_url = 'http://www.amazon.com/gp/aag/ajax/productWidget.html'
                self.scrapAjaxPage(ajax_url, params, host)
        except Exception, x:
            print x
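# Usage sketch (the URL is a hypothetical seller page): scrapData() pulls the
# seller/marketplace values out of the page's searchParams block and then requests
# the seller product widget via the AJAX endpoint.
seller_page_url = 'http://www.amazon.com/gp/aag/main'  # illustrative, normally supplied by the caller
scrapper = AmazonScrapper(seller_page_url)
scrapper.scrapData()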
def __init__(self, urllist):
    QThread.__init__(self)
    self.logger = LogManager(__name__)
    self.spider = Spider()
    self.regex = Regex()
    self.utils = Utils()
    print urllist
    self.urllist = urllist
    self.csv = Csv('scrapper.csv')
class AmazonScrapper(QThread):
    notifyAmazon = pyqtSignal(object)

    def __init__(self, urlList, category):
        QThread.__init__(self)
        self.logger = LogManager(__name__)
        self.spider = Spider()
        self.regex = Regex()
        self.utils = Utils()
        self.urlList = urlList
        self.category = category
        dupCsvReader = Csv()
        self.dupCsvRows = dupCsvReader.readCsvRow(category + '.csv')
        self.csvWriter = Csv(category + '.csv')
        csvDataHeader = ['SKU', 'Title', 'Sub Title', 'Price', 'Shipping Weight', 'Image URL']
        if csvDataHeader not in self.dupCsvRows:
            self.dupCsvRows.append(csvDataHeader)
            self.csvWriter.writeCsvRow(csvDataHeader)
        self.mainUrl = 'http://www.amazon.com'
        self.scrapUrl = None
        self.dbHelper = DbHelper('amazon.db')
        self.dbHelper.createTable(category)
        self.total = self.dbHelper.getTotalProduct(category)

    def run(self, retry=0):
        try:
            # self.scrapProductDetail(
            #     'http://www.amazon.com/Casio-MRW-S300H-8BVCF-Solar-Powered-Analog/dp/B00ELALKH2/ref=sr_1_544/184-7248556-2619812?s=watches&ie=UTF8&qid=1397580509&sr=1-544')
            # return
            if self.urlList is not None and len(self.urlList):
                for url in self.urlList:
                    if len(url) > 0:
                        url = self.regex.replaceData('(?i)\r', '', url)
                        url = self.regex.replaceData('(?i)\n', '', url)
                        self.notifyAmazon.emit('<font color=green><b>Amazon Main URL: %s</b></font>' % url)
                        imUrl = None
                        retry = 0
                        while imUrl is None and retry < 4:
                            imUrl = self.reformatUrl(url)
                            retry += 1
                        if imUrl is None:
                            imUrl = url
                        self.total = 0
                        print 'URL: ' + str(imUrl)
                        sortList = ['relevance-fs-browse-rank', 'price', '-price', 'reviewrank_authority',
                                    'date-desc-rank']
                        for sort in sortList:
                            self.scrapReformatData(imUrl, sort)
                        self.notifyAmazon.emit(
                            '<font color=red><b>Finish data for Amazon Main URL: %s</b></font><br /><br />' % url)
            self.notifyAmazon.emit('<font color=red><b>Amazon Data Scraping finished.</b></font>')
        except Exception, x:
            print x.message
            self.logger.error('Exception at run: ', x.message)
            if retry < 5:
                self.run(retry + 1)
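# Hedged wiring sketch for the thread above: `category_url` and the `log` slot are
# illustrative; urlList would normally be supplied by the GUI.
amazon = AmazonScrapper([category_url], 'watches')
amazon.notifyAmazon.connect(log)  # display the emitted HTML status strings
amazon.start()                    # run() iterates the URLs and scrapes every sort order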
def __init__(self):
    self.logger = LogManager(__name__)
    self.spider = Spider()
    self.regex = Regex()
    self.utils = Utils()
    self.loginUrl = 'http://www.v4.penta-transaction.com/telematica_v4/login_ing.jsp'
    self.username = '******'
    self.password = '******'
    self.collectionUrl = 'http://www.trggroup.net/victorinox/index.php?p=124'
    self.mainUrl = 'http://www.penta-transaction.com'
def __init__(self):
    self.logger = LogManager(__name__)
    self.spider = Spider()
    self.browser = BrowserUtil()
    self.regex = Regex()
    self.utils = Utils()
    self.csvHeader = ['Category', 'Sub Category 1', 'Sub Category 2', 'Product Code', 'Product Name',
                      'Product ShortName', 'Product Description', 'List Price', 'Vendor Price', 'Availability',
                      'Power', 'Size', 'KW', 'Weight(kg)', 'Other Tech', 'Pdf File', 'Image File']
    self.totalProducts = 0
def __init__(self, filename):
    self.logger = LogManager(__name__)
    self.spider = Spider()
    self.regex = Regex()
    self.utils = Utils()
    self.filename = filename
    self.url = 'http://topsy.com/s?'
    self.csvWriter = Csv('topsy.csv')
    csvDataHeader = ['Keyword', 'Tweets in last 30 days', 'Topsy Sentiment Score', ' Date of scrape']
    self.csvWriter.writeCsvRow(csvDataHeader)
class PentaTransaction():
    def __init__(self):
        self.logger = LogManager(__name__)
        self.spider = Spider()
        self.regex = Regex()
        self.utils = Utils()
        self.loginUrl = 'http://www.v4.penta-transaction.com/telematica_v4/login_ing.jsp'
        self.username = '******'
        self.password = '******'
        self.collectionUrl = 'http://www.trggroup.net/victorinox/index.php?p=124'
        self.mainUrl = 'http://www.penta-transaction.com'

    def scrapData(self):
        self.onLogin()
        data = self.spider.fetchData(self.mainUrl)
        if data and len(data) > 0:
            data = self.regex.reduceNewLine(data)
            data = self.regex.reduceBlankSpace(data)
            print data

    def onLogin(self):
        '''
        Credentials are:
        action login_access
        password sdfsdf
        username sdfsdf
        '''
        try:
            print self.loginUrl
            loginCredentials = {'username': self.username, 'password': self.password}
            loginData = self.spider.login(self.loginUrl, loginCredentials)
            if loginData and len(loginData) > 0:
                loginData = self.regex.reduceNewLine(loginData)
                loginData = self.regex.reduceBlankSpace(loginData)
                print 'Login: ', loginData
                return True
        except Exception, x:
            print x
            print 'There was an error when login'
        return False
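# Usage sketch: the credentials above are redacted placeholders and must be filled in;
# scrapData() logs in first and then fetches the main page.
penta = PentaTransaction()
penta.scrapData()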
def __init__(self, filename):
    self.logger = LogManager(__name__)
    self.spider = Spider()
    self.regex = Regex()
    self.utils = Utils()
    self.filename = filename
    self.url = 'https://www.google.com/finance?'
    self.main_url = 'https://www.google.com'
    self.csvWriter = Csv('google_finance.csv')
    csvDataHeader = ['Ticker Symbol', 'Quarter End', 'Revenue', 'Total Revenue', 'Date of Scrape']
    self.csvWriter.writeCsvRow(csvDataHeader)
def __init__(self, url):
    self.logger = LogManager(__name__)
    self.spider = Spider()
    self.regex = Regex()
    self.utils = Utils()
    self.url = url
    self.base_product_url = 'http://www.amazon.com/dp/'
    self.base_image_url = 'http://ecx.images-amazon.com/images/I/'
    self.csvWriter = Csv('amazon.csv')
    csvDataHeader = ['URL', 'HTML Path', 'Image URLS']
    self.csvWriter.writeCsvRow(csvDataHeader)
def __init__(self):
    QThread.__init__(self)
    self.logger = LogManager(__name__)
    self.spider = Spider()
    self.regex = Regex()
    self.mainUrl = 'http://www.bertos.com'
    self.utils = Utils()
    self.csvHeader = ['Home Category', 'Sub Category', 'Category Description', 'Category Image', 'Code',
                      'Product Code', 'Product Name', 'Product Description', 'Product Image File',
                      'Technical Sheet File', 'Exploded View File']
    self.totalProducts = 0
def downloadFile(self, url, downloadPath, proxyHandler=None):
    try:
        regex = Regex()
        opener = urllib2.build_opener(urllib2.HTTPRedirectHandler(),
                                      urllib2.HTTPHandler(debuglevel=0),
                                      urllib2.HTTPSHandler(debuglevel=0))
        opener.addheaders = [
            config.USER_AGENT,
            ('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'),
            ('Connection', 'keep-alive')
        ]
        if proxyHandler is not None:
            opener.add_handler(proxyHandler)

        resp = urllib2.urlopen(url, timeout=30)
        contentLength = resp.info()['Content-Length']
        contentLength = regex.getSearchedData('(?i)^(\d+)', contentLength)
        totalSize = float(contentLength)

        directory = os.path.dirname(downloadPath)
        if not os.path.exists(directory):
            os.makedirs(directory)

        dl_file = open(downloadPath, 'wb')
        currentSize = 0
        CHUNK_SIZE = 32768
        while True:
            data = resp.read(CHUNK_SIZE)
            if not data:
                break
            currentSize += len(data)
            dl_file.write(data)
            print('============> ' + str(round(float(currentSize * 100) / totalSize, 2)) + '% of ' +
                  str(totalSize) + ' bytes')
            if currentSize >= totalSize:
                dl_file.close()
                return True
    except Exception, x:
        print x
class TopsyScrapper:
    isFinished = False

    def __init__(self, filename):
        self.logger = LogManager(__name__)
        self.spider = Spider()
        self.regex = Regex()
        self.utils = Utils()
        self.filename = filename
        self.url = 'http://topsy.com/s?'
        self.csvWriter = Csv('topsy.csv')
        csvDataHeader = ['Keyword', 'Tweets in last 30 days', 'Topsy Sentiment Score', ' Date of scrape']
        self.csvWriter.writeCsvRow(csvDataHeader)

    def run(self):
        self.scrapData()
        self.csvWriter.closeWriter()

    def scrapData(self):
        try:
            file = open(self.filename, 'rb')
            for line in file.readlines():
                if self.isFinished: return
                line = self.regex.replaceData('\r+', '', line)
                line = self.regex.reduceNewLine(line)
                line = self.regex.reduceBlankSpace(line)
                line = line.strip()
                if len(line) > 0:
                    params = urllib.urlencode({'q': line, 'window': 'm', 'type': 'tweet'})
                    url = self.url + params
                    self.scrapBrowserData(url, line)
        except Exception, x:
            print x
def __init__(self):
    QThread.__init__(self)
    self.logger = LogManager(__name__)
    self.spider = Spider()
    self.regex = Regex()
    dupCsvReader = Csv()
    self.dupCsvRows = dupCsvReader.readCsvRow('cs_cat.csv')
    self.csvWriter = Csv('cs_cat.csv')
    dupFilterCsvReader = Csv()
    self.dupFilterCsvRows = dupFilterCsvReader.readCsvRow('filter_cat' + '.csv')
    self.csvW = Csv('filter_cat' + '.csv')
    self.mainUrl = 'http://www.cs-catering-equipment.co.uk/'
    self.totalCategory = 0
def __init__(self):
    self.logger = LogManager(__name__)
    self.spider = Spider()
    self.regex = Regex()
    self.utils = Utils()
    self.main_url = 'http://www.walgreens.com'
    self.url = 'http://www.walgreens.com/store/catalog/shopLanding'
    self.sitemap_xml = 'http://www.walgreens.com/sitemap.xml'
    dupCsvReader = Csv()
    self.dupCsvRows = dupCsvReader.readCsvRow('walgreens.csv')
    self.csvWriter = Csv('walgreens.csv')
    csvDataHeader = ['Product Name', 'Price', 'Description', 'Shipping', 'Ingredients', 'Image']
    if csvDataHeader not in self.dupCsvRows:
        self.csvWriter.writeCsvRow(csvDataHeader)
def __init__(self):
    QtCore.QThread.__init__(self)
    self.isExiting = False
    self.logger = LogManager(__name__)
    self.spider = Spider()
    self.regex = Regex()
    dupCsvReader = Csv()
    self.dupCsvRows = dupCsvReader.readCsvRow('nisbetCat.csv')
    self.csvWriter = Csv('nisbetCat.csv')
    self.mainUrl = 'http://www.nisbets.co.uk'
    csvHeaderList = ['Parent Category', 'Category Name', 'Category Description']
    if csvHeaderList not in self.dupCsvRows:
        self.csvWriter.writeCsvRow(csvHeaderList)
        self.dupCsvRows.append(csvHeaderList)
def downloadFile(self, url, downloadPath, proxyHandler=None):
    try:
        regex = Regex()
        opener = urllib2.build_opener(urllib2.HTTPRedirectHandler(),
                                      urllib2.HTTPHandler(debuglevel=0),
                                      urllib2.HTTPSHandler(debuglevel=0))
        opener.addheaders = [
            config.USER_AGENT,
            ('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'),
            ('Connection', 'keep-alive')]
        if proxyHandler is not None:
            opener.add_handler(proxyHandler)

        resp = urllib2.urlopen(url, timeout=30)
        contentLength = resp.info()['Content-Length']
        contentLength = regex.getSearchedData('(?i)^(\d+)', contentLength)
        totalSize = float(contentLength)

        directory = os.path.dirname(downloadPath)
        if not os.path.exists(directory):
            os.makedirs(directory)

        dl_file = open(downloadPath, 'wb')
        currentSize = 0
        CHUNK_SIZE = 32768
        while True:
            data = resp.read(CHUNK_SIZE)
            if not data:
                break
            currentSize += len(data)
            dl_file.write(data)
            print('============> ' + str(round(float(currentSize * 100) / totalSize, 2)) + '% of ' + str(
                totalSize) + ' bytes')
            if currentSize >= totalSize:
                dl_file.close()
                return True
    except Exception, x:
        print x
class GoogleFinanceScrapper:
    isFinished = False

    def __init__(self, filename):
        self.logger = LogManager(__name__)
        self.spider = Spider()
        self.regex = Regex()
        self.utils = Utils()
        self.filename = filename
        self.url = 'https://www.google.com/finance?'
        self.main_url = 'https://www.google.com'
        self.csvWriter = Csv('google_finance.csv')
        csvDataHeader = ['Ticker Symbol', 'Quarter End', 'Revenue', 'Total Revenue', 'Date of Scrape']
        self.csvWriter.writeCsvRow(csvDataHeader)

    def run(self):
        self.scrapData()
        self.csvWriter.closeWriter()

    def scrapData(self):
        try:
            file = open(self.filename, 'rb')
            for line in file.readlines():
                if self.isFinished: return
                line = self.regex.replaceData('\r+', '', line)
                line = self.regex.reduceNewLine(line)
                line = self.regex.reduceBlankSpace(line)
                line = line.strip()
                params = urllib.urlencode({'q': line})
                url = self.url + params
                self.scrapBykeyword(url, line)
        except Exception, x:
            print x
            self.logger.error('Error: ' + x.message)
class WalgreensScrapper():
    def __init__(self):
        self.logger = LogManager(__name__)
        self.spider = Spider()
        self.regex = Regex()
        self.utils = Utils()
        self.main_url = 'http://www.walgreens.com'
        self.url = 'http://www.walgreens.com/store/catalog/shopLanding'
        self.sitemap_xml = 'http://www.walgreens.com/sitemap.xml'
        dupCsvReader = Csv()
        self.dupCsvRows = dupCsvReader.readCsvRow('walgreens.csv')
        self.csvWriter = Csv('walgreens.csv')
        csvDataHeader = ['Product Name', 'Price', 'Description', 'Shipping', 'Ingredients', 'Image']
        if csvDataHeader not in self.dupCsvRows:
            self.csvWriter.writeCsvRow(csvDataHeader)

    def scrapData(self):
        try:
            print 'First scrapping sitemap...'
            self.scrapSiteMap()
            print 'Main URL: ' + self.url
            data = self.spider.fetchData(self.url)
            if data and len(data) > 0:
                data = self.regex.reduceNewLine(data)
                data = self.regex.reduceBlankSpace(data)
                soup = BeautifulSoup(data)
                categoryBar = soup.find('div', class_='wid150 padrt5px padlt5px float-left')
                if categoryBar:
                    categories = categoryBar.find_all('li')
                    for category in categories:
                        category_url = self.main_url + category.a.get('href')
                        self.scrapCategory(category_url)
        except Exception, x:
            print x
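# Usage sketch: scrapData() first walks sitemap.xml, then iterates the category bar on
# the shop-landing page and scrapes every category URL it finds into walgreens.csv.
walgreens = WalgreensScrapper()
walgreens.scrapData()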
def __init__(self):
    QtCore.QThread.__init__(self)
    self.isExiting = False
    self.logger = LogManager(__name__)
    self.spider = Spider()
    self.regex = Regex()
    dupCsvReader = Csv()
    self.dupCsvRows = dupCsvReader.readCsvRow('nisbets.csv', 0)
    self.csvWriter = Csv('nisbets.csv')
    self.mainUrl = 'http://www.nisbets.co.uk'
    csvHeaderList = [
        'URL', 'Product Code', 'Product Technical Specifications', 'Product Name', 'Brand', 'Product Price',
        'Product Short Description', 'Product Long Description', 'Image File Name', 'User Manual File Name',
        'Exploded View File Name', 'Spares Code', 'Accessories', 'Product Status',
        'Category1', 'Category2', 'Category3', 'Category4'
    ]
    if 'URL' not in self.dupCsvRows:
        self.csvWriter.writeCsvRow(csvHeaderList)
        self.dupCsvRows.append(csvHeaderList[0])
    self.utils = Utils()
def __init__(self):
    self.browser = None
    self.url = "http://environmentclearance.nic.in/Search.aspx"
    self.statuses = []
    self.categories = []
    self.years = []
    self.states = []
    self.csvDataHeader = [
        'Status', 'Category', 'Year', 'State', 'Serial No', 'Proposal details', 'Location',
        'Important Date', 'Category', 'Company Proponent'
    ]
    self.logger = LogManager(__name__)
    self.regex = Regex()
    dupCsvReader = Csv()
    self.dupCsvRows = dupCsvReader.readCsvRow('env_clearance.csv')
    self.csvWriter = Csv('env_clearance.csv')
    if self.csvDataHeader not in self.dupCsvRows:
        self.csvWriter.writeCsvRow(self.csvDataHeader)
        self.dupCsvRows.append(self.csvDataHeader)
class WpScrapper():
    def __init__(self, input_file, output_file):
        self.logger = LogManager(__name__)
        self.spider = Spider()
        self.regex = Regex()
        self.utils = Utils()
        self.input_file = input_file
        self.output_file = output_file

    def scrapData(self):
        csv_writer = csv.writer(open(self.output_file, 'wb'), delimiter=';')
        with open(self.input_file, 'rb') as csvfile:
            csv_rows = csv.reader(csvfile, delimiter=';')
            rows = list(csv_rows)
            total = len(rows)
            counter = 0
            for row in rows:
                counter += 1
                print '---------------- Checking [%d] of [%d] records. ----------------------' % (counter, total)
                self.logger.debug('Checking %d of %d records.' % (counter, total))

                domain = 'http://' + row[0] + '/wp-login.php'
                https_domain = 'https://' + row[0] + '/wp-login.php'
                wp_admin = 'http://' + row[0] + '/wp-admin/'
                https_wp_admin = 'https://' + row[0] + '/wp-admin/'
                username = row[1]
                password = row[2]
                status = 0

                print 'Login Credential => Domain: ' + domain + ' User: ' + username + ' Password: ' + password
                self.logger.debug('Login Credential => Domain: ' + domain + ' User: ' + username + ' Password: ' + password)
                if self.onLogin(domain, https_domain, wp_admin, https_wp_admin, username, password):
                    print 'Successfully logged in.'
                    self.logger.debug('Successfully logged in.')
                    status = 1
                else:
                    print 'Login failed!'
                    self.logger.debug('Login failed!')

                csv_writer.writerow([row[0], username, password, status])
                print '---------------- End of checking [%d] of [%d] records. ----------------------' % (counter, total)
                print '\n\n'

    def onLogin(self, url, https_url, wp_url, https_wp_url, username, password):
        '''
        Credentials are:
        action login_access
        password sdfsdf
        username sdfsdf
        '''
        try:
            loginCredentials = {'log': username, 'pwd': password, 'redirect_to': wp_url}
            print 'Credentials', loginCredentials
            print 'Please wait...Try to login with your credentials.'
            redirected_url, loginData = self.spider.login(url, loginCredentials)
            print 'redirected url: ', redirected_url
            if loginData and len(loginData) > 0:
                loginData = self.regex.reduceNewLine(loginData)
                loginData = self.regex.reduceBlankSpace(loginData)
                print 'After login data: ', loginData
            if redirected_url is not None and redirected_url.strip() == wp_url.strip():
                return True
            # if loginData and len(loginData) > 0:
            #     loginData = self.regex.reduceNewLine(loginData)
            #     loginData = self.regex.reduceBlankSpace(loginData)
            #     soup = BeautifulSoup(loginData)
            #     if soup.find('div', {'id': 'login_error'}):
            #         return False
            #     else:
            #         return True
        except Exception, x:
            print x
            print 'There was an error when login with http'
            try:
                https_loginCredentials = {'log': username, 'pwd': password, 'redirect_to': https_wp_url}
                print 'Credentials', https_loginCredentials
                print 'Please wait...Try to login with your credentials.'
                https_redirected_url, https_login_data = self.spider.login(https_url, https_loginCredentials)
                if https_redirected_url is not None and https_redirected_url.strip() == https_wp_url.strip():
                    return True
            except Exception, x:
                print x
                print 'There was an error when login with https'
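# Usage sketch (file names are illustrative): the input CSV is ';'-delimited with
# domain;username;password per row; the output CSV receives domain;username;password;status.
wp = WpScrapper('wp_sites.csv', 'wp_login_status.csv')
wp.scrapData()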
class MyLinkedInMembers(QThread):
    notifyLinkedIn = pyqtSignal(object)
    notifyMembers = pyqtSignal(object)
    cookieL = pyqtSignal(object)

    def __init__(self, spider, url, pageRange=None):
        QThread.__init__(self)
        # self.spider = Spider()
        self.spider = spider
        self.regex = Regex()
        self.url = url
        self.startPage = None
        self.endPage = None
        if self.regex.isFoundPattern('(?i)(\d+)-(\d+)', str(pageRange).strip()):
            pageRangeFormat = self.regex.getSearchedDataGroups('(?i)(\d+)-(\d+)', str(pageRange).strip())
            self.startPage = int(pageRangeFormat.group(1))
            self.endPage = int(pageRangeFormat.group(2))
        elif self.regex.isFoundPattern('(?i)(\d+)', str(pageRange).strip()):
            pageRangeFormat = self.regex.getSearchedDataGroups('(?i)(\d+)', str(pageRange).strip())
            self.startPage = int(pageRangeFormat.group(1))
            self.endPage = self.startPage

    def run(self):
        self.getMembers(self.url)
        self.notifyLinkedIn.emit('<font color=red><b>Finish scraping members.<b></font>')

    def getMembers(self, url, pageNumber=0):
        print 'Members URL: ' + url
        self.notifyLinkedIn.emit('<font color=green><b>Start Scraping All Members.<b></font>')
        self.notifyLinkedIn.emit('<b>Wait For 15 seconds Break...<b>')
        time.sleep(15)
        self.notifyLinkedIn.emit('<b>15 seconds Break Finish.<b>')
        groupData = self.spider.fetchData(str(url).replace('&amp;', '&'))
        groupData = self.regex.reduceNewLine(groupData)
        groupData = self.regex.reduceBlankSpace(groupData)
        print groupData
        print 'page number: ' + str(pageNumber)

        if pageNumber > 0:
            harvestedMembers = []
            allMembers = self.regex.getAllSearchedData('(?i)<li class="member" id="member-[^"]*"[^>]*?>(.*?)</div>', groupData)
            for members in allMembers:
                memberId = self.regex.getSearchedData('(?i)data-li-memberId="([^"]*)"', members)
                memberName = self.regex.getSearchedData('(?i)data-li-fullName="([^"]*)"', members)
                memberTitle = self.regex.getSearchedData('(?i)<p class="headline">([^<]*?)</p>', members)
                memberTitle = self.regex.replaceData('(?i)&amp;', '&', memberTitle)
                harvestedMembers.append((memberId, memberName, memberTitle))
                self.notifyLinkedIn.emit('<b>Member ID: </b>%s <b>Member Name: </b>%s' % (memberId, memberName + ' (' + memberTitle + ')'))
            # members = self.regex.getAllSearchedData(
            #     '(?i)class="send-message" data-li-memberId="([^"]*)" data-li-fullName="([^"]*)"', groupData)
            # print members
            self.notifyMembers.emit(harvestedMembers)
            # for member in members:
            #     print member
            #     self.notifyLinkedIn.emit('<b>Member Name: </b>%s <b>Member ID: </b>%s' % (member[1], member[0]))

        urlNext = self.regex.getSearchedData('(?i)<a href="([^"]*)"[^>]*?>\s*?<strong>\s*?next', groupData)
        if urlNext and len(urlNext) > 0:
            # nextP = int(self.regex.getSearchedData('(?i).*?(\d+)$', urlNext.strip()))
            urlNext = self.regex.replaceData('(?i)&amp;', '&', urlNext)
            urlNext = self.regex.replaceData('(?i)split_page=\d+', 'split_page=', urlNext)
            pageNumber += 1
            if self.startPage <= pageNumber <= self.endPage:
                self.notifyLinkedIn.emit('<b>Wait for 15 second break...</b>')
                time.sleep(15)
                print 'sleep 15 s'
                self.notifyLinkedIn.emit('<b>15 second break finish!!!</b>')
                self.getMembers('http://www.linkedin.com' + urlNext + str(pageNumber), pageNumber)
            elif pageNumber < self.startPage:
                pageNumber = self.startPage
                self.notifyLinkedIn.emit('<b>Wait for 15 second break...</b>')
                time.sleep(15)
                print 'page number less 0 sleep'
                self.notifyLinkedIn.emit('<b>15 second break finish!!!</b>')
                self.getMembers('http://www.linkedin.com' + urlNext + str(pageNumber), pageNumber)
            if self.startPage is None and self.endPage is None:
                pageNumber += 1
                self.notifyLinkedIn.emit('<b>Wait for 15 second break...</b>')
                time.sleep(15)
                print 'page number less 0 sleep'
                self.notifyLinkedIn.emit('<b>15 second break finish!!!</b>')
                self.getMembers('http://www.linkedin.com' + urlNext + str(pageNumber), pageNumber)
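# Hedged wiring sketch: `spider` must be the already-logged-in Spider emitted by
# MyLinkedIn.cookieL; `group_members_url` and the `showMembers` slot are illustrative.
membersThread = MyLinkedInMembers(spider, group_members_url, pageRange='1-5')
membersThread.notifyMembers.connect(showMembers)  # receives the harvested (id, name, title) tuples
membersThread.start()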
class MyLinkedIn(QThread):
    notifyLinkedIn = pyqtSignal(object)
    notifyMember = pyqtSignal(object)
    cookieL = pyqtSignal(object)

    def __init__(self, username, password):
        QThread.__init__(self)
        self.spider = Spider()
        self.regex = Regex()
        self.username = username
        self.password = password

    def run(self):
        if self.login():
            self.getAllGroups()

    def login(self):
        print 'login start'
        self.notifyLinkedIn.emit('<b>Trying to login. Please wait...</b>')
        loginPageData = self.spider.fetchData('https://www.linkedin.com/uas/login?goback=&trk=hb_signin')
        loginPageData = self.regex.reduceNewLine(loginPageData)
        loginPageData = self.regex.reduceBlankSpace(loginPageData)

        ## <input type="hidden" name="session_redirect" value="" id="session_redirect-login"><input type="hidden" name="csrfToken" value="ajax:9073845200579364133" id="csrfToken-login"><input type="hidden" name="sourceAlias" value="0_7r5yezRXCiA_H0CRD8sf6DhOjTKUNps5xGTqeX8EEoi" id="sourceAlias-login">
        self.sessionRedirect = self.regex.getSearchedData(
            '(?i)<input type="hidden" name="session_redirect" value="([^"]*)"', loginPageData)
        self.token = self.regex.getSearchedData(
            '(?i)<input type="hidden" name="csrfToken" value="([^"]*)"', loginPageData)
        self.alias = self.regex.getSearchedData(
            '(?i)<input type="hidden" name="sourceAlias" value="([^"]*)"', loginPageData)

        loginParam = {
            'csrfToken': self.token,
            'isJsEnabled': 'true',
            'session_key': self.username,
            'session_password': self.password,
            # 'session_key': '*****@*****.**',
            # 'session_password': '******',
            'session_redirect': self.sessionRedirect,
            'signin': 'Sign In',
            'sourceAlias': self.alias,
            'source_app': ''
        }
        print loginParam
        print 'start login'
        time.sleep(5)
        loginData = self.spider.login('https://www.linkedin.com/uas/login-submit', loginParam)
        loginData = self.regex.reduceNewLine(loginData)
        loginData = self.regex.reduceBlankSpace(loginData)
        # print loginData
        isLoggedIn = self.regex.isFoundPattern('(?i)<li class="signout">', loginData)
        if isLoggedIn:
            self.notifyLinkedIn.emit('<font color=green><b>Successfully Logged In.</b></font>')
            print 'login success'
            self.cookieL.emit(self.spider)
            return True
        else:
            self.notifyLinkedIn.emit(
                '<font color=red><b>Something wrong with logging in. Please try again or check manually with this username/password</b></font>')
            return False

    def getAllGroups(self):
        print 'start groups'
        self.notifyLinkedIn.emit('<font color=green><b>Start Scraping All Groups.</b></font>')
        self.notifyLinkedIn.emit('<b>Wait for 15 second break...</b>')
        time.sleep(15)
        self.notifyLinkedIn.emit('<b>15 second break finish!!!</b>')
        self.notifyLinkedIn.emit('<font color=green><b>Fetching data for scraping your groups.</b></font>')
        groupsUrl = 'http://www.linkedin.com/myGroups?trk=hb_side_grps_top'
        groupsData = self.spider.fetchData(groupsUrl)
        self.notifyLinkedIn.emit('<font color=green><b>Data fetching complete for scraping your groups.</b></font>')
        if groupsData is not None and len(groupsData) > 0:
            print 'starting groups'
            groupsData = self.regex.reduceNewLine(groupsData)
            groupsData = self.regex.reduceBlankSpace(groupsData)
            print groupsData
            ## <a href="/groups?gid=72881&trk=myg_ugrp_ovr" class="private" title="This group is members only">MySQL Professionals</a>
            groupInfo = self.regex.getAllSearchedData('(?i)<a href="(/groups\?gid=[^"]*)"[^>]*>([^<]*)</a>', groupsData)
            if groupInfo is not None and len(groupInfo) > 0:
                members = []
                for group in groupInfo:
                    groupUrl = 'http://www.linkedin.com' + str(group[0])
                    groupName = str(group[1])
                    self.notifyLinkedIn.emit('<b>Group Name: </b>%s <b>URL: </b>%s' % (groupName, groupUrl))
                    # http://www.linkedin.com/groups?members=&gid=65688&trk=anet_ug_memb
                    gid = self.regex.getSearchedData('(?i)gid=(\d+)', group[0])
                    print gid
                    groupUrl = 'http://www.linkedin.com/groups?members=&gid=' + gid + '&trk=anet_ug_memb'
                    members.append((groupName, groupUrl))
                self.notifyMember.emit(members)
        self.notifyLinkedIn.emit('<font color=red><b>Finish Scraping All Groups.</b></font>')
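# Hedged wiring sketch: credentials and the keepCookie/showGroups slots are illustrative.
# keepCookie would store the logged-in Spider emitted via cookieL so MyLinkedInMembers
# above can reuse the authenticated session.
linkedIn = MyLinkedIn('user@example.com', 'secret')
linkedIn.cookieL.connect(keepCookie)
linkedIn.notifyMember.connect(showGroups)
linkedIn.start()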
def __init__(self, username, password):
    QThread.__init__(self)
    self.spider = Spider()
    self.regex = Regex()
    self.username = username
    self.password = password
class WebTable(): def __init__(self): self.logger = LogManager(__name__) self.spider = Spider() self.browser = BrowserUtil() self.regex = Regex() self.utils = Utils() self.csvHeader = [ 'Category', 'Sub Category 1', 'Sub Category 2', 'Product Code', 'Product Name', 'Product ShortName', 'Product Description', 'List Price', 'Vendor Price', 'Availability', 'Power', 'Size', 'KW', 'Weight(kg)', 'Other Tech', 'Pdf File', 'Image File' ] self.totalProducts = 0 def scrapData(self): postParams = { '__ASYNCPOST': 'true', '__EVENTVALIDATION': '/wEWWwKSuN/3AgLi8PP9DgKzpIWvCQKQ3IFsAve1x5EPAu7Dza4GArPM1qoEAvjBhsQDAvjB6qkLAvjB/o4CAvjBwtMJApP48MoOApP4xK8GApP46EYCk/j8qwgCk/jA8AcCk/jU1Q4Ck/i4uQYCk/iMng0Ck/iQ4wQCk/jkyAMC15uNvgYC15uRgw0C15uluggC15uJnwcC15ud5A4C15vhyQUC15v1rg0C15vZ8wQC15ut1wMC15uxvAsC6rKvkwgC6rKz+AcC6rLHkAIC6rKr9AkC6rK/WQLqsoO+CALqspeDBwLqsvvoDgLqss/NBQLqstOSDQK0wsnaCgL4+7LBAQLP5JaqAQKc4P/CDQLl7berDgLurP6CDALvn+2eCwK4pIGBDwKvytzABgLTu7vHBgKFmtaAAwKn0anxCwKZwpi3CgLjlM+OAwLCoMjqAQLWq7m2BALlnqSNBwKbwPKfBgL5j7vvBAKRy8fpCAKI3rXQBwLBhpnRCwLgqNqjBQLEmsPUBgL26MCGDwL0wbKZDgL16ePjAQLhraHjBAKx7Y+rCwKu+uSNDQKDp4fFBwLnmpaQCQKU2LWMCALev//ADgK9osaHBALArtXWDgKhp8iCAwKCs5DBAgKPnOP3DwK0uumDDwKJ4eXWBAKK+5r7AwLj4sWCAQKJgZPYBQL2mPvKBgL/hob0BAKsyvbZDAKSoqqWDwLSwpnTCALN797vDL/8819r5pdL6i1kQizMsBPt83oZ', '__VIEWSTATE': '/wEPDwUKMTU5MjIyNTQ2OQ9kFgICAw9kFgQCAQ9kFgJmD2QWAgIBD2QWAmYPZBYCZg9kFgYCBw8QZBAVIwstQWxsIFllYXJzLQQyMDEzBDIwMTIEMjAxMQQyMDEwBDIwMDkEMjAwOAQyMDA3BDIwMDYEMjAwNQQyMDA0BDIwMDMEMjAwMgQyMDAxBDIwMDAEMTk5OQQxOTk4BDE5OTcEMTk5NgQxOTk1BDE5OTQEMTk5MwQxOTkyBDE5OTEEMTk5MAQxOTg5BDE5ODgEMTk4NwQxOTg2BDE5ODUEMTk4NAQxOTgzBDE5ODIEMTk4MQQxOTgwFSMLLUFsbCBZZWFycy0EMjAxMwQyMDEyBDIwMTEEMjAxMAQyMDA5BDIwMDgEMjAwNwQyMDA2BDIwMDUEMjAwNAQyMDAzBDIwMDIEMjAwMQQyMDAwBDE5OTkEMTk5OAQxOTk3BDE5OTYEMTk5NQQxOTk0BDE5OTMEMTk5MgQxOTkxBDE5OTAEMTk4OQQxOTg4BDE5ODcEMTk4NgQxOTg1BDE5ODQEMTk4MwQxOTgyBDE5ODEEMTk4MBQrAyNnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2RkAgkPZBYCZg9kFgICAQ8QDxYGHg1EYXRhVGV4dEZpZWxkBQ5wcm9qX2NvZGVfbmFtZR4ORGF0YVZhbHVlRmllbGQFCXByb2pfY29kZR4LXyFEYXRhQm91bmRnZBAVCQ4tQWxsIENhdGVnb3J5LQtDb2FsIE1pbmluZxNJbmR1c3RyaWFsIFByb2plY3RzMUluZnJhc3RydWN0dXJlIGFuZCBNaXNjZWxsYW5lb3VzIFByb2plY3RzICAmICBDUloPTWluaW5nIFByb2plY3RzMk5ldyBDb25zdHJ1Y3Rpb24gUHJvamVjdHMgYW5kICBJbmR1c3RyaWFsICBFc3RhdGVzEU51Y2xlYXIgIFByb2plY3RzJ1JpdmVyIFZhbGxleSBhbmQgSHlkcm9lbGVjdHJpYyBQcm9qZWN0cxBUaGVybWFsIFByb2plY3RzFQkOLUFsbCBDYXRlZ29yeS0EQ01JTgNJTkQDTUlTA01JTgNOQ1ADTlVDA1JJVgNUSEUUKwMJZ2dnZ2dnZ2dnZGQCCw8QDxYGHwAFCnN0YXRlX25hbWUfAQUKc3RhdGVfbmFtZR8CZ2QQFSULLUFsbCBTdGF0ZS0TQW5kYW1hbiBhbmQgTmljb2Jhcg5BbmRocmEgUHJhZGVzaBFBcnVuYWNoYWwgUHJhZGVzaAVBc3NhbQVCaWhhcgpDaGFuZGlnYXJoDENoaGF0dGlzZ2FyaBREYWRhciAmIE5hZ2FyIEhhdmVsaQ1EYW1hbiBhbmQgRGl1BURlbGhpA0dvYQdHdWphcmF0B0hhcnlhbmEQSGltYWNoYWwgUHJhZGVzaBFKYW1tdSBhbmQgS2FzaG1pcglKaGFya2hhbmQJS2FybmF0YWthBktlcmFsYQtMYWtzaGFkd2VlcA5NYWRoeWEgUHJhZGVzaAtNYWhhcmFzaHRyYQdNYW5pcHVyCU1lZ2hhbGF5YQdNaXpvcmFtCE5hZ2FsYW5kBk9ycmlzYQZPdGhlcnMLUG9uZGljaGVycnkGUHVuamFiCVJhamFzdGhhbgZTaWtraW0KVGFtaWwgTmFkdQdUcmlwdXJhDVV0dGFyIFByYWRlc2gLVXR0YXJha2hhbmQLV2VzdCBCZW5nYWwVJQstQWxsIFN0YXRlLRNBbmRhbWFuIGFuZCBOaWNvYmFyDkFuZGhyYSBQcmFkZXNoEUFydW5hY2hhbCBQcmFkZXNoBUFzc2FtBUJpaGFyCkNoYW5kaWdhcmgMQ2hoYXR0aXNnYXJoFERhZGFyICYgTmFnYXIgSGF2ZWxpDURhbWFuIGFuZCBEaXUFRGVsaGkDR29hB0d1amFyYXQHSGFyeWFuYRBIaW1hY2hhbCBQcmFkZXNoEUphbW11IGFuZCBLYXNobWlyCUpoYXJraGFuZAlLYXJuYXRha2EGS2VyYWxhC0xha3NoYWR3ZWVwDk1hZGh5YSBQcmFkZXNoC01haGFyYXNodHJhB01hbmlwdXIJTWVnaGFsYXlhB01pem9yYW0ITmFnYWxhbmQGT3JyaXNhBk90aGVycwtQb25kaWNoZXJyeQZQdW5qYWIJUmFqYXN0aGFuBlNpa2tpbQpUYW1pbCBOYWR1B1RyaXB1cm
ENVXR0YXIgUHJhZGVzaAtVdHRhcmFraGFuZAtXZXN0IEJlbmdhbBQrAyVnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZGQCBQ9kFgJmD2QWAgIBD2QWAmYPZBYCZg9kFgICAQ88KwANAGQYAgUeX19Db250cm9sc1JlcXVpcmVQb3N0QmFja0tleV9fFgIFDEltYWdlQnV0dG9uMQUCc3MFCUdyaWRWaWV3MQ9nZJ2a7Ttf3vWdGuuLrnT2LMPjQW5x', 'btn': 'Search', 'ddlcategory': 'MIN', 'ddlstate': 'Gujarat', 'ddlstatus': 'UPEC', 'ddlyear': '2011', 'textbox2': '', 'ww': 'UpdatePanel3' } data = self.spider.fetchData( 'http://environmentclearance.nic.in/Search.aspx', postParams) print data data = self.spider.fetchData1( 'http://environmentclearance.nic.in/Search.aspx') print data # soup = BeautifulSoup(data) def scrapSubCat1(self, url): print 'url: ', url data = self.spider.fetchData(url) soup = BeautifulSoup(data) for cat in soup.find_all('td', {"class": re.compile("item_level")}): c = cat.find( "a", { "href": re.compile('rayons\.aspx\?value_path=.*?$'), "id": re.compile('ctl00_cph_center_dl_level_ctl\d+_a_level.*?$') }) if c: print c.string.strip() self.scrapSubCat2('http://www.diamond-europe.com/' + c.get("href")) def scrapSubCat2(self, url): print 'url1: ' + url data = self.spider.fetchData(url) soup = BeautifulSoup(data) for cat in soup.find_all('div', {'class': re.compile('bg_ombre')}): self.scrapProducts('http://www.diamond-europe.com/' + cat.find('a').get('href')) def scrapProducts(self, url): print 'url2', url data = self.spider.fetchData(url) soup = BeautifulSoup(data) results = soup.find('table', {'id': 'results'}) if results: for row in results.find_all('tr'): colRef = row.find('td', {'class': 'reference'}) if colRef: prCode = colRef.find('span', {'class': 'imp'}) price1 = colRef.find( 'span', { 'id': re.compile( 'ctl\d+_cph_center_r_articles_ctl\d+_l_prix_barre$' ) }) price2 = colRef.find('span', {'class', 'promo'}) print prCode.string.strip() print price1.string.strip() print price2.string.strip() coldesc = row.find('td', {'class': re.compile('description.*?$')}) if coldesc: pr = coldesc.find('a') print pr.string.strip() self.scrapProductDetails('http://www.diamond-europe.com/' + pr.get('href')) def scrapProductDetails(self, url): print 'Detail url: ' + url data = self.spider.fetchData(url) soup = BeautifulSoup(data) productDescS = soup.find('span', 'h1_nom_article') print productDescS.string.strip() productDesc = soup.find('div', {'id': 'article_right'}) print productDesc.text.strip() specs = soup.find('ul', {'id': 'spec_tech'}) if specs: print specs.contents """ __ASYNCPOST true __EVENTARGUMENT __EVENTTARGET ctl00$cph_center$menu_left1$lb_login __EVENTVALIDATION /wEWEwKOk7qrBAKG4eyLBALGw+PfBwK7jI7eDQL/2fqXBwLH9rmjDwLG2KLDCAKCvreACALPgYP1DQKqvLeACAKKtP7+DAL07MD3CwLksZZaAuSxmloCicn43Q8Cisn43Q8C/Iag2AMClcHvlQgCyNGw1Ax/PwzywfL/ooD/FU51memYxQ1U+Q== __LASTFOCUS __SCROLLPOSITIONX 0 __SCROLLPOSITIONY 0 __VIEWSTATE 
/wEPDwUINzkzMzQ5OTcPZBYCZg9kFgICAw9kFgICAQ9kFhICAw8WAh4LXyFJdGVtQ291bnQCBhYMZg9kFgICAQ8WBh4FdGl0bGUFB0VuZ2xpc2geBGhyZWYFDC9yYXlvbnMuYXNweB4Hb25jbGljawUcc2V0Q29va2llKCdsYW5ndWUnLCAnZW4tZ2InKRYCZg8WBB4Dc3JjBQ9+L2ltYWdlcy9lbi5wbmceA2FsdAUHRW5nbGlzaGQCAQ9kFgICAQ8WBh8BBQlGcmFuw6dhaXMfAgUZL3JheW9ucy5hc3B4P2xhbmd1ZT1mci1iZR8DBRxzZXRDb29raWUoJ2xhbmd1ZScsICdmci1iZScpFgJmDxYEHwQFD34vaW1hZ2VzL2ZyLnBuZx8FBQlGcmFuw6dhaXNkAgIPZBYCAgEPFgYfAQUHRGV1dHNjaB8CBRkvcmF5b25zLmFzcHg/bGFuZ3VlPWRlLWRlHwMFHHNldENvb2tpZSgnbGFuZ3VlJywgJ2RlLWRlJykWAmYPFgQfBAUPfi9pbWFnZXMvZGUucG5nHwUFB0RldXRzY2hkAgMPZBYCAgEPFgYfAQUKTmVkZXJsYW5kcx8CBRkvcmF5b25zLmFzcHg/bGFuZ3VlPW5sLWJlHwMFHHNldENvb2tpZSgnbGFuZ3VlJywgJ25sLWJlJykWAmYPFgQfBAUPfi9pbWFnZXMvbmwucG5nHwUFCk5lZGVybGFuZHNkAgQPZBYCAgEPFgYfAQUIRXNwYcOxb2wfAgUZL3JheW9ucy5hc3B4P2xhbmd1ZT1lcy1lcx8DBRxzZXRDb29raWUoJ2xhbmd1ZScsICdlcy1lcycpFgJmDxYEHwQFD34vaW1hZ2VzL2VzLnBuZx8FBQhFc3Bhw7FvbGQCBQ9kFgICAQ8WBh8BBQhJdGFsaWFubx8CBRkvcmF5b25zLmFzcHg/bGFuZ3VlPWl0LWl0HwMFHHNldENvb2tpZSgnbGFuZ3VlJywgJ2l0LWl0JykWAmYPFgQfBAUPfi9pbWFnZXMvaXQucG5nHwUFCEl0YWxpYW5vZAIFDw8WBB4EVGV4dAUESG9tZR4LTmF2aWdhdGVVcmwFDH4vaW5kZXguYXNweGRkAgcPDxYEHwYFB0RpYW1vbmQfBwUOfi9kaWFtb25kLmFzcHhkZAIJDw8WBB8GBQhTZXJ2aWNlcx8HBQ9+L3NlcnZpY2VzLmFzcHhkZAILDw8WCB8GBQhQcm9kdWN0cx8HBRR+L3JheW9ucy5hc3B4P3BhZ2U9MR4IQ3NzQ2xhc3MFB2N1cnJlbnQeBF8hU0ICAmRkAg0PDxYEHwYFBE5ld3MfBwULfi9uZXdzLmFzcHhkZAIPDw8WBB8GBQdDb250YWN0HwcFDn4vY29udGFjdC5hc3B4ZGQCEQ9kFgICAQ9kFgICAw9kFgJmD2QWBAIBDxBkZBYBZmQCBw8WBB4KQ29udGV4dEtleQUVUmVmZXJlbmNlfmVuLWdifkZhbHNlHg1Vc2VDb250ZXh0S2V5Z2QCEw9kFggCAQ9kFg4CAQ8PFgIeB1Zpc2libGVoZGQCAw9kFgJmD2QWAgIDDw8WAh8MZ2RkAgUPDxYCHwxoZGQCCRA8KwANAgAPFgIfDGhkDBQrABwFfDA6MCwwOjEsMDoyLDA6MywwOjQsMDo1LDA6NiwwOjcsMDo4LDA6OSwwOjEwLDA6MTEsMDoxMiwwOjEzLDA6MTQsMDoxNSwwOjE2LDA6MTcsMDoxOCwwOjE5LDA6MjAsMDoyMSwwOjIyLDA6MjMsMDoyNCwwOjI1LDA6MjYUKwACFgYfBgVyPGRpdiBjbGFzcz0ncHVjZV9tZW51X3JheW9uJyBzdHlsZT0nYmFja2dyb3VuZC1jb2xvcjojZmY0YzBiJz48L2Rpdj48c3BhbiBjbGFzcz0naXRlbV9tZW51X3JheW9uJz4tIENPT0tJTkc8L3NwYW4+HgdUb29sVGlwBQktIENPT0tJTkcfBwUjfi9yYXlvbnMuYXNweD92YWx1ZV9wYXRoPVA0N0tIQzc0ODRkFCsAAhYGHwYFigE8ZGl2IGNsYXNzPSdwdWNlX21lbnVfcmF5b24nIHN0eWxlPSdiYWNrZ3JvdW5kLWNvbG9yOiNlMThjNDUnPjwvZGl2PjxzcGFuIGNsYXNzPSdpdGVtX21lbnVfcmF5b24nPi0gSE9UIFNOQUNLUyAtIFBBTklOSSAtIEZBU1QgRk9PRDwvc3Bhbj4fDQUhLSBIT1QgU05BQ0tTIC0gUEFOSU5JIC0gRkFTVCBGT09EHwcFI34vcmF5b25zLmFzcHg/dmFsdWVfcGF0aD04M0RDM0Y2Q0FEZBQrAAIWBh8GBZMBPGRpdiBjbGFzcz0ncHVjZV9tZW51X3JheW9uJyBzdHlsZT0nYmFja2dyb3VuZC1jb2xvcjojZWNhZTc1Jz48L2Rpdj48c3BhbiBjbGFzcz0naXRlbV9tZW51X3JheW9uJz4tIEZSRU5DSCBGUklFUyAtIFJPQVNUSU5HIC0gR1JJTExJTkcgJiBCQlE8L3NwYW4+Hw0FKi0gRlJFTkNIIEZSSUVTIC0gUk9BU1RJTkcgLSBHUklMTElORyAmIEJCUR8HBSN+L3JheW9ucy5hc3B4P3ZhbHVlX3BhdGg9RTY0NDk0MzdDM2QUKwACFgYfBgV4PGRpdiBjbGFzcz0ncHVjZV9tZW51X3JheW9uJyBzdHlsZT0nYmFja2dyb3VuZC1jb2xvcjojZjNjYmEzJz48L2Rpdj48c3BhbiBjbGFzcz0naXRlbV9tZW51X3JheW9uJz4tIEFTSUFOIENPT0tJTkc8L3NwYW4+Hw0FDy0gQVNJQU4gQ09PS0lORx8HBSN+L3JheW9ucy5hc3B4P3ZhbHVlX3BhdGg9OTc4QTE0QzhFNGQUKwACFgYfBgWJATxkaXYgY2xhc3M9J3B1Y2VfbWVudV9yYXlvbicgc3R5bGU9J2JhY2tncm91bmQtY29sb3I6I2ZiZTZkMSc+PC9kaXY+PHNwYW4gY2xhc3M9J2l0ZW1fbWVudV9yYXlvbic+LSBTVEVBTSAtIENPTlZFQ1RJT04gLSBNSUNST1dBVkU8L3NwYW4+Hw0FIC0gU1RFQU0gLSBDT05WRUNUSU9OIC0gTUlDUk9XQVZFHwcFI34vcmF5b25zLmFzcHg/dmFsdWVfcGF0aD02N0ZENkIzNjQ2ZBQrAAIWBh8GBXc8ZGl2IGNsYXNzPSdwdWNlX21lbnVfcmF5b24nIHN0eWxlPSdiYWNrZ3JvdW5kLWNvbG9yOiNmYzM0MjgnPjwvZGl2PjxzcGFuIGNsYXNzPSdpdGVtX21lbnVfcmF5b24nPi0gQ09PSyAmIENISUxMPC9zcGFuPh8NBQ4tIENPT0sgJiBDSElMTB8HBSN+L3JheW9ucy5hc3B4P3ZhbHVlX3BhdGg9TzBRTDhLSDA4VmQUKwACFgYfBgWNATxkaXYgY2xhc3M9J3B1Y2VfbWVudV9yYXlvbicgc3R5bGU9J2JhY2t
[... raw ASP.NET page-state capture from www.diamond-europe.com (category menu markup, __VIEWSTATE blob and login form-field names) trimmed ...]
1 """ def loginDiamondEurope(self): params = { '__ASYNCPOST': 'true', '__EVENTARGUMENT': '', '__EVENTTARGET': 'ctl00$cph_center$menu_left1$lb_login', '__EVENTVALIDATION': '/wEWEwKOk7qrBAKG4eyLBALGw+PfBwK7jI7eDQL/2fqXBwLH9rmjDwLG2KLDCAKCvreACALPgYP1DQKqvLeACAKKtP7+DAL07MD3CwLksZZaAuSxmloCicn43Q8Cisn43Q8C/Iag2AMClcHvlQgCyNGw1Ax/PwzywfL/ooD/FU51memYxQ1U+Q==', '__LASTFOCUS': '', '__SCROLLPOSITIONX': '0', '__SCROLLPOSITIONY': '0', '__VIEWSTATE': '/wEPDwUINzkzMzQ5OTcPZBYCZg9kFgICAw9kFgICAQ9kFhICAw8WAh4LXyFJdGVtQ291bnQCBhYMZg9kFgICAQ8WBh4FdGl0bGUFB0VuZ2xpc2geBGhyZWYFDC9yYXlvbnMuYXNweB4Hb25jbGljawUcc2V0Q29va2llKCdsYW5ndWUnLCAnZW4tZ2InKRYCZg8WBB4Dc3JjBQ9+L2ltYWdlcy9lbi5wbmceA2FsdAUHRW5nbGlzaGQCAQ9kFgICAQ8WBh8BBQlGcmFuw6dhaXMfAgUZL3JheW9ucy5hc3B4P2xhbmd1ZT1mci1iZR8DBRxzZXRDb29raWUoJ2xhbmd1ZScsICdmci1iZScpFgJmDxYEHwQFD34vaW1hZ2VzL2ZyLnBuZx8FBQlGcmFuw6dhaXNkAgIPZBYCAgEPFgYfAQUHRGV1dHNjaB8CBRkvcmF5b25zLmFzcHg/bGFuZ3VlPWRlLWRlHwMFHHNldENvb2tpZSgnbGFuZ3VlJywgJ2RlLWRlJykWAmYPFgQfBAUPfi9pbWFnZXMvZGUucG5nHwUFB0RldXRzY2hkAgMPZBYCAgEPFgYfAQUKTmVkZXJsYW5kcx8CBRkvcmF5b25zLmFzcHg/bGFuZ3VlPW5sLWJlHwMFHHNldENvb2tpZSgnbGFuZ3VlJywgJ25sLWJlJykWAmYPFgQfBAUPfi9pbWFnZXMvbmwucG5nHwUFCk5lZGVybGFuZHNkAgQPZBYCAgEPFgYfAQUIRXNwYcOxb2wfAgUZL3JheW9ucy5hc3B4P2xhbmd1ZT1lcy1lcx8DBRxzZXRDb29raWUoJ2xhbmd1ZScsICdlcy1lcycpFgJmDxYEHwQFD34vaW1hZ2VzL2VzLnBuZx8FBQhFc3Bhw7FvbGQCBQ9kFgICAQ8WBh8BBQhJdGFsaWFubx8CBRkvcmF5b25zLmFzcHg/bGFuZ3VlPWl0LWl0HwMFHHNldENvb2tpZSgnbGFuZ3VlJywgJ2l0LWl0JykWAmYPFgQfBAUPfi9pbWFnZXMvaXQucG5nHwUFCEl0YWxpYW5vZAIFDw8WBB4EVGV4dAUESG9tZR4LTmF2aWdhdGVVcmwFDH4vaW5kZXguYXNweGRkAgcPDxYEHwYFB0RpYW1vbmQfBwUOfi9kaWFtb25kLmFzcHhkZAIJDw8WBB8GBQhTZXJ2aWNlcx8HBQ9+L3NlcnZpY2VzLmFzcHhkZAILDw8WCB8GBQhQcm9kdWN0cx8HBRR+L3JheW9ucy5hc3B4P3BhZ2U9MR4IQ3NzQ2xhc3MFB2N1cnJlbnQeBF8hU0ICAmRkAg0PDxYEHwYFBE5ld3MfBwULfi9uZXdzLmFzcHhkZAIPDw8WBB8GBQdDb250YWN0HwcFDn4vY29udGFjdC5hc3B4ZGQCEQ9kFgICAQ9kFgICAw9kFgJmD2QWBAIBDxBkZBYBZmQCBw8WBB4KQ29udGV4dEtleQUVUmVmZXJlbmNlfmVuLWdifkZhbHNlHg1Vc2VDb250ZXh0S2V5Z2QCEw9kFggCAQ9kFg4CAQ8PFgIeB1Zpc2libGVoZGQCAw9kFgJmD2QWAgIDDw8WAh8MZ2RkAgUPDxYCHwxoZGQCCRA8KwANAgAPFgIfDGhkDBQrABwFfDA6MCwwOjEsMDoyLDA6MywwOjQsMDo1LDA6NiwwOjcsMDo4LDA6OSwwOjEwLDA6MTEsMDoxMiwwOjEzLDA6MTQsMDoxNSwwOjE2LDA6MTcsMDoxOCwwOjE5LDA6MjAsMDoyMSwwOjIyLDA6MjMsMDoyNCwwOjI1LDA6MjYUKwACFgYfBgVyPGRpdiBjbGFzcz0ncHVjZV9tZW51X3JheW9uJyBzdHlsZT0nYmFja2dyb3VuZC1jb2xvcjojZmY0YzBiJz48L2Rpdj48c3BhbiBjbGFzcz0naXRlbV9tZW51X3JheW9uJz4tIENPT0tJTkc8L3NwYW4+HgdUb29sVGlwBQktIENPT0tJTkcfBwUjfi9yYXlvbnMuYXNweD92YWx1ZV9wYXRoPVA0N0tIQzc0ODRkFCsAAhYGHwYFigE8ZGl2IGNsYXNzPSdwdWNlX21lbnVfcmF5b24nIHN0eWxlPSdiYWNrZ3JvdW5kLWNvbG9yOiNlMThjNDUnPjwvZGl2PjxzcGFuIGNsYXNzPSdpdGVtX21lbnVfcmF5b24nPi0gSE9UIFNOQUNLUyAtIFBBTklOSSAtIEZBU1QgRk9PRDwvc3Bhbj4fDQUhLSBIT1QgU05BQ0tTIC0gUEFOSU5JIC0gRkFTVCBGT09EHwcFI34vcmF5b25zLmFzcHg/dmFsdWVfcGF0aD04M0RDM0Y2Q0FEZBQrAAIWBh8GBZMBPGRpdiBjbGFzcz0ncHVjZV9tZW51X3JheW9uJyBzdHlsZT0nYmFja2dyb3VuZC1jb2xvcjojZWNhZTc1Jz48L2Rpdj48c3BhbiBjbGFzcz0naXRlbV9tZW51X3JheW9uJz4tIEZSRU5DSCBGUklFUyAtIFJPQVNUSU5HIC0gR1JJTExJTkcgJiBCQlE8L3NwYW4+Hw0FKi0gRlJFTkNIIEZSSUVTIC0gUk9BU1RJTkcgLSBHUklMTElORyAmIEJCUR8HBSN+L3JheW9ucy5hc3B4P3ZhbHVlX3BhdGg9RTY0NDk0MzdDM2QUKwACFgYfBgV4PGRpdiBjbGFzcz0ncHVjZV9tZW51X3JheW9uJyBzdHlsZT0nYmFja2dyb3VuZC1jb2xvcjojZjNjYmEzJz48L2Rpdj48c3BhbiBjbGFzcz0naXRlbV9tZW51X3JheW9uJz4tIEFTSUFOIENPT0tJTkc8L3NwYW4+Hw0FDy0gQVNJQU4gQ09PS0lORx8HBSN+L3JheW9ucy5hc3B4P3ZhbHVlX3BhdGg9OTc4QTE0QzhFNGQUKwACFgYfBgWJATxkaXYgY2xhc3M9J3B1Y2VfbWVudV9yYXlvbicgc3R5bGU9J2JhY2tncm91bmQtY29sb3I6I2ZiZTZkMSc+PC9kaXY+PHNwYW4gY2xhc3M9J2l0ZW1fbWVudV9yYXlvbic+LSBTVEVBTSAtIENPTlZFQ1RJT
04gLSBNSUNST1dBVkU8L3NwYW4+Hw0FIC0gU1RFQU0gLSBDT05WRUNUSU9OIC0gTUlDUk9XQVZFHwcFI34vcmF5b25zLmFzcHg/dmFsdWVfcGF0aD02N0ZENkIzNjQ2ZBQrAAIWBh8GBXc8ZGl2IGNsYXNzPSdwdWNlX21lbnVfcmF5b24nIHN0eWxlPSdiYWNrZ3JvdW5kLWNvbG9yOiNmYzM0MjgnPjwvZGl2PjxzcGFuIGNsYXNzPSdpdGVtX21lbnVfcmF5b24nPi0gQ09PSyAmIENISUxMPC9zcGFuPh8NBQ4tIENPT0sgJiBDSElMTB8HBSN+L3JheW9ucy5hc3B4P3ZhbHVlX3BhdGg9TzBRTDhLSDA4VmQUKwACFgYfBgWNATxkaXYgY2xhc3M9J3B1Y2VfbWVudV9yYXlvbicgc3R5bGU9J2JhY2tncm91bmQtY29sb3I6I2UxN2Y1ZCc+PC9kaXY+PHNwYW4gY2xhc3M9J2l0ZW1fbWVudV9yYXlvbic+LSBSRUdFTkVSQVRJT04gLSBWQUNVVU0gLSBCQU5RVUVUSU5HPC9zcGFuPh8NBSQtIFJFR0VORVJBVElPTiAtIFZBQ1VVTSAtIEJBTlFVRVRJTkcfBwUjfi9yYXlvbnMuYXNweD92YWx1ZV9wYXRoPUU4NDM5Q0U0QTBkFCsAAhYGHwYFdzxkaXYgY2xhc3M9J3B1Y2VfbWVudV9yYXlvbicgc3R5bGU9J2JhY2tncm91bmQtY29sb3I6IzAyOTQ3ZSc+PC9kaXY+PHNwYW4gY2xhc3M9J2l0ZW1fbWVudV9yYXlvbic+LSBESVNIIFdBU0hFUlM8L3NwYW4+Hw0FDi0gRElTSCBXQVNIRVJTHwcFI34vcmF5b25zLmFzcHg/dmFsdWVfcGF0aD03OE1YQk1KRkdLZBQrAAIWBh8GBXI8ZGl2IGNsYXNzPSdwdWNlX21lbnVfcmF5b24nIHN0eWxlPSdiYWNrZ3JvdW5kLWNvbG9yOiNhMDA4NmQnPjwvZGl2PjxzcGFuIGNsYXNzPSdpdGVtX21lbnVfcmF5b24nPi0gTEFVTkRSWTwvc3Bhbj4fDQUJLSBMQVVORFJZHwcFI34vcmF5b25zLmFzcHg/dmFsdWVfcGF0aD1TWjJOU1ZKUTc4ZBQrAAIWBh8GBYMBPGRpdiBjbGFzcz0ncHVjZV9tZW51X3JheW9uJyBzdHlsZT0nYmFja2dyb3VuZC1jb2xvcjojMDU3M2E1Jz48L2Rpdj48c3BhbiBjbGFzcz0naXRlbV9tZW51X3JheW9uJz4tIEdBU1RST05PUk0gUkVGUklHRVJBVElPTjwvc3Bhbj4fDQUaLSBHQVNUUk9OT1JNIFJFRlJJR0VSQVRJT04fBwUjfi9yYXlvbnMuYXNweD92YWx1ZV9wYXRoPUhTVkVROTZYRzRkFCsAAhYGHwYFeDxkaXYgY2xhc3M9J3B1Y2VfbWVudV9yYXlvbicgc3R5bGU9J2JhY2tncm91bmQtY29sb3I6IzAyYTBjNic+PC9kaXY+PHNwYW4gY2xhc3M9J2l0ZW1fbWVudV9yYXlvbic+LSBSRUZSSUdFUkFUSU9OPC9zcGFuPh8NBQ8tIFJFRlJJR0VSQVRJT04fBwUjfi9yYXlvbnMuYXNweD92YWx1ZV9wYXRoPVgxMzY3TTdEOVNkFCsAAhYGHwYFigE8ZGl2IGNsYXNzPSdwdWNlX21lbnVfcmF5b24nIHN0eWxlPSdiYWNrZ3JvdW5kLWNvbG9yOiM2Y2IyZGEnPjwvZGl2PjxzcGFuIGNsYXNzPSdpdGVtX21lbnVfcmF5b24nPi0gU0FORFdJQ0hFUyAtIFNBTEFERVMgLSBTVEFSVEVSUzwvc3Bhbj4fDQUhLSBTQU5EV0lDSEVTIC0gU0FMQURFUyAtIFNUQVJURVJTHwcFI34vcmF5b25zLmFzcHg/dmFsdWVfcGF0aD03REU4RUM0RTJDZBQrAAIWBh8GBXY8ZGl2IGNsYXNzPSdwdWNlX21lbnVfcmF5b24nIHN0eWxlPSdiYWNrZ3JvdW5kLWNvbG9yOiM5NWM3ZTUnPjwvZGl2PjxzcGFuIGNsYXNzPSdpdGVtX21lbnVfcmF5b24nPi0gV0lORSAtIEJFRVI8L3NwYW4+Hw0FDS0gV0lORSAtIEJFRVIfBwUjfi9yYXlvbnMuYXNweD92YWx1ZV9wYXRoPTZENDg3NDQzNEFkFCsAAhYGHwYFjAE8ZGl2IGNsYXNzPSdwdWNlX21lbnVfcmF5b24nIHN0eWxlPSdiYWNrZ3JvdW5kLWNvbG9yOiNiYmRiZjAnPjwvZGl2PjxzcGFuIGNsYXNzPSdpdGVtX21lbnVfcmF5b24nPi0gU09GVCBEUklOS1MgLSBBTENPSE9MIC0gQ09DS1RBSUxTPC9zcGFuPh8NBSMtIFNPRlQgRFJJTktTIC0gQUxDT0hPTCAtIENPQ0tUQUlMUx8HBSN+L3JheW9ucy5hc3B4P3ZhbHVlX3BhdGg9Q0RBRTQyMzRCRWQUKwACFgYfBgWHATxkaXYgY2xhc3M9J3B1Y2VfbWVudV9yYXlvbicgc3R5bGU9J2JhY2tncm91bmQtY29sb3I6IzY5N2RiOSc+PC9kaXY+PHNwYW4gY2xhc3M9J2l0ZW1fbWVudV9yYXlvbic+LSBJQ0UgQ1JFQU0gLSBTT1JCRVQgLSBHUkFOSVRBPC9zcGFuPh8NBR4tIElDRSBDUkVBTSAtIFNPUkJFVCAtIEdSQU5JVEEfBwUjfi9yYXlvbnMuYXNweD92YWx1ZV9wYXRoPTNUTlQwNkJYOTJkFCsAAhYGHwYFhwE8ZGl2IGNsYXNzPSdwdWNlX21lbnVfcmF5b24nIHN0eWxlPSdiYWNrZ3JvdW5kLWNvbG9yOiNiMjI4MTQnPjwvZGl2PjxzcGFuIGNsYXNzPSdpdGVtX21lbnVfcmF5b24nPi0gU0VMRiBTRVJWSUNFIC0gQlVGRkVUIC1UQVBBUzwvc3Bhbj4fDQUeLSBTRUxGIFNFUlZJQ0UgLSBCVUZGRVQgLVRBUEFTHwcFI34vcmF5b25zLmFzcHg/dmFsdWVfcGF0aD0zT0tIWDA1NzFXZBQrAAIWBh8GBYYBPGRpdiBjbGFzcz0ncHVjZV9tZW51X3JheW9uJyBzdHlsZT0nYmFja2dyb3VuZC1jb2xvcjojZWQ5YTA1Jz48L2Rpdj48c3BhbiBjbGFzcz0naXRlbV9tZW51X3JheW9uJz4tIFBBU1RSWSAtIEJBS0VSWSAtIENIT0NPTEFURTwvc3Bhbj4fDQUdLSBQQVNUUlkgLSBCQUtFUlkgLSBDSE9DT0xBVEUfBwUjfi9yYXlvbnMuYXNweD92YWx1ZV9wYXRoPU41RjBUNVpWS1pkFCsAAhYGHwYFhQE8ZGl2IGNsYXNzPSdwdWNlX21lbnVfcmF5b24nIHN0eWxlPSdiYWNrZ3JvdW5kLWNvbG9yOiNhMzQ0YTgnPjwvZGl2PjxzcGFuIGNs
YXNzPSdpdGVtX21lbnVfcmF5b24nPi0gTUVBVCAtIERFTElDQVRFU1NFTiAtIEZJU0g8L3NwYW4+Hw0FHC0gTUVBVCAtIERFTElDQVRFU1NFTiAtIEZJU0gfBwUjfi9yYXlvbnMuYXNweD92YWx1ZV9wYXRoPUE0MTFCODA3Q0FkFCsAAhYGHwYFhAE8ZGl2IGNsYXNzPSdwdWNlX21lbnVfcmF5b24nIHN0eWxlPSdiYWNrZ3JvdW5kLWNvbG9yOiNmZjAwMGYnPjwvZGl2PjxzcGFuIGNsYXNzPSdpdGVtX21lbnVfcmF5b24nPi0gUElaWkEgLSBQQVNUQSAtIFRBS0UgQVdBWTwvc3Bhbj4fDQUbLSBQSVpaQSAtIFBBU1RBIC0gVEFLRSBBV0FZHwcFI34vcmF5b25zLmFzcHg/dmFsdWVfcGF0aD01STZYNjZSNzYyZBQrAAIWBh8GBZwBPGRpdiBjbGFzcz0ncHVjZV9tZW51X3JheW9uJyBzdHlsZT0nYmFja2dyb3VuZC1jb2xvcjojYTY2YjExJz48L2Rpdj48c3BhbiBjbGFzcz0naXRlbV9tZW51X3JheW9uJz4tIENPRkZFRSBURUEgLSBWSUVOTkVTRSBQQVNUUklFUyAtSlVJQ0VTIE1JTEsgU0hBS0U8L3NwYW4+Hw0FMy0gQ09GRkVFIFRFQSAtIFZJRU5ORVNFIFBBU1RSSUVTIC1KVUlDRVMgTUlMSyBTSEFLRR8HBSN+L3JheW9ucy5hc3B4P3ZhbHVlX3BhdGg9MUZERkZQNUgzMmQUKwACFgYfBgV7PGRpdiBjbGFzcz0ncHVjZV9tZW51X3JheW9uJyBzdHlsZT0nYmFja2dyb3VuZC1jb2xvcjojYzBjYTBlJz48L2Rpdj48c3BhbiBjbGFzcz0naXRlbV9tZW51X3JheW9uJz4tIEZPT0QgUFJFUEFSQVRJT048L3NwYW4+Hw0FEi0gRk9PRCBQUkVQQVJBVElPTh8HBSN+L3JheW9ucy5hc3B4P3ZhbHVlX3BhdGg9NVFKNzQ0MzJTV2QUKwACFgYfBgV5PGRpdiBjbGFzcz0ncHVjZV9tZW51X3JheW9uJyBzdHlsZT0nYmFja2dyb3VuZC1jb2xvcjojNWQ2MzY3Jz48L2Rpdj48c3BhbiBjbGFzcz0naXRlbV9tZW51X3JheW9uJz4tIE5FVVRSQUwgLSBJTk9YPC9zcGFuPh8NBRAtIE5FVVRSQUwgLSBJTk9YHwcFI34vcmF5b25zLmFzcHg/dmFsdWVfcGF0aD1ISDI3OTg1Q1pUZBQrAAIWBh8GBX08ZGl2IGNsYXNzPSdwdWNlX21lbnVfcmF5b24nIHN0eWxlPSdiYWNrZ3JvdW5kLWNvbG9yOiM0ZWJhYmMnPjwvZGl2PjxzcGFuIGNsYXNzPSdpdGVtX21lbnVfcmF5b24nPi0gQ0xFQU5JTkcgLSBIWUdJRU5FPC9zcGFuPh8NBRQtIENMRUFOSU5HIC0gSFlHSUVORR8HBSN+L3JheW9ucy5hc3B4P3ZhbHVlX3BhdGg9MU8wN09XMDA2M2QUKwACFgYfBgV/PGRpdiBjbGFzcz0ncHVjZV9tZW51X3JheW9uJyBzdHlsZT0nYmFja2dyb3VuZC1jb2xvcjojZmZiMjA1Jz48L2Rpdj48c3BhbiBjbGFzcz0naXRlbV9tZW51X3JheW9uJz4tIFZBQ1VVTSAmIFZFTlRJTEFUSU9OPC9zcGFuPh8NBRYtIFZBQ1VVTSAmIFZFTlRJTEFUSU9OHwcFI34vcmF5b25zLmFzcHg/dmFsdWVfcGF0aD0xSTRDQzcxM0hCZBQrAAIWBh8GBXg8ZGl2IGNsYXNzPSdwdWNlX21lbnVfcmF5b24nIHN0eWxlPSdiYWNrZ3JvdW5kLWNvbG9yOiNiMGIxYmInPjwvZGl2PjxzcGFuIGNsYXNzPSdpdGVtX21lbnVfcmF5b24nPi0gR04gQ09OVEFJTkVSUzwvc3Bhbj4fDQUPLSBHTiBDT05UQUlORVJTHwcFI34vcmF5b25zLmFzcHg/dmFsdWVfcGF0aD0yNDAxUzk1RDNHZBQrAAIWBh8GBY8BPGRpdiBjbGFzcz0ncHVjZV9tZW51X3JheW9uJyBzdHlsZT0nYmFja2dyb3VuZC1jb2xvcjojYjM4MzEwJz48L2Rpdj48c3BhbiBjbGFzcz0naXRlbV9tZW51X3JheW9uJz4tIERJTk5FUiBTRVJWSUNFIC0gRElTUExBWVMgLSBUUk9MTEVZUzwvc3Bhbj4fDQUmLSBESU5ORVIgU0VSVklDRSAtIERJU1BMQVlTIC0gVFJPTExFWVMfBwUjfi9yYXlvbnMuYXNweD92YWx1ZV9wYXRoPVg5VEY5REY0MzdkFCsAAhYGHwYFjwE8ZGl2IGNsYXNzPSdwdWNlX21lbnVfcmF5b24nIHN0eWxlPSdiYWNrZ3JvdW5kLWNvbG9yOiNmZmNjMDMnPjwvZGl2PjxzcGFuIGNsYXNzPSdpdGVtX21lbnVfcmF5b24nPi0gUkVDRVBUSU9OIC0gUk9PTSBTRVJWSUNFIC0gQlJFQUtGQVNUPC9zcGFuPh8NBSYtIFJFQ0VQVElPTiAtIFJPT00gU0VSVklDRSAtIEJSRUFLRkFTVB8HBSN+L3JheW9ucy5hc3B4P3ZhbHVlX3BhdGg9VVE0M1hMTlRBNWRkZAILDw8WBB8GBRJIaXN0b3JpY2FsIGVzdC9vcmQfDGhkZAIRDxYCHwxoFgJmD2QWAgIDDxBkZBYBZmQCEw9kFgICAQ8PFgQfBgUYSGlzdG9yaXF1ZSBkZXZpcyBjbGllbnRzHwxoZGQCAw9kFhICAw8PFgQfBgUMRmluZCBhbiBJdGVtHwcFHX4vZmFxL3JlY2hlcmNoZXJfYXJ0aWNsZS5hc3B4ZGQCBQ8PFgQfBgUjSG93IHRvIG1ha2UgYSAgZXN0aW1hdGUgLyAgYW4gT3JkZXIfBwUffi9mYXEvZmFpcmVfZGV2aXNfY29tbWFuZGUuYXNweGRkAgcPDxYEHwYFG0ZpbmQgYSAgZXN0aW1hdGUgLyBhbiBvcmRlch8HBSN+L2ZhcS9yZXRyb3V2ZXJfZGV2aXNfY29tbWFuZGUuYXNweGRkAgkPDxYEHwYFHVJlbW92ZSBhbiBpdG1lIG9mIGEgIGVzdGltYXRlHwcFGn4vZmFxL3JldGlyZXJfYXJ0aWNsZS5hc3B4ZGQCCw8PFgQfBgUUVG8gZXJhc2UgYW4gZXN0aW1hdGUfBwUXfi9mYXEvZWZhY2VyX2RldmlzLmFzcHhkZAINDxYCHwIFRH4vYXJ0aWNsZXMuYXNweD9zZWFyY2hfdHlwZT1sZXZlbCZ2YWx1ZV9wYXRoPVA0N0tIQzc0ODQmc2k9YmEmcGFnZT0xFgJmDxYCHwQFFX4vaW1hZ2VzL2VuLWdiL2JhLmpwZ2QCDg8WAh8CBUd+L2FydGljbGVzLmFzcHg/c2VhcmNoX3R5cGU9bGV2ZWwmdmF
sdWVfcGF0aD1QNDdLSEM3NDg0JnNpPXByb21vJnBhZ2U9MRYCAgEPFgIfBAUYfi9pbWFnZXMvZW4tZ2IvcHJvbW8uanBnZAIQDxYCHwIFRX4vYXJ0aWNsZXMuYXNweD9zZWFyY2hfdHlwZT1sZXZlbCZ2YWx1ZV9wYXRoPVA0N0tIQzc0ODQmc2k9bmV3JnBhZ2U9MRYCZg8WAh8EBRZ+L2ltYWdlcy9lbi1nYi9uZXcuanBnZAIRDxYCHwIFLX4vYXJ0aWNsZXMuYXNweD9zZWFyY2hfdHlwZT1oaXQmc2k9aGl0JnBhZ2U9MRYCZg8WAh8EBRZ+L2ltYWdlcy9lbi1nYi9oaXQuanBnZAIFDw8WAh8MZ2QWBgIDDxQrAAJkZGQCBQ8UKwACDxYEHgtfIURhdGFCb3VuZGcfAAISZGQWAmYPZBYMAgEPZBYCAgEPZBYIAgEPDxYCHwYFB0lENzAvUE1kZAIDDxYCHwYFLFZFTlRJTEFURUQgUkVGUklHRVJBVE9SIDcwMCBMLiAxIERPT1IgKEdOMi8xZAIFDxYCHwIFHn4vYXJ0aWNsZS5hc3B4P2FfaWQ9MTI1MTI4MzM4MhYCZg8WBB8FBSxWRU5USUxBVEVEIFJFRlJJR0VSQVRPUiA3MDAgTC4gMSBET09SIChHTjIvMR8EBWp+L21lcmNhdG9yX2RhdGEvanBlZy9HRC9DQVRBTE9HVUUgMjAxMC9SRUYgR0FTVFJPTk9STS9NRVJDQVRVUyBQSE9UT1MgMTMtMDctMTIvTk9VVkVBVSBET1NTSUVSL0lENzAtUE0uSlBHZAIHDw8WBh8IBRNsX3ByaXhfY2xpZW50IHByb21vHwYFDDEuNjI5LDAwIOKCrB8JAgJkZAICD2QWAgIBD2QWCAIBDw8WAh8GBQhEVDE3OC9QTWRkAgMPFgIfBgUoVkVOVC4gUkVGUklHLiBUQUJMRSAzIERPT1JTIEdOMS8xIDQwNSBMLmQCBQ8WAh8CBR5+L2FydGljbGUuYXNweD9hX2lkPTEyNTEyODM0MTYWAmYPFgQfBQUoVkVOVC4gUkVGUklHLiBUQUJMRSAzIERPT1JTIEdOMS8xIDQwNSBMLh8EBVt+L21lcmNhdG9yX2RhdGEvanBlZy9HRC9DQVRBTE9HVUUgMjAxMC9SRUYgR0FTVFJPTk9STS9NRVJDQVRVUyBQSE9UT1MgMTMtMDctMTIvRFQxNzgtUE0uSlBHZAIHDw8WBh8IBRNsX3ByaXhfY2xpZW50IHByb21vHwYFDDEuOTA5LDAwIOKCrB8JAgJkZAIDD2QWAgIBD2QWCAIBDw8WAh8GBQhBUDFOL0w4NmRkAgMPFgIfBgUrUkVGUklHRVIuIDg1MEwuIDEgRC4gNDB4NjAweDQwMC8yMHggNjAweDgwMGQCBQ8WAh8CBR5+L2FydGljbGUuYXNweD9hX2lkPTEyNTEyODM2MDMWAmYPFgQfBQUrUkVGUklHRVIuIDg1MEwuIDEgRC4gNDB4NjAweDQwMC8yMHggNjAweDgwMB8EBU9+L21lcmNhdG9yX2RhdGEvanBlZy9HRC9DQVRBTE9HVUUgMjAxMC9QQVRJU1NFUklFIFBBSU4gQ0hPQ09MQVQvQVAxTi1MNjQtODYuSlBHZAIHDw8WBh8IBRNsX3ByaXhfY2xpZW50IHByb21vHwYFDDIuNjg5LDAwIOKCrB8JAgJkZAIED2QWAgIBD2QWCAIBDw8WAh8GBQdEQzUwMi1OZGQCAw8WAh8GBRxESVNILVdBU0hFUiBCQVNLRVQgNTAweDUwMG1tZAIFDxYCHwIFHn4vYXJ0aWNsZS5hc3B4P2FfaWQ9MTI1MTI4NDY0OBYCZg8WBB8FBRxESVNILVdBU0hFUiBCQVNLRVQgNTAweDUwMG1tHwQFQ34vbWVyY2F0b3JfZGF0YS9qcGVnL0dEL0NBVEFMT0dVRSAyMDEwL0xBVkFHRS9GQVNUIFdBU0gvREM1MDItTi5KUEdkAgcPDxYGHwgFE2xfcHJpeF9jbGllbnQgcHJvbW8fBgUMMS41ODksMDAg4oKsHwkCAmRkAgUPZBYCAgEPZBYIAgEPDxYCHwYFB0VGUC80NFJkZAIDDxYCHwYFIUVMRUNUUklDIE9WRU4gMnggNCBQSVpaQVMgMiBST09NU2QCBQ8WAh8CBR5+L2FydGljbGUuYXNweD9hX2lkPTEyNTEyNzgzMDcWAmYPFgQfBQUhRUxFQ1RSSUMgT1ZFTiAyeCA0IFBJWlpBUyAyIFJPT01THwQFTX4vbWVyY2F0b3JfZGF0YS9qcGVnL0dEL0NBVEFMT0dVRSAyMDEwL1BJWlpBIEVUIFBBU1RBL1JVU1RJQyBMSU5FL0VGUC02NlIuSlBHZAIHDw8WBh8IBRNsX3ByaXhfY2xpZW50IHByb21vHwYFDDEuNjI1LDAwIOKCrB8JAgJkZAIGD2QWAgIBD2QWCAIBDw8WAh8GBQVESzctMmRkAgMPFgIfBgUjSE9PRCBESVNIV0FTSEVSLCAgQkFTS0VUIDUwMHg1MDAgTU1kAgUPFgIfAgUefi9hcnRpY2xlLmFzcHg/YV9pZD0xMjUxMjc1NDA1FgJmDxYEHwUFI0hPT0QgRElTSFdBU0hFUiwgIEJBU0tFVCA1MDB4NTAwIE1NHwQFQX4vbWVyY2F0b3JfZGF0YS9qcGVnL0dEL0NBVEFMT0dVRSAyMDEwL0xBVkFHRS9GQVNUIFdBU0gvREs3LTIuSlBHZAIHDw8WBh8IBRNsX3ByaXhfY2xpZW50IHByb21vHwYFDDIuNTM2LDAwIOKCrB8JAgJkZAIHDxQrAAJkZGQCBw9kFgQCAQ9kFgJmD2QWCgIFDxYCHwxoZAIJDxYCHwxoZAINDxYCHwxoZAIRDw8WBB8GBQxPcGVuIC8gQ2xvc2UfDGhkZAITDw8WAh8MaGRkAgMPPCsACQBkGAMFKWN0bDAwJGNwaF9jZW50ZXIkZHBfYXJ0aWNsZXNfcmF5b25fYm90dG9tDxQrAARkZAIGAhJkBSNjdGwwMCRjcGhfY2VudGVyJGx2X2FydGljbGVzX3JheW9ucw88KwAKAgc8KwAGAAgCEmQFJmN0bDAwJGNwaF9jZW50ZXIkZHBfYXJ0aWNsZXNfcmF5b25fdG9wDxQrAARkZAIGAhJkzw5eBCgUF6HQH+o5L7mrNloYe3w=', 'ctl00$ToolkitScriptManager1': 'ctl00$cph_center$menu_left1$up_login|ctl00$cph_center$menu_left1$lb_login', 'ctl00$cph_center$menu_left1$tb_login': '******', 'ctl00$cph_center$menu_left1$tb_password': '******', 'ctl00$ddl_search_type': 'Reference', 'ctl00_ToolkitScriptManager1HiddenField': ';;AjaxControlToolkit, 
Version=3.5.40412.0, Culture=neutral, PublicKeyToken=28f01b0e84b6d53e:en-GB:1547e793-5b7e-48fe-8490-03a375b13a33:de1feab2:f2c8e708:720a52bf:f9cec9bc:589eaa30:698129cf:7a92f56c:4a2c8239;', 'hiddenInputToUpdateATBuffer_CommonToolkitScripts': '1' } if self.spider.login('http://www.diamond-europe.com/rayons.aspx', params) is not None: return True return False def scrapBertos(self, retry=0): # self.downloadFile('http://s900.bertos.it/download.php?file=editorcms/documentazione/schede/scheda_13722600.pdf', 'a.pdf') # self.scrapSubCategory('http://s900.bertos.it/en/', '', None, None) # self.scrapProducts('http://s900.bertos.it/en/pasta_cookers/', '', '', None, None) # return self.notifyProduct.emit( '<font color=green><b>Try to get all language links.</b></font>') self.logger.debug(self.mainUrl) data = self.spider.fetchData(self.mainUrl) if data and len(data) > 0: data = self.regex.reduceNewLine(data) data = self.regex.reduceBlankSpace(data) languages = self.regex.getAllSearchedData( '(?i)<div class="[^"]*"><a href="([^"]*)"\s*?class="boxalingua">([^<]*)</a>', data) if languages and len(languages) > 0: self.logger.debug('Total languages: %s' % str(len(languages))) self.notifyProduct.emit('<b>Total languages found[%s]</b>' % str(len(languages))) for language in languages: self.totalProducts = 0 url = language[0] # if str(language[1]).lower() != 'en': # continue urlChunk = self.spider.fetchData(url) if urlChunk and len(urlChunk) > 0: urlChunk = self.regex.reduceNewLine(urlChunk) urlChunk = self.regex.reduceBlankSpace(urlChunk) url = self.regex.getSearchedData( '(?i)<a href="([^"]*)" onmouseover="vedi_po_cat\(2\)\s*?"', urlChunk) csvFile = str( language[1].strip()).lower() + '_' + 'bertos.csv' dupCsvReader = Csv() dupCsvRows = dupCsvReader.readCsvRow(csvFile) csvWriter = Csv(csvFile) if self.csvHeader not in dupCsvRows: dupCsvRows.append(self.csvHeader) csvWriter.writeCsvRow(self.csvHeader) self.notifyProduct.emit( '<font color=green><b>Try to get data for language [%s].</b></font>' % language[1]) self.scrapCategory(url, dupCsvRows, csvWriter) self.notifyProduct.emit( '<font color=red><b>===== Finish scraping data for [%s] =====</b></font><br /><br />' % language[1]) else: if retry < 5: return self.scrapBertos(retry + 1) def scrapCategory(self, mainUrl, dupCsvRows, csvWriter): url = mainUrl self.logger.debug('Main URL: ' + url) self.notifyProduct.emit( '<font color=green><b>Main URL: %s</b></font>' % url) data = self.spider.fetchData(url) if data and len(data) > 0: data = self.regex.reduceNewLine(data) data = self.regex.reduceBlankSpace(data) data = self.regex.reduceNbsp(data) self.notifyProduct.emit('<b>Try to scrap all categories.</b>') categoryChunk = self.regex.getSearchedData( '(?i)<div id="contenuto1">(.*?)</div>\s*?</div>', data) if categoryChunk and len(categoryChunk) > 0: categories = self.regex.getAllSearchedData( '(?i)<a href="([^"]*)"[^>]*?>([^<]*)</a>', categoryChunk) if categories and len(categories) > 0: self.notifyProduct.emit( '<b>Total Categories Found: %s</b>' % str(len(categories))) for category in categories: categoryName = category[1].strip() self.scrapSubCategory( str(category[0]).strip(), categoryName, dupCsvRows, csvWriter) def scrapSubCategory(self, url, categoryName, dupCsvRows, csvWriter): self.logger.debug('Category URL: ' + url) self.notifyProduct.emit('<b>Try to scrap subcategories for: %s</b>' % categoryName) data = self.spider.fetchData(url) if data and len(data) > 0: data = self.regex.reduceNewLine(data) data = self.regex.reduceBlankSpace(data) subCategories = 
self.regex.getAllSearchedData( '(?i)<li\s*?><a href="([^"]*)" title="([^"]*)"', data) if subCategories and len(subCategories) > 0: self.notifyProduct.emit( '<font color=green><b>Total subcategories found %s.</b></font>' % str(len(subCategories))) for subCategory in subCategories: subCategoryName = subCategory[1].strip() self.scrapProducts(subCategory[0].strip(), categoryName, subCategoryName, dupCsvRows, csvWriter) def downloadFile(self, url, downloadPath, retry=0): print url self.notifyProduct.emit('<b>File URL: %s.</b>' % url) try: socket.setdefaulttimeout(10) opener = urllib2.build_opener(urllib2.HTTPRedirectHandler(), urllib2.HTTPHandler(debuglevel=0), urllib2.HTTPSHandler(debuglevel=0)) opener.addheaders = [( 'User-Agent', 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:14.0) Gecko/20100101 Firefox/14.0.1' )] urllib2.install_opener(opener) # resp = opener.open(url, timeout=30) # resp = urllib2.urlopen(url, timeout=30) resp = None try: # resp = urllib.urlopen(url) resp = opener.open(url, timeout=30) except Exception, x: print x if resp is None: return False # if resp.info()['Connection'] == 'close' or resp.getcode() != 200: # if retry < 3: # self.notifyProduct.emit('<font color=red><b>Failed to download file. Retrying...</b></font>') # return self.downloadFile(url, downloadPath, retry + 1) # else: # self.notifyProduct.emit('<font color=red><b>Failed to download file after 3 retry.</b></font>') # return print resp.info() print 'info.......' contentLength = resp.info()['Content-Length'] contentLength = self.regex.getSearchedData('(?i)^(\d+)', contentLength) totalSize = float(contentLength) directory = os.path.dirname(downloadPath) if not os.path.exists(directory): try: os.makedirs(directory) except Exception, x: print x dl_file = open(downloadPath, 'wb') currentSize = 0 CHUNK_SIZE = 32768 totalSizeKB = totalSize / 1024 if totalSize > 0 else totalSize print 'everything ok............' while True: data = None try: data = resp.read(CHUNK_SIZE) except Exception, x: print x if not data: break currentSize += len(data) dl_file.write(data) print('============> ' + \ str(round(float(currentSize * 100) / totalSize, 2)) + \ '% of ' + str(totalSize) + ' bytes') notifyDl = '===> Downloaded ' + str( round(float(currentSize * 100) / totalSize, 2)) + '% of ' + str(totalSizeKB) + ' KB.' self.notifyProduct.emit('<b>%s</b>' % notifyDl) if currentSize >= totalSize: dl_file.close() return True
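# Illustrative sketch, not part of the scraper: downloadFile() above streams a
# remote file in 32 KB chunks while reporting progress from the Content-Length
# header. The stand-alone Python 2 helper below shows the same pattern using
# urllib2 only; the User-Agent string and any URL/path passed in are
# placeholders, not values taken from the scraper.
import os
import urllib2

def download_with_progress(url, dest_path, chunk_size=32768, timeout=30):
    # Build an opener with a browser-like User-Agent, as the scraper does.
    opener = urllib2.build_opener()
    opener.addheaders = [('User-Agent', 'Mozilla/5.0')]
    resp = opener.open(url, timeout=timeout)
    # Total size from the Content-Length header (may be absent).
    total = float(resp.info().get('Content-Length', 0) or 0)
    directory = os.path.dirname(dest_path)
    if directory and not os.path.exists(directory):
        os.makedirs(directory)
    done = 0
    with open(dest_path, 'wb') as out:
        while True:
            chunk = resp.read(chunk_size)
            if not chunk:
                break
            out.write(chunk)
            done += len(chunk)
            if total:
                print '==> %.2f%% of %d bytes' % (done * 100.0 / total, int(total))
    return dest_path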
class WebPageToPdf(QObject): threadPdfStatusBar = QtCore.pyqtSignal(object) threadPdfWritingStatus = QtCore.pyqtSignal(object) threadPdfWritingDone = QtCore.pyqtSignal(int) def __init__(self): QObject.__init__(self) self.regex = Regex() self.title = '' self.webView = QWebView() self.webView.settings().setAttribute(QWebSettings.AutoLoadImages, True) self.webView.settings().setAttribute(QWebSettings.JavascriptEnabled, True) self.webView.settings().setAttribute(QWebSettings.PluginsEnabled, True) self.webView.settings().setAttribute(QWebSettings.DeveloperExtrasEnabled, True) self.pdfPrinter = QPrinter() self.webView.loadFinished.connect(self.convertToPdf) def setupDefaultPdfPrinter(self, fileName): self.pdfPrinter.setOrientation(QPrinter.Portrait) self.pdfPrinter.setPageSize(QPrinter.A4) self.pdfPrinter.setOutputFormat(QPrinter.PdfFormat) self.pdfPrinter.setOutputFileName(fileName) def printWebHtmlToPdf(self, url, filePath, fileName, groupType): self.tempPdfFile = filePath + 'out.pdf' self.filePath = filePath self.fileName = fileName self.url = url self.groupType = groupType self.setupDefaultPdfPrinter(self.tempPdfFile) self.threadPdfStatusBar.emit('Fetching Data From Web. Please Wait...') # self.threadPdfWritingStatus.emit( # '<font size=4 color=green><b>Method "%s": </b></font><font color=green><b>Fetching Data From Web for</b> %s<b>.<br />Please Wait...</b></font>' % ( # self.groupType, self.url)) self.threadPdfWritingStatus.emit( '<font color=green><b>Fetching Data From Web for</b> %s<b>.<br />Please Wait...</b></font>' % self.url) self.webView.load(QUrl(url)) self.title = self.webView.title() def convertToPdf(self): print 'Generating Pdf' # self.threadPdfWritingStatus.emit( # '<font size=4><b>Method "%s": </b></font><b>Generating Pdf for</b> %s<b>. Please Wait...</b>' % ( # self.groupType, self.url)) self.threadPdfWritingStatus.emit( '<b>Generating Pdf for</b> %s<b>. Please Wait...</b>' % self.url) self.threadPdfStatusBar.emit('Generating Pdf. Please Wait...') self.webView.print_(self.pdfPrinter) print 'Generated Pdf' # self.threadPdfWritingStatus.emit( # '<font size=4><b>Method "%s": </b></font><b>Generated Pdf for</b> %s<b>. Please Wait...</b>' % ( # self.groupType, self.url)) self.threadPdfWritingStatus.emit( '<b>Generated Pdf for</b> %s<b>. Please Wait...</b>' % self.url) self.threadPdfStatusBar.emit('Generated Pdf.') self.mergePdf() self.threadPdfWritingDone.emit(True) def mergePdf(self): # self.threadPdfWritingStatus.emit( # '<font size=4><b>Method "%s": </b></font><b>Setting Title for</b> %s<b>. Please Wait...</b><br />' % ( # self.groupType, self.url)) self.threadPdfWritingStatus.emit( '<b>Setting Title for</b> %s<b>. Please Wait...</b><br />' % self.url) packet = StringIO() # create a new PDF with Reportlab pdfCanvas = canvas.Canvas(packet, pagesize=A4) pdfCanvas.setFont('Helvetica', 8) if len(self.title) is 0: self.title = str(self.url).split('/')[-1] self.title = self.regex.getSearchedData('(?i)([a-zA-Z0-9-_ ]*?)\.[a-zA-Z0-9_]*$', self.title) self.title = self.regex.replaceData('(?i)_', ' ', self.title) title = unicode(self.title[:57] + '...') if (len(self.title) > 60) else unicode(self.title) url = self.url[:57] + '...' 
if (len(self.title) > 60) else self.url pdfCanvas.drawString(5, 830, title + ' ' + str(url).lower()) d = datetime.datetime.now() strDate = str(d.strftime("%Y-%m-%d %H-%M-%S %p")) pdfCanvas.drawString(420, 5, 'Created Date Time: ' + strDate) pdfCanvas.save() packet.seek(0) newPdf = PdfFileReader(packet) if not os.path.exists(self.tempPdfFile): return self.printWebHtmlToPdf(self.url, self.filePath, self.fileName) writer = PdfFileWriter() tmpPdfFile = file(self.tempPdfFile, 'rb') reader = PdfFileReader(tmpPdfFile) for i in range(0, (reader.getNumPages())): page = reader.getPage(i) page.mergePage(newPdf.getPage(0)) # page = newPdf.getPage(0) # page.mergePage(reader.getPage(i)) writer.addPage(page) print 'Filename: ' + self.fileName outputStream = file(self.filePath + self.fileName, "wb") writer.write(outputStream) outputStream.close() tmpPdfFile.close() os.remove(str(self.tempPdfFile))
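# Illustrative sketch, not part of the scraper: mergePdf() above stamps a
# ReportLab-drawn header line onto every page of the freshly printed PDF.
# The stand-alone helper below shows just that overlay step. File names and
# the header text are placeholders, and it assumes reportlab and pyPdf are
# installed, since the class above already calls into both.
from StringIO import StringIO
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import A4
from pyPdf import PdfFileReader, PdfFileWriter

def stamp_header(src_pdf, dst_pdf, header_text):
    # Draw the header on a blank in-memory A4 page.
    packet = StringIO()
    overlay_canvas = canvas.Canvas(packet, pagesize=A4)
    overlay_canvas.setFont('Helvetica', 8)
    overlay_canvas.drawString(5, 830, header_text)   # near the top edge of A4
    overlay_canvas.save()
    packet.seek(0)
    overlay = PdfFileReader(packet)

    # Merge the overlay page onto every page of the source PDF.
    writer = PdfFileWriter()
    src = open(src_pdf, 'rb')
    reader = PdfFileReader(src)
    for i in range(reader.getNumPages()):
        page = reader.getPage(i)
        page.mergePage(overlay.getPage(0))
        writer.addPage(page)
    with open(dst_pdf, 'wb') as out:
        writer.write(out)
    src.close()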
class MainForm(QMainWindow): def __init__(self, parent=None): super(MainForm, self).__init__(parent) self.regex = Regex() self.alreadyClickedA = False self.alreadyClickedB = False self.fileDir = None self.fileDirB = None self.fileName = None self.fileNameB = None self.totalUrlA = 0 self.totalUrlB = 0 self.currentUrlA = 0 self.currentUrlB = 0 self.pdfCounter = 1 self.pdfCounterB = 1 self.typeName = 'B' self.setupUI() def setupUI(self): self.isActionEvent = False ## Web URL self.labelUrl = QLabel( '<font size=4><b>Select text File with url List: </b></font>') self.labelUrl.setAlignment(QtCore.Qt.AlignRight | QtCore.Qt.AlignVCenter) self.labelUrl.setFixedWidth(200) self.btnUrlList = QPushButton('&Browse') self.btnUrlList.setFont(QFont('Helvetica', 8, QFont.Bold)) self.btnUrlList.setFixedWidth(100) self.btnUrlList.clicked.connect(self.urlListSelected) self.labelSelectedUrl = QLabel() self.labelSelectedUrl.setAlignment(QtCore.Qt.AlignLeft | QtCore.Qt.AlignVCenter) layoutUrl = QHBoxLayout() layoutUrl.addWidget(self.btnUrlList) layoutUrl.addWidget(self.labelSelectedUrl) ## File Path self.labelPdfPath = QLabel( '<font size=4><b>Select Pdf Path: </b></font>') self.labelPdfPath.setAlignment(QtCore.Qt.AlignRight | QtCore.Qt.AlignVCenter) self.labelPdfPath.setFixedWidth(200) self.btnOpenPdfDir = QPushButton('&Browse') self.btnOpenPdfDir.setFont(QFont('Helvetica', 8, QFont.Bold)) self.btnOpenPdfDir.setFixedWidth(100) self.btnOpenPdfDir.clicked.connect(self.pdfPathSelected) self.labelSelectedPath = QLabel() self.labelSelectedPath.setAlignment(QtCore.Qt.AlignLeft | QtCore.Qt.AlignVCenter) layoutPath = QHBoxLayout() layoutPath.addWidget(self.btnOpenPdfDir) layoutPath.addWidget(self.labelSelectedPath) self.labelGrouping = QLabel( '<font size=4><b>"Raw Numbering" and "Group Similar URLs" (A and B): </b></font>' ) self.labelGrouping.setAlignment(QtCore.Qt.AlignRight | QtCore.Qt.AlignVCenter) self.comboGrouping = QComboBox() self.comboGrouping.setFont(QFont('Helvetica', 8, QFont.Bold)) self.comboGrouping.setFixedWidth(100) self.comboGrouping.addItem('B') self.comboGrouping.addItem('A') self.comboGrouping.activated[str].connect(self.onActivated) layoutComboGrouping = QHBoxLayout() layoutComboGrouping.addWidget(self.comboGrouping) # layoutComboGrouping.addWidget(self.btnGroupingHelp) self.btnPrintPdf = QPushButton('&Start') self.btnPrintPdf.setFixedWidth(100) self.btnPrintPdf.setFont(QFont('Helvetica', 8, QFont.Bold)) self.btnPrintPdf.clicked.connect(self.printPdfAction) self.btnClear = QPushButton('&Clear Results') self.btnClear.setFont(QFont('Helvetica', 8, QFont.Bold)) self.btnClear.setFixedWidth(100) self.btnClear.clicked.connect(self.clearAll) self.btnGroupingHelp = QPushButton('&Help') self.btnGroupingHelp.setFont(QFont('Helvetica', 8, QFont.Bold)) self.btnGroupingHelp.setFixedWidth(100) self.btnGroupingHelp.clicked.connect(self.groupingHelpAction) layoutAction = QHBoxLayout() layoutAction.addWidget(self.btnPrintPdf) layoutAction.addWidget(self.btnClear) layoutAction.addWidget(self.btnGroupingHelp) layoutTop = QGridLayout() layoutTop.addWidget(self.labelUrl, 0, 0) layoutTop.addLayout(layoutUrl, 0, 1, Qt.AlignLeft) layoutTop.addWidget(self.labelPdfPath, 1, 0) layoutTop.addLayout(layoutPath, 1, 1, Qt.AlignLeft) # layoutTop.addWidget(self.labelGrouping, 2, 0) # layoutTop.addLayout(layoutComboGrouping, 2, 1, Qt.AlignLeft) # layoutTop.addWidget(self.btnClear, 3, 0, Qt.AlignRight) layoutTop.addLayout(layoutAction, 2, 1, Qt.AlignLeft) ## Bottom Portion self.labelProStatusA = QLabel() self.labelProStatusB = 
QLabel() self.labelWebAddress = QLabel('<b>Current URL Being Processed:</b>') self.lineEditWebAddress = QLineEdit() self.lineEditWebAddress.setReadOnly(True) self.labelStatus = QLabel('<b>Pdf Generation Status:</b>') self.textBrowserStatus = QTextBrowser() self.textBrowserStatus.setReadOnly(True) layout = QVBoxLayout() # layout.addLayout(layoutUrl) # layout.addLayout(layoutPath) layout.addLayout(layoutTop) layout.addWidget(self.labelProStatusA) layout.addWidget(self.labelProStatusB) layout.addWidget(self.labelWebAddress) layout.addWidget(self.lineEditWebAddress) layout.addWidget(self.labelStatus) layout.addWidget(self.textBrowserStatus) widget = QWidget() widget.setLayout(layout) self.setCentralWidget(widget) self.statusBar().showMessage(QString("Application Started...."), 500) self.setWindowTitle('PDF Batch Saver') self.setWindowFlags(Qt.WindowCloseButtonHint | Qt.WindowMinimizeButtonHint) screen = QDesktopWidget().screenGeometry() # self.setFixedSize((screen.width() / 2) + 150, (screen.height() / 2) + 150) self.resize((screen.width() / 2) + 150, (screen.height() / 2) + 150) def printPdfAction(self): if self.fileName is not None and self.fileDir is not None and self.alreadyClickedA is False and self.typeName == 'A': self.webToPdf = WebPageToPdf() self.webToPdf.threadPdfStatusBar.connect(self.showStatus) self.webToPdf.threadPdfWritingStatus.connect(self.appendStatus) self.webToPdf.threadPdfWritingDone.connect(self.pdfGenFinished) f = open(self.fileName, 'rb') self.lists = f.readlines() f.close() self.totalUrlA = len(self.lists) self.alreadyClickedA = True self.pdfGenFinished() elif self.fileNameB is not None and self.fileDirB is not None and self.alreadyClickedB is False and self.typeName == 'B': self.webToPdfB = WebPageToPdf() self.webToPdfB.threadPdfStatusBar.connect(self.showStatus) self.webToPdfB.threadPdfWritingStatus.connect(self.appendStatus) self.webToPdfB.threadPdfWritingDone.connect(self.pdfGenFinishedB) f = open(self.fileNameB, 'rb') self.listsB = f.readlines() f.close() pdfFiles = [ f for f in os.listdir(self.fileDirB) if f.endswith('.pdf') ] if len(pdfFiles) > 0: self.pdfCounterB = int( self.regex.getSearchedData('(?i)^(\d+)_', pdfFiles[-1])) + 1 self.totalUrlB = len(self.listsB) self.alreadyClickedB = True self.startTime = time.clock() self.pdfGenFinishedB() else: QMessageBox.warning( None, 'Warning', 'Please Select your URL List and PDF writing Path.') def pdfGenFinished(self): if self.lists is not None and len(self.lists) > 0: self.currentUrlA += 1 url = self.lists.pop(0) self.lineEditWebAddress.setText(url) url = url.strip() self.labelProStatusA.setText( '<font color="green" size=4><b>For grouping "A": <u> %s </u> total items in the batch, processing <u> %s </u> out of <u> %s </u></b></font>' % (str(self.totalUrlA), str( self.currentUrlA), str(self.totalUrlA))) pdfFile = str(url).split('/')[-1] print 'pdf file : ' + pdfFile pdfFile = self.regex.getSearchedData( '(?i)([a-zA-Z0-9-_ ]*?)\.[a-zA-Z0-9_]*$', pdfFile) pdfFiles = [ f for f in os.listdir(self.fileDir) if f.endswith('.pdf') ] finalPdfFile = '' i = 2 for file in pdfFiles: if self.regex.isFoundPattern('(?i)' + pdfFile, file): index = self.regex.getSearchedData('(?i)(\d+).*?$', file) finalPdfFile = str(index) + '_' + str( pdfFile) + '_copy_' + str(i) + '.pdf' i += 1 if len(finalPdfFile) is 0: finalPdfFile = str(self.pdfCounter) + '_' + pdfFile + '.pdf' else: self.pdfCounter -= 1 self.webToPdf.printWebHtmlToPdf(url, self.fileDir + '/', finalPdfFile, 'A') self.pdfCounter += 1 else: self.showStatus('Pdf Generation 
Completed') self.alreadyClicked = False self.totalUrlA = 0 self.currentUrlA = 0 # self.labelProStatusA.setText('') def pdfGenFinishedB(self): if self.listsB is not None and len(self.listsB) > 0: self.currentUrlB += 1 url = self.listsB.pop(0) self.lineEditWebAddress.setText(url) url = url.strip() # self.labelProStatusB.setText( # '<font color="green" size=4><b>For grouping "B": <u> %s </u> total items in the batch, processing <u> %s </u> out of <u> %s </u></b></font>' % ( # str( # self.totalUrlB), str(self.currentUrlB), str(self.totalUrlB))) elapsedTime = time.clock() - self.startTime print elapsedTime self.labelProStatusB.setText( '<font size=4><b>URL <u> %s </u> of <u> %s </u> being processed. Time elapsed: %s</b></font>' % (str(self.currentUrlB), str(self.totalUrlB), str(time.strftime('%H:%M:%S', time.gmtime(elapsedTime))))) pdfFile = str(url).split('/')[-1] print 'pdf file : ' + pdfFile # pdfFile = self.regex.getSearchedData('(?i)/([a-zA-Z0-9-_. ]*)$', url) pdfFile = self.regex.getSearchedData( '(?i)([a-zA-Z0-9-_ ]*?)\.[a-zA-Z0-9_]*$', pdfFile) pdfFiles = [ f for f in os.listdir(self.fileDirB) if f.endswith('.pdf') ] # self.pdfCounterB = int(self.regex.getSearchedData('(?i)^(\d+)_', pdfFiles[-1])) finalPdfFile = '' i = 2 for file in pdfFiles: if self.regex.isFoundPattern('(?i)' + pdfFile, file): finalPdfFile = str(self.pdfCounterB) + '_' + str( pdfFile) + '_copy_' + str(i) + '.pdf' i += 1 if len(finalPdfFile) is 0: finalPdfFile = str(self.pdfCounterB) + '_' + pdfFile + '.pdf' self.webToPdfB.printWebHtmlToPdf(url, self.fileDirB + '/', finalPdfFile, 'B') self.labelProStatusB.setText( '<font size=4><b>URL <u> %s </u> of <u> %s </u> being processed. Time elapsed: %s</b></font>' % (str(self.currentUrlB), str(self.totalUrlB), str(time.strftime('%H:%M:%S', time.gmtime(elapsedTime))))) self.pdfCounterB += 1 else: self.showStatus('Pdf Generation Completed') self.alreadyClickedB = False self.totalUrlB = 0 self.currentUrlB = 0 self.fileDirB = None self.fileNameB = None # self.labelProStatusB.setText('') def urlListSelected(self): if self.typeName == 'A': self.fileName = QtGui.QFileDialog.getOpenFileName( self, "Select Text File", QDir.homePath() + "/Desktop") if self.typeName == 'B': self.fileNameB = QtGui.QFileDialog.getOpenFileName( self, "Select Text File", QDir.homePath() + "/Desktop") self.labelSelectedUrl.setText('<b>%s</b>' % str(self.fileNameB)) def pdfPathSelected(self): if self.typeName == 'A': self.fileDir = QtGui.QFileDialog.getExistingDirectory( self, "Select Directory", QDir.homePath() + "/Desktop") self.pdfCounter = 1 if self.typeName == 'B': self.fileDirB = QtGui.QFileDialog.getExistingDirectory( self, "Select Directory", QDir.homePath() + "/Desktop") self.pdfCounterB = 1 self.labelSelectedPath.setText('<b>%s</b>' % str(self.fileDirB)) def onActivated(self, text): self.typeName = text self.pdfCounter = 1 def clearAll(self): self.lineEditWebAddress.clear() self.textBrowserStatus.clear() self.statusBar().showMessage('') self.pdfCounterB = 1 self.labelProStatusB.setText('') self.fileDirB = None self.fileNameB = None def groupingHelpAction(self): QMessageBox.information( None, 'Help Message', 'This program reads a text file of URLs and produces a series of PDFs. If the source text file contains more than one listing of the same URL, the program will create an extra copy of the PDF anyway in the output folder.' ) def appendStatus(self, data): self.textBrowserStatus.append(data) def showStatus(self, data): self.statusBar().showMessage(data)
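# Illustrative sketch, not part of the scraper: MainForm above is a QMainWindow,
# so it needs a QApplication and an event loop before it can show anything.
# A minimal launch sketch, assuming the PyQt4 bindings the module already uses:
import sys
from PyQt4 import QtGui

if __name__ == '__main__':
    app = QtGui.QApplication(sys.argv)   # one QApplication per process
    form = MainForm()                    # the window class defined above
    form.show()
    sys.exit(app.exec_())                # enter the Qt event loop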
def __init__(self): self.regex = Regex()
class MyLinkedInMessage(QThread): notifyLinkedIn = pyqtSignal(object) def __init__(self, spider, memberList, subject, message): QThread.__init__(self) # self.spider = Spider() self.spider = spider self.regex = Regex() self.memberList = memberList self.subject = unicode(subject) self.message = unicode(message) def run(self): self.sendMessage() self.notifyLinkedIn.emit( '<font color=red><b>Finish Sending All Messages.</b></font>') def sendMessage(self): print self.memberList for member in self.memberList: messageUrl = 'http://www.linkedin.com/inbox/compose/dialog?insider=true&connId=' + str( member[1]) print messageUrl # messageUrl = 'http://www.linkedin.com/inbox/compose/dialog?insider=true&connId=' + '65471931' # data = self.spider.fetchData('http://www.linkedin.com/inbox/compose/dialog?insider=true&connId=65471931') data = self.spider.fetchData(messageUrl) data = self.regex.reduceNewLine(data) data = self.regex.reduceBlankSpace(data) fromName = self.regex.getSearchedData( '(?i)<input type="hidden" name="fromName" value="([^"]*)"', data) fromEmail = self.regex.getSearchedData( '(?i)<input type="hidden" name="fromEmail" value="([^"]*)"', data) # connectionIds = self.regex.getSearchedData('(?i)<input type="hidden" name="connectionIds" value="([^"]*)"', data) csrfToken = self.regex.getSearchedData( '(?i)<input type="hidden" name="csrfToken" value="([^"]*)"', data) sourceAlias = self.regex.getSearchedData( '(?i)<input type="hidden" name="sourceAlias" value="([^"]*)"', data) linkedInSubject = u'Hi ' + unicode( member[0]).split(' ')[0] + self.subject linkedInMessage = u'Hi ' + unicode( member[0]).split(' ')[0] + u',\n' + self.message print linkedInMessage params = { 'addMoreRcpts': 'false', 'ajaxSubmit': 'Send Message', 'allowEditRcpts': 'true', 'body': linkedInMessage, 'connectionIds': str(member[1]), 'connectionNames': '', 'csrfToken': csrfToken, 'fromEmail': fromEmail, 'fromName': fromName, 'itemID': '', 'openSocialAppBodySuffix': '', 'showRecipeints': 'showRecipeints', 'sourceAlias': sourceAlias, 'st': '', 'subject': linkedInSubject, 'submit': 'Send Message', 'viewerDestinationUrl': '' } print params msgUrl = 'http://www.linkedin.com/msgToConns?displayCreate=' data = self.spider.fetchData(msgUrl, params) data = self.regex.reduceNewLine(data) data = self.regex.reduceBlankSpace(data) if self.regex.isFoundPattern('(?i)<div class="alert success">', data): print 'Message Sent.' 
self.notifyLinkedIn.emit( '<font color=green><b>Successfully Sent Message To: %s</b></font>' % member[0]) else: self.notifyLinkedIn.emit( '<font color=red><b>Something Wrong during Send Message To</b></font>' % member[0]) # params = {'addMoreRcpts': 'false', # 'ajaxSubmit': 'Send Message', # 'allowEditRcpts': 'true', # 'body': 'Script Test', # 'connectionIds': '65471931', # 'connectionNames': '', # 'csrfToken': 'ajax: 6539671039643459056', # 'fromEmail': '467728216', # 'fromName': 'Mehedi Hasan', # 'itemID': '', # 'openSocialAppBodySuffix': '', # 'showRecipeints': 'showRecipeints', # 'sourceAlias': '0_6k2algZhQ6vbvlhlVSByxRKi0OB9NXjxrnJYWBFvfhn', # 'st': '', # 'subject': 'Script Test', # 'submit': 'Send Message', # 'viewerDestinationUrl': ''} #<input type="hidden" name="fromName" value="Mehedi Hasan" id="fromName-msgForm"> # <input type="hidden" name="showRecipeints" value="showRecipeints" id="showRecipeints-msgForm"> # <input type="hidden" name="fromEmail" value="467728216" id="fromEmail-msgForm"> # <input type="hidden" name="connectionIds" value="65471931" id="connectionIds-msgForm"> # <input type="hidden" name="connectionNames" value="" id="connectionNames-msgForm"> # <input type="hidden" name="allowEditRcpts" value="true" id="allowEditRcpts-msgForm"> # <input type="hidden" name="addMoreRcpts" value="false" id="addMoreRcpts-msgForm"> # <input type="hidden" name="itemID" value="" id="itemID-msgForm"> # <input type="hidden" name="openSocialAppBodySuffix" value="" id="openSocialAppBodySuffix-msgForm"> # <input type="hidden" name="st" value="" id="st-msgForm"> # <input type="hidden" name="viewerDestinationUrl" value="" id="viewerDestinationUrl-msgForm"> # <input type="hidden" name="csrfToken" value="ajax:6539671039643459056" id="csrfToken-msgForm"> # <input type="hidden" name="sourceAlias" value="0_6k2algZhQ6vbvlhlVSByxRKi0OB9NXjxrnJYWBFvfhn" id="sourceAlias-msgForm"> """ msgUrl1 = 'http://www.linkedin.com/msgToConns?displayCreate=' msgParams = {} addMoreRcpts false ajaxSubmit Send Message allowEditRcpts true body fdgdfgdfgdfg dg d connectionIds 57414219 connectionNames csrfToken ajax:3480949306085123249 fromEmail 467728216 fromName Mehedi Hasan goback .con.npv_57414219_*1_*1_name_r5tN_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1 itemID openSocialAppBodySuffix showRecipeints showRecipeints sourceAlias 0_6k2algZhQ6vbvlhlVSByxRKi0OB9NXjxrnJYWBFvfhn st subject viewerDestinationUrl """ """addMoreRcpts false
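# Illustrative sketch, not part of the scraper: sendMessage() above copies
# several hidden form fields (csrfToken, sourceAlias, fromEmail, fromName) out
# of the compose dialog before posting. The helper below shows the same idea
# generically with the standard re module; the HTML sample is made up for
# illustration only.
import re

HIDDEN_INPUT = re.compile(
    r'<input[^>]*type="hidden"[^>]*name="([^"]*)"[^>]*value="([^"]*)"',
    re.IGNORECASE)

def hidden_fields(html):
    # Collect every hidden <input> name/value pair into a dict.
    return dict(HIDDEN_INPUT.findall(html))

sample = ('<input type="hidden" name="csrfToken" value="ajax:123">'
          '<input type="hidden" name="fromEmail" value="467728216">')
print hidden_fields(sample)   # prints the csrfToken / fromEmail pairs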
class NisbetProduct(QtCore.QThread): scrapProductData = QtCore.pyqtSignal(object) stopThread = QtCore.pyqtSignal(int) def __init__(self): QtCore.QThread.__init__(self) self.isExiting = False self.logger = LogManager(__name__) self.spider = Spider() self.regex = Regex() dupCsvReader = Csv() self.dupCsvRows = dupCsvReader.readCsvRow('nisbets.csv', 0) self.csvWriter = Csv('nisbets.csv') self.mainUrl = 'http://www.nisbets.co.uk' csvHeaderList = [ 'URL', 'Product Code', 'Product Technical Specifications', 'Product Name', 'Brand', 'Product Price', 'Product Short Description', 'Product Long Description', 'Image File Name', 'User Manual File Name', 'Exploded View File Name', 'Spares Code', 'Accessories', 'Product Status' 'Category1', 'Category2', 'Category3', 'Category4' ] if 'URL' not in self.dupCsvRows: self.csvWriter.writeCsvRow(csvHeaderList) self.dupCsvRows.append(csvHeaderList[0]) self.utils = Utils() def run(self): self.scrapData() def stop(self): self.isExiting = True def scrapData(self): if self.isExiting: return self.scrapProductData.emit( '<font color=green><b>Main URL: </b>%s</font>' % self.mainUrl) self.logger.debug('===== URL [' + self.mainUrl + '] =====') data = self.spider.fetchData(self.mainUrl) if data: data = self.regex.reduceNewLine(data) data = self.regex.reduceBlankSpace(data) category1Chunk = self.regex.getAllSearchedData( '(?i)<li id="li-id-\d+">(.*?)</ul> </li>', data) if category1Chunk: for category1Data in category1Chunk: category1 = self.regex.getSearchedData( '(?i)<a href="[^"]*">([^<]*)</a>', category1Data) category2Chunk = self.regex.getAllSearchedData( '(?i)<li><a href="([^"]*)">([^<]*)</a>', category1Data) if category2Chunk: for category2Data in category2Chunk: self.scrapCategory2Data( self.mainUrl + category2Data[0], category1, category2Data[1]) self.scrapProductData.emit( '<font color=red><b>Finish Scraping Product data from %s</b></font>' % self.mainUrl) def scrapCategory2Data(self, url, category1, category2): if self.isExiting: return self.scrapProductData.emit('<b>Category 2 URL: </b>%s' % url) self.logger.debug('== Category 2 URL [' + url + '] ==') data = self.spider.fetchData(url) if data: data = self.regex.reduceNewLine(data) data = self.regex.reduceBlankSpace(data) category3Chunks = self.regex.getSearchedData( '(?i)<ul class="topCat clear-fix">(.*?)</ul>', data) if category3Chunks: category3Chunk = self.regex.getAllSearchedData( '(?i)<a href="([^"]*)">([^<]*)<', category3Chunks) if category3Chunk: for category3Data in category3Chunk: self.scrapCategory3Data( self.mainUrl + category3Data[0], category1, category2, category3Data[1]) def scrapCategory3Data(self, url, category1, category2, category3): if self.isExiting: return self.scrapProductData.emit('<b>Category 3 URL: </b>%s' % url) self.logger.debug('== Category 3 URL [' + url + '] ==') data = self.spider.fetchData(url) if data: data = self.regex.reduceNewLine(data) data = self.regex.reduceBlankSpace(data) category4Chunks = self.regex.getSearchedData( '(?i)<ul class="topCat clear-fix">(.*?)</ul>', data) if category4Chunks: category4Chunk = self.regex.getAllSearchedData( '(?i)<a href="([^"]*)">([^<]*)<', category4Chunks) if category4Chunk: for category4Data in category4Chunk: category4Url = self.mainUrl + category4Data[0] self.scrapCategory4Data(category4Url, category1, category2, category3, category4Data[1]) def scrapCategory4Data(self, url, category1, category2, category3, category4): if self.isExiting: return self.scrapProductData.emit('<b>Category 4 URL: </b>%s' % url) self.logger.debug('== Category 4 
URL [' + url + '] ==') data = self.spider.fetchData(url) if data: data = self.regex.reduceNewLine(data) data = self.regex.reduceBlankSpace(data) categoryChunk = self.regex.getAllSearchedData( '(?i)<div class="product-list-row clear-after">(.*?)</fieldset>', data) if categoryChunk: for categoryData in categoryChunk: if self.isExiting: return productInfo = self.regex.getSearchedDataGroups( '(?i)<h3 class="product-name"> <a href="([^"]*)"[^>]*?>([^<]*)</a>', categoryData) productUrl = self.mainUrl + productInfo.group(1) productName = productInfo.group(2) if productUrl not in self.dupCsvRows: self.dupCsvRows.append(productUrl) else: self.scrapProductData.emit( '<font color=green><b>Already exists this item in csv Skip it</b></font>' ) self.logger.debug( '========= Already exists this item Skip it ===========' ) return productImageInfo = self.regex.getSearchedDataGroups( '(?i)<img class="primaryImage" src="([^"]*)" alt="([^"]*)"', categoryData) image = self.regex.replaceData( '(?i)medium', 'xlarge', str(productImageInfo.group(1))) productImageUrl = self.mainUrl + image productImage = self.regex.getSearchedData( '(?i)/([a-zA-Z0-9-_.]*)$', image) self.utils.downloadFile(productImageUrl, 'images/' + productImage) productCode = productImageInfo.group(2) productTechSpecs = self.regex.getSearchedData( '(?i)<p class="description">([^<]*)</p>', categoryData) brandName = self.regex.getSearchedData( '(?i)<img class="brand-image" src="[^"]*" alt="([^"]*)"', categoryData) price = self.regex.getSearchedData( '(?i)<div class="reduced-price"> <span class="[^"]*">([^<]*)</span>', categoryData) if price: price = price.strip()[1:] productStatus = self.regex.getSearchedData( '(?i)<div class="availibility"> <img alt="([^"]*)"', categoryData) productDesc = '' productLongDesc = '' spareCodes = '' accessoryCode = '' userManual = '' explodedView = '' self.scrapProductData.emit( '<br /><font color=green><b>Product Details URL: </b>%s</font>' % productUrl) productChunk = self.spider.fetchData(productUrl) if productChunk: productChunk = self.regex.reduceNewLine(productChunk) productChunk = self.regex.reduceBlankSpace( productChunk) productDesc = self.regex.getSearchedData( '(?i)<div class="productDesc"> <h1 class="[^"]*"[^>]*?>[^<]*?</h1>.*?<p>([^<]*)</p>', productChunk) productLongDesc = self.regex.getSearchedData( '(?i)<div class="info-product[^>]*?>(.*?)</div>', productChunk) otherUrl = self.regex.getSearchedData( '(?i)(^.*?/)[a-zA-Z0-9._-]*?$', productUrl) self.logger.debug('== Common Product URL [' + otherUrl + '] ==') sparesUrl = otherUrl + "AjaxProductSpares.raction" self.logger.debug('== Spares URL [' + sparesUrl + '] ==') spares = self.spider.fetchData(sparesUrl) if spares: spares = self.regex.getAllSearchedData( '(?i)<p class="code"><span class="bold">Code:</span>([^<]*)</p>', spares) if spares: spareCodes = ', '.join(spares) accessoriesUrl = otherUrl + "AjaxProductAccessories.raction" self.logger.debug('== Accessories URL [' + accessoriesUrl + '] ==') accessories = self.spider.fetchData(accessoriesUrl) if accessories: accessories = self.regex.getAllSearchedData( '(?i)<p class="code"><span class="bold">Code:</span>([^<]*)</p>', accessories) if accessories: accessoryCode = ', '.join(accessories) docUrl = otherUrl + "AjaxProductDocuments.raction" self.logger.debug('== Document URL[' + docUrl + '] ==') userManuals = self.spider.fetchData(docUrl) if userManuals: userManual = self.regex.getSearchedData( '(?i)<a class="document-icon" href="([^"]*)"[^>]*?>Download User Manual</a>', userManuals) 
self.logger.debug('Manual URL: ' + userManual) if userManual: userManualUrl = self.mainUrl + self.regex.replaceData( ' ', '%20', userManual) self.logger.debug('User Manual URL: ' + userManualUrl) self.scrapProductData.emit( '<b>User Manual PDF URL: </b>%s' % userManualUrl) userManual = self.regex.getSearchedData( '(?i)/([a-zA-Z0-9-_. ]*)$', userManual) userManual = self.regex.replaceData( '\s+', '_', userManual.strip()) self.scrapProductData.emit( '<font color=green><b>Downloading User Manual: </b>%s <b>Please Wait...</b>' % userManual) self.utils.downloadFile( userManualUrl, 'user_manual/' + userManual) explodedView = self.regex.getSearchedData( '(?i)<a class="document-icon" href="([^"]*)"[^>]*?>Download Exploded Diagram</a>', userManuals) if explodedView: explodedViewUrl = self.mainUrl + self.regex.replaceData( ' ', '%20', explodedView) self.scrapProductData.emit( '<b>Exploded Diagram PDF URL: </b>%s' % explodedViewUrl) explodedView = self.regex.getSearchedData( '(?i)/([a-zA-Z0-9-_. ]*)$', explodedView) explodedView = self.regex.replaceData( '\s+', '_', explodedView.strip()) self.scrapProductData.emit( '<font color=green><b>Downloading Exploded Diagram: </b>%s <b>Please Wait...</b>' % explodedView) self.utils.downloadFile( explodedViewUrl, 'exploded_view/' + explodedView) csvData = [ productUrl, productCode, productTechSpecs, productName, brandName, price.strip(), productDesc, productLongDesc, productImage, userManual, explodedView, spareCodes, accessoryCode, productStatus, category1, category2, category3, category4 ] self.csvWriter.writeCsvRow(csvData) self.logger.debug('Scraped data ' + str(csvData)) self.scrapProductData.emit( '<div><b>Scraped Data: </b>%s<br /></div>' % str(csvData))
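# Illustrative sketch, not part of the scraper: NisbetProduct skips a product
# when its URL is already present in the first column of nisbets.csv. The
# stand-alone helper below shows that de-duplication idea with the standard
# csv module only; the file name and the example row are placeholders rather
# than the scraper's real data.
import csv
import os

def append_unique(csv_file, row, key_index=0):
    # Load the keys already written (first column by default).
    seen = set()
    if os.path.exists(csv_file):
        with open(csv_file, 'rb') as f:       # binary mode for the csv module on Python 2
            for existing in csv.reader(f):
                if existing:
                    seen.add(existing[key_index])
    if row[key_index] in seen:
        return False                          # duplicate key, skip the row
    with open(csv_file, 'ab') as f:
        csv.writer(f).writerow(row)
    return True

append_unique('products.csv', ['http://example.com/item-1', 'CODE-1', 'Item 1'])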