def sendSites(self):
    """Collect the initial product data for this site and return it.

    The fetch strategy is selected by ``self.endPoint``:

    * falsy   -> ``getEflashData``
    * 'xml'   -> ``getAtomData`` (when the site matches an entry of
                 ``self.atom``) or ``getProductJsonData``, followed by
                 ``getSitemapData``
    * 'atom'  -> ``getAtomData``
    * 'json'  -> ``getProductJsonData``
    * 'json2' -> ``getProductJson2Data`` (no ``threadPool`` pass)

    Returns:
        ``self.data`` on success. ``None`` when ``self.endPoint`` is not one
        of the values above, or when any step raises (the error is printed).
    """
    try:
        mode = self.endPoint

        # 'json2' is the only mode that skips the threadPool pass.
        if mode == 'json2':
            self.getProductJson2Data()
            return self.data

        if not mode:
            self.getEflashData()
        elif mode == 'xml':
            uses_atom = any(site in self.site for site in self.atom)
            if uses_atom:
                self.getAtomData()
            else:
                self.getProductJsonData()
            # NOTE(review): the collapsed source does not show whether this
            # call sits inside the else-branch above; it is treated here as
            # running for every 'xml' site -- confirm against the original.
            self.getSitemapData()
        elif mode == 'atom':
            self.getAtomData()
        elif mode == 'json':
            self.getProductJsonData()
        else:
            # Unrecognised endpoint: nothing to fetch.
            return None

        self.threadPool(self.listDict)
        return self.data
    except Exception as e:
        print(u.getDatetime(), self.site, e)
def getEflashData(self):
    """Scan the site's landing page and start a lookup for each unseen product.

    Fetches ``self.site`` through proxy ``self.p``, hands the parsed page to
    ``checkEflashIfOos``, then walks every ``div.grid-view-item`` element.
    Each product link not yet present in ``self.data`` gets a placeholder
    entry and a background ``getVendorAndStock`` call whose result is handed
    to ``self.callback``.

    Timeouts and connection errors are ignored, an HTTP 430 marks the current
    proxy as bad (``self.badP``), and any other error is printed.
    """
    try:
        # A fresh random query value is appended on every request
        # (presumably to defeat caching).
        page_url = self.site + f'?_={uuid4().hex}'
        r = self.s.get(page_url,
                       proxies={'https': 'http://{}'.format(self.p)},
                       timeout=(1, 5))
        r.raise_for_status()

        tree = etree.HTML(r.content)
        self.checkEflashIfOos(tree)

        for tile in tree.xpath('//div[@class="grid-view-item"]'):
            href = self.site + tile.xpath('a/@href')[0]
            if href in self.data:
                continue

            # This page carries no timestamp, so 'updated' is stored as None.
            self.data[href] = {'href': href, 'updated': None}
            key = {'href': href, 'updated': None, 'oldStockCount': 'NEW'}
            future = cf.ThreadPoolExecutor().submit(
                self.getVendorAndStock, key, random.choice(self.infoP))
            future.add_done_callback(self.callback)
    except (requests.Timeout, requests.ConnectionError):
        pass
    except requests.HTTPError:
        if r.status_code == 430:
            self.badP = self.p
    except Exception as e:
        print(u.getDatetime(), self.site, e)
def getSitemapData(self):
    """Poll ``sitemap_products_1.xml`` and start lookups for new/updated products.

    Every child of the sitemap root except the first is inspected. Entries
    whose title contains one of ``self.keywords`` and none of
    ``self.ignoreKW`` are skipped. For the rest:

    * unseen href  -> a placeholder is stored in ``self.data`` and a
      ``getVendorAndStock`` lookup is started with ``oldStockCount='NEW'``;
    * known href with a newer ``updated`` value -> the stored timestamp is
      bumped and a lookup is started carrying the stored vendor/stock.

    Timeouts and connection errors are ignored; HTTP 429 on a 'kith' site
    calls ``getKith429``; HTTP 430 marks the current proxy as bad; anything
    else is printed.
    """
    try:
        sitemap_url = self.site + f'sitemap_products_1.xml?_={uuid4().hex}'
        r = self.s.get(sitemap_url,
                       proxies={'https': 'http://{}'.format(self.p)},
                       timeout=(1, 5))
        r.raise_for_status()
        tree = ET.fromstring(r.content)

        for entry in tree[1:]:
            try:
                title = entry[3][1].text

                def title_has(words):
                    return any(word in title.lower() for word in words)

                if title_has(self.keywords) and not title_has(self.ignoreKW):
                    continue
            except:
                # Entries without a readable title are processed unfiltered.
                pass

            href = entry[0].text
            updated = entry[1].text

            if href in self.data:
                known = self.data[href]
                if not updated > known['updated']:
                    continue
                known['updated'] = updated
                key = {
                    'href': href,
                    'updated': updated,
                    'vendor': known['vendor'],
                    'oldStockCount': known['stockCount'],
                }
            else:
                self.data[href] = {'href': href, 'updated': updated}
                key = {'href': href, 'updated': updated, 'oldStockCount': 'NEW'}

            future = cf.ThreadPoolExecutor().submit(
                self.getVendorAndStock, key, random.choice(self.infoP))
            future.add_done_callback(self.callback)
    except (requests.Timeout, requests.ConnectionError) as e:
        pass
    except requests.HTTPError:
        if r.status_code == 429 and 'kith' in self.site:
            self.getKith429()
        elif r.status_code == 430:
            self.badP = self.p
    except Exception as e:
        print(u.getDatetime(), self.site, e)
def getProductJson2Data(self, page):
    """Poll one page of ``products.json`` and track availability in ``self.data``.

    Args:
        page: page number placed in the ``page`` query parameter.

    Products whose title contains one of ``self.keywords`` and none of
    ``self.ignoreKW`` are skipped. Availability is 'IN STOCK/HIDDEN' when at
    least one variant reports ``available``; otherwise 'OOS/HIDDEN'.

    * Known product with a newer ``updated_at``: the stored timestamp and
      stock state are refreshed, and an OOS -> in-stock transition for a
      watched vendor triggers ``sendToSlack`` on a worker thread.
    * Unknown product: a full record is stored, and ``sendToSlack`` is
      triggered when the vendor is watched and the update date is not
      earlier than ``u.getDate()``.

    A vendor is "watched" when it contains one of ``self.brands`` or is
    shorter than three characters. Timeouts and connection errors are
    ignored, HTTP 430 marks the proxy as bad, other errors are printed.
    """
    try:
        r = self.s.get(self.site + f'products.json?page={str(page)}&_={uuid4().hex}',
                       proxies={'https': 'http://{}'.format(self.p)},
                       timeout=(1, 1))
        r.raise_for_status()

        for products in r.json()['products']:
            title = products['title']

            def title_has(words):
                return any(word in title.lower() for word in words)

            if title_has(self.keywords) and not title_has(self.ignoreKW):
                continue

            href = '{}products/{}'.format(self.site, products['handle'])
            updated = products['updated_at']
            vendor = products['vendor'].lower()

            if any(variant['available'] for variant in products['variants']):
                stockCount = 'IN STOCK/HIDDEN'
            else:
                stockCount = 'OOS/HIDDEN'

            def vendor_is_watched():
                return any(brand in vendor for brand in self.brands) or len(vendor) < 3

            if href not in self.data:
                self.data[href] = {
                    'href': href,
                    'updated': updated,
                    'vendor': vendor,
                    'stockCount': stockCount,
                }
                if vendor_is_watched() and updated.split('T')[0] >= u.getDate():
                    cf.ThreadPoolExecutor().submit(
                        self.sendToSlack, href, updated, products, stockCount, vendor)
                continue

            known = self.data[href]
            if updated > known['updated']:
                # NOTE(review): the collapsed source does not show nesting;
                # the stock comparison and the stock write-back are assumed
                # to live under this timestamp check -- confirm.
                known['updated'] = updated
                restocked = (known['stockCount'] == 'OOS/HIDDEN'
                             and stockCount == 'IN STOCK/HIDDEN')
                if restocked and vendor_is_watched():
                    cf.ThreadPoolExecutor().submit(
                        self.sendToSlack, href, updated, products, stockCount, vendor)
                known['stockCount'] = stockCount
    except (requests.Timeout, requests.ConnectionError) as e:
        pass
    except requests.HTTPError:
        if r.status_code == 430:
            self.badP = self.p
    except Exception as e:
        print(u.getDatetime(), self.site, e)
def getProductJsonData(self):
    """Poll ``products.json`` and start lookups for new or updated products.

    Products whose title contains one of ``self.keywords`` and none of
    ``self.ignoreKW`` are skipped. For the rest:

    * unseen href  -> a placeholder is stored in ``self.data`` and a
      ``getVendorAndStock`` lookup is started with ``oldStockCount='NEW'``;
    * known href with a newer ``updated_at`` -> the stored timestamp is
      bumped and a lookup is started carrying the stored vendor/stock.

    Each lookup runs on a worker thread and reports to ``self.callback``.
    Timeouts and connection errors are ignored, HTTP 430 marks the proxy as
    bad, other errors are printed.
    """
    try:
        r = self.s.get(self.site + f'products.json?_={uuid4().hex}',
                       proxies={'https': 'http://{}'.format(self.p)},
                       timeout=(1, 1))
        r.raise_for_status()

        for products in r.json()['products']:
            title = products['title']

            def title_has(words):
                return any(word in title.lower() for word in words)

            if title_has(self.keywords) and not title_has(self.ignoreKW):
                continue

            href = '{}products/{}'.format(self.site, products['handle'])
            updated = products['updated_at']

            if href in self.data:
                known = self.data[href]
                if not updated > known['updated']:
                    continue
                known['updated'] = updated
                key = {
                    'href': href,
                    'updated': updated,
                    'vendor': known['vendor'],
                    'oldStockCount': known['stockCount'],
                }
            else:
                self.data[href] = {'href': href, 'updated': updated}
                key = {'href': href, 'updated': updated, 'oldStockCount': 'NEW'}

            future = cf.ThreadPoolExecutor().submit(
                self.getVendorAndStock, key, random.choice(self.infoP))
            future.add_done_callback(self.callback)
    except (requests.Timeout, requests.ConnectionError) as e:
        pass
    except requests.HTTPError:
        if r.status_code == 430:
            self.badP = self.p
    except Exception as e:
        print(u.getDatetime(), self.site, e)
def run(self):
    """Run this monitor's polling loop forever.

    The polling method is chosen once from ``self.endPoint``:

    * falsy   -> ``runEflash``
    * 'xml'   -> ``runSitemap``
    * 'atom'  -> ``runAtom``
    * 'json'  -> ``runJson``
    * 'json2' -> ``runJson2``

    Each iteration calls the chosen method and then ``self.misc()``; any
    exception raised by either is printed and the loop continues.

    Raises:
        ValueError: if ``self.endPoint`` is not one of the values above.
            Previously this case left the method unbound, so the loop spun
            forever printing the same UnboundLocalError.
    """
    runners = {
        'xml': 'runSitemap',
        'atom': 'runAtom',
        'json': 'runJson',
        'json2': 'runJson2',
    }
    if not self.endPoint:
        runner_name = 'runEflash'
    else:
        runner_name = runners.get(self.endPoint)
        if runner_name is None:
            raise ValueError(f'Unsupported endPoint: {self.endPoint!r}')

    # Resolve by name so only the selected runner has to exist on self.
    methodToRun = getattr(self, runner_name)

    while True:
        try:
            methodToRun()
            self.misc()
        except Exception as e:
            print(u.getDatetime(), self.site, e)
# Script bootstrap: build the initial data set for every configured site,
# then start one ShopifyMonitor process per site.
proxies = u.proxies()
# Randomise the order before proxies[i] is paired with site i below.
random.shuffle(proxies)
with open('sites.json') as sitemaps_json:
    start = time.time()
    print("Attempting to initialize sitemap data...")
    # The file is expected to hold a top-level 'sitemaps' list; each entry is
    # read below for its 'sitemap' and 'name' keys.
    sitemaps = json.load(sitemaps_json)
    sitemaps_length = len(sitemaps['sitemaps'])
    print(str(sitemaps_length) + " sitemap(s) detected.")
    # One slot per site; a slot stays 0 when that site's initialisation fails.
    data = [0 for x in range(sitemaps_length)]
    for i in range(sitemaps_length):
        try:
            # proxies[i] raises IndexError when there are fewer proxies than
            # sites; that is caught below like any other failure.
            data[i] = Initial(sitemaps['sitemaps'][i], proxies[i]).sendSites()
            print(u.getDatetime(), i, 'Initialized {}'.format(sitemaps['sitemaps'][i]['sitemap']), len(data[i]))
            # print(data[i])
        except Exception as e:
            print(e)
    print("Sitemap data initialized.")
    # Elapsed seconds for the whole initialisation pass.
    print(time.time() - start)
    # NOTE(review): the handler belonging to this try is outside the visible
    # excerpt, and the original indentation was lost -- confirm the nesting.
    try:
        for i in range(sitemaps_length):
            # Each monitor runs its polling loop in its own process, named
            # after the site's 'name' entry.
            Process(target=ShopifyMonitor(data[i], sitemaps['sitemaps'][i]).run, name=sitemaps['sitemaps'][i]['name']).start()
def callback(self, x):
    """Store a finished lookup's result in ``self.data`` under its 'href'.

    Args:
        x: a completed future whose ``result()`` is a record dict containing
           at least an 'href' key.

    Any failure (the lookup raised, or the result is not a usable record) is
    printed together with its cause instead of being raised, so a bad lookup
    never breaks the caller. Only ``Exception`` is caught, so interpreter
    shutdown signals are no longer swallowed.
    """
    try:
        record = x.result()
        self.data[record['href']] = record
    except Exception as e:
        print(u.getDatetime(), self.site, 'CF callback exception.', e)
def getHiddenStockSites(self, key, oldStockCount, p):
    """Fetch a product page and derive its availability from embedded JSON.

    Args:
        key: dict with at least 'href' and 'updated'.
        oldStockCount: previously stored state ('NEW', 'OOS/HIDDEN',
            'IN STOCK/HIDDEN').
        p: proxy address used for this request.

    Returns:
        A record ``{'href', 'updated', 'vendor', 'stockCount'}``. On failure
        the vendor is 'error'; the stock state is 'OOS/HIDDEN' for an HTTP
        404 or a previously unseen product, and the old state otherwise.
        An HTTP 430 retries with another proxy from ``u.getInfoProxy()``.

    ``sendToSlack`` is called on an OOS -> in-stock transition, or for a new
    product from a watched vendor updated no earlier than ``u.getDate()``.
    """

    def failed(stock):
        # Record returned whenever the page could not be read or parsed.
        return {
            'href': key['href'],
            'updated': key['updated'],
            'vendor': 'error',
            'stockCount': stock,
        }

    def carried_over():
        # A product never seen before is treated as out of stock.
        return 'OOS/HIDDEN' if oldStockCount == 'NEW' else oldStockCount

    try:
        sites = self.s.get(key['href'] + f'?_={uuid4().hex}',
                           proxies={'https': 'http://{}'.format(p)},
                           timeout=5)
        sites.raise_for_status()

        candidates = [line for line in sites.text.split('\n') if '{"id"' in line]
        raw = candidates
        if 'kith.com' in self.site:
            # Third matching line, minus its trailing character.
            raw = candidates[2][:-1]
        # For any other site `raw` is still a list here, so json.loads
        # raises and the generic handler below returns the fallback record.
        product = json.loads(raw)

        vendor = product['vendor'].lower()
        if product['available']:
            stockCount = 'IN STOCK/HIDDEN'
        else:
            stockCount = 'OOS/HIDDEN'

        if oldStockCount == 'OOS/HIDDEN' and stockCount == 'IN STOCK/HIDDEN':
            self.sendToSlack(key['href'], key['updated'], product, stockCount, vendor)
        elif oldStockCount == 'NEW':
            watched = any(brand in vendor for brand in self.brands) or len(vendor) < 3
            if watched and key['updated'].split('T')[0] >= u.getDate():
                self.sendToSlack(key['href'], key['updated'], product, stockCount, vendor)

        return {
            'href': key['href'],
            'updated': key['updated'],
            'vendor': vendor,
            'stockCount': stockCount,
        }
    except requests.HTTPError as e:
        print(u.getDatetime(), e)
        if sites.status_code == 430:
            return self.getHiddenStockSites(
                key, oldStockCount, random.choice(u.getInfoProxy()))
        if sites.status_code == 404:
            return failed('OOS/HIDDEN')
        return failed(carried_over())
    except IndexError as e:
        print(u.getDatetime(), key['href'], e)
        return failed(carried_over())
    except Exception as e:
        print(u.getDatetime(), p, e)
        return failed(carried_over())
def getHiddenSites(self, key, oldStockCount, p):
    """Fetch a product page and total its stock from the embedded product JSON.

    Each supported shop embeds the product JSON in its page differently, so
    the line to use and the trimming applied are selected by matching
    ``self.site`` (or, for one shop, the final response URL).

    Args:
        key: dict with at least 'href' and 'updated'.
        oldStockCount: previously stored stock count, or 'NEW' for a product
            seen for the first time.
        p: proxy address used for this request.

    Returns:
        A record ``{'href', 'updated', 'vendor', 'stockCount'}`` where
        ``stockCount`` is the sum of all variant ``inventory_quantity``
        values that are at least 1. On failure the vendor is 'error'; the
        stock count is 0 for an HTTP 404 or a previously unseen product, and
        the old count otherwise. An HTTP 430 retries with another proxy from
        ``u.getInfoProxy()`` and returns the retry's result.

    ``sendToSlack`` is called on a 0 -> positive transition, or for a new
    product when there is no endpoint configured or the vendor is watched
    and the update date is not earlier than ``u.getDate()``.
    """

    def failed(stock):
        # Record returned whenever the page could not be read or parsed.
        return {
            'href': key['href'],
            'updated': key['updated'],
            'vendor': 'error',
            'stockCount': stock,
        }

    def carried_over():
        # A product never seen before is treated as having no stock.
        return 0 if oldStockCount == 'NEW' else oldStockCount

    try:
        sites = self.s.get(key['href'] + f'?_={uuid4().hex}',
                           proxies={'https': 'http://{}'.format(p)},
                           timeout=5)
        sites.raise_for_status()

        # Two shops embed the JSON inside an attribute, so any line holding
        # a JSON-looking object is a candidate; the others use '{"id"'.
        if self.site in [
                'https://lessoneseven.com/',
                'https://www.thegoodlifespace.com/'
        ]:
            marker = '{"'
        else:
            marker = '{"id"'
        r = [line for line in sites.text.split('\n') if marker in line]

        if 'trophyroomstore.com' in self.site:
            r = r[4].split(' = ')[1][:-1]
        elif 'thedarksideinitiative.com' in self.site:
            r = r[3].split(' = ')[1].replace(';</script>', '')
        elif 'featuresneakerboutique.com' in self.site:
            r = r[2].replace('product: ', '').strip()[:-1]
            if ' = ' in r:
                r = r.split(' = ')[1]
        elif 'notre-shop.com' in self.site or 'alifenewyork.com' in self.site:
            r = r[2].replace('product: ', '').strip()[:-1]
        elif 'blendsus.com' in self.site:
            r = r[2].split('product: ')[1].replace(', onVariantSelected:', '')
        elif 'octobersveryown.com' in self.site:
            r = r[2].split(' = ')[1].strip()[:-1]
        elif 'undefeated.com' in self.site:
            r = r[2].split('product = ')[1].strip()[:-1]
        elif 'xhibition.co' in self.site:
            r = r[3]
        elif 'hanon-shop.com' in self.site:
            r = r[2].split('{ product: ')[1].split(', onV')[0]
        elif 'thegoodlifespace.com' in self.site:
            # NOTE(review): this replace() is a no-op as written; it looks
            # like an HTML-entity unescape (e.g. '&quot;' -> '"') whose
            # entity was lost -- confirm against the original file.
            r = r[0].split('="')[1].split('"')[0].replace('"', '"')
        elif 'lessoneseven.com' in sites.url:
            # NOTE(review): same no-op replace() as above -- confirm.
            r = r[0].split('"')[1].replace('"', '"')
        elif any(domain in self.site for domain in (
                'doverstreetmarket.com', 'deadstock.ca', 'stashedsf.com',
                'worldofhombre.com')):
            r = r[2]
        # For an unrecognised site `r` is still a list here, so json.loads
        # raises and the generic handler below returns the fallback record.
        r = json.loads(r)

        vendor = r['vendor'].lower()
        stockCount = sum(variant['inventory_quantity']
                         for variant in r['variants']
                         if variant['inventory_quantity'] >= 1)

        if oldStockCount == 0 and stockCount >= 1:
            self.sendToSlack(key['href'], key['updated'], r, stockCount, vendor)
        elif oldStockCount == 'NEW':
            if not self.endPoint or (
                (any(brand in vendor for brand in self.brands)
                 or len(vendor) < 3)
                    and key['updated'].split('T')[0] >= u.getDate()):
                self.sendToSlack(key['href'], key['updated'], r, stockCount, vendor)

        return {
            'href': key['href'],
            'updated': key['updated'],
            'vendor': vendor,
            'stockCount': stockCount,
        }
    except requests.HTTPError as e:
        print(u.getDatetime(), e)
        if sites.status_code == 430:
            # Return the retry's record. It used to be discarded, so a
            # successful retry still produced the error record below.
            return self.getHiddenSites(key, oldStockCount,
                                       random.choice(u.getInfoProxy()))
        if sites.status_code == 404:
            return failed(0)
        return failed(carried_over())
    except IndexError as e:
        print(u.getDatetime(), key['href'], e)
        return failed(carried_over())
    except Exception as e:
        print(u.getDatetime(), p, e)
        return failed(carried_over())
def getProdInfo(self, key, oldStockCount, p):
    """Fetch ``<product>.json`` and report the product's vendor and stock.

    Args:
        key: dict with at least 'href' and 'updated'.
        oldStockCount: previously stored stock (an int, 'HIDDEN', or 'NEW'
            for a product seen for the first time).
        p: proxy address used for this request.

    Returns:
        A record ``{'href', 'updated', 'vendor', 'stockCount'}``.
        ``stockCount`` is the sum of all variant ``inventory_quantity``
        values that are at least 1, or 'HIDDEN' when the quantities cannot
        be read. On failure the vendor is 'error'; the stock count is 0 for
        an HTTP 404 or a previously unseen product, and the old value
        otherwise. An HTTP 430 retries with another proxy from
        ``u.getInfoProxy()``.

    ``sendToSlack`` is called on a 0 -> positive transition, whenever the
    previous state was 'HIDDEN', or for a new product from a watched vendor
    updated no earlier than ``u.getDate()``.
    """

    def failed(stock):
        # Record returned whenever the product could not be read or parsed.
        return {
            'href': key['href'],
            'updated': key['updated'],
            'vendor': 'error',
            'stockCount': stock,
        }

    def carried_over():
        # A product never seen before is treated as having no stock.
        return 0 if oldStockCount == 'NEW' else oldStockCount

    try:
        sites = self.s.get(key['href'] + f'.json?_={uuid4().hex}',
                           proxies={'https': 'http://{}'.format(p)},
                           timeout=5)
        sites.raise_for_status()

        # Parse the body once; it is reused for vendor, stock and alerts.
        payload = sites.json()
        product = payload['product']
        vendor = product['vendor'].lower()

        try:
            stockCount = sum(variant['inventory_quantity']
                             for variant in product['variants']
                             if variant['inventory_quantity'] >= 1)
        except Exception:
            # Quantities missing or not numeric: the shop hides inventory.
            stockCount = 'HIDDEN'

        # Compare numerically only when a number is available. Comparing
        # 'HIDDEN' >= 1 used to raise TypeError and produce an error record.
        restocked = (oldStockCount == 0
                     and stockCount != 'HIDDEN'
                     and stockCount >= 1)

        if restocked or oldStockCount == 'HIDDEN':
            self.sendToSlack(key['href'], key['updated'], payload, stockCount, vendor)
        elif oldStockCount == 'NEW':
            watched = any(brand in vendor for brand in self.brands) or len(vendor) < 3
            if watched and key['updated'].split('T')[0] >= u.getDate():
                self.sendToSlack(key['href'], key['updated'], payload, stockCount, vendor)

        return {
            'href': key['href'],
            'updated': key['updated'],
            'vendor': vendor,
            'stockCount': stockCount,
        }
    except requests.HTTPError as e:
        print(u.getDatetime(), e)
        if sites.status_code == 430:
            return self.getProdInfo(key, oldStockCount,
                                    random.choice(u.getInfoProxy()))
        if sites.status_code == 404:
            return failed(0)
        return failed(carried_over())
    except IndexError as e:
        print(u.getDatetime(), key['href'], e)
        return failed(carried_over())
    except Exception as e:
        print(u.getDatetime(), p, e)
        return failed(carried_over())