def modelWalker(baseUrl):
    """Visit each model tile on the page at ``baseUrl`` and scrape its details.

    The page is loaded with ``pq`` and every element matching
    ``.prodContainer:not(#JapaneseProd)`` is treated as one model tile.
    Iteration starts at the index returned by ``getStartIdx()``.

    For each tile:
      * the model name is the second non-blank line of the tile's text
        content; tiles with fewer than two non-blank lines are logged as
        'No model name' and skipped;
      * the target path is pulled out of the tile's ``onclick`` attribute
        (the text between single quotes);
      * the tile index is pushed onto the global ``prevTrail`` while
        ``detailScraper`` runs, and popped once it returns.

    Any exception raised along the way drops into ``ipdb`` and then prints
    the traceback; it is not re-raised. In that case the remaining tiles are
    not visited, and an index pushed just before a failing ``detailScraper``
    call is left on ``prevTrail``.
    """
    global prevTrail
    try:
        ulog('baseUrl= '+baseUrl)
        page = pq(url=baseUrl)
        tiles = page('.prodContainer:not(#JapaneseProd)')
        firstIdx = getStartIdx()
        tileCount = len(tiles)
        ulog('numModels= %s' % tileCount)

        for pos in range(firstIdx, tileCount):
            ulog('idx=%s' % pos)

            try:
                tile = tiles[pos]
                # Collect the non-blank, whitespace-trimmed lines of the tile.
                labels = []
                for rawLine in tile.text_content().splitlines():
                    trimmed = rawLine.strip()
                    if trimmed:
                        labels.append(trimmed)
                # The model name sits on the second non-blank line.
                modelName = labels[1]
            except IndexError:
                ulog('No model name')
                continue

            ulog('modelName="%s"' % modelName)

            # Greedy match: from the first single quote up to the last single
            # quote that is not preceded by a backslash.
            clickHandler = tile.attrib['onclick']
            quoted = re.search(r"'(.+)(?<!\\)'", clickHandler)
            href = quoted.group(1)

            detailUrl = urlChangePath(page.base_url, href)
            prevTrail.append(pos)
            detailScraper(detailUrl)
            prevTrail.pop()
    except Exception as ex:
        ipdb.set_trace()
        traceback.print_exc()
def seriesWalker(baseUrl):
    """Follow each series link on the page at ``baseUrl`` into ``modelWalker``.

    The page is loaded with ``pq`` and every element matching
    ``.prodContainer a.button`` is treated as a link to one series.
    Iteration starts at the index returned by ``getStartIdx()``.

    For each link, its ``href`` is combined with the page's base URL via
    ``urlChangePath`` and handed to ``modelWalker``. The link's index is
    pushed onto the global ``prevTrail`` for the duration of that call and
    popped once it returns.

    Any exception raised along the way drops into ``ipdb`` and then prints
    the traceback; it is not re-raised, and the remaining links are not
    visited.
    """
    global prevTrail
    try:
        ulog('baseUrl= '+baseUrl)
        page = pq(url=baseUrl)
        links = page('.prodContainer a.button')
        firstIdx = getStartIdx()
        linkCount = len(links)
        ulog('numSeriess=%s' % linkCount)

        for pos in range(firstIdx, linkCount):
            ulog('idx=%s' % pos)
            href = links[pos].attrib['href']

            prevTrail.append(pos)
            seriesUrl = urlChangePath(page.base_url, href)
            modelWalker(seriesUrl)
            prevTrail.pop()
    except Exception as ex:
        ipdb.set_trace()
        traceback.print_exc()