def parse_sold(links_blob, sold_blob):
    """Scan every listing in *links_blob* and record those showing a status.

    For each entry of ``links`` the page is loaded through ``HtmlUtil``; when
    the page contains at least one ``span`` carrying the status CSS classes,
    the text of the first such span is stored as ``status`` and the listing
    is handed to ``add_sold``.

    Args:
        links_blob: JSON text of an object with a ``links`` list; every
            entry is expected to provide a ``link`` key with the page URL.
        sold_blob: JSON text of the existing sold records. It is decoded and
            passed to ``add_sold`` for each listing that shows a status.

    Returns:
        The sold records serialised as indented, key-sorted JSON text with
        non-ASCII characters left unescaped.
    """
    data = json.loads(links_blob)
    sold_data = json.loads(sold_blob)
    log("Number of links to scan: {}".format(len(data['links'])))

    for link in data['links']:
        # Reset per listing so a failure before the URL is read is not
        # reported against the previous listing's URL.
        url = None
        try:
            url = link['link']
            html = HtmlUtil(url)
            soup = html.get_soup()
            pris = html.get_price()
            rows = soup.findAll(
                "span", {"class": "u-capitalize status status--warning u-mb0"})
            if rows:
                # Fresh dict per listing: nothing from an earlier listing can
                # leak into this one.
                result = {'status': rows[0].get_text().strip()}
                # prepare_json is defined elsewhere; presumably it copies the
                # listing fields from `link` into `result` -- TODO confirm.
                JsonUtil(result, link).prepare_json(price=pris)
                add_sold(result, sold_data)
        except Exception as e:
            # Best effort: one broken page must not abort the whole scan.
            log("Bad URL {url}: {e}".format(e=e, url=url))

    log("Parsing sold finished..!")
    return json.dumps(sold_data, indent=4, sort_keys=True, ensure_ascii=False)
def parse_visning(link_blob, visning_blob):
    """Scan every listing in *link_blob* and record its ``<time>`` entries.

    For each entry of ``links`` the page is loaded through ``HtmlUtil``; the
    stripped text of every ``time`` element found on the page is stored as
    ``time`` and handed to ``add_visning`` together with the listing.

    Args:
        link_blob: JSON text of an object with a ``links`` list; every entry
            is expected to provide a ``link`` key with the page URL.
        visning_blob: JSON text of the existing visning records. It is
            decoded and passed to ``add_visning`` for every time found.

    Returns:
        The visning records serialised as indented, key-sorted JSON text
        with non-ASCII characters left unescaped.
    """
    visning_data = json.loads(visning_blob)
    data = json.loads(link_blob)
    log("Number of links to scan: {}".format(len(data['links'])))

    for link in data['links']:
        # Bind `url` before the try block: otherwise a failure while reading
        # link['link'] would make the except handler itself raise
        # UnboundLocalError and abort the whole scan.
        url = None
        # Fresh dict per listing so nothing leaks from the previous one.
        result = {}
        try:
            url = link['link']
            html = HtmlUtil(url)
            soup = html.get_soup()
            for row in soup.findAll("time"):
                result['time'] = row.get_text().strip()
                # prepare_json is defined elsewhere; presumably it copies the
                # listing fields from `link` into `result` -- TODO confirm.
                JsonUtil(result, link).prepare_json(price=html.get_price())
                add_visning(result, visning_data)
        except Exception as e:
            # Best effort: one broken page must not abort the whole scan.
            log("Bad URL {url}: {e}".format(e=e, url=url))

    log("Parsing visnings finished..!")
    return json.dumps(visning_data, indent=4, sort_keys=True,
                      ensure_ascii=False)
def add_pris(result, pris_data):
    """Record the price of a listing in the price history *pris_data*.

    When ``link_exists`` reports the listing's link as unknown, a new entry
    holding a ``details`` dict and a one-element ``price_list`` is appended
    to ``pris_data['links']``.  Otherwise every entry whose ``link`` contains
    the listing's link gets the price appended to its ``price_list``, unless
    an equal price is already recorded there.

    Args:
        result: Mapping providing at least ``link`` and ``price``.
        pris_data: Mapping with a ``links`` list; it is modified in place.
    """
    # NOTE(review): datetime.now() yields naive local time although the
    # format ends in a literal 'Z' -- confirm whether UTC is intended.
    if not link_exists(result['link'], pris_data):
        current = datetime.now().strftime('%Y-%m-%dT%H:%M:%SZ')
        print("Adding pris...")
        entry = {'details': {}}
        # prepare_json is defined elsewhere; presumably it fills
        # entry['details'] from `result` -- TODO confirm.
        JsonUtil(entry, result).prepare_json(output=entry['details'])
        entry['price_list'] = [{'price': result['price'], 'time': current}]
        pris_data['links'].append(entry)
        return

    # The link is already known: only extend the price history.
    for entry in pris_data['links']:
        if result['link'] not in entry['link']:
            continue
        # Compare with ==, not `in`: on strings `in` is a substring test, so
        # "500 000" would wrongly count as recorded when "2 500 000" is.
        # The check is evaluated per entry rather than via one shared flag.
        already_recorded = any(
            result['price'] == item['price'] for item in entry['price_list'])
        if not already_recorded:
            current = datetime.now().strftime('%Y-%m-%dT%H:%M:%SZ')
            entry['price_list'].append(
                {'price': result['price'], 'time': current})
def add_title(result, data):
    """Append a title record for *result* unless its link is already known.

    Nothing happens when ``link_exists`` reports the link as present in
    *data*.  Otherwise a new record is built through ``JsonUtil``, given the
    ``text`` of *result* and the current timestamp as ``time``, and appended
    to ``data['links']``.

    Args:
        result: Mapping providing ``link``, ``price`` and ``text``.
        data: Mapping with a ``links`` list; it is modified in place.
    """
    stamp = datetime.now().strftime('%Y-%m-%dT%H:%M:%SZ')
    if link_exists(result['link'], data):
        return

    record = {}
    # prepare_json is defined elsewhere; presumably it copies the listing
    # fields from `result` into `record` -- TODO confirm.
    JsonUtil(record, result).prepare_json(price=result['price'])
    record.update(text=result['text'], time=stamp)
    data['links'].append(record)
def add_sold(result, data):
    """Append a sold record for *result* unless its link is already known.

    Nothing happens when ``link_exists`` reports the link as present in
    *data*.  Otherwise a record seeded with the ``status`` of *result* and
    the current timestamp as ``time`` is completed through ``JsonUtil`` and
    appended to ``data['links']``.

    Args:
        result: Mapping providing ``link``, ``price`` and ``status``.
        data: Mapping with a ``links`` list; it is modified in place.
    """
    if link_exists(result['link'], data):
        return

    stamp = datetime.now().strftime('%Y-%m-%dT%H:%M:%SZ')
    record = {'status': result['status'], 'time': stamp}
    # prepare_json is defined elsewhere; presumably it copies the listing
    # fields from `result` into `record` -- TODO confirm.
    JsonUtil(record, result).prepare_json(price=result['price'])
    data['links'].append(record)
def parse_price(link_blob, price_blob):
    """Scan every listing in *link_blob* and record its current price.

    For each entry of ``links`` the page is loaded through ``HtmlUtil`` and
    the listing, together with the price read from the page, is handed to
    ``add_pris``.

    Args:
        link_blob: JSON text of an object with a ``links`` list; every entry
            is expected to provide a ``link`` key with the page URL.
        price_blob: JSON text of the existing price records. It is decoded
            and passed to ``add_pris`` for every listing.

    Returns:
        The price records serialised as indented, key-sorted JSON text with
        non-ASCII characters left unescaped.
    """
    data = json.loads(link_blob)
    price_data = json.loads(price_blob)
    log("Number of links to scan: {}".format(len(data['links'])))

    for link in data['links']:
        # Bind `url` before the try block: otherwise a failure while reading
        # link['link'] would make the except handler itself raise
        # UnboundLocalError and abort the whole scan.
        url = None
        try:
            result = {}
            url = link['link']
            html = HtmlUtil(url)
            # prepare_json is defined elsewhere; presumably it copies the
            # listing fields from `link` into `result` -- TODO confirm.
            JsonUtil(result, link).prepare_json(price=html.get_price())
            add_pris(result, price_data)
        except Exception as e:
            # Best effort: one broken page must not abort the whole scan.
            log("Bad URL {url}: {e}".format(e=e, url=url))

    log("Parsing price finished..!")
    return json.dumps(price_data, indent=4, sort_keys=True,
                      ensure_ascii=False)
def add_visning(result, visning_data):
    """Record the ``time`` of *result* in the visning records.

    When ``link_exists`` reports the listing's link as unknown, a new entry
    holding a ``details`` dict (including the price) and a one-element
    ``visnings`` list is appended to ``visning_data['links']``.  Otherwise
    the time is appended to the ``visnings`` list of every entry whose
    ``link`` contains the listing's link and that does not list it yet.

    Args:
        result: Mapping providing ``link``, ``price`` and ``time``.
        visning_data: Mapping with a ``links`` list; it is modified in place.
    """
    if not link_exists(result['link'], visning_data):
        # Unknown link: create the entry with its details first ...
        entry = {'details': {}}
        # prepare_json is defined elsewhere; presumably it fills
        # entry['details'] from `result` -- TODO confirm.
        JsonUtil(entry, result).prepare_json(output=entry['details'])
        entry['details']['price'] = result['price']
        # ... then start its list of visning times.
        entry['visnings'] = [result['time']]
        visning_data['links'].append(entry)
        return

    # Known link: add the time wherever it is still missing.
    for known in visning_data['links']:
        if (result['link'] in known['link']
                and result['time'] not in known['visnings']):
            known['visnings'].append(result['time'])