def get_ids(daily=False):
    """Collect product ids for every city in ``city_osm`` into ``ids_db``.

    For each city the products API is queried once to compute the number of
    result pages (``ceil(ContentCount / PerPage)``), then each page is
    fetched and every product id that ``is_duplicate`` does not report as
    already stored is inserted as
    ``{"product_id": ..., "parsed": False, "city": ...}``.

    Args:
        daily: When true, stop paging through the current city as soon as
            ``is_old_data`` returns true for the ``order_date`` of the last
            product on a page.

    Failures are reported through ``_print.fail`` and never propagate: a
    city whose page count cannot be determined is skipped, a failing page
    is skipped, and a failing product is skipped.
    """
    for _city in city_osm:
        try:
            osm_id = str(_city["osm_id"])
            for_page_len = requests.get(
                links["products_api"]
                + "?GID={}&cities={}".format(osm_id, osm_id))
            page_len_json = for_page_len.json()
        except Exception:
            _print.fail(f'line 859: for_page_len - ს აქვს რაღაც ხარვეზი')
            # Without a response there is no page count for this city;
            # skip it instead of reusing the previous city's values.
            continue

        try:
            pagination = page_len_json["Pagination"]
            page_len = math.ceil(
                int(pagination["ContentCount"]) / pagination["PerPage"])
        except Exception:
            _print.fail(f'line 864: page_len - ს აქვს რაღაც ხარვეზი')
            continue

        _print.value(_city['name'])
        for page_index in range(page_len):
            # The API pages are requested 1-based.
            page_number = page_index + 1
            _print.value('Page ' + str(page_number))
            try:
                product_pages_res = requests.get(
                    links["products_api"] + "?Page=" + str(page_number)
                    + "&GID={}&cities={}".format(osm_id, osm_id))
                # Decode the body once and keep only the product list.
                products = product_pages_res.json()["Prs"]["Prs"]

                last_order_date = datetime.datetime.strptime(
                    products[-1]["order_date"], "%Y-%m-%d %H:%M:%S")
                if is_old_data(last_order_date) and daily:
                    break

                for prs in products:
                    try:
                        if not is_duplicate(ids_db, "product_id",
                                            prs["product_id"]):
                            ids_db.insert_one({
                                "product_id": prs["product_id"],
                                "parsed": False,
                                "city": _city["name"],
                            })
                            _print.ok(f'{prs["product_id"]} პროდუქტი დაემატა')
                        else:
                            _print.warning(
                                f'{prs["product_id"]} არის დუპლიკატი')
                    except Exception:
                        _print.fail(
                            f'line 884: {prs["product_id"]} პროდუქტზე არის რაღაც ხარვეზი'
                        )
            except Exception:
                _print.fail(
                    f'line 887: {page_number} გვერდზე არის რაღაც ხარვეზი')
def get_links():
    """Queue ss.ge real-estate item links that are not yet in ``links_db``.

    The page count is read from a listing page using the ``page_len``
    selector and printed. Every listing page in ``range(page_len)`` is then
    fetched, each item href is prefixed with ``https://ss.ge`` and either
    inserted as ``{"link", "parsed": False, "source": "ss.ge"}`` or reported
    as a duplicate through ``_print.fail``.
    """
    site_root = "https://ss.ge"

    pager_response = requests.get(
        'https://ss.ge/en/real-estate/l?Page=2&PriceType=false&CurrencyId=1')
    pager_text = Selector(response=pager_response).css(
        _selectors["page_len"]).get()
    page_len = string_to_int(pager_text)[0]
    print(page_len)

    # NOTE(review): the index starts at 0, so Page=0 is requested and
    # Page=page_len never is - confirm this matches the site's paging.
    for page_index in range(page_len):
        listing_response = requests.get(
            f'https://ss.ge/en/real-estate/l?Page={page_index}&PriceType=false&CurrencyId=1')
        hrefs = Selector(response=listing_response).css(
            _selectors["item_links"]).getall()

        for href in hrefs:
            absolute_link = site_root + href
            _print.value(absolute_link)

            if is_duplicate(links_db, "link", absolute_link):
                _print.fail("დუპლიკატი")
                continue

            links_db.insert_one({
                'link': absolute_link,
                'parsed': False,
                "source": "ss.ge",
            })
def get_links():
    """Queue home.ge item links that are not yet stored in ``links_db``.

    For every entry of ``product_types`` the category page is fetched to
    read the page count (a count of 0 is treated as a single page). Each
    ``index<N>.html`` page in ``range(page_len)`` is then fetched, and every
    item link found is either inserted as ``{"link", "parsed": False}`` or
    reported as a duplicate through ``_print.fail``.
    """
    for _product_type in product_types:
        category_response = requests.get(
            f'https://www.home.ge/en/{_product_type}.html')
        pager_text = Selector(response=category_response).css(
            _selectors["page_len"]).get()
        reported_pages = string_to_int(pager_text)[0]
        # A category reporting zero pages is still scanned once.
        page_len = 1 if reported_pages == 0 else reported_pages

        for page_index in range(page_len):
            listing_response = requests.get(
                f'https://www.home.ge/en/{_product_type}/index{page_index}.html')
            item_links = Selector(response=listing_response).css(
                _selectors["item_links"]).getall()

            for item_link in item_links:
                _print.value(item_link)

                if is_duplicate(links_db, "link", item_link):
                    _print.fail("დუპლიკატი")
                    continue

                links_db.insert_one({'link': item_link, 'parsed': False})
def get_products():
    """Fetch details for every unparsed id in ``ids_db`` and store the listing.

    For each ``{"parsed": False}`` record the product-details API is called,
    then:

    1. If the response carries no product data the id is deleted from
       ``ids_db``.
    2. The owning user is inserted into ``users_db``, or, when
       ``is_duplicate`` reports it already exists, its ``number_of_posts``
       is refreshed and its stored ``_id`` is looked up.
    3. The availability window is derived from ``order_date``
       (start date plus ``MONTH``).
    4. The listing is inserted into ``real_estate_db``, the id is marked
       ``parsed`` and ``remove_error`` is called for it.

    Any failing step is reported through ``_print.fail`` (and ``log_error``
    where applicable) and the product is skipped; it stays unparsed. No
    exception propagates to the caller.
    """
    for prod in ids_db.find({"parsed": False}):
        try:
            product_id = prod['product_id']
            product_details_req = requests.get(
                links["product_details_api"] + product_id).json()
            product_details = product_details_req["PrData"]
            users_details = product_details_req["User"]
        except Exception:
            _print.fail('პროდუქტის მოთხოვნა ვერ შესრულდა')
            continue

        if not product_details:
            ids_db.delete_one({"product_id": product_id})
            _print.fail('პროდუქტი არ მოიძებნა')
            continue

        try:
            if not is_duplicate(users_db, "user_id", users_details["user_id"]):
                user_object = users_db.insert_one({
                    "user_id": users_details["user_id"],
                    "name": (
                        f'{users_details["user_name"]} {users_details["user_surname"]}'
                    ),
                    "gender": 0 if users_details['gender_id'] == "2" else 1,
                    "number_of_posts": users_details["pr_count"],
                    "phone": {
                        # First three characters are stored as the country
                        # code, the remainder as the number.
                        "country_code": int(product_details['client_phone'][0:3]),
                        "number": product_details['client_phone'][3:],
                    },
                    "email": [users_details['username']],
                    "created_at": datetime.datetime.utcnow(),
                    "social_media": [{
                        "provider": "skype",
                        "user": users_details['skype'],
                        "addres": "",
                    }] if users_details['skype'] != "" else [],
                })
                user_object_id = user_object.inserted_id
                _print.ok(f'{users_details["user_id"]} მომხმარებელი დაემატა')
            else:
                try:
                    users_db.update_many(
                        {"user_id": users_details["user_id"]},
                        {"$set": {"number_of_posts": users_details["pr_count"]}})
                except Exception:
                    _print.fail(
                        f'line 934: {users_details["user_id"]} მომხმარებელი ვერ განახლდა'
                    )
                user_object_id = ObjectId(
                    users_db.find_one(
                        {"user_id": users_details["user_id"]})["_id"])
                _print.value(
                    f'{users_details["user_id"]} მომხმარებელი განახლდა')
        except Exception:
            _print.fail('line 944: მომხმარებელი არ დაემატა')
            # Without an owner id the listing must not be stored: carrying
            # on would reuse the previous product's owner (or hit an
            # undefined name on the first product).
            log_error(product_id, "პროდუქტი არ დაემატა")
            continue

        try:
            order_date = product_details['order_date']
            # Only the leading YYYY-MM-DD part of the timestamp is used.
            prod_sdate = datetime.date(
                int(order_date[0:4]),
                int(order_date[5:7]),
                int(order_date[8:10]))
            prod_edate = prod_sdate + MONTH
        except Exception:
            _print.fail('line 954: თარიღზე დაფიქსირდა შეცდომა')
            log_error(product_id, "თარიღზე დაფიქსირდა შეცდომა")
            continue

        try:
            real_estate_db.insert_one({
                "owner_id": user_object_id,
                "views": product_details_req['Views'],
                "is_company": True,
                "post_status": "active",
                "deal_type": get_deal_type(product_details["adtype_id"]),
                "type_of_property": [
                    get_product_type(product_details["product_type_id"])
                ],
                "floor": int(
                    exceptor(product_details["floor"], "სართულის",
                             product_id)),
                "floors": int(
                    exceptor(product_details["floors"],
                             "სართულების რაოდენობის", product_id)),
                "car_spaces": int(
                    exceptor(product_details["parking_id"], "გარაჟი",
                             product_id)),
                "location": [{
                    "country": {
                        "id": "GE"
                    },
                    "city": {
                        "id": geo_names(prod["city"], product_id),
                        "name": prod["city"],
                        "subdivision": ""
                    },
                    "street": exceptor(product_details["name"], "ქუჩის",
                                       product_id),
                    "address": exceptor(product_details["street_address"],
                                        "მისამართის", product_id),
                    "geo_cord": {
                        "lang": exceptor(product_details["map_lon"],
                                         "long კოორდიანის", product_id),
                        "lat": exceptor(product_details["map_lat"],
                                        "lat კოორდიანტის ", product_id)
                    },
                }],
                "avilable_from":
                    f'{prod_sdate.year}-{prod_sdate.month}-{prod_sdate.day}',
                "avilable_to":
                    f'{prod_edate.year}-{prod_edate.month}-{prod_edate.day}',
                "is_urgent": False,
                "is_agent": bool(product_details["makler_id"]),
                "detail": {
                    "title": exceptor(
                        PrTitles[
                            f'{product_details["adtype_id"]}_{product_details["product_type_id"]}_{product_details["estate_type_id"]}'],
                        "სათაურის", product_id),
                    "houses_rules": "",
                    # The raw comment is stored; translating it via
                    # Translate(...) is currently disabled.
                    "description": product_details["comment"]
                },
                "created_at": datetime.datetime.utcnow(),
                "price": {
                    "price_type": "total_price",
                    "fin_price": 0,
                    "fax_price": 0,
                    "fix_price": int(float(product_details["price_value"])),
                    "currency": "GEL"
                },
                "metric": "feet_square",
                "total_area": int(
                    exceptor(product_details["area_size_value"], "ფართობის",
                             product_id)),
                "bedrooms": int(
                    exceptor(product_details["bedrooms"],
                             "საძინებლების რაოდენობის", product_id)),
                "bathrooms": int(
                    exceptor(product_details["bathrooms"],
                             "აბაზანის რაოდენობის", product_id)),
                "outdoor_features": get_features("outdoor", product_details),
                "indoor_features": get_features("indoor", product_details),
                "climat_control": get_features("climat_control",
                                               product_details),
                "phone": {
                    "country_code": int(product_details['client_phone'][0:3]),
                    "number": product_details['client_phone'][3:],
                },
                "property_type": get_property_type(
                    product_details["product_type_id"]),
                "files": get_images(product_details["photos_count"],
                                    product_details["photo"],
                                    product_details["product_id"]),
                "status": is_new_building(product_details["estate_type_id"]),
            })
            _print.ok(f'{product_details["product_id"]} პროდუქტი დაემატა')
            ids_db.update_one({"product_id": product_id},
                              {"$set": {"parsed": True}})
            remove_error(product_id)
        except Exception:
            _print.fail('line 987: პროდუქტი არ დაემატა')
            log_error(product_id, "პროდუქტი არ დაემატა")
def get_products():
    """Scrape every unparsed link in ``links_db`` into ``real_estate_db``.

    Each ``{"parsed": False}`` link is downloaded, its fields are extracted
    with the ``select_one`` / ``select_many`` helpers and the feature
    helpers, and the resulting document is inserted with source
    ``"Home.ge"``. On success the link is marked ``parsed``; if the insert
    or the update fails, ``log_error`` is called with the response URL.
    Errors raised while extracting fields are not caught here, except for
    the view counter, which falls back to 0.
    """
    for prod in links_db.find({"parsed": False}):
        link = prod["link"]
        req = requests.get(link)
        _print.value(link)

        city = select_one(req, "#df_field_mdebareoba .value", True)
        bread_crumbs = select_many(req, "#bread_crumbs .point1 li a::text")
        geonames_id = geo_names(city)
        deal_type = get_deal_type(bread_crumbs[2], link)
        property_type = get_property_type(bread_crumbs[1], link)
        status = get_status(
            select_one(req, "#df_field_built_status .value", True), link)
        street = select_one(req, "#df_field_mdebareoba_level1 .value", True)
        address = select_one(req, "#df_field_mdebareoba_level2 .value", True)
        bedrooms = select_one(req, "#df_field_bedrooms .value", True)
        bathrooms = select_one(req, "#df_field_bathrooms .value", True)
        total_area = string_to_int(
            select_one(req, "#df_field_square_feet .value", True))[0]
        floor = select_one(req, "#df_field_floor .value", True)
        floors = select_one(req, "#df_field_number_of_floors .value", True)

        try:
            _view = int(select_one(req, "#area_listing .count::text"))
        except Exception:
            # Any failure reading or converting the counter means 0 views.
            _view = 0

        outdoor_features = get_outdoor_features(req)
        indoor_features = get_indoor_features(req)
        climate_control = get_climate_control(req)

        details = [{
            "title": select_one(req, "#area_listing > h1"),
            "house_rules": "",
            "description": Translate(
                select_one(req, "#df_field_additional_information .value",
                           True))
        }]
        price = {
            "price_type": "total_price",
            "min_price": 0,
            "max_price": 0,
            "fix_price": converted_price(
                select_one(req, "#lm_loan_amount::attr(value)"), link),
            "currency": "USD"
        }
        phones = [{
            "country_code": 995,
            "number": converted_price(
                select_one(req, "#df_field_phone .value a::text"), link)
        }]
        files = get_images(req)

        try:
            real_estate_db.insert_one({
                "location": {
                    "country": {
                        "id": "GE"
                    },
                    "city": {
                        "id": geonames_id,
                        "name": city,
                        "subdivision": ""
                    },
                    "street": street,
                    "address": address,
                },
                "created_at": datetime.datetime.utcnow(),
                "deal_type": deal_type,
                "type_of_property": [property_type],
                "status": status,
                "bedrooms": bedrooms,
                "bathrooms": bathrooms,
                "total_area": total_area,
                "metric": "feet_square",
                "floor": floor,
                "floors": floors,
                "car_spaces": 0,
                "is_agent": True,
                "outdoor_features": outdoor_features,
                "indoor_features": indoor_features,
                "climate_control": climate_control,
                "detail": details,
                "price": price,
                "phones": phones,
                "files": files,
                "source": "Home.ge",
                "view": _view
            })
            links_db.update_one({"link": link}, {"$set": {"parsed": True}})
        except Exception:
            # Catch Exception, not everything, so Ctrl-C / SystemExit can
            # still stop the crawl.
            log_error(req.url, "პროდუქტის", True)
def get_products():
    """Scrape every unparsed ss.ge link in ``links_db`` and pretty-print it.

    Each ``{"parsed": False, "source": "ss.ge"}`` link is downloaded, its
    fields are extracted with the ``select_one`` / ``select_many`` helpers
    and the feature helpers, and the assembled document is passed to
    ``pprint``. Nothing is written to a database and links are not marked
    as parsed. Errors raised while extracting fields are not caught here,
    except for the view counter, which falls back to 0.
    """
    from translator import Translate

    for prod in links_db.find({"parsed": False, "source": "ss.ge"}):
        link = prod["link"]
        req = requests.get(link)
        _print.value(link)

        bread_crumbs = select_many(req, ".detailed_page_navlist ul li a::text")
        city = bread_crumbs[3]
        geonames_id = geo_names(city)
        deal_type = get_deal_type(bread_crumbs[2].strip(), link)
        property_type = get_property_type(bread_crumbs[1].strip(), link)
        # NOTE(review): "fieldValueStatusId2" has no "." or "#" prefix, so it
        # is matched as an element name - confirm the intended selector.
        status = get_status(
            select_one(req, "fieldValueStatusId2::text"), link)
        street = select_one(
            req, ".StreeTaddressList.realestatestr::text").strip()
        address = street

        # One query serves all three positional parameter reads below.
        params = select_many(req, ".ParamsHdBlk text::text")
        bedrooms = int(params[2])
        bathrooms = ""
        total_area = string_to_int(params[0])[0]
        # NOTE(review): unlike total_area, floor and floors keep the whole
        # string_to_int result without [0] - confirm that is intended.
        floor = string_to_int(params[3])
        floors = string_to_int(
            select_one(req, ".ParamsHdBlk text text span::text"))

        try:
            _view = int(select_one(req, ".article_views span::text"))
        except Exception:
            # Any failure reading or converting the counter means 0 views.
            _view = 0

        outdoor_features = get_outdoor_features(req)
        indoor_features = get_indoor_features(req)
        climate_control = get_climate_control(req)

        # NOTE(review): the "#area_listing" / "#df_field_*" / "#lm_loan_amount"
        # selectors below do not follow the class-based selectors used above
        # for this site - verify them against ss.ge markup.
        details = [{
            "title": select_one(req, "#area_listing > h1"),
            "house_rules": "",
            "description": Translate(
                select_one(req, "#df_field_additional_information .value",
                           True))
        }]
        price = {
            "price_type": "total_price",
            "min_price": 0,
            "max_price": 0,
            "fix_price": converted_price(
                select_one(req, "#lm_loan_amount::attr(value)"), link),
            "currency": "USD"
        }
        phones = [{
            "country_code": 995,
            "number": converted_price(
                select_one(req, "#df_field_phone .value a::text"), link)
        }]
        files = get_images(req)

        pprint({
            "location": {
                "country": {
                    "id": "GE"
                },
                "city": {
                    "id": geonames_id,
                    "name": city,
                    "subdivision": ""
                },
                "street": street,
                "address": address,
            },
            "created_at": datetime.datetime.utcnow(),
            "deal_type": deal_type,
            "type_of_property": [property_type],
            "status": status,
            "bedrooms": bedrooms,
            "bathrooms": bathrooms,
            "total_area": total_area,
            "metric": "feet_square",
            "floor": floor,
            "floors": floors,
            "car_spaces": 0,
            "is_agent": True,
            "outdoor_features": outdoor_features,
            "indoor_features": indoor_features,
            "climate_control": climate_control,
            "detail": details,
            "price": price,
            "phones": phones,
            "files": files,
            # Matches the "source" value the query above filters on.
            "source": "ss.ge",
            "view": _view
        })