def log_error(_id, text, saved=False):
    """Report a scraping failure and persist it in the error collection.

    Prints the failure through ``fail`` and inserts one document into
    ``error_db``.

    Args:
        _id: Identifier stored under ``product_id`` in the error document.
        text: Short label of what failed; used in the console message and
            stored verbatim.
        saved: Flag stored under ``saved`` in the error document.
    """
    message = f'{text} წამოღების დროს დაფიქსირდა შეცდომა'
    fail(message)

    error_record = {
        "product_id": _id,
        "text": text,
        "saved": saved,
    }
    error_db.insert_one(error_record)
def get_features(ar, _data):
    """Return the feature tags of one group derived from raw product fields.

    Args:
        ar: Feature group to return: ``"indoor"``, ``"outdoor"`` or
            ``"climat_control"``.
        _data: Mapping of raw product fields; every field listed in the rule
            table below is read, regardless of the requested group.

    Returns:
        The list of feature tags for ``ar``. An empty list is returned (and a
        failure is printed) when a field is missing or ``ar`` is unknown.
    """
    # (source field, feature group, tag) - the tag is added when the field
    # holds anything other than the string "0".
    presence_rules = (
        ("elevator_1", "indoor", "lift"),
        ("bathrooms", "indoor", "dishwasher"),
        ("store_type_id", "indoor", "storage_room"),
        ("balcony", "outdoor", "balcony"),
        ("parking_id", "outdoor", "garage"),
        ("yard_size", "outdoor", "outdoor_area"),
        ("conditioner", "climat_control", "air_conditioning"),
        ("water", "climat_control", "water_tank"),
        ("hot_water_id", "climat_control", "solar_hot_water"),
    )
    _loc_data = {"indoor": [], "outdoor": [], "climat_control": []}

    try:
        for field, group, tag in presence_rules:
            if _data[field] != "0":
                _loc_data[group].append(tag)

        # Hot-water type "6" additionally implies both heating tags.
        if _data["hot_water_id"] == "6":
            _loc_data["climat_control"].extend(("zonal_heating", "heat_pumps"))

        return _loc_data[ar]
    except Exception:
        # Best-effort: a malformed product must not abort the whole import,
        # but KeyboardInterrupt/SystemExit are no longer swallowed.
        _print.fail('line 824: get_features() - ს აქვს რაღაც ხარვეზი')
        return []
def get_climate_control(req):
    """Collect climate-control tags from a fetched listing page.

    Each tag is probed independently through ``select_one``; a failing probe
    is recorded with ``log_error`` (keyed by ``req.url``) and does not stop
    the remaining probes.

    Args:
        req: Response object of the listing page; it is handed to
            ``select_one`` and its ``url`` attribute is used for error logs.

    Returns:
        A list containing any of ``"zonal_heating"``, ``"solar_hot_water"``,
        ``"water_tank"`` and ``"air_conditioning"``.
    """
    heating_selector = "#df_field_heating .value"
    # (expected heating text, resulting tag, label used in the error log)
    heating_rules = (
        ("Central heating system", "zonal_heating", "ცენტრალური გათბობის"),
        ("Solar heater", "solar_hot_water", "ცხელი წყლის"),
        ("Tank", "water_tank", "წყლის რეზერუარი"),
    )
    # The original selector had no leading '#', so it looked for a
    # <df_field_technic> element instead of the id used by the page
    # (compare the heating selector above) and could never match.
    air_conditioning_selector = (
        "#df_field_technic .checkboxes > .active[title='Air Conditioning']")

    climate_control = []
    try:
        for expected_text, tag, error_label in heating_rules:
            try:
                if select_one(req, heating_selector, True) == expected_text:
                    climate_control.append(tag)
            except Exception:
                log_error(req.url, error_label, True)

        try:
            if bool(select_one(req, air_conditioning_selector, True)):
                climate_control.append("air_conditioning")
        except Exception:
            log_error(req.url, "კონდიციონერი", True)
    except Exception:
        # Only reachable when the error logging of a probe fails itself.
        log_error(req.url, "კლიმატ კონტროლის", True)
        _print.fail('line 124: get_climate_control() - ს აქვს რაღაც ხარვეზი')
    return climate_control
def get_deal_type(type_id):
    """Translate a source ad-type id into the internal deal type.

    Args:
        type_id: Ad-type id as a string (``"1"``, ``"2"``, ``"3"``, ``"7"``
            or ``"8"``).

    Returns:
        ``"sell"``, ``"lease"`` or ``"rent"``; an empty string for any other
        id.
    """
    deal_types = {
        "1": "sell",
        "2": "lease",
        "3": "rent",
        "7": "rent",
        "8": "lease",
    }
    try:
        return deal_types.get(type_id, "")
    except TypeError:
        # An unhashable id (e.g. a list) is the only way the lookup can fail.
        _print.fail('line 798: get_deal_type() - ს აქვს რაღაც ხარვეზი')
        return ""
def get_property_type(_type_id):
    """Translate a source product-type id into the internal property type.

    Args:
        _type_id: Product-type id as a string (``"1"`` to ``"5"``).

    Returns:
        The mapped property type, or ``"any"`` for an unknown id.
    """
    property_types = {
        "1": "land",
        "2": "houses",
        "3": "appartments",
        "4": "commercial_properties",
        "5": "lease",
    }
    try:
        return property_types.get(_type_id, "any")
    except TypeError:
        # An unhashable id (e.g. a list) is the only way the lookup can fail.
        _print.fail('line 839: get_property_type() - ს აქვს რაღაც ხარვეზი')
        return "any"
def get_product_type(p_type_id):
    """Translate a source product-type id into the internal product type.

    Args:
        p_type_id: Product-type id as a string.

    Returns:
        ``"appartaments"`` for ``"1"``, ``"houses"`` for ``"2"`` and
        ``"any"`` for every other id.
    """
    product_types = {
        "1": "appartaments",
        "2": "houses",
        "4": "any",
        "5": "any",
        "7": "any",
    }
    try:
        # NOTE(review): looks like a leftover debug separator - confirm
        # nothing relies on it before removing.
        print("------------------------")
        return product_types.get(p_type_id, "any")
    except TypeError:
        # An unhashable id (e.g. a list) is the only way the lookup can fail.
        _print.fail('line 784: get_product_type() - ს აქვს რაღაც ხარვეზი')
        return "any"
def get_ids(daily=False):
    """Collect product ids for every city and queue the unseen ones.

    For each entry of ``city_osm`` the products API is queried for the page
    count, every page is fetched, and each product id that is not yet in
    ``ids_db`` is inserted there with ``parsed`` set to ``False``.

    Args:
        daily: When true, paging for a city stops as soon as ``is_old_data``
            reports the last product of a page as old.
    """
    for _city in city_osm:
        try:
            city_query = "GID={}&cities={}".format(
                str(_city["osm_id"]), str(_city["osm_id"]))
            page_len_json = requests.get(
                links["products_api"] + "?" + city_query).json()
        except Exception:
            _print.fail('line 859: for_page_len - ს აქვს რაღაც ხარვეზი')
            # Without a response there is nothing to page through; going on
            # would reuse the previous city's page count (or hit a NameError
            # on the first city).
            continue

        try:
            pagination = page_len_json["Pagination"]
            page_len = math.ceil(
                int(pagination["ContentCount"]) / pagination["PerPage"])
        except Exception:
            _print.fail('line 864: page_len - ს აქვს რაღაც ხარვეზი')
            continue

        _print.value(_city['name'])
        for page_index in range(page_len):
            _print.value('Page ' + str(page_index + 1))
            try:
                # The API pages are 1-based.
                page_url = (links["products_api"] + "?Page="
                            + str(page_index + 1) + "&" + city_query)
                products = requests.get(page_url).json()["Prs"]["Prs"]

                if daily and products:
                    last_order_date = datetime.datetime.strptime(
                        products[-1]["order_date"], "%Y-%m-%d %H:%M:%S")
                    if is_old_data(last_order_date):
                        break

                for prs in products:
                    try:
                        if not is_duplicate(ids_db, "product_id",
                                            prs["product_id"]):
                            ids_db.insert_one({
                                "product_id": prs["product_id"],
                                "parsed": False,
                                "city": _city["name"],
                            })
                            _print.ok(f'{prs["product_id"]} პროდუქტი დაემატა')
                        else:
                            _print.warning(
                                f'{prs["product_id"]} არის დუპლიკატი')
                    except Exception:
                        _print.fail(
                            f'line 884: {prs["product_id"]} პროდუქტზე არის რაღაც ხარვეზი'
                        )
            except Exception:
                _print.fail(
                    f'line 887: {page_index+1} გვერდზე არის რაღაც ხარვეზი')
def get_links():
    """Collect listing links from the ss.ge real-estate index.

    Reads the page count from the index, walks the index pages, prints every
    listing URL and inserts the ones not yet present in ``links_db`` with
    ``parsed`` set to ``False`` and ``source`` set to ``"ss.ge"``.
    """
    site_root = "https://ss.ge"
    index_url = site_root + "/en/real-estate/l?Page={}&PriceType=false&CurrencyId=1"

    # The page count is read from the pager rendered on page 2.
    pager_response = requests.get(index_url.format(2))
    pager_text = Selector(response=pager_response).css(
        _selectors["page_len"]).get()
    total_pages = string_to_int(pager_text)[0]
    print(total_pages)

    # NOTE(review): this requests Page=0 .. Page=total_pages-1; if the site
    # numbers pages from 1 the last page is never visited - confirm.
    for page_number in range(total_pages):
        index_response = requests.get(index_url.format(page_number))
        hrefs = Selector(response=index_response).css(
            _selectors["item_links"]).getall()

        for href in hrefs:
            _print.value(site_root + href)
            if is_duplicate(links_db, "link", site_root + href):
                _print.fail("დუპლიკატი")
                continue
            links_db.insert_one({
                'link': site_root + href,
                'parsed': False,
                "source": "ss.ge",
            })
def get_links():
    """Collect listing links from home.ge for every configured product type.

    For each entry of ``product_types`` the page count is read from the
    category page, the index pages are walked, every link is printed and the
    ones not yet present in ``links_db`` are inserted with ``parsed`` set to
    ``False``.
    """
    for category in product_types:
        category_root = f'https://www.home.ge/en/{category}'

        landing_response = requests.get(category_root + '.html')
        pager_text = Selector(response=landing_response).css(
            _selectors["page_len"]).get()
        reported_pages = string_to_int(pager_text)[0]
        # A category whose pager reports zero pages is still visited once.
        total_pages = 1 if reported_pages == 0 else reported_pages

        # NOTE(review): this requests index0.html .. index{n-1}.html; confirm
        # that matches the site's page numbering.
        for page_number in range(total_pages):
            index_response = requests.get(
                category_root + '/index' + str(page_number) + '.html')
            found_links = Selector(response=index_response).css(
                _selectors["item_links"]).getall()

            for found in found_links:
                _print.value(found)
                if is_duplicate(links_db, "link", found):
                    _print.fail("დუპლიკატი")
                    continue
                links_db.insert_one({'link': found, 'parsed': False})
def _split_phone(raw_phone):
    """Split a raw phone string into its first three characters and the rest."""
    return {
        "country_code": int(raw_phone[0:3]),
        "number": raw_phone[3:],
    }


def _resolve_owner_id(users_details, product_details):
    """Insert the listing's user if unknown, else refresh it; return its _id."""
    user_id = users_details["user_id"]

    if not is_duplicate(users_db, "user_id", user_id):
        inserted = users_db.insert_one({
            "user_id": user_id,
            "name": f'{users_details["user_name"]} {users_details["user_surname"]}',
            "gender": 0 if users_details['gender_id'] == "2" else 1,
            "number_of_posts": users_details["pr_count"],
            "phone": _split_phone(product_details['client_phone']),
            "email": [users_details['username']],
            "created_at": datetime.datetime.utcnow(),
            "social_media": [{
                "provider": "skype",
                "user": users_details['skype'],
                "addres": "",
            }] if users_details['skype'] != "" else [],
        })
        _print.ok(f'{user_id} მომხმარებელი დაემატა')
        return inserted.inserted_id

    try:
        users_db.update_many(
            {"user_id": user_id},
            {"$set": {"number_of_posts": users_details["pr_count"]}})
    except Exception:
        # A failed counter refresh does not prevent linking the listing.
        _print.fail(f'line 934: {user_id} მომხმარებელი ვერ განახლდა')

    owner_id = ObjectId(users_db.find_one({"user_id": user_id})["_id"])
    _print.value(f'{user_id} მომხმარებელი განახლდა')
    return owner_id


def _build_listing(prod, product_details_req, owner_id, prod_sdate, prod_edate):
    """Assemble the real-estate document stored for one product."""
    product_details = product_details_req["PrData"]
    title_key = (
        f'{product_details["adtype_id"]}_{product_details["product_type_id"]}'
        f'_{product_details["estate_type_id"]}')

    return {
        "owner_id": owner_id,
        "views": product_details_req['Views'],
        "is_company": True,
        "post_status": "active",
        "deal_type": get_deal_type(product_details["adtype_id"]),
        "type_of_property": [
            get_product_type(product_details["product_type_id"])],
        "floor": int(
            exceptor(product_details["floor"], "სართულის",
                     prod['product_id'])),
        "floors": int(
            exceptor(product_details["floors"], "სართულების რაოდენობის",
                     prod['product_id'])),
        "car_spaces": int(
            exceptor(product_details["parking_id"], "გარაჟი",
                     prod['product_id'])),
        "location": [{
            "country": {"id": "GE"},
            "city": {
                "id": geo_names(prod["city"], prod['product_id']),
                "name": prod["city"],
                "subdivision": "",
            },
            "street": exceptor(product_details["name"], "ქუჩის",
                               prod['product_id']),
            "address": exceptor(product_details["street_address"],
                                "მისამართის", prod['product_id']),
            "geo_cord": {
                "lang": exceptor(product_details["map_lon"],
                                 "long კოორდიანის", prod['product_id']),
                "lat": exceptor(product_details["map_lat"],
                                "lat კოორდიანტის ", prod['product_id']),
            },
        }],
        # Dates are stored without zero padding, e.g. "2020-3-7".
        "avilable_from":
            f'{prod_sdate.year}-{prod_sdate.month}-{prod_sdate.day}',
        "avilable_to":
            f'{prod_edate.year}-{prod_edate.month}-{prod_edate.day}',
        "is_urgent": False,
        # NOTE(review): bool() of a non-empty string such as "0" is True;
        # confirm what the API sends when there is no agent.
        "is_agent": bool(product_details["makler_id"]),
        "detail": {
            "title": exceptor(PrTitles[title_key], "სათაურის",
                              prod['product_id']),
            "houses_rules": "",
            # The description is stored untranslated.
            "description": product_details["comment"],
        },
        "created_at": datetime.datetime.utcnow(),
        "price": {
            "price_type": "total_price",
            "fin_price": 0,
            "fax_price": 0,
            "fix_price": int(float(product_details["price_value"])),
            "currency": "GEL",
        },
        "metric": "feet_square",
        "total_area": int(
            exceptor(product_details["area_size_value"], "ფართობის",
                     prod['product_id'])),
        "bedrooms": int(
            exceptor(product_details["bedrooms"], "საძინებლების რაოდენობის",
                     prod['product_id'])),
        "bathrooms": int(
            exceptor(product_details["bathrooms"], "აბაზანის რაოდენობის",
                     prod['product_id'])),
        "outdoor_features": get_features("outdoor", product_details),
        "indoor_features": get_features("indoor", product_details),
        "climat_control": get_features("climat_control", product_details),
        "phone": _split_phone(product_details['client_phone']),
        "property_type":
            get_property_type(product_details["product_type_id"]),
        "files": get_images(product_details["photos_count"],
                            product_details["photo"],
                            product_details["product_id"]),
        "status": is_new_building(product_details["estate_type_id"]),
    }


def get_products():
    """Fetch details for every unparsed product id and store the listing.

    For each document of ``ids_db`` with ``parsed`` set to ``False``:

    1. the product details are requested; ids with empty details are
       deleted from ``ids_db``;
    2. the owning user is inserted or refreshed in ``users_db``;
    3. the availability window is derived from ``order_date`` plus
       ``MONTH``;
    4. the listing is inserted into ``real_estate_db``, the id is marked as
       parsed and any earlier error entry is removed.

    A failure in steps 2-4 is logged through ``log_error`` and the product
    is skipped, so it stays unparsed for a later run.
    """
    for prod in ids_db.find({"parsed": False}):
        try:
            product_details_req = requests.get(
                links["product_details_api"] + prod['product_id']).json()
            product_details = product_details_req["PrData"]
            users_details = product_details_req["User"]
        except Exception:
            _print.fail('პროდუქტის მოთხოვნა ვერ შესრულდა')
            continue

        if not product_details:
            ids_db.delete_one({"product_id": prod['product_id']})
            _print.fail('პროდუქტი არ მოიძებნა')
            continue

        try:
            user_object_id = _resolve_owner_id(users_details, product_details)
        except Exception:
            _print.fail('line 944: მომხმარებელი არ დაემატა')
            # Without an owner the listing must not be stored: carrying on
            # would attach it to the previous product's user.
            log_error(prod['product_id'], "პროდუქტი არ დაემატა")
            continue

        try:
            order_date = product_details['order_date']
            # order_date starts with "YYYY-MM-DD".
            prod_sdate = datetime.date(
                int(order_date[0:4]),
                int(order_date[5:7]),
                int(order_date[8:10]))
            prod_edate = prod_sdate + MONTH
        except Exception:
            _print.fail('line 954: თარიღზე დაფიქსირდა შეცდომა')
            log_error(prod['product_id'], "თარიღზე დაფიქსირდა შეცდომა")
            continue

        try:
            real_estate_db.insert_one(
                _build_listing(prod, product_details_req, user_object_id,
                               prod_sdate, prod_edate))
            _print.ok(f'{product_details["product_id"]} პროდუქტი დაემატა')
            ids_db.update_one({"product_id": prod['product_id']},
                              {"$set": {"parsed": True}})
            remove_error(prod['product_id'])
        except Exception:
            _print.fail('line 987: პროდუქტი არ დაემატა')
            log_error(prod['product_id'], "პროდუქტი არ დაემატა")