def select_many(req, select):
    """Return every match of the CSS selector ``select`` in ``req``.

    Args:
        req: Response object handed to ``Selector(response=...)``; its
            ``url`` attribute is used when reporting a failure.
        select: CSS selector string.

    Returns:
        The result of ``.getall()`` on the selection, or ``None`` when
        anything raised (the failure is reported through ``log_error``).
    """
    try:
        return Selector(response=req).css(select).getall()
    except:
        # Deliberately broad: any failure is logged and mapped to None.
        log_error(req.url, f'{select} სელექტორების', True)
    return None
def get_climate_control(req):
    """Collect the climate-control feature codes advertised on a listing page.

    The heating field is compared against three known labels, and the
    "Air Conditioning" checkbox is looked up in the technic field.

    Args:
        req: Response object accepted by ``select_one``; ``req.url`` is used
            when logging errors.

    Returns:
        A list containing any of ``"zonal_heating"``, ``"solar_hot_water"``,
        ``"water_tank"`` and ``"air_conditioning"``. Errors are logged and
        never propagated; the features gathered so far are returned.
    """
    # (expected heating label, feature code, label used when logging a failure)
    heating_options = (
        ("Central heating system", "zonal_heating", "ცენტრალური გათბობის"),
        ("Solar heater", "solar_hot_water", "ცხელი წყლის"),
        ("Tank", "water_tank", "წყლის რეზერუარი"),
    )
    climate_control = []
    try:
        for expected, feature, error_label in heating_options:
            try:
                if select_one(req, "#df_field_heating .value", True) == expected:
                    climate_control.append(feature)
            except Exception:
                log_error(req.url, error_label, True)
        try:
            # The selector needs the leading '#': without it "df_field_technic"
            # is treated as a tag name and can never match the field container,
            # unlike the other "#df_field_*" lookups.
            if select_one(
                    req,
                    "#df_field_technic .checkboxes > .active[title='Air Conditioning']",
                    True):
                climate_control.append("air_conditioning")
        except Exception:
            log_error(req.url, "კონდიციონერი", True)
    except Exception:
        log_error(req.url, "კლიმატ კონტროლის", True)
        _print.fail('line 124: get_features() - ს აქვს რაღაც ხარვეზი')
    return climate_control
def converted_price(price, pr_link):
    """Concatenate the items produced by ``string_to_int(price)`` into a string.

    Args:
        price: Raw text passed to ``string_to_int``.
        pr_link: Identifier passed to ``log_error`` if the conversion fails.

    Returns:
        The ``str()`` of every item joined without a separator. If an error
        occurs, it is logged and whatever was collected so far (possibly the
        empty string) is returned.
    """
    pieces = []
    try:
        for token in string_to_int(price):
            pieces.append(str(token))
    except:
        # Best effort: keep the parts gathered before the failure.
        log_error(pr_link, "ფასის ან ნომრის", True)
    return "".join(pieces)
def select_one(req, select, comments=False):
    """Return the first match of the CSS selector ``select`` in ``req``.

    Args:
        req: Response object handed to ``Selector(response=...)``; its
            ``url`` attribute is used when reporting a failure.
        select: CSS selector string.
        comments: When true and a match exists, only the text that follows
            the first ``-->`` and precedes the next ``<!--`` is returned.

    Returns:
        The matched string (possibly trimmed as described), ``None`` when
        nothing matched, or ``None`` after logging when anything raised --
        including a match that contains no ``-->`` while ``comments`` is set.
    """
    try:
        match = Selector(response=req).css(select).get()
        if comments and match is not None:
            after_marker = match.split('-->')[1]
            match = after_marker.split('<!--')[0]
        return match
    except:
        # Deliberately broad: any failure is logged and mapped to None.
        log_error(req.url, f'{select} სელექტორის', True)
        return None
def get_status(_type_id, _url):
    """Translate a building-status label into its internal code.

    Args:
        _type_id: Status label as found on the page.
        _url: Identifier passed to ``log_error`` if the lookup fails.

    Returns:
        The matching code, or ``""`` for an unknown label or after a logged
        error.
    """
    try:
        status_codes = dict((
            ("New building", "new_building"),
            ("Under construction", "under_construction"),
            ("Old building", "old_build"),
        ))
        code = status_codes.get(_type_id, "")
        return code
    except:
        log_error(_url, 'სტატუსის', True)
        return ""
def get_property_type(_type_id, _url):
    """Translate a property-category label into its internal code.

    Args:
        _type_id: Category label as found on the page.
        _url: Identifier passed to ``log_error`` if the lookup fails.

    Returns:
        The matching code, or ``"any"`` for an unknown label or after a
        logged error.
    """
    fallback = "any"
    try:
        property_codes = dict((
            ("Land", "land"),
            ("Houses and Cottages", "houses"),
            ("Apartments", "appartments"),
            ("Commercial Real Estate", "commercial_properties"),
        ))
        return property_codes.get(_type_id, fallback)
    except:
        log_error(_url, 'Property Type', True)
        return fallback
def get_deal_type(_type_id, _url):
    """Translate a deal-type label into its internal code.

    Args:
        _type_id: Deal label as found on the page.
        _url: Identifier passed to ``log_error`` if the lookup fails.

    Returns:
        ``"rent"``, ``"sell"`` or ``"lease"`` for a known label; ``"any"``
        for an unknown label or after a logged error.
    """
    fallback = "any"
    try:
        deal_codes = dict((
            ("For Rent", "rent"),
            ("For Sale", "sell"),
            ("Daily rent", "rent"),
            ("Lease", "lease"),
        ))
        return deal_codes.get(_type_id, fallback)
    except:
        log_error(_url, 'Deal Type', True)
        return fallback
def geo_names(city, prod_id=""):
    """Resolve a city name to its GeoNames id, caching new results locally.

    The local ``cities`` collection is consulted first (by capitalised name).
    On a miss the GeoNames search API is queried, the first hit is stored via
    ``insert_db`` and its id is returned.

    Args:
        city: City name as scraped from the listing.
        prod_id: Identifier passed to ``log_error`` on failure.

    Returns:
        The GeoNames id, or ``""`` after logging when anything fails (unknown
        city, network error, ``city`` not being a string, ...).
    """
    try:
        local_city = get_data("cities", {"name": city.capitalize()})
        if local_city is not None:
            return local_city["geoname_id"]

        # Let requests build the query string so that spaces, '&', '#' or
        # non-ASCII characters in the scraped city name are encoded properly,
        # and bound the wait so a stalled API call cannot hang the scraper.
        # NOTE(review): the GeoNames username is hard-coded here; consider
        # moving it to configuration.
        response = requests.get(
            "http://api.geonames.org/searchJSON",
            params={
                "name_startsWith": city,
                "maxRows": 1,
                "username": "giorgi0221",
            },
            timeout=30,
        )
        g_names = response.json()["geonames"][0]
        insert_db("cities", {
            "name": g_names["name"],
            "geoname_id": g_names["geonameId"],
        })
        return g_names["geonameId"]
    except Exception:
        log_error(prod_id, f"გეოლოკაციაში დაფიქსირდა შეცდომა, ლოკაცია: {city}", True)
        return ""
def get_deal_type(_type_id, _url):
    """Translate a breadcrumb deal label into its internal deal-type code.

    Surrounding whitespace in the label is ignored, so both ``"For Sale"``
    and ``"For Sale "`` are recognised.

    Args:
        _type_id: Deal label as found on the page.
        _url: Identifier passed to ``log_error`` if the lookup fails.

    Returns:
        ``"rent"``, ``"sell"`` or ``"lease"`` for a known label; ``"any"``
        for an unknown label or after a logged error.
    """
    try:
        switcher = {
            "For Rent": "rent",
            "Apartment for sale": "sell",
            "Daily Apartment Rent": "rent",
            # Was "commercial_properties", which is a property-type code,
            # not a deal type; every other lease label maps to "lease".
            "Lease Apartment": "lease",
            "House For Sale": "sell",
            "House For Rent": "rent",
            "Lease House": "lease",
            "Daily Rent": "rent",
            "For Sale": "sell",
            "Lease": "lease",
            "Land For Sale": "sell",
            "Land For Rent": "rent",
            "Lease Land": "lease",
        }
        # Keys are stored stripped; normalise the label the same way so the
        # match does not depend on incidental trailing spaces in the markup.
        label = _type_id.strip() if isinstance(_type_id, str) else _type_id
        return switcher.get(label, "any")
    except Exception:
        log_error(_url, 'Deal Type', True)
        return "any"
def get_outdoor_features(req):
    """Collect outdoor feature codes from the listing's parameter icons.

    A feature is reported when ``select_one`` returns a truthy value for its
    icon inside a ``.parameteres_item_each`` element that is not ``.lacking``.

    Args:
        req: Response object accepted by ``select_one``; ``req.url`` is used
            when logging errors.

    Returns:
        A list containing any of ``"swimming_pool"``, ``"garage"`` and
        ``"balcony"``. Errors are logged and never propagated.
    """
    # (CSS selector, feature code, label used when logging a failure)
    icon_checks = (
        (".parameteres_item_each:not(.lacking) .fa-pool", "swimming_pool", "აუზის"),
        (".parameteres_item_each:not(.lacking) .fa-garage", "garage", "გარაჟის"),
        (".parameteres_item_each:not(.lacking) .fa-balcony2", "balcony", "აივნის"),
    )
    outdoor_features = []
    try:
        for css, feature, error_label in icon_checks:
            try:
                if select_one(req, css):
                    outdoor_features.append(feature)
            except:
                log_error(req.url, error_label, True)
    except:
        log_error(req.url, "Outdoor Features", True)
    return outdoor_features
def get_outdoor_features(req):
    """Collect outdoor feature codes from the listing's detail fields.

    Pool and balcony/terrace count as present when their field has a value
    other than ``"No"``; the garage counts when the parking field equals
    ``"Garage"``. A field that is missing from the page (``select_one``
    returns ``None``) or empty is treated as "not present".

    Args:
        req: Response object accepted by ``select_one``; ``req.url`` is used
            when logging errors.

    Returns:
        A list containing any of ``"swimming_pool"``, ``"garage"`` and
        ``"balcony"``. Errors are logged and never propagated.
    """
    def _is_affirmative(value):
        # A bare `value != "No"` is also true for None, which tagged every
        # listing lacking the field as having the feature.
        return bool(value) and value != "No"

    outdoor_features = []
    try:
        try:
            if _is_affirmative(select_one(req, "#df_field_pool .value", True)):
                outdoor_features.append("swimming_pool")
        except Exception:
            log_error(req.url, "აუზის", True)
        try:
            if select_one(req, "#df_field_parking_new .value", True) == "Garage":
                outdoor_features.append("garage")
        except Exception:
            log_error(req.url, "გარაჟის", True)
        try:
            if _is_affirmative(
                    select_one(req, "#df_field_balcony_terrace .value", True)):
                outdoor_features.append("balcony")
        except Exception:
            log_error(req.url, "აივნის", True)
    except Exception:
        log_error(req.url, "Outdoor Features", True)
    return outdoor_features
def get_indoor_features(req):
    """Collect indoor feature codes from the listing's detail fields.

    The lift counts as present when the elevator field has a value other
    than ``"No"``; a field that is missing from the page (``select_one``
    returns ``None``) or empty is treated as "not present". The dishwasher
    counts when its checkbox is found among the active essentials.

    Args:
        req: Response object accepted by ``select_one``; ``req.url`` is used
            when logging errors.

    Returns:
        A list containing any of ``"lift"`` and ``"dishwasher"``. Errors are
        logged and never propagated.
    """
    indoor_features = []
    try:
        try:
            elevator = select_one(req, "#df_field_elevator .value", True)
            # A bare `elevator != "No"` is also true for None, which tagged
            # every listing lacking the field as having a lift.
            if elevator and elevator != "No":
                indoor_features.append("lift")
        except Exception:
            log_error(req.url, "ლიფტის", True)
        try:
            if select_one(
                    req,
                    "#df_field_essentials .checkboxes > .active[title='Dishwasher']",
                    True):
                indoor_features.append("dishwasher")
        except Exception:
            # NOTE(review): this label appears to say "bathroom" although the
            # check is for the dishwasher -- looks like a copy-paste; confirm
            # before changing the logged text.
            log_error(req.url, "აბაზანის", True)
    except Exception:
        log_error(req.url, "Indoor Features", True)
    return indoor_features
def _split_client_phone(raw_phone):
    """Split a raw phone string into a country code (its first three
    characters, parsed as an integer) and the remaining characters."""
    return {
        "country_code": int(raw_phone[0:3]),
        "number": raw_phone[3:],
    }


def _resolve_owner_id(users_details, product_details):
    """Insert the listing's user if unknown, otherwise refresh its post count,
    and return the user's document id."""
    user_id = users_details["user_id"]
    if not is_duplicate(users_db, "user_id", user_id):
        inserted = users_db.insert_one({
            "user_id": user_id,
            "name": f'{users_details["user_name"]} {users_details["user_surname"]}',
            "gender": 0 if users_details['gender_id'] == "2" else 1,
            "number_of_posts": users_details["pr_count"],
            "phone": _split_client_phone(product_details['client_phone']),
            "email": [users_details['username']],
            "created_at": datetime.datetime.utcnow(),
            "social_media": [{
                "provider": "skype",
                "user": users_details['skype'],
                "addres": ""
            }] if users_details['skype'] != "" else []
        })
        _print.ok(f'{user_id} მომხმარებელი დაემატა')
        return inserted.inserted_id

    try:
        users_db.update_many(
            {"user_id": user_id},
            {"$set": {"number_of_posts": users_details["pr_count"]}})
    except Exception:
        # A failed counter refresh is reported but does not block the listing.
        _print.fail(f'line 934: {user_id} მომხმარებელი ვერ განახლდა')
    owner_id = ObjectId(users_db.find_one({"user_id": user_id})["_id"])
    _print.value(f'{user_id} მომხმარებელი განახლდა')
    return owner_id


def get_products():
    """Fetch, convert and store every product id not yet marked as parsed.

    For each pending entry of ``ids_db`` the product-details API is queried,
    the owning user is inserted or refreshed in ``users_db``, and a listing
    document is inserted into ``real_estate_db``. On success the id is marked
    ``parsed`` and its recorded error is removed via ``remove_error``.

    Failures are reported through ``_print`` / ``log_error`` and the loop
    moves on to the next product; ids whose details come back empty are
    deleted from ``ids_db``.
    """
    for prod in ids_db.find({"parsed": False}):
        try:
            product_details_req = requests.get(
                links["product_details_api"] + prod['product_id']).json()
            product_details = product_details_req["PrData"]
            users_details = product_details_req["User"]
        except Exception:
            _print.fail('პროდუქტის მოთხოვნა ვერ შესრულდა')
            continue

        if len(product_details) == 0:
            ids_db.delete_one({"product_id": prod['product_id']})
            _print.fail('პროდუქტი არ მოიძებნა')
            continue

        # Reset per product: otherwise a failure below would leave the id of
        # the PREVIOUS product's owner in place and the listing would be
        # stored under the wrong user.
        user_object_id = None
        try:
            user_object_id = _resolve_owner_id(users_details, product_details)
        except Exception:
            _print.fail('line 944: მომხმარებელი არ დაემატა')

        try:
            order_date = product_details["order_date"]
            # Fixed-position slices: characters 0-3 year, 5-6 month, 8-9 day.
            prod_sdate = datetime.date(
                int(order_date[0:4]),
                int(order_date[5:7]),
                int(order_date[8:10]))
            # MONTH is defined elsewhere in the file; presumably a timedelta.
            prod_edate = prod_sdate + MONTH
        except Exception:
            _print.fail('line 954: თარიღზე დაფიქსირდა შეცდომა')
            log_error(prod['product_id'], "თარიღზე დაფიქსირდა შეცდომა")
            continue

        if user_object_id is None:
            # Without an owner the listing is not stored; it stays unparsed
            # and is reported like any other failed insert.
            _print.fail('line 987: პროდუქტი არ დაემატა')
            log_error(prod['product_id'], "პროდუქტი არ დაემატა")
            continue

        try:
            title_key = (
                f'{product_details["adtype_id"]}'
                f'_{product_details["product_type_id"]}'
                f'_{product_details["estate_type_id"]}')
            real_estate_db.insert_one({
                "owner_id": user_object_id,
                "views": product_details_req['Views'],
                "is_company": True,
                "post_status": "active",
                "deal_type": get_deal_type(product_details["adtype_id"]),
                "type_of_property": [
                    get_product_type(product_details["product_type_id"])],
                "floor": int(
                    exceptor(product_details["floor"], "სართულის",
                             prod['product_id'])),
                "floors": int(
                    exceptor(product_details["floors"],
                             "სართულების რაოდენობის", prod['product_id'])),
                "car_spaces": int(
                    exceptor(product_details["parking_id"], "გარაჟი",
                             prod['product_id'])),
                "location": [{
                    "country": {
                        "id": "GE"
                    },
                    "city": {
                        "id": geo_names(prod["city"], prod['product_id']),
                        "name": prod["city"],
                        "subdivision": ""
                    },
                    "street": exceptor(product_details["name"], "ქუჩის",
                                       prod['product_id']),
                    "address": exceptor(product_details["street_address"],
                                        "მისამართის", prod['product_id']),
                    "geo_cord": {
                        "lang": exceptor(product_details["map_lon"],
                                         "long კოორდიანის",
                                         prod['product_id']),
                        "lat": exceptor(product_details["map_lat"],
                                        "lat კოორდიანტის ",
                                        prod['product_id'])
                    },
                }],
                "avilable_from":
                    f'{prod_sdate.year}-{prod_sdate.month}-{prod_sdate.day}',
                "avilable_to":
                    f'{prod_edate.year}-{prod_edate.month}-{prod_edate.day}',
                "is_urgent": False,
                "is_agent": bool(product_details["makler_id"]),
                "detail": {
                    "title": exceptor(PrTitles[title_key], "სათაურის",
                                      prod['product_id']),
                    "houses_rules": "",
                    "description": product_details["comment"]
                },
                "created_at": datetime.datetime.utcnow(),
                "price": {
                    "price_type": "total_price",
                    "fin_price": 0,
                    "fax_price": 0,
                    "fix_price": int(float(product_details["price_value"])),
                    "currency": "GEL"
                },
                "metric": "feet_square",
                "total_area": int(
                    exceptor(product_details["area_size_value"], "ფართობის",
                             prod['product_id'])),
                "bedrooms": int(
                    exceptor(product_details["bedrooms"],
                             "საძინებლების რაოდენობის", prod['product_id'])),
                "bathrooms": int(
                    exceptor(product_details["bathrooms"],
                             "აბაზანის რაოდენობის", prod['product_id'])),
                "outdoor_features": get_features("outdoor", product_details),
                "indoor_features": get_features("indoor", product_details),
                "climat_control": get_features("climat_control",
                                               product_details),
                "phone": _split_client_phone(product_details['client_phone']),
                "property_type": get_property_type(
                    product_details["product_type_id"]),
                "files": get_images(product_details["photos_count"],
                                    product_details["photo"],
                                    product_details["product_id"]),
                "status": is_new_building(product_details["estate_type_id"]),
            })
            _print.ok(f'{product_details["product_id"]} პროდუქტი დაემატა')
            # update_one replaces the legacy Collection.update, which newer
            # PyMongo releases no longer provide.
            ids_db.update_one({"product_id": prod['product_id']},
                              {"$set": {"parsed": True}})
            remove_error(prod['product_id'])
        except Exception:
            _print.fail('line 987: პროდუქტი არ დაემატა')
            log_error(prod['product_id'], "პროდუქტი არ დაემატა")
def exceptor(value, error, prod_id, saved=True):
    """Return ``value`` unchanged.

    Args:
        value: Already-evaluated value to pass through.
        error: Message handed to ``log_error`` by the fallback branch.
        prod_id: Identifier handed to ``log_error`` by the fallback branch.
        saved: Flag handed to ``log_error`` by the fallback branch.

    Returns:
        ``value``. The fallback branch (log and return ``""``) guards a plain
        name binding, which cannot raise, so it is effectively never taken:
        any error in computing ``value`` happens in the caller before this
        function runs.
    """
    try:
        result = value
    except:
        log_error(prod_id, error, saved)
        result = ""
    return result
def _build_home_ge_document(req, link):
    """Extract the listing document to store from one fetched listing page."""
    city = select_one(req, "#df_field_mdebareoba .value", True)
    bread_crumbs = select_many(req, "#bread_crumbs .point1 li a::text")
    geonames_id = geo_names(city)
    # Breadcrumb entry 1 is used as the property category, entry 2 as the deal.
    deal_type = get_deal_type(bread_crumbs[2], link)
    property_type = get_property_type(bread_crumbs[1], link)
    status = get_status(
        select_one(req, "#df_field_built_status .value", True), link)
    street = select_one(req, "#df_field_mdebareoba_level1 .value", True)
    address = select_one(req, "#df_field_mdebareoba_level2 .value", True)
    bedrooms = select_one(req, "#df_field_bedrooms .value", True)
    bathrooms = select_one(req, "#df_field_bathrooms .value", True)
    total_area = string_to_int(
        select_one(req, "#df_field_square_feet .value", True))[0]
    floor = select_one(req, "#df_field_floor .value", True)
    floors = select_one(req, "#df_field_number_of_floors .value", True)
    try:
        _view = int(select_one(req, "#area_listing .count::text"))
    except Exception:
        # Missing or non-numeric counter: store zero views.
        _view = 0
    outdoor_features = get_outdoor_features(req)
    indoor_features = get_indoor_features(req)
    climate_control = get_climate_control(req)
    details = [{
        "title": select_one(req, "#area_listing > h1"),
        "house_rules": "",
        "description": Translate(
            select_one(req, "#df_field_additional_information .value", True))
    }]
    price = {
        "price_type": "total_price",
        "min_price": 0,
        "max_price": 0,
        "fix_price": converted_price(
            select_one(req, "#lm_loan_amount::attr(value)"), link),
        "currency": "USD"
    }
    phones = [{
        "country_code": 995,
        "number": converted_price(
            select_one(req, "#df_field_phone .value a::text"), link)
    }]
    files = get_images(req)
    return {
        "location": {
            "country": {
                "id": "GE"
            },
            "city": {
                "id": geonames_id,
                "name": city,
                "subdivision": ""
            },
            "street": street,
            "address": address,
        },
        "created_at": datetime.datetime.utcnow(),
        "deal_type": deal_type,
        "type_of_property": [property_type],
        "status": status,
        "bedrooms": bedrooms,
        "bathrooms": bathrooms,
        "total_area": total_area,
        "metric": "feet_square",
        "floor": floor,
        "floors": floors,
        "car_spaces": 0,
        "is_agent": True,
        "outdoor_features": outdoor_features,
        "indoor_features": indoor_features,
        "climate_control": climate_control,
        "detail": details,
        "price": price,
        "phones": phones,
        "files": files,
        "source": "Home.ge",
        "view": _view
    }


def get_products():
    """Scrape and store every listing link not yet marked as parsed.

    For each pending entry of ``links_db`` the page is downloaded, converted
    into a listing document and inserted into ``real_estate_db``; on success
    the link is marked ``parsed``.

    A listing that cannot be fetched, parsed or stored is reported through
    ``log_error`` and skipped, so one bad page (for example missing
    breadcrumbs or an empty area field) no longer aborts the whole run.
    """
    for prod in links_db.find({"parsed": False}):
        link = prod["link"]
        try:
            req = requests.get(link)
            _print.value(link)
            document = _build_home_ge_document(req, link)
        except Exception:
            log_error(link, "პროდუქტის", True)
            continue

        try:
            real_estate_db.insert_one(document)
            links_db.update_one({"link": link}, {"$set": {"parsed": True}})
        except Exception:
            log_error(req.url, "პროდუქტის", True)