Esempio n. 1
0
def get_ids(daily=False):
    """Collect product ids for every city in ``city_osm`` into ``ids_db``.

    For each city the products API is queried once to work out how many
    result pages exist; every page is then fetched and each product id that
    is not already stored is inserted with ``parsed=False`` and the city
    name.  A city whose page count cannot be determined is skipped, and a
    page that fails is reported and skipped.

    Args:
        daily: When true, stop paging through the current city as soon as
            the last product of a page is reported as old by
            ``is_old_data``.
    """
    for _city in city_osm:
        osm_id = str(_city["osm_id"])
        city_query = "GID={}&cities={}".format(osm_id, osm_id)

        try:
            for_page_len = requests.get(links["products_api"] + "?" +
                                        city_query)
            page_len_json = for_page_len.json()
        except Exception:
            _print.fail('line 859: for_page_len - ს აქვს რაღაც ხარვეზი')
            # Without the first response there is no page count; carrying on
            # would reuse the previous city's values (or hit a NameError).
            continue

        try:
            page_len = math.ceil(
                int(page_len_json["Pagination"]["ContentCount"]) /
                page_len_json["Pagination"]["PerPage"])
        except Exception:
            _print.fail('line 864: page_len - ს აქვს რაღაც ხარვეზი')
            continue

        _print.value(_city['name'])

        for page_index in range(page_len):
            page_number = page_index + 1
            _print.value('Page ' + str(page_number))

            try:
                product_pages_res = requests.get(
                    links["products_api"] + "?Page=" + str(page_number) +
                    "&" + city_query)
                Products = product_pages_res.json()["Prs"]["Prs"]

                # The date of the last product is only relevant for daily
                # runs, so it is not parsed at all otherwise.
                if daily and Products and is_old_data(
                        datetime.datetime.strptime(
                            Products[-1]["order_date"],
                            "%Y-%m-%d %H:%M:%S")):
                    break

                for prs in Products:
                    # Pre-set so the failure message below can never raise
                    # while it is being formatted.
                    product_id = None
                    try:
                        product_id = prs["product_id"]
                        if not is_duplicate(ids_db, "product_id", product_id):
                            ids_db.insert_one({
                                "product_id": product_id,
                                "parsed": False,
                                "city": _city["name"]
                            })
                            _print.ok(f'{product_id} პროდუქტი დაემატა')
                        else:
                            _print.warning(f'{product_id} არის დუპლიკატი')
                    except Exception:
                        _print.fail(
                            f'line 884: {product_id} პროდუქტზე არის რაღაც ხარვეზი'
                        )

            except Exception:
                _print.fail(
                    f'line 887: {page_number} გვერდზე არის რაღაც ხარვეზი')
Esempio n. 2
0
def get_links():
    """Store the real-estate listing links found on ss.ge in ``links_db``.

    The page count is read from page 2 of the listing using the
    ``page_len`` selector and printed.  Every page index in
    ``range(page_len)`` is then fetched, and each item link is stored as an
    unparsed ``ss.ge`` link unless ``is_duplicate`` reports it as present.
    """
    site_root = "https://ss.ge"
    listing_url = (site_root +
                   "/en/real-estate/l?Page={}&PriceType=false&CurrencyId=1")

    first_response = requests.get(listing_url.format(2))
    page_len_text = Selector(response=first_response).css(
        _selectors["page_len"]).get()
    total_pages = string_to_int(page_len_text)[0]
    print(total_pages)

    for page_number in range(total_pages):
        listing_response = requests.get(listing_url.format(page_number))
        hrefs = Selector(response=listing_response).css(
            _selectors["item_links"]).getall()

        for href in hrefs:
            full_link = site_root + href
            _print.value(full_link)

            if is_duplicate(links_db, "link", full_link):
                _print.fail("დუპლიკატი")
                continue

            links_db.insert_one({
                'link': full_link,
                'parsed': False,
                "source": "ss.ge"
            })
Esempio n. 3
0
def get_links():
    """Store the listing links found on home.ge in ``links_db``.

    For every entry of ``product_types`` the category page is fetched to
    read the page count (a count of 0 is treated as a single page).  Each
    index page is then fetched, and every item link is stored as unparsed
    unless ``is_duplicate`` reports it as already present.
    """
    for category in product_types:
        category_root = f'https://www.home.ge/en/{category}'

        first_response = requests.get(category_root + '.html')
        page_len_text = Selector(response=first_response).css(
            _selectors["page_len"]).get()
        counted_pages = string_to_int(page_len_text)[0]
        total_pages = 1 if counted_pages == 0 else counted_pages

        for page_number in range(total_pages):
            index_response = requests.get(
                f'{category_root}/index{page_number}.html')
            found_links = Selector(response=index_response).css(
                _selectors["item_links"]).getall()

            for found in found_links:
                _print.value(found)
                if is_duplicate(links_db, "link", found):
                    _print.fail("დუპლიკატი")
                    continue
                links_db.insert_one({'link': found, 'parsed': False})
Esempio n. 4
0
def get_products():
    """Fetch details for every unparsed id in ``ids_db`` and store the listing.

    For each pending product:

    1. The details API is queried; on failure the product is skipped.
    2. If the API returns no product data, the id is deleted from
       ``ids_db`` and the product is skipped.
    3. The owning user is inserted into ``users_db``, or its
       ``number_of_posts`` is refreshed when it already exists.
    4. The listing document is inserted into ``real_estate_db``; on success
       the id is flagged ``parsed`` and ``remove_error`` is called for it.

    Failures in steps 3 and 4 and in date parsing are reported through
    ``_print.fail`` / ``log_error`` and leave the id unparsed.
    """
    for prod in ids_db.find({"parsed": False}):
        try:
            product_details_req = requests.get(links["product_details_api"] +
                                               prod['product_id']).json()
            product_details = product_details_req["PrData"]
            users_details = product_details_req["User"]
        except Exception:
            _print.fail('პროდუქტის მოთხოვნა ვერ შესრულდა')
            continue

        product_id = prod['product_id']

        if len(product_details) == 0:
            ids_db.delete_one({"product_id": product_id})
            _print.fail('პროდუქტი არ მოიძებნა')
            continue

        # Reset for every product: if the user step fails, the listing must
        # never be stored under the owner resolved for a previous product.
        user_object_id = None
        try:
            user_id = users_details["user_id"]
            if not is_duplicate(users_db, "user_id", user_id):
                user_object = users_db.insert_one({
                    "user_id":
                    user_id,
                    "name":
                    (f'{users_details["user_name"]} {users_details["user_surname"]}'
                     ),
                    "gender":
                    0 if users_details['gender_id'] == "2" else 1,
                    "number_of_posts":
                    users_details["pr_count"],
                    "phone": {
                        # First three characters are stored as the country
                        # code, the remainder as the number.
                        "country_code":
                        int(product_details['client_phone'][0:3]),
                        "number":
                        product_details['client_phone'][3:]
                    },
                    "email": [users_details['username']],
                    "created_at":
                    datetime.datetime.utcnow(),
                    "social_media": [{
                        "provider": "skype",
                        "user": users_details['skype'],
                        "addres": ""
                    }] if users_details['skype'] != "" else []
                })
                user_object_id = user_object.inserted_id
                _print.ok(f'{user_id} მომხმარებელი დაემატა')
            else:
                # A failed refresh is reported but does not prevent the
                # existing user from being used as the owner.
                try:
                    users_db.update_many(
                        {"user_id": user_id},
                        {"$set": {
                            "number_of_posts": users_details["pr_count"]
                        }})
                except Exception:
                    _print.fail(
                        f'line 934: {user_id} მომხმარებელი ვერ განახლდა'
                    )

                user_object_id = ObjectId(
                    users_db.find_one({"user_id": user_id})["_id"])
                _print.value(f'{user_id} მომხმარებელი განახლდა')

        except Exception:
            _print.fail('line 944: მომხმარებელი არ დაემატა')

        if user_object_id is None:
            # No owner could be resolved, so the listing cannot be stored.
            log_error(product_id, "პროდუქტი არ დაემატა")
            continue

        try:
            # order_date is sliced as "YYYY-MM-DD..." (year, month, day).
            order_date = product_details['order_date']
            prod_sdate = datetime.date(int(order_date[0:4]),
                                       int(order_date[5:7]),
                                       int(order_date[8:10]))
            prod_edate = prod_sdate + MONTH
        except Exception:
            _print.fail('line 954: თარიღზე დაფიქსირდა შეცდომა')
            log_error(product_id, "თარიღზე დაფიქსირდა შეცდომა")
            continue

        try:
            real_estate_db.insert_one({
                "owner_id":
                user_object_id,
                "views":
                product_details_req['Views'],
                "is_company":
                True,
                "post_status":
                "active",
                "deal_type":
                get_deal_type(product_details["adtype_id"]),
                "type_of_property":
                [get_product_type(product_details["product_type_id"])],
                "floor":
                int(
                    exceptor(product_details["floor"], "სართულის",
                             product_id)),
                "floors":
                int(
                    exceptor(product_details["floors"],
                             "სართულების რაოდენობის", product_id)),
                "car_spaces":
                int(
                    exceptor(product_details["parking_id"], "გარაჟი",
                             product_id)),
                "location": [{
                    "country": {
                        "id": "GE"
                    },
                    "city": {
                        "id": geo_names(prod["city"], product_id),
                        "name": prod["city"],
                        "subdivision": ""
                    },
                    "street":
                    exceptor(product_details["name"], "ქუჩის", product_id),
                    "address":
                    exceptor(product_details["street_address"], "მისამართის",
                             product_id),
                    "geo_cord": {
                        # NOTE(review): the key "lang" holds the longitude;
                        # kept as-is because readers of the collection may
                        # depend on it.
                        "lang":
                        exceptor(product_details["map_lon"], "long კოორდიანის",
                                 product_id),
                        "lat":
                        exceptor(product_details["map_lat"],
                                 "lat კოორდიანტის ", product_id)
                    },
                }],
                "avilable_from":
                f'{prod_sdate.year}-{prod_sdate.month}-{prod_sdate.day}',
                "avilable_to":
                f'{prod_edate.year}-{prod_edate.month}-{prod_edate.day}',
                "is_urgent":
                False,
                "is_agent":
                bool(product_details["makler_id"]),
                "detail": {
                    "title":
                    exceptor(
                        PrTitles[
                            f'{product_details["adtype_id"]}_{product_details["product_type_id"]}_{product_details["estate_type_id"]}'],
                        "სათაურის", product_id),
                    "houses_rules":
                    "",
                    "description":
                    product_details["comment"]
                },
                "created_at":
                datetime.datetime.utcnow(),
                "price": {
                    "price_type": "total_price",
                    "fin_price": 0,
                    "fax_price": 0,
                    "fix_price": int(float(product_details["price_value"])),
                    "currency": "GEL"
                },
                "metric":
                "feet_square",
                "total_area":
                int(
                    exceptor(product_details["area_size_value"], "ფართობის",
                             product_id)),
                "bedrooms":
                int(
                    exceptor(product_details["bedrooms"],
                             "საძინებლების რაოდენობის", product_id)),
                "bathrooms":
                int(
                    exceptor(product_details["bathrooms"],
                             "აბაზანის რაოდენობის", product_id)),
                "outdoor_features":
                get_features("outdoor", product_details),
                "indoor_features":
                get_features("indoor", product_details),
                "climat_control":
                get_features("climat_control", product_details),
                "phone": {
                    "country_code":
                    int(product_details['client_phone'][0:3]),
                    "number":
                    product_details['client_phone'][3:]
                },
                "property_type":
                get_property_type(product_details["product_type_id"]),
                "files":
                get_images(product_details["photos_count"],
                           product_details["photo"],
                           product_details["product_id"]),
                "status":
                is_new_building(product_details["estate_type_id"]),
            })
            _print.ok(f'{product_details["product_id"]} პროდუქტი დაემატა')
            ids_db.update_one({"product_id": product_id},
                              {"$set": {
                                  "parsed": True
                              }})
            remove_error(product_id)
        except Exception:
            _print.fail('line 987: პროდუქტი არ დაემატა')
            log_error(product_id, "პროდუქტი არ დაემატა")
Esempio n. 5
0
def get_products():
    """Scrape every unparsed link in ``links_db`` into ``real_estate_db``.

    Each pending link is downloaded, the listing fields are read from the
    page with ``select_one`` / ``select_many`` and the feature helpers, and
    the resulting document is inserted with ``"source": "Home.ge"``.  After
    a successful insert the link is flagged as parsed; if the insert or the
    flag update fails, the error is passed to ``log_error``.

    Errors raised while downloading or extracting fields are not handled
    here and propagate to the caller.
    """
    for prod in links_db.find({"parsed": False}):
        link = prod["link"]
        req = requests.get(link)
        _print.value(link)
        city = select_one(req, "#df_field_mdebareoba .value", True)

        bread_crumbs = select_many(req, "#bread_crumbs .point1 li a::text")
        geonames_id = geo_names(city)
        deal_type = get_deal_type(bread_crumbs[2], link)
        property_type = get_property_type(bread_crumbs[1], link)
        status = get_status(
            select_one(req, "#df_field_built_status .value", True), link)
        street = select_one(req, "#df_field_mdebareoba_level1 .value", True)
        address = select_one(req, "#df_field_mdebareoba_level2 .value", True)
        bedrooms = select_one(req, "#df_field_bedrooms .value", True)
        bathrooms = select_one(req, "#df_field_bathrooms .value", True)
        total_area = string_to_int(
            select_one(req, "#df_field_square_feet .value", True))[0]
        floor = select_one(req, "#df_field_floor .value", True)
        floors = select_one(req, "#df_field_number_of_floors .value", True)

        # The view counter is optional: anything that cannot be read as an
        # integer is stored as 0.
        try:
            _view = int(select_one(req, "#area_listing .count::text"))
        except Exception:
            _view = 0

        outdoor_features = get_outdoor_features(req)
        indoor_features = get_indoor_features(req)
        climate_control = get_climate_control(req)
        details = [{
            "title":
            select_one(req, "#area_listing > h1"),
            "house_rules":
            "",
            "description":
            Translate(
                select_one(req, "#df_field_additional_information .value",
                           True))
        }]
        price = {
            "price_type":
            "total_price",
            "min_price":
            0,
            "max_price":
            0,
            "fix_price":
            converted_price(select_one(req, "#lm_loan_amount::attr(value)"),
                            link),
            "currency":
            "USD"
        }
        phones = [{
            "country_code":
            995,
            # NOTE(review): the phone text is passed through
            # converted_price — confirm that helper is meant for phone
            # numbers as well as prices.
            "number":
            converted_price(select_one(req, "#df_field_phone .value a::text"),
                            link)
        }]
        files = get_images(req)
        try:
            real_estate_db.insert_one({
                "location": {
                    "country": {
                        "id": "GE"
                    },
                    "city": {
                        "id": geonames_id,
                        "name": city,
                        "subdivision": ""
                    },
                    "street": street,
                    "address": address,
                },
                "created_at": datetime.datetime.utcnow(),
                "deal_type": deal_type,
                "type_of_property": [property_type],
                "status": status,
                "bedrooms": bedrooms,
                "bathrooms": bathrooms,
                "total_area": total_area,
                "metric": "feet_square",
                "floor": floor,
                "floors": floors,
                "car_spaces": 0,
                "is_agent": True,
                "outdoor_features": outdoor_features,
                "indoor_features": indoor_features,
                "climate_control": climate_control,
                "detail": details,
                "price": price,
                "phones": phones,
                "files": files,
                "source": "Home.ge",
                "view": _view
            })
            links_db.update_one({"link": link},
                                {"$set": {
                                    "parsed": True
                                }})
        except Exception:
            # Exception (not a bare except) so Ctrl-C / SystemExit still
            # stop the run instead of being logged as a product error.
            log_error(req.url, "პროდუქტის", True)
Esempio n. 6
0
def get_products():
    """Scrape every unparsed ss.ge link and pretty-print the listing document.

    Each pending link with ``"source": "ss.ge"`` is downloaded, the listing
    fields are read from the page, and the assembled document is printed
    with ``pprint``.  The visible code does not write the document to a
    collection and does not flag the link as parsed.
    """
    from translator import Translate

    for prod in links_db.find({"parsed": False, "source": "ss.ge"}):
        link = prod["link"]
        req = requests.get(link)
        _print.value(link)

        bread_crumbs = select_many(req, ".detailed_page_navlist ul li a::text")
        city = bread_crumbs[3]
        geonames_id = geo_names(city)
        deal_type = get_deal_type(bread_crumbs[2].strip(), link)
        property_type = get_property_type(bread_crumbs[1].strip(), link)

        # NOTE(review): "fieldValueStatusId2" is written as an element name;
        # it probably needs a "." or "#" prefix — confirm against the page.
        status = get_status(select_one(req, "fieldValueStatusId2::text"), link)
        street = select_one(req, ".StreeTaddressList.realestatestr::text").strip()
        address = street

        # The header parameter texts are read once and picked by position.
        header_params = select_many(req, ".ParamsHdBlk text::text")
        bedrooms = int(header_params[2])
        bathrooms = ""
        total_area = string_to_int(header_params[0])[0]
        # NOTE(review): unlike total_area, floor and floors keep the whole
        # string_to_int result rather than its first element — confirm.
        floor = string_to_int(header_params[3])
        floors = string_to_int(select_one(req, ".ParamsHdBlk text text span::text"))

        # The view counter is optional: anything that cannot be read as an
        # integer is reported as 0.
        try:
            _view = int(select_one(req, ".article_views span::text"))
        except Exception:
            _view = 0

        outdoor_features = get_outdoor_features(req)
        indoor_features = get_indoor_features(req)
        climate_control = get_climate_control(req)
        # NOTE(review): the selectors used for title, description, price and
        # phone are the same ones the home.ge scraper uses — verify that
        # they match ss.ge pages.
        details = [{
            "title": select_one(req, "#area_listing > h1"),
            "house_rules": "",
            "description": Translate(
                select_one(req, "#df_field_additional_information .value", True))
        }]
        price = {
            "price_type": "total_price",
            "min_price": 0,
            "max_price": 0,
            "fix_price": converted_price(
                select_one(req, "#lm_loan_amount::attr(value)"), link),
            "currency": "USD"
        }
        phones = [{
            "country_code": 995,
            "number": converted_price(
                select_one(req, "#df_field_phone .value a::text"), link)
        }]
        files = get_images(req)
        pprint({
            "location": {
                "country": {
                    "id": "GE"
                },
                "city": {
                    "id": geonames_id,
                    "name": city,
                    "subdivision": ""
                },
                "street": street,
                "address": address,
            },
            "created_at": datetime.datetime.utcnow(),
            "deal_type": deal_type,
            "type_of_property": [property_type],
            "status": status,
            "bedrooms": bedrooms,
            "bathrooms": bathrooms,
            "total_area": total_area,
            "metric": "feet_square",
            "floor": floor,
            "floors": floors,
            "car_spaces": 0,
            "is_agent": True,
            "outdoor_features": outdoor_features,
            "indoor_features": indoor_features,
            "climate_control": climate_control,
            "detail": details,
            "price": price,
            "phones": phones,
            "files": files,
            # Only links stored with source "ss.ge" are processed here.
            "source": "ss.ge",
            "view": _view
        })