def compute_properties_used_to_calculate_average():
    """Copy each average's sample size onto the offers it applies to.

    For every document in ``antunedo.average_prices``, all offers with the
    same immotype id and city that already carry ``ratio_to_average_price``
    but do not yet have ``properties_used_to_calculate_average`` get that
    field set to the average's ``amount_of_properties``.

    Prints a running counter (one line per average document) and the total
    elapsed time. The MongoDB client is closed even when an update fails.
    """
    start = time.time()
    client = connect_to_mongodb()
    try:
        collection = client['antunedo']['offers']
        avg_collection = client['antunedo']['average_prices']
        for cpt, avg in enumerate(avg_collection.find()):
            print(cpt)
            collection.update_many(
                {
                    # Only offers that have a ratio but were not tagged yet.
                    'ratio_to_average_price': {
                        '$exists': True
                    },
                    'properties_used_to_calculate_average': {
                        '$exists': False
                    },
                    'property.immotype.id': avg['immo_type_id'],
                    'geo.city': avg['city']
                }, {
                    '$set': {
                        'properties_used_to_calculate_average':
                        avg['amount_of_properties']
                    }
                })

        print("\nComputing of average prices took {0:.2f}".format(
            time.time() - start) + " secs\n")
    finally:
        # Release the connection even if a query or update raised.
        client.close()
Ejemplo n.º 2
0
def get_offers(city=None, nb_offers=20):
    """Return the result of ``query_offers`` for a fresh MongoDB connection.

    Args:
        city: Optional city. NOTE(review): this value is currently only
            announced on stdout; it is not forwarded to ``query_offers``,
            so the result is not actually filtered by city.
        nb_offers: Forwarded unchanged to ``query_offers`` (presumably the
            number of offers to fetch -- confirm against that helper).

    Returns:
        Whatever ``query_offers(client, nb_offers)`` returns.
    """
    client = connect_to_mongodb()
    try:
        if city is not None:
            print('Filter by city')

        return query_offers(client, nb_offers)
    finally:
        # Close the connection even if the query raised.
        client.close()
def compute_average_prices():
    """Recompute average prices per m2 and upsert them into MongoDB.

    Runs an aggregation over ``antunedo.offers`` that keeps offers whose
    ``price_by_m2`` is in ``[100, 30000)``, groups them by immotype label,
    immotype id and the ``geo`` subdocument, and computes the number of
    offers and the mean ``price_by_m2`` of each group. Every group is then
    written to ``antunedo.average_prices`` with ``replace_one(...,
    upsert=True)`` keyed on ``immo_type_id`` and ``city``.

    Prints the elapsed time. The MongoDB client is closed even when the
    aggregation or a write fails.
    """
    start = time.time()
    client = connect_to_mongodb()
    try:
        collection = client['antunedo']['offers']
        avg_collection = client['antunedo']['average_prices']

        avg_prices = collection.aggregate([{
            '$project': {
                'property.immotype.label': 1,
                'property.immotype.id': 1,
                'geo': 1,
                'price_by_m2': 1
            }
        }, {
            # Discard offers without a price per m2 or with an outlier value.
            '$match': {
                'price_by_m2': {
                    '$exists': True,
                    '$gte': 100,
                    '$lt': 30000
                }
            }
        }, {
            '$group': {
                # NOTE(review): the group key is the whole `geo` subdocument,
                # while the upsert below is keyed on type id + city only. If
                # `geo` holds more than city/country, several groups map to
                # the same stored document and the last one written wins --
                # confirm the schema.
                '_id': {
                    'type': '$property.immotype.label',
                    'type_id': '$property.immotype.id',
                    'geo': '$geo'
                },
                'amount_of_properties': {
                    '$sum': 1
                },
                'average': {
                    '$avg': '$price_by_m2'
                }
            }
        }])
        for group in avg_prices:
            group_key = group['_id']
            geo = group_key['geo']
            avg_collection.replace_one(
                {
                    'immo_type_id': group_key['type_id'],
                    'city': geo['city']
                }, {
                    'immo_type_id': group_key['type_id'],
                    'immo_type': group_key['type'],
                    'country': geo['country'],
                    'city': geo['city'],
                    'amount_of_properties': group['amount_of_properties'],
                    'average_price': group['average']
                },
                upsert=True)

        print("\nComputing of average prices took {0:.2f}".format(
            time.time() - start) + " secs\n")
    finally:
        # Release the connection even if the aggregation or a write raised.
        client.close()
def _average_price_key(immo_type_id, city):
    """Build the cache key shared by stored averages and offers."""
    # A tuple keeps the two parts separate; concatenating the two strings
    # would let distinct (id, city) pairs produce the same key.
    return (str(immo_type_id), str(city))


def compute_ratio_to_average_price():
    """Store on each offer how far its price per m2 is from the average.

    Loads every document of ``antunedo.average_prices`` into an in-memory
    dict keyed by (immotype id, city), then walks all offers that have no
    ``ratio_to_average_price`` yet. For each offer with a ``price_by_m2``,
    an immotype id and a city for which an average exists, the field is set
    to ``round((price_by_m2 / average_price - 1) * 100, 3)``, i.e. the
    percentage above (positive) or below (negative) the average.

    Offers missing one of the required fields, or whose average is missing
    or zero, are skipped. Prints a progress counter every 100 offers and the
    elapsed time. The MongoDB client is closed even when a query fails.
    """
    start = time.time()
    client = connect_to_mongodb()
    try:
        collection = client['antunedo']['offers']
        avg_collection = client['antunedo']['average_prices']
        print("Storing average prices ... \n")
        average_prices = {
            _average_price_key(avg['immo_type_id'], avg['city']):
            avg['average_price']
            for avg in avg_collection.find()
        }

        offers_cursor = collection.find(
            {'ratio_to_average_price': {
                '$exists': False
            }})
        for cpt, offer in enumerate(offers_cursor):
            if cpt % 100 == 0:
                print(cpt)
            try:
                price_by_m2 = offer['price_by_m2']
                key = _average_price_key(offer['property']['immotype']['id'],
                                         offer['geo']['city'])
            except (KeyError, TypeError):
                # Offer lacks a field needed for the comparison.
                continue
            average_price = average_prices.get(key)
            if not average_price:
                # No average for this type/city (or a zero average, which
                # would make the ratio undefined).
                continue
            magic_ratio = round(((price_by_m2 / average_price) - 1) * 100, 3)
            collection.update_one(
                {'id': offer['id']},
                {'$set': {
                    'ratio_to_average_price': magic_ratio
                }})

        print("\nComputing took {0:.2f}".format(time.time() - start) +
              " secs\n")
    finally:
        # Release the connection even if a query or update raised.
        client.close()
Ejemplo n.º 5
0
def sniffer():
    """Fetch recently published offers and insert the ones not stored yet.

    After a random delay of 0 to 10 seconds, pages returned by
    ``last_inserted_offers`` are walked starting at page 1. Each offer whose
    ``id`` is not already present in ``antunedo.offers`` is enriched with
    ``add_fields_to_offer`` and inserted; known offers are only counted.
    When the walk is done, a summary document (start time, counters,
    duration in seconds) is inserted into ``antunedo.logs``.

    Prints a running offer counter and whether each offer was new. The
    MongoDB client is closed even when fetching or a write fails; in that
    case no log document is written.
    """
    # Hard upper bound on the page index, also used as the initial guess for
    # the page count until the first response supplies the real value.
    max_page = 501

    time.sleep(randint(0, 10))
    begin_time = time.time()
    client = connect_to_mongodb()
    try:
        offers_collection = client['antunedo']['offers']
        logs_collection = client['antunedo']['logs']
        avg_collection = client['antunedo']['average_prices']

        page = 1
        cpt = 0
        already_added_cpt = 0
        new_offers_cpt = 0
        total_pages = max_page
        last_exec_time = get_last_maradona_execution(client)
        # NOTE(review): with a strict `<`, page number `total_pages` itself
        # is never requested. Whether that drops the last page depends on
        # what last_inserted_offers reports -- confirm.
        while page < total_pages and page < max_page:
            next_page, total_pages = last_inserted_offers(page, last_exec_time)
            for offer in next_page:
                print(cpt)
                if offers_collection.find_one({'id': offer['id']}) is not None:
                    already_added_cpt += 1
                    print('Already added')
                else:
                    new_offers_cpt += 1
                    add_fields_to_offer(offer, int(begin_time), avg_collection)
                    offers_collection.insert_one(offer)
                    print('New offer')
                cpt += 1
            page += 1
        logs_collection.insert_one({
            "start_time": int(begin_time),
            "already_added_offers": already_added_cpt,
            "new_offers": new_offers_cpt,
            "duration": round(time.time() - begin_time, 2)
        })
    finally:
        # Release the connection even if fetching or a write raised.
        client.close()
def main():
    """Compute and store ``price_by_m2`` for offers with a price and surface.

    Selects offers from ``antunedo.offers`` whose
    ``property.characteristic.property_surface`` is greater than 10 and
    whose ``price`` is greater than 10000, then sets ``price_by_m2`` to
    ``price / property_surface`` on each of them (matched by ``id``).

    Prints a progress line, rewritten in place, every 50 offers. The MongoDB
    client is closed even when a query or update fails.
    """
    # NOTE(review): hard-coded denominator for the progress percentage; the
    # displayed value is only accurate if the query matches this many offers.
    expected_total = 26849

    start = time.time()
    client = connect_to_mongodb()
    try:
        collection = client['antunedo']['offers']

        res = collection.find(
            {
                'property.characteristic.property_surface': {
                    '$exists': True,
                    '$gt': 10
                },
                'price': {
                    '$exists': True,
                    '$gt': 10000
                }
            }, {
                # Only the fields needed for the computation are fetched.
                'property.characteristic.property_surface': 1,
                'id': 1,
                'price': 1
            })

        for cpt, offer in enumerate(res):
            if cpt % 50 == 0:
                print("Processing tracking data... " + "{0:.2f}".format(
                    (cpt / expected_total) * 100) + "% in " +
                      "{0:.2f}".format(time.time() - start) + " secs",
                      end='\r')
            surface = offer['property']['characteristic']['property_surface']
            price_by_m2 = offer['price'] / surface
            collection.update_one({'id': offer['id']},
                                  {'$set': {
                                      'price_by_m2': price_by_m2
                                  }})
    finally:
        # Release the connection even if a query or update raised.
        client.close()
Ejemplo n.º 7
0
def get_offers_by_immotype(collection, immotype, total_pages, sort="asc"):
    """Fetch offer pages for one immotype and insert them into ``collection``.

    Pages are requested through ``query_immo_offers(page, immotype, sort)``
    for page indices 0 up to ``total_pages`` inclusive, never going beyond
    index 500. Every non-empty page is stored with ``insert_many``. A
    progress line is printed for each page.

    Returns ``None``; does nothing when ``total_pages`` is 0.
    """
    if total_pages == 0:
        return
    # Page indices 0..500 at most; stop early once past total_pages.
    for page in range(501):
        if page > total_pages:
            break
        offers = query_immo_offers(page, immotype, sort)
        percent = "{0:.2f}".format((page / total_pages) * 100)
        print("Processing data... " + percent + "%, i=" + str(page) +
              ", total=" + str(total_pages) + '\n')
        if not offers:
            continue
        collection.insert_many(offers)


if __name__ == "__main__":
    client = connect_to_mongodb()
    db = client['antunedo']
    collection = db['offers']

    print('starting ...')
    for j in range(12, 52):
        immotype = str(j + 1)
        total_pages = get_amount_of_pages(immotype)
        print("\nNouvel Immotype : " + immotype + ", avec un total de " +
              str(total_pages) + " pages\n\n")
        if total_pages > 1000:
            # TODO : filter this immo category into smaller elements
            print('DEAAAAAD')
        elif total_pages > 500:
            get_offers_by_immotype(collection, immotype, total_pages)
            # TODO : test if total_pages - 500 if right