Example #1
0
def main():
    """Scrape every listing that has not been scraped yet and store it.

    A listing counts as "not yet scraped" when its document has no 'dt'
    field. Each such listing is passed to ``scrape_and_insert`` with
    ``overwrite=True``, pausing between requests.
    """
    scraper = AirBnBListing(db_name=DB_NAME, coll_name=COLL_NAME)

    # Select documents lacking the 'dt' field and project only their _id.
    # The cursor is materialized into a list up front, before the slow loop.
    unscraped_filter = {"dt": {"$exists": 0}}
    id_only = {"_id": 1}
    pending_docs = list(scraper.coll.find(unscraped_filter, id_only))

    for doc in pending_docs:
        scraper.scrape_and_insert(listing_id=doc["_id"], overwrite=True)

        # Throttle requests so as not to get banned from AirBnB.
        time.sleep(3)
Example #2
0
def main():
    """Scrape and insert all listings whose documents lack a 'dt' field.

    The absence of 'dt' is used as the marker for "not scraped yet". The
    matching ids are fetched first, then each one is scraped (overwriting
    the stored document) with a fixed delay after every request.
    """
    air_bnb = AirBnBListing(db_name=DB_NAME, coll_name=COLL_NAME)

    # Only the _id of each un-scraped listing is needed, so project just that.
    query = {'dt': {'$exists': 0}}
    projection = {'_id': 1}
    todo = list(air_bnb.coll.find(query, projection))

    for entry in todo:
        current_id = entry['_id']
        air_bnb.scrape_and_insert(listing_id=current_id, overwrite=True)

        # Wait between requests so as not to get banned from AirBnB.
        time.sleep(3)
def main():
    """Run feature extraction for every listing in the collection.

    Fetches the _id of every document in the collection, then for each one
    calls ``pull_from_db`` followed by ``extract_and_add_features`` on the
    shared ``AirBnBListing`` object and prints a progress line.
    """
    air_listing = AirBnBListing(db_name=DB_NAME, coll_name=COLL_NAME)

    # Empty filter matches all documents; only the _id field is projected.
    listing_docs = list(air_listing.coll.find({}, {'_id': 1}))

    for listing in listing_docs:
        listing_id = listing['_id']

        # NOTE(review): pull_from_db presumably loads the listing into
        # air_listing's state so extract_and_add_features can operate on it
        # -- confirm against the AirBnBListing class.
        air_listing.pull_from_db(listing_id=listing_id)
        air_listing.extract_and_add_features()

        # Parenthesized single-argument form: prints the same text under
        # Python 2 and is valid syntax under Python 3.
        print("Extracting Features for %s" % listing_id)