def main():
    """Scrape every listing in the collection that has no 'dt' field yet.

    Documents lacking a 'dt' field are taken to be listings that have not
    been scraped.  Each such listing id is handed to ``scrape_and_insert``
    with ``overwrite=True``, with a fixed pause after every request.
    """
    air_listing = AirBnBListing(db_name=DB_NAME, coll_name=COLL_NAME)

    # Select documents without a 'dt' field and project only their '_id'.
    not_yet_scraped = {"dt": {"$exists": 0}}
    id_only = {"_id": 1}

    # Collect the ids up front so the slow, throttled loop below walks a
    # plain list instead of the query result itself.
    pending_ids = [
        doc["_id"]
        for doc in air_listing.coll.find(not_yet_scraped, id_only)
    ]

    # Attempt to scrape & insert each pending listing into the db.
    for listing_id in pending_ids:
        air_listing.scrape_and_insert(listing_id=listing_id, overwrite=True)
        time.sleep(3)  # throttle so as not to get banned from AirBnB
def main():
    """Scrape and store all listings that do not yet carry a 'dt' field.

    The absence of 'dt' on a document is used as the marker for "not yet
    scraped".  Every matching listing is scraped via ``scrape_and_insert``
    (``overwrite=True``), sleeping between listings.
    """
    air_listing = AirBnBListing(db_name=DB_NAME, coll_name=COLL_NAME)

    # Query: documents where 'dt' does not exist; return only the '_id'.
    missing_dt_filter = {'dt': {'$exists': 0}}
    projection = {'_id': 1}
    unscraped_docs = list(air_listing.coll.find(missing_dt_filter, projection))

    for doc in unscraped_docs:
        air_listing.scrape_and_insert(listing_id=doc['_id'], overwrite=True)
        # Pause between requests so as not to get banned from AirBnB.
        time.sleep(3)
def main():
    """Run feature extraction for every listing in the collection.

    Fetches the '_id' of every document in the collection, then for each one
    calls ``pull_from_db`` followed by ``extract_and_add_features`` on the
    shared ``AirBnBListing`` instance and prints a progress line.
    """
    air_listing = AirBnBListing(db_name=DB_NAME, coll_name=COLL_NAME)

    # Empty filter selects all documents; only the '_id' field is projected.
    listings = list(air_listing.coll.find({}, {'_id': 1}))

    for listing in listings:
        listing_id = listing['_id']
        # presumably pull_from_db loads this listing into air_listing so the
        # next call operates on it -- confirm against AirBnBListing.
        air_listing.pull_from_db(listing_id=listing_id)
        air_listing.extract_and_add_features()
        # Single-argument parenthesised print: valid and identical in output
        # on both Python 2 and Python 3 (the bare print statement is a
        # SyntaxError on Python 3).
        print("Extracting Features for %s" % listing_id)