def process_entry(entry):
    """Store converted nightly prices for each in-window day of one rate row.

    ``entry`` is read positionally: ``entry[0]`` is the listing id,
    ``entry[1]`` and ``entry[2]`` are the start and end of the rate period,
    and ``entry[3]`` is the encoded rate handed to ``decode_rates``.  The
    decoded rate is indexed by ``default_daily``, ``weekend_increase`` and
    ``currency_id``.

    Side effects: for every day produced by ``time._daterange`` that lies
    between 2014-01-01 and 2016-01-29 (both inclusive), the converted price
    is written to ``all_prices_calendar[listing_id][year][date]``.
    Afterwards ``fill_nones`` is invoked and
    ``most_recent_prices[listing_id]`` is set to the decoded rate.

    Returns:
        ``None`` if the listing is not in ``valid_listings`` or once the row
        has been processed; ``True`` if ``default_daily`` is 0, in which case
        the row is skipped.

    NOTE: a second, identical definition of ``process_entry`` follows this
    one in the file and is the one Python ends up binding.
    """
    listing_id = entry[0]
    if listing_id not in valid_listings:
        return

    temp_price = decode_rates(entry[3])
    if temp_price["default_daily"] == 0:
        return True

    # Only days inside this window are stored in the calendar.
    window_start = datetime.date(2014, 1, 1)
    window_end = datetime.date(2016, 1, 29)

    for day in time._daterange(entry[1], entry[2]):
        if not (day >= window_start and day <= window_end):
            continue

        # NOTE(review): the original tested the bare name `_check_weekday`,
        # which is always truthy if it is a function, so every day received
        # the weekend surcharge.  Calling it with the day is the presumed
        # intent (true when weekend) -- confirm the helper's signature.
        if _check_weekday(day):
            amount = temp_price["default_daily"] + temp_price["weekend_increase"]
        else:
            amount = temp_price["default_daily"]

        year_key = day.strftime("%Y")
        date_key = day.strftime("%Y-%m-%d")
        all_prices_calendar[listing_id][year_key][date_key] = _convert_currency(
            amount, temp_price["currency_id"])

    this_try = fill_nones(listing_id, entry[1])
    if this_try is None:
        # In the case that this is the first price and it doesn't cover up to
        # the beginning of the training period: register the price first,
        # then retry the fill.
        most_recent_prices[listing_id] = temp_price
        fill_nones(listing_id, entry[1])
    most_recent_prices[listing_id] = temp_price
def process_entry(entry):
    """Store converted nightly prices for each in-window day of one rate row.

    ``entry`` is read positionally: ``entry[0]`` is the listing id,
    ``entry[1]`` and ``entry[2]`` are the start and end of the rate period,
    and ``entry[3]`` is the encoded rate handed to ``decode_rates``.  The
    decoded rate is indexed by ``default_daily``, ``weekend_increase`` and
    ``currency_id``.

    Side effects: for every day produced by ``time._daterange`` that lies
    between 2014-01-01 and 2016-01-29 (both inclusive), the converted price
    is written to ``all_prices_calendar[listing_id][year][date]``.
    Afterwards ``fill_nones`` is invoked and
    ``most_recent_prices[listing_id]`` is set to the decoded rate.

    Returns:
        ``None`` if the listing is not in ``valid_listings`` or once the row
        has been processed; ``True`` if ``default_daily`` is 0, in which case
        the row is skipped.

    NOTE: an identical definition of ``process_entry`` appears immediately
    before this one in the file; this later definition is the one Python
    binds.
    """
    listing_id = entry[0]
    if listing_id not in valid_listings:
        return

    temp_price = decode_rates(entry[3])
    if temp_price["default_daily"] == 0:
        return True

    # Only days inside this window are stored in the calendar.
    window_start = datetime.date(2014, 1, 1)
    window_end = datetime.date(2016, 1, 29)

    for day in time._daterange(entry[1], entry[2]):
        if not (day >= window_start and day <= window_end):
            continue

        # NOTE(review): the original tested the bare name `_check_weekday`,
        # which is always truthy if it is a function, so every day received
        # the weekend surcharge.  Calling it with the day is the presumed
        # intent (true when weekend) -- confirm the helper's signature.
        if _check_weekday(day):
            amount = temp_price["default_daily"] + temp_price["weekend_increase"]
        else:
            amount = temp_price["default_daily"]

        year_key = day.strftime("%Y")
        date_key = day.strftime("%Y-%m-%d")
        all_prices_calendar[listing_id][year_key][date_key] = _convert_currency(
            amount, temp_price["currency_id"])

    this_try = fill_nones(listing_id, entry[1])
    if this_try is None:
        # In the case that this is the first price and it doesn't cover up to
        # the beginning of the training period: register the price first,
        # then retry the fill.
        most_recent_prices[listing_id] = temp_price
        fill_nones(listing_id, entry[1])
    most_recent_prices[listing_id] = temp_price
def fill_training_and_testing_data(listing_id, testing_dates, training_dates=None):
    """Split one listing's per-day records into training and testing sets.

    Days come from ``time._daterange(start, testing_dates["end_date"])``,
    where ``start`` is ``training_dates["start_date"]`` when
    ``training_dates`` is truthy and 2008-01-01 otherwise.  A day earlier
    than ``testing_dates["start_date"]`` goes to the training set; every
    other day goes to the testing set.  Days with no complete record in
    ``all_data[listing_id]`` are skipped.

    Returns:
        ``(training_data, testing_data)``.  Both are dicts with parallel
        ``"features"`` and ``"classification"`` lists; ``testing_data`` also
        has a ``"days"`` list of ``YYYY-MM-DD`` strings aligned with them.
        ``None`` is returned (after printing the error and ``"in 122"``) if
        the listing has no entry in ``json_files["occupancy_dict"]`` or any
        other ``KeyError`` escapes the setup steps.
    """
    training_data = {"features": [], "classification": []}
    # "days" has to exist up front.  Without it the append below raised a
    # KeyError that the per-day handler swallowed, so no day was ever stored.
    testing_data = {"features": [], "classification": [], "days": []}

    try:
        # Looked up only for its KeyError: does the listing have occupancy
        # data to begin with?
        _ = json_files["occupancy_dict"][str(listing_id)]

        if training_dates:
            # A stricter check that rejected listings created after `start`
            # was disabled because it disallowed the majority of the dataset.
            start = training_dates["start_date"]
        else:
            start = datetime.date(2008, 1, 1)

        for day in time._daterange(start, testing_dates["end_date"]):
            try:
                is_training_day = day < testing_dates["start_date"]
                record = all_data[listing_id][day]
                # Resolve both values before appending either, so a partial
                # record can never leave the parallel lists out of step.
                features = record["feature_data"]
                label = record["classification"]
            except KeyError:
                # This day has no (complete) data; the listing may still
                # have data on other days.
                continue

            bucket = training_data if is_training_day else testing_data
            bucket["features"].append(features)
            bucket["classification"].append(label)
            if not is_training_day:
                testing_data["days"].append(day.strftime("%Y-%m-%d"))
    except KeyError as e:
        print(e)
        print("in 122")
        return None

    return (training_data, testing_data)