def predict(self, business_forecast, training_data=pd.DataFrame()):
        """Predict per-bin lesson counts from per-timezone historical shares.

        For every entry of ``business_forecast`` the rows of
        ``training_data`` whose ``user_tz`` equals the entry's ``'user_tz'``
        are wrapped in an ``ActualSchedule``.  Its bin counts are smoothed
        with ``self.smoother``, normalised so they sum to 1, and weighted by
        the entry's ``'frequency' * 'schedule_type'``.

        Args:
            business_forecast: iterable of mappings providing the keys
                ``'user_tz'``, ``'frequency'`` and ``'schedule_type'``.
            training_data: DataFrame with a ``user_tz`` column.  The empty
                default has no such column, so it is only usable together
                with an empty forecast.

        Returns:
            numpy array of length 6 holding the accumulated prediction.
        """
        totals = np.zeros(6)
        for entry in business_forecast:
            same_tz = training_data[training_data.user_tz == entry['user_tz']]
            # Smooth once and reuse the result for numerator and denominator,
            # so bins() is computed a single time per forecast entry.
            smoothed = ActualSchedule(same_tz).bins() + self.smoother
            share = smoothed / smoothed.sum()
            totals += share * entry['frequency'] * entry['schedule_type']

        return totals
    def predict(self, business_forecast, training_data=pd.DataFrame()):
        """Predict per-bin lesson counts from per-schedule-type shares.

        For every entry of ``business_forecast`` the rows of
        ``self.training_data`` whose ``schedule_type`` equals the entry's
        ``'schedule_type'`` are wrapped in an ``ActualSchedule``.  Its bin
        counts get 0.01 added to each of the 6 bins, are normalised so they
        sum to 1, and are weighted by ``'frequency' * 'schedule_type'``.

        Args:
            business_forecast: iterable of mappings providing the keys
                ``'schedule_type'`` and ``'frequency'``.
            training_data: accepted for interface compatibility but not read;
                the rows come from ``self.training_data`` instead.

        Returns:
            numpy array of length 6 holding the accumulated prediction.
        """
        totals = np.zeros(6)
        smoother = np.array([0.01] * 6)
        for entry in business_forecast:
            history = self.training_data
            same_type = history[history.schedule_type == entry['schedule_type']]
            # Smooth once and reuse the result for numerator and denominator,
            # so bins() is computed a single time per forecast entry.
            smoothed = ActualSchedule(same_type).bins() + smoother
            share = smoothed / smoothed.sum()
            totals += share * entry['frequency'] * entry['schedule_type']

        return totals
    def predict(self, business_forecast, training_data=pd.DataFrame()):
        """Scale the historical bin distribution to the forecast lesson count.

        The bins of an ``ActualSchedule`` built from
        ``self.filtered_training_data(business_forecast)`` are divided by
        their sum and multiplied by
        ``self.num_business_forecast_lessons(business_forecast)``.

        Args:
            business_forecast: forwarded unchanged to the two helper methods
                named above.
            training_data: not read by this implementation.

        Returns:
            The rescaled bins.

        Design notes (ideas considered for refining the model):
          * try company - show chi-square test
          * try timezone - show chi-square test
              - timezone generally is not statistically significant
                (2x and 3x).
              - Significant for 4x and 5x but ... (the note ends here)
          * try company and timezone
          * too difficult. Already pretty accurate
        """
        history = ActualSchedule(self.filtered_training_data(business_forecast))
        counts = history.bins()

        # NOTE(review): on Python 2 without ``from __future__ import
        # division`` this floor-divides if bins() returns an integer array -
        # confirm that bins() yields floats.
        shares = counts / counts.sum()
        lessons = self.num_business_forecast_lessons(business_forecast)

        return shares * lessons
    def errors(self):
        """Back-test the model over consecutive months and tabulate the errors.

        For each offset ``i`` a fresh model from ``self._model()`` is fitted
        on ``self.training_data(i, self.training_data_span_months)`` and then
        asked to predict the month returned by
        ``self.test_data(i + self.training_data_span_months)``.  Progress,
        predictions and actuals are printed along the way.

        Returns:
            DataFrame with one row per evaluated month, indexed by the label
            from ``self.year_month_index`` and holding the columns
            ``errors`` (result of ``mean_absolute_error``; lower is better),
            ``test_data_size`` (sum of the actual bins), ``predictions`` and
            ``actuals``.
        """
        indices = []  # row labels, formatted YYYY-M (e.g. 2016-8, 2016-9)
        predictions = []
        errors = []  # errors. Lower is better
        test_data_size = []
        actuals = []

        span = self.training_data_span_months
        for i in range(0, self.num_months() - span):
            m = self._model()
            training_data = self.training_data(i, span)
            m.fit(training_data)

            # The month right after the training window is the test month.
            td = self.test_data(i + span)

            # Single-argument print(...) produces the same output on
            # Python 2 and is also valid syntax on Python 3.
            print("Generating prediction...")
            prediction = m.predict(BusinessForecast(td).convert(),
                                   training_data=training_data)

            actual = ActualSchedule(td).bins()

            print("prediction ")
            print(prediction)
            print("\n")

            print("actual")
            print(actual)
            print("\n")

            error = mean_absolute_error(actual, prediction)
            errors.append(error)

            index = self.year_month_index(i + span)
            indices.append(index)
            print("time: {}, error: {}".format(index, error))

            predictions.append(prediction)
            actuals.append(actual)
            test_data_size.append(actual.sum())

        return pd.DataFrame({
            'errors': errors,
            'test_data_size': test_data_size,
            'predictions': predictions,
            'actuals': actuals
        }).set_index([indices])
    def errors(self):
        """Evaluate the model month by month and return the results as a table.

        A rolling window is used: for every start offset a new model from
        ``self._model()`` is fitted on
        ``self.training_data(offset, self.training_data_span_months)`` and
        its prediction is compared with the actual bins of
        ``self.test_data(offset + self.training_data_span_months)``.
        Predictions, actuals and the per-month error are printed as they are
        produced.

        Returns:
            DataFrame indexed by the ``self.year_month_index`` label of each
            test month, with the columns ``errors`` (value of
            ``mean_absolute_error``; lower is better), ``test_data_size``
            (sum of the actual bins), ``predictions`` and ``actuals``.
        """
        indices = []  # row labels, formatted YYYY-M (e.g. 2016-8, 2016-9)
        predictions = []
        errors = []  # errors. Lower is better
        test_data_size = []
        actuals = []

        window = self.training_data_span_months
        for offset in range(0, self.num_months() - window):
            model = self._model()
            training_data = self.training_data(offset, window)
            model.fit(training_data)

            # Test on the month that immediately follows the training window.
            test_month = offset + window
            td = self.test_data(test_month)

            # print(...) with one argument behaves the same on Python 2 and
            # keeps this method parseable on Python 3.
            print("Generating prediction...")
            forecast = BusinessForecast(td).convert()
            prediction = model.predict(forecast, training_data=training_data)

            actual = ActualSchedule(td).bins()

            print("prediction ")
            print(prediction)
            print("\n")

            print("actual")
            print(actual)
            print("\n")

            error = mean_absolute_error(actual, prediction)
            errors.append(error)

            index = self.year_month_index(test_month)
            indices.append(index)
            print("time: {}, error: {}".format(index, error))

            predictions.append(prediction)
            actuals.append(actual)
            test_data_size.append(actual.sum())

        return pd.DataFrame({
            'errors': errors,
            'test_data_size': test_data_size,
            'predictions': predictions,
            'actuals': actuals
            }).set_index([indices])
        def converts_it_to_an_actual_schedule(self):
            """Check the bins ActualSchedule reports for five 4-lesson users.

            User ``u`` (0-4) has lesson ``k`` (1-4) at time ``u + k - 1`` on
            day ``u``.  Of the 20 time values, 10 are below 4 and 10 lie in
            4-7, which matches the expected 10/10/0/0/0/0 split - presumably
            the bins are 4 hours wide; confirm against ActualSchedule.bins.
            """
            columns = {
                'user_tz': [
                    "Brasilia",
                    "Brasilia",
                    "Pacific (US & Canada)",
                    "Eastern (US & Canada)",
                    "Brasilia",
                ],
            }
            # Lesson k starts one time step later than lesson k - 1; the day
            # column is the same 0..4 sequence for every lesson.
            for lesson in range(1, 5):
                columns['l%d_time' % lesson] = range(lesson - 1, lesson + 4)
                columns['l%d_day' % lesson] = range(0, 5)
            columns['schedule_type'] = [4] * 5

            bins = ActualSchedule(pd.DataFrame(columns)).bins()

            for position, expected in enumerate([10, 10, 0, 0, 0, 0]):
                expect(bins[position]).to.equal(expected)
        def converts_it_to_an_actual_schedule(self):
            """Verify ActualSchedule.bins() for a fixture of five users.

            Every user has schedule type 4 and four lessons whose times are
            consecutive, starting at the user's position (0-4); all lessons
            of a user share the same day value.  The first two bins are
            expected to hold 10 lessons each and the remaining four none.
            """
            timezones = [
                "Brasilia", "Brasilia", "Pacific (US & Canada)",
                "Eastern (US & Canada)", "Brasilia"
            ]
            user_count = len(timezones)

            summaries = pd.DataFrame({
                'user_tz': timezones,
                'l1_time': range(0, 5),
                'l1_day': range(0, user_count),
                'l2_time': range(1, 6),
                'l2_day': range(0, user_count),
                'l3_time': range(2, 7),
                'l3_day': range(0, user_count),
                'l4_time': range(3, 8),
                'l4_day': range(0, user_count),
                'schedule_type': [4] * user_count
            })

            bins = ActualSchedule(summaries).bins()

            expected_counts = (10, 10, 0, 0, 0, 0)
            for slot in range(6):
                expect(bins[slot]).to.equal(expected_counts[slot])