def test_happy_path(self):
     # Smoke test: runs the score computation end to end for departement "57".
     # There is no assertion; the test passes as long as nothing raises.

     # NOTE(review): these overrides presumably relax the quality thresholds so
     # the small fixture data set is accepted -- confirm. They are written to
     # the shared settings module and are not restored afterwards.
     settings.SCORE_COEFFICIENT_OF_VARIATION_MAX = 1.0
     settings.HIGH_SCORE_COMPANIES_COUNT_MIN = 0

     # Fixture data: offices first, then DPAE records.
     make_office()
     make_dpae()

     reference_date = datetime.now()
     compute_score.run(
         settings.OFFICE_TABLE,
         settings.DPAE_TABLE,
         "57",
         reference_date,
     )
 def test_happy_path(self):
     # Computing scores for departement "57" must report success (True).
     target_departement = "57"

     # Fixture data: offices first, then their hirings.
     make_offices()
     make_hirings()

     start_date = get_prediction_beginning_date()
     succeeded = compute_score.run(target_departement, start_date)

     # Strict equality with True is intended: a merely truthy value
     # (e.g. a non-empty object) would not be accepted.
     self.assertEqual(succeeded, True)
 def test_unhappy_path(self):
     # Departement "58" has no data, so the computation must report
     # failure (False).
     target_departement = "58"

     # Same fixture data as the happy path.
     make_offices()
     make_hirings()

     start_date = get_prediction_beginning_date()
     succeeded = compute_score.run(target_departement, start_date)

     # Strict equality with False is intended: None or another falsy value
     # would not be accepted.
     self.assertEqual(succeeded, False)
Exemple #4
0
def compute(etab, dpae, departement, dpae_date):
    """Run ``compute_score.run`` for one departement without letting it crash.

    All four arguments are forwarded unchanged to ``compute_score.run``.

    Args:
        etab: first positional argument of ``compute_score.run``
            (presumably the office table -- confirm against callers).
        dpae: second positional argument of ``compute_score.run``
            (presumably the DPAE table -- confirm against callers).
        departement: departement code; also used in the log messages.
        dpae_date: fourth positional argument of ``compute_score.run``.

    Returns:
        Whatever ``compute_score.run`` returns, or ``None`` if it raised.
    """
    try:
        result = compute_score.run(etab, dpae, departement, dpae_date)
    except Exception as exc:
        # Only ordinary errors are swallowed. KeyboardInterrupt and SystemExit
        # derive from BaseException and must propagate so that the process can
        # actually be interrupted or shut down.
        logger.error("error in departement %s : %s", departement, exc)
        logger.error("compute_score traceback: %s", traceback.format_exc())
        return None
    else:
        logger.info("finished compute_score.run (%s)", departement)
        return result
def compute(departement):
    """Compute scores for one departement and return the outcome.

    The value returned by ``compute_score.run`` is handed back untouched and
    completion is logged at INFO level. Nothing is caught here, so any
    exception raised by ``compute_score.run`` propagates to the caller.
    """
    outcome = compute_score.run(departement)
    logger.info("finished compute_score.run (%s)", departement)
    return outcome
    def test_happy_path_investigation(self):
        # Inspects the intermediate dataframe produced by a successful run for
        # departement "57": row count, monthly columns, period columns and the
        # final score columns, for both DPAE (LBB) and Alternance (LBA).
        target_departement = "57"

        make_offices()
        make_hirings()

        prediction_beginning_date = get_prediction_beginning_date()
        offices_df = compute_score.run(
            target_departement,
            prediction_beginning_date,
            return_df_etab_if_successful=True,
        )
        column_names = offices_df.columns.values

        # One row per office that has hirings.
        self.assertEqual(len(offices_df), OFFICES_HAVING_HIRINGS)

        # Simulated (past) situation:
        # - "today" is 2012-01-10 (the importer usually runs on the 10th of
        #   each month);
        # - the prediction therefore begins on 2012-01-01 (first day of the
        #   current month, since we are in the first half of that month);
        # - DPAE data ends on 2011-12-31;
        # - the last alternance data is dated 2011-08-31, i.e. there is a gap
        #   of several months for alternance, as currently happens in real life.
        self.assertEqual(
            get_dpae_last_historical_data_date(), datetime(2011, 12, 31)
        )
        # Disabled check on the alternance side:
        # self.assertEqual(importer_settings.ALTERNANCE_LAST_HISTORICAL_DATA_DATE, datetime(2011, 8, 31))
        # The beginning of prediction is shared by DPAE and Alternance.
        self.assertEqual(prediction_beginning_date, datetime(2012, 1, 1))

        # ------------------------------------------------------------------
        # DPAE / LBB
        # ------------------------------------------------------------------

        # Exactly 5 years of monthly hirings, last month (2011-12) included.
        self.assertNotIn('dpae-2006-12', column_names)
        self.assertIn('dpae-2007-1', column_names)
        self.assertIn('dpae-2011-12', column_names)
        self.assertNotIn('dpae-2012-1', column_names)

        # For DPAE one period is 6 months, and 7 + 2 + 2 = 11 past periods are
        # expected:
        # - the LIVE set uses the last 7 periods (the number of features fed
        #   to the model);
        # - the TEST set is the LIVE set shifted 12 months earlier: it skips
        #   the last 2 periods and uses the 7 periods before them;
        # - the TRAIN set is the LIVE set shifted 24 months earlier: it skips
        #   the last 4 periods and uses the 7 periods before them.
        self.assertNotIn('dpae-period-0', column_names)
        self.assertIn('dpae-period-1', column_names)
        self.assertIn('dpae-period-11', column_names)
        self.assertNotIn('dpae-period-12', column_names)

        # Final score columns.
        self.assertIn('score', column_names)
        self.assertIn('score_regr', column_names)

        # ------------------------------------------------------------------
        # Alternance / LBA
        # ------------------------------------------------------------------

        # Exactly 5 years of monthly hirings, last month (2011-12) included.
        # NOTE(review): this contradicts the "last alternance data is
        # 2011-08-31" scenario described above -- confirm which one is current.
        self.assertNotIn('alt-2006-12', column_names)
        self.assertIn('alt-2007-1', column_names)
        self.assertIn('alt-2011-12', column_names)
        self.assertNotIn('alt-2012-1', column_names)

        # For Alternance one period is also 6 months. The original reasoning
        # expects 7 + 2 + 2 + 1 = 12 past periods:
        # - 4 months separate the last alternance data from today (2011-09 to
        #   2011-12), rounded up to a data gap of 1 period;
        # - the LIVE set uses the 7 periods preceding that gap;
        # - the TEST set is the LIVE set shifted 12 months earlier: it skips
        #   2 more periods and uses the 7 periods before them;
        # - the TRAIN set is the LIVE set shifted 24 months earlier: it skips
        #   4 more periods and uses the 7 periods before them.
        # NOTE(review): the assertions below only accept periods 1..11 and
        # require period 12 to be absent, which does not match the 12 periods
        # explained above -- confirm whether the explanation is stale.
        self.assertNotIn('alt-period-0', column_names)
        self.assertIn('alt-period-1', column_names)
        self.assertIn('alt-period-11', column_names)
        self.assertNotIn('alt-period-12', column_names)

        # Final score columns.
        self.assertIn('score_alternance', column_names)
        self.assertIn('score_alternance_regr', column_names)