def test_happy_path(self):
    """Smoke-test ``compute_score.run`` on freshly created office/DPAE data.

    The two settings thresholds are overridden before the fixtures are
    created. The test makes no assertion on the result; it only passes if
    the run completes without raising.
    """
    # Override the scoring thresholds for this run.
    settings.SCORE_COEFFICIENT_OF_VARIATION_MAX = 1.0
    settings.HIGH_SCORE_COMPANIES_COUNT_MIN = 0

    # Fixtures.
    make_office()
    make_dpae()

    reference_date = datetime.now()
    department_code = "57"
    compute_score.run(
        settings.OFFICE_TABLE,
        settings.DPAE_TABLE,
        department_code,
        reference_date,
    )
def test_happy_path(self):
    """A run on departement 57 (after creating fixtures) must return True."""
    make_offices()
    make_hirings()

    department_code = "57"
    start_date = get_prediction_beginning_date()
    outcome = compute_score.run(department_code, start_date)

    # True means the computation succeeded.
    self.assertEqual(outcome, True)
def test_unhappy_path(self):
    """A run on departement 58 must return False.

    Per the original author's note, the computation fails because there is
    no data for this departement.
    """
    make_offices()
    make_hirings()

    department_code = "58"
    start_date = get_prediction_beginning_date()
    outcome = compute_score.run(department_code, start_date)

    # False means the computation failed (no data for this departement).
    self.assertEqual(outcome, False)
def compute(etab, dpae, departement, dpae_date):
    """Run ``compute_score.run`` for one departement, logging failures.

    All four arguments are forwarded unchanged to ``compute_score.run``.

    Returns:
        Whatever ``compute_score.run`` returns, or ``None`` when it raised
        an exception (the error and its traceback are logged instead of
        being propagated).
    """
    try:
        result = compute_score.run(etab, dpae, departement, dpae_date)
    except Exception as exc:
        # Deliberately ``Exception`` and not a bare ``except``: a bare clause
        # would also swallow KeyboardInterrupt / SystemExit and turn an
        # interruption request into a silent ``None`` result.
        logger.error("error in departement %s : %s", departement, exc)
        logger.error("compute_score traceback: %s", traceback.format_exc())
        result = None
    else:
        logger.info("finished compute_score.run (%s)", departement)
    return result
def compute(departement):
    """Run ``compute_score.run`` for ``departement`` and return its result.

    An info message is logged once the run has returned; exceptions raised
    by ``compute_score.run`` propagate to the caller.
    """
    outcome = compute_score.run(departement)
    logger.info("finished compute_score.run (%s)", departement)
    return outcome
def test_happy_path_investigation(self):
    """Inspect the dataframe returned by a successful run on departement 57.

    The run is asked for its dataframe (``return_df_etab_if_successful=True``)
    and the test then checks the row count, the reference dates, and which
    monthly, per-period and score columns are present or absent.
    """
    make_offices()
    make_hirings()
    departement = "57"
    prediction_beginning_date = get_prediction_beginning_date()
    offices_df = compute_score.run(
        departement,
        prediction_beginning_date,
        return_df_etab_if_successful=True,
    )
    column_names = offices_df.columns.values

    def check_columns(expectations):
        # Run each (assertion, column) pair in the order given.
        for assertion, column in expectations:
            assertion(column, column_names)

    self.assertEqual(len(offices_df), OFFICES_HAVING_HIRINGS)

    # Simulated (past) situation:
    # - today is 2012 Jan 10th (the importer is usually run on the 10th of
    #   each month);
    # - the prediction therefore begins on 2012 Jan 1st (first day of the
    #   current month, since we are in the first half of that month);
    # - DPAE data ends on 2011 Dec 31st;
    # - the last alternance data is from 2011 Aug 31st.
    # The alternance data thus lags by several months, which is what happens
    # in real life as of now.
    self.assertEqual(get_dpae_last_historical_data_date(), datetime(2011, 12, 31))
    # Disabled check kept from the original:
    # self.assertEqual(importer_settings.ALTERNANCE_LAST_HISTORICAL_DATA_DATE, datetime(2011, 8, 31))
    # The prediction beginning date is shared by DPAE and Alternance.
    self.assertEqual(prediction_beginning_date, datetime(2012, 1, 1))

    # --- DPAE/LBB checks
    # Exactly 5 years of monthly hirings, last month (2011-12) included.
    check_columns((
        (self.assertNotIn, 'dpae-2006-12'),
        (self.assertIn, 'dpae-2007-1'),
        (self.assertIn, 'dpae-2011-12'),
        (self.assertNotIn, 'dpae-2012-1'),
    ))

    # Reminder: for DPAE, 1 period = 6 months.
    # 7+2+2=11 past periods are expected:
    # - the LIVE set uses the 7 last periods (i.e. the number of features
    #   fed to the model);
    # - the TEST set is the LIVE set shifted 12 months earlier, so it
    #   ignores the last 2 periods and uses the 7 periods before them;
    # - the TRAIN set is the LIVE set shifted 24 months earlier, so it
    #   ignores the last 4 periods and uses the 7 periods before them.
    check_columns((
        (self.assertNotIn, 'dpae-period-0'),
        (self.assertIn, 'dpae-period-1'),
        (self.assertIn, 'dpae-period-11'),
        (self.assertNotIn, 'dpae-period-12'),
    ))

    # Final score columns.
    check_columns((
        (self.assertIn, 'score'),
        (self.assertIn, 'score_regr'),
    ))

    # --- Alternance/LBA checks
    # Exactly 5 years of monthly columns, last month (2011-12) included.
    check_columns((
        (self.assertNotIn, 'alt-2006-12'),
        (self.assertIn, 'alt-2007-1'),
        (self.assertIn, 'alt-2011-12'),
        (self.assertNotIn, 'alt-2012-1'),
    ))

    # Reminder: for Alternance, 1 period = 6 months.
    # Original rationale: there is a 4-month gap between the last Alternance
    # data and today (2011-9, 10, 11, 12), rounded up to a 1-period data gap.
    # - the LIVE set uses the 7 last periods before that 1-period gap;
    # - the TEST set is the LIVE set shifted 12 months earlier, so it
    #   ignores 2 more periods and uses the 7 periods before them;
    # - the TRAIN set is the LIVE set shifted 24 months earlier, so it
    #   ignores 4 more periods and uses the 7 periods before them.
    # NOTE(review): the original comment announced 7+2+2+1=12 periods, yet
    # the assertions below require 'alt-period-12' to be absent (periods
    # 1 to 11 only) -- confirm which one is intended.
    check_columns((
        (self.assertNotIn, 'alt-period-0'),
        (self.assertIn, 'alt-period-1'),
        (self.assertIn, 'alt-period-11'),
        (self.assertNotIn, 'alt-period-12'),
    ))

    # Final score columns.
    check_columns((
        (self.assertIn, 'score_alternance'),
        (self.assertIn, 'score_alternance_regr'),
    ))