def import_reference_data():
    """Load all reference CSVs, then the survey file, for the current run.

    Relies on module-level ``run_id``, ``reference_data`` (a mapping of
    dataset label -> file name) and ``survey_data``.
    """
    log.info("-> Start data load")

    # The three traffic files differ only in CSV type and lookup key.
    for csv_type, key in ((CSVType.Sea, 'Sea'),
                          (CSVType.Air, 'Air'),
                          (CSVType.Tunnel, 'Tunnel')):
        import_traffic(file_type=csv_type, run_id=run_id,
                       file_name=reference_data[key])

    import_shift(file_type=CSVType.Shift, run_id=run_id,
                 file_name=reference_data['Shift'])
    import_non_response(file_type=CSVType.NonResponse, run_id=run_id,
                        file_name=reference_data['Non Response'])
    import_unsampled(file_type=CSVType.Unsampled, run_id=run_id,
                     file_name=reference_data['Unsampled'])
    import_survey_file(survey_data_path=survey_data, run_id=run_id)

    log.info("-> End data load")
def on_put(self, req: Request, resp: Response, run_id: str) -> None:
    """Start a calculation run for ``run_id`` on a background thread.

    Raises a 403 HTTPError when a workflow run is already in progress,
    answers 401 for an unknown run id, otherwise launches
    ``run_calculations`` in a daemonless thread and reports the job as
    started in the JSON response body.
    """
    if self.workflow.in_progress():
        msg = f"Can only run one instance of a workflow at a time, {run_id} rejected."
        log.error(msg)
        raise falcon.HTTPError(falcon.HTTP_403, 'Concurrency Error', msg)

    log.info("Starting calculations for RUN_ID: " + run_id)
    try:
        if not db.is_valid_run_id(run_id):
            # Unknown run id: report it and bail out without starting work.
            resp.status = falcon.HTTP_401
            resp.body = json.dumps({'status': "invalid job id: " + run_id})
            return

        worker = threading.Thread(target=self.workflow.run_calculations,
                                  args=(run_id,))
        worker.start()
        log.info(f"started job: {run_id}")
        resp.body = json.dumps({'status': "started job: " + run_id})
    except ValueError:
        raise falcon.HTTPError(falcon.HTTP_400, 'Invalid JSON',
                               'Could not decode the request body. The JSON was invalid.')
def wrapper(self, run_id: str):
    """Run the wrapped workflow step unless the run has been cancelled.

    The step name is the wrapped function's name minus its leading
    underscore; its status is updated before and after execution.
    """
    step_name = func.__name__[1:]
    if self.is_cancelled():
        self.set_status(step_name, self._CANCELLED)
        log.info(f"Processing cancelled. Skipping step {step_name}")
        return
    self.set_status(step_name, self._IN_PROGRESS)
    func(self, run_id)
    self.set_status(step_name, self._DONE)
def _initialize(self) -> None:
    """Reset the working tables and mark all 14 workflow steps not started."""
    clear_memory_table("SURVEY_SUBSAMPLE")()
    clear_memory_table("SAS_SURVEY_SUBSAMPLE")()
    # Status table is keyed "step_1" .. "step_14".
    self._current_status = {
        f"step_{n}": IPSWorkflow._NOT_STARTED for n in range(1, 15)
    }
    log.info("Cleared current_status")
def run_calculations(self, run_id: str) -> None:
    """Execute every step group in the workflow DAG for ``run_id``.

    The workflow is flagged in-progress for the duration. The flag is
    cleared in a ``finally`` so that an exception inside a step cannot
    leave ``_in_progress`` stuck True, which would make ``on_put``
    reject every subsequent run until the service restarted.
    """
    self._initialize()
    self._in_progress = True
    try:
        # Iterate (name, steps) pairs directly rather than re-looking
        # each key up in the dict.
        for name, steps in self._dag_list.items():
            log.info(f"--> Start Step: {name}")
            self._run_steps(steps, run_id)
            log.info(f"--> End Step: {name}\n")
    finally:
        self._in_progress = False
def run_r_ges_script() -> None:
    """Run the step-5 GES weighting R script (originally by David Powell, 07/06/2018).

    Invokes ``Rscript`` on the bundled ``r_scripts/step5.R``, passing the
    database credentials as positional arguments; the script writes the
    GES output to the SQL database. Returns nothing.
    """
    log.info("Starting R script.....")
    script_path = resource_filename(__name__, 'r_scripts/step5.R')
    command = [
        "Rscript", "--vanilla", script_path,
        db.username, db.password, db.server, db.database,
    ]
    subprocess.call(command)
    log.info("R process finished.")
def test_workflow():
    """Start a run through the service API and poll until it reports 100% done."""
    endpoint = 'http://localhost:8000/ips-service/start/' + run_id
    log.info(f"Starting request... {endpoint}")
    response = requests.put(endpoint)
    assert (response.status_code == 200)

    status_endpoint = 'http://localhost:8000/ips-service/status/' + run_id
    last_reported = 0
    while True:
        response = requests.get(status_endpoint)
        assert (response.status_code == 200)
        perc_done = response.json()['percentage_done']
        # Only log when the percentage actually moves.
        if perc_done != last_reported:
            log.info(f"Percentage Done: {perc_done}")
            last_reported = perc_done
        if perc_done == 100:
            break
        time.sleep(10)
def _step_11(self, run_id: str) -> None:
    """Workflow step 11: rail imputation."""
    log.info(f"Calculation 11, [rail_imputation_step], process id: {os.getpid()}")
    rail_imputation.rail_imputation_step(run_id)
def set_status(self, step: str, status: int) -> None:
    """Record ``status`` against ``step`` in the status table and log the change."""
    self._current_status[step] = status
    log.info("Step: " + step + ", status: " + str(status))
def _step_3(self, run_id: str) -> None:
    """Workflow step 3: minimums weight."""
    log.info(f"Calculation 3, [minimums_weight_step], process id: {os.getpid()}")
    minimums_weight.minimums_weight_step(run_id)
def _step_5(self, run_id: str) -> None:
    """Workflow step 5: unsampled weight."""
    log.info(f"Calculation 5, [unsampled_weight_step], process id: {os.getpid()}")
    unsampled_weight.unsampled_weight_step(run_id)
def _step_4(self, run_id: str) -> None:
    """Workflow step 4: traffic weight."""
    log.info(f"Calculation 4, [traffic_weight_step], process id: {os.getpid()}")
    traffic_weight.traffic_weight_step(run_id)
def _step_14(self, run_id: str) -> None:
    """Workflow step 14: air miles."""
    log.info(f"Calculation 14, [airmiles.airmiles_step], process id: {os.getpid()}")
    airmiles.airmiles_step(run_id)
def _step_6(self, run_id: str) -> None:
    """Workflow step 6: imbalance weight."""
    log.info(f"Calculation 6, [imbalance_weight_step], process id: {os.getpid()}")
    imbalance_weight.imbalance_weight_step(run_id)
def _step_2(self, run_id: str) -> None:
    """Workflow step 2: non-response weight."""
    log.info(f"Calculation 2, [non_response_weight_step], process id: {os.getpid()}")
    non_response_weight.non_response_weight_step(run_id)
def _step_13(self, run_id: str) -> None:
    """Workflow step 13: town and stay expenditure imputation."""
    log.info(f"Calculation 13, [town_stay_expenditure_imputation_step], process id: {os.getpid()}")
    town_stay_expenditure.town_stay_expenditure_imputation_step(run_id)
def _spend_is_missing(value) -> bool:
    """Return True when ``value`` is NaN.

    NaN is the only value that compares unequal to itself, so this test
    works for floats and leaves ints/strings untouched.
    """
    return value != value


def compute_additional_spend(row):
    """Compute spend per person per visit for one survey row.

    For package holidays (DVPackage == 1), spend is imputed if the package
    cost is less than the cost of the fares. If all relevant fields are 0,
    the participant is assumed to have spent no money.

    BUG FIX: the original code compared values with ``== np.nan`` /
    ``!= np.nan``, which are always False / always True (NaN never equals
    itself), so every missing-value branch was dead. Those comparisons now
    use a real NaN test (``_spend_is_missing``).
    """
    if row[PACKAGE_VARIABLE] == 1:
        # Treat a falsy discounted package cost as missing.
        # NOTE(review): this uses the literal column 'DISCNT_PACKAGE_COST_PV'
        # while the checks below use DISCOUNTED_PACKAGE_COST_VARIABLE —
        # confirm these refer to the same column.
        if not row['DISCNT_PACKAGE_COST_PV']:
            row['DISCNT_PACKAGE_COST_PV'] = np.NaN

        if (row[PACKAGE_COST_VARIABLE] == 0
                and row[EXPENDITURE_VARIABLE] == 0
                and row[BEFAF_VARIABLE] == 0):
            # All relevant fields zero: assume no money spent.
            row[SPEND_VARIABLE] = 0
        elif (row[PACKAGE_COST_VARIABLE] == 999999
              or _spend_is_missing(row[PACKAGE_COST_VARIABLE])
              or _spend_is_missing(row[DISCOUNTED_PACKAGE_COST_VARIABLE])
              or _spend_is_missing(row[PERSONS_VARIABLE])
              or _spend_is_missing(row[OUTPUT_VARIABLE])
              or row[EXPENDITURE_VARIABLE] == 999999
              or _spend_is_missing(row[EXPENDITURE_VARIABLE])
              or _spend_is_missing(row[BEFAF_VARIABLE])
              or row[BEFAF_VARIABLE] == 999999):
            # 999999 is the survey's "refused/unknown" sentinel.
            row[SPEND_VARIABLE] = np.nan
        elif (((row[DISCOUNTED_PACKAGE_COST_VARIABLE]
                + row[EXPENDITURE_VARIABLE]
                + row[BEFAF_VARIABLE]) / row[PERSONS_VARIABLE])
              < (row[OUTPUT_VARIABLE] * 2)):
            # Package cost below the fares threshold: flag for imputation.
            log.info(row['SERIAL'])
            row[SPEND_VARIABLE] = np.nan
            row[SPEND_REASON_KEY_VARIABLE] = 1
        else:
            # NOTE(review): the threshold above uses OUTPUT * 2 but the
            # adjustment here subtracts (OUTPUT - 2); confirm this
            # asymmetry against the specification.
            row[SPEND_VARIABLE] = (
                (row[DISCOUNTED_PACKAGE_COST_VARIABLE]
                 + row[EXPENDITURE_VARIABLE]
                 + row[BEFAF_VARIABLE]) / row[PERSONS_VARIABLE]
            ) - (row[OUTPUT_VARIABLE] - 2)
    else:
        # DVPackage is 0
        if row[OLD_PACKAGE_VARIABLE] == 9:
            row[SPEND_VARIABLE] = np.nan
        elif row[EXPENDITURE_VARIABLE] == 0 and row[BEFAF_VARIABLE] == 0:
            row[SPEND_VARIABLE] = 0
        elif (row[EXPENDITURE_VARIABLE] == 999999
              or _spend_is_missing(row[EXPENDITURE_VARIABLE])
              or row[BEFAF_VARIABLE] == 999999
              or _spend_is_missing(row[BEFAF_VARIABLE])
              or _spend_is_missing(row[PERSONS_VARIABLE])):
            row[SPEND_VARIABLE] = np.nan
        else:
            row[SPEND_VARIABLE] = (row[EXPENDITURE_VARIABLE]
                                   + row[BEFAF_VARIABLE]) / row[PERSONS_VARIABLE]

    # Only add duty-free spend when spend is present (the original
    # `!= np.nan` guard was always True; NaN + duty-free was NaN anyway,
    # so observable results are unchanged).
    if not _spend_is_missing(row[SPEND_VARIABLE]):
        row[SPEND_VARIABLE] = row[SPEND_VARIABLE] + row[DUTY_FREE_VARIABLE]

    # Ensure the spend values are integers
    row[SPEND_VARIABLE] = round(row[SPEND_VARIABLE], 0)

    return row
def _step_10(self, run_id: str) -> None:
    """Workflow step 10: spend imputation."""
    log.info(f"Calculation 10, [spend_imputation_step], process id: {os.getpid()}")
    spend_imputation.spend_imputation_step(run_id)
def _step_9(self, run_id: str) -> None:
    """Workflow step 9: fares imputation."""
    log.info(f"Calculation 9, [fares_imputation_step], process id: {os.getpid()}")
    fares_imputation.fares_imputation_step(run_id)
def _step_8(self, run_id: str) -> None:
    """Workflow step 8: stay imputation."""
    log.info(f"Calculation 8, [stay_imputation_step], process id: {os.getpid()}")
    stay_imputation.stay_imputation_step(run_id)
def _step_7(self, run_id: str) -> None:
    """Workflow step 7: final weight."""
    log.info(f"Calculation 7, [final_weight_step], process id: {os.getpid()}")
    final_weight.final_weight_step(run_id)
def _step_12(self, run_id: str) -> None:
    """Workflow step 12: regional weights."""
    # Comma added after the step name for consistency with the other
    # thirteen step log messages ("Calculation N, [name], process id: ...").
    log.info(f"Calculation 12, [regional_weights_step], process id: {os.getpid()}")
    regional_weights.regional_weights_step(run_id)
def _step_1(self, run_id: str) -> None:
    """Workflow step 1: shift weight."""
    log.info(f"Calculation 1, [shift_weight calculation], process id: {os.getpid()}")
    shift_weight.shift_weight_step(run_id)