def import_reference_data():
    """Load all reference CSV files and the survey file for the current run.

    Uses ``run_id``, ``reference_data`` and ``survey_data`` from the
    enclosing scope. Files are imported in a fixed order: the three traffic
    files (Sea, Air, Tunnel), then Shift, Non Response and Unsampled, and
    finally the survey data.
    """
    log.info("-> Start data load")

    # (importer, CSV type, key into reference_data), listed in load order.
    load_plan = (
        (import_traffic, CSVType.Sea, 'Sea'),
        (import_traffic, CSVType.Air, 'Air'),
        (import_traffic, CSVType.Tunnel, 'Tunnel'),
        (import_shift, CSVType.Shift, 'Shift'),
        (import_non_response, CSVType.NonResponse, 'Non Response'),
        (import_unsampled, CSVType.Unsampled, 'Unsampled'),
    )
    for importer, csv_type, data_key in load_plan:
        importer(file_type=csv_type,
                 run_id=run_id,
                 file_name=reference_data[data_key])

    import_survey_file(survey_data_path=survey_data, run_id=run_id)

    log.info("-> End data load")
    def on_put(self, req: Request, resp: Response, run_id: str) -> None:
        """Handle PUT: start the workflow calculations for ``run_id``.

        The calculations run on a background thread, so the response is
        written as soon as the thread has been started.

        Responses:
            * HTTP 403 (raised) when a workflow is already in progress.
            * HTTP 401 with an "invalid job id" status body when the database
              does not recognise ``run_id``.
            * HTTP 400 (raised) when a ``ValueError`` escapes the start-up
              code.
            * Otherwise a "started job" status body.
        """
        # Only one workflow may run at a time.
        if self.workflow.in_progress():
            error = f"Can only run one instance of a workflow at a time, {run_id} rejected."
            log.error(error)
            raise falcon.HTTPError(falcon.HTTP_403, 'Concurrency Error', error)

        log.info("Starting calculations for RUN_ID: " + run_id)

        try:
            if db.is_valid_run_id(run_id):
                # Run the calculations in the background so this request
                # does not block until they finish.
                worker = threading.Thread(
                    target=self.workflow.run_calculations,
                    args=(run_id,),
                )
                worker.start()

                started_message = f"started job: {run_id}"
                log.info(started_message)
                resp.body = json.dumps({'status': started_message})
            else:
                resp.status = falcon.HTTP_401
                resp.body = json.dumps({'status': "invalid job id: " + run_id})

        except ValueError:
            raise falcon.HTTPError(falcon.HTTP_400, 'Invalid JSON',
                                   'Could not decode the request body. The JSON was invalid.')
 def wrapper(self, run_id: str):
     """Run the wrapped ``func`` with status tracking, unless cancelled.

     When the workflow reports it has been cancelled, the step is marked
     as cancelled and ``func`` is not called. Otherwise the step is marked
     in progress, ``func`` is called, and the step is then marked done.
     """
     # The status key is the function name minus its first character,
     # e.g. a function named "_step_1" is tracked as "step_1".
     step_name = func.__name__[1:]

     if self.is_cancelled():
         self.set_status(step_name, self._CANCELLED)
         log.info(
             f"Processing cancelled. Skipping step {step_name}")
         return

     self.set_status(step_name, self._IN_PROGRESS)
     func(self, run_id)
     self.set_status(step_name, self._DONE)
    def _initialize(self) -> None:
        """Clear the subsample tables and reset every step's status.

        ``clear_memory_table(name)`` returns a callable, which is invoked
        immediately for each table. The status map is then rebuilt with
        the keys ``step_1`` .. ``step_14``, all set to ``_NOT_STARTED``.
        """
        for table_name in ("SURVEY_SUBSAMPLE", "SAS_SURVEY_SUBSAMPLE"):
            clear_memory_table(table_name)()

        self._current_status = {
            f"step_{step_number}": IPSWorkflow._NOT_STARTED
            for step_number in range(1, 15)
        }
        log.info("Cleared current_status")
    def run_calculations(self, run_id: str) -> None:
        """Run every entry of ``self._dag_list`` in order for ``run_id``.

        The workflow state is reset first, then ``_in_progress`` is set for
        the duration of the run. Each entry of ``_dag_list`` maps a step
        name to the value handed to ``_run_steps``.

        The flag is cleared in a ``finally`` block so that an exception
        raised by a step cannot leave the workflow marked as running
        forever (presumably this flag is what ``in_progress()`` reports --
        confirm against the class). The exception itself still propagates.

        Args:
            run_id: Identifier of the run to calculate.
        """
        self._initialize()
        self._in_progress = True

        try:
            for step_name, steps in self._dag_list.items():
                log.info(f"--> Start Step: {step_name}")
                self._run_steps(steps, run_id)
                log.info(f"--> End Step: {step_name}\n")
        finally:
            # Always release the flag, even when a step fails.
            self._in_progress = False
Beispiel #6
0
def run_r_ges_script() -> None:
    """
    Author       : David Powell
    Date         : 07/06/2018
    Purpose      : Calls R Script to run GES Weighting
    Parameters   :
    Returns      : Writes GES output to SQL Database
    Requirements : NA
    Dependencies : NA

    Runs the packaged ``r_scripts/step5.R`` through ``Rscript --vanilla``,
    passing the database username, password, server and database name as
    command-line arguments, and waits for the process to exit.

    A non-zero exit status is logged as an error; no exception is raised,
    so callers behave as before.
    """

    log.info("Starting R script.....")

    step5 = resource_filename(__name__, 'r_scripts/step5.R')

    # NOTE(review): the database password is passed on the command line,
    # which can expose it to other local users via the process list.
    # Changing that needs a matching change in step5.R.
    exit_code = subprocess.call([
        "Rscript", "--vanilla", step5, db.username, db.password, db.server,
        db.database
    ])

    # Previously the exit status was discarded, so a failed R run was
    # indistinguishable from a successful one in the logs.
    if exit_code != 0:
        log.error(f"R script {step5} exited with non-zero status {exit_code}")

    log.info("R process finished.")
def test_workflow():
    """Start a run over HTTP and poll its status until it reports 100%.

    Both the start request and every status request must return HTTP 200.
    The percentage is logged whenever it changes, and the status endpoint
    is polled every 10 seconds until ``percentage_done`` equals 100.
    """
    endpoint = 'http://localhost:8000/ips-service/start/' + run_id
    log.info(f"Starting request... {endpoint}")
    start_response = requests.put(endpoint)

    assert start_response.status_code == 200

    status_endpoint = 'http://localhost:8000/ips-service/status/' + run_id

    last_logged = 0

    while True:
        status_response = requests.get(status_endpoint)
        assert status_response.status_code == 200
        perc_done = status_response.json()['percentage_done']

        # Only log when the reported progress has moved.
        if perc_done != last_logged:
            log.info(f"Percentage Done: {perc_done}")
            last_logged = perc_done

        if perc_done == 100:
            break
        time.sleep(10)
 def _step_11(self, run_id: str) -> None:
     """Log the current process id, then run the rail imputation step."""
     pid = os.getpid()
     log.info(f"Calculation 11, [rail_imputation_step], process id: {pid}")
     rail_imputation.rail_imputation_step(run_id)
 def set_status(self, step: str, status: int) -> None:
     """Record ``status`` for ``step`` in the status map and log the change."""
     self._current_status.update({step: status})
     log.info(f"Step: {step}, status: {status}")
 def _step_3(self, run_id: str) -> None:
     """Log the current process id, then run the minimums weight step."""
     pid = os.getpid()
     log.info(f"Calculation 3, [minimums_weight_step], process id: {pid}")
     minimums_weight.minimums_weight_step(run_id)
 def _step_5(self, run_id: str) -> None:
     """Log the current process id, then run the unsampled weight step."""
     pid = os.getpid()
     log.info(f"Calculation 5, [unsampled_weight_step], process id: {pid}")
     unsampled_weight.unsampled_weight_step(run_id)
 def _step_4(self, run_id: str) -> None:
     """Log the current process id, then run the traffic weight step."""
     pid = os.getpid()
     log.info(f"Calculation 4, [traffic_weight_step], process id: {pid}")
     traffic_weight.traffic_weight_step(run_id)
 def _step_14(self, run_id: str) -> None:
     """Log the current process id, then run the airmiles step."""
     pid = os.getpid()
     log.info(f"Calculation 14, [airmiles.airmiles_step], process id: {pid}")
     airmiles.airmiles_step(run_id)
 def _step_6(self, run_id: str) -> None:
     """Log the current process id, then run the imbalance weight step."""
     pid = os.getpid()
     log.info(f"Calculation 6, [imbalance_weight_step], process id: {pid}")
     imbalance_weight.imbalance_weight_step(run_id)
 def _step_2(self, run_id: str) -> None:
     """Log the current process id, then run the non-response weight step."""
     pid = os.getpid()
     log.info(f"Calculation 2, [non_response_weight_step], process id: {pid}")
     non_response_weight.non_response_weight_step(run_id)
 def _step_13(self, run_id: str) -> None:
     """Log the current process id, then run the town/stay expenditure imputation step."""
     pid = os.getpid()
     log.info(
         f"Calculation 13, [town_stay_expenditure_imputation_step], process id: {pid}"
     )
     town_stay_expenditure.town_stay_expenditure_imputation_step(run_id)
def _is_nan(value) -> bool:
    """Return True when *value* is NaN.

    NaN is the only value that is not equal to itself, so this works for
    Python and NumPy floats and is simply False for ints and ``None``.
    (``value == np.nan`` is always False, which is why it cannot be used.)
    """
    return bool(value != value)


def compute_additional_spend(row):
    """Compute spend per person per visit for one record, in place.

    For package holidays (package flag equal to 1) the spend is derived
    from the discounted package cost, expenditure and before/after spend,
    divided by the number of persons, less a fare-based amount. It is set
    to NaN, with the reason key set to 1, when the per-person cost is
    below twice the fare. If all relevant fields are 0 the participant is
    assumed to have spent no money. For non-package records the spend is
    (expenditure + before/after spend) / persons.

    A value of 999999 or NaN in any of the checked input fields yields a
    NaN spend. Duty-free spend is added to any non-NaN result and the
    total is rounded to a whole number.

    Args:
        row: Mapping-like record supporting item get/set by column name
            (presumably a pandas Series from ``DataFrame.apply`` -- confirm
            against the caller).

    Returns:
        The same ``row`` object, updated with the spend value (and the
        spend reason key where applicable).
    """
    if row[PACKAGE_VARIABLE] == 1:
        # A falsy discounted package cost (0, None) is treated as missing.
        if not row['DISCNT_PACKAGE_COST_PV']:
            row['DISCNT_PACKAGE_COST_PV'] = np.nan

        if row[PACKAGE_COST_VARIABLE] == 0 and row[
                EXPENDITURE_VARIABLE] == 0 and row[BEFAF_VARIABLE] == 0:
            row[SPEND_VARIABLE] = 0

        elif (row[PACKAGE_COST_VARIABLE] == 999999
              or row[EXPENDITURE_VARIABLE] == 999999
              or row[BEFAF_VARIABLE] == 999999
              or _is_nan(row[PACKAGE_COST_VARIABLE])
              or _is_nan(row[DISCOUNTED_PACKAGE_COST_VARIABLE])
              or _is_nan(row[PERSONS_VARIABLE])
              or _is_nan(row[OUTPUT_VARIABLE])
              or _is_nan(row[EXPENDITURE_VARIABLE])
              or _is_nan(row[BEFAF_VARIABLE])):
            # Unknown (999999) or missing inputs: spend cannot be derived.
            row[SPEND_VARIABLE] = np.nan

        elif (((row[DISCOUNTED_PACKAGE_COST_VARIABLE] +
                row[EXPENDITURE_VARIABLE] + row[BEFAF_VARIABLE]) /
               row[PERSONS_VARIABLE]) < (row[OUTPUT_VARIABLE] * 2)):
            # Per-person cost is below twice the fare: leave the spend
            # missing and record the reason.
            log.info(row['SERIAL'])
            row[SPEND_VARIABLE] = np.nan
            row[SPEND_REASON_KEY_VARIABLE] = 1

        else:
            # NOTE(review): the check above compares against fare * 2 but
            # this subtracts (fare - 2); that looks like a typo for
            # (fare * 2). Left unchanged -- confirm against the
            # specification before altering the published figures.
            row[SPEND_VARIABLE] = (
                (row[DISCOUNTED_PACKAGE_COST_VARIABLE] +
                 row[EXPENDITURE_VARIABLE] + row[BEFAF_VARIABLE]) /
                row[PERSONS_VARIABLE]) - (row[OUTPUT_VARIABLE] - 2)

    # DVPackage is 0
    else:
        if row[OLD_PACKAGE_VARIABLE] == 9:
            row[SPEND_VARIABLE] = np.nan

        elif row[EXPENDITURE_VARIABLE] == 0 and row[BEFAF_VARIABLE] == 0:
            row[SPEND_VARIABLE] = 0

        elif (row[EXPENDITURE_VARIABLE] == 999999
              or row[BEFAF_VARIABLE] == 999999
              or _is_nan(row[EXPENDITURE_VARIABLE])
              or _is_nan(row[BEFAF_VARIABLE])
              or _is_nan(row[PERSONS_VARIABLE])):
            row[SPEND_VARIABLE] = np.nan

        else:
            row[SPEND_VARIABLE] = (row[EXPENDITURE_VARIABLE] +
                                   row[BEFAF_VARIABLE]) / row[PERSONS_VARIABLE]

    # Only add duty-free spend to a spend that was actually derived.
    if not _is_nan(row[SPEND_VARIABLE]):
        row[SPEND_VARIABLE] = row[SPEND_VARIABLE] + row[DUTY_FREE_VARIABLE]

    # Round the spend to a whole number (NaN stays NaN).
    row[SPEND_VARIABLE] = round(row[SPEND_VARIABLE], 0)

    return row
 def _step_10(self, run_id: str) -> None:
     """Log the current process id, then run the spend imputation step."""
     pid = os.getpid()
     log.info(f"Calculation 10, [spend_imputation_step], process id: {pid}")
     spend_imputation.spend_imputation_step(run_id)
 def _step_9(self, run_id: str) -> None:
     """Log the current process id, then run the fares imputation step."""
     pid = os.getpid()
     log.info(f"Calculation 9, [fares_imputation_step], process id: {pid}")
     fares_imputation.fares_imputation_step(run_id)
 def _step_8(self, run_id: str) -> None:
     """Log the current process id, then run the stay imputation step."""
     pid = os.getpid()
     log.info(f"Calculation 8, [stay_imputation_step], process id: {pid}")
     stay_imputation.stay_imputation_step(run_id)
 def _step_7(self, run_id: str) -> None:
     """Log the current process id, then run the final weight step."""
     pid = os.getpid()
     log.info(f"Calculation 7,  [final_weight_step], process id: {pid}")
     final_weight.final_weight_step(run_id)
 def _step_12(self, run_id: str) -> None:
     """Log the current process id, then run the regional weights step."""
     pid = os.getpid()
     log.info(f"Calculation 12, [regional_weights_step] process id: {pid}")
     regional_weights.regional_weights_step(run_id)
 def _step_1(self, run_id: str) -> None:
     """Log the current process id, then run the shift weight step."""
     pid = os.getpid()
     log.info(f"Calculation 1, [shift_weight calculation], process id: {pid}")
     shift_weight.shift_weight_step(run_id)