# NOTE(review): the physical line below holds many statements whose line
# breaks have been collapsed, so it does not parse as Python as written, and
# everything after its first `#` is lexically a comment.  Restore the line
# breaks before relying on it.  The fragment is also cut off mid-expression.
#
# Contents, in order of appearance:
#   - imports from sqlalchemy, luigi, etl_tasks, param_val, script_lib and
#     sql_syntax;
#   - `log`: logging.getLogger(__name__).  `logging` is not imported in the
#     visible text -- presumably imported earlier; TODO confirm;
#   - `TimeStampParam`: the result of pv._valueOf(datetime(2001, 1, 1, 0, 0, 0),
#     TimeStampParameter).  What _valueOf does with the datetime is not
#     visible here, and `datetime` is likewise not imported in the visible
#     text -- TODO confirm;
#   - class CMSExtract(SourceTask, DBAccessTask):
#       * `download_date` (TimeStampParam) and `cms_rif` (StrParam), both
#         described as 'see client.cfg';
#       * `script_variable = 'cms_source_cd'`;
#       * `source_cd = "'ccwdata.org'"` -- the string value itself contains
#         single quotes; open ISSUE noted by the author: separate source_cd
#         for yr1-3 vs 4-5? (see MappingReset);
#       * `table_eg = 'pde'`;
#       * `_dbtarget()`, annotated to return DBTarget; its
#         `return SchemaTarget(self._make_url(self.account), ...` call is cut
#         off after the first argument, so the remaining arguments are
#         unknown from here.
from sqlalchemy import MetaData from sqlalchemy.engine import RowProxy from sqlalchemy.exc import DatabaseError import luigi from etl_tasks import (SqlScriptTask, UploadTask, SourceTask, DBAccessTask, DBTarget, LoggedConnection, SchemaTarget, I2B2Task, TimeStampParameter) from param_val import StrParam, IntParam import param_val as pv from script_lib import Script import script_lib as lib from sql_syntax import Environment, Params log = logging.getLogger(__name__) TimeStampParam = pv._valueOf(datetime(2001, 1, 1, 0, 0, 0), TimeStampParameter) class CMSExtract(SourceTask, DBAccessTask): download_date = TimeStampParam(description='see client.cfg') cms_rif = StrParam(description='see client.cfg') script_variable = 'cms_source_cd' # ISSUE: separate source_cd for yr1-3 vs 4-5? see MappingReset source_cd = "'ccwdata.org'" table_eg = 'pde' def _dbtarget(self) -> DBTarget: return SchemaTarget(self._make_url(self.account),
# NOTE(review): the physical line below holds many statements whose line
# breaks have been collapsed, so it does not parse as Python as written, and
# everything after its first `#` (the `# type: ignore` on the pandas import)
# is lexically a comment.  Restore the line breaks before relying on it.
#
# Contents, in order of appearance:
#   - imports: datetime, typing (Iterable, List), sqlalchemy.exc.DatabaseError,
#     luigi, pandas, script_lib.Script, sql_syntax (Environment, Params),
#     cms_etl, cms_i2p, cms_pd (as rif_etl), etl_tasks (as et),
#     param_val (as pv);
#   - `DateParam`, `ListParam`, `SourceTaskParam`: each is the result of
#     pv._valueOf(<value>, <luigi parameter class>) with the values
#     datetime(2001, 1, 1, 0, 0, 0), ['abc'] and et.SourceTask() respectively.
#     What _valueOf does with the first argument is not visible here.
#     Note that et.SourceTask() is instantiated at module level;
#   - class CohortDatamart(cms_etl.FromCMS, et.UploadTask):
#       * `site_star_list` (ListParam, described as 'DATA_KUMC,DATA_MCW,...');
#       * `script = Script.cohort_i2b2_datamart`;
#       * `requires()`: returns a new list holding every task produced by
#         `_cohort_tasks()` (the comprehension is an element-for-element copy);
#       * `_cohort_tasks()`: builds one BuildCohort(site_star_schema=...) per
#         entry of `self.site_star_list`; the list comprehension is cut off
#         before its closing bracket, so anything after the `for` clause is
#         unknown from here.
from datetime import datetime from typing import Iterable, List from sqlalchemy.exc import DatabaseError import luigi import pandas as pd # type: ignore from script_lib import Script from sql_syntax import Environment, Params import cms_etl import cms_i2p import cms_pd as rif_etl import etl_tasks as et import param_val as pv DateParam = pv._valueOf(datetime(2001, 1, 1, 0, 0, 0), luigi.DateParameter) ListParam = pv._valueOf(['abc'], luigi.ListParameter) SourceTaskParam = pv._valueOf(et.SourceTask(), luigi.TaskParameter) class CohortDatamart(cms_etl.FromCMS, et.UploadTask): site_star_list = ListParam(description='DATA_KUMC,DATA_MCW,...') script = Script.cohort_i2b2_datamart def requires(self) -> List[luigi.Task]: return [just_task for just_task in self._cohort_tasks()] def _cohort_tasks(self) -> List['BuildCohort']: return [ BuildCohort(site_star_schema=star_schema) for star_schema in self.site_star_list