from datetime import datetime, timedelta

from airflow import DAG
from airflow.models import Variable

# master_env, dict_merge and createTransferLearningTasks are defined elsewhere in this repo.


def createTransferLearningDAG(params):
    today = datetime.now().strftime("%Y%m%d")
    print("Creating Transfer Learning DAG for ", today)

    # Merge the environment-specific config over the defaults.
    default_cfg = master_env.get('default', {})
    env_cfg = master_env.get(params.get('env', 'default'), {})
    env = dict_merge(default_cfg, env_cfg)

    default_args = {
        'owner': 'Dr Cadx',
        'depends_on_past': False,
        'start_date': datetime(2020, 1, 1),
        'email': env.get('email', ''),
        'email_on_failure': True,
        'email_on_retry': True,
        'retries': env.get('airflow_retries', 1),
        'retry_delay': timedelta(minutes=5),
        'provide_context': True,
    }

    dag_name = 'Transfer_Learning_Modeling'
    print('Initialize DAG: {}'.format(dag_name))
    dag = DAG(dag_name, default_args=default_args)
    dag.catchup = False

    print('Retrieve The Transfer Learning Tasks')
    dag_tasks = createTransferLearningTasks(params, default_args)

    print('Sequencing the Transfer Learning Tasks')
    # A DAG object cannot be sequenced with >>; the tasks are already bound to
    # the DAG, so only task-to-task dependencies are declared here.
    dag_tasks.transfer_learning  # >> dag_tasks.validation

    return dag


params = {
    'model_name': Variable.get("model_name"),
    'train_path': Variable.get("train_path"),
    'test_path': Variable.get("test_path"),
    'val_path': Variable.get("val_path"),
}

dag = createTransferLearningDAG(params)
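# The factory above calls a dict_merge helper that is not defined in this
# file. A minimal sketch of such a helper (hypothetical; the repo's real
# implementation may differ) recursively overlays the env-specific config
# on top of the defaults:
def dict_merge(base, override):
    """Return a new dict with `override` recursively merged over `base`."""
    merged = dict(base)
    for key, value in override.items():
        if isinstance(value, dict) and isinstance(merged.get(key), dict):
            # Nested dicts are merged key by key rather than replaced wholesale.
            merged[key] = dict_merge(merged[key], value)
        else:
            merged[key] = value
    return merged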
def create_dag(self, dag_id, schedule_interval, catchup=None,
               is_paused_upon_creation=None, default_args=None,
               owner='Dagen', **options):
    # Avoid the mutable-default-argument pitfall, and copy so the
    # caller's dict is not mutated.
    default_args = dict(default_args or {})
    default_args['owner'] = owner
    default_args['pool'] = options.get('pool', 'default_pool') or 'default_pool'

    dag = DAG(
        dag_id,
        default_args=default_args,
        schedule_interval=schedule_interval,
        is_paused_upon_creation=is_paused_upon_creation,
    )

    max_active_runs = options.get('max_active_runs', None)
    if max_active_runs is not None:
        dag.max_active_runs = max_active_runs
    if catchup is not None:
        dag.catchup = catchup
    return dag
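# A minimal usage sketch of the factory above. The class that owns this
# method is not shown in this file, so a stub is assumed here purely for
# illustration; the dag_id and arguments are likewise illustrative:
class DagFactory:  # hypothetical stand-in for the real owning class
    create_dag = create_dag


example_dag = DagFactory().create_dag(
    'nightly_load_example',           # illustrative dag_id
    schedule_interval='0 2 * * *',
    catchup=False,
    default_args={'retries': 2},
    max_active_runs=1,
)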
import logging
from datetime import datetime, timedelta

from airflow import DAG
from airflow.hooks.postgres_hook import PostgresHook


def load_target_db(**kwargs):
    logging.info("Loading to target db ...")


# Create DAG
dag = DAG(
    'skip_past',
    description='Tests bash operator, email cases',
    start_date=datetime(2017, 11, 8, 23, 31, 0),
    # start_date=datetime.now() + timedelta(minutes=-20),
    schedule_interval="*/5 * * * *",
    # default_args=default_args,
)
dag.catchup = False


def get_num_active_dagruns(dag_id, conn_id='airflow_db'):
    """Count this DAG's in-flight runs straight from the Airflow metadata DB."""
    airflow_db = PostgresHook(postgres_conn_id=conn_id)
    conn = airflow_db.get_conn()
    cursor = conn.cursor()
    sql = """
        select count(*)
        from public.dag_run
        where dag_id = '{dag_id}'
        and state in ('running', 'queued', 'up_for_retry')
    """.format(dag_id=dag_id)
    cursor.execute(sql)
    num_active_dagruns = cursor.fetchone()[0]
    return num_active_dagruns
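# A hedged sketch of how get_num_active_dagruns could gate this DAG: a
# ShortCircuitOperator skips the rest of the run when another run is already
# in flight. The task id and threshold are illustrative, not from this repo.
from airflow.operators.python_operator import ShortCircuitOperator


def skip_if_another_run_active(**kwargs):
    # The current run counts itself, so anything above 1 means a second run.
    return get_num_active_dagruns('skip_past') <= 1


guard = ShortCircuitOperator(
    task_id='skip_if_busy',  # illustrative task id
    python_callable=skip_if_another_run_active,
    provide_context=True,
    dag=dag,
)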
dest_table_countries = "umg-swift.swift_alerts.trending_tracks_countries"
dest_table_regions = "umg-swift.swift_alerts.trending_tracks_regions"

# Probe today's partition of the aux table; used to wait until data lands.
sql_wait = ("SELECT * FROM `" + aux_table + "` "
            "WHERE _PARTITIONTIME='" + today_prime + "' LIMIT 2")
print("wait_sql=" + sql_wait)

# declare DAG
dagvelocitydaily = DAG(
    'velocity_queries_daily',
    description='Builds velocity tables by daily increments',
    start_date=datetime(2017, 12, 14, 0, 0, 0),
    schedule_interval="30 15 * * *",
    # schedule_interval=None,
    default_args=default_args)
dagvelocitydaily.catchup = False

task_streams_daily_velocity_base = VelocityStreamByDateOperator(
    task_id="create_release_table_daily",
    # schema_out=schema_out1,
    schema_out=None,
    sql=sql1.replace("@datePartition", 'date("{datePartition}")'),
    destination_table=aux_table,
    start_date=yesterday,
    end_date=today,
    dag=dagvelocitydaily)

# task_sleep = BashOperator(
#     task_id='sleep',
#     bash_command='sleep 1200',
#     dag=dagvelocitydaily)
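# A hedged sketch of how sql_wait could be used to poll until today's
# partition has data, via the google-cloud-bigquery client. The repo may
# instead use a custom operator or sensor; this helper is illustrative only.
import time

from google.cloud import bigquery


def wait_for_partition(sql, timeout_secs=3600, poll_secs=60):
    """Poll BigQuery until the probe query returns rows, or time out."""
    client = bigquery.Client()
    waited = 0
    while waited < timeout_secs:
        rows = list(client.query(sql).result())
        if rows:
            return True
        time.sleep(poll_secs)
        waited += poll_secs
    raise TimeoutError("Partition never appeared: " + sql)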
    {'name': 'lean_back_stream_count', 'type': 'INTEGER'},
    {'name': 'users', 'type': 'INTEGER'},
    {'name': 'users_isrc_day_sos', 'type': 'INTEGER'},
    {'name': 'users_isrc_day_lf', 'type': 'INTEGER'},
    {'name': 'users_isrc_day', 'type': 'INTEGER'},
    {'name': 'load_datetime', 'type': 'TIMESTAMP'}
]

# declare DAG
dagarchivedaily = DAG(
    'archive_first_year_daily',
    description='Builds archive by daily increments',
    start_date=datetime(2017, 12, 20, 0, 0, 0),
    # start_date=datetime.now() + timedelta(hours=-2),
    schedule_interval="0 16 * * *",
    default_args=default_args)
dagarchivedaily.catchup = False

task_release_daily = CreateReleaseOperator(
    task_id="create_release_table_daily",
    sql=sql1,
    destination_table="umg-dev.swift_alerts.isrc_first_stream_date",
    dag=dagarchivedaily)

# task_streams_daily = StreamByDateOperator(
#     task_id="create_stream_by_date_daily",
#     sql=sql2,
#     destination_table="umg-dev.swift_alerts.track_archive_by_stream_date",
#     # start_date="2017-10-26",
#     # for daily processing - run yesterday only:
#     start_date=str(datetime.now(pytz.utc) + timedelta(days=-1))[:10],
#     dag=dagarchivedaily)
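# A hedged sketch of how a schema list in this dict form could be turned into
# a BigQuery table. The repo's custom operators presumably handle this
# internally; the helper below is illustrative, not the repo's code path.
from google.cloud import bigquery


def create_table_from_schema(table_id, schema_dicts):
    """Create a BigQuery table from [{'name': ..., 'type': ...}, ...] entries."""
    client = bigquery.Client()
    schema = [bigquery.SchemaField(f['name'], f['type']) for f in schema_dicts]
    return client.create_table(bigquery.Table(table_id, schema=schema))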
    + ' --input_file ' + inputfile + '' \
    + ' --temp_directory ' + temp_directory + '' \
    + ' --schema track_history_schema.txt'
# loadGCStoBQ = "echo loadGCStoBQ is running"
loadGCStoBQ = loadGCStoBQ_beginning + loadGCStoBQ + t1nn_ending
print(loadGCStoBQ)

dagTask = DAG(
    'swift_trends_subscriptions',
    default_args=default_args,
    schedule_interval=schedule,
    start_date=datetime(2017, 11, 1, 0, 0, 0),
    # schedule_interval=timedelta(days=1),
)
dagTask.catchup = False

# t11, t12 and t13 are tasks created by instantiating operators
t11 = BashOperator(
    task_id='build_email_table_mongodb',
    # bash_command='java -cp /opt/app/swift-subscriptions/track-alerts/SwiftTrendSubscriptions-0.1.jar TrackAction.TrackActionSubscription --executionDateTest 2017-09-21 --project umg-dev --runner DataflowRunner --mongodb',
    bash_command=step1,
    dag=dagTask)

t12 = BashOperator(
    task_id='build_email_table_enrichment',
    # bash_command='java -cp /opt/app/swift-subscriptions/track-alerts/SwiftTrendSubscriptions-0.1.jar TrackAction.TrackActionSubscription --executionDateTest 2017-09-21 --project umg-dev --runner DataflowRunner --enrichment',
    bash_command=step2,
    dag=dagTask)

t13 = BashOperator(
    task_id='build_email_table_major_sql',
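# The file is truncated above mid-definition of t13. Downstream of the
# operator definitions, the three tasks would typically be chained in
# sequence. A hedged sketch only; the repo's actual dependencies may differ:
# t11 >> t12 >> t13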
# DAG configuration
# start_date = datetime(2017, 7, 12, 13, 0, 0, tzinfo=pytz.utc)
default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    # Note: schedule_interval in default_args is ignored by Airflow;
    # the effective schedule is the one passed to the DAG below.
    'schedule_interval': None,
    'email': ['*****@*****.**'],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 0,
    'retry_delay': timedelta(minutes=5)
}

# declare DAG
dagtest = DAG(
    'test_bash_operator',
    description='Tests bash operator, email cases',
    start_date=datetime(2017, 11, 3, 0, 10, 0),
    # start_date=datetime.now() + timedelta(hours=-2),
    schedule_interval="*/5 * * * *",
    default_args=default_args)
dagtest.catchup = False
# catchup_by_default is an airflow.cfg setting, not a DAG attribute; assigning
# it here has no effect and is kept only to mirror the config intent.
dagtest.catchup_by_default = False

t11 = BashOperator(
    task_id='catchup_control',
    bash_command="echo AAAAAAAAAAAAA BBBBBBBBBB CCCCCCCCCC " + str(datetime.now()),
    dag=dagtest)
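# In Airflow versions that support it, catchup can be passed straight to the
# DAG constructor instead of being set after construction. A hedged
# equivalent of the declaration above:
# dagtest = DAG(
#     'test_bash_operator',
#     description='Tests bash operator, email cases',
#     start_date=datetime(2017, 11, 3, 0, 10, 0),
#     schedule_interval="*/5 * * * *",
#     catchup=False,
#     default_args=default_args)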