from datetime import datetime, timedelta

from airflow import DAG
from airflow.models import Variable


def createTransferLearningDAG(params):
    today = datetime.now().strftime("%Y%m%d")
    print("Creating Transfer Learning DAG for ", today)

    default_cfg = master_env.get('default', {})
    env_cfg = master_env.get(params.get('env', 'default'), {})
    cfg = dict_merge(default_cfg, env_cfg)

    default_args = {
        'owner': 'Dr Cadx',
        'depends_on_past': False,
        'start_date': datetime(2020, 1, 1),
        'email': cfg.get('email', ''),
        'email_on_failure': True,
        'email_on_retry': True,
        'retries': cfg.get('airflow_retries', 1),
        'retry_delay': timedelta(minutes=5),
        'provide_context': True
    }

    dag_name = 'Transfer_Learning_Modeling'
    print('Initialize DAG: {}'.format(dag_name))

    dag = DAG(dag_name,
              default_args=default_args)

    dag.catchup = False

    print('Retrieve The Transfer Learning Tasks')
    dag_tasks = createTransferLearningTasks(params, default_args)

    print('Sequencing the Transfer Learning Tasks')
    dag >> dag_tasks.transfer_learning
    # >> dag_tasks.validation

    return dag


params = {
    'model_name': Variable.get("model_name"),
    'train_path': Variable.get("train_path"),
    'test_path': Variable.get("test_path"),
    'val_path': Variable.get("val_path")
}

dag = createTransferLearningDAG(params)
Example #2
def create_dag(self,
               dag_id,
               schedule_interval,
               catchup=None,
               is_paused_upon_creation=None,
               default_args=None,
               owner='Dagen',
               **options):
    # Copy to avoid mutating a shared or caller-owned default_args dict.
    default_args = dict(default_args or {})
    default_args['owner'] = owner
    default_args['pool'] = options.get('pool',
                                       'default_pool') or 'default_pool'
    dag = DAG(
        dag_id,
        default_args=default_args,
        schedule_interval=schedule_interval,
        is_paused_upon_creation=is_paused_upon_creation,
    )
    max_active_runs = options.get('max_active_runs', None)
    if max_active_runs is not None:
        dag.max_active_runs = max_active_runs
    if catchup is not None:
        dag.catchup = catchup
    return dag
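
# Usage sketch (not part of the original example): since create_dag does not use
# self, it can be exercised directly. Every id, schedule, and option below is an
# illustrative assumption, and `from airflow import DAG` is assumed to be in scope.
example_dag = create_dag(
    None,                               # placeholder for the unused self argument
    dag_id='example_factory_dag',
    schedule_interval='0 2 * * *',
    catchup=False,
    default_args={'retries': 1},
    owner='data-eng',
    pool='etl_pool',                    # forwarded through **options
    max_active_runs=1)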
Example #3
def load_target_db(**kwargs):
    logging.info("Loading to target db ...")


# Create DAG
dag = DAG(
    'skip_past',
    description='Tests bash,operator,email cases',
    start_date=datetime(2017, 11, 8, 23, 31, 0),
    # start_date=datetime.now() + timedelta(minutes=-20),
    schedule_interval="*/5 * * * *",
    # default_args=default_args,
)
dag.catchup = False
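
# Sketch (not in the original snippet): one way to wire the load_target_db stub above
# into this DAG. The task id is an illustrative assumption; provide_context=True is
# the pre-Airflow-2 way of passing context kwargs to the callable.
from airflow.operators.python_operator import PythonOperator

load_target_task = PythonOperator(
    task_id='load_target_db',
    python_callable=load_target_db,
    provide_context=True,
    dag=dag)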


def get_num_active_dagruns(dag_id, conn_id='airflow_db'):
    # Count DAG runs that are still in flight for this dag_id by querying the
    # Airflow metadata database directly.
    from airflow.hooks.postgres_hook import PostgresHook

    airflow_db = PostgresHook(postgres_conn_id=conn_id)
    conn = airflow_db.get_conn()
    cursor = conn.cursor()
    sql = """
select count(*)
from public.dag_run
where dag_id = '{dag_id}'
  and state in ('running', 'queued', 'up_for_retry')
""".format(dag_id=dag_id)
    cursor.execute(sql)
    num_active_dagruns = cursor.fetchone()[0]
    return num_active_dagruns
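
# Sketch (not in the original snippet): a common use of this count is to short-circuit
# a run when another one is already active, which fits the 'skip_past' DAG above.
# The task id and the <= 1 threshold are illustrative assumptions.
from airflow.operators.python_operator import ShortCircuitOperator

skip_if_busy = ShortCircuitOperator(
    task_id='skip_if_another_run_active',
    # the current run counts itself, so more than 1 means another run is in flight
    python_callable=lambda: get_num_active_dagruns('skip_past') <= 1,
    dag=dag)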
Example #4
dest_table_countries = "umg-swift.swift_alerts.trending_tracks_countries"
dest_table_regions = "umg-swift.swift_alerts.trending_tracks_regions"

sql_wait = "SELECT * FROM `" + aux_table + "` WHERE _PARTITIONTIME='" + today_prime + "' LIMIT 2"
print("wait_sql=" + sql_wait)

# declare DAG
dagvelocitydaily = DAG(
    'velocity_queries_daily',
    description='Builds velocity daily by daily increments',
    start_date=datetime(2017, 12, 14, 0, 0, 0),
    schedule_interval="30 15 * * *",
    # schedule_interval="None",
    default_args=default_args)
dagvelocitydaily.catchup = False

task_streams_daily_velocity_base = VelocityStreamByDateOperator(
    task_id="create_release_table_daily",
    #schema_out = schema_out1,
    schema_out=None,
    sql=sql1.replace("@datePartition", 'date("{datePartition}")'),
    destination_table=aux_table,
    start_date=yesterday,
    end_date=today,
    dag=dagvelocitydaily)

#task_sleep = BashOperator(
#    task_id='sleep',
#    bash_command='sleep 1200',
#    dag=dagvelocitydaily
Example #5
    {'name': 'lean_back_stream_count', 'type': 'INTEGER'},
    {'name': 'users', 'type': 'INTEGER'},
    {'name': 'users_isrc_day_sos', 'type': 'INTEGER'},
    {'name': 'users_isrc_day_lf', 'type': 'INTEGER'},
    {'name': 'users_isrc_day', 'type': 'INTEGER'},
    {'name': 'load_datetime', 'type': 'TIMESTAMP'}
]

# declare DAG
dagarchivedaily = DAG(
    'archive_first_year_daily',
    description='Builds archive by daily increments',
    start_date=datetime(2017, 12, 20, 0, 0, 0),
    # start_date=datetime.now() + timedelta(hours=-2),
    schedule_interval="0 16 * * *",
    default_args=default_args)
dagarchivedaily.catchup = False

task_release_daily = CreateReleaseOperator(
    task_id="create_release_table_daily",
    sql=sql1,
    destination_table="umg-dev.swift_alerts.isrc_first_stream_date",
    dag=dagarchivedaily
)

# task_streams_daily = StreamByDateOperator(
#     task_id="create_stream_by_date_daily",
#     sql = sql2,
#     destination_table="umg-dev.swift_alerts.track_archive_by_stream_date",
#     #start_date="2017-10-26",
#     start_date=str(datetime.now(pytz.utc)+timedelta(days=-1))[:10],   # for daily processing - run yesterday only
#     dag=dagarchivedaily
Example #6
    + ' --input_file ' + inputfile + "" \
    + ' --temp_directory '+temp_directory +'' \
    + ' --schema track_history_schema.txt'

#loadGCStoBQ = "echo loadGCStoBQ is running"

loadGCStoBQ = loadGCStoBQ_beginning + loadGCStoBQ + t1nn_ending
print(loadGCStoBQ)

dagTask = DAG('swift_trends_subscriptions',
              default_args=default_args,
              schedule_interval=schedule,
              start_date=datetime(2017, 11, 1, 0, 0, 0)
              # schedule_interval=timedelta(days=1)
              )
dagTask.catchup = False
# t11, t12 and t13 are tasks created by instantiating operators
t11 = BashOperator(
    task_id='build_email_table_mongodb',
    #bash_command='java -cp /opt/app/swift-subscriptions/track-alerts/SwiftTrendSubscriptions-0.1.jar TrackAction.TrackActionSubscription --executionDateTest 2017-09-21 --project umg-dev --runner DataflowRunner --mongodb',
    bash_command=step1,
    dag=dagTask)

t12 = BashOperator(
    task_id='build_email_table_enrichment',
    #bash_command='java -cp /opt/app/swift-subscriptions/track-alerts/SwiftTrendSubscriptions-0.1.jar TrackAction.TrackActionSubscription --executionDateTest 2017-09-21 --project umg-dev --runner DataflowRunner --enrichment',
    bash_command=step2,
    dag=dagTask)

t13 = BashOperator(
    task_id='build_email_table_major_sql',
Example #7
# DAG configuration
#start_date = datetime(2017, 7, 12, 13, 0, 0, tzinfo=pytz.utc)

default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'schedule_interval': None,  # note: schedule_interval in default_args is ignored; set it on the DAG itself
    'email': ['*****@*****.**'],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 0,
    'retry_delay': timedelta(minutes=5)
}

# declare DAG
dagtest = DAG(
    'test_bash_operator',
    description='Tests bash,operator,email cases',
    start_date=datetime(2017, 11, 3, 0, 10, 0),
    # start_date=datetime.now() + timedelta(hours=-2),
    schedule_interval="*/5 * * * *",
    default_args=default_args)
dagtest.catchup = False
dagtest.catchup_by_default = False

# note: datetime.now() here is evaluated when the DAG file is parsed, not when the task runs
t11 = BashOperator(task_id='catchup_control',
                   bash_command="echo AAAAAAAAAAAAA BBBBBBBBBB CCCCCCCCCC " +
                   str(datetime.now()),
                   dag=dagtest)