from task_builder import TaskBuilder
from airflow.models import DAG
from datetime import datetime

# Identifier under which Airflow registers this pipeline.
dag_id = 'kde_sample'

# Task graph spec consumed by TaskBuilder: each entry names a task, its shell
# command, and its upstream dependencies. The %Y%m%d placeholders are resolved
# to the run date (presumably by TaskBuilder — confirm against its parser).
# NOTE: kept as a single-line literal; TaskBuilder parses this custom format.
tasks = '''{ init_time:"-1d", export_train:{command:"ssh [email protected] /home/web_admin/log-quality/export_from_hive.sh bnsmkr completedmainquestcnt %Y%m%d train 10000 false"}, export_eval:{command:"ssh [email protected] /home/web_admin/log-quality/export_from_hive.sh bnsmkr completedmainquestcnt %Y%m%d eval 100 false", upstream:[export_train]}, train:{command:"ssh [email protected] /home/web_admin/log-quality/train_on_spark.sh bnsmkr completedmainquestcnt %Y%m%d simple_kde_app 10", upstream:[export_eval]}, export_test:{command:"ssh [email protected] /home/web_admin/log-quality/export_from_hive.sh bnsmkr completedmainquestcnt %Y%m%d test -1 false", upstream:[train]}, predict:{command:"ssh [email protected] /home/web_admin/log-quality/predict_on_spark.sh bnsmkr completedmainquestcnt %Y%m%d 10", upstream:[export_test]}, report:{command:"ssh [email protected] /home/web_admin/log-quality/report.sh hdfs://datalake/lqad bnsmkr completedmainquestcnt %Y%m%d 10 -1", upstream:[predict]} }'''

# First schedulable execution date for the DAG.
default_args = {'start_date': datetime(2019, 2, 5)}

# Daily at midnight; at most 5 concurrent task instances and 2 active runs.
dag = DAG(
    dag_id,
    schedule_interval='0 0 * * *',
    concurrency=5,
    max_active_runs=2,
    default_args=default_args,
)

# Materialize the task spec into Airflow operators attached to `dag`.
TaskBuilder().set(dag, tasks).build_tasks()
def add_task(self, raw_task):
    """Build a temperature task from *raw_task* and append it to the task list."""
    task = TaskBuilder.build_temperature_task(self._settings, raw_task)
    self._tasks.append(task)
def update_task(self, raw_tasks):
    """Replace an existing task with an updated one.

    *raw_tasks* is a two-element sequence: the original raw task spec
    followed by its replacement. Every stored task equal to the original
    is swapped for the replacement; if none matches, the list is unchanged.
    """
    before = TaskBuilder.build_temperature_task(self._settings, raw_tasks[0])
    after = TaskBuilder.build_temperature_task(self._settings, raw_tasks[1])
    # Build a fresh list (rebinding, not in-place mutation) to match the
    # original semantics.
    updated = []
    for task in self._tasks:
        updated.append(after if task == before else task)
    self._tasks = updated
def remove_task(self, raw_task):
    """Remove the first stored task equal to the one built from *raw_task*.

    Raises ValueError (via list.remove) when no matching task exists.
    """
    task = TaskBuilder.build_temperature_task(self._settings, raw_task)
    self._tasks.remove(task)
batch_size_eval=args.batch_size_eval, learning_rate=args.LR, epsilon=args.EPS, weight_decay=args.WD, early_stopping_patience=args.early_stopping_patience, num_epochs=args.num_epochs, num_fine_tuning_epochs=args.num_fine_tuning_epochs, best_metric=args.best_metric, zero_shot_label=args.zero_shot_label, random_state=args.random_state, cpu=args.cpu) logger = NeptuneLogger(args.neptune_username) logger.create_experiment(PARAMS) task_builder = TaskBuilder(random_state=PARAMS.random_state) task_dict = task_builder.build_tasks(dataset_list, PARAMS) run_type = "multi_task" if len(dataset_list) > 1 else "single_task" # Create the test_task_dict for zero-shot evaluation if we have been supplied test tasks if len(test_dataset_list) > 0: # If we already have a task created in the task_dict, it makes sense to just copy that task into the test_task_dict instead of creating a new task (saves on memory) already_created_tasks = [ x for x in test_dataset_list if x in task_dict.keys() ] not_already_created_tasks = [ x for x in test_dataset_list if x not in task_dict.keys() ] test_task_dict = task_builder.build_tasks(not_already_created_tasks,