Example #1
0
from task_builder import TaskBuilder
from airflow.models import DAG
from datetime import datetime

# Airflow DAG definition for a daily KDE-based log-quality pipeline:
# export training/eval data from Hive, train a KDE model on Spark,
# export test data, predict, and report.
dag_id = 'kde_sample'

# Task specification consumed by TaskBuilder below.
# NOTE(review): this is JSON-like but NOT strict JSON (keys and some values
# are unquoted) — presumably TaskBuilder's parser accepts this relaxed
# syntax; confirm before editing the format.
# The %Y%m%d tokens look like execution-date placeholders substituted at
# run time, and init_time:"-1d" looks like a one-day offset — TODO confirm
# against TaskBuilder's implementation.
# Dependency chain declared via upstream:
#   export_train -> export_eval -> train -> export_test -> predict -> report
tasks = '''{

init_time:"-1d",
export_train:{command:"ssh [email protected] /home/web_admin/log-quality/export_from_hive.sh bnsmkr completedmainquestcnt %Y%m%d train 10000 false"},
export_eval:{command:"ssh [email protected] /home/web_admin/log-quality/export_from_hive.sh bnsmkr completedmainquestcnt %Y%m%d eval 100 false", upstream:[export_train]},
train:{command:"ssh [email protected] /home/web_admin/log-quality/train_on_spark.sh bnsmkr completedmainquestcnt %Y%m%d simple_kde_app 10", upstream:[export_eval]},

export_test:{command:"ssh [email protected] /home/web_admin/log-quality/export_from_hive.sh bnsmkr completedmainquestcnt %Y%m%d test -1 false", upstream:[train]},
predict:{command:"ssh [email protected] /home/web_admin/log-quality/predict_on_spark.sh bnsmkr completedmainquestcnt %Y%m%d 10", upstream:[export_test]},
report:{command:"ssh [email protected] /home/web_admin/log-quality/report.sh hdfs://datalake/lqad bnsmkr completedmainquestcnt %Y%m%d 10 -1", upstream:[predict]}

}'''

# Daily run at midnight; at most 5 tasks in flight and 2 active DAG runs.
default_args = {'start_date': datetime(2019, 2, 5)}
dag = DAG(dag_id,
          schedule_interval='0 0 * * *',
          concurrency=5,
          max_active_runs=2,
          default_args=default_args)
# Parse the spec above and attach the resulting tasks to `dag`.
TaskBuilder().set(dag, tasks).build_tasks()
Example #2
0
 def add_task(self, raw_task):
     """Build a temperature task from *raw_task* and append it to the list."""
     task = TaskBuilder.build_temperature_task(self._settings, raw_task)
     self._tasks.append(task)
Example #3
0
 def update_task(self, raw_tasks):
     """Replace every stored task equal to raw_tasks[0] with raw_tasks[1].

     Both entries of *raw_tasks* are built into temperature tasks first;
     equality against the existing tasks decides which ones get swapped.
     """
     before = TaskBuilder.build_temperature_task(self._settings, raw_tasks[0])
     after = TaskBuilder.build_temperature_task(self._settings, raw_tasks[1])
     self._tasks = [after if current == before else current
                    for current in self._tasks]
Example #4
0
 def remove_task(self, raw_task):
     """Remove the first stored task equal to the one built from *raw_task*.

     Follows list.remove semantics: raises ValueError when no match exists.
     """
     target = TaskBuilder.build_temperature_task(self._settings, raw_task)
     self._tasks.remove(target)
Example #5
0
                    batch_size_eval=args.batch_size_eval,
                    learning_rate=args.LR,
                    epsilon=args.EPS,
                    weight_decay=args.WD,
                    early_stopping_patience=args.early_stopping_patience,
                    num_epochs=args.num_epochs,
                    num_fine_tuning_epochs=args.num_fine_tuning_epochs,
                    best_metric=args.best_metric,
                    zero_shot_label=args.zero_shot_label,
                    random_state=args.random_state,
                    cpu=args.cpu)

# Experiment tracking: register this run's hyper-parameters with Neptune.
logger = NeptuneLogger(args.neptune_username)
logger.create_experiment(PARAMS)

# Seed the task builder so task construction is reproducible across runs.
task_builder = TaskBuilder(random_state=PARAMS.random_state)

# Build one task per dataset; presumably keyed by dataset name, since
# membership is later tested against test_dataset_list — TODO confirm.
task_dict = task_builder.build_tasks(dataset_list, PARAMS)

# Tag the run by how many datasets were supplied.
run_type = "multi_task" if len(dataset_list) > 1 else "single_task"

# Create the test_task_dict for zero-shot evaluation if we have been supplied test tasks
if len(test_dataset_list) > 0:
    # If we already have a task created in the task_dict, it makes sense to just copy that task into the test_task_dict instead of creating a new task (saves on memory)
    already_created_tasks = [
        x for x in test_dataset_list if x in task_dict.keys()
    ]
    not_already_created_tasks = [
        x for x in test_dataset_list if x not in task_dict.keys()
    ]
    test_task_dict = task_builder.build_tasks(not_already_created_tasks,