コード例 #1
0
def test_resource_static_path(setup_output_path):
    """A task can read an input file declared via ResourcePathStatic."""
    out_dir = get_out_dir(cfg_name=cfg_name)
    dag = DAG('test_stat',
              default_args=default_args,
              schedule_interval=timedelta(days=1))

    def callable1_dummy_text_read(tp, in_files, out_files, *op_args,
                                  **op_kwargs):
        # Open the statically-located input and verify its first line.
        with open(in_files['test_file'].path, 'r') as fh:
            first_line = fh.readline()
            assert first_line == 'test_data file content', \
                'Invalid file content!'
            return 'succeeded'
        return 'failed'  # unreachable: the with-body always returns or raises

    # Write the input file the operator will consume.
    input_path = osp.join(out_dir, 'test_data.txt')
    with open(input_path, 'w') as fh:
        fh.write("test_data file content")

    read_input_file = PythonPersistentOperator(
        task_id='read_input_file',
        force_execution=True,
        python_callable=callable1_dummy_text_read,
        input_files={'test_file': ResourcePathStatic(path=input_path)},
        dag=dag,
        cfg_name=cfg_name)

    ti = TaskInstance(task=read_input_file, execution_date=datetime.now())
    assert read_input_file.execute(ti.get_template_context()) == 'succeeded'
コード例 #2
0
def get_create_interval_metrics(dag, cfg_name, force_exec=False):
    """Build the operator computing interval metrics from 'predict' output."""
    task_id = 'create_interval_metrics'

    kwargs = {
        'interval_width':
        (parser.getint(cfg_name, 'test_interval_width'), HASH_IT),
        'title': (cfg_name, HASH_IT),
    }
    inputs = {
        'prediction_df':
        ResourcePathById(cfg_name=cfg_name,
                         origin_task_id='predict',
                         origin_resource_id='prediction_df'),
    }
    outputs = {
        'grid_png':
        ResourcePathOutput(cfg_name=cfg_name,
                           task_id=task_id,
                           resource_filename=f"grid_{cfg_name}.png"),
        'metrics_summary_file':
        ResourcePathOutput(
            cfg_name=cfg_name,
            task_id=task_id,
            resource_filename=f"metrics_summary_file_{cfg_name}.txt"),
    }

    return PythonPersistentOperator(task_id=task_id,
                                    force_execution=force_exec,
                                    python_callable=create_interval_metrics,
                                    ppo_kwargs=kwargs,
                                    input_files=inputs,
                                    output_files=outputs,
                                    dag=dag,
                                    cfg_name=cfg_name)
コード例 #3
0
def get_create_interval_metrics_tabular_xgboost(dag,
                                                cfg_name,
                                                force_exec=False):
    """Same as get_create_interval_metrics but fed by 'fit_predict_xgboost'.

    NOTE(review): only the input resource differs — a higher-level helper
    taking the origin task/resource ids could fold the two together.
    """
    task_id = 'create_interval_metrics_tabular_xgboost'

    kwargs = {
        'interval_width': (parser.getint(cfg_name, 'interval_width'), HASH_IT),
        'title': (cfg_name, HASH_IT),
    }
    inputs = {
        'prediction_df':
        ResourcePathById(cfg_name=cfg_name,
                         origin_task_id='fit_predict_xgboost',
                         origin_resource_id='df_interval_predictions'),
    }
    outputs = {
        'grid_png':
        ResourcePathOutput(cfg_name=cfg_name,
                           task_id=task_id,
                           resource_filename=f"grid_{cfg_name}.png"),
        'metrics_summary_file':
        ResourcePathOutput(
            cfg_name=cfg_name,
            task_id=task_id,
            resource_filename=f"metrics_summary_file_{cfg_name}.txt"),
    }

    return PythonPersistentOperator(task_id=task_id,
                                    force_execution=force_exec,
                                    python_callable=create_interval_metrics,
                                    ppo_kwargs=kwargs,
                                    input_files=inputs,
                                    output_files=outputs,
                                    dag=dag,
                                    cfg_name=cfg_name)
コード例 #4
0
def get_create_inference_dataset(dag, cfg_name, force_exec=False):
    """Build the 'create_inference_dataset' operator from test-range config."""
    task_id = 'create_inference_dataset'

    # All hashed so a config change re-triggers dataset creation.
    kwargs = {
        'start': (parser.getint(cfg_name, 'test_start'), HASH_IT),
        'end': (parser.getint(cfg_name, 'test_end'), HASH_IT),
        'interval_width':
        (parser.getint(cfg_name, 'test_interval_width'), HASH_IT),
        'interval_overlap':
        (parser.getint(cfg_name, 'test_interval_overlap'), HASH_IT),
        'graph_representation':
        (parser.get(cfg_name, 'graph_representation'), HASH_IT),
        'feature_extractor':
        (parser.get(cfg_name, 'feature_extractor'), HASH_IT),
    }
    inputs = {
        'raw_file': ResourcePathStatic(path=parser.get(cfg_name, 'raw_file')),
    }

    return PythonPersistentOperator(
        task_id=task_id,
        force_execution=force_exec,
        python_callable=create_dataset,
        ppo_kwargs=kwargs,
        input_files=inputs,
        output_files=get_inference_dataset_output_files(cfg_name),
        dag=dag,
        cfg_name=cfg_name)
コード例 #5
0
def get_fit_predict_random_forest_also(dag, cfg_name, force_exec=False):
    """Build the ALSO random-forest fit/predict operator from config."""
    task_id = 'fit_predict_random_forest_ALSO'

    continuous = parser.get(cfg_name, 'cont_variables').splitlines()

    kwargs = {
        'train_start': (parser.getint(cfg_name, 'train_start'), HASH_IT),
        'train_end': (parser.getint(cfg_name, 'train_end'), HASH_IT),
        'test_start': (parser.getint(cfg_name, 'test_start'), HASH_IT),
        'test_end': (parser.getint(cfg_name, 'test_end'), HASH_IT),
        'interval_width':
        (parser.getint(cfg_name, 'interval_width'), HASH_IT),
        'cont_variables': (continuous, HASH_IT),
        'mean_scaling_threshold':
        (parser.getfloat(cfg_name, 'mean_scaling_threshold'), HASH_IT),
        'random_forest_max_depth':
        (parser.getint(cfg_name, 'random_forest_max_depth'), HASH_IT),
        'random_forest_random_state':
        (parser.getint(cfg_name, 'random_forest_random_state'), HASH_IT),
        'random_forest_n_estimators':
        (parser.getint(cfg_name, 'random_forest_n_estimators'), HASH_IT),
        'folds': (parser.getint(cfg_name, 'folds'), HASH_IT),
        'samples_training_ratio':
        (parser.getfloat(cfg_name, 'samples_training_ratio'), HASH_IT),
        'model_type': (parser.get(cfg_name, 'model_type'), HASH_IT),
    }
    inputs = {
        'raw_file':
        ResourcePathStatic(path=parser.get(cfg_name, 'raw_file')),
        'features_file':
        ResourcePathStatic(path=parser.get(cfg_name, 'features_file')),
    }
    outputs = {
        'df_also_predicted_scores':
        ResourcePathOutput(cfg_name=cfg_name,
                           task_id=task_id,
                           resource_filename='df_also_predicted_scores.h5'),
        'df_all_predictions':
        ResourcePathOutput(cfg_name=cfg_name,
                           task_id=task_id,
                           resource_filename='df_all_predictions.h5'),
        'df_interval_predictions':
        ResourcePathOutput(cfg_name=cfg_name,
                           task_id=task_id,
                           resource_filename='df_interval_predictions.h5'),
        'row_based_metrics':
        ResourcePathOutput(
            cfg_name=cfg_name,
            task_id=task_id,
            resource_filename=f'row_based_predictions_metrics_{cfg_name}.txt'),
    }

    return PythonPersistentOperator(
        task_id=task_id,
        force_execution=force_exec,
        python_callable=fit_predict_random_forest_also,
        ppo_kwargs=kwargs,
        input_files=inputs,
        output_files=outputs,
        dag=dag,
        cfg_name=cfg_name)
コード例 #6
0
def get_fit_predict_local_outlier_factor(dag,
                                         cfg_name,
                                         force_exec=False,
                                         use_smote=False):
    """Build the local-outlier-factor fit/predict operator.

    Args:
        dag: DAG the operator is attached to.
        cfg_name: config section name used for all parser lookups.
        force_exec: forwarded as force_execution.
        use_smote: when True, SMOTE oversampling is enabled and its
            random state is read from config; otherwise None is passed.
    """
    task_id = 'fit_predict_local_outlier_factor'

    cont_variables = parser.get(cfg_name, 'cont_variables').splitlines()
    # BUG FIX: previously read the 'cont_variables' key here too, so the
    # categorical list silently duplicated the continuous one.
    cat_variables = parser.get(cfg_name, 'cat_variables').splitlines()

    # Only resolve the SMOTE seed when SMOTE is actually requested.
    smote_random_state = parser.getint(
        cfg_name, 'smote_random_state') if use_smote is True else None

    return PythonPersistentOperator(
        task_id=task_id,
        force_execution=force_exec,
        python_callable=fit_predict_local_outlier_factor,
        ppo_kwargs={
            'train_start': (parser.getint(cfg_name, 'train_start'), HASH_IT),
            'train_end': (parser.getint(cfg_name, 'train_end'), HASH_IT),
            'test_start': (parser.getint(cfg_name, 'test_start'), HASH_IT),
            'test_end': (parser.getint(cfg_name, 'test_end'), HASH_IT),
            'interval_width': (parser.getint(cfg_name,
                                             'interval_width'), HASH_IT),
            'cont_variables': (cont_variables, HASH_IT),
            'cat_variables': (cat_variables, HASH_IT),
            'n_neighbors': (parser.getint(cfg_name, 'n_neighbors'), HASH_IT),
            'contamination': (parser.getfloat(cfg_name,
                                              'contamination'), HASH_IT),
            'use_smote': (use_smote, HASH_IT),
            'smote_random_state': (smote_random_state, HASH_IT),
        },
        input_files={
            'raw_file':
            ResourcePathStatic(path=parser.get(cfg_name, 'raw_file')),
            'features_file':
            ResourcePathStatic(path=parser.get(cfg_name, 'features_file'))
        },
        output_files={
            'df_all_predictions':
            ResourcePathOutput(cfg_name=cfg_name,
                               task_id=task_id,
                               resource_filename='df_all_predictions.h5'),
            'df_interval_predictions':
            ResourcePathOutput(cfg_name=cfg_name,
                               task_id=task_id,
                               resource_filename='df_interval_predictions.h5'),
            'row_based_metrics':
            ResourcePathOutput(
                cfg_name=cfg_name,
                task_id=task_id,
                resource_filename=
                f'row_based_predictions_metrics_{cfg_name}.txt'),
        },
        dag=dag,
        cfg_name=cfg_name)
コード例 #7
0
def get_predict(dag, cfg_name, use_all_nodes=True, force_exec=False):
    """Build the 'predict' operator.

    use_all_nodes: True, False. When True, nodes_of_interest is set to an
    empty list and the predict function will run on all nodes. When False,
    it gets the list from the ini file.
    """
    task_id = 'predict'

    if use_all_nodes:
        nodes_of_interest = []
    else:
        nodes_of_interest = parser.getnodelist(
            cfg_name, get_nodes_of_interest(cfg_name))

    kwargs = {
        'start': (parser.gettimestamp(cfg_name, 'test_start'), HASH_IT),
        'end': (parser.gettimestamp(cfg_name, 'test_end'), HASH_IT),
        'interval_width':
        (parser.gettimedelta(cfg_name, 'test_interval_width'), HASH_IT),
        'svm_training_technique':
        (parser.get(cfg_name, 'svm_training_technique'), HASH_IT),
        'nodes_of_interest': (nodes_of_interest, HASH_IT),
        'reference_nodes':
        (parser.getnodelist(cfg_name,
                            parser.get(cfg_name, 'reference_nodes')), HASH_IT),
        'reference_victim_node':
        (parser.get(cfg_name, 'reference_victim_node'), HASH_IT),
        # Airflow variable names this task reads/writes; not hashed.
        'airflow_vars': ({
            'training_intervals_count':
            cfg_name + 'training_intervals_count'
        }, NO_HASH),
    }

    # Dataset files plus the model artifacts produced upstream.
    inputs = dict(get_inference_dataset_output_files(cfg_name))
    inputs['node_embeddings'] = ResourcePathById(
        cfg_name=cfg_name,
        origin_task_id='create_graph_model_node_embeddings',
        origin_resource_id='node_embeddings')
    inputs['trained_model'] = ResourcePathById(
        cfg_name=cfg_name,
        origin_task_id='train_graph_model',
        origin_resource_id='trained_model')

    outputs = {
        'prediction_df':
        ResourcePathOutput(cfg_name=cfg_name,
                           task_id=task_id,
                           resource_filename='df_prediction.h5'),
        'df_metrics':
        ResourcePathOutput(cfg_name=cfg_name,
                           task_id=task_id,
                           resource_filename='df_metrics.h5'),
    }

    return PythonPersistentOperator(task_id=task_id,
                                    force_execution=force_exec,
                                    python_callable=predict,
                                    ppo_kwargs=kwargs,
                                    input_files=inputs,
                                    output_files=outputs,
                                    dag=dag,
                                    cfg_name=cfg_name)
コード例 #8
0
def test_resource_task_indexed_path(setup_output_path):
    """A downstream task resolves an upstream dynamic output via its ids."""
    dag = DAG('test_dyn',
              default_args=default_args,
              schedule_interval=timedelta(days=1))

    def callable1_create_file(log, in_files, out_files, **op_kwargs):
        # Write to the dynamically-resolved output location.
        with open(out_files['test_file_dyn_location'].path, 'w') as fh:
            fh.write("testing dynamic paths")
            return 'succeeded'
        return 'failed'  # unreachable: the with-body always returns or raises

    def callable2_read_file(log, in_files, out_files, **op_kwargs):
        # Read back through the by-id input resolution.
        with open(in_files['test_file_dyn_location'].path, 'r') as fh:
            content = fh.readline()
            assert content == 'testing dynamic paths', 'Invalid file content!'
            return 'succeeded'
        return 'failed'  # unreachable: the with-body always returns or raises

    dyn_path = ResourcePathDynamic(path=[
        ('var', cfg_name + 'out_dir'),
        ('var', cfg_name + 'create_file_hash'),
        ('const', 'training'),
        ('const', 'test_data.txt'),
    ])

    create_file = PythonPersistentOperator(
        task_id='create_file',
        force_execution=True,
        python_callable=callable1_create_file,
        output_files={'test_file_dyn_location': dyn_path},
        dag=dag,
        cfg_name=cfg_name)

    read_file = PythonPersistentOperator(
        task_id='read_file',
        force_execution=True,
        python_callable=callable2_read_file,
        input_files={
            'test_file_dyn_location':
            ResourcePathById(cfg_name=cfg_name,
                             origin_task_id='create_file',
                             origin_resource_id='test_file_dyn_location')
        },
        dag=dag,
        cfg_name=cfg_name)

    ti1 = TaskInstance(task=create_file, execution_date=datetime.now())
    ti2 = TaskInstance(task=read_file, execution_date=datetime.now())

    assert create_file.execute(ti1.get_template_context()) == 'succeeded'
    assert read_file.execute(ti2.get_template_context()) == 'succeeded'
コード例 #9
0
def get_fit_predict_random_forest_classifier(dag, cfg_name, force_exec=False):
    """Build the random-forest classifier fit/predict operator."""
    task_id = 'fit_predict_random_forest_classifier'

    continuous = parser.get(cfg_name, 'cont_variables').splitlines()
    categorical = []  # no categorical variables for this model

    kwargs = {
        'train_start': (parser.getint(cfg_name, 'train_start'), HASH_IT),
        'train_end': (parser.getint(cfg_name, 'train_end'), HASH_IT),
        'test_start': (parser.getint(cfg_name, 'test_start'), HASH_IT),
        'test_end': (parser.getint(cfg_name, 'test_end'), HASH_IT),
        'interval_width':
        (parser.getint(cfg_name, 'interval_width'), HASH_IT),
        'cont_variables': (continuous, HASH_IT),
        'cat_variables': (categorical, HASH_IT),
        'n_estimators': (parser.getint(cfg_name, 'n_estimators'), HASH_IT),
        'max_depth': (parser.getint(cfg_name, 'max_depth'), HASH_IT),
        'random_state': (parser.getint(cfg_name, 'random_state'), HASH_IT),
    }
    inputs = {
        'raw_file':
        ResourcePathStatic(path=parser.get(cfg_name, 'raw_file')),
        'features_file':
        ResourcePathStatic(path=parser.get(cfg_name, 'features_file')),
    }
    outputs = {
        'df_all_predictions':
        ResourcePathOutput(cfg_name=cfg_name,
                           task_id=task_id,
                           resource_filename='df_all_predictions.h5'),
        'df_interval_predictions':
        ResourcePathOutput(cfg_name=cfg_name,
                           task_id=task_id,
                           resource_filename='df_interval_predictions.h5'),
        'row_based_metrics':
        ResourcePathOutput(
            cfg_name=cfg_name,
            task_id=task_id,
            resource_filename=f'row_based_predictions_metrics_{cfg_name}.txt'),
    }

    return PythonPersistentOperator(
        task_id=task_id,
        force_execution=force_exec,
        python_callable=fit_predict_random_forest_classifier,
        ppo_kwargs=kwargs,
        input_files=inputs,
        output_files=outputs,
        dag=dag,
        cfg_name=cfg_name)
コード例 #10
0
def get_fit_predict_xgboost(dag, cfg_name, force_exec=False):
    """Build the XGBoost fit/predict operator."""
    task_id = 'fit_predict_xgboost'

    kwargs = {
        'train_start': (parser.getint(cfg_name, 'train_start'), HASH_IT),
        'train_end': (parser.getint(cfg_name, 'train_end'), HASH_IT),
        'test_start': (parser.getint(cfg_name, 'test_start'), HASH_IT),
        'test_end': (parser.getint(cfg_name, 'test_end'), HASH_IT),
        'interval_width':
        (parser.getint(cfg_name, 'interval_width'), HASH_IT),
        'importance_type':
        (parser.get(cfg_name, 'importance_type'), HASH_IT),
    }
    inputs = {
        'raw_file':
        ResourcePathStatic(path=parser.get(cfg_name, 'raw_file')),
        'features_file':
        ResourcePathStatic(path=parser.get(cfg_name, 'features_file')),
    }
    outputs = {
        'df_all_predictions':
        ResourcePathOutput(cfg_name=cfg_name,
                           task_id=task_id,
                           resource_filename='df_all_predictions.h5'),
        'df_interval_predictions':
        ResourcePathOutput(cfg_name=cfg_name,
                           task_id=task_id,
                           resource_filename='df_interval_predictions.h5'),
        'features_importance':
        ResourcePathOutput(cfg_name=cfg_name,
                           task_id=task_id,
                           resource_filename='features_importance.txt'),
        'row_based_metrics':
        ResourcePathOutput(
            cfg_name=cfg_name,
            task_id=task_id,
            resource_filename=f'row_based_predictions_metrics_{cfg_name}.txt'),
    }

    return PythonPersistentOperator(task_id=task_id,
                                    force_execution=force_exec,
                                    python_callable=fit_predict_xgboost,
                                    ppo_kwargs=kwargs,
                                    input_files=inputs,
                                    output_files=outputs,
                                    dag=dag,
                                    cfg_name=cfg_name)
コード例 #11
0
def get_node_analysis(dag, cfg_name, force_exec=False):
    """Build the 'node_analysis' operator over the 'predict' metrics.

    Args:
        dag: DAG the operator is attached to.
        cfg_name: config section name used for all parser lookups.
        force_exec: forwarded as force_execution.
    """
    task_id = 'node_analysis'

    return PythonPersistentOperator(
        task_id=task_id,
        force_execution=force_exec,
        python_callable=nodes_analysis,
        ppo_kwargs={
            'experiment_name': (cfg_name, HASH_IT),
            'start': (parser.gettimestamp(cfg_name, 'test_start'), HASH_IT),
            'end': (parser.gettimestamp(cfg_name, 'test_end'), HASH_IT),
            'nodes_of_interest':
            (parser.getnodelist(cfg_name,
                                get_nodes_of_interest(cfg_name)), HASH_IT),
            # CONSISTENCY FIX: get_predict resolves 'reference_nodes' with
            # parser.getnodelist; here parser.get was used, passing a raw
            # string where the other tasks pass a parsed node list.
            'reference_nodes':
            (parser.getnodelist(cfg_name,
                                parser.get(cfg_name,
                                           'reference_nodes')), HASH_IT),
            'reference_victim_node':
            (parser.get(cfg_name, 'reference_victim_node'), HASH_IT),
        },
        input_files={
            'df_metrics':
            ResourcePathById(cfg_name=cfg_name,
                             origin_task_id='predict',
                             origin_resource_id='df_metrics')
        },
        output_files={
            'metrics_summary_file':
            ResourcePathOutput(
                cfg_name=cfg_name,
                task_id=task_id,
                resource_filename=f"metrics_summary_file_{cfg_name}.txt"),
            'df_detailed_classifier_data':
            ResourcePathOutput(
                cfg_name=cfg_name,
                task_id=task_id,
                resource_filename='df_detailed_classifier_data.h5'),
            'df_roc_classifier_data':
            ResourcePathOutput(cfg_name=cfg_name,
                               task_id=task_id,
                               resource_filename='df_roc_classifier_data.h5')
        },
        dag=dag,
        cfg_name=cfg_name)
コード例 #12
0
def get_create_graph_model_node_embeddings(dag,
                                           cfg_name,
                                           use_all_nodes=True,
                                           force_exec=False):
    """Build the operator running infer_graph_model to emit node embeddings.

    use_all_nodes: True, False. When True, nodes_of_interest is set to an
    empty list and the infer_graph_model function will run on all nodes.
    When False, it gets the list from the ini file.
    """
    task_id = 'create_graph_model_node_embeddings'

    if use_all_nodes:
        nodes_of_interest = []
    else:
        nodes_of_interest = parser.getnodelist(
            cfg_name, get_nodes_of_interest(cfg_name))

    kwargs = {
        'start': (parser.gettimestamp(cfg_name, 'test_start'), HASH_IT),
        'end': (parser.gettimestamp(cfg_name, 'test_end'), HASH_IT),
        'interval_width':
        (parser.gettimedelta(cfg_name, 'test_interval_width'), HASH_IT),
        'predicator_name':
        (parser.get(cfg_name, 'model_trainer_type'), NO_HASH),
        'hidden_dim': (parser.getint(cfg_name, 'hidden_dim'), HASH_IT),
        'nodes_of_interest': (nodes_of_interest, HASH_IT),
        'tensorboard_writer': (get_writer(cfg_name), NO_HASH),
    }

    # Inference dataset files plus the trained model from upstream.
    inputs = dict(get_inference_dataset_output_files(cfg_name))
    inputs['trained_model'] = ResourcePathById(
        cfg_name=cfg_name,
        origin_task_id='train_graph_model',
        origin_resource_id='trained_model')

    outputs = {
        'node_embeddings':
        ResourcePathOutput(
            cfg_name=cfg_name,
            task_id=task_id,
            resource_filename=get_trained_model_filename(cfg_name)),
    }

    return PythonPersistentOperator(task_id=task_id,
                                    force_execution=force_exec,
                                    python_callable=infer_graph_model,
                                    ppo_kwargs=kwargs,
                                    input_files=inputs,
                                    output_files=outputs,
                                    dag=dag,
                                    cfg_name=cfg_name)
コード例 #13
0
def get_train_graph_model(dag, cfg_name, force_exec=False):
    """Build the 'train_graph_model' operator from training-range config."""
    task_id = 'train_graph_model'

    # Epochs / writer / trainer name are NO_HASH: changing them alone does
    # not invalidate a previously-trained model.
    kwargs = {
        'start': (parser.gettimestamp(cfg_name, 'train_start'), HASH_IT),
        'end': (parser.gettimestamp(cfg_name, 'train_end'), HASH_IT),
        'interval_width':
        (parser.gettimedelta(cfg_name, 'train_interval_width'), HASH_IT),
        'hidden_dim': (parser.getint(cfg_name, 'hidden_dim'), HASH_IT),
        'feature_extractor':
        (parser.get(cfg_name, 'feature_extractor'), HASH_IT),
        'training_epochs':
        (parser.getint(cfg_name, 'training_epochs'), NO_HASH),
        'predicator_name':
        (parser.get(cfg_name, 'model_trainer_type'), NO_HASH),
        'tensorboard_writer': (get_writer(cfg_name), NO_HASH),
        'patience_epochs':
        (parser.getint(cfg_name, 'patience_epochs'), HASH_IT),
        'learning_rate':
        (parser.getfloat(cfg_name, 'learning_rate'), HASH_IT),
    }

    outputs = {
        'trained_model':
        ResourcePathOutput(
            cfg_name=cfg_name,
            task_id=task_id,
            resource_filename=get_trained_model_filename(cfg_name)),
    }

    return PythonPersistentOperator(
        task_id=task_id,
        force_execution=force_exec,
        python_callable=train_graph_model,
        ppo_kwargs=kwargs,
        input_files=get_training_dataset_output_files(cfg_name),
        output_files=outputs,
        dag=dag,
        cfg_name=cfg_name)
コード例 #14
0
def test_skip_task(setup_output_path):
    """Skip/re-run decisions follow force_execution and hashed ppo_kwargs."""
    out_dir = get_out_dir(cfg_name=cfg_name)
    dag = DAG('test_dyn',
              default_args=default_args,
              schedule_interval=timedelta(days=1))

    Variable.set('create_file' + '_hash', 0)

    def callable1_create_file(log, in_files, out_files, **op_kwargs):
        with open(out_files['test_file_dyn_location'].path, 'w') as fh:
            fh.write("testing dynamic paths")
            return 'succeeded'
        return 'failed'  # unreachable: the with-body always returns or raises

    # Pre-create the output file so the not-forced run can be skipped.
    with open(osp.join(out_dir, 'test_data.txt'), 'w') as fh:
        fh.write("test_data file content")

    t1_output_files = {
        'test_file_dyn_location':
        ResourcePathDynamic(path=[('var', cfg_name + 'out_dir'),
                                  ('const', 'test_data.txt')])
    }

    def run_create_file(force, ppo_kwargs=None):
        # Build a fresh operator and execute it once, returning its status.
        op = PythonPersistentOperator(
            task_id='create_file',
            force_execution=force,
            python_callable=callable1_create_file,
            ppo_kwargs=ppo_kwargs,
            output_files=t1_output_files,
            dag=dag,
            cfg_name=cfg_name)
        ti = TaskInstance(task=op, execution_date=datetime.now())
        return op.execute(ti.get_template_context())

    # Forced: always runs.
    assert run_create_file(True) == 'succeeded'

    # Not forced, no params: nothing changed, so it skips.
    assert run_create_file(False) == 'skipped'

    # Should run: new params
    some_task_params = {
        'start': (parser.gettimestamp(cfg_name, 'train_start'), HASH_IT),
        'end': (parser.gettimestamp(cfg_name, 'train_end'), NO_HASH),
        'interval_width':
        (parser.gettimedelta(cfg_name, 'train_interval_width'), HASH_IT),
        'interval_overlap':
        (parser.gettimedelta(cfg_name, 'train_interval_overlap'), HASH_IT)
    }
    assert run_create_file(False, some_task_params) == 'succeeded'

    # Should skip: only the NO_HASH entry differs from the previous run.
    some_other_task_params = {
        'start': (parser.gettimestamp(cfg_name, 'train_start'), HASH_IT),
        'end': (parser.gettimestamp(cfg_name, 'train_start'), NO_HASH),
        'interval_width':
        (parser.gettimedelta(cfg_name, 'train_interval_width'), HASH_IT),
        'interval_overlap':
        (parser.gettimedelta(cfg_name, 'train_interval_overlap'), HASH_IT)
    }
    assert run_create_file(False, some_other_task_params) == "skipped"

    # Should run: a HASH_IT entry changed.
    some_other_task_params = {
        'start': (parser.gettimestamp(cfg_name, 'train_end'), HASH_IT),
        'end': (parser.gettimestamp(cfg_name, 'train_end'), NO_HASH),
        'interval_width':
        (parser.gettimedelta(cfg_name, 'train_interval_width'), HASH_IT),
        'interval_overlap':
        (parser.gettimedelta(cfg_name, 'train_interval_overlap'), HASH_IT)
    }
    assert run_create_file(False, some_other_task_params) == 'succeeded'