def gen_bigartm_operator(actualizable_bigartms, comboable_bigartms, name, description, number_of_topics, filters,
                         regularization_params, wait_for_basic_tms, is_actualizable=False, name_translit=None,
                         topic_modelling_translit=None, is_comboable=True, text_field="text_lemmatized"):
    """Create a DjangoOperator that runs one BigARTM topic-modelling calculation.

    Also registers the model in the ``actualizable_bigartms`` /
    ``comboable_bigartms`` lists (mutated in place) so that other DAGs can
    re-actualize or combine it later.

    :param filters: dict with required keys 'corpus', 'source',
        'datetime_from', 'datetime_to'; optional 'group_id',
        'topic_weight_threshold', 'corpus_datetime_ignore'.
    :param wait_for_basic_tms: barrier operator ordering group-based TMs
        after the basic (ungrouped) ones.
    :param is_actualizable: if True, the model is recorded for periodic
        re-calculation.
    :param is_comboable: if True, the model is recorded for combo search.
    """
    # Local import keeps heavy service code out of DAG-file parse time.
    from dags.bigartm.services.service import bigartm_calc

    if not name_translit:
        task_id = f"bigartm_calc_{name}"
    else:
        task_id = f"bigartm_calc_{topic_modelling_translit}_{name_translit}"

    # Snapshot taken before any later caller-side mutation of `filters`
    # can leak into the actualization registry (shallow copy).
    filters_copy = filters.copy()
    bigartm_calc_operator = DjangoOperator(
        task_id=task_id,
        python_callable=bigartm_calc,
        op_kwargs={
            "name": name,
            "name_translit": name_translit,
            "text_field": text_field,
            "corpus": filters['corpus'],
            "corpus_datetime_ignore": filters.get('corpus_datetime_ignore', []),
            "source": filters['source'],
            "datetime_from": filters['datetime_from'],
            "datetime_to": filters['datetime_to'],
            # dict.get() replaces the verbose "x if 'x' in filters else ..." forms.
            "group_id": filters.get('group_id'),
            "topic_weight_threshold": filters.get('topic_weight_threshold', 0.05),
            "is_ready": False,
            "description": description,
            "datetime_created": datetime.now(),
            "algorithm": "BigARTM",
            "meta_parameters": {
            },
            "number_of_topics": number_of_topics,
            "regularization_params": regularization_params,
            "is_actualizable": is_actualizable,
        }
    )
    # Group-based TMs run after the barrier; basic (ungrouped) TMs feed it.
    if filters.get('group_id'):
        wait_for_basic_tms >> bigartm_calc_operator
    else:
        bigartm_calc_operator >> wait_for_basic_tms
    if is_actualizable:
        actualizable_bigartms.append(
            {
                "name": name,
                "name_translit": name_translit,
                "text_field": text_field,
                "regularization_params": regularization_params.copy(),
                "filters": filters_copy,
            }
        )
    if is_comboable:
        comboable_bigartms.append(
            {
                "name": name,
                "name_translit": name_translit,
                "text_field": text_field,
            }
        )
def create_tasks(dict_name, source_field, min_document_frequency_relative, max_n_gram_len, corpus=None, concurrency=5):
    # Fan out n-gram extraction for one dictionary across `concurrency`
    # parallel operators; each worker handles its shard via
    # process_num / total_proc.
    # NOTE(review): the visible chunk ends here — the collected operators are
    # neither wired nor returned in view; presumably the function continues
    # elsewhere. Confirm against the full file.
    lemmatize_operators = []
    for i in range(concurrency):
        lemmatize_operators.append(DjangoOperator(
            task_id=f"ngramize_{dict_name}_{i}",
            python_callable=ngramize,
            op_kwargs={
                "dict_name": dict_name,
                "source_field": source_field,
                "max_n_gram_len": max_n_gram_len,
                "min_document_frequency_relative": min_document_frequency_relative,
                "process_num": i,
                "total_proc": concurrency,
                "corpus": corpus,
            }
        ))
# NOTE(review): chunk starts mid default_args dict — opening lines are out of view.
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=30),
    'priority_weight': 90,
    'pool': 'short_tasks'
}

# Nightly DAG: collect per-topic statistics for every actualizable topic model.
dag = DAG('NLPmonitor_get_topics_info', catchup=False, max_active_runs=1, default_args=default_args,
          schedule_interval='0 23 * * *')

topic_info_getters = []
with dag:
    for tm in actualizable_bigartms:
        topic_info_getter = DjangoOperator(
            # Prefer the transliterated name so the task_id stays ASCII-safe.
            task_id=f"get_topics_info_{tm['name'] if not tm['name_translit'] else tm['name_translit']}",
            python_callable=calc_topics_info,
            op_kwargs={
                "corpus": tm["filters"]['corpus'],
                "topic_modelling_name": tm['name'],
                "topic_weight_threshold": tm["filters"]['topic_weight_threshold'] if 'topic_weight_threshold' in tm["filters"] else None,
            })
        topic_info_getters.append(topic_info_getter)
from DjangoOperator import DjangoOperator
from datetime import datetime, timedelta

from dags.pre_caching.services.service import pre_cache

# NOTE(review): `DAG` is used below but not imported in the visible chunk —
# presumably `from airflow import DAG` exists elsewhere; verify.
default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': datetime(2020, 12, 24),
    'email': ['*****@*****.**'],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 2,
    'retry_delay': timedelta(minutes=15),
    'priority_weight': 25,
    'pool': 'short_tasks'
    # 'queue': 'bash_queue',
    # 'priority_weight': 10,
    # 'end_date': datetime(2016, 1, 1),
}

# Hourly DAG that pre-warms the dashboard cache.
dag = DAG('Nlpmonitor_pre_cache', catchup=False, max_active_runs=1, default_args=default_args,
          schedule_interval='0 * * * *')

with dag:
    send_elastic = DjangoOperator(
        task_id="pre_cache_dashboard",
        python_callable=pre_cache,
    )
from dags.criterion_eval.init_criterions.service import init_criterions

default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': datetime(2019, 11, 14),
    'email': ['*****@*****.**'],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 2,
    'retry_delay': timedelta(minutes=15),
    'priority_weight': 95,
    'pool': 'short_tasks'
    # 'queue': 'bash_queue',
    # 'priority_weight': 10,
    # 'end_date': datetime(2016, 1, 1),
}

# Daily DAG initializing evaluation criteria.
dag = DAG('Criterion_init_var', catchup=False, max_active_runs=1, default_args=default_args,
          schedule_interval='0 12 * * *')

with dag:
    init_sources = DjangoOperator(
        # NOTE(review): "init_critetions" looks like a typo for "init_criterions";
        # left unchanged because renaming a task_id resets Airflow task history.
        task_id="init_critetions",
        python_callable=init_criterions,
    )
from dags.eval_dicts.services.calc_dict import calc_eval_dicts

default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': datetime(2019, 12, 12),
    'email': ['*****@*****.**'],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=30),
    'priority_weight': 90,
    'pool': 'short_tasks'
}

# Manually-triggered DAG (schedule_interval=None) building evaluation dictionaries.
dag = DAG('Generate_eval_dicts', catchup=False, max_active_runs=1, default_args=default_args,
          schedule_interval=None)

with dag:
    evaluator = DjangoOperator(task_id=f"calc_eval_dicts",
                               python_callable=calc_eval_dicts,
                               op_kwargs={
                                   # Tuple of topic modellings to evaluate.
                                   "topic_modellings_list": ("bigartm_two_years", ),
                               })
# One scraper task per social network, split into low/medium/high priority
# DAGs based on each account's priority_rate.
scrapers_low = []
scrapers_medium = []
scrapers_high = []

# Scraping by accounts
for social_network in networks:
    # Low
    with dag_low:
        scraper = DjangoOperator(
            task_id=f"scrap_{social_network['name']}_by_account_low",
            python_callable=scrap_wrapper,
            op_kwargs={
                "social_network": social_network['id'],
                # Only accounts of this network with priority_rate <= 25.
                "accounts": list(
                    filter(
                        lambda x: (x['social_network'] == social_network['id']) and (x['priority_rate'] <= 25),
                        accounts)),
                "by": "account",
            })
        scrapers_low.append(scraper)
    # Medium
    with dag_medium:
        scraper = DjangoOperator(
            task_id=f"scrap_{social_network['name']}_by_account_medium",
            python_callable=scrap_wrapper,
            # NOTE(review): chunk is cut off mid op_kwargs — remainder out of view.
            op_kwargs={
# NOTE(review): chunk starts mid default_args dict — opening lines are out of view.
    'email': ['*****@*****.**'],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=30),
    'priority_weight': 40,
    'pool': 'short_tasks',
}

# Weekly DAG (Fridays) refreshing per-index activity data in Elasticsearch.
dag = DAG('NLPMonitor_es_activity_update', catchup=False, max_active_runs=1, concurrency=10,
          default_args=default_args, schedule_interval='15 6 * * 5')

# Index list is configured via an Airflow Variable (JSON), empty list by default.
indices = json.loads(Variable.get('indices_update_activity', default_var="[]"))

with dag:
    # NOTE(review): these assignments shadow the imported callables of the same
    # names; safe here because python_callable is evaluated before rebinding,
    # but confirm nothing below (out of view) still needs the callables.
    init_update_datetime = DjangoOperator(
        task_id=f"init_update_datetime",
        python_callable=init_update_datetime,
    )
    updaters = []
    for index in indices:
        updaters.append(DjangoOperator(
            task_id=f"update_{index['name_translit']}",
            python_callable=es_update,
            op_kwargs={
                "index": index['name'],
            }
        )
        )
    set_update_datetime = DjangoOperator(
        task_id=f"set_update_datetime",
        python_callable=set_update_datetime,
    )
# Dictionary pre-filtering before co-occurrence/co-distance generation.
dictionary_filters = {
    "dictionary": "default_dict_pymorphy_2_4_393442_3710985",
    "document_normal_frequency__gte": 100,
    "document_normal_frequency__lte": 500000,
    "is_stop_word": False,
    # "is_in_pymorphy2_dict": True,
    # "is_multiple_normals_in_pymorphy2": False,
}
max_dict_size = 30000

# NOTE(review): rebinding shadows the imported callable of the same name;
# safe because python_callable is evaluated before the assignment.
generate_cooccurrence_codistance = DjangoOperator(
    task_id="generate_cooccurrence_codistance",
    python_callable=generate_cooccurrence_codistance,
    op_kwargs={
        "name": "test",
        "dictionary_filters": dictionary_filters,
        "max_dict_size": max_dict_size,
        "document_filters": {
            "corpus": "main",
            # "source": "https://kapital.kz/",
            # Effectively unbounded datetime range.
            "datetime__gte": date(1950, 1, 1),
            "datetime__lte": date(2050, 1, 1),
        },
    })

topic_modelling_operator = DjangoOperator(
    task_id="topic_modelling",
    python_callable=topic_modelling,
    op_kwargs={
        "name": "test",
        "d1": 1.75,  # Maximum allowed distance between all possible pairwise combinations of objects within the formed clumps (fuzzy proto-clusters)
        # NOTE(review): chunk is cut off here — the "d2" value is out of view.
        "d2":
from DjangoOperator import DjangoOperator
from datetime import datetime, timedelta

from dags.es_activity_update.init_indices.service import init_indices

# NOTE(review): `DAG` is used below but not imported in the visible chunk — verify.
default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': datetime(2020, 5, 22),
    'email': ['*****@*****.**'],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 2,
    'retry_delay': timedelta(minutes=15),
    'priority_weight': 95,
    'pool': 'short_tasks'
    # 'queue': 'bash_queue',
    # 'priority_weight': 10,
    # 'end_date': datetime(2016, 1, 1),
}

# Daily DAG that (re)initializes the index list used by the activity-update DAG.
dag = DAG('NLPMonitor_es_activity_update_init_indices', catchup=False, max_active_runs=1,
          default_args=default_args, schedule_interval='0 6 * * *')

with dag:
    init_sources = DjangoOperator(task_id="init_indices", python_callable=init_indices)
default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': datetime(2020, 5, 3),
    'email': ['*****@*****.**'],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 0,
    'retry_delay': timedelta(minutes=5),
    'priority_weight': 20,
    'pool': 'long_tasks',
}

# Manually-triggered DAG generating RuBERT embeddings.
dag = DAG(
    'Nlpmonitor_generate_rubert_embeddings',
    catchup=False,
    max_active_runs=1,
    default_args=default_args,
    schedule_interval=None
)

with dag:
    # Word
    # NOTE(review): variable name says "init_word_index" but the task actually
    # tests connectivity to the BERT service — likely a stale name; confirm.
    init_word_index = DjangoOperator(
        task_id="test_connections_to_bert_service",
        python_callable=test_connections_to_bert_service,
        # Connectivity check is cheap, so it runs in the short-tasks pool.
        pool="short_tasks",
        op_kwargs={
            # Timestamp evaluated at DAG-parse time, not at task run time.
            "created": datetime.now(),
        }
    )
# NOTE(review): chunk starts mid default_args dict — opening lines are out of view.
    'retries': 0,
    'retry_delay': timedelta(minutes=30),
    'priority_weight': 90,
    'pool': 'short_tasks'
}

# Manually-triggered DAG computing multi-criteria (MMA) aggregate evaluations.
dag = DAG('Calc_mma_eval', catchup=False, max_active_runs=1, default_args=default_args,
          schedule_interval=None)

mmas = []
with dag:
    mmas.append(DjangoOperator(
        task_id=f"calculate_mma",
        python_callable=calc_mma,
        op_kwargs={
            "topic_modelling_name": "bigartm_two_years_main_and_gos2",
            "criterion_ids": (1, 35, 34),
            # One weight vector per output class, aligned with criterion_ids.
            "criterion_weights": ((0.44, 0.33, 0.23), ),
            "class_ids": (36, ),
            "perform_actualize": False
        }
    ))
    mmas.append(DjangoOperator(
        task_id=f"calculate_mma_surveys",
        python_callable=calc_mma,
        op_kwargs={
            "topic_modelling_name": "bigartm_two_years_main_and_gos2",
            "criterion_ids": (1, 35, 34, 37),  # Sentiment (negative!!), Resonance, Gov. programs, Surveys
            # NOTE(review): chunk is cut off mid criterion_weights tuple.
            "criterion_weights": (
                (0.44, 0.33, 0, 0.23),
                (0, 0.2, 0.4, 0.4),
# NOTE(review): chunk starts mid default_args dict — opening lines are out of view.
    'retry_delay': timedelta(minutes=30),
    'priority_weight': 90,
    'pool': 'short_tasks'
}

# Two actualization DAGs sharing default_args: a nightly full pass and an
# hourly "fast" pass.
dag = DAG('Criterion_actualize_evaluations', catchup=False, max_active_runs=1,
          default_args=default_args, schedule_interval='0 23 * * *')
dag_fast = DAG('Criterion_actualize_evaluations_fast', catchup=False, max_active_runs=1,
               default_args=default_args, schedule_interval='55 * * * *')

actualizers_evaluators = []
with dag:
    # NOTE(review): `eval` shadows the builtin; left unchanged because the
    # chunk is truncated and out-of-view code may reference it.
    for eval in actualizable_criterion_evals:
        evaluator = DjangoOperator(
            task_id=f"eval_actualize_{eval['criterion_name']}_{eval['topic_modelling_translit']}",
            python_callable=evaluate,
            op_kwargs={
                "perform_actualize": True,
                "criterion_id": eval["criterion_id"],
                "topic_modelling": eval["topic_modelling"],
            }
        )
        actualizers_evaluators.append(evaluator)
        # Some criteria also maintain a "virtual negative" evaluation.
        if 'calc_virt_negative' in eval:
            evaluator = DjangoOperator(
                task_id=f"eval_actualize_{eval['criterion_name']}_{eval['topic_modelling_translit']}_neg",
                python_callable=evaluate,
                op_kwargs={
                    "perform_actualize": True,
                    "criterion_id": eval["criterion_id"],
                    "topic_modelling": eval["topic_modelling"],
                    "calc_virt_negative": True,
                }
""" Code that goes along with the Airflow tutorial located at: https://github.com/apache/airflow/blob/master/airflow/example_dags/tutorial.py """ from airflow import DAG from airflow.operators.bash_operator import BashOperator from airflow.operators.python_operator import PythonVirtualenvOperator, PythonOperator from DjangoOperator import DjangoOperator from datetime import datetime, timedelta from dags.examples.external_file_example.es_io import es_etl default_args = { 'owner': 'airflow', 'depends_on_past': False, 'start_date': datetime(2019, 7, 25), 'email': ['*****@*****.**'], 'email_on_failure': False, 'email_on_retry': False, 'retries': 0, 'retry_delay': timedelta(minutes=5), 'pool': 'short_tasks', } dag = DAG('Example_es_io', default_args=default_args, schedule_interval=None) with dag: django_op = DjangoOperator(task_id="ES_ETL", python_callable=es_etl, op_kwargs={"stuff": "stuff))"})
from DjangoOperator import DjangoOperator
from datetime import datetime, timedelta

from dags.scraper_social.init_accounts.service import init_accounts

default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': datetime(2020, 9, 25),
    'email': ['*****@*****.**'],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 2,
    'retry_delay': timedelta(minutes=15),
    'priority_weight': 95,
    'pool': 'scraper_tasks',
    # 'queue': 'second',
}

# Daily DAG (re)initializing the social-network account list for scraping.
dag = DAG('Scrapers_init_social_accounts', catchup=False, max_active_runs=1,
          default_args=default_args, schedule_interval='0 12 * * *')

with dag:
    init_sources = DjangoOperator(
        task_id="init_accounts",
        python_callable=init_accounts,
    )
# NOTE(review): chunk starts mid default_args dict — opening lines are out of view.
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': datetime(2020, 5, 18),
    'email': ['*****@*****.**'],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=30),
    'priority_weight': 40,
    'pool': 'scraper_tasks',
    # 'queue': 'second',
}

# Nightly DAG updating source activity, work split over 4 parallel scrapers.
dag = DAG('Scrapers_update_activity', catchup=False, max_active_runs=2,
          default_args=default_args, schedule_interval='0 0 * * *')

with dag:
    scrapers = []
    concurrency = 4
    for i in range(concurrency):
        # Each worker handles one percentage slice of the total range [0, 100).
        scrapers.append(
            DjangoOperator(task_id=f"scrap_{i}",
                           python_callable=update,
                           op_kwargs={
                               "start": (100 / concurrency) * i,
                               "end": (100 / concurrency) * (i + 1)
                           }))
# Re-run every registered actualizable BigARTM model with perform_actualize=True.
# NOTE(review): fragment — presumably indented inside a `with dag:` block out of view.
for tm in actualizable_bigartms:
    bigartm_calc_operator = DjangoOperator(
        # Prefer the transliterated name so the task_id stays ASCII-safe.
        task_id=f"bigartm_actualize_{tm['name'] if not tm['name_translit'] else tm['name_translit']}",
        python_callable=bigartm_calc,
        op_kwargs={
            "perform_actualize": True,
            "name": tm['name'],
            "name_translit": tm['name_translit'],
            "text_field": tm['text_field'],
            "corpus": tm["filters"]['corpus'],
            "datetime_from": tm["filters"]['datetime_from'],
            "datetime_to": tm["filters"]['datetime_to'],
            "source": tm["filters"]['source'],
            "group_id": tm["filters"]['group_id'] if 'group_id' in tm["filters"] else None,
            "topic_weight_threshold": tm["filters"]['topic_weight_threshold'] if 'topic_weight_threshold' in tm["filters"] else 0.05,
            "regularization_params": tm["regularization_params"],
        })
    actualizers_calcs.append(bigartm_calc_operator)
from DjangoOperator import DjangoOperator
from datetime import datetime, timedelta

from dags.get_proxy_list.services.service_proxy import get_proxy_list

default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': datetime(2020, 2, 4),
    'email': ['*****@*****.**'],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 2,
    'retry_delay': timedelta(minutes=5),
    'priority_weight': 50,
    'pool': 'scraper_tasks',
    # Hard cap so a hung proxy fetch cannot block the pool.
    'execution_timeout': timedelta(hours=1),
    # 'queue': 'second',
    # 'priority_weight': 10,
    # 'end_date': datetime(2016, 1, 1),
}

# Daily DAG refreshing the proxy list used by scrapers.
dag = DAG('Scrapers_get_proxy_list', catchup=False, max_active_runs=1,
          default_args=default_args, schedule_interval='0 12 * * *')

with dag:
    proxy_op = DjangoOperator(
        task_id="get_proxy_list",
        python_callable=get_proxy_list,
        execution_timeout=timedelta(hours=1)
    )
from dags.astana_test.external_file_example.my_package import test

# NOTE(review): PythonOperator / DjangoOperator / DAG / datetime imports are
# out of view above this chunk.
default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': datetime(2020, 2, 11),
    'email': ['*****@*****.**'],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 0,
    'retry_delay': timedelta(minutes=5),
    'pool': 'short_tasks',
}

# Smoke-test DAG: a plain PythonOperator followed by a DjangoOperator,
# both routed to the 'second' queue.
dag = DAG('Astana_test', catchup=False, max_active_runs=1, default_args=default_args,
          schedule_interval=None)

with dag:
    simple_op = PythonOperator(task_id="test_simple",
                               python_callable=lambda: "Hello, NurSultan!",
                               queue='second')
    django_op = DjangoOperator(task_id="test_django",
                               python_callable=test,
                               queue='second')
    simple_op >> django_op
from dags.document_location.services.get_locations import get_locations

default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': datetime(2019, 12, 12),
    'email': ['*****@*****.**'],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=30),
    'priority_weight': 90,
    'pool': 'short_tasks'
}

# Manually-triggered DAG extracting document geo-locations.
dag = DAG('Generate_document_locations', catchup=False, max_active_runs=1,
          default_args=default_args, schedule_interval=None)

with dag:
    evaluator = DjangoOperator(task_id=f"document_locations",
                               python_callable=get_locations,
                               op_kwargs={
                                   # (topic_modelling_name, criterion_id) pairs.
                                   "criterion_tm_duos": (("bigartm_two_years", 1), ),
                               })
# NOTE(review): chunk starts mid default_args dict — opening lines are out of view.
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=30),
    'priority_weight': 40,
    'pool': 'scraper_tasks',
    # 'queue': 'second',
}

# Manually-triggered DAG scraping specific sources by pre-collected URL lists.
dag = DAG('Scrapers_scrap_per_url', catchup=False, max_active_runs=2,
          default_args=default_args, schedule_interval=None)

# os.path.join(BASE_DAG_DIR, "tmp", f"urls_{source_id}.txt")
with dag:
    scraper1 = DjangoOperator(
        task_id=f"scrap_svoboda",
        python_callable=scrap,
        op_kwargs={
            "source_id": 67,
        }
    )
    scraper2 = DjangoOperator(
        task_id=f"scrap_rt",
        python_callable=scrap,
        op_kwargs={
            "source_id": 60,
        }
    )
    # NOTE(review): chunk is cut off mid third operator.
    scraper3 = DjangoOperator(
        task_id=f"scrap_sputnik",
        python_callable=scrap,
# 'end_date': datetime(2016, 1, 1), } dag = DAG('ML_cluster_sources', catchup=False, max_active_runs=1, default_args=default_args, schedule_interval=None) # '15 22 * * *' with dag: cluster_operators = [] cluster_operators.append( DjangoOperator(task_id=f"cluster_rus", python_callable=run_cluster, op_kwargs={ "tm_name": "bigartm_two_years_rus_and_rus_propaganda", "eps_range": [i / 10 for i in range(1, 11)], "min_samples_range": range(1, 10), })) cluster_operators.append( DjangoOperator(task_id=f"cluster_rus_kz", python_callable=run_cluster, op_kwargs={ "tm_name": "bigartm_two_years_rus_and_main", "eps_range": [i / 10 for i in range(1, 11)], "min_samples_range": range(1, 10), })) cluster_operators.append( DjangoOperator(task_id=f"cluster_kz",
# NOTE(review): chunk starts mid-expression — presumably
# `filtered_criterion_name = "".join(` immediately precedes this; the joins
# sanitize names into valid ASCII task_id fragments.
            list(
                filter(
                    lambda x: x.isalnum() or x in ['.', '-', '_'],
                    criterion['name_translit'].replace(":", "_").replace(
                        " ", "_"))))
        filtered_topic_modelling = "".join(
            list(
                filter(
                    lambda x: x.isalnum() or x in ['.', '-', '_'],
                    tm['name_translit'].replace(":", "_").replace(" ", "_"))))
        evaluators.append(
            DjangoOperator(
                task_id=f"eval_{filtered_criterion_name}_{filtered_topic_modelling}",
                python_callable=evaluate,
                op_kwargs={
                    "criterion_id": criterion['id'],
                    "topic_modelling": tm['name'],
                }))
        # Record the pair for the actualization DAG.
        actualizable_criterion_evals.append({
            "criterion_id": criterion['id'],
            "criterion_name": filtered_criterion_name,
            "topic_modelling": tm['name'],
            "topic_modelling_translit": filtered_topic_modelling,
        })
        # NOTE(review): chunk is cut off mid append.
        if criterion['calc_virt_negative']:
            evaluators.append(
# NOTE(review): chunk starts mid default_args dict — opening lines are out of view.
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': datetime(2020, 4, 14),
    'email': ['*****@*****.**'],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=30),
    'priority_weight': 40,
    'pool': 'short_tasks'
}

# Manually-triggered DAG searching topic combinations for every comboable TM.
dag = DAG('NLPmonitor_TM_Combo_Finder', catchup=False, max_active_runs=1,
          default_args=default_args, schedule_interval=None)

combo_finders = []
with dag:
    for tm in comboable_bigartms:
        bigartm_calc_operator = DjangoOperator(
            # Prefer the transliterated name so the task_id stays ASCII-safe.
            task_id=f"tm_combo_{tm['name'] if not tm['name_translit'] else tm['name_translit']}",
            python_callable=find_combos,
            op_kwargs={
                "name": tm['name'],
                "name_translit": tm['name_translit'],
            })
        combo_finders.append(bigartm_calc_operator)
""" from airflow import DAG from airflow.operators.bash_operator import BashOperator from airflow.operators.python_operator import PythonVirtualenvOperator, PythonOperator from DjangoOperator import DjangoOperator from datetime import datetime, timedelta from dags.examples.external_file_example.my_package import test default_args = { 'owner': 'airflow', 'depends_on_past': False, 'start_date': datetime(2019, 7, 25), 'email': ['*****@*****.**'], 'email_on_failure': False, 'email_on_retry': False, 'retries': 0, 'retry_delay': timedelta(minutes=5), 'pool': 'short_tasks', } dag = DAG('Example_django_op_example', default_args=default_args, schedule_interval=None) with dag: django_op = DjangoOperator( task_id="test_corpus_create", python_callable=test, )
# 'priority_weight': 10, # 'end_date': datetime(2016, 1, 1), } dag = DAG('Nlpmonitor_Lemmatization_eng', catchup=False, max_active_runs=1, concurrency=4, default_args=default_args, schedule_interval='20 * * * *') with dag: # init_last_datetime = DjangoOperator( # task_id="init_last_datetime", # python_callable=init_last_datetime, # op_kwargs={ # } # ) concurrency = 4 lemmatize_operators = [] for i in range(concurrency): lemmatize_operators.append( DjangoOperator(task_id=f"lemmatize_{i}", python_callable=preprocessing_raw_data, op_kwargs={ "process_num": i, "total_proc": concurrency, })) # init_last_datetime >> lemmatize_operators
# NOTE(review): chunk starts mid default_args dict — opening lines are out of view.
    'pool': 'short_tasks'
    # 'queue': 'bash_queue',
    # 'priority_weight': 10,
    # 'end_date': datetime(2016, 1, 1),
}

# Hourly DAG lemmatizing Kazakh documents.
dag = DAG('Nlpmonitor_Lemmatization_kz', catchup=False, max_active_runs=1, concurrency=5,
          default_args=default_args, schedule_interval='20 * * * *')

with dag:
    # NOTE(review): rebinding shadows the imported callable of the same name;
    # safe because python_callable is evaluated before the assignment.
    init_last_datetime = DjangoOperator(
        task_id="init_last_datetime",
        python_callable=init_last_datetime,
    )
    concurrency = 3
    lemmatize_operators = []
    for i in range(concurrency):
        # Each worker handles one percentage slice of the total range [0, 100).
        lemmatize_operators.append(
            DjangoOperator(task_id=f"lemmatize_{i}",
                           python_callable=preprocessing_raw_data,
                           op_kwargs={
                               "start": (100 / concurrency) * i,
                               "end": (100 / concurrency) * (i + 1)
                           }))
    # Airflow supports a list on the right-hand side of >>: init runs before all workers.
    init_last_datetime >> lemmatize_operators
# NOTE(review): chunk starts mid DAG(...) call — the opening arguments are out of view.
               schedule_interval=None)

with dag:
    corpuses = ["scopus_real_real"]
    # name = "kz_rus_ngrams_dict_pymorphy_2_4_393442_3710985"
    # name = "kz_rus_yandex_ngrams_dict"
    # name = "en_lemminflect"
    name = "en_scopus_extend"
    max_n_gram_len = 3
    field_to_parse = "text_lemmatized_eng_lemminflect"
    # NOTE(review): rebinding shadows the imported callable of the same name;
    # safe because python_callable is evaluated before the assignment.
    init_dictionary_index = DjangoOperator(
        task_id="init_dictionary_index",
        python_callable=init_dictionary_index,
        op_kwargs={
            "corpuses": corpuses,
            "name": name,
            # Evaluated at DAG-parse time, not at task run time.
            "datetime": datetime.now(),
            "max_n_gram_len": max_n_gram_len,
            "field_to_parse": field_to_parse,
        })
    concurrency = 150
    dictionary_operators = []
    for i in range(concurrency):
        # NOTE(review): chunk is cut off mid op_kwargs of the worker operator.
        dictionary_operators.append(
            DjangoOperator(task_id=f"dictionary_{i}",
                           python_callable=generate_dictionary_batch,
                           op_kwargs={
                               "name": name,
                               "process_num": i,
                               "total_proc": concurrency,
from dags.elastic_sender.sender.service import send_elastic

default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': datetime(2019, 9, 12),
    'email': ['*****@*****.**'],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 3,
    'retry_delay': timedelta(minutes=15),
    'priority_weight': 80,
    'pool': 'short_tasks'
    # 'queue': 'bash_queue',
    # 'priority_weight': 10,
    # 'end_date': datetime(2016, 1, 1),
}

# Hourly DAG pushing pending documents to Elasticsearch.
dag = DAG('Nlpmonitor_send_elastic', catchup=False, max_active_runs=1,
          default_args=default_args, schedule_interval='15 * * * *')

with dag:
    # NOTE(review): rebinding shadows the imported callable of the same name;
    # safe because python_callable is evaluated before the assignment.
    send_elastic = DjangoOperator(
        task_id="send_elastic",
        python_callable=send_elastic,
    )
from dags.scraper.init_sources.service import init_sources

default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': datetime(2019, 9, 4),
    'email': ['*****@*****.**'],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 2,
    'retry_delay': timedelta(minutes=15),
    'priority_weight': 95,
    'pool': 'scraper_tasks',
    # 'queue': 'second',
    # 'priority_weight': 10,
    # 'end_date': datetime(2016, 1, 1),
}

# Daily DAG (re)initializing scraper sources.
dag = DAG('Scrapers_init_sources', catchup=False, max_active_runs=1,
          default_args=default_args, schedule_interval='0 12 * * *')

with dag:
    # NOTE(review): rebinding shadows the imported callable of the same name;
    # safe because python_callable is evaluated before the assignment.
    init_sources = DjangoOperator(task_id="init_sources",
                                  python_callable=init_sources,
                                  op_kwargs={
                                      # IDs of sources to ingest in full.
                                      "sources_full": {70},
                                  })