REPLICATION_FACTOR, CONFIGSET)

# Runs the web-content ingest script; the Solr target and the web-content
# source settings are handed to the script through its environment.
INDEX_WEB_CONTENT = BashOperator(
    task_id="index_web_content",
    # The trailing space after ".sh" is deliberate: without it Airflow treats
    # the value as a Jinja template file to load (see the BashOperator docs).
    bash_command=f"{AIRFLOW_HOME}/dags/cob_datapipeline/scripts/ingest_web_content.sh ",
    env={
        "HOME": AIRFLOW_USER_HOME,
        # Fall back to an empty string when the connection has no credentials.
        "SOLR_AUTH_PASSWORD": SOLR_CONN.password or "",
        "SOLR_AUTH_USER": SOLR_CONN.login or "",
        # Collection is the configset name plus the value pulled from the
        # set_collection_name task via XCom.
        "SOLR_WEB_URL": tasks.get_solr_url(
            SOLR_CONN,
            CONFIGSET + "-{{ ti.xcom_pull(task_ids='set_collection_name') }}",
        ),
        "WEB_CONTENT_BASE_URL": WEB_CONTENT_BASE_URL,
        "WEB_CONTENT_BASIC_AUTH_PASSWORD": WEB_CONTENT_BASIC_AUTH_PASSWORD,
        "WEB_CONTENT_BASIC_AUTH_USER": WEB_CONTENT_BASIC_AUTH_USER,
        "WEB_CONTENT_BRANCH": WEB_CONTENT_BRANCH,
    },
    dag=DAG,
)

# Task built by task_solrgetnumdocs for the collection named by the configset
# plus the value pulled from the set_collection_name task via XCom.
GET_NUM_SOLR_DOCS_POST = task_solrgetnumdocs(
    DAG, CONFIGSET + "-{{ ti.xcom_pull(task_ids='set_collection_name') }}")

# Runs the MARC ingest script against the "new-updated" S3 folder for this DAG
# run, with COMMAND set to "ingest".
INDEX_UPDATES_OAI_MARC = BashOperator(
    task_id="index_updates_oai_marc",
    # The trailing space after ".sh" is deliberate: without it Airflow treats
    # the value as a Jinja template file to load (see the BashOperator docs).
    bash_command=f"{AIRFLOW_HOME}/dags/cob_datapipeline/scripts/sc_ingest_marc.sh ",
    env={
        # Start from the worker's own environment; the keys below override
        # any inherited variable of the same name.
        **os.environ,
        "AWS_ACCESS_KEY_ID": AIRFLOW_S3.login,
        "AWS_SECRET_ACCESS_KEY": AIRFLOW_S3.password,
        "BUCKET": AIRFLOW_DATA_BUCKET,
        "FOLDER": (
            DAG.dag_id
            + "/{{ ti.xcom_pull(task_ids='set_s3_namespace') }}/new-updated"
        ),
        "GIT_BRANCH": PRE_PRODUCTION_COB_INDEX_VERSION,
        "HOME": AIRFLOW_USER_HOME,
        "LATEST_RELEASE": "false",
        # Fall back to an empty string when the connection has no credentials.
        "SOLR_AUTH_USER": SOLR_CONN.login or "",
        "SOLR_AUTH_PASSWORD": SOLR_CONN.password or "",
        "SOLR_URL": tasks.get_solr_url(SOLR_CONN, COLLECTION),
        "COMMAND": "ingest",
    },
    dag=DAG,
)

INDEX_DELETES_OAI_MARC = BashOperator(
    task_id="index_deletes_oai_marc",
    bash_command=AIRFLOW_HOME + "/dags/cob_datapipeline/scripts/sc_ingest_marc.sh ",
    env={**os.environ, **{
        "AWS_ACCESS_KEY_ID": AIRFLOW_S3.login,
        "AWS_SECRET_ACCESS_KEY": AIRFLOW_S3.password,
        "BUCKET": AIRFLOW_DATA_BUCKET,
        "FOLDER": DAG.dag_id + "/{{ ti.xcom_pull(task_ids='set_s3_namespace') }}/deleted",
        "GIT_BRANCH": PRE_PRODUCTION_COB_INDEX_VERSION,
        "HOME": AIRFLOW_USER_HOME,
Esempio n. 3
0
 def test_get_solr_url_without_port(self):
     """A connection with no port yields "<host>/solr/<core>"."""
     portless_conn = Connection(host="https://example.com")
     expected_url = "https://example.com/solr/foo"
     self.assertEqual(get_solr_url(portless_conn, "foo"), expected_url)
# Runs the MARC ingest script over the "alma_bibs__" files in this DAG run's
# S3 namespace, with TRAJECT_FULL_REINDEX set to "yes".
INDEX_SFTP_MARC = BashOperator(
    task_id="index_sftp_marc",
    # The trailing space after ".sh" is deliberate: without it Airflow treats
    # the value as a Jinja template file to load (see the BashOperator docs).
    bash_command=f"{AIRFLOW_HOME}/dags/cob_datapipeline/scripts/sc_ingest_marc.sh ",
    env={
        "AWS_ACCESS_KEY_ID": AIRFLOW_S3.login,
        "AWS_SECRET_ACCESS_KEY": AIRFLOW_S3.password,
        "BUCKET": AIRFLOW_DATA_BUCKET,
        "FOLDER": (
            ALMASFTP_S3_PREFIX
            + "/"
            + DAG.dag_id
            + "/{{ ti.xcom_pull(task_ids='set_s3_namespace') }}/alma_bibs__"
        ),
        "GIT_BRANCH": COB_INDEX_VERSION,
        "HOME": AIRFLOW_USER_HOME,
        # Environment values must be strings, hence the explicit str().
        "LATEST_RELEASE": str(LATEST_RELEASE),
        # Fall back to an empty string when the connection has no credentials.
        "SOLR_AUTH_USER": SOLR_WRITER.login or "",
        "SOLR_AUTH_PASSWORD": SOLR_WRITER.password or "",
        "SOLR_URL": tasks.get_solr_url(SOLR_WRITER, COLLECTION_NAME),
        "TRAJECT_FULL_REINDEX": "yes",
    },
    dag=DAG,
)

# Issues a GET to the collection's update handler with commit=true, using the
# SOLR_CLOUD connection.
SOLR_COMMIT = SimpleHttpOperator(
    task_id="solr_commit",
    method="GET",
    http_conn_id=SOLR_CLOUD.conn_id,
    endpoint=f"/solr/{COLLECTION_NAME}/update?commit=true",
    dag=DAG,
)

# Task built by task_solrgetnumdocs for COLLECTION_NAME, given the id
# "get_num_solr_docs_post" and the SOLR_CLOUD connection.
GET_NUM_SOLR_DOCS_POST = task_solrgetnumdocs(
    DAG,
    COLLECTION_NAME,
    "get_num_solr_docs_post",
    conn_id=SOLR_CLOUD.conn_id,
)
Esempio n. 5
0
 def test_get_solr_url_with_http_in_host(self):
     """A scheme-prefixed host plus a port yields "<host>:<port>/solr/<core>"."""
     conn_with_port = Connection(host="https://example.com", port="8983")
     expected_url = "https://example.com:8983/solr/foo"
     self.assertEqual(get_solr_url(conn_with_port, "foo"), expected_url)