REPLICATION_FACTOR, CONFIGSET)

INDEX_WEB_CONTENT = BashOperator(
    task_id="index_web_content",
    bash_command=AIRFLOW_HOME + "/dags/cob_datapipeline/scripts/ingest_web_content.sh ",
    env={
        "HOME": AIRFLOW_USER_HOME,
        "SOLR_AUTH_PASSWORD": SOLR_CONN.password if SOLR_CONN.password else "",
        "SOLR_AUTH_USER": SOLR_CONN.login if SOLR_CONN.login else "",
        "SOLR_WEB_URL": tasks.get_solr_url(
            SOLR_CONN,
            CONFIGSET + "-{{ ti.xcom_pull(task_ids='set_collection_name') }}"),
        "WEB_CONTENT_BASE_URL": WEB_CONTENT_BASE_URL,
        "WEB_CONTENT_BASIC_AUTH_PASSWORD": WEB_CONTENT_BASIC_AUTH_PASSWORD,
        "WEB_CONTENT_BASIC_AUTH_USER": WEB_CONTENT_BASIC_AUTH_USER,
        "WEB_CONTENT_BRANCH": WEB_CONTENT_BRANCH
    },
    dag=DAG)

GET_NUM_SOLR_DOCS_POST = task_solrgetnumdocs(
    DAG,
    CONFIGSET + "-{{ ti.xcom_pull(task_ids='set_collection_name') }}",
)
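# The "-{{ ti.xcom_pull(task_ids='set_collection_name') }}" suffix above is a
# Jinja template rendered from XCom at run time. A hypothetical
# set_collection_name task that would satisfy it is sketched below (Airflow 2
# style import assumed); the real task may derive the suffix differently.
from airflow.operators.python import PythonOperator

SET_COLLECTION_NAME = PythonOperator(
    task_id="set_collection_name",
    # The return value is pushed to XCom automatically, so downstream templates
    # resolve to a dated collection name such as CONFIGSET + "-2021-01-01".
    python_callable=lambda **context: context["ds"],
    dag=DAG,
)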
INDEX_UPDATES_OAI_MARC = BashOperator(
    task_id="index_updates_oai_marc",
    bash_command=AIRFLOW_HOME + "/dags/cob_datapipeline/scripts/sc_ingest_marc.sh ",
    env={**os.environ, **{
        "AWS_ACCESS_KEY_ID": AIRFLOW_S3.login,
        "AWS_SECRET_ACCESS_KEY": AIRFLOW_S3.password,
        "BUCKET": AIRFLOW_DATA_BUCKET,
        "FOLDER": DAG.dag_id + "/{{ ti.xcom_pull(task_ids='set_s3_namespace') }}/new-updated",
        "GIT_BRANCH": PRE_PRODUCTION_COB_INDEX_VERSION,
        "HOME": AIRFLOW_USER_HOME,
        "LATEST_RELEASE": "false",
        "SOLR_AUTH_USER": SOLR_CONN.login or "",
        "SOLR_AUTH_PASSWORD": SOLR_CONN.password or "",
        "SOLR_URL": tasks.get_solr_url(SOLR_CONN, COLLECTION),
        "COMMAND": "ingest",
    }},
    dag=DAG
)

INDEX_DELETES_OAI_MARC = BashOperator(
    task_id="index_deletes_oai_marc",
    bash_command=AIRFLOW_HOME + "/dags/cob_datapipeline/scripts/sc_ingest_marc.sh ",
    env={**os.environ, **{
        "AWS_ACCESS_KEY_ID": AIRFLOW_S3.login,
        "AWS_SECRET_ACCESS_KEY": AIRFLOW_S3.password,
        "BUCKET": AIRFLOW_DATA_BUCKET,
        "FOLDER": DAG.dag_id + "/{{ ti.xcom_pull(task_ids='set_s3_namespace') }}/deleted",
        "GIT_BRANCH": PRE_PRODUCTION_COB_INDEX_VERSION,
        "HOME": AIRFLOW_USER_HOME,
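# Note on the env merging in the tasks above: when BashOperator receives an
# explicit `env`, the bash subprocess sees only that mapping instead of
# inheriting the worker's environment, so {**os.environ, **{...}} re-adds the
# inherited variables and lets the task-specific entries win on key collisions.
# A minimal illustration (hypothetical value):
import os

merged = {**os.environ, **{"COMMAND": "ingest"}}
assert merged["COMMAND"] == "ingest"  # explicit entry overrides any inherited one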
def test_get_solr_url_without_port(self):
    conn = Connection(host="https://example.com")
    core = "foo"
    self.assertEqual(get_solr_url(conn, core), "https://example.com/solr/foo")
INDEX_SFTP_MARC = BashOperator(
    task_id="index_sftp_marc",
    bash_command=AIRFLOW_HOME + "/dags/cob_datapipeline/scripts/sc_ingest_marc.sh ",
    env={
        "AWS_ACCESS_KEY_ID": AIRFLOW_S3.login,
        "AWS_SECRET_ACCESS_KEY": AIRFLOW_S3.password,
        "BUCKET": AIRFLOW_DATA_BUCKET,
        "FOLDER": ALMASFTP_S3_PREFIX + "/" + DAG.dag_id + "/{{ ti.xcom_pull(task_ids='set_s3_namespace') }}/alma_bibs__",
        "GIT_BRANCH": COB_INDEX_VERSION,
        "HOME": AIRFLOW_USER_HOME,
        "LATEST_RELEASE": str(LATEST_RELEASE),
        "SOLR_AUTH_USER": SOLR_WRITER.login or "",
        "SOLR_AUTH_PASSWORD": SOLR_WRITER.password or "",
        "SOLR_URL": tasks.get_solr_url(SOLR_WRITER, COLLECTION_NAME),
        "TRAJECT_FULL_REINDEX": "yes",
    },
    dag=DAG)

SOLR_COMMIT = SimpleHttpOperator(
    task_id="solr_commit",
    method="GET",
    http_conn_id=SOLR_CLOUD.conn_id,
    endpoint="/solr/" + COLLECTION_NAME + "/update?commit=true",
    dag=DAG)

GET_NUM_SOLR_DOCS_POST = task_solrgetnumdocs(
    DAG,
    COLLECTION_NAME,
    "get_num_solr_docs_post",
    conn_id=SOLR_CLOUD.conn_id)
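# Sketch of what task_solrgetnumdocs might look like, inferred only from its
# call sites in this file (dag, collection/alias, task_id, conn_id); the actual
# helper in cob_datapipeline may differ. It issues a zero-row search and pushes
# Solr's numFound count to XCom so pre/post document totals can be compared.
import json
from airflow.providers.http.operators.http import SimpleHttpOperator

def task_solrgetnumdocs(dag, alias, taskid, conn_id):
    return SimpleHttpOperator(
        task_id=taskid,
        method="GET",
        http_conn_id=conn_id,
        endpoint="/solr/" + alias + "/select?q=*:*&rows=0",
        # response_filter's return value becomes the task's XCom payload.
        response_filter=lambda response: json.loads(response.text)["response"]["numFound"],
        dag=dag,
    )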
def test_get_solr_url_with_http_in_host(self):
    conn = Connection(host="https://example.com", port="8983")
    core = "foo"
    self.assertEqual(get_solr_url(conn, core), "https://example.com:8983/solr/foo")
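# A minimal get_solr_url consistent with the two tests above, assuming the host
# already carries its scheme and the port is appended only when the Connection
# defines one; the real function may normalize additional cases.
def get_solr_url(conn, core):
    url = conn.host
    if conn.port:
        url = url + ":" + str(conn.port)
    return url + "/solr/" + core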