Exemple #1
0
    def execute(self, context):
        """
        Executed by task_instance at runtime.

        Queries MongoDB (aggregation pipeline or plain find, depending on
        configuration), applies the operator's transform, serialises the
        documents to a string and uploads it to S3.
        """
        s3_conn = S3Hook(self.s3_conn_id)

        # Grab collection and execute query according to whether or not it is a pipeline
        mongo = MongoHook(self.mongo_conn_id)
        if self.is_pipeline:
            results = mongo.aggregate(
                mongo_collection=self.mongo_collection,
                aggregate_query=self.mongo_query,
                mongo_db=self.mongo_db)
        else:
            results = mongo.find(
                mongo_collection=self.mongo_collection,
                query=self.mongo_query,
                mongo_db=self.mongo_db)

        # Performs transform then stringifies the docs results into json format
        docs_str = self._stringify(self.transform(results))

        # Load Into S3
        s3_conn.load_string(string_data=docs_str,
                            key=self.s3_key,
                            bucket_name=self.s3_bucket,
                            replace=self.replace)

        return True
 def poke(self, context):
     """Return True when at least one document matches ``self.query``."""
     self.log.info(
         "Sensor check existence of the document "
         "that matches the following query: %s", self.query)
     matched = MongoHook(self.mongo_conn_id).find(
         self.collection, self.query, find_one=True)
     return matched is not None
Exemple #3
0
    def execute(self, context):
        """Executed by task_instance at runtime.

        Loads a single JSON file — or every ``*.json`` file in a
        directory — into the configured MongoDB collection.  The target
        path comes either from ``self.target_path`` or from XCom.

        :return: True on success, False if the target is neither a file
            nor a directory.
        :raises ValueError: if not exactly one of target_path /
            target_xcom was configured.
        """
        with closing(MongoHook(self.conn_id).get_conn()) as client:
            db = client[self.db]
            collection = db[self.collection]

            # NOTE: Pass only one of target_xcom and target_path.
            # Raise a real exception instead of `assert`: asserts are
            # stripped when Python runs with -O.
            if bool(self.target_path) == bool(self.target_xcom):
                raise ValueError(
                    'Pass exactly one of target_path and target_xcom')

            if self.target_path:  # Use path given in task definition
                target = self.target_path

            else:  # Access from XCOM
                target = context['task_instance'].xcom_pull(**self.target_xcom)

            if os.path.isdir(target):
                # os.path.join instead of f'/{target}/*.json': the old
                # pattern prefixed '/', so relative targets silently
                # matched nothing.
                for filepath in glob.glob(os.path.join(target, '*.json')):
                    self._update_db(filepath, collection)

            elif os.path.isfile(target):
                self._update_db(target, collection)

            else:  # Should never get here: target is neither file nor dir
                return False

        return True
    def execute(self, context):
        """Transfer MySQL query results into MongoDB.

        Each query in ``self.sql_queries`` is executed in order and its
        rows are inserted into the MongoDB collection at the same index
        in ``self.mongo_collections``.
        """
        from contextlib import closing

        logging.info('Executing: ' + str(self.sql_queries))
        mysql_hook = MySqlHook(mysql_conn_id=self.mysql_conn_id)
        mongo_hook = MongoHook(mongo_conn_id=self.mongo_conn_id)

        logging.info("Transferring MySQL query results into MongoDB database.")

        mysql_conn = mysql_hook.get_conn()
        # DictCursor so rows come back as dicts, ready for insert_many().
        mysql_conn.cursorclass = MySQLdb.cursors.DictCursor

        mongo_conn = mongo_hook.get_conn()
        # NOTE(review): target database name 'weather' is hard-coded here.
        mongo_db = mongo_conn.weather

        # closing() guarantees the cursor is released even if a query
        # fails part-way through the loop (the original leaked it).
        with closing(mysql_conn.cursor()) as cursor:
            if self.mysql_preoperator:
                logging.info("Running MySQL preoperator")
                cursor.execute(self.mysql_preoperator)

            for index, sql in enumerate(self.sql_queries):
                cursor.execute(sql, self.parameters)

                fetched_rows = list(cursor.fetchall())

                mongo_db[self.mongo_collections[index]].insert_many(fetched_rows)

        logging.info("Transfer Done")
Exemple #5
0
def get_mongodb():
    """Re-map every 'order_item' document, write the mapped copies back,
    then print the (re-mapped) contents of the collection."""
    hook = MongoHook(conn_id='mongodb_id')
    mapped = [map_data(doc) for doc in hook.find('order_item', {})]
    hook.insert_many('order_item', mapped)
    refreshed = [map_data(doc) for doc in hook.find('order_item', {})]
    print(refreshed)
Exemple #6
0
    def test_context_manager(self):
        """The client must be live inside the `with` block and released
        (set to None) once the block exits."""
        with MongoHook(conn_id='mongo_default', mongo_db='default') as hook:
            hook.get_conn()

            self.assertIsInstance(hook, MongoHook)
            self.assertIsNotNone(hook.client)

        # Leaving the context manager should have closed the client.
        self.assertIsNone(hook.client)
Exemple #7
0
def read_csv_and_dump(path, libname):
    """Read a two-level-indexed CSV and write each top-level symbol's
    rows into the Arctic store behind the 'arctic_mongo' connection.

    :param path: CSV file path; columns 0 and 1 form the MultiIndex.
    :param libname: Arctic library name to write into.
    """
    new_data = pd.read_csv(path, index_col=[0, 1])
    hook = MongoHook(conn_id="arctic_mongo")
    store = hook.get_conn()

    if store.library_exists(libname):
        # update from start, in order to override the start-date data point;
        # merge data by replacing
        for symbol in new_data.index.levels[0]:
            # BUG FIX: the original iterated an undefined name `data`
            # (NameError) and clobbered `new_data` inside the loop.
            # .loc[symbol] selects that symbol's rows from level 0 —
            # presumably the intent; TODO confirm against callers.
            symbol_frame = new_data.loc[symbol]
            store.write(symbol, data=symbol_frame)  # simply override the current data
Exemple #8
0
def check_mongo_db(**kwargs):
    """Return True when the given Mongo collection contains documents.

    Expected kwargs: mongo_uri, mongo_db, mongo_collection.
    """
    mongo_uri = kwargs.get('mongo_uri')
    mongo_db = kwargs.get('mongo_db')
    mongo_collection = kwargs.get('mongo_collection')
    mongo_conn = MongoHook(mongo_uri).get_conn()
    # Grab collection
    collection = mongo_conn.get_database(mongo_db).get_collection(
        mongo_collection)

    # count_documents({}) replaces Cursor.count(), which was deprecated
    # in PyMongo 3.7 and removed in PyMongo 4.x.
    count = collection.count_documents({})
    if count > 0:
        logging.info('Total in mongo db: {}, coll: {}, count: {}'.format(
            mongo_db, mongo_collection, count))
        return True
    else:
        logging.info('No data found in mongo db: {}, coll: {}'.format(
            mongo_db, mongo_collection))
        return False
Exemple #9
0
    def poke(self, context):
        """Return the callback's verdict once the symbol exists in the
        Arctic library; False otherwise (including on connection errors)."""
        store = Arctic(MongoHook(self.mongo_conn_id, libname=self.libname).get_conn())

        self.log.info(
            f'Poking for {self.mongo_conn_id}, {self.libname}: {self.symbol}')

        try:
            # Guard clauses: bail out early when there is nothing to read.
            if not store.library_exists(self.libname):
                return False
            lib = store.get_library(self.libname)
            if not lib.has_symbol(self.symbol):
                return False
            return self.python_call_back(
                self.meta,
                lib.read_meta(self.symbol).metadata)
        except OSError:
            # Connection-level failure: treat as "not ready yet".
            return False
 def __init__(self,
              apk_id,
              apk_version,
              runner_conf,
              target_device=None,
              *args,
              **kwargs):
     """Create the runner on the 'android' queue and open a Mongo
     connection for result storage."""
     super(AndroidRunnerOperator, self).__init__(queue='android',
                                                 runner_conf=runner_conf,
                                                 *args,
                                                 **kwargs)
     self.apk_id = apk_id
     self.apk_version = apk_version
     self.serial = target_device
     # APK paths are resolved later, at execution time.
     self.apk_path = None
     self.test_apk_path = None
     # Shared Mongo connection for storing run results.
     self.mongo_hk = MongoHook(conn_id='stocksdktest_mongo')
     self.conn = self.mongo_hk.get_conn()
Exemple #11
0
    def setUp(self):
        """Register a test Mongo connection, seed one document, and
        build the sensor under test."""
        db.merge_conn(
            Connection(conn_id='mongo_test',
                       conn_type='mongo',
                       host='mongo',
                       port='27017',
                       schema='test'))

        self.dag = DAG('test_dag_id',
                       default_args={'owner': 'airflow',
                                     'start_date': DEFAULT_DATE})

        # Seed a document the sensor's query will match.
        MongoHook('mongo_test').insert_one('foo', {'bar': 'baz'})

        self.sensor = MongoSensor(task_id='test_task',
                                  mongo_conn_id='mongo_test',
                                  dag=self.dag,
                                  collection='foo',
                                  query={'bar': 'baz'})
def content_neo4j_node_creation(**kwargs):
    """Mirror one Swift object's Mongo metadata document into Neo4j.

    :param kwargs: Airflow context; ``dag_run.conf`` must carry
        'swift_container' and 'swift_id'.
    :return: None
    """
    from lib.neo4jintegrator import Neo4jIntegrator

    uri = "bolt://{}:{}".format(globals()["GOLD_NEO4J_IP"],
                                globals()["NEO4J_PORT"])
    neo4j_user = "******"
    neo4j_pass = "******"
    driver = Neo4jIntegrator(uri, neo4j_user, neo4j_pass)
    # mongo_uri = globals()["META_MONGO_IP"] + ":" + globals()["MONGO_PORT"]
    meta_base = MongoHook(globals()["MONGO_META_CONN_ID"])

    run_conf = kwargs["dag_run"].conf
    container = run_conf["swift_container"]
    object_id = str(run_conf["swift_id"])
    # Look up the metadata document for this Swift object ...
    doc = meta_base.get_conn().swift.get_collection(container).find_one(
        {"swift_object_id": object_id})
    # ... and insert it into the graph.
    driver.insert_image(doc)
Exemple #13
0
    def execute(self, context):
        """Insert one sample blog post and return its ObjectId as a string."""
        import datetime

        mongo = MongoHook(conn_id=self.conn_id)
        # Split the database name off the end of the connection URI.
        mongo.uri, dbname = mongo.uri.rsplit("/", maxsplit=1)

        posts = mongo.get_collection("posts", dbname)

        sample_post = {
            "author": "Mike",
            "text": "My first blog post!",
            "tags": ["mongodb", "python", "pymongo"],
            "date": datetime.datetime.utcnow()
        }
        inserted_id = posts.insert_one(sample_post).inserted_id
        return str(inserted_id)
Exemple #14
0
    def _get_mongo_doc(self):
        """
        Fetch documents from the MongoDB server, transform and stringify
        them, and spill the result into a named temporary JSON file.

        :return: dict mapping self.filename -> open NamedTemporaryFile
        """
        mongo_conn = MongoHook(self.mongo_conn_id).get_conn()
        collection = mongo_conn.get_database(self.mongo_db).get_collection(self.mongo_collection)

        # Aggregation pipeline vs. plain find, depending on configuration.
        if self.is_pipeline:
            results = collection.aggregate(self.mongo_query)
        else:
            results = collection.find(self.mongo_query)

        docs_str = self._stringify(self.transform(results))
        print(docs_str)

        # delete=True: the file disappears when the handle is closed, so
        # the caller must consume it before closing.
        tmp_file_handle = NamedTemporaryFile(delete=True)
        if PY3:
            # Strip '$' characters and encode for the binary file handle.
            docs_str = docs_str.replace("$", '').encode('utf-8')
        tmp_file_handle.write(docs_str)

        return {self.filename: tmp_file_handle}
 def get_hook(self):
     """Instantiate and return the Airflow hook matching ``self.conn_type``.

     Hook modules are imported lazily (only the selected one), exactly
     as the original if/elif chain did, so unrelated providers need not
     be installed.

     :raises AirflowException: if conn_type is not a known hook type.
     """
     from importlib import import_module

     # conn_type -> (module path, hook class name, conn-id keyword arg)
     hook_registry = {
         'mysql': ('airflow.hooks.mysql_hook', 'MySqlHook', 'mysql_conn_id'),
         'google_cloud_platform': ('airflow.gcp.hooks.bigquery', 'BigQueryHook', 'bigquery_conn_id'),
         'postgres': ('airflow.hooks.postgres_hook', 'PostgresHook', 'postgres_conn_id'),
         'pig_cli': ('airflow.hooks.pig_hook', 'PigCliHook', 'pig_cli_conn_id'),
         'hive_cli': ('airflow.hooks.hive_hooks', 'HiveCliHook', 'hive_cli_conn_id'),
         'presto': ('airflow.hooks.presto_hook', 'PrestoHook', 'presto_conn_id'),
         'hiveserver2': ('airflow.hooks.hive_hooks', 'HiveServer2Hook', 'hiveserver2_conn_id'),
         'sqlite': ('airflow.hooks.sqlite_hook', 'SqliteHook', 'sqlite_conn_id'),
         'jdbc': ('airflow.hooks.jdbc_hook', 'JdbcHook', 'jdbc_conn_id'),
         'mssql': ('airflow.hooks.mssql_hook', 'MsSqlHook', 'mssql_conn_id'),
         'oracle': ('airflow.hooks.oracle_hook', 'OracleHook', 'oracle_conn_id'),
         'vertica': ('airflow.contrib.hooks.vertica_hook', 'VerticaHook', 'vertica_conn_id'),
         'cloudant': ('airflow.contrib.hooks.cloudant_hook', 'CloudantHook', 'cloudant_conn_id'),
         'jira': ('airflow.contrib.hooks.jira_hook', 'JiraHook', 'jira_conn_id'),
         'redis': ('airflow.contrib.hooks.redis_hook', 'RedisHook', 'redis_conn_id'),
         'wasb': ('airflow.contrib.hooks.wasb_hook', 'WasbHook', 'wasb_conn_id'),
         'docker': ('airflow.hooks.docker_hook', 'DockerHook', 'docker_conn_id'),
         'azure_data_lake': ('airflow.contrib.hooks.azure_data_lake_hook', 'AzureDataLakeHook', 'azure_data_lake_conn_id'),
         'azure_cosmos': ('airflow.contrib.hooks.azure_cosmos_hook', 'AzureCosmosDBHook', 'azure_cosmos_conn_id'),
         'cassandra': ('airflow.contrib.hooks.cassandra_hook', 'CassandraHook', 'cassandra_conn_id'),
         'mongo': ('airflow.contrib.hooks.mongo_hook', 'MongoHook', 'conn_id'),
         'gcpcloudsql': ('airflow.gcp.hooks.cloud_sql', 'CloudSqlDatabaseHook', 'gcp_cloudsql_conn_id'),
         'grpc': ('airflow.contrib.hooks.grpc_hook', 'GrpcHook', 'grpc_conn_id'),
     }

     if self.conn_type not in hook_registry:
         raise AirflowException("Unknown hook type {}".format(self.conn_type))
     module_path, class_name, conn_id_kwarg = hook_registry[self.conn_type]
     hook_class = getattr(import_module(module_path), class_name)
     return hook_class(**{conn_id_kwarg: self.conn_id})
        'B': dictObj2
    }
    reorderedDictWithReorderedListsInValue = {
        'B': dictObj2,
        'A': [{
            'Y': 2
        }, {
            'X': [reorderedDictObj, dictObj2]
        }]
    }
    a = {"L": "M", "N": dictWithListsInValue}
    b = {"L": "M", "N": reorderedDictWithReorderedListsInValue}

    # return j1, j2
    # return j3,j4
    # return a,b
    r1 = {"1": j1, "2": j3, "3": a}
    r2 = {"1": j2, "2": j4, "3": b}
    return r1, r2


if __name__ == '__main__':
    # Manual smoke test: generate two record sets and compare them.
    hook = MongoHook()
    hook.uri = 'mongodb://localhost:27017/'
    left, right = genTwoCase()
    operator = DataCompareOperator(runner_conf='1',
                                   task_id='11',
                                   task_id_list=['a', 'b'])
    result = operator.record_compare(left, right)
    print(result)
# myclient = pymongo.MongoClient("mongodb://localhost:27017/")
Exemple #17
0
def get_mongodb_connection():
    """Return a MongoHook bound to the 'playrecipe_mongo' connection."""
    return MongoHook(conn_id='playrecipe_mongo')
 def __init__(self, runner_conf, task_id_list, *args, **kwargs):
     """Pin the operator to the 'worker' queue and open a Mongo
     connection used by the compare routines."""
     super().__init__(queue='worker', *args, **kwargs)
     self.task_id_list = task_id_list
     self.runner_conf = runner_conf
     self.mongo_hk = MongoHook(conn_id='stocksdktest_mongo')
     self.conn = self.mongo_hk.get_conn()
Exemple #19
0
def get_mongo_hook():
    """Return a MongoHook built from the default Airflow connection."""
    hook = MongoHook()
    return hook
Exemple #20
0
import os
import logging
import fnmatch
import json
import glob
from pymongo.errors import BulkWriteError
from typing import List
from airflow.contrib.hooks.mongo_hook import MongoHook
from common.utils import *
from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk

# Setting up boto3 hook to AWS S3
s3_hook = S3Hook('my_conn_S3')
# Setting up MongoDB hook to mlab server
mongodb_hook = MongoHook('mongo_default')
# Connection-id / bucket / collection constants shared by the tasks below.
ftp_conn_id = "pubmed_ftp"
s3bucket = 'case_reports'
mongo_folder = 'casereports'


def extract_pubmed_data() -> None:
    """Extracts case-reports from pubmed data and stores result on S3
    """
    # Archive pattern and FTP location of the PubMed OA bulk dumps.
    pattern = "*.xml.tar.gz"
    ftp_path = '/pub/pmc/oa_bulk'
    # Local working directory for downloaded archives.
    root_dir = '/usr/local/airflow'
    temp_dir = os.path.join(root_dir, 'temp')
    # Destination bucket and key prefix for the raw uploads.
    bucket_name = 'supreme-acrobat-data'
    # NOTE(review): the visible portion only sets up paths/constants —
    # the download/upload logic appears truncated in this snippet.
    prefix = s3bucket + '/pubmed/original'
 def test_srv(self):
     """A connection configured with SRV must yield a mongodb+srv:// URI."""
     uri = MongoHook(conn_id='mongo_default_with_srv').uri
     self.assertTrue(uri.startswith('mongodb+srv://'))