def test_mysql_hook_test_bulk_dump_mock(self, mock_get_conn):
    mock_execute = mock.MagicMock()
    mock_get_conn.return_value.cursor.return_value.execute = mock_execute

    from airflow.hooks.mysql_hook import MySqlHook
    hook = MySqlHook('airflow_ci')
    table = "INFORMATION_SCHEMA.TABLES"
    tmp_file = "/path/to/output/file"
    hook.bulk_dump(table, tmp_file)

    from xTool.utils.tests import assertEqualIgnoreMultipleSpaces
    mock_execute.assert_called_once()
    query = """
        SELECT * INTO OUTFILE '{tmp_file}'
        FROM {table}
    """.format(tmp_file=tmp_file, table=table)
    assertEqualIgnoreMultipleSpaces(self, mock_execute.call_args[0][0], query)
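# The assertEqualIgnoreMultipleSpaces helper imported from xTool.utils.tests
# is not shown in this section; a plausible minimal sketch (hypothetical,
# assuming it only collapses whitespace runs before comparing) would be:
import re

def assertEqualIgnoreMultipleSpaces(case, first, second):
    def _trim(s):
        # Collapse every run of whitespace to a single space, strip the ends.
        return re.sub(r"\s+", " ", s).strip()
    return case.assertEqual(_trim(first), _trim(second))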
def test_mysql_to_hive_type_conversion(self, mock_load_file):
    mysql_conn_id = 'airflow_ci'
    mysql_table = 'test_mysql_to_hive'

    from airflow.hooks.mysql_hook import MySqlHook
    m = MySqlHook(mysql_conn_id)
    try:
        with m.get_conn() as c:
            c.execute("DROP TABLE IF EXISTS {}".format(mysql_table))
            c.execute("""
                CREATE TABLE {} (
                    c0 TINYINT,
                    c1 SMALLINT,
                    c2 MEDIUMINT,
                    c3 INT,
                    c4 BIGINT,
                    c5 TIMESTAMP
                )
            """.format(mysql_table))

        from airflow.operators.mysql_to_hive import MySqlToHiveTransfer
        t = MySqlToHiveTransfer(
            task_id='test_m2h',
            mysql_conn_id=mysql_conn_id,
            hive_cli_conn_id='beeline_default',
            sql="SELECT * FROM {}".format(mysql_table),
            hive_table='test_mysql_to_hive',
            dag=self.dag)
        t.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE,
              ignore_ti_state=True)

        mock_load_file.assert_called_once()
        d = OrderedDict()
        d["c0"] = "SMALLINT"
        d["c1"] = "INT"
        d["c2"] = "INT"
        d["c3"] = "BIGINT"
        d["c4"] = "DECIMAL(38,0)"
        d["c5"] = "TIMESTAMP"
        self.assertEqual(mock_load_file.call_args[1]["field_dict"], d)
    finally:
        with m.get_conn() as c:
            c.execute("DROP TABLE IF EXISTS {}".format(mysql_table))
def execute(self, context):
    try:
        _json = {}
        _data = {}
        if 'job' in context['dag_run'].conf:
            logging.debug('{0}: dag_run conf: \n {1}'.format(
                self.task_id, context['dag_run'].conf['job']))
            _json = context['dag_run'].conf['job']

        mysql = MySqlHook(mysql_conn_id=biowardrobe_connection_id)
        with closing(mysql.get_conn()) as conn:
            with closing(conn.cursor()) as cursor:
                if 'biowardrobe_uid' in context['dag_run'].conf:
                    _data = get_biowardrobe_data(
                        cursor, context['dag_run'].conf['biowardrobe_uid'])
                    _json = _data['job']
                update_status(
                    uid=_json['uid'], message='Analysing', code=11,
                    conn=conn, cursor=cursor,
                    optional_column="forcerun=0, dateanalyzes=now()")
                update_status(
                    uid=_json['uid'], message='Analysing', code=11,
                    conn=conn, cursor=cursor,
                    optional_column="dateanalyzed=now()",
                    optional_where="and dateanalyzed is null")

        return self.cwl_dispatch(_json)
        # fragment = urlsplit(self.dag.default_args["workflow"]).fragment
        # fragment = fragment + '/' if fragment else ''
        # job_order_object_extended = {fragment + key: value for key, value in job_order_object.items()}
    except Exception as e:
        # Exceptions are logged and swallowed so the dispatch task itself
        # does not fail outright.
        _logger.info('Dispatch Exception {0}: \n {1} {2}'.format(
            self.task_id, type(e), e))
def test_mysql_to_hive_type_conversion(self, mock_load_file):
    mysql_table = 'test_mysql_to_hive'

    from airflow.hooks.mysql_hook import MySqlHook
    hook = MySqlHook()
    try:
        with hook.get_conn() as conn:
            conn.execute("DROP TABLE IF EXISTS {}".format(mysql_table))
            conn.execute("""
                CREATE TABLE {} (
                    c0 TINYINT,
                    c1 SMALLINT,
                    c2 MEDIUMINT,
                    c3 INT,
                    c4 BIGINT,
                    c5 TIMESTAMP
                )
            """.format(mysql_table))

        from airflow.operators.mysql_to_hive import MySqlToHiveTransfer
        op = MySqlToHiveTransfer(
            task_id='test_m2h',
            hive_cli_conn_id='hive_cli_default',
            sql="SELECT * FROM {}".format(mysql_table),
            hive_table='test_mysql_to_hive',
            dag=self.dag)
        op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE,
               ignore_ti_state=True)

        assert mock_load_file.call_count == 1
        ordered_dict = OrderedDict()
        ordered_dict["c0"] = "SMALLINT"
        ordered_dict["c1"] = "INT"
        ordered_dict["c2"] = "INT"
        ordered_dict["c3"] = "BIGINT"
        ordered_dict["c4"] = "DECIMAL(38,0)"
        ordered_dict["c5"] = "TIMESTAMP"
        self.assertEqual(mock_load_file.call_args[1]["field_dict"],
                         ordered_dict)
    finally:
        with hook.get_conn() as conn:
            conn.execute("DROP TABLE IF EXISTS {}".format(mysql_table))
def get_data_from_mysql(filename, tablename):
    hook = MySqlHook(mysql_conn_id='mysql_baseball')
    sql = "select * from " + tablename
    rows = hook.get_records(sql)
    # Open in text mode with newline='' for the csv module; csv.writer has no
    # close() method, so the underlying file is closed by the context manager.
    with open(filename, "w", newline="") as f:
        c = csv.writer(f, quoting=csv.QUOTE_NONNUMERIC)
        for row in rows:
            c.writerow(row)
    return rows
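# Alternative sketch: DbApiHook also exposes get_pandas_df(), which covers
# the same fetch-then-dump-to-CSV pattern in two calls (assumes pandas is
# available and the 'mysql_baseball' connection exists; the function name is
# illustrative only):
def get_data_from_mysql_df(filename, tablename):
    hook = MySqlHook(mysql_conn_id='mysql_baseball')
    df = hook.get_pandas_df("select * from {}".format(tablename))
    df.to_csv(filename, index=False, quoting=csv.QUOTE_NONNUMERIC)
    return df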
def get_mysql_dataset(**kwargs):
    if ("mysql_conn_id" not in kwargs or "schema" not in kwargs
            or "sql" not in kwargs):
        raise Exception("Missing parameter mysql_conn_id, schema or sql.")
    maxrows = kwargs.get("maxrows", 0)
    how = kwargs.get("how", 1)
    mysql = MySqlHook(mysql_conn_id=kwargs["mysql_conn_id"],
                      schema=kwargs["schema"])
    conn = mysql.get_conn()
    if not conn.open:
        raise Exception("Could not open connection.")
    conn.query(kwargs["sql"])
    result = conn.store_result()
    dataset = result.fetch_row(maxrows=maxrows, how=how)
    conn.close()
    return dataset
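# Portability note: conn.query()/store_result()/fetch_row() are MySQLdb
# (mysqlclient) specifics. A driver-agnostic sketch of the same fetch using
# the hook's own API (no maxrows/how control; returns a list of tuples):
def get_mysql_dataset_portable(**kwargs):
    mysql = MySqlHook(mysql_conn_id=kwargs["mysql_conn_id"],
                      schema=kwargs["schema"])
    return mysql.get_records(kwargs["sql"])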
def __init__(self,
             src_file="/usr/local/airflow/csv_lz_2/cleaned_depts.csv",
             src_file_attr={"action": "action", "key": "department_id"},
             tgt_table_conn_id='mysql_default',
             db_table=None,
             df=None,
             output_filename="/usr/local/airflow/csv_lz_2/depts_td.csv",
             *args, **kwargs):
    super(LoadOltpOperator, self).__init__(*args, **kwargs)
    self.src_file = src_file
    self.src_file_attr = src_file_attr
    self.tgt_table_conn_id = tgt_table_conn_id
    self.db_table = db_table
    # conn_name_attr is a class attribute of DbApiHook, not a constructor
    # argument; the connection id must be passed as mysql_conn_id.
    self.conn = MySqlHook(mysql_conn_id=tgt_table_conn_id)
    self.output_filename = output_filename
def upload_db(table, tmp_file, mysql_conn_id='default_mysql'):
    df = pd.read_csv(tmp_file)
    mysql_hook = MySqlHook(mysql_conn_id=mysql_conn_id)
    print(df)
    print('###############################################################################################')
    conn = mysql_hook.get_conn()
    cursor = conn.cursor()
    cursor.execute('truncate {}'.format(table))
    conn.commit()
    print('###############################################################################################')
    df.to_sql(table,
              mysql_hook.get_sqlalchemy_engine(),
              if_exists='append',
              index=False)
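# Caveat: table names cannot be bound as query parameters in MySQL, so the
# 'truncate {}'.format(table) above is injection-prone if `table` is ever
# user-controlled. A minimal sketch of an allowlist guard (the table set
# here is a hypothetical example, not from the original code):
ALLOWED_TABLES = {"sales", "customers"}  # hypothetical allowlist

def truncate_table(cursor, table):
    if table not in ALLOWED_TABLES:
        raise ValueError("Refusing to truncate unknown table: {}".format(table))
    cursor.execute("TRUNCATE TABLE `{}`".format(table))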
def identify_popular_links(directory=RAW_TWEET_DIR, write_mode="w", **kwargs):
    """ Identify the most popular links from the last day of tweets in the db
        Writes them to latest_links.txt in the RAW_TWEET_DIR (or directory kwarg)
    """
    dbconn = MySqlHook(mysql_conn_id="mysql_default")
    # get_conn() returns a DBAPI connection; get_connection() would return
    # the Airflow Connection metadata object instead.
    conn = dbconn.get_conn()
    # date('now', '-1 days') is SQLite syntax; against MySQL use DATE_SUB.
    query = """select * from tweets where
               created > date_sub(now(), interval 1 day)
               and urls is not null
               order by favorite_count"""
    df = pd.read_sql_query(query, conn)
    df.urls = df.urls.map(ast.literal_eval)
    cntr = Counter(itertools.chain.from_iterable(df.urls.values))
    with open("{}/latest_links.txt".format(directory), write_mode) as latest:
        wrtr = writer(latest)
        wrtr.writerow(["url", "count"])
        wrtr.writerows(cntr.most_common(5))
def get_users(user_id, context):
    db = MySqlHook(mysql_conn_id='mariadb', schema="dbo")
    sql = """
        select user_id, name, culture, group_id, employee_num, anonymous_name,
               email, theme_code, date_format_code, time_format_code, time_zone,
               row_count, language_code, interface_id, phone, mobile, fax, icon,
               addsign_img, is_plural, is_notification, is_absence, is_deputy
          from users
         where user_id = %s
    """
    task = {}
    rows = db.get_records(sql, parameters=[user_id])
    for row in rows:
        # time_zone is the 11th selected column (index 10); the indices below
        # account for it so the remaining fields don't shift off by one.
        model = {
            'user_id': row[0],
            'name': row[1],
            'culture': row[2],
            'group_id': row[3],
            'employee_num': row[4],
            'anonymous_name': row[5],
            'email': row[6],
            'theme_code': row[7],
            'date_format_code': row[8],
            'time_format_code': row[9],
            'time_zone': row[10],
            'row_count': row[11],
            'language_code': row[12],
            'interface_id': row[13],
            'phone': row[14],
            'mobile': row[15],
            'fax': row[16],
            'icon': row[17],
            'addsign_img': row[18],
            'is_plural': row[19],
            'is_notification': row[20],
            'is_absence': row[21],
            'is_deputy': row[22]
        }
        task = model
    context['ti'].xcom_push(key=USERS, value=task)
    return task
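# A less error-prone sketch of the same row-to-dict mapping: zip the select
# list with each row so the column order is written down exactly once
# (USER_COLUMNS and row_to_model are illustrative names, not from the source):
USER_COLUMNS = [
    'user_id', 'name', 'culture', 'group_id', 'employee_num', 'anonymous_name',
    'email', 'theme_code', 'date_format_code', 'time_format_code', 'time_zone',
    'row_count', 'language_code', 'interface_id', 'phone', 'mobile', 'fax',
    'icon', 'addsign_img', 'is_plural', 'is_notification', 'is_absence',
    'is_deputy',
]

def row_to_model(row):
    return dict(zip(USER_COLUMNS, row))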
def insert_process(**kwargs):
    ti = kwargs['ti']
    # Pull the file produced by the transform step
    source_file = ti.xcom_pull(task_ids='transform_process')
    # Database connection
    db_connection = MySqlHook('airflow_db').get_sqlalchemy_engine()
    df = pd.read_csv(source_file)
    with db_connection.begin() as transaction:
        # Clear out the table on every run before re-loading it
        transaction.execute(
            "DELETE FROM covid.time_series_covid19_confirmed_global WHERE 1=1")
        df.to_sql("time_series_covid19_confirmed_global",  # table name
                  con=transaction,
                  schema="covid",
                  if_exists="append",
                  index=False)
    os.remove(source_file)
def index(self):
    sql = """
        SELECT
            a.name as db, db_location_uri as location,
            count(1) as object_count, a.desc as description
        FROM DBS a
        JOIN TBLS b ON a.DB_ID = b.DB_ID
        GROUP BY a.name, db_location_uri, a.desc
    """
    h = MySqlHook(METASTORE_MYSQL_CONN_ID)
    df = h.get_pandas_df(sql)
    df.db = ('<a href="/admin/metastorebrowserview/db/?db=' +
             df.db + '">' + df.db + '</a>')
    table = df.to_html(
        classes="table table-striped table-bordered table-hover",
        index=False,
        escape=False,
        na_rep='',
    )
    return self.render("metastore_browser/dbs.html", table=table)
def url_details_create_insert(*args, **kwargs):
    with open("/Users/preetiyerkuntwar/documents/Zomato-test/all_restro.json",
              "r") as f:
        result = json.load(f)

    normalizedL1 = pd.json_normalize(result["restaurants"], max_level=1) \
        .drop(columns=['restaurant.apikey']) \
        .set_index('restaurant.id')
    daL2 = normalizedL1.reset_index()
    url_details = daL2[dfNameDict['url']]

    mysql_hook = MySqlHook(mysql_conn_id="mysql_zomato")
    connection = mysql_hook.get_conn()
    curr = connection.cursor()
    for i, row in url_details.iterrows():
        curr.execute(insert_into_sql_statements.INSERT_TABLE_URL_DETAILS, row)
    connection.commit()
    curr.close()
def create_table():
    # Drop and re-create table
    connection = MySqlHook(mysql_conn_id='mysql_default')
    sql = '''CREATE TABLE IF NOT EXISTS `swapi_data`.`swapi_people` (
        `id` int(11) NOT NULL auto_increment,
        `name` varchar(100) NOT NULL default '',
        `birth_year` varchar(100) NOT NULL default '',
        `film` varchar(100) NOT NULL default '',
        `film_name` varchar(100) NOT NULL default '',
        `url` varchar(100) NOT NULL default '',
        `birth_year_number` DECIMAL(4,1) NOT NULL default 0,
        PRIMARY KEY (`id`)
    );'''
    connection.run(sql, autocommit=True, parameters=())
    sql = '''DELETE FROM `swapi_data`.`swapi_people`;'''
    connection.run(sql, autocommit=True, parameters=())
    return True
def execute(self, context):
    biowardrobe_uid = context['dag_run'].conf.get('biowardrobe_uid')
    if not biowardrobe_uid:
        raise Exception('biowardrobe_uid must be provided')
    run_id = context['dag_run'].conf['run_id'] \
        if 'run_id' in context['dag_run'].conf \
        else 'trig__{}__{}'.format(biowardrobe_uid, uuid.uuid4())
    _logger.info('Successfully finished: {}'.format(biowardrobe_uid))

    mysql = MySqlHook(mysql_conn_id=biowardrobe_connection_id)
    with closing(mysql.get_conn()) as conn:
        with closing(conn.cursor()) as cursor:
            cursor.execute(
                "update ems.labdata set libstatus=10, "
                "libstatustxt='downloaded' where uid=%s",
                (biowardrobe_uid,))
            conn.commit()
            data = get_biowardrobe_data(cursor=cursor,
                                        biowardrobe_uid=biowardrobe_uid)

    dag_id = os.path.basename(os.path.splitext(data['workflow'])[0])
    payload = {'biowardrobe_uid': biowardrobe_uid, 'run_id': run_id}
    _logger.info("Trigger basic analysis with: {}".format(payload))

    session = settings.Session()
    dr = DagRun(dag_id=dag_id,
                run_id=run_id,
                conf=payload,
                execution_date=datetime.now(),
                external_trigger=True)
    logging.info("Creating DagRun {}".format(dr))
    session.add(dr)
    session.commit()
    session.close()
def execute(self, context):
    _job_result, promises = self.cwl_gather(context)

    mysql = MySqlHook(mysql_conn_id=biowardrobe_connection_id)
    with closing(mysql.get_conn()) as conn:
        with closing(conn.cursor()) as cursor:
            _data = get_biowardrobe_data(cursor, promises['uid'])
            _params = loads(_data['params'])
            _promoter = _params['promoter'] if 'promoter' in _params else 1000
            _params = _job_result
            _params['promoter'] = _promoter
            try:
                upload_results_to_db2(
                    upload_rules=loads(_data['upload_rules']),
                    uid=promises['uid'],
                    output_folder=self.output_folder,
                    cursor=cursor,
                    conn=conn)
                update_status(
                    uid=promises['uid'], message='Complete:upgraded', code=12,
                    conn=conn, cursor=cursor,
                    optional_column="dateanalyzee=now(),params='{}'".format(
                        dumps(_params)))
            except BiowBasicException as ex:
                update_status(
                    uid=promises['uid'], message=f'Fail:{ex}', code=2010,
                    conn=conn, cursor=cursor,
                    optional_column="dateanalyzee=now(),params='{}'".format(
                        dumps(_params)))
    return _job_result
def insert_db(**context):
    # get variable
    obj = Variable.get('TMDB_API', deserialize_json=True)
    # request trending movies page by page
    page = 1
    # for i in range(data.total_pages):
    for i in range(2):
        response = requests.get(obj["host"] + 'trending/all/week?api_key=' +
                                obj["key"] + '&page=' + str(page))
        data = response.json()
        # collect this page's results into rows
        rows = []
        for key in data['results']:
            if 'title' in key:
                title = key['title'].encode("utf-8")
                ori_title = key['original_title'].encode("utf-8")
                release_date = key['release_date']
            else:
                title = key['name'].encode("utf-8")
                ori_title = key['original_name'].encode("utf-8")
                release_date = key['first_air_date']
            # join the genre ids into a comma-separated string
            genres = ','.join(map(str, key['genre_ids']))
            row = (key['id'], title, release_date, ori_title, genres,
                   key['media_type'], key['vote_average'])
            rows.append(row)
        # save this page's rows; note the connection id keyword is
        # mysql_conn_id, not default_conn_name
        api = MySqlHook(mysql_conn_id='mysql_default')
        api.insert_rows(table='movie', rows=tuple(rows))
        page += 1
    return 'success'
def get_columns_and_exclude(conn_id, table_name, l_columns_exclude):
    """
    :param conn_id: connection id to connect to
    :param table_name: table to get the columns
    :param l_columns_exclude: list of strings of columns to exclude
    :return: list of strings without the excluded columns
    """
    mysql_hook = MySqlHook(conn_id)
    sql_query = "SHOW COLUMNS FROM {}".format(table_name)
    all_records = mysql_hook.get_records(sql_query)
    l_columns_after_exclude = [
        "t.`{}`".format(l_row[0]) for l_row in all_records
        if l_row[0] not in l_columns_exclude
    ]
    logging.debug(
        "Columns after exclude: '{}'".format(l_columns_after_exclude))
    return l_columns_after_exclude
def test_mysql_hook_test_bulk_load(self):
    records = ("foo", "bar", "baz")

    import tempfile
    with tempfile.NamedTemporaryFile() as t:
        t.write("\n".join(records).encode('utf8'))
        t.flush()

        from airflow.hooks.mysql_hook import MySqlHook
        h = MySqlHook('airflow_ci')
        with h.get_conn() as c:
            c.execute("""
                CREATE TABLE IF NOT EXISTS test_airflow (
                    dummy VARCHAR(50)
                )
            """)
            c.execute("TRUNCATE TABLE test_airflow")
            h.bulk_load("test_airflow", t.name)
            c.execute("SELECT dummy FROM test_airflow")
            results = tuple(result[0] for result in c.fetchall())
            self.assertEqual(sorted(results), sorted(records))
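# For context: MySqlHook.bulk_load wraps MySQL's LOAD DATA LOCAL INFILE,
# roughly equivalent to the sketch below (an approximation of the hook's
# behavior; the server and client both need local_infile enabled):
def bulk_load_sketch(hook, table, tmp_file):
    hook.run("""
        LOAD DATA LOCAL INFILE '{tmp_file}'
        INTO TABLE {table}
    """.format(tmp_file=tmp_file, table=table))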
def copy(ds, **kwargs):
    source_query = """select * from address;"""

    # Source side: a plain SQLAlchemy engine against Postgres
    source_engine = create_engine(
        'postgresql+psycopg2://airflow:airflow@postgres/airflow')
    source_conn = source_engine.connect()
    records = source_conn.execute(source_query)

    # Destination side: MySQL via the Airflow hook
    dest_hook = MySqlHook(mysql_conn_id="target", schema="mysql")
    dest_conn = dest_hook.get_conn()
    dest_cursor = dest_conn.cursor()
    if records:
        # logging.info("Inserting rows into MySQL")
        dest_hook.insert_rows(table="address", rows=records)
    dest_cursor.close()
    source_conn.close()
    dest_conn.close()
def user_ratings_insert(*args, **kwargs):
    with open("/Users/preetiyerkuntwar/documents/Zomato-test/all_restro.json",
              "r") as f:
        result = json.load(f)

    normalizedL1 = pd.json_normalize(result["restaurants"], max_level=1) \
        .drop(columns=['restaurant.apikey']) \
        .set_index('restaurant.id')
    daL2 = normalizedL1.reset_index()
    user_rating = daL2[dfNameDict['user_rating']]
    user_rating = pd.concat([
        pd.json_normalize(user_rating.drop(['restaurant.id'], axis=1)
                          ['restaurant.user_rating']),
        user_rating['restaurant.id']
    ], axis=1)

    user_rating['restaurant.id'] = pd.to_numeric(user_rating['restaurant.id'])
    user_rating['aggregate_rating'] = user_rating['aggregate_rating'].astype(float)
    user_rating['rating_text'] = user_rating['rating_text'].astype(str)
    user_rating['rating_color'] = user_rating['rating_color'].astype(str)
    user_rating['rating_obj.title.text'] = user_rating['rating_obj.title.text'].astype(float)
    user_rating['rating_obj.bg_color.type'] = user_rating['rating_obj.bg_color.type'].astype(str)
    user_rating['rating_obj.bg_color.tint'] = user_rating['rating_obj.bg_color.tint'].astype(str)

    mysql_hook = MySqlHook(mysql_conn_id="mysql_zomato")
    connection = mysql_hook.get_conn()
    curr = connection.cursor()
    for i, row in user_rating.iterrows():
        # print(row)
        curr.execute(insert_into_sql_statements.INSERT_TABLE_USER_RATINGS, row)
    connection.commit()
    curr.close()
def copy_from_func(**context):
    biowardrobe_uid = context['dag_run'].conf.get('biowardrobe_uid')
    if not biowardrobe_uid:
        raise Exception('biowardrobe_uid must be provided')

    data = {}
    _tmpfiles1 = []
    _tmpfiles2 = []
    mysql = MySqlHook(mysql_conn_id=biowardrobe_connection_id)
    with closing(mysql.get_conn()) as conn:
        with closing(conn.cursor()) as cursor:
            data = get_biowardrobe_data(cursor=cursor,
                                        biowardrobe_uid=biowardrobe_uid)
            cursor.execute("select uid from labdata where id in (" +
                           data['url'].replace(' ', ',') + ")")
            # Rows are accessed by name below, so the connection is expected
            # to hand back a dict-style cursor.
            for row in cursor.fetchall():
                _copy_from = get_biowardrobe_data(cursor=cursor,
                                                  biowardrobe_uid=row['uid'])
                if _copy_from['pair'] == data['pair']:
                    _tmpfiles1.append(_copy_from['fastq_file_upstream'])
                    if data['pair']:
                        _tmpfiles2.append(_copy_from['fastq_file_downstream'])

    pathlib.Path(data['output_folder']).mkdir(parents=True, exist_ok=True,
                                              mode=0o777)
    bufsize = 16 * 1024
    with open(data['fastq_file_upstream'], "wb") as outfile:
        for filename in _tmpfiles1:
            _logger.info("Adding " + filename + "...")
            with open(filename, "rb") as fq_file:
                copyfileobj(fq_file, outfile, bufsize)
    if data['pair']:
        with open(data['fastq_file_downstream'], "wb") as outfile:
            for filename in _tmpfiles2:
                _logger.info("Adding " + filename + "...")
                with open(filename, "rb") as fq_file:
                    copyfileobj(fq_file, outfile, bufsize)
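# row['uid'] above relies on dict-style rows; the default MySQLdb cursor
# returns plain tuples. With Airflow's MySqlHook this can be requested via
# the connection's extras, e.g. {"cursor": "dictcursor"}. A direct MySQLdb
# sketch of the same thing (all connection values here are placeholders):
import MySQLdb
import MySQLdb.cursors

conn = MySQLdb.connect(host="localhost", user="biowardrobe",  # placeholders
                       passwd="...", db="ems",
                       cursorclass=MySQLdb.cursors.DictCursor)  # rows as dicts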
def readS3FilesAndLoadtoMySql(**kwargs):
    """
    Read data from S3 files and load it into MySQL.
    :param kwargs:
    :return:
    """
    s3_files = fetchFilesBasedonPattern(**kwargs)
    tmp_trg_file_path = "/tmp/s3mysqlload_" + \
        str(round(datetime.now().timestamp())) + "/"
    if s3_files is None:
        raise Exception("No files are available to process")

    files_df = pd.DataFrame()
    kwargs['ti'].xcom_push(key='s3_data_files', value=",".join(s3_files))
    s3_client = generateS3Hook(kwargs["aws_conn_id"])
    for path in s3_files:
        file_name = getFileName(path)
        body = io.BytesIO(s3_client.get_key(
            key=path, bucket_name=kwargs['src_bucket']).get()['Body'].read())
        if ".csv" in file_name.lower():
            files_df = files_df.append(pd.read_csv(body))
        elif ".json" in file_name.lower():
            files_df = files_df.append(pd.read_json(body))

    if len(files_df) == 0:
        raise Exception("Source files are empty")

    if not os.path.exists(tmp_trg_file_path):
        os.makedirs(tmp_trg_file_path)
    file_path = tmp_trg_file_path + \
        str(round(datetime.now().timestamp())) + ".tsv"
    files_df.to_csv(file_path, sep="\t", index=False, header=False,
                    line_terminator="\n")
    mysql_client = MySqlHook(mysql_conn_id=kwargs["mysql_conn"])
    mysql_client.bulk_load(table=kwargs["schema"] + "." + kwargs["table"],
                           tmp_file=file_path)
    shutil.rmtree(tmp_trg_file_path)
def send_aggregate_to_requestbin():
    target = 'http://requestbin.net/r/zorarbzo'
    connection = MySqlHook(mysql_conn_id='mysql_default')
    sql = '''
        SELECT film_name, name, birth_year
        FROM `swapi_data`.`swapi_people_aggregate`;
    '''
    result = connection.get_records(sql)
    data = []
    for item in result:
        data.append({
            "film_name": item[0],
            "name": item[1],
            "birth_year": str(item[2])
        })
    result = requests.post(target, data=json.dumps(data))
    return result
def local_to_mysql():
    connection = MySqlHook(mysql_conn_id='youtube_db')
    query = '''
        CREATE TABLE IF NOT EXISTS `group3`.`youtube7` (
            `video_id` VARCHAR(100) NOT NULL,
            `title` VARCHAR(100) NULL,
            `publishedAt` VARCHAR(45) NULL,
            `channelId` VARCHAR(45) NULL,
            `channelTitle` VARCHAR(60) NULL,
            `categoryId` INT NULL,
            `trending_date` DATETIME NULL,
            `tags` LONGTEXT NULL,
            `view_count` INT NULL,
            `likes` INT NULL,
            `dislikes` INT NULL,
            `comment_count` INT NULL,
            `thumbnail_link` VARCHAR(100) NULL,
            `comments_disabled` TINYINT NULL,
            `ratings_disabled` TINYINT NULL,
            `description` LONGTEXT NULL,
            PRIMARY KEY (`video_id`));
    '''
    connection.run(query, autocommit=True)

    # df = pd.read_csv(temp_youtube_trending_vids)
    df = pd.concat(
        [pd.read_csv(f, sep=',') for f in glob.glob('/temp' + "/*.csv")],
        ignore_index=True)
    df = df.where((pd.notnull(df)), None)
    query = '''
        INSERT IGNORE INTO group3.youtube7
        (video_id, title, publishedAt, channelId, channelTitle, categoryId,
         trending_date, tags, view_count, likes, dislikes, comment_count,
         thumbnail_link, comments_disabled, ratings_disabled, description)
        VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
    '''
    for i, row in df.iterrows():
        try:
            connection.run(query, autocommit=True, parameters=tuple(row))
        except Exception:
            # Skip rows that fail to insert; a bare `except:` would also
            # swallow KeyboardInterrupt/SystemExit.
            pass
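# Row-at-a-time connection.run() does a round of work per row; a sketch of
# the same load as a single executemany batch (same INSERT IGNORE statement;
# note this variant loses the per-row skip-on-failure behavior above):
def local_to_mysql_batch(connection, df, query):
    conn = connection.get_conn()
    cursor = conn.cursor()
    cursor.executemany(query, [tuple(row) for _, row in df.iterrows()])
    conn.commit()
    cursor.close()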
def objects(self):
    where_clause = ''
    if DB_WHITELIST:
        dbs = ",".join(["'" + db + "'" for db in DB_WHITELIST])
        where_clause = "AND b.name IN ({})".format(dbs)
    if DB_BLACKLIST:
        dbs = ",".join(["'" + db + "'" for db in DB_BLACKLIST])
        where_clause = "AND b.name NOT IN ({})".format(dbs)
    sql = """
        SELECT CONCAT(b.NAME, '.', a.TBL_NAME), TBL_TYPE
        FROM TBLS a
        JOIN DBS b ON a.DB_ID = b.DB_ID
        WHERE
            a.TBL_NAME NOT LIKE '%tmp%' AND
            a.TBL_NAME NOT LIKE '%temp%' AND
            b.NAME NOT LIKE '%tmp%' AND
            b.NAME NOT LIKE '%temp%'
        {where_clause}
        LIMIT {LIMIT};
    """.format(where_clause=where_clause, LIMIT=TABLE_SELECTOR_LIMIT)
    h = MySqlHook(METASTORE_MYSQL_CONN_ID)
    d = [{'id': row[0], 'text': row[0]} for row in h.get_records(sql)]
    return json.dumps(d)
def execute(self, context):
    dest_mysql = MySqlHook(mysql_conn_id=self.dest_mysqls_conn_id)
    # Pull rows from XCom when a source task id is configured; otherwise use
    # the cursor handed to the operator directly.
    if self.data_cursor:
        self.cursor = context['ti'].xcom_pull(key=None,
                                              task_ids=self.data_cursor)
    logging.info("Transferring cursor into new MySQL database.")
    if self.mysql_preoperator:
        logging.info("Running MySQL preoperator")
        dest_mysql.run(self.mysql_preoperator)
    if self.cursor:
        dest_mysql.insert_rows(table=self.dest_table, rows=self.cursor)
        logging.info("%s rows inserted", self.cursor.rowcount)
    else:
        logging.info("No rows inserted")
    if self.mysql_postoperator:
        logging.info("Running MySQL postoperator")
        dest_mysql.run(self.mysql_postoperator)
    logging.info("Done.")
def set_signers(doc, group, context):
    db = MySqlHook(mysql_conn_id='mariadb', schema="dapp")
    sql = """
        insert into signers(instance_id, sign_area_id, sequence,
                            sub_instance_id, sign_section, sign_position,
                            sign_action, is_executed, group_culture, group_id,
                            group_name, created_date, received_date,
                            approved_date)
        values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s);
    """
    is_executed = True
    sub_instance_id = 0
    sign_action = STATUS_00  # already approved
    db.run(sql,
           autocommit=True,
           parameters=[
               doc.find('instance_id').text,
               doc.find('sign_area_id').text,
               doc.find('sequence').text,
               sub_instance_id,
               doc.attrib['sign_section'],
               doc.attrib['sign_position'],
               sign_action,
               is_executed,
               group['culture'],
               doc.find('group_id').text,
               group['name'],
               datetime.now(),
               datetime.now(),
               datetime.now()
           ])
def save_transactions(**kwargs):
    transactions = kwargs['task_instance'].xcom_pull(
        key='transactions', task_ids='get_transactions')
    transactions['dt'] = month
    transactions['value'] = transactions['value'].astype(float)
    transactions['date'] = transactions['date'].dt.strftime('%Y-%m-%d')

    mysql = MySqlHook(mysql_conn_id='credit_card_processor')
    mysql_conn = mysql.get_conn()
    cursor = mysql_conn.cursor()
    cursor.execute(config['mysql']['create_transaction_table'])

    wildcards = ','.join(['%s'] * len(transactions.columns))
    colnames = ','.join(transactions.columns)
    insert_sql = config['mysql']['create_transaction'] % (
        config['mysql']['transaction_table'], colnames, wildcards)
    data = [tuple(rw) for rw in transactions.values]
    cursor.executemany(insert_sql, data)
    mysql_conn.commit()
    cursor.close()
def etl_process(**kwargs):
    logger.info(kwargs["execution_date"])
    file_path = FSHook(FILE_CONNECTION_NAME).get_path()
    filename = 'sales.csv'
    mysql_connection = MySqlHook(
        mysql_conn_id=CONNECTION_DB_NAME).get_sqlalchemy_engine()
    full_path = f'{file_path}/{filename}'
    df = (pd.read_csv(full_path,
                      encoding="ISO-8859-1",
                      usecols=COLUMNS.keys(),
                      parse_dates=DATE_COLUMNS)
          .rename(columns=COLUMNS))
    with mysql_connection.begin() as connection:
        connection.execute("DELETE FROM test.sales WHERE 1=1")
        df.to_sql('sales',
                  con=connection,
                  schema='test',
                  if_exists='append',
                  index=False)
    os.remove(full_path)
    logger.info(f"Rows inserted {len(df.index)}")