async def main(mytimer: azure.functions.TimerRequest, starter: str):
    """Timer-triggered entry point: starts the F_orchestrator durable
    orchestration with fixed parameters for a scheduled prestashop pull.

    :param mytimer: Azure Functions timer binding (only used as the trigger).
    :param starter: durable-functions client binding payload.
    """
    try:
        orchestrator_name = "F_orchestrator"
        client = df.DurableOrchestrationClient(starter)
        # Scheduled run: full action over the last day of prestashop data,
        # all models (model=None).
        req_params = {
            'trigger': 'scheduled',
            'source': 'prestashop',
            'last_days': '1',
            'model': None,
            'action': 'full'
        }
        req_body = {'status': 'TODO'}
        # NOTE(review): orc_input is assembled but only req_params is forwarded
        # to the orchestrator (same convention as the HTTP starter) — confirm
        # whether the body was meant to be sent too.
        orc_input = {'params': req_params, 'body': req_body}
        instance_id = await client.start_new(orchestrator_name, None, req_params)
        logger.info(f"Started orchestration with ID = '{instance_id}'.")
    except Exception:
        # BUG FIX: traceback.print_exc() writes to stderr and returns None, so
        # this log line always read "None"; format_exc() returns the traceback
        # as a string.
        logger.error("F_prestashop_timer :: {}".format(traceback.format_exc()))
def apply_changes(self, plan):
    """Apply a change 'plan' (Terraform-inspired, but for SQLAlchemy models).

    The plan dict carries the target schema plus the lists of tables to drop
    ('delete') and to (re)create ('create').  Returns a result dict holding
    'status', 'message' and the original 'plan'.
    """
    outcome = {}
    message = ""
    schema = plan['schema']
    tables_to_drop = plan['delete']
    tables_to_build = plan['create']
    if not tables_to_drop and not tables_to_build:
        # Empty plan: nothing to drop, nothing to create.
        message = "DB CHANGE: Nothing to change in the current plan. No action will be applied on the db."
        logger.info(message)
        outcome['status'] = 'not applied'
    else:
        logger.info("DB CHANGE: Applying change plan: {}".format(plan))
        try:
            # for documentation on this : refer to https://docs.sqlalchemy.org/en/14/orm/extensions/automap.html
            AutoBase.prepare(engine=self.engine, schema=schema, reflect=True)
            if tables_to_drop:
                # drop the obsolete tables via SQLAlchemy, then reset the
                # reflected metadata so it does not go stale
                self.delete_tables(schema, tables_to_drop)
                AutoBase.metadata.clear()
            if tables_to_build:
                # (re)create tables from the connector's manifest definition
                self.create_models(schema, tables_to_build)
                AutoBase.metadata.clear()
            message = "Successfully applied changes to the DB."
            logger.info(message)
            outcome['status'] = 'success'
        except Exception as e:
            message = "DB CHANGE: Error {}".format(e)
            logger.error(message)
            outcome['status'] = 'error'
    outcome['message'] = message
    outcome['plan'] = plan
    return outcome
def apply_transforms(self, transforms):
    """Run the chain of pandas transform steps described by a manifest dict.

    Each step names an operation (a method on self), its params, an input
    dataframe and an output name; outputs are buffered for chaining.  A
    failing step is logged and skipped (best-effort).  Returns the dataframe
    produced by the last successful step, or None.
    """
    source = transforms['Source']
    buffered = self.load_tables(source, transforms['Tables'])
    latest_df = None
    for step in transforms['Steps']:
        step_name = step['Step']
        logger.debug("STEP: {}".format(step))
        try:
            logger.info("{}::{} - Executing Step".format(source, step_name))
            operation = step['type']
            kwargs = step['params']
            output_name = step['output']
            # swap the dataframe names in the params for the actual dataframes
            kwargs['origin_df'] = buffered[step['input']]
            if 'right_input' in step:
                kwargs['right_df'] = buffered[step['right_input']]
            logger.debug("STEP PARAMS: {}".format(kwargs))
            # resolve the operation to a bound method and apply it
            transform_fn = getattr(self, operation)
            latest_df = transform_fn(**kwargs)
            logger.debug(latest_df.head(10))
            # buffer the output so later steps can chain on it
            buffered[output_name] = latest_df
            if 'save' in step and step['save']:
                logger.info("Saving dataframe {}::{}".format(source, output_name))
                self.save(latest_df, source, output_name)
        except Exception as e:
            logger.error("{}::{} error: {}".format(source, step_name, e))
            continue
    return latest_df
def main(params: dict) -> dict:
    """Apply every transform manifest matching the requested schema.

    Scans TRANSFORMS_DIR, loads each manifest, and runs its pandas transforms
    when its Source matches params['source'].  Returns the input params plus a
    per-manifest applied/skipped map (or the error text on failure).
    """
    returnStr = ""
    try:
        connector = PandasSQLConnector.load_default()
        schema = params['source']
        trigger = params['trigger']
        results = {}
        logger.info(
            "Extend Data Table operation started. Trigger : {} - Schema: {}".format(trigger, schema))
        for filename in os.listdir(TRANSFORMS_DIR):
            manifest = load_conf(filename, subfolder='transforms')
            if manifest['Source'] != schema:
                logger.info("Skipping filtered schema : {}".format(manifest))
                results[filename] = 'skipped'
                continue
            logger.info("Applying pandas transforms from manifest: {}".format(filename))
            connector.apply_transforms(manifest)
            results[filename] = 'applied'
        returnStr = "Extend Data Table ended. Results: {}".format(results)
        logger.info(returnStr)
        output_results = {'params': params, 'results': results}
    except Exception as e:
        returnStr = '{}'.format(e)
        logger.error(e)
        output_results = {'params': params, 'results': returnStr}
    return output_results
async def main(req: func.HttpRequest, starter: str) -> func.HttpResponse:
    """HTTP-triggered starter: launches the durable orchestration named in the
    route and returns the standard durable-functions status-check response.

    Query-string parameters last_days/source/model/action are forwarded to the
    orchestrator; missing ones default to None.
    """
    try:
        client = df.DurableOrchestrationClient(starter)
        logger.info("request parameters: {}".format(req.params))
        expected_params = ['last_days', 'source', 'model', 'action']
        req_body = {'status': 'TODO'}
        # Missing query-string parameters default to None (.get avoids the
        # LBYL double lookup of `key in req.params.keys()`).
        params = {key: req.params.get(key) for key in expected_params}
        params['trigger'] = 'http'
        # 'model' may carry a comma-separated list of model names.
        models_raw = params['model']
        params['model'] = models_raw.split(',') if models_raw else None
        # NOTE(review): orc_input is assembled but only `params` is forwarded
        # to the orchestrator — confirm whether the body was meant to be sent.
        orc_input = {'params': params, 'body': req_body}
        instance_id = await client.start_new(req.route_params["functionName"], None, params)
        logger.info(f"Started orchestration with ID = '{instance_id}'.")
        return client.create_check_status_response(req, instance_id)
    except Exception as e:
        logger.error("F_starter :: {}".format(e))
        # BUG FIX: previously fell through and returned None, which the
        # Functions host reports as an opaque failure; surface an explicit 500.
        return func.HttpResponse("F_starter :: {}".format(e), status_code=500)
def main(params: dict) -> dict:
    """Dispatch a DB maintenance action to the Azure SQL connector.

    Supported actions: 'build', 'destroy', 'drop', 'examine' (plan only),
    'apply' (plan + apply).  Returns the connector's result, a per-schema
    result dict, or an error/warning message string.
    """
    result = {}
    try:
        azconn = AzureSQLConnector.load_default()
        schema = params['source']
        # No source given -> operate on every schema the connector map knows.
        schema_list = ([schema] if schema else CONNECTOR_MAP.keys())
        action = params['action']
        models = params['model']
        if action == 'build':
            result = azconn.create_db(schema_list)
        elif action == 'destroy':
            result = azconn.delete_db(schema_list)
        elif action == 'drop':
            result = azconn.delete_tables(schema, models)
        elif action == 'examine':
            for schema in schema_list:
                result[schema] = azconn.plan_changes(schema)
        elif action == 'apply':
            for schema in schema_list:
                plan = azconn.plan_changes(schema)
                result[schema] = azconn.apply_changes(plan)
        else:
            returnMsg = "F_db_activity :: Invalid value provided for 'action' parameter: {}".format(
                action)
            logger.warning(returnMsg)
            result = returnMsg
    except Exception:
        # BUG FIX: traceback.print_exc() prints to stderr and returns None, so
        # the logged/returned message was always "None"; format_exc() returns
        # the traceback text.
        returnMsg = 'F_db_activity error :: {}'.format(traceback.format_exc())
        logger.error(returnMsg)
        result = returnMsg
    return result
def main(params: dict) -> dict:
    """Fetch data for each requested model from the source API and push each
    dataset into Azure SQL.

    Returns a dict with the input params and the per-model insert results, or
    the traceback text when the operation fails.
    """
    returnStr = ""
    try:
        # params = orc_input['params']
        source, last_days, models = format_params(params)
        trigger = params['trigger']
        results = {}
        azconn = AzureSQLConnector.load_default()
        initStr = "Fetch operation started. Trigger: {} Source: {} - Models: {} - LAST_DAYS={}".format(
            trigger, source, models, last_days)
        logger.info(initStr)
        client = get_client(source)
        for model_name in models:
            logger.info('Extracting data from Model: {}'.format(model_name))
            jsonpath, dataset = client.get_data(model_name, last_days=last_days)
            # push to Azure SQL
            result = azconn.insert_dataset(dataset)
            results[model_name] = result
        returnStr = "Fetch operation ended. Trigger: {} - Source: {} - LAST_DAYS={}\nRESULTS: {}".format(
            trigger, source, last_days, results)
        logger.info(returnStr)
        output_results = {'params': params, 'results': results}
    except Exception as e:
        # BUG FIX: traceback.print_exc() returns None, so the result payload
        # carried the string "None" instead of the traceback; format_exc()
        # returns the traceback text.
        returnStr = 'F_fetch_data.fetch_data :: {}'.format(traceback.format_exc())
        logger.error(e)
        output_results = {'params': params, 'results': returnStr}
    return output_results
def update_from_json(self, dataset):
    """Upsert the rows of a JSON dataset into the matching reflected table.

    dataset: dict with a 'header' (carrying 'schema' and 'model') and a
    'data' list of row dicts, each expected to carry an 'Id' primary key.
    Rows already present (matched on Id) are updated field by field; new
    rows are inserted.  Returns 'committed' on success or 'rolled back'
    when any row fails (the whole batch is one transaction).
    """
    header = dataset['header']
    schema = header['schema']
    model_name = header['model']
    result = None
    logger.info("Loading DB schema: {}".format(schema))
    # for documentation on this : refer to https://docs.sqlalchemy.org/en/14/orm/extensions/automap.html
    # Reflect the target schema into a fresh automap base, then look up the
    # mapped class for this model by name.
    AutoBase = automap_base()
    AutoBase.prepare(engine=self.engine, schema=schema, reflect=True)
    logger.debug("loading modelObject")
    modelObject = getattr(AutoBase.classes, model_name)
    logger.debug("Opening Session")
    session = self.SessionFactory()
    # This is very important, so the data is inserted in the right schema
    # (schema_translate_map is set before any query runs on this session).
    session.connection(
        execution_options={"schema_translate_map": {
            schema: schema
        }})
    logger.info("Saving JSON file to {}".format(self.dbname))
    logger.debug("JSON Header: {}".format(header))
    try:
        for dict_item in dataset['data']:
            id = dict_item['Id']
            # Look for an existing row with this primary key.
            objectInstance = session.query(modelObject).filter(
                modelObject.Id == id).first()
            # if object not found in the db, create it
            if objectInstance is None:
                logger.debug(
                    "Object {} with ID={} not found in DB. Creating.".
                    format(model_name, id))
                objectInstance = modelObject(**dict_item)
                session.add(objectInstance)
            # if already present, update all its fields
            else:
                logger.debug(
                    "Object {} with ID={} found in DB. Updating.".format(
                        model_name, id))
                # Drop 'Id' so it is not overwritten below.
                # NOTE(review): pop() mutates the caller's dict_item in place —
                # confirm callers do not reuse the dataset afterwards.
                id = dict_item.pop('Id')
                for key, value in dict_item.items():
                    setattr(objectInstance, key, value)
            logger.debug("inserted record {}".format(dict_item.values()))
        # Single transaction for the whole dataset: all rows or none.
        logger.info("Committing...")
        session.commit()
        result = 'committed'
    except Exception as e:
        logger.error("SQL connector update_from_json: {}".format(e))
        session.rollback()
        result = 'rolled back'
    finally:
        session.close()
    return result