def get_vertica_results(credentials, query):
    """Run a single query in Vertica and return the results.

    Args:
        credentials: URL of a JSON document providing the ``username``,
            ``password``, ``host`` and ``port`` keys used to connect.
        query: SQL text to execute.

    Returns:
        All rows produced by the query, as returned by ``cursor.fetchall()``.

    Raises:
        ImportError: if the Vertica client library is not available.
    """
    # Fail fast, before any I/O: checking only after connect() would mean a
    # missing client library surfaces as an unrelated error from the connect
    # call itself, and an opened connection would never be closed.
    if not vertica_client_available:
        raise ImportError('Vertica client library not available')

    credentials_target = ExternalURL(url=credentials).output()
    with credentials_target.open('r') as credentials_file:
        cred = json.load(credentials_file)

    # TODO: externalize autocommit and read timeout.
    connection = vertica_python.connect(
        user=cred.get('username'),
        password=cred.get('password'),
        host=cred.get('host'),
        port=cred.get('port'),
        database='warehouse',
        autocommit=False,
        read_timeout=None,
    )

    try:
        cursor = connection.cursor()
        cursor.execute(query)
        results = cursor.fetchall()
    finally:
        # Always release the connection, even if the query fails.
        connection.close()

    return results
def get_downstream_task(self):
    """Build the script task chain on first use and yield its final task.

    When ``self.downstream_task`` is not yet set, the YAML document at
    ``self.script_configuration`` is loaded and one ``RunVerticaSqlScriptTask``
    is created for every valid entry under its ``scripts`` key.  Each task
    receives the previously created task via ``add_dependency()``, so the
    last task created ends up depending on all the earlier ones.  That last
    task is stored in ``self.downstream_task``.

    Yields:
        ``self.downstream_task`` if one is set; nothing when the
        configuration is not a mapping or contains no valid script entries.
    """
    if self.downstream_task is None:
        script_conf_target = ExternalURL(url=self.script_configuration).output()
        with script_conf_target.open('r') as script_conf_file:
            config = yaml.safe_load(script_conf_file)

        # isinstance() already rejects None, e.g. from an empty file.
        if isinstance(config, dict):
            # A bare ``scripts:`` key loads as None rather than falling back
            # to the .get() default, so normalise it to an empty list.
            scripts = config.get('scripts') or []

            # Walk the scripts in file order, chaining each new task to the
            # one created before it so they run sequentially, top to bottom.
            previous_task = None
            for script in scripts:
                if not self.validate_script_entry(script):
                    log.warning("encountered invalid script entry!")
                    continue

                new_task = RunVerticaSqlScriptTask(
                    credentials=self.credentials,
                    schema=self.schema,
                    marker_schema=self.marker_schema,
                    date=self.date,
                    read_timeout=self.read_timeout,
                    source_script=path.join(self.script_root, script['location']),
                    script_name=script.get('name'),
                )

                # Make the previously created task a dependency of this one.
                if previous_task is not None:
                    new_task.add_dependency(previous_task)

                previous_task = new_task

            # The last task created heads the dependency chain.
            self.downstream_task = previous_task

    # Yield the cached task, if any, so the caller can schedule the chain.
    if self.downstream_task is not None:
        yield self.downstream_task
def get_downstream_task(self):
    """Build the script task chain on first use and yield its final task.

    When ``self.downstream_task`` is not yet set, the YAML document at
    ``self.script_configuration`` is loaded and one ``RunVerticaSqlScriptTask``
    is created for every valid entry under its ``scripts`` key.  Each task
    receives the previously created task via ``add_dependency()``, so the
    last task created ends up depending on all the earlier ones.  That last
    task is stored in ``self.downstream_task``.

    Yields:
        ``self.downstream_task`` if one is set; nothing when the
        configuration is not a mapping or contains no valid script entries.
    """
    if self.downstream_task is None:
        script_conf_target = ExternalURL(url=self.script_configuration).output()
        with script_conf_target.open('r') as script_conf_file:
            config = yaml.safe_load(script_conf_file)

        # isinstance() already rejects None, e.g. from an empty file.
        if isinstance(config, dict):
            # A bare ``scripts:`` key loads as None rather than falling back
            # to the .get() default, so normalise it to an empty list.
            scripts = config.get('scripts') or []

            # Walk the scripts in file order, chaining each new task to the
            # one created before it so they run sequentially, top to bottom.
            previous_task = None
            for script in scripts:
                if not self.validate_script_entry(script):
                    log.warning("encountered invalid script entry!")
                    continue

                new_task = RunVerticaSqlScriptTask(
                    credentials=self.credentials,
                    schema=self.schema,
                    marker_schema=self.marker_schema,
                    date=self.date,
                    read_timeout=self.read_timeout,
                    source_script=path.join(self.script_root, script['location']),
                    script_name=script.get('name'),
                )

                # Make the previously created task a dependency of this one.
                if previous_task is not None:
                    new_task.add_dependency(previous_task)

                previous_task = new_task

            # The last task created heads the dependency chain.
            self.downstream_task = previous_task

    # Yield the cached task, if any, so the caller can schedule the chain.
    if self.downstream_task is not None:
        yield self.downstream_task
def get_mysql_query_results(credentials, database, query):
    """Execute a MySQL query against ``database`` and return every row.

    Args:
        credentials: URL of a JSON document providing the ``username``,
            ``password``, ``host`` and ``port`` keys used to connect.
        database: name of the database to connect to.
        query: SQL text to execute.

    Returns:
        All rows produced by the query, as returned by ``cursor.fetchall()``.
    """
    with ExternalURL(url=credentials).output().open('r') as credentials_file:
        settings = json.load(credentials_file)

    connect_kwargs = {
        'user': settings.get('username'),
        'password': settings.get('password'),
        'host': settings.get('host'),
        'port': settings.get('port'),
        'database': database,
    }
    conn = mysql.connector.connect(**connect_kwargs)

    try:
        cur = conn.cursor()
        cur.execute(query)
        return cur.fetchall()
    finally:
        # Runs on both the success and the failure path.
        conn.close()