def on_connected(self, *args, **kwargs):
    """Load and validate the Suzieq config once the shell connects.

    Exits the process if the config fails to load/validate.
    """
    if self._args.config:
        self.cfg = load_sq_config(validate=True,
                                  config_file=self._args.config)
    else:
        self.cfg = load_sq_config(validate=True)
    if not self.cfg:
        sys.exit(1)
    self.schemas = Schema(self.cfg["schema-directory"])
def _coalescer_init(pq_dir: str) -> (TemporaryDirectory, NamedTemporaryFile):
    """Set up a scratch parquet copy plus a matching temp config file.

    :param pq_dir: root of the parquet tree to copy
        (tests/data/nxos/parquet-out, for example)
    :returns: temporary dir holding the copied parquet data
    :rtype: TemporaryDirectory
    :returns: temporary config file whose data-directory points at the copy
    :rtype: NamedTemporaryFile
    """
    # Scratch area that owns the copied parquet tree
    scratch_dir = TemporaryDirectory()
    # NOTE(review): copy_tree is the deprecated distutils helper — confirm
    # whether shutil.copytree(dirs_exist_ok=True) should replace it
    copy_tree(pq_dir, scratch_dir.name)

    # Point a throwaway config at the scratch copy
    cfg = load_sq_config(config_file=create_dummy_config_file())
    cfg['data-directory'] = f'{scratch_dir.name}/'

    cfg_file = NamedTemporaryFile(suffix='.yml', delete=False)
    with open(cfg_file.name, 'w') as fh:
        yaml.dump(cfg, fh)

    return scratch_dir, cfg_file
def rest_main(args=None):
    """Entry point for the Suzieq REST API server.

    Parses the command line, loads the config, validates that an API key
    is configured, and starts uvicorn with the configured address, port,
    logging and TLS settings.

    :param args: argument list to parse (defaults to sys.argv)
    """
    if args is None:
        args = sys.argv
    # BUG FIX: `args` used to be passed as ArgumentParser's first positional
    # parameter (prog), and parse_args() always read sys.argv — the `args`
    # parameter was silently ignored. Parse the supplied list instead.
    parser = argparse.ArgumentParser()
    parser.add_argument("-c", "--config", type=str,
                        help="alternate config file",
                        default=f'{os.getenv("HOME")}/.suzieq/suzieq-cfg.yml')
    userargs = parser.parse_args(args[1:])

    app = app_init(userargs.config)
    cfg = load_sq_config(config_file=userargs.config)
    try:
        # Value unused here; this only validates the key is configured
        _ = cfg['rest']['API_KEY']
    except KeyError:
        print('missing API_KEY in config file')
        sys.exit(1)  # FIX: sys.exit, consistent with the rest of the file

    logcfg, loglevel = get_log_config_level(cfg)
    ssl_keyfile, ssl_certfile = get_cert_files(cfg)

    srvr_addr = cfg.get('rest', {}).get('address', '127.0.0.1')
    srvr_port = cfg.get('rest', {}).get('port', 8000)

    uvicorn.run(app, host=srvr_addr, port=srvr_port,
                log_level=loglevel.lower(), log_config=logcfg,
                ssl_keyfile=ssl_keyfile, ssl_certfile=ssl_certfile)
def get_configured_api_key():
    """Return the API_KEY from the app's config file; exit if absent."""
    cfg = load_sq_config(config_file=app.cfg_file)
    try:
        return cfg['API_KEY']
    except KeyError:
        print('missing API_KEY in config file')
        sys.exit(1)
def create_config(testvar):
    """Write a temp config file whose data-directory comes from testvar.

    Returns the temp file's path, or None when testvar carries no
    'data-directory' key.
    """
    if 'data-directory' not in testvar:
        return None
    # Start from the dummy config and point it at the test data
    cfg = load_sq_config(conftest.create_dummy_config_file())
    cfg['data-directory'] = testvar['data-directory']
    tf = tempfile.NamedTemporaryFile(delete=False)
    with open(tf.name, 'w') as fh:
        fh.write(yaml.dump(cfg))
    return tf.name
def create_config(testvar):
    """Rewrite the dummy config file in place with testvar's data-directory.

    Returns the config file's path, or None when testvar carries no
    'data-directory' key.
    """
    if 'data-directory' not in testvar:
        return None
    cfg_path = conftest.create_dummy_config_file()
    cfg = load_sq_config(cfg_path)
    cfg['data-directory'] = testvar['data-directory']
    # Overwrite the dummy config with the updated contents
    with open(cfg_path, 'w') as fh:
        fh.write(yaml.dump(cfg))
    return cfg_path
def create_config():
    """Write a throwaway Suzieq config pointing at the multidc test data.

    :returns: path to the temp config file (caller is responsible for
        deleting it)
    :rtype: str
    """
    import tempfile  # local import keeps this fix self-contained

    tmpconfig = load_sq_config(config_file=conftest.create_dummy_config_file())
    tmpconfig['data-directory'] = './tests/data/multidc/parquet-out'

    # FIX: a predictable /tmp/suzieq-cfg-<randint>.yml name is prone to
    # collisions (and symlink races); let tempfile pick a unique, safe name.
    tf = tempfile.NamedTemporaryFile(mode='w', prefix='suzieq-cfg-',
                                     suffix='.yml', delete=False)
    with tf:
        tf.write(yaml.dump(tmpconfig))
    return tf.name
def __init__(self, engine, config_file=None):
    """Initialize shared query state from the given config.

    :param engine: the (already constructed) analysis engine to use
    :param config_file: alternate config file path; None uses the default
    """
    self.cfg = load_sq_config(config_file=config_file)
    self.schemas = Schema(self.cfg['schema-directory'])
    # Query filters start out empty; callers set them before running
    for attr in ('namespace', 'hostname', 'start_time',
                 'end_time', 'exec_time'):
        setattr(self, attr, '')
    self.engine = engine
    self.sort_fields = []
def create_config(t_dir, suzieq_dir):
    """Write a config under t_dir wired to its parquet data and the given
    suzieq checkout's service/schema directories.

    :param t_dir: directory holding parquet-out; also receives the config
    :param suzieq_dir: root of the suzieq checkout to rebase paths onto
    :returns: path of the written config file
    """
    cfg = load_sq_config(conftest.create_dummy_config_file())
    cfg['data-directory'] = f"{t_dir}/parquet-out"
    # Rebase the relative service/schema dirs onto the suzieq checkout
    for key in ('service-directory', 'schema-directory'):
        cfg[key] = f"{suzieq_dir}/{cfg[key]}"
    fname = f'{t_dir}/suzieq-cfg.yml'
    with open(fname, 'w') as fh:
        fh.write(yaml.dump(cfg))
    return fname
def __init__(self, engine="pandas"):
    """Initialize shared query state and build the named engine.

    :param engine: name of the analysis engine to instantiate
    """
    self.cfg = load_sq_config(validate=False)
    self.schemas = Schema(self.cfg["schema-directory"])
    # Query filters start out empty; callers set them before running
    for attr in ("namespace", "hostname", "start_time",
                 "end_time", "exec_time"):
        setattr(self, attr, "")
    self.engine_name = engine
    self.sort_fields = []
    self.engine = get_sqengine(engine)
    super().__init__()
def _coalescer_basic_test(pq_dir, namespace, path_src, path_dest):
    """Coalesce a copy of pq_dir and verify results are unchanged.

    The parquet tree is copied into a temp dir and coalesced there.
    Equivalence is checked by comparing 'table show' output and a path
    trace (path exercises many tables at once) before and after the run;
    with run-once parquet input there are no duplicates to merge away,
    so both frames must match exactly.

    :param pq_dir: the original parquet dir
    :param namespace: the namespace to be used for checking info
    :param path_src: the source IP of the path
    :param path_dest: the destination IP of the path
    :returns:
    :rtype:
    """
    temp_dir, tmpfile = _coalescer_init(pq_dir)

    from suzieq.sqobjects.tables import TablesObj
    from suzieq.sqobjects.path import PathObj

    # Snapshot query results before coalescing
    tablesobj = TablesObj(config_file=tmpfile.name)
    pre_tables_df = tablesobj.get()
    pathobj = PathObj(config_file=tmpfile.name)
    pre_path_df = pathobj.get(namespace=[namespace], source=path_src,
                              dest=path_dest)

    cfg = load_sq_config(config_file=tmpfile.name)
    do_coalesce(cfg, None)
    _verify_coalescing(temp_dir)

    # The same queries must yield identical frames afterwards
    post_tables_df = tablesobj.get()
    assert_df_equal(pre_tables_df, post_tables_df, None)
    post_path_df = pathobj.get(namespace=[namespace], source=path_src,
                               dest=path_dest)
    assert_df_equal(pre_path_df, post_path_df, None)

    _coalescer_cleanup(temp_dir, tmpfile)
def __init__(self, engine):
    """Initialize shared query state and build the requested engine.

    :param engine: name of the analysis engine to instantiate
    :raises ValueError: if no engine matches the requested name
    """
    self.cfg = load_sq_config(validate=False)
    self.schemas = Schema(self.cfg['schema-directory'])
    self.namespace = ''
    self.hostname = ''
    self.start_time = ''
    self.end_time = ''
    self.exec_time = ''
    # BUG FIX: the engine argument was silently ignored and 'pandas'
    # was always used; honor the caller's choice instead.
    self.engine = engine
    self.sort_fields = []
    self.engine = get_sqengine(self.engine)
    if not self.engine:
        # We really should define our own error
        raise ValueError
def get_configured_log_level():
    """Return the configured logging level, lowercased; defaults to 'info'."""
    cfg = load_sq_config(config_file=app.cfg_file)
    return cfg.get('logging-level', 'INFO').lower()
def create_context_config():
    """Return a Suzieq config loaded from the dummy config file."""
    dummy_cfg = create_dummy_config_file()
    return load_sq_config(config_file=dummy_cfg)
def create_context():
    """Build a NubiaSuzieqContext wired to the dummy config and its schemas."""
    cfg = load_sq_config(config_file=create_dummy_config_file())
    ctx = NubiaSuzieqContext()
    ctx.cfg = cfg
    ctx.schemas = Schema(cfg["schema-directory"])
    return ctx
def get_log_file():
    """Return the REST server's log file path under the temp directory."""
    cfg = load_sq_config(config_file=app.cfg_file)
    temp_root = cfg.get('temp-directory', '/tmp')
    return f"{temp_root}/sq-rest-server.log"
parser.add_argument( "-p", "--period", type=str, help=('Override the period specified in config file with this. ' 'Format is <period><h|d|w|y>. 1h is 1 hour, 2w is 2 weeks etc.') ) parser.add_argument( "--no-sqpoller", action='store_true', help=argparse.SUPPRESS ) userargs = parser.parse_args() cfg = load_sq_config(config_file=userargs.config) if not cfg: print(f'Invalid Suzieq config file {userargs.config}') sys.exit(1) logfile = cfg.get('coalescer', {}).get('logfile', '/tmp/sq-coalescer.log') loglevel = cfg.get('coalescer', {}).get('logging-level', 'DEBUG') logger = init_logger('suzieq.coalescer', logfile, loglevel, False) # Ensure we're the only compacter coalesce_dir = cfg.get('coalescer', {})\ .get('coalesce-directory', f'{cfg.get("data-directory")}/coalesced') fd = ensure_single_instance(f'{coalesce_dir}/.sq-coalescer.pid',
def coalescer_main():
    """CLI entry point for the standalone coalescer.

    Parses arguments, loads the Suzieq config, ensures only one coalescer
    instance runs (via a pid lockfile), then runs the coalescer either
    once or periodically and releases the lock on the way out.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-s",
        "--service-only",
        type=str,
        help="Only run this space separated list of services",
    )
    parser.add_argument(
        "-x",
        "--exclude-services",
        type=str,
        help="Exclude running this space separated list of services",
    )
    parser.add_argument("-c", "--config",
                        default=f'{os.getenv("HOME")}/.suzieq/suzieq-cfg.yml',
                        type=str, help="alternate config file")
    parser.add_argument(
        "--run-once",
        default=False,
        help='Run the coalescer once and exit',
        action='store_true',
    )
    parser.add_argument(
        "-p",
        "--period",
        type=str,
        help=('Override the period specified in config file with this. '
              'Format is <period><h|d|w|y>. 1h is 1 hour, 2w is 2 weeks etc.'))
    parser.add_argument("--no-sqpoller", action='store_true',
                        help=argparse.SUPPRESS)
    userargs = parser.parse_args()

    cfg = load_sq_config(config_file=userargs.config)
    if not cfg:
        print(f'Invalid Suzieq config file {userargs.config}')
        sys.exit(1)

    logfile, loglevel = get_log_file_level('coalescer', cfg,
                                           '/tmp/sq-coalescer.log')
    logger = init_logger('suzieq.coalescer', logfile, loglevel, False)

    # Ensure we're the only compacter
    coalesce_dir = cfg.get('coalescer', {})\
        .get('coalesce-directory',
             f'{cfg.get("data-directory")}/coalesced')
    fd = ensure_single_instance(f'{coalesce_dir}/.sq-coalescer.pid', False)
    if not fd:
        # FIX: these were f-strings with no placeholders
        print('ERROR: Another coalescer process present')
        logger.error('Another coalescer process present')
        sys.exit(errno.EBUSY)

    if userargs.run_once:
        timestr = ''
    elif not userargs.period:
        timestr = cfg.get('coalescer', {'period': '1h'}).get('period', '1h')
    else:
        timestr = userargs.period

    schemas = Schema(cfg.get('schema-directory'))
    if userargs.service_only or userargs.exclude_services:
        # derivedRecord tables are synthesized, not coalesced directly
        tables = [x for x in schemas.tables()
                  if (schemas.type_for_table(x) != "derivedRecord")]
        if userargs.service_only:
            tables = [x for x in tables if x in userargs.service_only.split()]
        if userargs.exclude_services:
            tables = [x for x in tables
                      if x not in userargs.exclude_services.split()]
    else:
        tables = []

    run_coalescer(cfg, tables, timestr, userargs.run_once, logger,
                  userargs.no_sqpoller or False)

    # Release the single-instance lock
    os.truncate(fd, 0)
    try:
        fcntl.flock(fd, fcntl.LOCK_UN)
        os.close(fd)
    except OSError:
        pass

    sys.exit(0)
def poller_main() -> None:
    """Parse the poller's command line, resolve credentials, and start it."""
    supported_outputs = ["parquet"]
    parser = argparse.ArgumentParser()
    # Exactly one source of device inventory must be supplied
    requiredgrp = parser.add_mutually_exclusive_group(required=True)
    requiredgrp.add_argument(
        "-D",
        "--devices-file",
        type=str,
        help="File with URL of devices to gather data from",
    )
    requiredgrp.add_argument(
        "-a",
        "--ansible-file",
        type=str,
        help="Ansible inventory file of devices to gather data from",
    )
    requiredgrp.add_argument("-i", "--input-dir", type=str,
                             help="Directory where run-once=gather data is")
    parser.add_argument("-n", "--namespace", type=str,
                        required='--ansible-file' in sys.argv
                        or "-a" in sys.argv,
                        help="Namespace to associate for the gathered data")
    parser.add_argument(
        "-o",
        "--outputs",
        nargs="+",
        default=["parquet"],
        choices=supported_outputs,
        type=str,
        help="Output formats to write to: parquet. Use "
        "this option multiple times for more than one output",
    )
    parser.add_argument(
        "-s",
        "--service-only",
        type=str,
        help="Only run this space separated list of services",
    )
    parser.add_argument(
        "-x",
        "--exclude-services",
        type=str,
        help="Exclude running this space separated list of services",
    )
    parser.add_argument("-c", "--config", type=str,
                        help="alternate config file")
    parser.add_argument(
        "--run-once",
        type=str,
        choices=["gather", "process"],
        help=argparse.SUPPRESS,
    )
    parser.add_argument(
        "--output-dir",
        type=str,
        default=f'{os.path.abspath(os.curdir)}/sqpoller-output',
        help=argparse.SUPPRESS,
    )
    parser.add_argument(
        "--ask-pass",
        default=False,
        action='store_true',
        help="prompt to enter password for login to devices",
    )
    parser.add_argument(
        "--passphrase",
        default=False,
        action='store_true',
        help="prompt to enter private key passphrase",
    )
    parser.add_argument(
        "--envpass",
        default="",
        type=str,
        help="Use named environment variable to retrieve password",
    )
    parser.add_argument(
        "-j",
        "--jump-host",
        default="",
        type=str,
        help=
        "Jump Host via which to access the devices, IP addr or DNS hostname")
    parser.add_argument("-K", "--jump-host-key-file", default="", type=str,
                        help="Key file to be used for jump host")
    parser.add_argument(
        "-k",
        "--ignore-known-hosts",
        default=False,
        action='store_true',
        help="Ignore Known Hosts File",
    )
    parser.add_argument(
        "--ssh-config-file",
        type=str,
        default=None,
        help=
        "Path to ssh config file to use. If not set, config file is not used")
    parser.add_argument(
        "--no-coalescer",
        default=False,
        action='store_true',
        help=argparse.SUPPRESS,
    )
    userargs = parser.parse_args()

    # Resolve credentials up front: the boolean prompt flags are replaced
    # with the actual secret (or None) before the poller ever runs.
    if userargs.passphrase:
        userargs.passphrase = getpass.getpass(
            'Passphrase to decode private key file: ')
    else:
        userargs.passphrase = None

    if userargs.ask_pass:
        userargs.ask_pass = getpass.getpass('Password to login to device: ')
    else:
        userargs.ask_pass = None

    if userargs.envpass:
        passwd = os.getenv(userargs.envpass, '')
        if not passwd:
            print(
                f'ERROR: No password in environment variable {userargs.envpass}'
            )
            sys.exit(1)
        # env-supplied password overrides any prompted one
        userargs.ask_pass = passwd

    uvloop.install()
    cfg = load_sq_config(config_file=userargs.config)
    if not cfg:
        print("Could not load config file, aborting")
        sys.exit(1)

    try:
        asyncio.run(start_poller(userargs, cfg))
    except (KeyboardInterrupt, RuntimeError):
        pass
    except Exception:
        import traceback
        traceback.print_exc()
    sys.exit(0)
def on_connected(self, *args, **kwargs):
    """Reload the Suzieq config when an alternate config file was given.

    NOTE(review): when no --config is supplied, self.cfg is assumed to
    already be set before this runs — confirm with the enclosing class.
    """
    if self._args.config:
        self.cfg = load_sq_config(validate=False,
                                  config_file=self._args.config)
    self.schemas = Schema(self.cfg["schema-directory"])
def test_transform(input_file):
    """Apply the transforms in input_file to a scratch parquet copy,
    coalesce it, and verify the outcome against the file's expectations.
    """
    to_transform = Yaml2Class(input_file)

    try:
        data_directory = to_transform.transform.data_directory
    except AttributeError:
        print('Invalid transformation file, no data directory')
        pytest.fail('AttributeError', pytrace=True)

    # Make a copy of the data directory
    temp_dir, tmpfile = _coalescer_init(data_directory)

    cfg = load_sq_config(config_file=tmpfile.name)
    schemas = Schema(cfg['schema-directory'])

    for ele in to_transform.transform.transform:
        query_str_list = []
        # Each transformation has a record => write's happen per record
        for record in ele.record:
            changed_fields = set()
            new_df = pd.DataFrame()
            tables = [x for x in dir(record) if not x.startswith('_')]
            for table in tables:
                # Lets read the data in now that we know the table
                tblobj = get_sqobject(table)
                pq_db = get_sqdb_engine(cfg, table, None, None)
                columns = schemas.fields_for_table(table)
                mod_df = tblobj(config_file=tmpfile.name).get(columns=columns)

                for key in getattr(record, table):
                    query_str = key.match
                    chg_df = pd.DataFrame()
                    if query_str != "all":
                        try:
                            chg_df = mod_df.query(query_str) \
                                .reset_index(drop=True)
                        except Exception as ex:
                            assert (not ex)
                        query_str_list.append(query_str)
                    else:
                        chg_df = mod_df

                    _process_transform_set(key.set, chg_df, changed_fields)
                    if new_df.empty:
                        new_df = chg_df
                    elif not chg_df.empty:
                        new_df = pd.concat([new_df, chg_df])

                if new_df.empty:
                    continue

                # Write the records now
                _write_verify_transform(new_df, table, pq_db,
                                        SchemaForTable(table, schemas),
                                        tmpfile.name, query_str_list,
                                        changed_fields)

    # Now we coalesce and verify it works
    from suzieq.sqobjects.tables import TablesObj

    pre_table_df = TablesObj(config_file=tmpfile.name).get()
    do_coalesce(cfg, None)
    _verify_coalescing(temp_dir)

    post_table_df = TablesObj(config_file=tmpfile.name).get()
    assert_df_equal(pre_table_df, post_table_df, None)

    # Run additional tests on the coalesced data
    for ele in to_transform.transform.verify:
        table = [x for x in dir(ele) if not x.startswith('_')][0]
        tblobj = get_sqobject(table)

        for tst in getattr(ele, table):
            start_time = tst.test.get('start-time', '')
            end_time = tst.test.get('end-time', '')

            columns = tst.test.get('columns', ['default'])
            df = tblobj(config_file=tmpfile.name, start_time=start_time,
                        end_time=end_time).get(columns=columns)
            if not df.empty and 'query' in tst.test:
                query_str = tst.test['query']
                df = df.query(query_str).reset_index(drop=True)

            if 'assertempty' in tst.test:
                assert (df.empty)
            elif 'shape' in tst.test:
                shape = tst.test['shape'].split()
                if shape[0] != '*':
                    assert (int(shape[0]) == df.shape[0])
                if shape[1] != '*':
                    assert (int(shape[1]) == df.shape[1])
            else:
                assert (not df.empty)

    _coalescer_cleanup(temp_dir, tmpfile)
log_config['handlers']['default']['filename'] = get_log_file(cfg) del (log_config['handlers']['default']['stream']) return log_config if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("-c", "--config", type=str, help="alternate config file", default=f'{os.getenv("HOME")}/.suzieq/suzieq-cfg.yml') userargs = parser.parse_args() app = app_init(userargs.config) cfg = load_sq_config(userargs.config) try: api_key = cfg['API_KEY'] except KeyError: print('missing API_KEY in config file') exit(1) log_level = cfg.get('logging-level', 'INFO').lower() ssl_keyfile, ssl_certfile = get_cert_files(cfg) uvicorn.run(app, host="0.0.0.0", port=8000, log_level=log_level, log_config=get_log_config(cfg), ssl_keyfile=ssl_keyfile, ssl_certfile=ssl_certfile)