def main(argv):
    [aggregate, nodes, debug] = get_gram_defaults()
    pprint(aggregate)
    pprint(nodes)

    # "local" for a local datastore (vs. "collector" for a collector).
    # This is not the database name or database program;
    # those are set in /ops-monitoring/config/<db_type>_operator.conf
    db_type = "local"
    config_path = "../../config/"

    tbl_mgr = table_manager.TableManager(db_type, config_path, debug)

    # Get schemas from the config store
    tbl_mgr.poll_config_store()

    # Get the info schema from the table manager
    info_schema = tbl_mgr.info_schema

    # Drop and recreate all informational tables
    tbl_mgr.drop_all_tables()
    tbl_mgr.establish_all_tables()

    # Supports the aggregate info query
    set_aggregate_info(tbl_mgr, aggregate, nodes)
    set_node_info(tbl_mgr, nodes)
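# The sketch below is a hypothetical illustration of what get_gram_defaults()
# could return, shown only to make the unpacking in main() concrete. The field
# names and values are assumptions; the real defaults come from the GRAM rack
# configuration and may differ.
def get_gram_defaults():
    # Aggregate metadata, a node list, and a debug flag for TableManager.
    aggregate = {"id": "gram",
                 "urn": "urn:publicid:IDN+gram+authority+am"}
    nodes = [{"id": "gram_node_compute1", "hostname": "compute1"}]
    debug = False
    return [aggregate, nodes, debug]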
def main():
    db_type = "collector"
    debug = True

    # config_path is expected to be defined at module scope
    tbl_mgr = table_manager.TableManager(db_type, config_path, debug)
    tbl_mgr.poll_config_store()

    plotter = PsqlLinePlotter(tbl_mgr)
def main(argv):
    [aggregate_id, extck_id, object_type_param, cert_path, debug] = parse_args(argv)

    if (aggregate_id == "" and extck_id == "") or object_type_param == "" or cert_path == "":
        usage()

    db_type = "collector"

    # If in debug mode, overwrite the logging configuration to print out what we want.
    if debug:
        logger.configure_logger_for_debug_info(config_path)
    logger.get_logger(config_path).info("Starting object data fetching")

    tbl_mgr = table_manager.TableManager(db_type, config_path)
    tbl_mgr.poll_config_store()

    ocl = opsconfig_loader.OpsconfigLoader(config_path)
    all_event_types = ocl.get_event_types()
    node_event_types = all_event_types["node"]
    interface_event_types = all_event_types["interface"]
    interface_vlan_event_types = all_event_types["interfacevlan"]
    aggregate_event_types = all_event_types["aggregate"]
    experiment_event_types = all_event_types["experiment"]
    # pprint(all_event_types)

    if object_type_param == 'n':
        logger.get_logger(config_path).debug("Fetching node events")
        event_types = node_event_types
        object_type = "node"
    elif object_type_param == 'i':
        logger.get_logger(config_path).debug("Fetching interface events")
        event_types = interface_event_types
        object_type = "interface"
    elif object_type_param == 'v':
        logger.get_logger(config_path).debug("Fetching interfacevlan events")
        event_types = interface_vlan_event_types
        object_type = "interfacevlan"
    elif object_type_param == 'a':
        logger.get_logger(config_path).debug("Fetching aggregate events")
        event_types = aggregate_event_types
        object_type = "aggregate"
    elif object_type_param == 'x':
        logger.get_logger(config_path).debug("Fetching experiment events")
        event_types = experiment_event_types
        object_type = "experiment"
    else:
        logger.get_logger(config_path).critical("invalid object type arg %s\n" % object_type_param)
        sys.stderr.write("invalid object type arg %s\n" % object_type_param)
        sys.exit(1)

    fetcher = SingleLocalDatastoreObjectTypeFetcher(tbl_mgr, aggregate_id, extck_id,
                                                    object_type, event_types,
                                                    cert_path, debug, config_path)

    if not fetcher.fetch_and_insert():
        logger.get_logger(config_path).critical("fetch_and_insert() failed")
        sys.exit(-1)
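# A minimal sketch of the parse_args()/usage() pair assumed above, using
# getopt. The option letters here are assumptions; the contract implied by
# main() is that parse_args() returns
# [aggregate_id, extck_id, object_type_param, cert_path, debug],
# with empty strings for anything not supplied.
import getopt

def usage():
    sys.stderr.write("usage: -a <aggregate_id> | -e <extck_id> "
                     "-o <n|i|v|a|x> -c <cert_path> [-d]\n")
    sys.exit(1)

def parse_args(argv):
    aggregate_id = extck_id = object_type_param = cert_path = ""
    debug = False
    try:
        opts, _ = getopt.getopt(argv, "a:e:o:c:d")
    except getopt.GetoptError:
        usage()
    for opt, arg in opts:
        if opt == "-a":
            aggregate_id = arg
        elif opt == "-e":
            extck_id = arg
        elif opt == "-o":
            object_type_param = arg
        elif opt == "-c":
            cert_path = arg
        elif opt == "-d":
            debug = True
    return [aggregate_id, extck_id, object_type_param, cert_path, debug]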
def main(argv):
    multiprocessing.util.log_to_stderr(multiprocessing.util.DEBUG)
    db_type = "local"

    tbl_mgr = table_manager.TableManager(db_type, config_path)
    tbl_mgr.poll_config_store()

    ocl = opsconfig_loader.OpsconfigLoader(config_path)
    aggregateStores = ocl.config_json['aggregatestores']
    # experiment_event_types = ocl.get_event_types()["experiment"]

    ext_config = extck_config.ExtckConfigLoader(tbl_mgr.logger)

    ipConfigPathLocal = os.path.join(extck_path, "ips.conf")
    ipConfigPathRemote = ext_config.get_ips_file_remote_location()
    pingerLocal = os.path.join(extck_path, "pinger.py")
    pingerRemote = ext_config.get_pinger_file_remote_location()
    keyPath = ext_config.get_ssh_key_file_location()
    poolSize = ext_config.get_experiment_ping_thread_pool_size()
    initialPingCount = ext_config.get_experiment_ping_initial_count()
    # method name as defined by ExtckConfigLoader
    measurementPingCount = ext_config.get_experiment_ping_measurmentl_count()
    slices = ext_config.get_experiment_slices_info()

    nickCache = extck_store.AggregateNickCache(ext_config.get_nickname_cache_file_location())
    nickCache.parseNickCache()

    # Set up info about extra extck tables and establish them.
    ext_config.configure_extck_tables(tbl_mgr)

    # Relying on the fact that extck_populator.py has been run recently
    # and has created the cached credentials file...
    user_cred_file = ext_config.get_user_credential_file()

    table_str = "ops_experiment_ping_rtt_ms"

    myLock = multiprocessing.Lock()
    argsList = []
    ping_sets = ext_config.get_experiment_ping_set()
    for ping_set in ping_sets:
        sources = ext_config.get_experiment_source_ping_for_set(ping_set)
        for site in sources:
            args = (ext_config, site, ping_set, ipConfigPathLocal, ipConfigPathRemote,
                    keyPath, pingerLocal, pingerRemote, poolSize, initialPingCount,
                    measurementPingCount, table_str, myLock, tbl_mgr, slices,
                    aggregateStores, nickCache, user_cred_file)
            argsList.append(args)

    pool = multiprocessing.pool.ThreadPool(
        processes=int(ext_config.get_experiment_coordination_thread_pool_size()))
    pool.map(run_remote_pings, argsList)

    # Purge data older than 168 hours (1 week); timestamps are in microseconds.
    old_ts = int((time.time() - 168 * 60 * 60) * 1000000)
    tbl_mgr.purge_old_tsdata(table_str, old_ts)
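# Hypothetical outline of the run_remote_pings() worker mapped over argsList.
# It only illustrates how the 18-element args tuple is unpacked and that
# writes to the shared table are serialized with myLock; the actual worker's
# ssh/ping orchestration is not reproduced here.
def run_remote_pings(args):
    (ext_config, site, ping_set, ipConfigPathLocal, ipConfigPathRemote,
     keyPath, pingerLocal, pingerRemote, poolSize, initialPingCount,
     measurementPingCount, table_str, myLock, tbl_mgr, slices,
     aggregateStores, nickCache, user_cred_file) = args
    # ... copy pinger.py to the remote site, run it, collect RTTs ...
    with myLock:
        pass  # tbl_mgr.insert_stmt(table_str, val_str) for each datapoint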
def main():
    (num_ins, per_sec) = arg_parser(sys.argv)

    db_name = "local"
    config_path = "../config/"
    # debug = False

    tbl_mgr = table_manager.TableManager(db_name, config_path)
    tbl_mgr.poll_config_store()

    ocl = opsconfig_loader.OpsconfigLoader(config_path)
    event_types = ocl.get_event_types()

    node_id = "instageni.gpolab.bbn.com_node_pc1"
    event_types_arr = event_types["node"]
    nsp1 = StatsPopulator(tbl_mgr, "node", node_id, num_ins, per_sec, event_types_arr)
    nsp1.start()

    node_id = "instageni.gpolab.bbn.com_node_pc2"
    nsp2 = StatsPopulator(tbl_mgr, "node", node_id, num_ins, per_sec, event_types_arr)
    nsp2.start()

    iface_id = "instageni.gpolab.bbn.com_interface_pc1:eth1"
    event_types_arr = event_types["interface"]
    isp1 = StatsPopulator(tbl_mgr, "interface", iface_id, num_ins, per_sec, event_types_arr)
    isp1.start()

    iface_id = "instageni.gpolab.bbn.com_interface_pc2:eth1"
    isp2 = StatsPopulator(tbl_mgr, "interface", iface_id, num_ins, per_sec, event_types_arr)
    isp2.start()

    q_res = tbl_mgr.query("select count(*) from ops_" + event_types_arr[0])
    if q_res is not None:
        print "num entries", q_res[0][0]

    # Join all threads
    threads = [nsp1, nsp2, isp1, isp2]
    ok = True
    for t in threads:
        t.join()
        if not t.run_ok:
            ok = False

    if not ok:
        sys.stderr.write("\nCould not populate statistics properly.\n")
        sys.exit(-1)
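# A minimal sketch of arg_parser() as assumed above: it pulls the number of
# data points and the interval between them from the command line, with
# fallback defaults. The defaults and positional argument order are
# assumptions; only the (num_ins, per_sec) return shape is implied by main().
def arg_parser(argv):
    num_ins = 10   # number of measurements to insert per event type
    per_sec = 0.2  # seconds between measurements
    if len(argv) >= 2:
        num_ins = int(argv[1])
    if len(argv) >= 3:
        per_sec = float(argv[2])
    return (num_ins, per_sec)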
def main():
    db_type = "local"
    config_path = "../../config/"

    tbl_mgr = table_manager.TableManager(db_type, config_path)
    tbl_mgr.poll_config_store()

    if not tbl_mgr.drop_all_tables():
        sys.stderr.write("\nCould not drop all tables.\n")
        sys.exit(-1)
    if not tbl_mgr.establish_all_tables():
        sys.stderr.write("\nCould not create all tables.\n")
        sys.exit(-1)
def main(argv):
    [base_url, node_id, interface_id, num_ins, per_sec] = parse_args(argv)

    db_type = "local"
    config_path = "../../config"
    debug = False

    tbl_mgr = table_manager.TableManager(db_type, config_path, debug)

    ocl = opsconfig_loader.OpsconfigLoader(config_path)
    event_types = ocl.get_event_types()

    tbl_mgr.drop_all_tables()
    tbl_mgr.establish_all_tables()

    # Info population
    ip = info_populator.InfoPopulator(tbl_mgr, base_url)
    ip.insert_fake_info()

    q_res = tbl_mgr.query("select count(*) from ops_aggregate")
    if q_res is not None:
        print "Aggregate has", q_res[0][0], "entries"

    # Data population
    node_event_str_arr = event_types["node"]
    interface_event_str_arr = event_types["interface"]
    print node_event_str_arr + interface_event_str_arr

    tsdata_lifespan_sec = 60
    node_sp = stats_populator.StatsPopulator(tbl_mgr, node_id, num_ins, per_sec,
                                             node_event_str_arr, tsdata_lifespan_sec)
    interface_sp = stats_populator.StatsPopulator(tbl_mgr, interface_id, num_ins, per_sec,
                                                  interface_event_str_arr, tsdata_lifespan_sec)

    # Start threads
    node_sp.start()
    interface_sp.start()

    # Join all threads
    threads = [node_sp, interface_sp]
    for t in threads:
        t.join()
def main():
    db_name = "local"
    tbl_mgr = table_manager.TableManager(db_name, config_path)
    tbl_mgr.poll_config_store()

    # Get the content of the template rspec
    template_filename = os.path.join(extck_path, config.get_stitcher_rspec_template_filename())
    fh = open(template_filename, "r")
    template_content = list(fh)
    fh.close()

    slicename = config.get_stitch_experiment_slicename()
    sliceinfo = config.get_experiment_slices_info()[slicename]
    projectname = sliceinfo[2]

    site_info = get_stitch_sites_details(tbl_mgr)
    user_dir = config.get_user_path()
    table_name = 'ops_experiment_is_stitch_path_available'

    # Check every pair of stitching sites
    for idx1 in range(len(site_info)):
        site1 = site_info[idx1]
        for idx2 in range(idx1 + 1, len(site_info)):
            site2 = site_info[idx2]
            opslogger.debug("Checking on stitching path from %s to %s" % (site1[0], site2[0]))
            (fh, tmpFilename) = tempfile.mkstemp(suffix='.rspec', prefix='tmpstitch', dir=user_dir)
            write_tmp_rspec(fh, template_content, site1[1], site2[1])

            # Launch stitcher with the no-reservation command and get the result
            path_available = execute_stitcher_command(tmpFilename, slicename, projectname, user_dir)
            if path_available == 0:
                opslogger.warning("Stitching path from %s to %s could NOT be successfully computed"
                                  % (site1[0], site2[0]))
            else:
                opslogger.debug("Stitching path from %s to %s was successfully computed"
                                % (site1[0], site2[0]))

            # Remove the tmp file
            os.remove(tmpFilename)

            # Insert the record; timestamps are in microseconds
            ts = int(time.time() * 1000000)
            experiment_id = name_stitch_path_experiment(site1[0], site2[0])
            val_str = "('" + experiment_id + "', " + str(ts) + ", " + str(path_available) + ")"
            tbl_mgr.insert_stmt(table_name, val_str)
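# Hypothetical sketches of the two helpers used above. write_tmp_rspec()
# substitutes the two site endpoints into the rspec template and writes it to
# the file descriptor returned by mkstemp(); name_stitch_path_experiment()
# derives the experiment id stored in ops_experiment_is_stitch_path_available.
# The %SITE1%/%SITE2% placeholder tokens and the id format are assumptions,
# not the project's actual template syntax.
def write_tmp_rspec(fd, template_content, site1_endpoint, site2_endpoint):
    for line in template_content:
        line = line.replace("%SITE1%", site1_endpoint)
        line = line.replace("%SITE2%", site2_endpoint)
        os.write(fd, line)
    os.close(fd)

def name_stitch_path_experiment(site1_name, site2_name):
    return "%s_to_%s_stitch_path" % (site1_name, site2_name)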
def main(argv):
    [cert_path] = parse_args(argv)

    db_name = "local"
    tbl_mgr = table_manager.TableManager(db_name, config_path)
    tbl_mgr.poll_config_store()

    opsConfigLoader = opsconfig_loader.OpsconfigLoader(config_path)
    config = extck_config.ExtckConfigLoader(tbl_mgr.logger)

    # Set up info about extra extck tables and establish them.
    config.configure_extck_tables(tbl_mgr)

    nickCache = AggregateNickCache(config.get_nickname_cache_file_location())
    ip = InfoPopulator(tbl_mgr, config, nickCache)

    # Populate the "ops_externalCheck" table
    ip.insert_externalcheck()

    # Grab urns and urls for all aggregate stores
    opsconfig_url = opsConfigLoader.config_json['selfRef']
    aggRequest = handle_request(tbl_mgr.logger, cert_path, opsconfig_url)
    if aggRequest is None:
        tbl_mgr.logger.warning("Could not contact the opsconfig datastore!")
        return
    aggStores = aggRequest['aggregatestores']

    urn_map = nickCache.parseNickCache()
    nickCache.updateCache(urn_map, aggStores)
    ip.cleanUpObsoleteAggregates(aggStores)

    registerAggregates(aggStores, cert_path, urn_map, ip, config)

    # Populate the "ops_experiment" tables
    ip.populateExperimentInfoTables(aggStores)
def main(argv):
    [base_url, node_id, interface_id, interfacevlan_id, aggregate_id,
     experiment_id, num_ins, per_sec] = parse_args(argv)

    db_type = "local"
    config_path = "../../config/"

    tbl_mgr = table_manager.TableManager(db_type, config_path)
    tbl_mgr.poll_config_store()

    ocl = opsconfig_loader.OpsconfigLoader(config_path)
    event_types = ocl.get_event_types()

    if not tbl_mgr.drop_all_tables():
        sys.stderr.write("\nCould not drop all tables.\n")
        sys.exit(-1)
    if not tbl_mgr.establish_all_tables():
        sys.stderr.write("\nCould not create all tables.\n")
        sys.exit(-1)

    # Info population
    ip = info_populator.InfoPopulator(tbl_mgr, base_url)
    error = False
    if not ip.insert_fake_info():
        error = True
    if not ip.insert_authority_store_info():
        error = True
    if not ip.insert_externalcheck_store():
        error = True
    if error:
        sys.stderr.write("Error populating local datastore\n")
        sys.exit(-1)

    q_res = tbl_mgr.query("select count(*) from ops_aggregate")
    if q_res is not None:
        print "Aggregate has", q_res[0][0], "entries"

    # Data population
    node_event_str_arr = event_types["node"]
    interface_event_str_arr = event_types["interface"]
    interfacevlan_event_str_arr = event_types["interfacevlan"]
    aggregate_event_str_arr = event_types["aggregate"]
    experiment_event_str_arr = event_types["experiment"]
    print node_event_str_arr + interface_event_str_arr

    node_sp = stats_populator.StatsPopulator(tbl_mgr, "node", node_id,
                                             num_ins, per_sec, node_event_str_arr)
    interface_sp = stats_populator.StatsPopulator(tbl_mgr, "interface", interface_id,
                                                  num_ins, per_sec, interface_event_str_arr)
    interfacevlan_sp = stats_populator.StatsPopulator(tbl_mgr, "interfacevlan", interfacevlan_id,
                                                      num_ins, per_sec, interfacevlan_event_str_arr)
    aggregate_sp = stats_populator.StatsPopulator(tbl_mgr, "aggregate", aggregate_id,
                                                  num_ins, per_sec, aggregate_event_str_arr)
    experiment_sp = stats_populator.StatsPopulator(tbl_mgr, "experiment", experiment_id,
                                                   num_ins, per_sec, experiment_event_str_arr)

    # Start all threads
    threads = [node_sp, interface_sp, interfacevlan_sp, aggregate_sp, experiment_sp]
    for t in threads:
        t.start()

    # Join all threads
    ok = True
    for t in threads:
        t.join()
        if not t.run_ok:
            ok = False

    if not ok:
        sys.stderr.write("\nCould not populate statistics properly.\n")
        sys.exit(-1)
def __init__(self, parent_path):
    self.local_path = parent_path + "/local/"
    self.common_path = parent_path + "/common/"
    self.config_path = parent_path + "/config/"
    self.debug = False

    sys.path.append(self.local_path)
    sys.path.append(self.common_path)
    sys.path.append(self.config_path)

    import rest_call_handler
    import table_manager
    import logger

    opslog = logger.get_logger(self.config_path)
    opslog.critical("Starting ops monitoring")

    try:
        self.db_name = "local"  # uses postgres by default
        self.tm = table_manager.TableManager(self.db_name, self.config_path)
        self.tm.poll_config_store()

        # Try to get the software version we're running from the VERSION file
        # in the top-level directory.
        version_filename = parent_path + "/VERSION"
        try:
            version_file = open(version_filename)
            self.monitoring_version = version_file.readline().strip()
            version_file.close()
        except Exception, e:
            opslog.warning("Could not read monitoring version from file %s: %s"
                           % (version_filename, str(e)))
            self.monitoring_version = "unknown"
        opslog.info("Monitoring version is %s" % (self.monitoring_version))

        _ = AgingOutThread(self.tm, opslog)

        self.app = Flask(__name__)

        def make_resp(json_resp):
            """
            Make an HTTP Response for a given JSON response.
            :param json_resp: the JSON response in text format
            :return: an HTTP Response wrapping the JSON response
            """
            return Response(json_resp, status=httplib.OK,
                            mimetype='application/json; charset=utf-8')

        @self.app.route('/info/aggregate/<path:agg_id>', methods=['GET'])
        def info_aggregate_args(agg_id):
            return make_resp(rest_call_handler.handle_aggregate_info_query(
                self.tm, agg_id, self.monitoring_version))

        @self.app.route('/info/externalcheck/<path:extck_id>', methods=['GET'])
        def info_externalcheck_args(extck_id):
            return make_resp(rest_call_handler.handle_externalcheck_info_query(
                self.tm, extck_id, self.monitoring_version))

        @self.app.route('/info/experiment/<path:exp_id>', methods=['GET'])
        def info_experiment_args(exp_id):
            return make_resp(rest_call_handler.handle_experiment_info_query(
                self.tm, exp_id))

        @self.app.route('/info/experimentgroup/<path:expgroup_id>', methods=['GET'])
        def info_experimentgroup_args(expgroup_id):
            return make_resp(rest_call_handler.handle_experimentgroup_info_query(
                self.tm, expgroup_id))

        @self.app.route('/info/node/<path:node_id>', methods=['GET'])
        def info_node_args(node_id):
            return make_resp(rest_call_handler.handle_node_info_query(
                self.tm, node_id))

        @self.app.route('/info/interface/<path:iface_id>', methods=['GET'])
        def info_interface_args(iface_id):
            return make_resp(rest_call_handler.handle_interface_info_query(
                self.tm, iface_id))

        @self.app.route('/info/interfacevlan/<path:ifacevlan_id>', methods=['GET'])
        def info_interfacevlan_args(ifacevlan_id):
            return make_resp(rest_call_handler.handle_interfacevlan_info_query(
                self.tm, ifacevlan_id))

        @self.app.route('/info/sliver/<path:sliver_id>', methods=['GET'])
        def info_sliver_args(sliver_id):
            return make_resp(rest_call_handler.handle_sliver_info_query(
                self.tm, sliver_id))

        @self.app.route('/info/link/<path:link_id>', methods=['GET'])
        def info_link_args(link_id):
            return make_resp(rest_call_handler.handle_link_info_query(
                self.tm, link_id))

        @self.app.route('/info/slice/<path:slice_id>', methods=['GET'])
        def info_slice_args(slice_id):
            return make_resp(rest_call_handler.handle_slice_info_query(
                self.tm, slice_id))

        @self.app.route('/info/user/<path:user_id>', methods=['GET'])
        def info_user_args(user_id):
            return make_resp(rest_call_handler.handle_user_info_query(
                self.tm, user_id))

        @self.app.route('/info/authority/<path:authority_id>', methods=['GET'])
        def info_authority_args(authority_id):
            return make_resp(rest_call_handler.handle_authority_info_query(
                self.tm, authority_id, self.monitoring_version))

        @self.app.route('/info/opsconfig/<path:opsconfig_id>', methods=['GET'])
        def info_opsconfig_args(opsconfig_id):
            return rest_call_handler.handle_opsconfig_info_query(
                self.tm, opsconfig_id, self.monitoring_version)

        @self.app.route('/data/', methods=['GET'])
        def data():
            # Get everything to the right of ?q= as a string from flask.request
            filters = request.args.get('q', None)
            return make_resp(rest_call_handler.handle_ts_data_query(self.tm, filters))
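# A sketch of how this class might be driven. The class name
# OpsMonitoringApp, the parent path, and the host/port values are assumptions
# for illustration only; a production deployment would typically run the
# Flask app behind a WSGI server rather than the built-in one.
if __name__ == '__main__':
    server = OpsMonitoringApp("/usr/local/ops-monitoring")
    server.app.run(host='0.0.0.0', port=5000)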
def main():
    db_name = "local"
    tbl_mgr = table_manager.TableManager(db_name, config_path)
    tbl_mgr.poll_config_store()

    dp = DataPopulator(tbl_mgr)
    extck_id = config.get_extck_store_id()

    # Get all monitored aggregates
    monitored_aggregates = tbl_mgr.query(
        "SELECT id FROM ops_externalcheck_monitoredaggregate WHERE externalcheck_id = '%s'"
        % extck_id)
    if monitored_aggregates is None:
        opslogger.warning("Could not find any monitored aggregate. Has extck_store been executed?")
        return

    refresh_user_credentials()

    myLock = multiprocessing.Lock()
    argsList = []
    for monitored_aggregate_tuple in monitored_aggregates:
        monitored_aggregate_id = monitored_aggregate_tuple[0]
        opslogger.info("Checking availability of AM: %s", monitored_aggregate_id)

        amtype = tbl_mgr.query("SELECT type FROM extck_aggregate WHERE aggregate_id = '%s'"
                               % monitored_aggregate_id)
        if amtype is None:
            opslogger.warning("Error trying to determine type of aggregate: %s"
                              % monitored_aggregate_id)
            continue
        amtype = amtype[0][0]  # first value of first tuple...

        am_urls = tbl_mgr.query("SELECT amurl FROM extck_aggregate_amurl WHERE aggregate_id = '%s'"
                                % monitored_aggregate_id)
        if am_urls is None:
            opslogger.warning("Did not find any registered AM URL for aggregate: %s"
                              % monitored_aggregate_id)
            continue

        # args = (monitored_aggregate_id, amtype, am_urls, dp, myLock)
        for url_tuple in am_urls:
            url = url_tuple[0]
            args = (monitored_aggregate_id, amtype, url, myLock)
            argsList.append(args)
        # argsList.append(args)

    pool = multiprocessing.pool.ThreadPool(processes=int(config.get_populator_pool_size()))
    results = pool.map(check_aggregate_state_for_one_url, argsList)

    # Building the results: regroup results by aggregate_id
    agg_results = dict()
    agg_to_get_available_res = set()
    agg_ids_to_get_available_res = set()
    for (monitored_aggregate_id, ts, state, am_url, get_avail) in results:
        if monitored_aggregate_id not in agg_results:
            agg_results[monitored_aggregate_id] = list()
        agg_results[monitored_aggregate_id].append((ts, state))
        if get_avail:
            if monitored_aggregate_id not in agg_ids_to_get_available_res:
                agg_ids_to_get_available_res.add(monitored_aggregate_id)
                agg_to_get_available_res.add((monitored_aggregate_id, am_url))

    for monitored_aggregate_id in agg_results.keys():
        insert_aggregate_result(monitored_aggregate_id, agg_results[monitored_aggregate_id], dp)

    dp.db_purge_agg_is_available()

    argsList = []
    for (monitored_aggregate_id, am_url) in agg_to_get_available_res:
        site = IGCompute(monitored_aggregate_id, '', cmid='', url=am_url)
        args = (site, myLock)
        argsList.append(args)

    results = pool.map(get_ig_available_ips_and_raw_pcs_for_one_site, argsList)
    for (monitored_aggregate_id, ts, raw_pc_avail, avail_ips, _config_ips) in results:
        dp.insert_routable_ip_available_datapoint(monitored_aggregate_id, ts, avail_ips)
        dp.insert_raw_pcs_available_datapoint(monitored_aggregate_id, ts, raw_pc_avail)
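# Hypothetical outline of the worker mapped over argsList above. Only the
# argument/return shape is asserted by the calling code: a 5-tuple of
# (aggregate id, microsecond timestamp, state, AM URL, get_avail), where
# get_avail says whether to follow up with an IP/raw-PC availability query.
# The state convention and the InstaGENI type check are assumptions; the real
# probe (e.g. an AM API GetVersion call) is not reproduced here.
import time

def check_aggregate_state_for_one_url(args):
    (monitored_aggregate_id, amtype, url, lock) = args
    ts = int(time.time() * 1000000)
    state = 1  # assumed convention: 1 = up, 0 = down
    get_avail = (amtype == "instageni")  # assumption: only IG racks report IPs/PCs
    return (monitored_aggregate_id, ts, state, url, get_avail)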
def __init__(self, config):
    self._frequency_sec = int(config['frequency_sec'])
    self._window_duration_sec = int(config['window_duration_sec'])
    self._database_user = config['database_user']
    self._database_pwd = config['database_pwd']
    self._database_name = config['database_name']

    # External IP address of the GRAM rack's local datastore
    self._base_url = config['base_address']
    self._aggregate_id = config['aggregate_id']
    self._aggregate_urn = config['aggregate_urn']

    # Set the current time for all methods in the run loop
    self._current_time = 0
    self._ts = ''

    # Aggregate info query URL
    self._aggregate_href = self._base_url + "/info/aggregate/" + self._aggregate_id
    # Time-series data measurement reference
    self._measurement_href = self._base_url + "/data/"

    self._hosts = config['hosts']
    self._modules = config['modules']
    self._node_commands = config['node_commands']
    self._interface_commands = config['interface_commands']
    self._prev_values = {}
    self._config = config

    self._table_manager = table_manager.TableManager('local', config_path)
    self._table_manager.poll_config_store()

    # Grab schemas
    for cmd in self._node_commands:
        tablename = cmd['table']
        self._prev_values[tablename] = {}

    self._gram_config = json.loads(open('/etc/gram/config.json').read())

    self._recent_snapshot = None
    self._objects_by_urn = {}  # current objects read by snapshot

    # json-schema URLs
    self._agg_schema = "http://www.gpolab.bbn.com/monitoring/schema/20140501/aggregate#"
    self._authority_schema = "http://www.gpolab.bbn.com/monitoring/schema/20140501/authority#"
    self._node_schema = "http://www.gpolab.bbn.com/monitoring/schema/20140501/node#"
    self._sliver_schema = "http://www.gpolab.bbn.com/monitoring/schema/20140501/sliver#"
    self._slice_schema = "http://www.gpolab.bbn.com/monitoring/schema/20140501/slice#"
    self._interface_schema = "http://www.gpolab.bbn.com/monitoring/schema/20140501/port#"
    self._interfacevlan_schema = "http://www.gpolab.bbn.com/monitoring/schema/20140501/port-vlan#"
    self._link_schema = "http://www.gpolab.bbn.com/monitoring/schema/20140501/link#"
    self._user_schema = "http://www.gpolab.bbn.com/monitoring/schema/20140501/user#"

    self._populator_version = '20140501'

    # Change ' to \" in any expressions (can't parse " in json)
    for cmd in self._node_commands:
        expr = cmd['expression']
        expr_new = expr.replace("'", '\\"')
        cmd['expression'] = expr_new

    imports = ";".join("import %s" % mod for mod in self._modules)
    measurements = ", ".join(c['expression'] for c in self._node_commands)
    self._rsh_command = "python -c %s%s;print %s%s" % \
        ('"', imports, measurements, '"')
    # print "RSH = %s" % self._rsh_command

    self._interface_info_rsh_command = "python -c %s%s%s;print %s%s" % \
        ('"', imports, ';import json',
         'json.dumps(psutil.net_io_counters(pernic=True))', '"')
    # print "IFACE RSH = %s" % self._interface_info_rsh_command

    self._external_vlans = self._compute_external_vlans()
    self._internal_vlans = parseVLANs(self._gram_config['internal_vlans'])

    self._nodes = {}
    self._links = {}
    self._initialize_nodes()
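# For concreteness, a standalone rendering of the remote-shell command
# assembled above, using made-up config values (the module list and the
# expression are assumptions). Each host runs this one-liner and its stdout
# becomes the measurement value.
modules = ["psutil"]
node_commands = [{"table": "ops_node_cpu_util",
                  "expression": "psutil.cpu_percent()"}]
imports = ";".join("import %s" % mod for mod in modules)
measurements = ", ".join(c['expression'] for c in node_commands)
rsh_command = 'python -c "%s;print %s"' % (imports, measurements)
print rsh_command  # -> python -c "import psutil;print psutil.cpu_percent()"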