def send2monit(data):
    """
    Helper function which wraps StompAMQ and incoming dataframe into
    notification message. Then it sends it to AMQ end-point provided
    by credentials file.
    """
    if not StompAMQ:
        return
    # main function logic
    with open(SparkFiles.get('amq_broker.json')) as istream:
        creds = json.load(istream)
        host, port = creds['host_and_ports'].split(':')
        port = int(port)
        amq = StompAMQ(creds['username'], creds['password'],
                       creds['producer'], creds['topic'],
                       validation_schema=None,
                       host_and_ports=[(host, port)])
        arr = []
        for idx, row in enumerate(data):
            # if not idx:
            #     print("### row", row, type(row))
            doc = json.loads(row)
            hid = doc.get("hash", 1)
            arr.append(amq.make_notification(doc, hid))
        amq.send(arr)
        print("### Send %s docs to CERN MONIT" % len(arr))
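# The 'amq_broker.json' file read by send2monit is not shown in this section; judging
# from the keys accessed above, it is assumed to look roughly like the sketch below
# (producer/topic names are placeholders, not real values):
#
# {
#     "username": "...",
#     "password": "...",
#     "producer": "cms-example-producer",
#     "topic": "/topic/cms.example",
#     "host_and_ports": "cms-mb.cern.ch:61313"
# }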
def uploadToAMQ(self, docs, agentUrl, timeS):
    """
    _uploadToAMQ_

    Sends data to AMQ, which ends up in the MonIT infrastructure.

    :param docs: list of documents/dicts to be posted
    """
    if not docs:
        logging.info("There are no documents to send to AMQ")
        return
    # add mandatory information for every single document
    for doc in docs:
        doc['agent_url'] = agentUrl

    docType = "cms_%s_info" % self.producer
    notifications = []

    logging.debug("Sending the following data to AMQ %s", pformat(docs))
    try:
        stompSvc = StompAMQ(username=self.userAMQ, password=self.passAMQ,
                            producer=self.producer, topic=self.topicAMQ,
                            validation_schema=None,
                            host_and_ports=self.hostPortAMQ, logger=logging)

        for doc in docs:
            singleNotif, _, _ = stompSvc.make_notification(payload=doc, docType=docType,
                                                           ts=timeS, dataSubfield="payload")
            notifications.append(singleNotif)

        failures = stompSvc.send(notifications)
        msg = "%i out of %i documents successfully sent to AMQ" % (len(notifications) - len(failures),
                                                                   len(notifications))
        logging.info(msg)
    except Exception as ex:
        logging.exception("Failed to send data to StompAMQ. Error %s", str(ex))

    return
def uploadToAMQ(self, docs, producer=None):
    """
    _uploadToAMQ_

    Sends data to AMQ, which ends up in Elasticsearch.

    :param docs: list of documents/dicts to be posted
    :param producer: service name that's providing this info
    """
    if not docs:
        self.logger.info("There are no documents to send to AMQ")
        return
    producer = producer or self.producer
    ts = int(time.time())
    notifications = []

    self.logger.debug("Sending the following data to AMQ %s", pformat(docs))
    try:
        stompSvc = StompAMQ(username=self.userAMQ, password=self.passAMQ,
                            producer=producer, topic=self.topicAMQ,
                            validation_schema=None,
                            host_and_ports=self.hostPortAMQ, logger=self.logger)

        for doc in docs:
            singleNotif, _, _ = stompSvc.make_notification(payload=doc, docType=self.docTypeAMQ,
                                                           ts=ts, dataSubfield="payload")
            notifications.append(singleNotif)

        failures = stompSvc.send(notifications)
        msg = "%i out of %i documents successfully sent to AMQ" % (len(notifications) - len(failures),
                                                                   len(notifications))
        self.logger.info(msg)
    except Exception as ex:
        self.logger.exception("Failed to send data to StompAMQ. Error %s", str(ex))

    return
def sendDoc(cred, docs):
    """
    Given a credential dict and documents to send, make notifications and send them.

    :param dict cred: credential required by StompAMQ
    :param list docs: documents to send
    :returns: list of notifications that failed to be sent
    """
    if not docs:
        logger.info("No documents going to be sent to AMQ.")
        return []

    try:
        amq = StompAMQ(username=None, password=None,
                       producer=cred['producer'], topic=cred['topic'],
                       validation_schema=None,
                       host_and_ports=[(cred['hostport']['host'], cred['hostport']['port'])],
                       logger=logger,
                       cert=cred['cert'], key=cred['key'])
        doctype = 'workflowmonit_{}'.format(cred['producer'])
        notifications = [amq.make_notification(payload=doc, docType=doctype)[0] for doc in docs]
        failures = amq.send(notifications)
        logger.info("{}/{} docs successfully sent to AMQ.".format(
            len(notifications) - len(failures), len(notifications)))
        return failures
    except Exception as e:
        logger.exception("Failed to send data to StompAMQ. Error: {}".format(str(e)))
        raise
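# A minimal usage sketch for sendDoc, assuming a credential dict with the keys the
# function accesses above; producer, topic, paths and the sample document below are
# placeholders, not real values.
#
# cred = {
#     "producer": "workflowmonit",                              # hypothetical producer name
#     "topic": "/topic/cms.workflowmonit",                      # hypothetical AMQ topic
#     "hostport": {"host": "cms-mb.cern.ch", "port": 61313},
#     "cert": "/path/to/usercert.pem",
#     "key": "/path/to/userkey.pem",
# }
# failures = sendDoc(cred, [{"campaign": "TestCampaign", "status": "running"}])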
def hdfs(fin, fout, token, amq, verbose):
    "perform HDFS scan"
    out = []
    data = json.load(open(fin))
    cmd = "hadoop fs -du -h -s %s"
    path = "hdfs:///path"
    for desc, path in data.items():
        size, output = run_hdfs(cmd % path)
        rec = {}
        rec['name'] = desc
        rec['path'] = path
        rec['size'] = size
        rec['type'] = 'hdfs'
        if verbose:
            print(desc, path, size, output)
        out.append(rec)
    # get monit ES info
    if token and os.path.exists(token):
        cmd = 'monit -token %s -query="stats"' % token
        output = os.popen(cmd).readlines()
        for line in output:
            index, size = line.replace('\n', '').split()
            rec = {}
            rec['name'] = index
            rec['size'] = float(size)
            rec['path'] = ''
            rec['type'] = 'elasticsearch'
            if verbose:
                print(index, size, line)
            out.append(rec)
    if amq:
        creds = credentials(amq)
        host, port = creds['host_and_ports'].split(':')
        port = int(port)
        producer = creds['producer']
        topic = creds['topic']
        username = creds['username']
        password = creds['password']
        if verbose:
            print("producer: {}, topic {}".format(producer, topic))
            print("ckey: {}, cert: {}".format(creds.get('ckey'), creds.get('cert')))
        try:
            # create instance of StompAMQ object with your credentials
            mgr = StompAMQ(username, password, producer, topic,
                           validation_schema=None,
                           host_and_ports=[(host, port)])
            # loop over your document records and create notification documents
            # we will send to MONIT
            data = []
            for doc in out:
                # every document should have a hash id
                hid = doc.get("hash", 1)  # replace this line with your hash id generation
                tstamp = int(time.time()) * 1000
                producer = creds["producer"]
                notification, _, _ = mgr.make_notification(doc, hid, producer=producer,
                                                           ts=tstamp, dataSubfield="")
                data.append(notification)
            # send our data to MONIT
            results = mgr.send(data)
            print("AMQ submission results", results)
        except Exception as exc:
            print("Failed to send data to AMQ", str(exc))
    else:
        if fout:
            with open(fout, 'w') as ostream:
                ostream.write(json.dumps(out))
        else:
            print(json.dumps(out))
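# The credentials() helper used above and in the snippets below is not defined in this
# section; a minimal sketch, assuming it simply loads the AMQ broker credentials from a
# JSON file and returns them as a dict:
def credentials(fname=None):
    "Read StompAMQ credentials from the given JSON file and return them as a dict"
    if not fname or not os.path.exists(fname):
        return {}
    with open(fname) as istream:
        return json.load(istream)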
def main():
    "Main function"
    optmgr = OptionParser()
    opts = optmgr.parser.parse_args()
    time0 = time.time()
    if opts.scripts:
        scripts()
        sys.exit(0)
    verbose = opts.verbose
    todate = datetime.datetime.today()
    todate = int(todate.strftime("%Y%m%d"))
    fromdate = datetime.datetime.today() - datetime.timedelta(days=1)
    fromdate = int(fromdate.strftime("%Y%m%d"))
    spec = {}
    try:
        if os.path.isfile(opts.spec):
            spec = json.load(open(opts.spec))
        else:
            spec = json.loads(opts.spec)
    except Exception:
        pass
    timerange = spec.get('spec', {}).get('timerange', [fromdate, todate])
    if timerange and verbose:
        print("### TimeRange: %s" % timerange)
    hdir = opts.hdir
    if timerange:
        pat = re.compile(".*/20[0-9][0-9].*")
        if len(hdir.split()) == 1 and not pat.match(hdir):
            hdir = hdir.split()[0]
            hdirs = []
            for tval in range_dates(timerange):
                if hdir.find(tval) == -1:
                    hdirs.append(os.path.join(hdir, tval))
            hdir = hdirs
    if verbose:
        print("### HDIR: %s" % hdir)
    results = run(opts.schema, hdir, opts.script, opts.spec, verbose, opts.rout, opts.yarn)
    if opts.store:
        data = {"results": results, "ts": time.time(), "etime": time.time() - time0}
        if opts.wmaid:
            data['wmaid'] = opts.wmaid
        else:
            data['wmaid'] = wmaHash(data)
        data['dtype'] = 'job'
        pdata = dict(job=data)
        postdata(opts.store, pdata, opts.ckey, opts.cert, verbose)
    elif opts.amq:
        creds = credentials(opts.amq)
        host, port = creds['host_and_ports'].split(':')
        port = int(port)
        if creds and StompAMQ:
            print("### Send %s docs via StompAMQ" % len(results))
            amq = StompAMQ(creds['username'], creds['password'],
                           creds['producer'], creds['topic'],
                           validation_schema=None,
                           host_and_ports=[(host, port)])
            data = []
            for doc in results:
                hid = doc.get("hash", 1)
                if '_id' in doc:
                    del doc['_id']  # delete ObjectID from MongoDB
                producer = "wmarchive"
                tstamp = int(time.time()) * 1000
                notification, _, _ = amq.make_notification(doc, hid, producer=producer,
                                                           ts=tstamp, dataSubfield="")
                data.append(notification)
            results = amq.send(data)
            print("### results from AMQ %s" % len(results))
    else:
        if isinstance(results, list):
            print("### number of results %s" % len(results))
            for doc in results:
                if '_id' in doc:
                    del doc['_id']  # delete ObjectID from MongoDB
                try:
                    print(json.dumps(doc))
                except Exception:
                    print(doc)
        else:
            print(results)
username = creds['username']
password = creds['password']
host, port = creds['host_and_ports'].split(':')
port = int(port)
producer = creds['producer']
topic = creds['topic']
hosts = [(host, port)]
if creds and StompAMQ:
    amq = StompAMQ(username, password, producer, topic,
                   key=None, cert=None,
                   validation_schema=None,
                   host_and_ports=[(host, port)])
    eod = False
    wait_seconds = 10
    while not eod:
        messages = []
        for d in payload:
            notif, _, _ = amq.make_notification(d, "aaa_federations_document", dataSubfield=None)
            messages.append(notif)
        if messages:
            print(messages)
            amq.send(messages)
            time.sleep(wait_seconds)
        else:
            eod = True
def run(creds, fout, date=None, ifilter=None):
    # schema of the FTS data that is taken
    _schema = StructType([
        StructField('metadata',
                    StructType([StructField('timestamp', LongType(), nullable=True)])),
        StructField('data',
                    StructType([
                        StructField('t__error_message', StringType(), nullable=True),
                        StructField('src_hostname', StringType(), nullable=True),
                        StructField('dst_hostname', StringType(), nullable=True)
                    ])),
    ])
    sc = spark_session()
    if not date:
        # tstamp = time.time() - 24*60*60  # one day ago
        tstamp = time.time()  # take today
        date = time.strftime("%Y/%m/%d", time.gmtime(tstamp))
    # take only non-empty messages; if no date is given the records from today are taken
    fts_df = fts_tables(sc, date=date, schema=_schema).select(
        col('metadata.timestamp').alias('tstamp'),
        col('data.src_hostname').alias('src_hostname'),
        col('data.dst_hostname').alias('dst_hostname'),
        col('data.t__error_message').alias('error_message')
    ).where('error_message <> ""')
    if ifilter:
        fts_df = fts_df.where(ifilter)
    fts_df.show()
    df = fts_df.toPandas()  # the messages are converted to a Pandas dataframe
    # model is named according to the run date
    mod_name = 'word2vec_' + time.strftime("%Y_%m_%d_%H_%M_%S", time.localtime(time.time())) + '.model'
    cluster = pipeline.Chain(df, target='error_message', mode='create', model_name=mod_name)
    cluster.process()  # messages are clustered with the 'clusterlogs' module
    df.loc[:, 'cluster_id'] = int(1)
    print(cluster.model_name)
    # info about the clustering model is added to the messages
    if cluster.clustering_type == 'SIMILARITY':
        df.loc[:, 'model'] = 'Levenshtein'
    else:
        df.loc[:, 'model'] = cluster.model_name
    # info about the clusters is added to the error messages
    for el in cluster.result.index:
        df.loc[cluster.result.loc[el, 'indices'], 'cluster_id'] = str(uuid.uuid4())
        df.loc[cluster.result.loc[el, 'indices'], 'cluster_pattern'] = cluster.result.loc[el, 'pattern']
    res = df[['tstamp', 'cluster_id', 'cluster_pattern', 'model',
              'src_hostname', 'dst_hostname', 'error_message']]
    print("Number of messages: ", res.shape[0])
    if fout:
        nrows = df.shape[0]
        count = 0
        now = time.time()
        with open(fout, 'w') as ostream:
            ostream.write('[{"producer": "logclustering",\n "type_prefix": "raw",\n'
                          ' "type": "log_classification",\n "timestamp": ' + str(now) +
                          ',\n "data": [' + '\n')
            for d in df_to_batches(res, 10000):
                for r in d:
                    if nrows - count == 1:  # last row to write
                        ostream.write(json.dumps(r) + '\n')
                    else:
                        ostream.write(json.dumps(r) + ',\n')
                    count += 1
            ostream.write(']}]' + '\n')  # close the "data" list and the wrapping record
    creds = credentials(creds)
    if creds:
        username = creds.get('username', '')
        password = creds.get('password', '')
        producer = creds.get('producer', 'cms-fts-logsanalysis')
        topic = creds.get('topic', '/topic/cms.fts.logsanalysis')
        host = creds.get('host', 'cms-mb.cern.ch')
        port = int(creds.get('port', 61313))
        cert = creds.get('cert', None)
        ckey = creds.get('ckey', None)
        stomp_amq = StompAMQ(username, password, producer, topic,
                             key=ckey, cert=cert,
                             validation_schema=None,
                             host_and_ports=[(host, port)])
        # messages are sent to the AMQ queue in batches of 10000
        for d in df_to_batches(res, 10000):
            messages = []
            for msg in d:
                notif, _, _ = stomp_amq.make_notification(msg, "training_document",
                                                          producer=producer, dataSubfield=None)
                messages.append(notif)
            stomp_amq.send(messages)
            time.sleep(0.1)
        print("Message sending is finished")
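# The df_to_batches() helper used above and in the last snippet below is not defined in
# this section; a minimal sketch, assuming it yields the dataframe rows as lists of
# dicts, 'size' rows at a time:
def df_to_batches(df, size):
    "Yield the rows of a Pandas dataframe as lists of dicts in chunks of the given size"
    records = df.to_dict(orient='records')
    for idx in range(0, len(records), size):
        yield records[idx:idx + size]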
def monicron(interval_code, config_path, out_base_dir, creds_path):
    """Run aggregations and push results through StompAMQ service"""
    with open(config_path, "r") as fin:
        config = json.load(fin)
    if not out_base_dir[-1] == "/":
        out_base_dir += "/"
    # Get credentials
    creds = credentials(creds_path)
    if not creds or not StompAMQ:
        raise ValueError("missing StompAMQ credentials file")
    host, port = creds["host_and_ports"].split(":")
    port = int(port)
    # Establish StompAMQ context
    amq = StompAMQ(creds["username"], creds["password"],
                   creds["producer"], "/topic/" + config["topic"],
                   validation_schema=None,
                   host_and_ports=[(host, port)])
    interval = ""
    results = {}
    if isinstance(interval_code, int):
        if interval_code > len(INTERVALS) - 1:
            raise ValueError("invalid interval code")
        interval = INTERVALS[interval_code]
        print("[monicron] Computing aggs for " + interval)
        # Run aggregations
        results = run_over_interval(config, interval, out_base_dir)
    else:
        interval = "daily"
        print("[monicron] computing aggs for yesterday:")
        # Run aggregations
        results = run_over_yesterday(config)
    # Format results and send notification through StompAMQ
    if not results:
        print("[monicron] WARNING: no results produced.")
        return
    # Get StompAMQ notification
    print("[monicron] Results:")
    print(json.dumps(results, indent=4))
    payload = results["data"]
    metadata = results["metadata"]
    doc_type = config["source_name"]
    notification, _, _ = amq.make_notification(payload, doc_type, metadata=metadata)
    # Get destination path in local cache
    min_datetime, max_datetime = get_time_interval(interval)
    monicron_dir = "{0}/{1}/{2}/{3}/".format(config["cache_name"], config["source_name"],
                                             config["namespace"], interval)
    date_dir = ""
    file_date = get_file_date(min_datetime, max_datetime)
    if interval == "daily":
        split_date = file_date.split("-")  # [month, day, year]
        split_date = [split_date[-1]] + split_date[:-1]
        date_dir = "/".join(split_date) + "/"
    else:
        date_dir = file_date + "/"
    out_path = out_base_dir + monicron_dir + date_dir
    if not os.path.exists(out_path):
        os.makedirs(out_path)
    # Write to local cache
    data = notification["body"]["data"]
    metadata = notification["body"]["metadata"]
    with open(out_path + "data.json", "w") as f_out:
        json.dump(data, f_out)
    with open(out_path + "metadata.json", "w") as f_out:
        json.dump(metadata, f_out)
    print("[monicron] Dumped results to {}".format(out_path))
    # Deliver package
    print("[monicron] Delivered the following package:")
    print(json.dumps(notification, indent=4))
    response = amq.send(notification)
    print("[monicron] Response from AMQ:")
    print(json.dumps(response, indent=4))
    return
modules = data.get("modules") for module in modules: mpayload = {} mpayload["module_type"] = str(module.get("type", "no type")) mpayload["module_label"] = str(module.get("label", "no label")) [subsystem, package] = findGroup(mpayload) mpayload["module_package"] = str(package) mpayload["module_subsystem"] = str(subsystem) mpayload[str(payload["module_type"])] = str( payload["module_label"]) mpayload["events"] = int(module.get("events", 0)) mpayload["time_thread"] = float(module.get("time_thread", 0.)) mpayload["time_real"] = float(module.get("time_real", 0.)) mpayload["mem_alloc"] = int(module.get("mem_alloc", 0)) mpayload["mem_free"] = int(module.get("mem_free", 0)) mpayload["release"] = release mpayload["release_queue"] = release_queue mpayload["release_ts"] = release_ts mpayload["workflow"] = workflow mpayload["arch"] = arch str2hash = release + arch + workflow + str( release_ts) + mpayload.get("module_label") mhash = hashlib.sha1(str2hash.encode()).hexdigest() mpayload["hash"] = mhash notification, _, _ = stomp_amq.make_notification( mpayload, "profiling_document", dataSubfield=None) documents.append(notification) results = stomp_amq.send(documents) print(results)
def main():
    # schema of the FTS data that is taken
    _schema = StructType([
        StructField('metadata',
                    StructType([StructField('timestamp', LongType(), nullable=True)])),
        StructField('data',
                    StructType([
                        StructField('t__error_message', StringType(), nullable=True),
                        StructField('src_hostname', StringType(), nullable=True),
                        StructField('dst_hostname', StringType(), nullable=True)
                    ])),
    ])
    sc = spark_session()
    # taking only non-empty messages for the given date
    fts_df = fts_tables(sc, date="2020/04/30", schema=_schema).select(  # alternatively: date="2020/03/19"
        col('metadata.timestamp').alias('timestamp'),
        col('data.src_hostname').alias('src_hostname'),
        col('data.dst_hostname').alias('dst_hostname'),
        col('data.t__error_message').alias('error_message')
    ).where('error_message <> ""')
    fts_df.show()
    df = fts_df.toPandas()  # the messages are converted to a Pandas dataframe
    # model is named according to the run date
    mod_name = 'word2vec_' + time.strftime("%Y_%m_%d_%H_%M_%S", time.localtime(time.time())) + '.model'
    cluster = pipeline.Chain(df, target='error_message', mode='create', model_name=mod_name)
    cluster.process()  # messages are clustered with the 'clusterlogs' module
    df.loc[:, 'cluster_id'] = int(1)
    print(cluster.model_name)
    # info about the clustering model is added to the messages
    if cluster.clustering_type == 'SIMILARITY':
        df.loc[:, 'model'] = 'Levenshtein'
    else:
        df.loc[:, 'model'] = cluster.model_name
    # info about the clusters is added to the error messages
    for el in cluster.result.index:
        df.loc[cluster.result.loc[el, 'indices'], 'cluster_id'] = str(uuid.uuid4())
        df.loc[cluster.result.loc[el, 'indices'], 'cluster_pattern'] = cluster.result.loc[el, 'pattern']
    res = df[['timestamp', 'cluster_id', 'cluster_pattern', 'model',
              'src_hostname', 'dst_hostname', 'error_message']]
    print("Number of messages: ", res.shape[0])
    username = ""
    password = ""
    producer = "cms-fts-logsanalysis"
    topic = "/topic/cms.fts.logsanalysis"
    host = "cms-mb.cern.ch"
    port = 61323
    cert = "/afs/cern.ch/user/n/ntuckus/.globus/usercert.pem"
    ckey = "/afs/cern.ch/user/n/ntuckus/.globus/userkey.pem"
    # using the StompAMQ module a connection to MonIT is created
    stomp_amq = StompAMQ(username, password, producer, topic,
                         key=ckey, cert=cert,
                         validation_schema=None,
                         host_and_ports=[(host, port)])
    # messages are sent to the AMQ queue in batches of 10000
    for d in df_to_batches(res, 10000):
        messages = []
        for msg in d:
            notif, _, _ = stomp_amq.make_notification(msg, "training_document",
                                                      metadata={"version": "997"},
                                                      dataSubfield=None, ts=msg['timestamp'])
            messages.append(notif)
        stomp_amq.send(messages)
        time.sleep(0.1)
    print("Message sending is finished")