def prometheus_solid(context):
    """Register an Enum metric on the Prometheus resource and check it is collected.

    Args:
        context: Execution context. ``context.resources.prometheus.registry``
            is the registry the Enum is registered on and collected from.

    Raises:
        AssertionError: If no metric named ``my_task_state`` is collected, or
            if the labels of its first sample differ from the expected ones.
    """
    registry = context.resources.prometheus.registry
    e = Enum(
        'my_task_state',
        'Description of enum',
        states=['starting', 'running', 'stopped'],
        registry=registry,
    )
    # no idea why pylint doesn't like this line, it's correct
    e.state('running')  # pylint: disable=no-member

    # Look the metric up explicitly. Reusing a ``for ... break`` loop variable
    # leaves it bound to the last collected metric when nothing matches, so a
    # plain truthiness check would only catch a completely empty registry.
    metric = next(
        (m for m in registry.collect() if m.name == 'my_task_state'),
        None,
    )
    assert metric is not None, 'my_task_state metric was not collected'
    # NOTE(review): this checks only the labels of the first sample, not which
    # state is currently active -- confirm that is the intended check.
    assert metric.samples[0].labels == {'my_task_state': 'starting'}
def prometheus_solid(context):
    """Register an Enum metric on the Prometheus resource and check it is collected.

    Args:
        context: Execution context. ``context.resources.prometheus.registry``
            is the registry the Enum is registered on and collected from.

    Raises:
        AssertionError: If no metric named ``my_task_state`` is collected, or
            if the labels of its first sample differ from the expected ones.
    """
    registry = context.resources.prometheus.registry
    e = Enum(
        "my_task_state",
        "Description of enum",
        states=["starting", "running", "stopped"],
        registry=registry,
    )
    # no idea why pylint doesn't like this line, it's correct
    e.state("running")  # pylint: disable=no-member

    # Look the metric up explicitly. Reusing a ``for ... break`` loop variable
    # leaves it bound to the last collected metric when nothing matches, so a
    # plain truthiness check would only catch a completely empty registry.
    metric = next(
        (m for m in registry.collect() if m.name == "my_task_state"),
        None,
    )
    assert metric is not None, "my_task_state metric was not collected"
    # NOTE(review): this checks only the labels of the first sample, not which
    # state is currently active -- confirm that is the intended check.
    assert metric.samples[0].labels == {"my_task_state": "starting"}
def process_exporter():
    """Render the state of each service configured for this host as Prometheus text.

    The service list is looked up in ``process.config.hadoop`` under the local
    hostname. Every service gets its own single-state Enum named
    ``process_<service>_state`` in a fresh registry:

    * state >= 1  -> ``running``
    * state == 0  -> ``stopped``
    * otherwise   -> ``Nostate``

    Returns:
        A ``Response`` with mimetype ``text/plain`` whose body is
        ``generate_latest`` of that registry.
    """
    from process.config import hadoop

    host = socket.gethostname()
    registry = CollectorRegistry(auto_describe=True)

    # A host with no configured services yields an empty exposition instead of
    # a TypeError from iterating None.
    for service in hadoop.get(host) or []:
        metricname = 'process_' + service + '_state'
        try:
            state = int(service_state(service).strip())
        except ValueError:
            # Output that is not an integer is reported as "Nostate" rather
            # than failing the whole scrape.
            state = -1

        if state >= 1:
            label = 'running'
        elif state == 0:
            label = 'stopped'
        else:
            label = 'Nostate'

        e = Enum(metricname, 'process status', states=[label], registry=registry)
        e.state(label)

    return Response(generate_latest(registry), mimetype='text/plain')
def getnodeState():
    """Probe every worker in ``ipdict`` and render its state as Prometheus text.

    For each ``host -> ip`` entry the worker's ``/v1/info/state`` endpoint on
    port 8285 is queried with a 5 second timeout. A worker whose JSON body
    equals ``'ACTIVE'`` is exported as ``Active``; any other answer, a request
    failure, or an undecodable body is exported as ``Dead``. The metric name
    is the host key with its first four characters removed.

    Returns:
        A ``Response`` with mimetype ``text/plain`` whose body is
        ``generate_latest`` of a fresh registry holding one Enum per worker.
    """
    registry = CollectorRegistry(auto_describe=True)

    for host, ip in ipdict.items():
        metricname = host[4:]
        url = "http://%s:8285/v1/info/state" % (ip)

        # Only the network call and the JSON decoding are guarded, so a metric
        # registration error is raised once and is not retried in a handler.
        try:
            active = requests.get(url, timeout=5).json() == 'ACTIVE'
        except (requests.RequestException, ValueError):
            active = False

        label = 'Active' if active else 'Dead'
        e = Enum(metricname, 'presto worker state', states=[label], registry=registry)
        e.state(label)

    return Response(generate_latest(registry), mimetype='text/plain')
class BroadcastWebsocketStats():
    """Prometheus metrics describing one broadcast websocket connection.

    All collectors live in a private ``CollectorRegistry`` and are named
    ``awx_<remote_name>_...``, where ``remote_name`` is the remote hostname
    passed through ``safe_name``.
    """

    def __init__(self, local_hostname, remote_hostname):
        """Create the registry and the collectors for one remote host.

        Args:
            local_hostname: Name of this host; stored and sanitized into ``name``.
            remote_hostname: Name of the peer; sanitized into ``remote_name``
                and used in every metric name.
        """
        self._local_hostname = local_hostname
        self._remote_hostname = remote_hostname
        self._registry = CollectorRegistry()
        # Assigned by record_connection_established(); None until then so that
        # get_connection_duration() never hits a missing attribute.
        self._connection_established_ts = None

        # TODO: More robust replacement
        self.name = self.safe_name(self._local_hostname)
        self.remote_name = self.safe_name(self._remote_hostname)

        self._messages_received_total = Counter(
            f'awx_{self.remote_name}_messages_received_total',
            'Number of messages received, to be forwarded, by the broadcast websocket system',
            registry=self._registry,
        )
        self._messages_received = Gauge(
            f'awx_{self.remote_name}_messages_received',
            'Number forwarded messages received by the broadcast websocket system, for the duration of the current connection',
            registry=self._registry,
        )
        self._connection = Enum(
            f'awx_{self.remote_name}_connection',
            'Websocket broadcast connection',
            states=['disconnected', 'connected'],
            registry=self._registry,
        )
        self._connection_start = Gauge(
            f'awx_{self.remote_name}_connection_start',
            'Time the connection was established',
            registry=self._registry,
        )
        self._messages_received_per_minute = Gauge(
            f'awx_{self.remote_name}_messages_received_per_minute',
            'Messages received per minute',
            registry=self._registry,
        )
        self._internal_messages_received_per_minute = FixedSlidingWindow()

    def safe_name(self, s):
        """Return ``s`` with each run of non alpha-numeric characters replaced by one ``_``."""
        return re.sub('[^0-9a-zA-Z]+', '_', s)

    def unregister(self):
        """Remove the messages-received gauge and the connection enum from the registry."""
        # The registry is keyed by collector object, so the collectors
        # themselves are passed rather than their metric names.
        self._registry.unregister(self._messages_received)
        self._registry.unregister(self._connection)

    def record_message_received(self):
        """Count one received message in the sliding window, the gauge and the counter."""
        self._internal_messages_received_per_minute.record()
        self._messages_received.inc()
        self._messages_received_total.inc()

    def record_connection_established(self):
        """Mark the connection as up and restart the per-connection statistics."""
        self._connection_established_ts = datetime.datetime.now()
        self._connection.state('connected')
        self._connection_start.set_to_current_time()
        self._messages_received.set(0)

    def record_connection_lost(self):
        """Mark the connection as down."""
        self._connection.state('disconnected')

    def get_connection_duration(self):
        """Return the seconds elapsed since the connection was last established.

        Returns:
            ``0.0`` when no connection has been recorded yet, otherwise the
            elapsed time in seconds as a float.
        """
        if self._connection_established_ts is None:
            return 0.0
        elapsed = datetime.datetime.now() - self._connection_established_ts
        return elapsed.total_seconds()

    def render(self):
        """Copy the sliding-window rate into the messages-per-minute gauge."""
        msgs_per_min = self._internal_messages_received_per_minute.render()
        self._messages_received_per_minute.set(msgs_per_min)

    def serialize(self):
        """Refresh the derived gauge and return the registry as exposition text (str)."""
        self.render()
        return generate_latest(self._registry).decode('UTF-8')
def _apply_config_change(label, birdc, config_path, known_hash, new_hash, state_metric):
    """Reconfigure one bird daemon if its config hash changed.

    Args:
        label: Address family shown in the log lines ('IPv4' or 'IPv6').
        birdc: Control object offering ``reconfigure_check`` and ``reconfigure``.
        config_path: Path of the config file handed to ``birdc``.
        known_hash: Hash remembered from the previous check.
        new_hash: Hash of the file as it is now.
        state_metric: Enum metric switched to 'valid' or 'invalid'.

    Returns:
        The hash to remember for the next event.
    """
    if new_hash == known_hash:
        print(f'NO {label} config change')
        return known_hash

    print(f'{label} config change')
    if birdc.reconfigure_check(config_path):
        birdc.reconfigure(config_path)
        state_metric.state('valid')
        print(f'valid {label} Config')
    else:
        state_metric.state('invalid')
        print(f'BAD {label} Config')
    return new_hash


def main():
    """Watch the bird config directory and reload bird when a config changes.

    Starts the Prometheus HTTP server on ``METRICS_PORT`` (default 8000), then
    loops forever on inotify CREATE/MODIFY events for ``/opt/bird``. On every
    event both config files are re-hashed. A changed file is validated through
    its control socket and, when valid, applied. The outcome is published in
    the ``bird4_config_state`` / ``bird6_config_state`` Enum metrics.
    """
    print('Starting prometheus exporter')
    start_http_server(int(os.getenv('METRICS_PORT', 8000)))

    bird_config_folder = '/opt/bird'
    birdc_ip4_config = f'{bird_config_folder}/bird.conf'
    birdc_ip6_config = f'{bird_config_folder}/bird6.conf'
    birdc_ip4_socket_path = '/var/run/bird/bird.ctl'
    birdc_ip6_socket_path = '/var/run/bird/bird6.ctl'

    bird4_prometheus = Enum('bird4_config_state', 'Bird IPv4 Config State', states=['valid', 'invalid'])
    bird6_prometheus = Enum('bird6_config_state', 'Bird IPv6 Config State', states=['valid', 'invalid'])

    birdc_ip4 = bird_control.BirdControl(birdc_ip4_socket_path)
    birdc_ip6 = bird_control.BirdControl(birdc_ip6_socket_path)

    bird_ip4_config_hash = hashing.sha256sum(birdc_ip4_config)
    bird_ip6_config_hash = hashing.sha256sum(birdc_ip6_config)

    inotify = INotify()
    watch_flags = flags.CREATE | flags.MODIFY
    inotify.add_watch(bird_config_folder, watch_flags)
    print(f'watching config directory {bird_config_folder}')

    while True:
        for event in inotify.read():
            print(40 * '-')
            print('got inotify event, checking what has changed')
            print(event)

            # Both files are hashed before either daemon is touched.
            new_bird_ip4_config_hash = hashing.sha256sum(birdc_ip4_config)
            new_bird_ip6_config_hash = hashing.sha256sum(birdc_ip6_config)

            bird_ip4_config_hash = _apply_config_change(
                'IPv4', birdc_ip4, birdc_ip4_config,
                bird_ip4_config_hash, new_bird_ip4_config_hash, bird4_prometheus,
            )
            bird_ip6_config_hash = _apply_config_change(
                'IPv6', birdc_ip6, birdc_ip6_config,
                bird_ip6_config_hash, new_bird_ip6_config_hash, bird6_prometheus,
            )
            print(40 * '-')
class Metrics:
    """Publish the newest battery reading from a SQLite database as Prometheus metrics."""

    def __init__(self, database):
        """Open the database and create all collectors.

        The ``readings`` table is read positionally with this column order:
        (date time, ah_percent real, ah_remaining real, ah_full real,
        power real, voltage real, current real, temperature real,
        cycles numeric)

        Args:
            database: Path handed to ``sqlite3.connect``.
        """
        self.database = database
        # Keep the connection referenced next to the cursor created from it.
        self.conn = sqlite3.connect(database)
        self.c = self.conn.cursor()

        self.last_update_gauge = Gauge('bms_last_update', 'Metrics of last update')
        self.ah_percent_gauge = Gauge('bms_ah_percent', 'Metrics of percent of capacity')
        self.ah_remaining_gauge = Gauge('bms_ah_remaining', 'Metrics of remaining capcity')
        self.ah_full_gauge = Gauge('bms_ah_full', 'Metrics of full capacity')
        self.power_gauge = Gauge('bms_power', 'Metrics of power')
        self.voltage_gauge = Gauge('bms_voltage', 'Metrics of voltage')
        self.current_gauge = Gauge('bms_current', 'Metrics of current')
        self.temperature_gauge = Gauge('bms_temperature', 'Metrics of temperature')
        self.cycles_gauge = Gauge('bms_cycles', 'Metrics of cycles')
        self.mode_enum = Enum('bms_mode', 'Metrics of mode', states=['discharging', 'charging'])
        self.time_left = Gauge('bms_time_left', 'Metrics of time to charge/discharge', ['mode'])

    def build_metrics(self):
        """Refresh every metric from the most recent row of ``readings``.

        Does nothing when the table is empty. The ``bms_time_left`` series are
        left untouched when the mean of the last 30 currents is exactly zero.
        """
        data = self.c.execute(
            'select * from readings ORDER BY date DESC LIMIT 1').fetchone()
        if data is None:
            # No reading stored yet: indexing the row below would raise, and an
            # empty current list would divide by zero.
            return

        last_update = datetime.datetime.strptime(data[0], '%Y-%m-%d %H:%M:%S')
        self.last_update_gauge.set(last_update.timestamp())
        self.ah_percent_gauge.set(data[1])
        ah_remaining = data[2]
        self.ah_remaining_gauge.set(ah_remaining)
        ah_full = data[3]
        self.ah_full_gauge.set(ah_full)
        self.power_gauge.set(data[4])
        self.voltage_gauge.set(data[5])
        self.current_gauge.set(data[6])
        self.temperature_gauge.set(data[7])
        self.cycles_gauge.set(data[8])

        # Non-negative power is reported as charging.
        mode = 'charging' if float(data[4]) >= 0 else 'discharging'
        self.mode_enum.state(mode)

        # Mean current over the 30 newest readings (original note: "over 5m").
        self.c.execute(
            'select current from readings ORDER BY date DESC LIMIT 30')
        currents = [row[0] for row in self.c]
        mean_current = sum(currents) / len(currents)
        if mean_current == float(0):
            return

        to100 = (ah_full - ah_remaining) / mean_current
        to40 = (ah_remaining - (ah_full * 0.4)) / mean_current
        to0 = ah_remaining / mean_current
        # "smart" targets 40% while discharging and 100% while charging.
        smart = to40 if mode == 'discharging' else to100

        self.time_left.labels(mode='smart').set(smart)
        self.time_left.labels(mode='to100').set(to100)
        self.time_left.labels(mode='to40').set(to40)
        self.time_left.labels(mode='to0').set(to0)
class ReplicationLagLogger(threading.Thread):
    """Thread that periodically logs the current replication lag.

    Alongside the log lines it publishes the lag in three collectors:
    ``oplog_status`` (Enum), ``oplog_behind_seconds`` and
    ``oplog_behind_entries`` (Gauges).
    """

    def __init__(self, opman, interval):
        """Create the daemon thread and its metrics.

        Args:
            opman: Object providing ``checkpoint``, ``get_last_oplog_timestamp``,
                ``replset_name`` and ``is_alive()``.
            interval: Seconds slept between two measurements.
        """
        super(ReplicationLagLogger, self).__init__()
        self.opman = opman
        self.interval = interval
        self.daemon = True
        self.oplog_status = Enum(
            'oplog_status',
            'Status of mongo-connector syncing with the oplog',
            states=[
                'ingesting', 'behind_time', 'behind_entries', 'up_to_date',
                'behind_entries_and_time'
            ])
        self.oplog_behind_seconds = Gauge('oplog_behind_seconds',
                                          'Oplog is behind in time')
        self.oplog_behind_entries = Gauge('oplog_behind_entries',
                                          'Oplog is behind in entries')

    def log_replication_lag(self):
        """Measure the lag once, update the metrics and log a summary line.

        With no checkpoint the status becomes ``ingesting`` and nothing else
        happens. Otherwise both gauges receive the positive lag (or 0), the
        status is updated, and one INFO line is logged unless the newest write
        is older than the checkpoint.
        """
        checkpoint = self.opman.checkpoint
        if checkpoint is None:
            self.oplog_status.state('ingesting')
            return

        newest_write = retry_until_ok(self.opman.get_last_oplog_timestamp)
        lag_secs = newest_write.time - checkpoint.time
        lag_inc = newest_write.inc - checkpoint.inc

        # Any other combination (e.g. a negative lag) leaves the status as is.
        if lag_secs > 0 and lag_inc > 0:
            self.oplog_status.state('behind_entries_and_time')
        elif lag_secs == 0 and lag_inc == 0:
            self.oplog_status.state('up_to_date')
        elif lag_secs > 0:
            self.oplog_status.state('behind_time')
        elif lag_inc > 0:
            self.oplog_status.state('behind_entries')

        # The gauges carry the lag itself. They are written once, so a scrape
        # never observes an intermediate wall-clock timestamp in their place.
        self.oplog_behind_seconds.set(max(lag_secs, 0))
        self.oplog_behind_entries.set(max(lag_inc, 0))

        # NOTE(review): the metrics above are written before this rollback
        # check -- confirm that publishing them during a rollback is intended.
        if newest_write < checkpoint:
            # OplogThread will perform a rollback, don't log anything
            return

        if lag_secs > 0:
            LOG.info(
                "OplogThread for replica set '%s' is %s seconds behind "
                "the oplog.", self.opman.replset_name, lag_secs)
        elif lag_inc > 0:
            LOG.info(
                "OplogThread for replica set '%s' is %s entries "
                "behind the oplog.", self.opman.replset_name, lag_inc)
        else:
            LOG.info(
                "OplogThread for replica set '%s' is up to date "
                "with the oplog.", self.opman.replset_name)

    def run(self):
        """Measure the lag every ``interval`` seconds while ``opman`` is alive."""
        while self.opman.is_alive():
            self.log_replication_lag()
            time.sleep(self.interval)
g = Gauge('g', 'Description of gauge', ['labelname']) g.labels('numAliveNodes').set(random.random()) return g ''' print(get_nnHeap_utilization(url)) print(get_dfs_utilization(url)) print(get_nn_rpcConn(url)) print(get_dfs_totalFiles(url)) print(get_dfs_totalBlocks(url)) print(get_numAliveNodes(url)) ''' regis = CollectorRegistry(auto_describe=False) a = Gauge('namenode', 'Descriptionofgauge', ['cluster', 'service', 'metric']) b = Gauge('dfsblockscorrupt', 'Corrupted Block Number', ['cluster', 'service', 'metric']) c = Gauge('dfsNumNodes', 'Alive Nodes', ['cluster', 'service', 'metric']) e = Enum('my_task_state', 'Description of enum', states=['starting', 'running', 'stopped']) start_http_server(8000) while True: # a.labels(cluster='bdp',service='bdp102',metric='heapuse').set(get_nnHeap_utilization(url)) # b.labels(cluster='bdp',service='hdfs',metric='repblock').set(get_dfs_corruptedRep(url)) # c.labels(cluster='bdp',service='hdfs',metric='alivenodes').set(get_numAliveNodes(url)) e.state('stopped')
import random
import time

from prometheus_client import (
    Counter,
    Enum,
    Gauge,
    Histogram,
    Info,
    Summary,
    start_http_server,
)

# One collector of every metric type, registered at import time.
cc = Counter('cc', 'A counter')
gg = Gauge('gg', 'A gauge')
hh = Histogram(
    'hh',
    'A histogram',
    buckets=(-5, 0, 5),
    labelnames=['a', 'b'],
)
ss = Summary('ss', 'A summary', labelnames=['a', 'b'])
i = Info('my_build_version', 'Description of info')
e = Enum(
    'my_task_state',
    'Description of enum',
    states=['starting', 'running', 'stopped'],
)

# Static build information is published once.
i.info({'version': '1.2.3', 'buildhost': 'foo@bar'})


def _record_sample():
    """Feed one round of demo values into every collector."""
    cc.inc()
    gg.set(random.random())
    hh.labels('c', 'd').observe(random.randint(-10, 10))
    ss.labels(a='c', b='d').observe(17)
    e.state('running')


if __name__ == '__main__':
    # Serve the metrics on port 8000 and refresh them every two seconds.
    start_http_server(8000)
    while True:
        _record_sample()
        time.sleep(2)
class Command(BaseCommand):
    """Management command that ingests pickled articles referenced by a redis list.

    Each popped value is a path to a zlib-compressed pickle of an article
    dict. New articles are stored through the Django models, then serialized
    and pushed to Elasticsearch in batches. Progress is exported as
    Prometheus metrics on port 8686.
    """

    help = 'This command is used to ingest data from local disk cache'

    def __init__(self, *args, **kwargs):
        super(Command, self).__init__(*args, **kwargs)
        self.source_ingest = Gauge("total_ingest_count",
                                   "Total number of articles ingested",
                                   ['source', 'category'])
        self.task_state = Enum("article_ingestion_state",
                               "Article ingestion states",
                               states=["running", "waiting"])
        # Day (Asia/Kolkata) the counters currently refer to.
        self.now = datetime.now(
            pytz.timezone("Asia/Kolkata")).strftime("%Y-%m-%d")
        self.redis = redis.Redis()
        # Serialized articles waiting to be sent to Elasticsearch.
        self.batch = []
        # Seconds spent idle since the last processed file.
        self.sleep_time = 0
        self.classify = RegexClassification()
        self.score = ArticleScore()

    def reset_stats(self):
        """Set every known (source, category) ingest gauge back to zero."""
        print("Resetting Stats")
        # list(): iterate over a snapshot of the label tuples.
        for source, category in list(self.source_ingest._metrics):
            # A labelled child is obtained through .labels(); the Gauge object
            # itself is not callable.
            self.source_ingest.labels(source=source, category=category).set(0)

    def add_arguments(self, parser):
        """Declare the ``--source`` and ``--index`` options."""
        parser.add_argument('--source',
                            '-s',
                            type=str,
                            help='redis source name [Ex: theverge]')
        parser.add_argument(
            '--index',
            '-i',
            type=str,
            default='article',
            help='elastic search index name [default: article]')

    def get_data_from_redis(self, source):
        """Pop and return the next value of the redis list named ``source``."""
        return self.redis.lpop(source)

    def parse_date(self, date_str):
        """Parse ``date_str`` as a date string or, failing that, as epoch seconds.

        Returns:
            The parsed value converted to UTC, else the datetime built from
            ``int(date_str)`` as a UTC timestamp, else ``None``.
        """
        # NOTE(review): the epoch fallback returns a naive datetime while the
        # first branch returns an aware one -- confirm callers accept both.
        try:
            dt = parse(date_str)
            return dt.astimezone(tz=pytz.UTC)
        except Exception:
            try:
                ts = int(date_str)
                return datetime.utcfromtimestamp(ts)
            except Exception:
                return None

    def remove_char(self, tag, ch):
        """Split ``tag`` on ``ch``; return ``[tag]`` when ``ch`` does not occur."""
        return tag.split(ch) if ch in tag else [tag]

    def remove_special_chars(self, tags):
        """Split compound tags and strip punctuation from each piece.

        Tags are split on ";", " & " and " and " (in that order). In every
        piece "&" and "-" become spaces, the characters ``,:'#*()@!`` are
        dropped, and the result is stripped and lower-cased.
        """
        pieces = tags
        for separator in (";", " & ", " and "):
            pieces = [
                part for tag in pieces
                for part in self.remove_char(tag, separator)
            ]

        final_tags = []
        for tag in pieces:
            tag = tag.replace("&", " ")
            for ch in ",:'#*()@!":
                tag = tag.replace(ch, "")
            final_tags.append(tag.replace("-", " ").strip().lower())
        return final_tags

    def get_tags(self, tags):
        """Return the ``"name"`` value of every tag object in ``tags``."""
        return [tag["name"] for tag in tags]

    def create_model_obj(self, doc, index):
        """Store one article and queue its serialized form for Elasticsearch.

        Nothing happens when the title or the text is empty, or when an
        article with the same title already exists. The queue is flushed to
        ``index`` once it holds 99 documents.

        Args:
            doc: Article dict; keys are read as shown in the body.
            index: Elasticsearch index name used for the batch flush.
        """
        title = doc["title"]
        category = doc["category"]
        source = doc["source"]
        source_url = doc["source_url"]
        cover_image = doc["cover_image"]
        blurb = doc["blurb"]
        full_text = doc.get("short_description") or doc.get("full_text", "")
        published_on = self.parse_date(doc["published_on"])
        if not published_on:
            published_on = timezone.now()
        video_data = doc.get("video_data", "")
        images = doc["images"]
        tags = doc["tags"]

        # Fall back to the first video's image when no cover image is given.
        if not cover_image and video_data:
            cover_image = video_data[0].get("video_image", "")

        if not (title and full_text):
            return
        if Article.objects.filter(title=title).exists():
            return

        if category == "Uncategorised":
            # apply regex based category only if article is uncategorised
            # get category id from regex classification
            category_id = self.classify.match(title)
            category = Category.objects.get(id=category_id)
        else:
            category = Category.objects.get(name=category)

        source, _ = Source.objects.get_or_create(name=source)
        article_obj = Article.objects.create(title=title,
                                             source=source,
                                             category=category,
                                             source_url=source_url,
                                             cover_image=cover_image,
                                             blurb=blurb,
                                             full_text=full_text,
                                             published_on=published_on,
                                             active=True)

        # NOTE(review): a single image is not stored as media (len > 1) --
        # confirm this is intended.
        if len(images) > 1:
            for img in images:
                ArticleMedia.objects.create(article=article_obj,
                                            category="image",
                                            url=img)

        for video_dic in video_data:
            ArticleMedia.objects.create(
                article=article_obj,
                category="video",
                url=video_dic.get("video_image", ""),
                video_url=video_dic.get("video_url", ""))

        if len(tags) > 0:
            new_tags = self.remove_special_chars(tags)
            if new_tags:
                # Reuse an existing hash tag with that name, else create it.
                tag_objs = [
                    HashTag.objects.filter(name=tag).first()
                    or HashTag.objects.create(name=tag) for tag in new_tags
                ]
                article_obj.hash_tags.add(*tag_objs)

        # calculate article score
        score = self.score.calculate_score(doc)
        json_data = ArticleSerializer(article_obj).data
        json_data["article_score"] = score
        if json_data["hash_tags"]:
            json_data["hash_tags"] = self.get_tags(json_data["hash_tags"])

        self.batch.append(json_data)
        if len(self.batch) == 99:
            ingest_to_elastic(self.batch, index, index, 'id')
            self.batch = []
            print("Ingesting Batch To Elastic...!!!")

    def handle(self, *args, **options):
        """Run the ingestion loop until interrupted.

        Raises:
            CommandError: If ``--source`` was not given.
        """
        if options['source'] is None:
            raise CommandError("Option `--source=...` must be specified.")

        # start prometheus http server for metrics
        start_http_server(8686)

        source = options['source']
        index = options['index']
        create_index(index)
        try:
            while True:
                file_path = self.get_data_from_redis(source)
                if not file_path:
                    self.task_state.state("waiting")
                    print("Sleeping...!!!")
                    time.sleep(10)
                    self.sleep_time += 10
                    # After a minute of idling, push whatever is still queued.
                    if self.sleep_time >= 60:
                        if self.batch:
                            ingest_to_elastic(self.batch, index, index, 'id')
                            print("Ingesting Final Batch...!!!")
                            self.batch = []
                        self.sleep_time = 0
                    continue

                date = datetime.now(
                    pytz.timezone("Asia/Kolkata")).strftime("%Y-%m-%d")
                self.task_state.state("running")
                self.sleep_time = 0

                if not os.path.isfile(file_path):
                    msg = "Data file not found: {0}".format(file_path)
                    print(msg)
                    continue

                try:
                    # Binary mode, closed deterministically. Decoding sits
                    # inside the try so one corrupt file is reported below
                    # instead of ending the command.
                    with open(file_path, 'rb') as fh:
                        raw = fh.read()
                    # SECURITY: pickle can execute arbitrary code; only paths
                    # written by trusted producers may reach this queue.
                    doc = cPickle.loads(zlib.decompress(raw))
                    self.create_model_obj(doc, index)
                    if date != self.now:
                        self.now = datetime.now(
                            pytz.timezone("Asia/Kolkata")).strftime(
                                "%Y-%m-%d")
                        # self.reset_stats()
                    self.source_ingest.labels(
                        source=doc.get("source", "source"),
                        category=doc.get("category", "category")).inc()
                except Exception as e:
                    print("error in doc read")
                    print(e)
        except KeyboardInterrupt:
            sys.exit(0)
import random
import time

from prometheus_client import CollectorRegistry, Enum, Gauge, pushadd_to_gateway

# Dedicated registry: only the metrics of this batch job are pushed.
registry = CollectorRegistry()
duration = Gauge('my_job_duration_seconds',
                 'Duration of my batch job in seconds',
                 registry=registry)
job_status = Enum('job_status',
                  'Job completion status',
                  states=['complete', 'failed', 'running'],
                  registry=registry)

try:
    # Simulated work: sleep for a random fraction of a second and report a
    # random status, timing the whole section.
    with duration.time():
        time.sleep(random.random())
        job_status.state(random.choice(['complete', 'failed', 'running']))
    # This only runs if there wasn't an exception
    g = Gauge('my_job_last_success_seconds',
              'Last time my batch job successfully finished',
              registry=registry)
    g.set_to_current_time()
finally:
    # Push whatever was recorded, even when the job failed.
    pushadd_to_gateway('localhost:9091', job='my-batch-job', registry=registry)
def get_temp():
    """Get W125 temperature.

    Reads ``temperature`` from the module-level ``sp`` plug object.

    Returns:
        The value reported by the plug.

    Raises:
        SystemExit: If the plug reports ``'N/A'``; the message names the
            module-level ``ip``.
    """
    val = sp.temperature
    if val == 'N/A':
        # Raised directly: quit() is a helper installed by the ``site`` module
        # for interactive use and also closes stdin.
        raise SystemExit("Could not connect to host " + ip)
    return val


def get_total():
    """Get W125 total energy usage.

    Reads ``total_consumption`` from the module-level ``sp`` plug object.

    Returns:
        The value reported by the plug.

    Raises:
        SystemExit: If the plug reports ``'N/A'``; the message names the
            module-level ``ip``.
    """
    val = sp.total_consumption
    if val == 'N/A':
        raise SystemExit("Could not connect to host " + ip)
    return val


# Main entry point
if __name__ == '__main__':
    # Start up the server to expose the metrics.
    start_http_server(listen_port)
    # Main loop
    while True:
        # A fresh plug object is created for every poll.
        sp = SmartPlug(ip, code)
        REQUEST_STATE.state(state=get_state())
        time.sleep(sleep_time)