def print_chgsets(self, db, print_meta=False): for c in db.chgsets_find(state=[ db.STATE_CLOSED, db.STATE_OPEN, db.STATE_ANALYZING2, db.STATE_REANALYZING, db.STATE_DONE ]): cid = c['cid'] meta = db.chgset_get_meta(cid) info = db.chgset_get_info(cid) #print 'cset=', pprint.pprint(data) if 'comment' in meta['tag'].keys(): comment = meta['tag']['comment'] else: comment = '-no comment-' if 'source' in meta['tag'].keys(): source = "source='" + meta['tag']['source'] + "'" else: source = '' timestamp = oc.Changeset.get_timestamp(meta)[1] htimestamp = HumanTime.date2human(timestamp) print u" {} \'{}\' {} ('{}') '{}' state={}".format( cid, meta['user'], htimestamp, timestamp, comment, info['state']).encode('ascii', 'backslashreplace') if print_meta: for k, v in meta.items(): print u' {0}:{1}'.format(k, v).encode('ascii', 'backslashreplace'), for k, v in meta.get('tags', {}).items(): print u' {0}:{1}'.format(k, v).encode('ascii', 'backslashreplace'), print
def cset_check_drop_old(config, db, cset=None, cid=None):
    """Decide whether a changeset has aged past the tracker horizon.

    When cset is not supplied it is looked up via cid; a changeset that no
    longer exists is trivially droppable (returns True).  Otherwise returns
    True when the 'updated' stamp is older than horizon_hours from the
    'tracker' config section, False if not.  A non-positive horizon
    disables dropping entirely.
    """
    if not cset:
        cset = db.chgset_get(cid)
    if not cset:
        return True
    horizon_seconds = 3600 * config.get('horizon_hours', 'tracker')
    if horizon_seconds > 0:
        cutoff = (datetime.datetime.utcnow().replace(tzinfo=pytz.utc)
                  - datetime.timedelta(seconds=horizon_seconds))
        if cset['updated'] < cutoff:
            logger.info('Cset {} ready to be dropped, updated {}'.format(
                cset['cid'], HumanTime.date2human(cutoff)))
            return True
    logger.debug('Cset {} not ready to be dropped, updated {}'.format(
        cset['cid'], cset['updated']))
    return False
def cset_ready_for_reprocessing(config, db, cset=None, cid=None, refresh_period=0):
    """Decide whether a changeset is due for a refresh/reprocessing pass.

    When cset is not supplied it is looked up via cid; a changeset that no
    longer exists is reported as ready (returns True).  Otherwise returns
    True when the 'refreshed' stamp is older than refresh_period minutes,
    False if not.  A non-positive refresh_period disables refreshing.
    """
    if not cset:
        cset = db.chgset_get(cid)
    if not cset:
        return True
    if refresh_period > 0:
        cutoff = (datetime.datetime.utcnow().replace(tzinfo=pytz.utc)
                  - datetime.timedelta(minutes=refresh_period))
        if cset['refreshed'] < cutoff:
            logger.debug('Cset {} ready for refresh, refreshed {}'.format(
                cset['cid'], HumanTime.date2human(cutoff)))
            return True
    logger.debug(
        'Cset {} not ready for refresh, refreshed {}, refresh period {}'.
        format(cset['cid'], cset['refreshed'], refresh_period))
    return False
def print_chgsets(self, db, print_meta=False): for c in db.chgsets_find(state=[db.STATE_CLOSED, db.STATE_OPEN, db.STATE_ANALYZING2, db.STATE_REANALYZING, db.STATE_DONE]): cid = c['cid'] meta = db.chgset_get_meta(cid) info = db.chgset_get_info(cid) #print 'cset=', pprint.pprint(data) if 'comment' in meta['tag'].keys(): comment = meta['tag']['comment'] else: comment = '-no comment-' if 'source' in meta['tag'].keys(): source = "source='"+meta['tag']['source']+"'" else: source = '' timestamp = oc.Changeset.get_timestamp(meta)[1] htimestamp = HumanTime.date2human(timestamp) print u" {} \'{}\' {} ('{}') '{}' state={}".format(cid, meta['user'], htimestamp, timestamp, comment, info['state']).encode('ascii','backslashreplace') if print_meta: for k,v in meta.items(): print u' {0}:{1}'.format(k,v).encode('ascii','backslashreplace'), for k,v in meta.get('tags',{}).items(): print u' {0}:{1}'.format(k,v).encode('ascii','backslashreplace'), print
def diff_fetch(args, config, db):
    """Poll the OSM minutely diff replication stream and register changesets.

    With args.simulate, a single fake changeset observation is appended and
    the function returns immediately.  Otherwise the replication pointer is
    (re)initialized as requested (args.history / args.initptr) and the main
    loop fetches each diff from the current pointer up to the stream head,
    appending observed changesets to the db and advancing the pointer.
    With args.track the loop keeps polling (sleeping between polls);
    otherwise one pass is done and 0 is returned.  Prometheus metrics are
    exported only when args.metrics is set.
    """
    logger.debug('Fetching minutely diff')
    if args and args.simulate:
        # Inject one fake observation instead of polling the feed
        cid = args.simulate
        source = {
            'type': 'minute',
            'sequenceno': 123456789,
            'observed': datetime.datetime.utcnow().replace(tzinfo=pytz.utc)
        }
        db.chgset_append(cid, source)
        return
    metrics = bool(args and args.metrics)
    if metrics:
        m_pt = prometheus_client.Histogram(
            'osmtracker_minutely_diff_processing_time_seconds',
            'Processing time for latest minutely diff (seconds)')
        m_diff_ts = prometheus_client.Gauge(
            'osmtracker_minutely_diff_timestamp',
            'Timestamp of recently processed minutely diff')
        m_diff_proc_ts = prometheus_client.Gauge(
            'osmtracker_minutely_diff_processing_timestamp',
            'Timestamp of when recently processed minutely diff was processed')
        m_seqno = prometheus_client.Gauge(
            'osmtracker_minutely_diff_latest_seqno',
            'Sequence number of recently processed minutely diff')
        m_head_seqno = prometheus_client.Gauge(
            'osmtracker_minutely_diff_head_seqno',
            'Head sequence number of minutely diff replication')
        m_csets = prometheus_client.Gauge(
            'osmtracker_minutely_diff_csets_observed',
            'Number of changesets observed in recently processed minutely diff'
        )
    dapi = osmdiff.OsmDiffApi()
    ptr = db.pointer
    if args:
        if args.log_level == 'DEBUG':
            dapi.debug = True
        if args.history:
            # Rewind the pointer to the diff covering the given time
            history = HumanTime.human2date(args.history)
            head = dapi.get_state('minute')
            pointer = dapi.get_seqno_le_timestamp('minute', history, head)
            db.pointer = pointer
        elif args.initptr or not ptr:
            head = dapi.get_state('minute', seqno=None)
            head.sequenceno_advance(offset=-1)
            db.pointer = head
            logger.debug('Initialized pointer to:{}'.format(db.pointer))
    while True:
        try:
            ptr = db.pointer['seqno']
            head = dapi.get_state('minute', seqno=None)
            start = None
            now = datetime.datetime.utcnow().replace(tzinfo=pytz.utc)
            if ptr <= head.sequenceno:
                logger.debug('Fetching diff, ptr={}, head={}'.format(
                    ptr, head.sequenceno))
                start = time.time()
                chgsets = fetch_and_process_diff(config, dapi, ptr, 'minute')
                logger.debug('{} changesets: \n{}'.format(len(chgsets), chgsets))
                for cid in chgsets:
                    source = {
                        'type': 'minute',
                        'sequenceno': ptr,
                        'observed': now
                    }
                    db.chgset_append(cid, source)
                # Set timestamp from old seqno as new seqno might not yet exist
                seqno = db.pointer['seqno']
                nptr = dapi.get_state('minute', seqno=seqno)
                db.pointer_meta_update({'timestamp': nptr.timestamp()})
                db.pointer_advance()
                if metrics:
                    # BUGFIX: these gauges only exist when --metrics is
                    # enabled; updating them unconditionally raised NameError
                    ts = nptr.timestamp()
                    m_diff_ts.set(time.mktime(ts.timetuple()) +
                                  ts.microsecond / 1E6)
                    m_diff_proc_ts.set_to_current_time()
                    m_seqno.set(seqno)
                    m_head_seqno.set(head.sequenceno)
                    m_csets.set(len(chgsets))
        except KeyboardInterrupt as e:
            logger.warn('Processing interrupted, exiting...')
            raise e
        except (urllib2.HTTPError, urllib2.URLError,
                socket.error, socket.timeout) as e:
            # BUGFIX: format string had no '{}' placeholder, so the
            # exception text was silently dropped from the log
            logger.error('Error retrieving OSM data: {}'.format(e))
            logger.error(traceback.format_exc())
            time.sleep(60)
        if args and args.track:
            if start:
                elapsed = time.time() - start
            else:
                elapsed = 0
            if metrics:
                m_pt.observe(elapsed)
            if ptr >= head.sequenceno:
                # No more diffs to fetch
                delay = min(60, max(0, 60 - elapsed))
                logger.info(
                    'Processing seqno {} took {:.2f}s. Sleeping {:.2f}s'.
                    format(ptr, elapsed, delay))
                time.sleep(delay)
            else:
                logger.info(
                    'Processing seqno {} took {:.2f}s. Head ptr is {}'.format(
                        ptr, elapsed, head.sequenceno))
        else:
            break
    return 0
def diff_fetch(args, config, db):
    """Poll the OSM minutely diff replication stream and register changesets.

    With args.simulate, a single fake changeset observation is appended and
    the function returns immediately.  Otherwise the replication pointer is
    (re)initialized as requested (args.history / args.initptr) and the main
    loop fetches each diff from the current pointer up to the stream head,
    appending observed changesets to the db and advancing the pointer.
    With args.track the loop keeps polling (sleeping between polls);
    otherwise one pass is done and 0 is returned.
    """
    logger.debug('Fetching minutely diff')
    if args and args.simulate:
        # Inject one fake observation instead of polling the feed
        cid = args.simulate
        source = {'type': 'minute',
                  'sequenceno': 123456789,
                  'observed': datetime.datetime.utcnow().replace(tzinfo=pytz.utc)}
        db.chgset_append(cid, source)
        return
    dapi = osmdiff.OsmDiffApi()
    if args.log_level == 'DEBUG':
        dapi.debug = True
    ptr = db.pointer
    if args.history:
        # Rewind the pointer to the diff covering the given time
        history = HumanTime.human2date(args.history)
        head = dapi.get_state('minute')
        pointer = dapi.get_seqno_le_timestamp('minute', history, head)
        db.pointer = pointer
    elif args.initptr or not ptr:
        head = dapi.get_state('minute', seqno=None)
        head.sequenceno_advance(offset=-1)
        db.pointer = head
        logger.debug('Initialized pointer to:{}'.format(db.pointer))
    while True:
        try:
            ptr = db.pointer['seqno']
            head = dapi.get_state('minute', seqno=None)
            start = None
            if ptr <= head.sequenceno:
                logger.debug('Fetching diff, ptr={}, head={}'.format(
                    ptr, head.sequenceno))
                start = time.time()
                chgsets = fetch_and_process_diff(config, dapi, ptr, 'minute')
                logger.debug('{} changesets: {}'.format(len(chgsets), chgsets))
                for cid in chgsets:
                    source = {'type': 'minute', 'sequenceno': ptr,
                              'observed': datetime.datetime.utcnow().replace(
                                  tzinfo=pytz.utc)}
                    db.chgset_append(cid, source)
                # Set timestamp from old seqno as new seqno might not yet exist
                nptr = dapi.get_state('minute', seqno=db.pointer['seqno'])
                db.pointer_meta_update({'timestamp': nptr.timestamp()})
                db.pointer_advance()
        except (urllib2.HTTPError, urllib2.URLError,
                socket.error, socket.timeout) as e:
            # BUGFIX: format string had no '{}' placeholder, so the
            # exception text was silently dropped from the log
            logger.error('Error retrieving OSM data: {}'.format(e))
            logger.error(traceback.format_exc())
            time.sleep(60)
        if args.track:
            if start:
                elapsed = time.time() - start
            else:
                elapsed = 0
            if ptr >= head.sequenceno:
                # No more diffs to fetch
                delay = min(60, max(0, 60 - elapsed))
                logger.info('Processing seqno {} took {:.2f}s. Sleeping {:.2f}s'
                            .format(ptr, elapsed, delay))
                time.sleep(delay)
            else:
                # BUGFIX: removed a stray embedded newline that split this
                # message across two log lines (matches sibling variant)
                logger.info('Processing seqno {} took {:.2f}s. Head ptr is {}'
                            .format(ptr, elapsed, head.sequenceno))
        else:
            break
    return 0
def diff_fetch(args, config, db):
    """Poll the OSM changeset replication stream and publish new changesets.

    With args.simulate, a single fake changeset observation is appended and
    the function returns immediately.  Otherwise AMQP connections are set
    up, the replication pointer is (re)initialized as requested
    (args.history / args.initptr), and the main loop processes each diff
    from the current pointer up to the stream head via diff_fetch_single(),
    advancing the pointer and announcing it on the fanout exchange.  With
    args.track the loop keeps polling; otherwise one pass is done and 0 is
    returned.  Prometheus metrics are exported only when args.metrics is
    set.
    """
    logger.debug('Fetching minutely diff')
    if args and args.simulate:
        # Inject one fake observation instead of polling the feed
        cid = args.simulate
        source = {
            'type': 'changesets',
            'sequenceno': 123456789,
            'observed': datetime.datetime.utcnow().replace(tzinfo=pytz.utc)
        }
        db.chgset_append(cid, source)
        return
    metrics = bool(args and args.metrics)
    if metrics:
        m_pt = prometheus_client.Histogram(
            'osmtracker_minutely_diff_processing_time_seconds',
            'Minutely diff processing time (seconds)')
        m_diff_ts = prometheus_client.Gauge(
            'osmtracker_minutely_diff_timestamp',
            'Timestamp of recently processed minutely diff')
        m_diff_proc_ts = prometheus_client.Gauge(
            'osmtracker_minutely_diff_processing_timestamp',
            'Timestamp of when recently processed minutely diff was processed')
        m_seqno = prometheus_client.Gauge(
            'osmtracker_minutely_diff_latest_seqno',
            'Sequence number of recently processed minutely diff')
        m_head_seqno = prometheus_client.Gauge(
            'osmtracker_minutely_diff_head_seqno',
            'Head sequence number of minutely diff replication')
        m_csets = prometheus_client.Gauge(
            'osmtracker_minutely_diff_csets_observed',
            'Number of changesets observed in recently processed minutely diff'
        )
        m_events = prometheus_client.Counter('osmtracker_events',
                                             'Number of events', EVENT_LABELS)
    dapi = osmdiff.OsmDiffApi()
    ptr = db.pointer
    if args:
        if args.log_level == 'DEBUG':
            dapi.debug = True
        amqp = messagebus.Amqp(args.amqp_url, AMQP_EXCHANGE_TOPIC, 'topic',
                               AMQP_QUEUES)
        amqp_gen = messagebus.Amqp(args.amqp_url, AMQP_EXCHANGE_FANOUT,
                                   'fanout', [], [])
        if args.history:
            # Rewind the pointer to the diff covering the given time
            history = HumanTime.human2date(args.history)
            head = dapi.get_state('changesets')
            pointer = dapi.get_seqno_le_timestamp('changesets', history, head)
            db.pointer = pointer
        elif args.initptr or not ptr:
            head = dapi.get_state('changesets', seqno=None)
            head.sequenceno_advance(offset=-1)
            db.pointer = head
            logger.debug('Initialized pointer to:{}'.format(db.pointer))
    while True:
        try:
            ptr = db.pointer['seqno']
            head = dapi.get_state('changesets', seqno=None)
            start = None
            if ptr <= head.sequenceno:
                logger.debug('Fetching diff, ptr={}, head={}'.format(
                    ptr, head))
                # BUGFIX: start was never assigned, so 'elapsed' below was
                # always 0 and the processing-time histogram was meaningless
                start = time.time()
                chgsets = diff_fetch_single(args, config, dapi, db, amqp, ptr)
                if metrics:
                    # BUGFIX: metric objects only exist when --metrics is
                    # enabled; touching them unconditionally raised NameError
                    m_events.labels('filter', 'in').inc(len(chgsets))
                # Set timestamp from old seqno as new seqno might not yet exist
                seqno = db.pointer['seqno']
                nptr = dapi.get_state('changesets', seqno=seqno)
                db.pointer_meta_update({'timestamp': nptr.timestamp()})
                db.pointer_advance()
                if metrics:
                    ts = nptr.timestamp()
                    m_diff_ts.set(time.mktime(ts.timetuple()) +
                                  ts.microsecond / 1E6)
                    m_diff_proc_ts.set_to_current_time()
                    m_seqno.set(seqno)
                    m_head_seqno.set(head.sequenceno)
                    m_csets.set(len(chgsets))
                # Announce the new pointer to downstream consumers
                msg = {'pointer': seqno}
                r = amqp_gen.send(msg, schema_name='replication_pointer',
                                  schema_version=1,
                                  routing_key=AMQP_NEW_POINTER_KEY)
                logger.debug('New pointer send result: {}'.format(r))
                if metrics:
                    m_events.labels('new_pointer', 'in').inc()
        except KeyboardInterrupt as e:
            logger.warn('Processing interrupted, exiting...')
            raise e
        except (requests.exceptions.Timeout, requests.exceptions.HTTPError,
                socket.error, socket.timeout,
                eventlet.timeout.Timeout) as e:
            # BUGFIX: format string had no '{}' placeholder, so the
            # exception text was silently dropped from the log
            logger.error('Error retrieving OSM data: {}'.format(e))
            logger.error(traceback.format_exc())
            time.sleep(60)
        if args and args.track:
            if start:
                elapsed = time.time() - start
            else:
                elapsed = 0
            if metrics:
                m_pt.observe(elapsed)
            if ptr >= head.sequenceno:
                # No more diffs to fetch
                delay = min(60, max(0, 60 - elapsed))
                logger.info(
                    'Processing seqno {} took {:.2f}s. Sleeping {:.2f}s'.
                    format(ptr, elapsed, delay))
                time.sleep(delay)
            else:
                logger.info(
                    'Processing seqno {} took {:.2f}s. Head ptr is {}'.format(
                        ptr, elapsed, head.sequenceno))
        else:
            break
    return 0