def print_chgsets(self, db, print_meta=False):
     for c in db.chgsets_find(state=[
             db.STATE_CLOSED, db.STATE_OPEN, db.STATE_ANALYZING2,
             db.STATE_REANALYZING, db.STATE_DONE
     ]):
         cid = c['cid']
         meta = db.chgset_get_meta(cid)
         info = db.chgset_get_info(cid)
         #print 'cset=', pprint.pprint(data)
         if 'comment' in meta['tag'].keys():
             comment = meta['tag']['comment']
         else:
             comment = '-no comment-'
         if 'source' in meta['tag'].keys():
             source = "source='" + meta['tag']['source'] + "'"
         else:
             source = ''
         timestamp = oc.Changeset.get_timestamp(meta)[1]
         htimestamp = HumanTime.date2human(timestamp)
         print u"  {} \'{}\' {} ('{}') '{}' state={}".format(
             cid, meta['user'], htimestamp, timestamp, comment,
             info['state']).encode('ascii', 'backslashreplace')
         if print_meta:
             for k, v in meta.items():
                 print u' {0}:{1}'.format(k,
                                          v).encode('ascii',
                                                    'backslashreplace'),
             for k, v in meta.get('tags', {}).items():
                 print u' {0}:{1}'.format(k,
                                          v).encode('ascii',
                                                    'backslashreplace'),
             print
Esempio n. 2
0
def cset_check_drop_old(config, db, cset=None, cid=None):
    """Tell whether a changeset is older than the tracking horizon.

    Args:
        config: Configuration object; ``config.get('horizon_hours',
            'tracker')`` yields the horizon in hours.  A value <= 0
            disables the age check.
        db: Database object, used to look the changeset up by ``cid`` when
            ``cset`` is not supplied.
        cset: Changeset record; its ``cid`` and ``updated`` keys are read.
        cid: Changeset id, only used when ``cset`` is falsy.

    Returns:
        True when the changeset cannot be found or when its ``updated``
        time lies before ``now - horizon``; False otherwise.
    """
    if not cset:
        cset = db.chgset_get(cid)
        if not cset:
            # Nothing stored under this id: report it as droppable.
            return True
    horizon_s = config.get('horizon_hours', 'tracker') * 3600
    if horizon_s > 0:
        now = datetime.datetime.utcnow().replace(tzinfo=pytz.utc)
        cutoff = now - datetime.timedelta(seconds=horizon_s)
        if cset['updated'] < cutoff:
            # Log the changeset's own update time; the message labels the
            # value "updated", so the horizon cutoff would be misleading.
            logger.info('Cset {} ready to be dropped, updated {}'.format(
                cset['cid'], HumanTime.date2human(cset['updated'])))
            return True
    logger.debug('Cset {} not ready to be dropped, updated {}'.format(
        cset['cid'], cset['updated']))
    return False
Esempio n. 3
0
def cset_ready_for_reprocessing(config,
                                db,
                                cset=None,
                                cid=None,
                                refresh_period=0):
    """Tell whether a changeset is due for a refresh.

    Args:
        config: Not used by this function; kept in the signature.
        db: Database object, used to look the changeset up by ``cid`` when
            ``cset`` is not supplied.
        cset: Changeset record; its ``cid`` and ``refreshed`` keys are read.
        cid: Changeset id, only used when ``cset`` is falsy.
        refresh_period: Minimum age of the last refresh, in minutes.  A
            value <= 0 disables the check.

    Returns:
        True when the changeset cannot be found or when its ``refreshed``
        time lies before ``now - refresh_period``; False otherwise.
    """
    if not cset:
        cset = db.chgset_get(cid)
        if not cset:
            # Nothing stored under this id: report it as ready.
            return True
    if refresh_period > 0:
        now = datetime.datetime.utcnow().replace(tzinfo=pytz.utc)
        cutoff = now - datetime.timedelta(minutes=refresh_period)
        if cset['refreshed'] < cutoff:
            # Log the changeset's own refresh time; the message labels the
            # value "refreshed", so the cutoff would be misleading.
            logger.debug('Cset {} ready for refresh, refreshed {}'.format(
                cset['cid'], HumanTime.date2human(cset['refreshed'])))
            return True
    logger.debug(
        'Cset {} not ready for refresh, refreshed {}, refresh period {}'.
        format(cset['cid'], cset['refreshed'], refresh_period))
    return False
 def print_chgsets(self, db, print_meta=False):
     for c in db.chgsets_find(state=[db.STATE_CLOSED, db.STATE_OPEN, db.STATE_ANALYZING2,
                                     db.STATE_REANALYZING, db.STATE_DONE]):
         cid = c['cid']
         meta = db.chgset_get_meta(cid)
         info = db.chgset_get_info(cid)
         #print 'cset=', pprint.pprint(data)
         if 'comment' in meta['tag'].keys():
             comment = meta['tag']['comment']
         else:
             comment = '-no comment-'
         if 'source' in meta['tag'].keys():
             source = "source='"+meta['tag']['source']+"'"
         else:
             source = ''
         timestamp = oc.Changeset.get_timestamp(meta)[1]
         htimestamp = HumanTime.date2human(timestamp)
         print u"  {} \'{}\' {} ('{}') '{}' state={}".format(cid, meta['user'], htimestamp, timestamp, comment, info['state']).encode('ascii','backslashreplace')
         if print_meta:
             for k,v in meta.items():
                 print u' {0}:{1}'.format(k,v).encode('ascii','backslashreplace'),
             for k,v in meta.get('tags',{}).items():
                 print u' {0}:{1}'.format(k,v).encode('ascii','backslashreplace'),
             print
Esempio n. 5
0
def diff_fetch(args, config, db):
    """Fetch minutely replication diffs and record the changesets in them.

    Each loop iteration reads the replication pointer from ``db``; if it
    is not beyond the current head, the diff at that sequence number is
    fetched and processed, every changeset id found is appended to ``db``
    and the pointer is advanced.  With ``args.track`` set the loop keeps
    polling; otherwise exactly one iteration is made.

    Args:
        args: Parsed command-line options, or None.  Attributes read:
            ``simulate``, ``metrics``, ``log_level``, ``history``,
            ``initptr`` and ``track``.
        config: Configuration object handed to ``fetch_and_process_diff``.
        db: Database object holding the replication pointer and changesets.

    Returns:
        0 when the loop ends; None when ``args.simulate`` is set.

    Raises:
        KeyboardInterrupt: Re-raised after a log message.
    """
    logger.debug('Fetching minutely diff')

    if args and args.simulate:
        # Simulation: register one changeset with a fake source and stop.
        cid = args.simulate
        source = {
            'type': 'minute',
            'sequenceno': 123456789,
            'observed': datetime.datetime.utcnow().replace(tzinfo=pytz.utc)
        }
        db.chgset_append(cid, source)
        return

    # The Prometheus objects only exist when metrics were requested, so
    # every later use must be guarded by this flag.
    metrics = bool(args and args.metrics)
    if metrics:
        m_pt = prometheus_client.Histogram(
            'osmtracker_minutely_diff_processing_time_seconds',
            'Processing time for latest minutely diff (seconds)')
        m_diff_ts = prometheus_client.Gauge(
            'osmtracker_minutely_diff_timestamp',
            'Timestamp of recently processed minutely diff')
        m_diff_proc_ts = prometheus_client.Gauge(
            'osmtracker_minutely_diff_processing_timestamp',
            'Timestamp of when recently processed minutely diff was processed')
        m_seqno = prometheus_client.Gauge(
            'osmtracker_minutely_diff_latest_seqno',
            'Sequence number of recently processed minutely diff')
        m_head_seqno = prometheus_client.Gauge(
            'osmtracker_minutely_diff_head_seqno',
            'Head sequence number of minutely diff replication')
        m_csets = prometheus_client.Gauge(
            'osmtracker_minutely_diff_csets_observed',
            'Number of changesets observed in recently processed minutely diff'
        )

    dapi = osmdiff.OsmDiffApi()
    ptr = db.pointer

    if args:
        if args.log_level == 'DEBUG':
            dapi.debug = True

        if args.history:
            # Rewind the pointer to the newest diff not after `history`.
            history = HumanTime.human2date(args.history)
            head = dapi.get_state('minute')
            pointer = dapi.get_seqno_le_timestamp('minute', history, head)
            db.pointer = pointer
        elif args.initptr or not ptr:
            # Start one step behind the current head.
            head = dapi.get_state('minute', seqno=None)
            head.sequenceno_advance(offset=-1)
            db.pointer = head
            logger.debug('Initialized pointer to:{}'.format(db.pointer))

    while True:
        # Reset per-iteration state before the try block so the code after
        # it never reads an unbound or stale value.
        start = None
        failed = False
        try:
            ptr = db.pointer['seqno']
            head = dapi.get_state('minute', seqno=None)
            now = datetime.datetime.utcnow().replace(tzinfo=pytz.utc)
            if ptr <= head.sequenceno:
                logger.debug('Fetching diff, ptr={}, head={}'.format(
                    ptr, head.sequenceno))
                start = time.time()
                chgsets = fetch_and_process_diff(config, dapi, ptr, 'minute')
                logger.debug('{} changesets: {}'.format(len(chgsets), chgsets))
                for cid in chgsets:
                    source = {
                        'type': 'minute',
                        'sequenceno': ptr,
                        'observed': now
                    }
                    db.chgset_append(cid, source)
                # Set timestamp from old seqno as new seqno might not yet exist
                seqno = db.pointer['seqno']
                nptr = dapi.get_state('minute', seqno=seqno)
                db.pointer_meta_update({'timestamp': nptr.timestamp()})
                db.pointer_advance()
                if metrics:
                    # NOTE(review): time.mktime() interprets the tuple as
                    # local time -- confirm nptr.timestamp() is meant to be
                    # converted that way.
                    m_diff_ts.set(
                        time.mktime(nptr.timestamp().timetuple()) +
                        nptr.timestamp().microsecond / 1E6)
                    m_diff_proc_ts.set_to_current_time()
                    m_seqno.set(seqno)
                    m_head_seqno.set(head.sequenceno)
                    m_csets.set(len(chgsets))
        except KeyboardInterrupt:
            logger.warning('Processing interrupted, exiting...')
            raise  # bare raise keeps the original traceback
        except (urllib2.HTTPError, urllib2.URLError, socket.error,
                socket.timeout) as e:
            logger.error('Error retrieving OSM data: {}'.format(e))
            logger.error(traceback.format_exc())
            time.sleep(60)
            failed = True

        if not (args and args.track):
            break
        if failed:
            # `head` may be unbound or stale after a failed fetch; the 60s
            # back-off above has already happened, so just retry.
            continue

        elapsed = time.time() - start if start is not None else 0
        if metrics:
            m_pt.observe(elapsed)
        if ptr >= head.sequenceno:  # No more diffs to fetch
            delay = min(60, max(0, 60 - elapsed))
            logger.info(
                'Processing seqno {} took {:.2f}s. Sleeping {:.2f}s'.
                format(ptr, elapsed, delay))
            time.sleep(delay)
        else:
            logger.info(
                'Processing seqno {} took {:.2f}s. Head ptr is {}'.format(
                    ptr, elapsed, head.sequenceno))
    return 0
def diff_fetch(args, config, db):
    """Fetch minutely replication diffs and record the changesets in them.

    Each loop iteration reads the replication pointer from ``db``; if it
    is not beyond the current head, the diff at that sequence number is
    fetched and processed, every changeset id found is appended to ``db``
    and the pointer is advanced.  With ``args.track`` set the loop keeps
    polling; otherwise exactly one iteration is made.

    Args:
        args: Parsed command-line options, or None.  Attributes read:
            ``simulate``, ``log_level``, ``history``, ``initptr`` and
            ``track``.
        config: Configuration object handed to ``fetch_and_process_diff``.
        db: Database object holding the replication pointer and changesets.

    Returns:
        0 when the loop ends; None when ``args.simulate`` is set.
    """
    logger.debug('Fetching minutely diff')

    if args and args.simulate:
        # Simulation: register one changeset with a fake source and stop.
        cid = args.simulate
        source = {'type': 'minute',
                  'sequenceno': 123456789,
                  'observed': datetime.datetime.utcnow().replace(tzinfo=pytz.utc)}
        db.chgset_append(cid, source)
        return

    dapi = osmdiff.OsmDiffApi()
    ptr = db.pointer

    # `args` may be None (the simulate check above already allows for
    # that), so every option access is guarded.
    if args:
        if args.log_level == 'DEBUG':
            dapi.debug = True

        if args.history:
            # Rewind the pointer to the newest diff not after `history`.
            history = HumanTime.human2date(args.history)
            head = dapi.get_state('minute')
            pointer = dapi.get_seqno_le_timestamp('minute', history, head)
            db.pointer = pointer
        elif args.initptr or not ptr:
            # Start one step behind the current head.
            head = dapi.get_state('minute', seqno=None)
            head.sequenceno_advance(offset=-1)
            db.pointer = head
            logger.debug('Initialized pointer to:{}'.format(db.pointer))

    while True:
        # Reset per-iteration state before the try block so the code after
        # it never reads an unbound or stale value.
        start = None
        failed = False
        try:
            ptr = db.pointer['seqno']
            head = dapi.get_state('minute', seqno=None)
            if ptr <= head.sequenceno:
                logger.debug('Fetching diff, ptr={}, head={}'.format(ptr, head.sequenceno))
                start = time.time()
                chgsets = fetch_and_process_diff(config, dapi, ptr, 'minute')
                logger.debug('{} changesets: {}'.format(len(chgsets), chgsets))
                for cid in chgsets:
                    source = {'type': 'minute',
                              'sequenceno': ptr,
                              'observed': datetime.datetime.utcnow().replace(tzinfo=pytz.utc)}
                    db.chgset_append(cid, source)
                # Set timestamp from old seqno as new seqno might not yet exist
                nptr = dapi.get_state('minute', seqno=db.pointer['seqno'])
                db.pointer_meta_update({'timestamp': nptr.timestamp()})
                db.pointer_advance()
        except (urllib2.HTTPError, urllib2.URLError, socket.error, socket.timeout) as e:
            logger.error('Error retrieving OSM data: {}'.format(e))
            logger.error(traceback.format_exc())
            time.sleep(60)
            failed = True

        if not (args and args.track):
            break
        if failed:
            # `head` may be unbound or stale after a failed fetch; the 60s
            # back-off above has already happened, so just retry.
            continue

        elapsed = time.time() - start if start is not None else 0
        if ptr >= head.sequenceno:  # No more diffs to fetch
            delay = min(60, max(0, 60 - elapsed))
            logger.info('Processing seqno {} took {:.2f}s. Sleeping {:.2f}s'.format(ptr, elapsed, delay))
            time.sleep(delay)
        else:
            logger.info('Processing seqno {} took {:.2f}s. Head ptr is {}'.format(ptr, elapsed, head.sequenceno))
    return 0
Esempio n. 7
0
def diff_fetch(args, config, db):
    """Follow the 'changesets' replication feed and publish new pointers.

    Each loop iteration reads the replication pointer from ``db``; if it
    is not beyond the current head, ``diff_fetch_single`` is called for
    that sequence number, the pointer is advanced and a
    ``replication_pointer`` message is sent on the fanout exchange.  With
    ``args.track`` set the loop keeps polling; otherwise exactly one
    iteration is made.

    Args:
        args: Parsed command-line options, or None.  Attributes read:
            ``simulate``, ``metrics``, ``log_level``, ``amqp_url``,
            ``history``, ``initptr`` and ``track``.
        config: Configuration object handed to ``diff_fetch_single``.
        db: Database object holding the replication pointer and changesets.

    Returns:
        0 when the loop ends; None when ``args.simulate`` is set.

    Raises:
        KeyboardInterrupt: Re-raised after a log message.
    """
    logger.debug('Fetching minutely diff')

    if args and args.simulate:
        # Simulation: register one changeset with a fake source and stop.
        cid = args.simulate
        source = {
            'type': 'changesets',
            'sequenceno': 123456789,
            'observed': datetime.datetime.utcnow().replace(tzinfo=pytz.utc)
        }
        db.chgset_append(cid, source)
        return

    # The Prometheus objects only exist when metrics were requested, so
    # every later use must be guarded by this flag.
    metrics = bool(args and args.metrics)
    if metrics:
        m_pt = prometheus_client.Histogram(
            'osmtracker_minutely_diff_processing_time_seconds',
            'Minutely diff processing time (seconds)')
        m_diff_ts = prometheus_client.Gauge(
            'osmtracker_minutely_diff_timestamp',
            'Timestamp of recently processed minutely diff')
        m_diff_proc_ts = prometheus_client.Gauge(
            'osmtracker_minutely_diff_processing_timestamp',
            'Timestamp of when recently processed minutely diff was processed')
        m_seqno = prometheus_client.Gauge(
            'osmtracker_minutely_diff_latest_seqno',
            'Sequence number of recently processed minutely diff')
        m_head_seqno = prometheus_client.Gauge(
            'osmtracker_minutely_diff_head_seqno',
            'Head sequence number of minutely diff replication')
        m_csets = prometheus_client.Gauge(
            'osmtracker_minutely_diff_csets_observed',
            'Number of changesets observed in recently processed minutely diff'
        )
        m_events = prometheus_client.Counter('osmtracker_events',
                                             'Number of events', EVENT_LABELS)

    dapi = osmdiff.OsmDiffApi()
    ptr = db.pointer

    # NOTE(review): `amqp` and `amqp_gen` are only bound when `args` is
    # truthy, yet the loop below uses them unconditionally -- confirm this
    # function is never called with args=None outside simulate mode.
    if args:
        if args.log_level == 'DEBUG':
            dapi.debug = True

        amqp = messagebus.Amqp(args.amqp_url, AMQP_EXCHANGE_TOPIC, 'topic',
                               AMQP_QUEUES)
        amqp_gen = messagebus.Amqp(args.amqp_url, AMQP_EXCHANGE_FANOUT,
                                   'fanout', [], [])

        if args.history:
            # Rewind the pointer to the newest diff not after `history`.
            history = HumanTime.human2date(args.history)
            head = dapi.get_state('changesets')
            pointer = dapi.get_seqno_le_timestamp('changesets', history, head)
            db.pointer = pointer
        elif args.initptr or not ptr:
            # Start one step behind the current head.
            head = dapi.get_state('changesets', seqno=None)
            head.sequenceno_advance(offset=-1)
            db.pointer = head
            logger.debug('Initialized pointer to:{}'.format(db.pointer))

    while True:
        # Reset per-iteration state before the try block so the code after
        # it never reads an unbound or stale value.
        start = None
        failed = False
        try:
            ptr = db.pointer['seqno']
            head = dapi.get_state('changesets', seqno=None)
            if ptr <= head.sequenceno:
                logger.debug('Fetching diff, ptr={}, head={}'.format(
                    ptr, head))
                # Start the clock here; without it the elapsed time used
                # for the histogram and the sleep delay is always zero.
                start = time.time()
                chgsets = diff_fetch_single(args, config, dapi, db, amqp, ptr)
                if metrics:
                    m_events.labels('filter', 'in').inc(len(chgsets))
                # Set timestamp from old seqno as new seqno might not yet exist
                seqno = db.pointer['seqno']
                nptr = dapi.get_state('changesets', seqno=seqno)
                db.pointer_meta_update({'timestamp': nptr.timestamp()})
                db.pointer_advance()

                if metrics:
                    # NOTE(review): time.mktime() interprets the tuple as
                    # local time -- confirm nptr.timestamp() is meant to be
                    # converted that way.
                    m_diff_ts.set(
                        time.mktime(nptr.timestamp().timetuple()) +
                        nptr.timestamp().microsecond / 1E6)
                    m_diff_proc_ts.set_to_current_time()
                    m_seqno.set(seqno)
                    m_head_seqno.set(head.sequenceno)
                    m_csets.set(len(chgsets))
                msg = {'pointer': seqno}
                r = amqp_gen.send(msg,
                                  schema_name='replication_pointer',
                                  schema_version=1,
                                  routing_key=AMQP_NEW_POINTER_KEY)
                logger.debug('New pointer send result: {}'.format(r))
                if metrics:
                    m_events.labels('new_pointer', 'in').inc()
        except KeyboardInterrupt:
            logger.warning('Processing interrupted, exiting...')
            raise  # bare raise keeps the original traceback
        except (requests.exceptions.Timeout, requests.exceptions.HTTPError,
                socket.error, socket.timeout, eventlet.timeout.Timeout) as e:
            logger.error('Error retrieving OSM data: {}'.format(e))
            logger.error(traceback.format_exc())
            time.sleep(60)
            failed = True

        if not (args and args.track):
            break
        if failed:
            # `head` may be unbound or stale after a failed fetch; the 60s
            # back-off above has already happened, so just retry.
            continue

        elapsed = time.time() - start if start is not None else 0
        if metrics:
            m_pt.observe(elapsed)
        if ptr >= head.sequenceno:  # No more diffs to fetch
            delay = min(60, max(0, 60 - elapsed))
            logger.info(
                'Processing seqno {} took {:.2f}s. Sleeping {:.2f}s'.
                format(ptr, elapsed, delay))
            time.sleep(delay)
        else:
            logger.info(
                'Processing seqno {} took {:.2f}s. Head ptr is {}'.format(
                    ptr, elapsed, head.sequenceno))
    return 0