Example #1
File: utils.py Project: loum/domain-intel
import datetime
import logging

log = logging.getLogger(__name__)


def epoch_from_str(date, date_format='%Y-%m-%d'):
    """Given a string representation of *date*, return the seconds since
    epoch as interpreted by *date_format*.

    Current implementation is Python 2 and 3 compatible.  However, the
    same could be achieved entirely in Python 3 with::

        _dt.replace(tzinfo=datetime.timezone.utc).timestamp()

    *date* is interpreted as timezone agnostic.

    Returns:
        seconds since epoch

    """
    timestamp = None

    try:
        _dt = datetime.datetime.strptime(date, date_format)
        # *date* is interpreted as naive UTC: subtracting the naive
        # epoch origin yields seconds since epoch.
        timestamp = (_dt - datetime.datetime(1970, 1, 1)).total_seconds()
    except ValueError as err:
        log.error('Epoch conversion of "%s" with format "%s" error: %s',
                  date, date_format, err)

    return timestamp
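
A usage sketch (the sample dates below are illustrative, not from the project):

    epoch_from_str('1970-01-02')                           # 86400.0
    epoch_from_str('02/01/1970', date_format='%d/%m/%Y')   # 86400.0
    epoch_from_str('not-a-date')    # logs the ValueError, returns None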
Example #2
    def request(url, tries=3):
        """Wrapper around :func:`urlopen` to AWIS call.

        On failure, will attempt another 2 tries for success.

        **Args:**
            *url*: the AWIS URL to call

            *tries*: maximum number of attempts before flagging the
            request as a failure

        **Returns:**
            the HTTP response value

        """
        failed_requests = 0
        response_value = None
        while failed_requests < tries:
            try:
                log.debug('Request %d of %d: "%s"', (failed_requests + 1),
                          tries, url)
                response = urlopen(url)
                if response.code == 200:
                    response_value = response.read()
                    break
            except HTTPError as err:
                log.error('Request failed "%s"', err)

            failed_requests += 1
            if failed_requests >= tries:
                log.error('All requests failed')

        return response_value
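
A hedged usage sketch; the snippet assumes Python 3's urlopen and
HTTPError are in scope, and the URL below is a placeholder rather than
a real AWIS endpoint:

    from urllib.error import HTTPError
    from urllib.request import urlopen

    body = request('https://awis.example.com/?Action=TrafficHistory')
    if body is None:
        print('all attempts failed')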
Example #3
    def edge_insert(self, edge_name, kwargs, dry=False):
        """Manage an ArangoDB edge insert.

        """
        persist_status = False

        edge = self.graph.edge_collection(edge_name)
        log.info('Inserting key: "%s" into edge %s', kwargs.get('_key'),
                 edge_name)
        if not dry:
            try:
                edge.insert(kwargs)
                persist_status = True
            except arango.ArangoError as err:
                log.error('%s: %s', err, kwargs)

        return persist_status
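
A minimal call sketch; ``store`` and the edge fields are hypothetical,
though ArangoDB edge documents do require ``_from`` and ``_to``
references:

    ok = store.edge_insert('visits', {
        '_key': 'example-key',
        '_from': 'domain/example.com',
        '_to': 'country/AU',
    }, dry=True)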
Example #4
    def collection_insert(self, collection_name, kwargs, dry=False):
        """Insert *kwargs* into *collection_name*.

        Returns:
            Boolean ``True`` on success.  ``False`` otherwise

        """
        persist_status = False
        collection = self.graph.vertex_collection(collection_name)
        log.info('Inserting key: "%s" into collection %s', kwargs.get('_key'),
                 collection_name)
        if not dry:
            try:
                collection.insert(kwargs)
                persist_status = True
            except arango.exceptions.DocumentInsertError as err:
                log.error('%s: %s', err, kwargs)

        return persist_status
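
A matching call sketch; ``store``, the collection name, and the
document are hypothetical:

    ok = store.collection_insert('domain', {'_key': 'example.com'}, dry=True)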
Example #5
    def traverse_graph(self, label, as_json=True):
        """Traverse the :attr:`graph` starting at vertex denoted by
        *label*.

        Returns:
            the graph structure as a dictionary, or as a JSON string
            if *as_json* is set

        """
        log.debug('Traversing label "%s"', label)

        result = None
        try:
            result = self.graph.traverse(label, direction='any', max_depth=1)
        except arango.exceptions.GraphTraverseError as err:
            log.error('Label "%s" traverse error: %s', label, err)

        if result is not None and as_json:
            result = json.dumps(result)

        return result
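
A call sketch, assuming python-arango's traversal result shape (the
vertex ID is illustrative):

    result = store.traverse_graph('domain/example.com', as_json=False)
    if result is not None:
        # python-arango traversals return a dict of vertices and paths
        print(result.get('vertices'))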
Example #6
    def _check_xml_response(root):
        """Verify that a valid response has been received in the
        *root* :class:`lxml.Element`.

        Returns:
            Boolean ``True`` on success.  ``False`` otherwise

        """
        xpath = '//a:TrafficHistoryResponse/a:Response/a:ResponseStatus'
        _ns = domain_intel.common.NS
        response_status = root.xpath(xpath, namespaces=_ns)
        if not response_status:
            xpath = '//a:TrafficHistoryResponse/b:Response/a:ResponseStatus'
            response_status = root.xpath(xpath, namespaces=_ns)

        response_xpath = './a:StatusCode/text()'
        response = [
            x.xpath(response_xpath, namespaces=_ns)[0] for x in response_status
        ]

        status_ok = bool(response) and response[0] == 'Success'
        if not status_ok:
            log.error('TrafficHistory XML response error')

        return status_ok
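
A hedged sketch of a document that passes the check; the XML skeleton
below is contrived to match the xpaths, not captured AWIS output:

    from lxml import etree

    import domain_intel.common

    XML = ('<a:TrafficHistoryResponse xmlns:a="{}">'
           '<a:Response><a:ResponseStatus>'
           '<a:StatusCode>Success</a:StatusCode>'
           '</a:ResponseStatus></a:Response>'
           '</a:TrafficHistoryResponse>').format(domain_intel.common.NS['a'])

    assert _check_xml_response(etree.fromstring(XML))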
Example #7
File: common.py Project: loum/domain-intel
for location in CONFIG_LOCATIONS:
    if location is None:
        continue

    config_file = location
    if os.path.isdir(config_file):
        config_file = os.path.join(config_file, 'config.json')

    try:
        with io.open(config_file, encoding='utf-8') as _fh:
            log.info('Sourcing config from %s', config_file)
            CONFIG = json.loads(_fh.read())
            break
    except IOError:
        # Not a bad thing if the open failed.  Just means that the config
        # source does not exist.
        continue

if CONFIG is None:
    log.error('Domain Intel Services: no config file found in locations: %s',
              ', '.join([x for x in CONFIG_LOCATIONS if x is not None]))

with io.open(os.path.join(os.path.dirname(os.path.realpath(__file__)), '..',
                          'config', 'country_codes.json'),
             encoding='utf-8') as _fh:
    COUNTRY_CODES = json.loads(_fh.read())

NS_20050711 = 'http://awis.amazonaws.com/doc/2005-07-11'
NS_20051005 = 'http://alexa.amazonaws.com/doc/2005-10-05/'
NS = {'a': NS_20051005, 'b': NS_20050711}
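
The loop above walks an ordered list of candidate locations and takes
the first one it can open; an entry may be a directory (``config.json``
is appended) or a file.  A sketch of what such a list might look like;
the paths and environment variable are assumptions, not taken from the
project:

    CONFIG_LOCATIONS = [
        os.environ.get('DOMAIN_INTEL_CONFIG'),
        os.path.join(os.path.expanduser('~'), '.domain_intel'),
        '/etc/domain_intel/config.json',
    ]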
Example #8
File: stages.py Project: loum/domain-intel
    def run(self):
        self._init_kafka()

        # Preflight checks: run() presumes an input and an output side,
        # so we must validate that we have what we need.  This is not
        # done in the constructor to support special case stages,
        # i.e. the root and the final leaf node.
        if self.kafka_consumer_group_id is None:
            raise GeoDNSError(
                "will not accept null kafka_consumer_group_id. set one if you are consuming"
            )

        if self.worker is None:
            raise GeoDNSError("need a worker!")

        if not (self.is_producer and self.is_consumer):
            raise GeoDNSError(
                "cannot call run() without input and output topics")

        self.kafka_consumer.subscribe(self.kafka_consumer_topics)

        metrics = self.metrics
        for msg in self.kafka_consumer:
            metrics["messages_received"] += 1

            if self.dump:
                self._do_dump(msg.value, str(metrics["messages_received"]),
                              DUMP_CONSUME)

            last_exc = None
            for retry in range(0, self.retryable_exceptions_count):
                try:
                    # enforce a process level timeout with signals
                    old_alarm_handler = signal.signal(
                        signal.SIGALRM, GeoDNSStage._timeout_handler)
                    signal.alarm(self.worker_timeout_seconds)

                    try:
                        res = self.worker(msg.value)
                    finally:
                        # always disarm the alarm and restore the previous
                        # handler, even when the worker raises
                        signal.alarm(0)
                        signal.signal(signal.SIGALRM, old_alarm_handler)

                    last_exc = None
                    break
                except self.retryable_exceptions + (WorkerTimedOut, ) as exc:
                    log.error("caught retryable exceptions: %s", str(exc))
                    metrics["retryable_exceptions"] += 1
                    last_exc = exc
                    # linear backoff: no sleep before the first retry,
                    # then 1s, 2s, ...
                    time.sleep(retry)

            if last_exc is not None:
                log.error("exceeded retryable exception count of %d",
                          self.retryable_exceptions_count)
                raise last_exc

            # try marshalling response
            metrics["messages_processed"] += 1
            if hasattr(res, "marshal"):
                res = res.marshal()
                metrics["responses_marshalled"] += 1

            for dest_topic in self.kafka_producer_topics:
                metrics["messages_sent"] += 1

                if not self.dry:
                    self.kafka_producer.send(dest_topic, value=res)
                else:
                    log.debug("%s: %s", dest_topic, res)
                    if self.dump:
                        self._do_dump(
                            res, "%d.%d" % (metrics["messages_received"],
                                            metrics["messages_sent"]),
                            DUMP_PUBLISH)

            self.kafka_producer.flush()
            self.kafka_consumer.commit()

            log.debug(metrics)

            if self.max_read_count is not None and metrics[
                    "messages_received"] >= self.max_read_count:
                break

        return metrics
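
The worker timeout above leans on SIGALRM.  A self-contained sketch of
the same pattern, with hypothetical names mirroring the snippet (note
that signal.alarm only works in the main thread on Unix, with
whole-second granularity):

    import signal

    class WorkerTimedOut(Exception):
        """Raised when the worker exceeds its time budget."""

    def _timeout_handler(signum, frame):
        raise WorkerTimedOut('worker timed out')

    def run_with_timeout(func, arg, seconds):
        # install the handler and arm the alarm before calling the worker
        old_handler = signal.signal(signal.SIGALRM, _timeout_handler)
        signal.alarm(seconds)
        try:
            return func(arg)
        finally:
            # always disarm and restore, even if func raised
            signal.alarm(0)
            signal.signal(signal.SIGALRM, old_handler)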