def epoch_from_str(date, date_format='%Y-%m-%d'):
    """Given a string representation of *date*, returns the time since
    epoch equivalent as interpreted by *date_format*.

    Current implementation is Python 2 and 3 compatible.  However, the same
    could be achieved with the following entirely in Python 3::

        _dt.replace(tzinfo=datetime.timezone.utc).timestamp()

    *date* is interpreted as timezone agnostic.

    Returns:
        seconds since epoch, or ``None`` if *date* cannot be parsed

    """
    timestamp = None

    try:
        _dt = datetime.datetime.strptime(date, date_format)
        timestamp = (_dt - datetime.datetime(1970, 1, 1)).total_seconds()
    except ValueError as err:
        log.error('Epoch conversion of "%s" with format "%s" error: %s',
                  date, date_format, err)

    return timestamp
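# Usage sketch for ``epoch_from_str`` (values follow directly from the
# implementation above; the function is assumed importable from this module):
#
#     epoch_from_str('1970-01-02')                          # -> 86400.0
#     epoch_from_str('02/01/1970', date_format='%d/%m/%Y')  # -> 86400.0
#     epoch_from_str('not-a-date')                          # -> None (error logged)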
def request(url, tries=3):
    """Wrapper around :func:`urlopen` for the AWIS call.  On failure, will
    retry until *tries* attempts have been exhausted.

    **Args:**
        *url*: the AWIS URL to call

        *tries*: number of failed tries allowed before flagging this
        attempt as a failure

    **Returns:**
        the HTTP response value, or ``None`` if all attempts failed

    """
    failed_requests = 0
    response_value = None

    while failed_requests < tries:
        try:
            log.debug('Request %d of %d: "%s"',
                      (failed_requests + 1), tries, url)
            response = urlopen(url)
            if response.code == 200:
                response_value = response.read()
                break
        except HTTPError as err:
            log.error('Request failed "%s"', err)
            failed_requests += 1

    if failed_requests >= tries:
        log.error('All requests failed')

    return response_value
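# Usage sketch for ``request`` (the URL below is a placeholder; a real call
# needs a fully signed AWIS query string):
#
#     signed_url = 'https://awis.amazonaws.com/...'   # hypothetical
#     body = request(signed_url, tries=5)
#     if body is None:
#         # every attempt failed and the errors were logged
#         pass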
def edge_insert(self, edge_name, kwargs, dry=False):
    """Manage an ArangoDB edge insert into the edge collection *edge_name*.

    Returns:
        Boolean ``True`` on success.  ``False`` otherwise

    """
    persist_status = False

    edge = self.graph.edge_collection(edge_name)

    log.info('Inserting key: "%s" into edge %s',
             kwargs.get('_key'), edge_name)
    if not dry:
        try:
            edge.insert(kwargs)
            persist_status = True
        except arango.ArangoError as err:
            log.error('%s: %s', err, kwargs)

    return persist_status
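# Usage sketch for ``edge_insert`` (instance, collection and document values
# are hypothetical; ArangoDB edge documents carry ``_from`` and ``_to``
# vertex references):
#
#     edge_doc = {
#         '_key': 'example.com:AU',
#         '_from': 'domain/example.com',
#         '_to': 'country/AU',
#     }
#     inserted = store.edge_insert('visits', edge_doc)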
def collection_insert(self, collection_name, kwargs, dry=False):
    """Insert *kwargs* into *collection_name*.

    Returns:
        Boolean ``True`` on success.  ``False`` otherwise

    """
    persist_status = False

    collection = self.graph.vertex_collection(collection_name)

    log.info('Inserting key: "%s" into collection %s',
             kwargs.get('_key'), collection_name)
    if not dry:
        try:
            collection.insert(kwargs)
            persist_status = True
        except arango.exceptions.DocumentInsertError as err:
            log.error('%s: %s', err, kwargs)

    return persist_status
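# Usage sketch for ``collection_insert`` (instance and names are
# hypothetical; only the ``_key`` attribute is significant to the logging
# above):
#
#     vertex_doc = {'_key': 'example.com', 'rank': 12345}
#     inserted = store.collection_insert('domain', vertex_doc)
#
#     # dry run: log the intent but skip the write
#     store.collection_insert('domain', vertex_doc, dry=True)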
def traverse_graph(self, label, as_json=True):
    """Traverse the :attr:`graph` starting at the vertex denoted by *label*.

    Returns:
        the graph structure as a dictionary, optionally converted to JSON
        if *as_json* is set

    """
    log.debug('Traversing label "%s"', label)

    result = None
    try:
        result = self.graph.traverse(label, direction='any', max_depth=1)
    except arango.exceptions.GraphTraverseError as err:
        log.error('Label "%s" traverse error: %s', label, err)

    if result is not None and as_json:
        result = json.dumps(result)

    return result
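# Usage sketch for ``traverse_graph`` (instance and start vertex are
# hypothetical; python-arango accepts a document ID of the form
# '<collection>/<key>' as the traversal start vertex):
#
#     as_dict = store.traverse_graph('domain/example.com', as_json=False)
#     as_json = store.traverse_graph('domain/example.com')
#     # returns None if the traversal failed (error is logged)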
def _check_xml_response(root):
    """Verify that a valid response has been received in the *root*
    :class:`lxml.Element`.

    Returns:
        Boolean ``True`` on success.  ``False`` otherwise

    """
    xpath = '//a:TrafficHistoryResponse/a:Response/a:ResponseStatus'
    _ns = domain_intel.common.NS
    response_status = root.xpath(xpath, namespaces=_ns)
    if not response_status:
        xpath = '//a:TrafficHistoryResponse/b:Response/a:ResponseStatus'
        response_status = root.xpath(xpath, namespaces=_ns)

    response_xpath = './a:StatusCode/text()'
    response = [
        x.xpath(response_xpath, namespaces=_ns)[0] for x in response_status
    ]

    status = response[0] if response else None
    if status != 'Success':
        log.error('TrafficHistory XML response error')

    return status == 'Success'
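# Usage sketch for ``_check_xml_response`` against a minimal, hand-built
# TrafficHistory response (the namespace matches ``NS_20051005`` below, which
# the ``a`` prefix in ``domain_intel.common.NS`` maps to):
#
#     from lxml import etree
#
#     xml = (
#         '<aws:TrafficHistoryResponse '
#         'xmlns:aws="http://alexa.amazonaws.com/doc/2005-10-05/">'
#         '<aws:Response><aws:ResponseStatus>'
#         '<aws:StatusCode>Success</aws:StatusCode>'
#         '</aws:ResponseStatus></aws:Response>'
#         '</aws:TrafficHistoryResponse>'
#     )
#     _check_xml_response(etree.fromstring(xml))   # -> True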
for location in CONFIG_LOCATIONS:
    if location is None:
        continue

    config_file = location
    if os.path.isdir(config_file):
        config_file = os.path.join(config_file, 'config.json')

    try:
        with io.open(config_file, encoding='utf-8') as _fh:
            log.info('Sourcing config from %s', config_file)
            CONFIG = json.loads(_fh.read())
            break
    except IOError:
        # Not a bad thing if the open failed.  Just means that the config
        # source does not exist.
        continue

if CONFIG is None:
    log.error('Domain Intel Services no config file found in locations: %s',
              ', '.join([x for x in CONFIG_LOCATIONS if x is not None]))

with io.open(os.path.join(os.path.dirname(os.path.realpath(__file__)),
                          '..',
                          'config',
                          'country_codes.json'), encoding='utf-8') as _fh:
    COUNTRY_CODES = json.loads(_fh.read())

NS_20050711 = 'http://awis.amazonaws.com/doc/2005-07-11'
NS_20051005 = 'http://alexa.amazonaws.com/doc/2005-10-05/'

NS = {'a': NS_20051005, 'b': NS_20050711}
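# The ``NS`` prefix map above is the mapping the AWIS XML helpers pass to
# lxml xpath lookups, for example (``root`` is a hypothetical parsed
# response element):
#
#     status = root.xpath('//a:TrafficHistoryResponse/a:Response'
#                         '/a:ResponseStatus/a:StatusCode/text()',
#                         namespaces=NS)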
def run(self):
    """Consume from the input topics, run each message through
    :attr:`worker` and publish the result to the output topics.

    Returns the accumulated :attr:`metrics` counters.

    """
    self._init_kafka()

    # preflight checks: since run() presumes an input and an output side
    # we must validate that we have what we need.
    # this is not done in the constructor to support special case stages
    # i.e. root and final leaf node
    if self.kafka_consumer_group_id is None:
        raise GeoDNSError(
            "will not accept null kafka_consumer_group_id. set one if you are consuming"
        )

    if self.worker is None:
        raise GeoDNSError("need a worker!")

    if not (self.is_producer and self.is_consumer):
        raise GeoDNSError(
            "cannot call run() without input and output topics")

    self.kafka_consumer.subscribe(self.kafka_consumer_topics)

    metrics = self.metrics
    for msg in self.kafka_consumer:
        metrics["messages_received"] += 1
        if self.dump:
            self._do_dump(msg.value,
                          str(metrics["messages_received"]),
                          DUMP_CONSUME)

        last_exc = None
        for retry in range(0, self.retryable_exceptions_count):
            try:
                # enforce process level timeout with signals
                old_alarm_handler = signal.signal(
                    signal.SIGALRM, GeoDNSStage._timeout_handler)
                signal.alarm(self.worker_timeout_seconds)
                res = self.worker(msg.value)
                signal.alarm(0)
                signal.signal(signal.SIGALRM, old_alarm_handler)
                last_exc = None
                break
            except self.retryable_exceptions + (WorkerTimedOut, ) as exc:
                log.error("caught retryable exceptions: %s", str(exc))
                metrics["retryable_exceptions"] += 1
                last_exc = exc
                time.sleep(retry)

        if last_exc is not None:
            log.error("exceeded retryable exception count of %d",
                      self.retryable_exceptions_count)
            raise last_exc

        # try marshalling response
        metrics["messages_processed"] += 1
        if hasattr(res, "marshal"):
            res = res.marshal()
            metrics["responses_marshalled"] += 1

        for dest_topic in self.kafka_producer_topics:
            metrics["messages_sent"] += 1
            if not self.dry:
                self.kafka_producer.send(dest_topic, value=res)
            else:
                log.debug("%s: %s", dest_topic, res)
            if self.dump:
                self._do_dump(
                    res,
                    "%d.%d" % (metrics["messages_received"],
                               metrics["messages_sent"]),
                    DUMP_PUBLISH)

        self.kafka_producer.flush()
        self.kafka_consumer.commit()

        log.debug(metrics)

        if (self.max_read_count is not None
                and metrics["messages_received"] >= self.max_read_count):
            break

    return metrics
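# Usage sketch: the worker contract run() relies on is a callable that takes
# the raw Kafka message value and returns a result, optionally exposing a
# ``marshal()`` method.  The constructor arguments below are hypothetical and
# stand in for whatever the real GeoDNSStage configuration looks like:
#
#     def resolve_worker(raw_value):
#         # process one message; raise a retryable exception on failure
#         return raw_value
#
#     stage = GeoDNSStage(worker=resolve_worker)   # plus topic/consumer config
#     metrics = stage.run()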