def find_zone_serial(self, zone_name):
    """Look up the serial of a zone by running ``knotc zone-status``.

    Any trailing dots are stripped from ``zone_name`` before the lookup.

    :param zone_name: name of the zone to query
    :returns: serial (int), or None when knotc reports 'no such zone'
    :raises: exceptions.Backend when knotc fails for another reason or
             its output cannot be parsed
    """
    zone_name = zone_name.rstrip('.')
    LOG.debug("Finding %s", zone_name)

    try:
        out, err = execute(self._knotc_cmd_name, 'zone-status', zone_name)
    except ProcessExecutionError as exc:
        if 'no such zone' in exc.stdout:
            # Zone not found
            return None

        LOG.error(_LE("Command output: %(out)r Stderr: %(err)r"),
                  {'out': exc.stdout, 'err': exc.stderr})
        raise exceptions.Backend(exc)

    # Output example:
    # [530336536.com.] type: slave | serial: 0 | next-event: idle |
    # auto-dnssec: disabled]
    try:
        serial_field = out.split('|')[1]
        serial_tokens = serial_field.split()
        return int(serial_tokens[1])
    except Exception:
        LOG.error(_LE("Unable to parse knotc output: %r"), out)
        raise exceptions.Backend("Unexpected knotc zone-status output")
def start(self):
    """Start the pool manager service.

    Loops until the configured pool can be fetched from central and has at
    least one target, then starts each target backend, the parent service,
    the pool leader election and whichever periodic timers are enabled.
    """
    # Build the Pool (and related) Object from Config
    context = DesignateContext.get_admin_context()
    pm_conf = CONF['service:pool_manager']
    pool_id = pm_conf.pool_id

    # TODO(kiall): This block of code should be replaced with a cleaner,
    #              limited version. e.g. should retry for X minutes, and
    #              backoff rather than fixed retry intervals.
    while True:
        try:
            self.pool = self.central_api.get_pool(context, pool_id)

            if len(self.pool.targets) > 0:
                break

            LOG.error(_LE("No targets for %s found."), self.pool)
            time.sleep(5)

        except exceptions.PoolNotFound:
            # Pool data may not have migrated to the DB yet
            LOG.error(_LE("Pool ID %s not found."), pool_id)
            time.sleep(5)

        except messaging.exceptions.MessagingTimeout:
            # designate-central service may not have started yet
            time.sleep(0.2)

        except Exception:
            # designate-central failed in an unknown way, don't allow another
            # failing / not started service to cause pool-manager to crash.
            LOG.exception(_LE("An unknown exception occurred while "
                              "fetching pool details"))
            time.sleep(5)

    # Create the necessary Backend instances for each target
    self._setup_target_backends()

    for pool_target in self.pool.targets:
        self.target_backends[pool_target.id].start()

    super(Service, self).start()

    # Setup a Leader Election, use for ensuring certain tasks are executed
    # on exactly one pool-manager instance at a time
    election_group = '%s:%s' % (self.service_name, self.pool.id)
    self._pool_election = coordination.LeaderElection(
        self._coordinator, election_group)
    self._pool_election.start()

    if pm_conf.enable_recovery_timer:
        interval = pm_conf.periodic_recovery_interval
        LOG.info(_LI('Starting periodic recovery timer every'
                     ' %(interval)s s') % {'interval': interval})
        self.tg.add_timer(interval, self.periodic_recovery, interval)

    if pm_conf.enable_sync_timer:
        interval = pm_conf.periodic_sync_interval
        LOG.info(_LI('Starting periodic synchronization timer every'
                     ' %(interval)s s') % {'interval': interval})
        self.tg.add_timer(interval, self.periodic_sync, interval)
def from_file(self, input_file=None, delimiter=None):
    """Import TLDs from a delimited text file.

    Every row is read as ``name`` and ``description``. Rows carrying more
    than two fields are recorded as invalid lines; all other rows are passed
    to ``_validate_and_create_tld``. The number of TLDs added and any error
    lines (sorted) are logged at the end.

    :param input_file: path of the file to import
    :param delimiter: field delimiter used in the file
    :raises: Exception if ``input_file`` is None or does not exist
    """
    # os.path.exists() does not accept None, so test for it explicitly to
    # raise the intended "not found" error rather than a TypeError.
    if input_file is None or not os.path.exists(str(input_file)):
        raise Exception('TLD Input file Not Found')
    input_file = str(input_file)

    LOG.info(_LI("Importing TLDs from %s"), input_file)

    error_lines = []
    tlds_added = 0

    with open(input_file) as inf:
        csv.register_dialect('import-tlds', delimiter=str(delimiter))
        reader = csv.DictReader(inf,
                                fieldnames=['name', 'description'],
                                restkey='extra_fields',
                                dialect='import-tlds')
        for line in reader:
            # check if there are more than 2 fields
            if 'extra_fields' in line:
                error_lines.append("InvalidLine --> " +
                                   self._convert_tld_dict_to_str(line))
            else:
                tlds_added += self._validate_and_create_tld(line,
                                                            error_lines)

    LOG.info(_LI("Number of tlds added: %d"), tlds_added)

    errors = len(error_lines)
    if errors > 0:
        LOG.error(_LE("Number of errors: %d"), errors)
        # Sorting the errors and printing them so that it is easier to
        # read the errors
        LOG.error(_LE("Error Lines:\n%s"), '\n'.join(sorted(error_lines)))
def __call__(self, request):
    """Deserialize a DNS request, run the application, serialize replies.

    This is a generator. When the payload parses, each response produced by
    ``self.application`` is yielded in wire format (``dns.message.Message``
    and ``dns.renderer.Renderer`` instances only). When parsing fails the
    failure is logged and nothing is yielded.

    :param request: dict with the raw ``payload`` and the client ``addr``
    """
    # Generate the initial context. This may be updated by other middleware
    # as we learn more information about the Request.
    ctxt = context.DesignateContext.get_admin_context(all_tenants=True)

    def _peer():
        # Evaluated lazily: the address is only inspected when logging.
        return {'host': request['addr'][0], 'port': request['addr'][1]}

    # NOTE(review): every handler below builds an error response, but this
    # function never yields it - confirm whether that is intentional.
    try:
        message = dns.message.from_wire(request['payload'],
                                        self.tsig_keyring)

        if message.had_tsig:
            LOG.debug('Request signed with TSIG key: %s', message.keyname)

        # Create + Attach the initial "environ" dict. This is similar to
        # the environ dict used in typical WSGI middleware.
        message.environ = {'context': ctxt, 'addr': request['addr']}

    except dns.message.UnknownTSIGKey:
        LOG.error(_LE("Unknown TSIG key from %(host)s:"
                      "%(port)d") % _peer())
        response = self._build_error_response()

    except dns.tsig.BadSignature:
        LOG.error(_LE("Invalid TSIG signature from %(host)s:"
                      "%(port)d") % _peer())
        response = self._build_error_response()

    except dns.exception.DNSException:
        LOG.error(_LE("Failed to deserialize packet from %(host)s:"
                      "%(port)d") % _peer())
        response = self._build_error_response()

    except Exception:
        LOG.exception(_LE("Unknown exception deserializing packet "
                          "from %(host)s %(port)d") % _peer())
        response = self._build_error_response()

    else:
        # Hand the Deserialized packet onto the Application
        for response in self.application(message):
            # Serialize and return the response if present
            if isinstance(response, dns.message.Message):
                yield response.to_wire(max_size=65535)
            elif isinstance(response, dns.renderer.Renderer):
                yield response.get_wire()
def parse(cls, format_, values, output_object, *args, **kwargs):
    """Populate ``output_object`` from ``values`` using the matching adapter.

    List objects (``objects.ListObjectMixin``) go through the adapter's
    ``_parse_list``; everything else goes through ``_parse_object``.

    :param format_: format name used to select the adapter
    :param values: incoming values to parse
    :param output_object: object to populate
    :returns: whatever the adapter's parse method returns
    :raises: exceptions.InvalidObject for any failure while parsing
    """
    LOG.debug("Creating %s object with values %r",
              output_object.obj_name(), values)

    try:
        adapter = cls.get_object_adapter(format_, output_object)
        if isinstance(output_object, objects.ListObjectMixin):
            # type_ = 'list'
            return adapter._parse_list(
                values, output_object, *args, **kwargs)
        # type_ = 'object'
        return adapter._parse_object(
            values, output_object, *args, **kwargs)

    except TypeError as e:
        LOG.exception(_LE("TypeError creating %(name)s with values"
                          " %(values)r"),
                      {"name": output_object.obj_name(), "values": values})
        # The message is built with % only. Passing it through str.format()
        # would re-interpret any braces in the exception text and raise.
        error_message = (
            'Provided object does not match schema.  '
            'Got a TypeError with message %s' % six.text_type(e))
        raise exceptions.InvalidObject(error_message)

    except AttributeError as e:
        LOG.exception(_LE("AttributeError creating %(name)s "
                          "with values %(values)r"),
                      {"name": output_object.obj_name(), "values": values})
        error_message = (
            'Provided object is not valid.  '
            'Got an AttributeError with message %s' % six.text_type(e))
        raise exceptions.InvalidObject(error_message)

    except exceptions.InvalidObject:
        LOG.info(_LI("InvalidObject creating %(name)s with "
                     "values %(values)r"),
                 {"name": output_object.obj_name(), "values": values})
        raise

    except Exception as e:
        LOG.exception(_LE("Exception creating %(name)s with "
                          "values %(values)r"),
                      {"name": output_object.obj_name(), "values": values})
        error_message = (
            'Provided object is not valid.  '
            'Got a %s error with message %s' %
            (type(e).__name__, six.text_type(e)))
        raise exceptions.InvalidObject(error_message)
def _rebuild_data_cdb(self):
    """Rebuild data.cdb file from zone datafiles

    Requires global lock

    On zone creation, axfr-get creates datafiles atomically by doing rename.
    On zone deletion, os.remove deletes the file atomically
    Globbing and reading the datafiles can be done without locking on them.
    The data and data.cdb files are written into a unique temp directory

    :raises: exceptions.Backend if data.cdb cannot be generated or moved
             into place
    """
    tmpdir = tempfile.mkdtemp(dir=self._datafiles_dir)
    data_fn = os.path.join(tmpdir, 'data')
    tmp_cdb_fn = os.path.join(tmpdir, 'data.cdb')

    try:
        self._concatenate_zone_datafiles(data_fn,
                                         self._datafiles_path_glob)
        # Generate the data.cdb file
        LOG.info(_LI("Updating data.cdb"))
        LOG.debug("Convert %s to %s", data_fn, tmp_cdb_fn)
        try:
            execute(
                cfg.CONF[CFG_GROUP].tinydns_data_cmd_name,
                cwd=tmpdir
            )
        except ProcessExecutionError as e:
            LOG.error(_LE("Failed to generate data.cdb"))
            LOG.error(_LE("Command output: %(out)r Stderr: %(err)r"), {
                'out': e.stdout, 'err': e.stderr
            })
            raise exceptions.Backend("Failed to generate data.cdb")

        LOG.debug("Move %s to %s", tmp_cdb_fn, self._tinydns_cdb_filename)
        try:
            os.rename(tmp_cdb_fn, self._tinydns_cdb_filename)
        except OSError:
            # The leftover temp file is removed in the finally block, so a
            # failing removal cannot mask the Backend error raised here.
            LOG.error(_LE("Unable to move data.cdb to %s"),
                      self._tinydns_cdb_filename)
            raise exceptions.Backend("Unable to move data.cdb")

    finally:
        # Best-effort cleanup: either file may legitimately be missing
        # (data.cdb has been renamed away on success).
        for leftover in (data_fn, tmp_cdb_fn):
            try:
                os.remove(leftover)
            except OSError:
                pass

        # os.rmdir rather than os.removedirs: the latter would also prune
        # the parent datafiles directory whenever it happens to be empty.
        try:
            os.rmdir(tmpdir)
        except OSError:
            pass
def start(self):
    """Start the pool manager service.

    Waits until the configured pool is available from central and has at
    least one target, then starts the target backends, the parent service,
    the pool leader election and the enabled periodic timers.
    """
    # Build the Pool (and related) Object from Config
    context = DesignateContext.get_admin_context()
    pool_id = CONF['service:pool_manager'].pool_id

    pool_ready = False
    while not pool_ready:
        try:
            self.pool = self.central_api.get_pool(context, pool_id)
        except exceptions.PoolNotFound:
            # Pool data may not have migrated to the DB yet
            LOG.error(_LE("Pool ID %s not found."), pool_id)
            time.sleep(5)
        except messaging.exceptions.MessagingTimeout:
            # designate-central service may not have started yet
            time.sleep(0.2)
        else:
            if len(self.pool.targets) > 0:
                pool_ready = True
            else:
                LOG.error(_LE("No targets for %s found."), self.pool)
                time.sleep(5)

    # Create the necessary Backend instances for each target
    self._setup_target_backends()

    for pool_target in self.pool.targets:
        backend = self.target_backends[pool_target.id]
        backend.start()

    super(Service, self).start()

    # Setup a Leader Election, use for ensuring certain tasks are executed
    # on exactly one pool-manager instance at a time
    self._pool_election = coordination.LeaderElection(
        self._coordinator,
        '%s:%s' % (self.service_name, self.pool.id))
    self._pool_election.start()

    if CONF['service:pool_manager'].enable_recovery_timer:
        recovery_interval = \
            CONF['service:pool_manager'].periodic_recovery_interval
        LOG.info(_LI('Starting periodic recovery timer every'
                     ' %(interval)s s') % {'interval': recovery_interval})
        self.tg.add_timer(recovery_interval, self.periodic_recovery,
                          recovery_interval)

    if CONF['service:pool_manager'].enable_sync_timer:
        sync_interval = CONF['service:pool_manager'].periodic_sync_interval
        LOG.info(_LI('Starting periodic synchronization timer every'
                     ' %(interval)s s') % {'interval': sync_interval})
        self.tg.add_timer(sync_interval, self.periodic_sync, sync_interval)
def refresh_auth(self):
    """Obtain a new Kerberos token for the HTTP service on the IPA host.

    Initialises a GSS client context for ``HTTP@<hostname>``, performs one
    client step and stores the client response in ``self.token``.

    :raises: IPAAuthError if initialising or stepping raises a GSSError
    """
    service = "HTTP@" + self.hostname
    flags = kerberos.GSS_C_MUTUAL_FLAG | kerberos.GSS_C_SEQUENCE_FLAG

    # Both calls fail the same way, so they share a single handler.
    try:
        _, vc = kerberos.authGSSClientInit(service, flags)
        kerberos.authGSSClientStep(vc, "")
    except kerberos.GSSError as exc:
        LOG.error(_LE("caught kerberos exception %r") % exc)
        raise IPAAuthError(str(exc))

    self.token = kerberos.authGSSClientResponse(vc)
def _rebuild_data_cdb(self):
    """Rebuild data.cdb file from zone datafiles

    Requires global lock

    On zone creation, axfr-get creates datafiles atomically by doing rename.
    On zone deletion, os.remove deletes the file atomically
    Globbing and reading the datafiles can be done without locking on them.
    The data and data.cdb files are written into a unique temp directory

    :raises: exceptions.Backend if data.cdb cannot be generated or moved
             into place
    """
    tmpdir = tempfile.mkdtemp(dir=self._datafiles_dir)
    data_fn = os.path.join(tmpdir, 'data')
    tmp_cdb_fn = os.path.join(tmpdir, 'data.cdb')

    try:
        self._concatenate_zone_datafiles(data_fn,
                                         self._datafiles_path_glob)

        # Generate the data.cdb file
        LOG.info(_LI("Updating data.cdb"))
        LOG.debug("Convert %s to %s", data_fn, tmp_cdb_fn)
        try:
            execute(cfg.CONF[CFG_GROUP].tinydns_data_cmd_name, cwd=tmpdir)
        except ProcessExecutionError as e:
            LOG.error(_LE("Failed to generate data.cdb"))
            LOG.error(_LE("Command output: %(out)r Stderr: %(err)r"),
                      {'out': e.stdout, 'err': e.stderr})
            raise exceptions.Backend("Failed to generate data.cdb")

        LOG.debug("Move %s to %s", tmp_cdb_fn, self._tinydns_cdb_filename)
        try:
            os.rename(tmp_cdb_fn, self._tinydns_cdb_filename)
        except OSError:
            # Cleanup of the temp data.cdb happens in the finally block; a
            # failing os.remove() here used to hide the Backend error.
            LOG.error(_LE("Unable to move data.cdb to %s"),
                      self._tinydns_cdb_filename)
            raise exceptions.Backend("Unable to move data.cdb")

    finally:
        # Remove whatever is left in the temp directory. Missing files are
        # expected (data.cdb is gone after a successful rename).
        for path in (data_fn, tmp_cdb_fn):
            try:
                os.remove(path)
            except OSError:
                pass

        # Remove only the temp directory itself. os.removedirs() would go
        # on to delete empty parent directories as well.
        try:
            os.rmdir(tmpdir)
        except OSError:
            pass
def do_axfr(zone_name, servers, timeout=None, source=None):
    """
    Requests an AXFR for a given zone name and process the response

    ``servers`` is shuffled in place and the servers are tried in that
    order until one of them yields the zone.

    :param zone_name: name of the zone to transfer
    :param servers: list of dicts with ``host`` and ``port`` keys
    :param timeout: overall timeout per server; falls back to the
                    ``xfr_timeout`` option of ``service:mdns`` when falsy
    :param source: passed through to ``dns.query.xfr``
    :returns: Zone instance from dnspython
    :raises: exceptions.XFRFailure if no server yields the zone
    """
    random.shuffle(servers)
    if not timeout:
        timeout = cfg.CONF["service:mdns"].xfr_timeout

    for server in servers:
        guard = eventlet.Timeout(timeout)
        log_info = {'name': zone_name, 'host': server}
        try:
            LOG.info(_LI("Doing AXFR for %(name)s from %(host)s"), log_info)

            xfr = dns.query.xfr(server['host'], zone_name,
                                relativize=False, timeout=1,
                                port=server['port'], source=source)
            raw_zone = dns.zone.from_xfr(xfr, relativize=False)
            break
        except eventlet.Timeout as expired:
            # Only our own timer is reported; any other Timeout instance
            # is dropped silently and the next server is tried.
            if expired == guard:
                LOG.error(
                    _LE("AXFR timed out for %(name)s from %(host)s") %
                    log_info)
        except dns.exception.FormError:
            LOG.error(
                _LE("Zone %(name)s is not present on %(host)s."
                    "Trying next server.") % log_info)
        except socket.error:
            LOG.error(
                _LE("Connection error when doing AXFR for %(name)s from "
                    "%(host)s") % log_info)
        except Exception:
            LOG.exception(
                _LE("Problem doing AXFR %(name)s from %(host)s. "
                    "Trying next server.") % log_info)
        finally:
            guard.cancel()
    else:
        # Reached only when the loop finished without a successful break.
        failure = _LE("XFR failed for %(name)s. No servers in %(servers)s "
                      "was reached.")
        raise exceptions.XFRFailure(
            failure % {"name": zone_name, "servers": servers})

    LOG.debug("AXFR Successful for %s" % raw_zone.origin.to_text())

    return raw_zone
def _get_members(self, group_id): get_members_req = self._coordinator.get_members(group_id) try: return get_members_req.get() except tooz.coordination.GroupNotCreated: LOG.error(_LE('Attempting to partition over a non-existent group: ' '%s'), self._group_id) raise except tooz.coordination.ToozError: LOG.error(_LE('Error getting group membership info from ' 'coordination backend.')) raise
def _dns_handle(self, addr, payload, client=None): """ Handle a DNS Query :param addr: Tuple of the client's (IP, Port) :param payload: Raw DNS query payload :param client: Client socket (for TCP only) """ try: # Call into the DNS Application itself with the payload and addr for response in self._dns_application( {'payload': payload, 'addr': addr}): # Send back a response only if present if response is not None: if client: # Handle TCP Responses msg_length = len(response) tcp_response = struct.pack("!H", msg_length) + response client.send(tcp_response) else: # Handle UDP Responses self._dns_sock_udp.sendto(response, addr) except Exception: LOG.exception(_LE("Unhandled exception while processing request " "from %(host)s:%(port)d") % {'host': addr[0], 'port': addr[1]}) # Close the TCP connection if we have one. if client: client.close()
def _dns_handle_udp(self, sock_udp):
    """Handle a DNS Query over UDP in a dedicated thread

    Loops forever: receives a datagram and dispatches a thread running
    ``_dns_handle_udp_query`` for it. Errors are logged and the loop
    continues.

    :param sock_udp: UDP socket
    :type sock_udp: socket
    :raises: None
    """
    LOG.info(_LI("_handle_udp thread started"))

    while True:
        # Reset on every iteration so the handlers below never hit an
        # unbound name, nor report the previous datagram's sender, when
        # recvfrom() itself fails.
        addr = None
        try:
            # TODO(kiall): Determine the appropriate default value for
            #              UDP recvfrom.
            payload, addr = sock_udp.recvfrom(8192)

            LOG.debug("Handling UDP Request from: %(host)s:%(port)d",
                      {'host': addr[0], 'port': addr[1]})

            # Dispatch a thread to handle the query
            self.tg.add_thread(self._dns_handle_udp_query, sock_udp, addr,
                               payload)

        except socket.error as e:
            host, port = (addr[0], addr[1]) if addr else ('unknown', 0)
            # Not every socket error carries an errno (e.g. timeouts), so
            # fall back to the raw first argument.
            code = e.args[0] if e.args else None
            errname = errno.errorcode.get(code, code)
            LOG.warning(
                _LW("Socket error %(err)s from: %(host)s:%(port)d"),
                {'host': host, 'port': port, 'err': errname})

        except Exception:
            host, port = (addr[0], addr[1]) if addr else ('unknown', 0)
            LOG.exception(_LE("Unknown exception handling UDP request "
                              "from: %(host)s:%(port)d"),
                          {'host': host, 'port': port})
def _split_host_port(l): try: host, port = l.split(':', 1) return host, int(port) except ValueError: LOG.exception(_LE('Invalid ip:port pair: %s'), l) raise
def _create_zone_on_target(self, context, target, zone):
    """Called by create_zone, run create_zone on backends

    The backend call is attempted up to ``self.max_retries`` times, sleeping
    ``self.retry_interval`` after every failed attempt.

    :param context: Security context information.
    :param target: Target to create Zone on
    :param zone: Zone to be created
    :return: True/False
    """
    LOG.debug("Creating zone %s on target %s", zone.name, target.id)

    backend = self.target_backends[target.id]
    attempt = 0

    while attempt < self.max_retries:
        attempt += 1
        try:
            backend.create_zone(context, zone)
        except Exception:
            LOG.exception(
                _LE("Failed to create zone %(zone)s on "
                    "target %(target)s on attempt %(attempt)d"),
                {
                    "zone": zone.name,
                    "target": target.id,
                    "attempt": attempt,
                })  # noqa
            time.sleep(self.retry_interval)
        else:
            return True

    return False
def _execute_knotc(self, *knotc_args, **kw):
    """Run the Knot client and check the output

    :param expected_output: expected output (default: 'OK')
    :type expected_output: str
    :param expected_error: expected alternative output, will be
                           logged as info(). Default: not set.
    :type expected_error: str
    :raises: exceptions.Backend if knotc fails or prints anything other
             than the expected output
    """
    # Knotc returns "0" even on failure, we have to check for 'OK'
    # https://gitlab.labs.nic.cz/labs/knot/issues/456

    LOG.debug("Executing knotc with %r", knotc_args)

    expected = kw.get('expected_output', 'OK')
    expected_alt = kw.get('expected_error', None)

    try:
        out, err = execute(self._knotc_cmd_name, *knotc_args)
        out = out.rstrip()
        LOG.debug("Command output: %r" % out)

        if out == expected:
            return

        if expected_alt is not None and out == expected_alt:
            LOG.info(_LI("Ignoring error: %r"), out)
            return

        # Unexpected output is treated like a failed command.
        raise ProcessExecutionError(stdout=out, stderr=err)

    except ProcessExecutionError as exc:
        LOG.error(_LE("Command output: %(out)r Stderr: %(err)r"),
                  {'out': exc.stdout, 'err': exc.stderr})
        raise exceptions.Backend(exc)
def main():
    """Entry point of the designate-pool-manager command.

    Reads the configuration, sets up logging, refuses to start when the
    worker model is enabled, then serves the pool manager service and waits
    for it.
    """
    utils.read_config('designate', sys.argv)
    logging.setup(CONF, 'designate')
    gmr.TextGuruMeditation.setup_autorun(version)

    # NOTE(timsim): This is to ensure people don't start the wrong
    #               services when the worker model is enabled.
    worker_enabled = cfg.CONF['service:worker'].enabled
    if worker_enabled:
        LOG.error(_LE('You have designate-worker enabled, starting '
                      'designate-pool-manager is incompatible with '
                      'designate-worker. You need to start '
                      'designate-worker instead.'))
        sys.exit(1)

    LOG.warning(_LW('designate-pool-manager is DEPRECATED in favor of '
                    'designate-worker and will be removed during the Ocata '
                    'cycle'))

    thread_count = CONF['service:pool_manager'].threads
    server = pool_manager_service.Service(threads=thread_count)

    hookpoints.log_hook_setup()

    worker_count = CONF['service:pool_manager'].workers
    service.serve(server, workers=worker_count)
    service.wait()
def _delete_domain_on_target(self, context, target, domain):
    """
    Delete a domain on one target's backend, retrying on failure.

    The backend call is attempted up to ``self.max_retries`` times, sleeping
    ``self.retry_interval`` after every failed attempt.

    :param context: Security context information.
    :param target: Target to delete Domain from
    :param domain: Domain to be deleted
    :return: True/False
    """
    LOG.debug("Deleting domain %s on target %s", domain.name, target.id)

    backend = self.target_backends[target.id]
    attempt = 0

    while attempt < self.max_retries:
        attempt += 1
        try:
            backend.delete_domain(context, domain)
        except Exception:
            LOG.exception(
                _LE("Failed to delete domain %(domain)s on "
                    "target %(target)s on attempt %(attempt)d"),
                {'domain': domain.name, 'target': target.id,
                 'attempt': attempt})
            time.sleep(self.retry_interval)
        else:
            return True

    return False
def _dns_handle_udp_query(self, sock, addr, payload): """ Handle a DNS Query over UDP :param sock: UDP socket :type sock: socket :param addr: Tuple of the client's (IP, Port) :type addr: tuple :param payload: Raw DNS query payload :type payload: string :raises: None """ try: # Call into the DNS Application itself with the payload and addr for response in self._dns_application( {'payload': payload, 'addr': addr}): # Send back a response only if present if response is not None: sock.sendto(response, addr) except Exception: LOG.exception(_LE("Unhandled exception while processing request " "from %(host)s:%(port)d") % {'host': addr[0], 'port': addr[1]})
def _dns_handle_udp(self, sock_udp):
    """Receive DNS queries over UDP and dispatch a handler thread for each.

    Loops forever. Every datagram is handed to ``_dns_handle`` on a new
    thread together with the socket to reply on. Errors are logged and the
    loop continues.

    :param sock_udp: UDP socket to read from
    """
    LOG.info(_LI("_handle_udp thread started"))

    while True:
        # Cleared each time round: if recvfrom() fails there is no sender,
        # and the handlers must not reuse the previous one or crash on an
        # unbound name.
        addr = None
        try:
            # TODO(kiall): Determine the appropriate default value for
            #              UDP recvfrom.
            payload, addr = sock_udp.recvfrom(8192)

            LOG.debug("Handling UDP Request from: %(host)s:%(port)d",
                      {'host': addr[0], 'port': addr[1]})

            # Dispatch a thread to handle the query
            self.tg.add_thread(self._dns_handle, addr, payload,
                               sock_udp=sock_udp)

        except socket.error as e:
            host, port = (addr[0], addr[1]) if addr else ('unknown', 0)
            # Fall back to the raw first argument when it is not a known
            # errno (e.g. for timeouts).
            code = e.args[0] if e.args else None
            errname = errno.errorcode.get(code, code)
            LOG.warning(
                _LW("Socket error %(err)s from: %(host)s:%(port)d"),
                {'host': host, 'port': port, 'err': errname})

        except Exception:
            host, port = (addr[0], addr[1]) if addr else ('unknown', 0)
            LOG.exception(_LE("Unknown exception handling UDP request "
                              "from: %(host)s:%(port)d"),
                          {'host': host, 'port': port})
def periodic_recovery(self):
    """
    Retry failed domain deletions, creations and updates, in that order.

    Runs under the external 'periodic_recovery' lock. Any exception is
    logged and swallowed.

    :return: None
    """
    # TODO(kiall): Replace this inter-process-lock with a distributed
    #              lock, likely using the tooz library - see bug 1445127.
    with lockutils.lock('periodic_recovery', external=True, delay=30):
        context = DesignateContext.get_admin_context(all_tenants=True)

        LOG.debug("Starting Periodic Recovery")

        try:
            # Handle Deletion Failures
            for failed in self._get_failed_domains(context, DELETE_ACTION):
                self.delete_domain(context, failed)

            # Handle Creation Failures
            for failed in self._get_failed_domains(context, CREATE_ACTION):
                self.create_domain(context, failed)

            # Handle Update Failures
            for failed in self._get_failed_domains(context, UPDATE_ACTION):
                self.update_domain(context, failed)

        except Exception:
            LOG.exception(
                _LE('An unhandled exception in periodic '
                    'recovery occurred'))
def _dns_handle(self, addr, payload, client=None, sock_udp=None): """ Handle a DNS Query :param addr: Tuple of the client's (IP, Port) :param payload: Raw DNS query payload :param client: Client socket (for TCP only) """ try: # Call into the DNS Application itself with the payload and addr for response in self._dns_application( {'payload': payload, 'addr': addr}): # Send back a response only if present if response is not None: if client: # Handle TCP Responses msg_length = len(response) tcp_response = struct.pack("!H", msg_length) + response client.sendall(tcp_response) else: # Handle UDP Responses sock_udp.sendto(response, addr) except Exception: LOG.exception(_LE("Unhandled exception while processing request " "from %(host)s:%(port)d") % {'host': addr[0], 'port': addr[1]}) # Close the TCP connection if we have one. if client: client.close()
def _delete_zone_on_target(self, context, target, zone):
    """
    Delete a zone on one target's backend, retrying on failure.

    The backend call is attempted up to ``self.max_retries`` times, sleeping
    ``self.retry_interval`` after every failed attempt.

    :param context: Security context information.
    :param target: Target to delete Zone from
    :param zone: Zone to be deleted
    :return: True/False
    """
    LOG.debug("Deleting zone %s on target %s", zone.name, target.id)

    backend = self.target_backends[target.id]
    attempt = 0

    while attempt < self.max_retries:
        attempt += 1
        try:
            backend.delete_zone(context, zone)
        except Exception:
            LOG.exception(
                _LE("Failed to delete zone %(zone)s on "
                    "target %(target)s on attempt %(attempt)d"),
                {'zone': zone.name, 'target': target.id,
                 'attempt': attempt})
            time.sleep(self.retry_interval)
        else:
            return True

    return False
def periodic_recovery(self):
    """
    Retry failed zone deletions, creations and updates, in that order.

    Runs only on the pool leader. The number of zones needing each action
    is logged; any exception is logged and swallowed.

    :return: None
    """
    if not self._pool_election.is_leader:
        return

    context = self._get_admin_context_all_tenants()
    LOG.debug("Starting Periodic Recovery")

    def _recover(action, count_message, handler):
        # Fetch the zones that failed ``action`` and re-run it on each.
        failed_zones = self._get_failed_zones(context, action)
        LOG.info(count_message, len(failed_zones))
        for failed in failed_zones:
            handler(context, failed)

    try:
        # Handle Deletion Failures
        _recover(DELETE_ACTION,
                 _LI("periodic_recovery:delete_zone needed on %d zones"),
                 self.delete_zone)

        # Handle Creation Failures
        _recover(CREATE_ACTION,
                 _LI("periodic_recovery:create_zone needed on %d zones"),
                 self.create_zone)

        # Handle Update Failures
        _recover(UPDATE_ACTION,
                 _LI("periodic_recovery:update_zone needed on %d zones"),
                 self.update_zone)

    except Exception:
        LOG.exception(_LE("An unhandled exception in periodic "
                          "recovery occurred"))
def periodic_sync(self):
    """
    Re-run update_domain for the pool's domains that are not in ERROR.

    When ``periodic_sync_seconds`` is set, only domains whose serial is
    newer than that many seconds before the current serial are selected.
    Exceptions raised while updating are logged and swallowed.

    :return: None
    """
    LOG.debug("Calling periodic_sync.")

    context = DesignateContext.get_admin_context(all_tenants=True)

    pm_conf = cfg.CONF['service:pool_manager']
    criterion = {
        'pool_id': pm_conf.pool_id,
        'status': '%s%s' % ('!', ERROR_STATUS),
    }

    periodic_sync_seconds = pm_conf.periodic_sync_seconds
    if periodic_sync_seconds is not None:
        # Generate the current serial, will provide a UTC Unix TS.
        current = utils.increment_serial()
        oldest_serial = current - periodic_sync_seconds
        criterion['serial'] = ">%s" % oldest_serial

    domains = self.central_api.find_domains(context, criterion)

    try:
        for stale in domains:
            self.update_domain(context, stale)

    except Exception:
        LOG.exception(
            _LE('An unhandled exception in periodic sync '
                'occurred.  This should never happen!'))
def periodic_sync(self):
    """
    Re-run update_zone for the pool's zones that are not in ERROR.

    Does nothing unless this instance is the pool leader. When
    ``periodic_sync_seconds`` is set, only zones whose serial is newer than
    that many seconds before the current serial are selected. Exceptions
    raised while updating are logged and swallowed.

    :return: None
    """
    # NOTE(kiall): Only run this periodic task on the pool leader
    if not self._pool_election.is_leader:
        return

    context = DesignateContext.get_admin_context(all_tenants=True)

    LOG.debug("Starting Periodic Synchronization")

    pm_conf = CONF['service:pool_manager']
    criterion = {
        'pool_id': pm_conf.pool_id,
        'status': '!%s' % ERROR_STATUS,
    }

    periodic_sync_seconds = pm_conf.periodic_sync_seconds
    if periodic_sync_seconds is not None:
        # Generate the current serial, will provide a UTC Unix TS.
        current = utils.increment_serial()
        oldest_serial = current - periodic_sync_seconds
        criterion['serial'] = ">%s" % oldest_serial

    zones = self.central_api.find_zones(context, criterion)

    try:
        for stale in zones:
            # TODO(kiall): If the zone was created within the last
            #              periodic_sync_seconds, attempt to recreate
            #              to fill in targets which may have failed.
            self.update_zone(context, stale)

    except Exception:
        LOG.exception(_LE('An unhandled exception in periodic '
                          'synchronization occurred.'))
def periodic_recovery(self):
    """
    Retry failed zone deletions, creations and updates, in that order.

    Does nothing unless this instance is the pool leader. Any exception is
    logged and swallowed.

    :return: None
    """
    # NOTE(kiall): Only run this periodic task on the pool leader
    if not self._pool_election.is_leader:
        return

    context = DesignateContext.get_admin_context(all_tenants=True)

    LOG.debug("Starting Periodic Recovery")

    try:
        # Handle Deletion Failures
        for failed in self._get_failed_zones(context, DELETE_ACTION):
            self.delete_zone(context, failed)

        # Handle Creation Failures
        for failed in self._get_failed_zones(context, CREATE_ACTION):
            self.create_zone(context, failed)

        # Handle Update Failures
        for failed in self._get_failed_zones(context, UPDATE_ACTION):
            self.update_zone(context, failed)

    except Exception:
        LOG.exception(_LE('An unhandled exception in periodic '
                          'recovery occurred'))
def periodic_sync(self):
    """Periodically sync all the zones that are not in ERROR status
    Runs only on the pool leader

    Zones whose update fails (returns a falsy value or raises) are retried
    on each cycle produced by ``_constant_retries``. Zones still failing
    once the cycles are exhausted are set to ERROR status through central.

    :return: None
    """
    if not self._pool_election.is_leader:
        return

    LOG.debug("Starting Periodic Synchronization")
    context = self._get_admin_context_all_tenants()
    zones = set(self._fetch_healthy_zones(context))

    # TODO(kiall): If the zone was created within the last
    #              periodic_sync_seconds, attempt to recreate
    #              to fill in targets which may have failed.

    retry_gen = _constant_retries(
        self._periodic_sync_max_attempts,
        self._periodic_sync_retry_interval
    )

    # Bound up front: if the retry generator yields no cycle at all, there
    # is nothing to flag and the final loop must not hit an unbound name.
    zones_in_error = []

    for _is_last_cycle in retry_gen:
        zones_in_error = []
        for zone in zones:
            try:
                success = self.update_zone(context, zone)
                if not success:
                    zones_in_error.append(zone)
            except Exception:
                LOG.exception(_LE("An unhandled exception in periodic "
                                  "synchronization occurred."))
                zones_in_error.append(zone)

        if not zones_in_error:
            return

        # Only the zones that failed are retried on the next cycle.
        zones = zones_in_error

    for zone in zones_in_error:
        self.central_api.update_status(context, zone.id, ERROR_STATUS,
                                       zone.serial)
def periodic_recovery(self):
    """
    Retry failed domain deletions, creations and updates, in that order.

    Runs under the external 'periodic_recovery' lock. Any exception is
    logged and swallowed.

    :return: None
    """
    # TODO(kiall): Replace this inter-process-lock with a distributed
    #              lock, likely using the tooz library - see bug 1445127.
    with lockutils.lock('periodic_recovery', external=True, delay=30):
        context = DesignateContext.get_admin_context(all_tenants=True)

        LOG.debug("Starting Periodic Recovery")

        def _recover(action, handler):
            # Fetch the domains that failed ``action`` and re-run it.
            for failed in self._get_failed_domains(context, action):
                handler(context, failed)

        try:
            # Handle Deletion Failures
            _recover(DELETE_ACTION, self.delete_domain)

            # Handle Creation Failures
            _recover(CREATE_ACTION, self.create_domain)

            # Handle Update Failures
            _recover(UPDATE_ACTION, self.update_domain)

        except Exception:
            LOG.exception(_LE('An unhandled exception in periodic '
                              'recovery occurred'))
def _call(endpoint, region, *args, **kw):
    """Fetch the floating IPs of one Neutron endpoint into ``data``.

    ``context``, ``data`` and ``failed`` are not defined here; they are
    resolved from an outer scope. A 401 from Neutron is logged and ignored;
    any other failure is logged and recorded in ``failed``.

    :param endpoint: Neutron endpoint to query
    :param region: region the endpoint belongs to
    """
    client = get_client(context, endpoint=endpoint)
    LOG.debug("Attempting to fetch FloatingIPs from %s @ %s",
              endpoint, region)

    try:
        fips = client.list_floatingips(*args, **kw)
    except neutron_exceptions.Unauthorized as exc:
        # NOTE: 401 might be that the user doesn't have neutron
        # activated in a particular region, we'll just log the failure
        # and go on with our lives.
        LOG.warning(_LW("Calling Neutron resulted in a 401, "
                        "please investigate."))
        LOG.exception(exc)
        return
    except Exception as exc:
        LOG.error(_LE('Failed calling Neutron '
                      '%(region)s - %(endpoint)s'),
                  {'region': region, 'endpoint': endpoint})
        LOG.exception(exc)
        failed.append((exc, endpoint, region))
        return

    for floating_ip in fips['floatingips']:
        record = {
            'id': floating_ip['id'],
            'address': floating_ip['floating_ip_address'],
            'region': region,
        }
        data.append(record)

    LOG.debug("Added %i FloatingIPs from %s @ %s",
              len(data), endpoint, region)
def _dns_handle_udp(self):
    """Receive DNS queries over UDP and dispatch a handler thread for each.

    Loops forever, reading from ``self._dns_sock_udp`` and handing every
    datagram to ``_dns_handle`` on a new thread. Errors are logged and the
    loop continues.
    """
    LOG.info(_LI("_handle_udp thread started"))

    while True:
        # Cleared each time round: if recvfrom() fails there is no sender,
        # and the handlers must not reuse the previous one or crash on an
        # unbound name.
        addr = None
        try:
            # TODO(kiall): Determine the appropriate default value for
            #              UDP recvfrom.
            payload, addr = self._dns_sock_udp.recvfrom(8192)

            LOG.debug("Handling UDP Request from: %(host)s:%(port)d",
                      {'host': addr[0], 'port': addr[1]})

            # Dispatch a thread to handle the query
            self.tg.add_thread(self._dns_handle, addr, payload)

        except socket.error as e:
            host, port = (addr[0], addr[1]) if addr else ('unknown', 0)
            # Fall back to the raw first argument when it is not a known
            # errno (e.g. for timeouts).
            code = e.args[0] if e.args else None
            errname = errno.errorcode.get(code, code)
            # warning(): warn() is a deprecated alias.
            LOG.warning(
                _LW("Socket error %(err)s from: %(host)s:%(port)d"),
                {'host': host, 'port': port, 'err': errname})

        except Exception:
            host, port = (addr[0], addr[1]) if addr else ('unknown', 0)
            LOG.exception(
                _LE("Unknown exception handling UDP request "
                    "from: %(host)s:%(port)d"),
                {'host': host, 'port': port})
def _handle_errors(self, request, exception):
    """Build a JSON error response for an exception carrying object errors.

    The body holds the exception's code, type (``'unknown'`` when unset)
    and its rendered errors. When the request has a context, the request id
    is added and an API fault notification is sent.

    :param request: the failing request
    :param exception: exception exposing ``errors``, ``object``,
                      ``error_code`` and ``error_type``
    :returns: flask.Response with the exception's error code as status
    """
    headers = [
        ('Content-Type', 'application/json'),
    ]

    rendered_errors = DesignateAdapter.render(
        self.api_version, exception.errors, failed_object=exception.object)

    url = getattr(request, 'url', None)

    response = {
        'code': exception.error_code,
        'type': exception.error_type or 'unknown',
        'errors': rendered_errors,
    }

    # Return the new response
    if 'context' not in request.environ:
        # TODO(ekarlso): Remove after verifying that there's actually a
        # context always set
        LOG.error(_LE('Missing context in request, please check.'))
    else:
        request_context = request.environ['context']
        response['request_id'] = request_context.request_id

        notifications.send_api_fault(request.environ['context'], url,
                                     response['code'], exception)

    return flask.Response(status=exception.error_code, headers=headers,
                          response=json.dumps(response))
def _call(endpoint, region, *args, **kw):
    """Fetch the floating IPs of one Neutron endpoint into ``data``.

    ``context``, ``data`` and ``failed`` are not defined here; they are
    resolved from an outer scope. A 401 from Neutron is logged and ignored;
    any other failure is logged and recorded in ``failed``.

    :param endpoint: Neutron endpoint to query
    :param region: region the endpoint belongs to
    """
    client = get_client(context, endpoint=endpoint)
    # Arguments are handed to the logger instead of being %-formatted up
    # front, so formatting only happens when the record is emitted.
    LOG.debug("Attempting to fetch FloatingIPs from %s @ %s",
              endpoint, region)
    try:
        fips = client.list_floatingips(*args, **kw)
    except neutron_exceptions.Unauthorized as e:
        # NOTE: 401 might be that the user doesn't have neutron
        # activated in a particular region, we'll just log the failure
        # and go on with our lives.
        # warning(): warn() is a deprecated alias.
        LOG.warning(_LW("Calling Neutron resulted in a 401, "
                        "please investigate."))
        LOG.exception(e)
        return
    except Exception as e:
        LOG.error(_LE('Failed calling Neutron '
                      '%(region)s - %(endpoint)s'),
                  {'region': region, 'endpoint': endpoint})
        LOG.exception(e)
        failed.append((e, endpoint, region))
        return

    for fip in fips['floatingips']:
        data.append({
            'id': fip['id'],
            'address': fip['floating_ip_address'],
            'region': region
        })

    LOG.debug("Added %i FloatingIPs from %s @ %s",
              len(data), endpoint, region)
def _delete_domain_on_target(self, context, target, domain):
    """
    Delete a domain on one target's backend, retrying on failure.

    The backend call is attempted up to ``self.max_retries`` times, sleeping
    ``self.retry_interval`` after every failed attempt.

    :param context: Security context information.
    :param target: Target to delete Domain from
    :param domain: Domain to be deleted
    :return: True/False
    """
    LOG.debug("Deleting domain %s on target %s", domain.name, target.id)

    target_backend = self.target_backends[target.id]
    failures = 0

    while failures < self.max_retries:
        try:
            target_backend.delete_domain(context, domain)
        except Exception:
            failures += 1
            failure_info = {
                'domain': domain.name,
                'target': target.id,
                'attempt': failures,
            }
            LOG.exception(_LE("Failed to delete domain %(domain)s on "
                              "target %(target)s on attempt %(attempt)d"),
                          failure_info)
            time.sleep(self.retry_interval)
        else:
            return True

    return False
def _handle_exception(self, request, e, status=500, response=None):
    """Turn an exception into a JSON error response.

    :param request: The incoming request (its ``environ`` is inspected)
    :param e: The exception being handled
    :param status: HTTP status for the response; also the default ``code``
    :param response: Optional dict of fields to include in the body; a
                     non-empty dict passed here is updated in place
    :returns: flask.Response with a JSON encoded body
    """
    body = response or {}

    # Exceptions flagged as "expected" (e.g. a 404 Not Found) are not
    # logged; anything else is logged as early as possible.
    if not getattr(e, 'expected', False):
        LOG.exception(e)

    request_url = getattr(request, 'url', None)

    # Fill in the response code and type only when the caller left them out.
    body.setdefault('code', status)
    body.setdefault('type', 'unknown')

    environ = request.environ
    if 'context' not in environ:
        # TODO(ekarlso): Remove after verifying that there's actually a
        # context always set
        LOG.error(_LE('Missing context in request, please check.'))
    else:
        ctxt = environ['context']
        body['request_id'] = ctxt.request_id
        notifications.send_api_fault(ctxt, request_url, body['code'], e)

    # Return the new response
    return flask.Response(
        status=status,
        headers=[('Content-Type', 'application/json')],
        response=json.dumps(body))
def _dns_handle_udp_query(self, sock, addr, payload):
    """Answer a single DNS query that arrived over UDP.

    The payload is passed to the DNS application and every non-None
    response it yields is sent back to the client. Any exception is logged
    here and not propagated.

    :param sock: UDP socket
    :type sock: socket
    :param addr: Tuple of the client's (IP, Port)
    :type addr: tuple
    :param payload: Raw DNS query payload
    :type payload: string
    :raises: None
    """
    try:
        # Call into the DNS Application itself with the payload and addr
        query = {'payload': payload, 'addr': addr}
        for reply in self._dns_application(query):
            # Nothing to send back for this item
            if reply is None:
                continue
            sock.sendto(reply, addr)

    except Exception:
        peer = {'host': addr[0], 'port': addr[1]}
        LOG.exception(
            _LE("Unhandled exception while processing request "
                "from %(host)s:%(port)d") % peer)
def _extract_zones(self):
    """Load every master zone declared in the parsed configuration.

    Zone declarations are located in ``self._conf`` with
    ``self._zone_regex``; for each one marked as a master zone, its zone
    file is parsed with dnspython. A zone file lacking ``$ORIGIN`` is
    re-read using the declared zone name as origin. Zone files without an
    SOA record are logged and skipped.

    :returns: list of Zone objects, one per successfully loaded zone file
    """
    zones = []
    for zone in self._zone_regex.finditer(self._conf):
        content = zone.group('content')
        name = zone.group('name')

        # Make sure it's a master zone:
        if not self._type_master_regex.search(content):
            continue

        zonefile = self._zonefile_regex.search(content).group('file')
        try:
            try:
                zone_object = dns.zone.from_file(zonefile,
                                                 allow_include=True)
            except dns.zone.UnknownOrigin:
                LOG.info(
                    _LI('%(zonefile)s is missing $ORIGIN, '
                        'inserting %(name)s'),
                    {'zonefile': zonefile, 'name': name})
                zone_object = dns.zone.from_file(zonefile,
                                                 allow_include=True,
                                                 origin=name)
        except dns.zone.NoSOA:
            # The outer handler also covers NoSOA raised by the retry above.
            # Skip the zone: there is no parsed object to wrap, and falling
            # through would reuse the previous iteration's zone (or hit an
            # unbound name on the first one).
            LOG.error(_LE('%s has no SOA'), zonefile)
            continue

        zones.append(Zone(zone_object))
    return zones
def __call__(self, request):
    """Deserialize a raw DNS request and pass it to the wrapped application.

    :param request: dict with the wire-format ``payload`` and the client
                    ``addr``
    :returns: the wire-format response, or None when there is no response
    """
    # Generate the initial context. This may be updated by other middleware
    # as we learn more information about the Request.
    ctxt = context.DesignateContext.get_admin_context(all_tenants=True)

    def _reject(template):
        # Log ``template`` filled in with the client's address and produce
        # the error response sent for packets that cannot be accepted.
        LOG.error(template % {'host': request['addr'][0],
                              'port': request['addr'][1]})
        return self._build_error_response()

    try:
        message = dns.message.from_wire(request['payload'],
                                        self.tsig_keyring)

        if message.had_tsig:
            LOG.debug('Request signed with TSIG key: %s', message.keyname)

        # Create + Attach the initial "environ" dict. This is similar to
        # the environ dict used in typical WSGI middleware.
        message.environ = {
            'context': ctxt,
            'addr': request['addr'],
        }

    except dns.message.UnknownTSIGKey:
        response = _reject(_LE("Unknown TSIG key from %(host)s:"
                               "%(port)d"))

    except dns.tsig.BadSignature:
        response = _reject(_LE("Invalid TSIG signature from %(host)s:"
                               "%(port)d"))

    except dns.exception.DNSException:
        response = _reject(_LE("Failed to deserialize packet from %(host)s:"
                               "%(port)d"))

    else:
        # Hand the Deserialized packet onto the Application
        response = self.application(message)

    # Serialize and return the response if present
    if response is None:
        return None
    return response.to_wire()
def wrapper(*a, **kw):
    # Call the wrapped ``fn`` and normalise its failures: Backend exceptions
    # pass through untouched, anything else is logged with its traceback and
    # re-raised as exceptions.Backend.
    try:
        return fn(*a, **kw)
    except exceptions.Backend:
        # Bare raise keeps the original traceback intact.
        raise
    except Exception as e:
        # str(e) rather than e.message: the ``message`` attribute does not
        # exist on Python 3 exceptions, so reading it would raise
        # AttributeError and mask the real error.
        reason = str(e)
        LOG.error(_LE("Unhandled exception %s"), reason, exc_info=True)
        raise exceptions.Backend(reason)
def _coordinator_heartbeat(self):
    """Send a heartbeat to the coordination backend if it has been started.

    A ToozError raised by the heartbeat is logged and not propagated.
    """
    if self._coordination_started:
        try:
            self._coordinator.heartbeat()
        except tooz.coordination.ToozError:
            LOG.exception(
                _LE("Error sending a heartbeat to coordination backend."))
def wrapper(*a, **kw):
    # Invoke the wrapped ``fn``. Backend exceptions propagate unchanged;
    # any other exception is logged with its traceback and converted into
    # exceptions.Backend carrying the original message.
    try:
        result = fn(*a, **kw)
    except exceptions.Backend:
        raise
    except Exception as e:
        reason = str(e)
        LOG.error(_LE("Unhandled exception %s"), reason, exc_info=True)
        raise exceptions.Backend(reason)
    else:
        return result
def _coordinator_heartbeat(self):
    """Heartbeat the coordination backend; a no-op until it is started.

    Heartbeat failures raised as ToozError are logged, never propagated.
    """
    started = self._coordination_started
    if started:
        try:
            self._coordinator.heartbeat()
        except tooz.coordination.ToozError:
            message = _LE('Error sending a heartbeat to coordination '
                          'backend.')
            LOG.exception(message)
def _dns_handle_tcp(self, sock_tcp):
    """Accept TCP connections forever and dispatch one DNS query from each.

    For every accepted connection the 2-byte length prefix is read, the
    payload is buffered until that many bytes have arrived (or the peer
    stops sending), and ``self._dns_handle`` is scheduled on the thread
    group with the payload and the client socket. Failures are logged and
    the loop moves on to the next connection.

    :param sock_tcp: Listening TCP socket to accept connections from
    """
    LOG.info(_LI("_handle_tcp thread started"))

    while True:
        # Reset per-connection state. If accept() itself fails, the handlers
        # below must not touch the previous iteration's client (it may
        # already belong to a worker thread) or hit an unbound name.
        client = None
        addr = ('unknown', 0)

        try:
            client, addr = sock_tcp.accept()

            if self._service_config.tcp_recv_timeout:
                client.settimeout(self._service_config.tcp_recv_timeout)

            LOG.debug("Handling TCP Request from: %(host)s:%(port)d",
                      {'host': addr[0], 'port': addr[1]})

            # Prepare a variable for the payload to be buffered. recv()
            # returns bytes, so the buffer must be bytes as well.
            payload = b""

            # Receive the first 2 bytes containing the payload length
            expected_length_raw = client.recv(2)
            (expected_length, ) = struct.unpack('!H', expected_length_raw)

            # Keep receiving data until we've got all the data we expect
            while len(payload) < expected_length:
                data = client.recv(65535)
                if not data:
                    break
                payload += data

        # NOTE: Any uncaught exceptions will result in the main loop
        # ending unexpectedly. Ensure proper ordering of blocks, and
        # ensure no exceptions are generated from within.
        except socket.timeout:
            if client is not None:
                client.close()
            LOG.warning(_LW("TCP Timeout from: %(host)s:%(port)d"),
                        {'host': addr[0], 'port': addr[1]})

        except socket.error as e:
            if client is not None:
                client.close()
            # Fall back to the raw value for codes missing from the errno
            # table instead of raising KeyError inside the handler.
            code = e.args[0] if e.args else None
            errname = errno.errorcode.get(code, code)
            LOG.warning(
                _LW("Socket error %(err)s from: %(host)s:%(port)d"),
                {'host': addr[0], 'port': addr[1], 'err': errname})

        except struct.error:
            if client is not None:
                client.close()
            LOG.warning(_LW("Invalid packet from: %(host)s:%(port)d"),
                        {'host': addr[0], 'port': addr[1]})

        except Exception:
            if client is not None:
                client.close()
            LOG.exception(_LE("Unknown exception handling TCP request "
                              "from: %(host)s:%(port)d"),
                          {'host': addr[0], 'port': addr[1]})

        else:
            # Dispatch a thread to handle the query
            self.tg.add_thread(self._dns_handle, addr, payload,
                               client=client)
def _get_members(self, group_id):
    """Return the members of a coordination group.

    :param group_id: Identifier of the group to query
    :returns: the result of the coordinator's ``get_members`` request
    :raises: tooz.coordination.GroupNotCreated if the group does not exist
             (logged, then re-raised)
    """
    get_members_req = self._coordinator.get_members(group_id)
    try:
        return get_members_req.get()
    except tooz.coordination.GroupNotCreated:
        # Log the group that was actually queried, not self._group_id.
        LOG.error(_LE('Attempting to partition over a non-existent group: '
                      '%s'), group_id)
        raise
def _dns_handle_tcp(self):
    """Accept TCP connections forever and dispatch one DNS query from each.

    Connections are accepted on ``self._dns_sock_tcp``. For each one the
    2-byte length prefix is read, the payload is buffered until that many
    bytes have arrived (or the peer stops sending), and
    ``self._dns_handle`` is scheduled on the thread group. Failures are
    logged and the loop moves on to the next connection.
    """
    LOG.info(_LI("_handle_tcp thread started"))

    while True:
        # Reset per-connection state. If accept() itself fails, the handlers
        # below must not touch the previous iteration's client (it may
        # already belong to a worker thread) or hit an unbound name.
        client = None
        addr = ('unknown', 0)

        try:
            client, addr = self._dns_sock_tcp.accept()

            if self._service_config.tcp_recv_timeout:
                client.settimeout(self._service_config.tcp_recv_timeout)

            LOG.debug("Handling TCP Request from: %(host)s:%(port)d",
                      {'host': addr[0], 'port': addr[1]})

            # Prepare a variable for the payload to be buffered. recv()
            # returns bytes, so the buffer must be bytes as well.
            payload = b""

            # Receive the first 2 bytes containing the payload length
            expected_length_raw = client.recv(2)
            (expected_length, ) = struct.unpack('!H', expected_length_raw)

            # Keep receiving data until we've got all the data we expect
            while len(payload) < expected_length:
                data = client.recv(65535)
                if not data:
                    break
                payload += data

        # socket.timeout is a subclass of socket.error, so it has to be
        # handled first; otherwise this branch is unreachable and the errno
        # lookup below is attempted with a timeout's non-errno argument.
        except socket.timeout:
            if client is not None:
                client.close()
            LOG.warning(_LW("TCP Timeout from: %(host)s:%(port)d"),
                        {'host': addr[0], 'port': addr[1]})

        except socket.error as e:
            if client is not None:
                client.close()
            # Fall back to the raw value for codes missing from the errno
            # table; an exception raised in here would end the accept loop.
            code = e.args[0] if e.args else None
            errname = errno.errorcode.get(code, code)
            LOG.warning(
                _LW("Socket error %(err)s from: %(host)s:%(port)d"),
                {'host': addr[0], 'port': addr[1], 'err': errname})

        except struct.error:
            if client is not None:
                client.close()
            LOG.warning(_LW("Invalid packet from: %(host)s:%(port)d"),
                        {'host': addr[0], 'port': addr[1]})

        except Exception:
            if client is not None:
                client.close()
            LOG.exception(_LE("Unknown exception handling TCP request "
                              "from: %(host)s:%(port)d"),
                          {'host': addr[0], 'port': addr[1]})

        else:
            # Dispatch a thread to handle the query
            self.tg.add_thread(self._dns_handle, addr, payload,
                               client=client)
def _dns_handle_tcp(self, sock_tcp):
    """Accept TCP connections forever, handing each to its own thread.

    Each accepted client socket gets the configured receive timeout (if
    any) and is passed, with the client address, to
    ``self._dns_handle_tcp_conn`` on the thread group. Failures are logged
    and the loop moves on to the next connection.

    :param sock_tcp: Listening TCP socket to accept connections from
    """
    LOG.info(_LI("_handle_tcp thread started"))

    while True:
        # Reset per-connection state. If accept() itself fails, the handlers
        # below must not close the previous iteration's client (it is owned
        # by the thread it was dispatched to) or hit an unbound name.
        client = None
        addr = ('unknown', 0)

        try:
            # handle a new TCP connection
            client, addr = sock_tcp.accept()

            if self._service_config.tcp_recv_timeout:
                client.settimeout(self._service_config.tcp_recv_timeout)

            LOG.debug("Handling TCP Request from: %(host)s:%(port)d",
                      {'host': addr[0], 'port': addr[1]})

            # Dispatch a thread to handle the connection
            self.tg.add_thread(self._dns_handle_tcp_conn, addr, client)

        # NOTE: Any uncaught exceptions will result in the main loop
        # ending unexpectedly. Ensure proper ordering of blocks, and
        # ensure no exceptions are generated from within.
        except socket.timeout:
            if client is not None:
                client.close()
            LOG.warning(_LW("TCP Timeout from: %(host)s:%(port)d"),
                        {'host': addr[0], 'port': addr[1]})

        except socket.error as e:
            if client is not None:
                client.close()
            # Fall back to the raw value for codes missing from the errno
            # table instead of raising KeyError inside the handler.
            code = e.args[0] if e.args else None
            errname = errno.errorcode.get(code, code)
            LOG.warning(
                _LW("Socket error %(err)s from: %(host)s:%(port)d"),
                {'host': addr[0], 'port': addr[1], 'err': errname})

        except Exception:
            if client is not None:
                client.close()
            LOG.exception(_LE("Unknown exception handling TCP request "
                              "from: %(host)s:%(port)d"),
                          {'host': addr[0], 'port': addr[1]})
def periodic_recovery(self):
    """Run the periodic recovery passes under an all-tenants admin context.

    The delete, create and update passes are run in that order. An
    exception from any pass is logged and not propagated; the remaining
    passes of that run are skipped.

    :return: None
    """
    LOG.debug("Calling periodic_recovery.")

    admin_context = DesignateContext.get_admin_context(all_tenants=True)

    try:
        self._periodic_delete_domains_that_failed(admin_context)
        self._periodic_create_domains_that_failed(admin_context)
        self._periodic_update_domains_that_failed(admin_context)
    except Exception:
        message = _LE('An unhandled exception in periodic recovery '
                      'occurred. This should never happen!')
        LOG.exception(message)
def _handle_create(self, request):
    """Handle a CREATE request by transferring the zone into the backend.

    The request is refused when ``self._allowed`` rejects the requester.
    If the backend already reports a serial for the zone, nothing is
    created and an authoritative answer is returned. Otherwise the zone is
    fetched by AXFR from ``self.masters`` and created in the backend.

    :param request: DNS message with ``question`` and an ``environ`` dict
                    holding the client ``addr``
    :returns: DNS response message; rcode REFUSED when not allowed,
              SERVFAIL when the transfer or creation fails
    """
    response = dns.message.make_response(request)

    question = request.question[0]
    requester = request.environ['addr'][0]

    # to_text() yields bytes on some dnspython/Python combinations and text
    # on others; only decode when there is something to decode.
    zone_name = question.name.to_text()
    if isinstance(zone_name, bytes):
        zone_name = zone_name.decode('utf-8')

    if not self._allowed(request, requester, "CREATE", zone_name):
        response.set_rcode(dns.rcode.from_text("REFUSED"))
        return response

    serial = self.backend.find_zone_serial(zone_name)
    if serial is not None:
        # Does this warrant a warning?
        # There is a race condition between checking if the zone exists
        # and creating it.
        LOG.warning(_LW("Not creating %(name)s, zone already exists"),
                    {'name': zone_name})
        # Provide an authoritative answer
        response.flags |= dns.flags.AA
        return response

    LOG.debug("Received %(verb)s for %(name)s from %(host)s",
              {'verb': "CREATE", 'name': zone_name, 'host': requester})

    try:
        # Receive an AXFR from MiniDNS to populate the zone
        zone = dnsutils.do_axfr(zone_name, self.masters,
                                source=self.transfer_source)
        self.backend.create_zone(zone)
    except Exception as e:
        # Any failure is reported to the client as SERVFAIL; log it with
        # the full traceback so unexpected errors can be diagnosed.
        LOG.exception(_LE("Exception while creating zone %r"), e)
        response.set_rcode(dns.rcode.from_text("SERVFAIL"))
        return response

    # Provide an authoritative answer
    response.flags |= dns.flags.AA
    return response