def get_cluster_topology(self, seed_nodes):
    """Build a merged Topology by querying seed nodes with nodetool.

    Tries each seed node in turn (giving up after MAX_ATTEMPTS hosts),
    running ``nodetool describecluster`` and ``nodetool status``.  One
    topology is parsed per distinct cluster name; as a side effect the
    per-cluster schema versions and status-topology hashes are recorded
    on ``self``.

    :param seed_nodes: iterable of candidate seed hosts
    :returns: Topology containing the union of all parsed hosts
    :raises HostIsDown: if no seed node produced a usable topology
    """
    cluster_names = []
    self.schema_versions = []
    self.status_topology_hash = []
    topologies = []
    count = 0
    tried_hosts = []
    for host in seed_nodes:
        tried_hosts.append(host)
        conn = self._connection(host)
        describe_res = self.run_nodetool(conn, "describecluster")
        status_res = self.run_nodetool(conn, "status")
        if (describe_res.status == 0) and (status_res.status == 0):
            (cluster_name, schema_version) = cstar.nodetoolparser.parse_describe_cluster(describe_res.out)
            # Only parse one topology per distinct cluster name.
            if cluster_name not in cluster_names:
                cluster_names.append(cluster_name)
                topology = cstar.nodetoolparser.parse_nodetool_status(
                    status_res.out, cluster_name, self.reverse_dns_preheat, self.resolve_hostnames)
                topologies.append(topology)
                self.schema_versions.append(schema_version)
                self.status_topology_hash.append(topology.get_hash())
        count += 1
        if count >= MAX_ATTEMPTS:
            break
    if topologies:
        # Merge the per-cluster topologies into one host set.
        final_topology = set()
        for topology in topologies:
            final_topology.update(topology.hosts)
        return Topology(final_topology)
    raise HostIsDown("Could not find any working host while fetching topology. Is Cassandra actually running? Tried the following hosts:",
                     ", ".join([x.ip for x in tried_hosts]))
def get_endpoint_mapping(self, topology):
    """Fetch the token-range -> replica endpoint mapping.

    Tries up-hosts one at a time (giving up after MAX_ATTEMPTS) until one
    of them can answer ``nodetool describering`` for every keyspace of
    interest (the configured keyspace, or all non-"system" keyspaces).

    :param topology: Topology whose up-hosts are candidate coordinators
    :returns: merged endpoint mapping from cstar.endpoint_mapping.merge
    :raises HostIsDown: if no host could provide the mapping
    """
    count = 0
    tried_hosts = []
    for host in topology.get_up():
        tried_hosts.append(host)
        conn = self._connection(host)
        mappings = []
        # Restrict to the configured keyspace when one was given.
        if self.key_space:
            keyspaces = [self.key_space]
        else:
            keyspaces = self.get_keyspaces(conn)
        # Bug fix: the original first assigned has_error inside the loop,
        # raising UnboundLocalError when no non-"system" keyspace existed.
        has_error = False
        for keyspace in keyspaces:
            if keyspace != "system":
                debug("Fetching endpoint mapping for keyspace", keyspace)
                res = conn.run(("nodetool", "describering", keyspace))
                if res.status != 0:
                    has_error = True
                    break
                describering = cstar.nodetoolparser.parse_nodetool_describering(res.out)
                range_mapping = cstar.nodetoolparser.convert_describering_to_range_mapping(describering)
                mappings.append(cstar.endpoint_mapping.parse(range_mapping, topology, lookup=ip_lookup))
        if not has_error:
            return cstar.endpoint_mapping.merge(mappings)
        count += 1
        if count >= MAX_ATTEMPTS:
            break
    raise HostIsDown("Could not find any working host while fetching endpoint mapping. Tried the following hosts:",
                     ", ".join(host.fqdn for host in tried_hosts))
def find_next_host(strategy, topology, endpoint_mapping, progress, cluster_parallel, dc_parallel, max_concurrency, stop_after, ignore_down_nodes):
    """Entry point for figuring out which host to do things on next."""
    # Honor the stop_after budget across running, finished and failed work.
    handled = len(progress.running) + len(progress.done) + len(progress.failed)
    if stop_after and handled >= stop_after:
        return None

    # Hosts not yet touched in any way.
    candidates = (topology
                  .without_hosts(progress.done)
                  .without_hosts(progress.running)
                  .without_hosts(progress.failed))

    # While work is in flight, optionally serialize on cluster and/or DC.
    if progress.running:
        if not cluster_parallel:
            candidates = candidates.with_cluster(next(iter(progress.running)).cluster)
        if not dc_parallel:
            candidates = candidates.with_dc(next(iter(progress.running)).dc)

    if not candidates:
        return None
    if max_concurrency and len(progress.running) >= max_concurrency:
        return None

    # Unless told otherwise, refuse to proceed past any down candidate.
    if not ignore_down_nodes:
        for candidate in candidates:
            if not candidate.is_up:
                raise HostIsDown(candidate)

    return _strategy_mapping[strategy](candidates, endpoint_mapping, progress.running)
def get_endpoint_mapping(self, topology):
    """Fetch (and cache) the token-range -> replica mapping, one host per cluster.

    Returns the cached mapping when available; otherwise queries one up-host
    per cluster with ``nodetool describering`` for every non-"system*"
    keyspace, merges the results, pickles them to the cache file, and
    returns them.

    :param topology: Topology whose up-hosts are candidate coordinators
    :returns: merged endpoint mapping
    :raises HostIsDown: if some hosts could not be reached
    """
    clusters = []
    failed_hosts = []
    mappings = []
    count = 0

    endpoint_mappings = self.maybe_get_data_from_cache("endpoint_mapping")
    if endpoint_mappings is not None:
        return endpoint_mappings

    for host in topology.get_up():
        if host.cluster in clusters:
            # We need to fetch keyspaces on one node per cluster, no more.
            continue
        # NOTE(review): resetting count here makes the MAX_ATTEMPTS check
        # below effectively dead (count is always 0 when tested). Kept as-is
        # to preserve behavior; confirm the intended retry semantics.
        count = 0
        conn = self._connection(host)
        if self.key_space:
            keyspaces = [self.key_space]
        else:
            keyspaces = self.get_keyspaces(conn)
        has_error = True
        for keyspace in keyspaces:
            if not keyspace.startswith("system"):
                debug("Fetching endpoint mapping for keyspace", keyspace)
                res = self.run_nodetool(conn, *("describering", keyspace))
                has_error = False
                if res.status != 0 and not keyspace.startswith("system"):
                    has_error = True
                    break
                describering = cstar.nodetoolparser.parse_nodetool_describering(res.out)
                range_mapping = cstar.nodetoolparser.convert_describering_to_range_mapping(describering)
                mappings.append(cstar.endpoint_mapping.parse(range_mapping, topology, lookup=ip_lookup))
        if has_error:
            if count >= MAX_ATTEMPTS:
                # Bug fix: the original did ``failed_hosts += host``, which
                # extends the list with the Host's fields instead of the
                # Host itself, breaking the ``host.fqdn`` join below.
                failed_hosts.append(host)
                break
        else:
            clusters.append(host.cluster)
        count += 1

    if failed_hosts:
        raise HostIsDown("Following hosts couldn't be reached: {}".format(
            ', '.join(host.fqdn for host in failed_hosts)))

    endpoint_mappings = cstar.endpoint_mapping.merge(mappings)
    # Close the cache file deterministically (the original leaked the handle).
    with open(self.get_cache_file_path("endpoint_mapping"), 'wb') as cache_file:
        pickle.dump(dict(endpoint_mappings), cache_file)
    return endpoint_mappings
def get_cluster_topology(self, seed_nodes):
    """Resolve the cluster topology by querying seed nodes via nodetool.

    Each seed (up to MAX_ATTEMPTS of them) is asked for ``describecluster``
    and ``ring``; the first seed answering both successfully wins.

    :param seed_nodes: iterable of candidate seed hosts
    :returns: parsed topology for the first responsive seed
    :raises HostIsDown: if no seed node answered both commands
    """
    tried_hosts = []
    for attempt, host in enumerate(seed_nodes, start=1):
        tried_hosts.append(host)
        conn = self._connection(host)
        describe_res = conn.run(("nodetool", "describecluster"))
        topology_res = conn.run(("nodetool", "ring"))
        # Both commands must succeed before we trust the output.
        if describe_res.status == 0 and topology_res.status == 0:
            cluster_name = cstar.nodetoolparser.parse_describe_cluster(describe_res.out)
            return cstar.nodetoolparser.parse_nodetool_ring(
                topology_res.out, cluster_name, self.reverse_dns_preheat)
        if attempt >= MAX_ATTEMPTS:
            break
    raise HostIsDown("Could not find any working host while fetching topology. Is Cassandra actually running? Tried the following hosts:",
                     ", ".join(tried_hosts))
def run(self):
    """Main job loop: schedule runnable work until the job state is done.

    Refuses to start (raises HostIsDown) if the current topology has down
    hosts; otherwise alternates between scheduling all runnable jobs and
    waiting for one to complete, then drains remaining jobs and prints the
    final progress and outcome.
    """
    self.do_loop = True
    # Persist job state up front so the run can be inspected/resumed.
    cstar.jobwriter.write(self)
    if not self.state.is_healthy():
        raise HostIsDown(
            "Can't run job because hosts are down: " + ", ".join(
                host.fqdn for host in self.state.current_topology.get_down()))
    while self.do_loop:
        self.schedule_all_runnable_jobs()
        if self.state.is_done():
            self.do_loop = False
        # NOTE(review): this also runs on the final pass after is_done() —
        # presumably it returns promptly once all jobs have finished; confirm
        # against wait_for_any_job's implementation.
        self.wait_for_any_job()
    self.wait_for_all_jobs()
    cstar.jobprinter.print_progress(self.state.original_topology,
                                    self.state.progress,
                                    self.state.current_topology.get_down())
    self.print_outcome()