def __zookeeper_path(dist_directory: str = MC_DIST_DIR, zookeeper_version: str = MC_ZOOKEEPER_VERSION) -> str:
    """Return path to where ZooKeeper distribution should be located.

    :param dist_directory: distribution directory, resolved under the Media Cloud root.
    :param zookeeper_version: ZooKeeper version string used to form the directory name.
    :return: absolute path to the expected "zookeeper-<version>" directory
             (existence is not verified here, unlike __solr_path()).
    """
    dist_path = resolve_absolute_path_under_mc_root(path=dist_directory)
    zookeeper_directory = "zookeeper-%s" % zookeeper_version

    # The local was mistakenly named "solr_path" (copy-paste from __solr_path());
    # renamed so the code reads correctly.
    zookeeper_path = os.path.join(dist_path, zookeeper_directory)
    return zookeeper_path
def run_solr_standalone(hostname: str = fqdn(),
                        port: int = MC_SOLR_STANDALONE_PORT,
                        base_data_dir: str = MC_SOLR_BASE_DATA_DIR,
                        dist_directory: str = MC_DIST_DIR,
                        solr_version: str = MC_SOLR_VERSION,
                        jvm_heap_size: str = MC_SOLR_STANDALONE_JVM_HEAP_SIZE):
    """Run standalone instance of Solr, installing Solr first if it is missing.

    :param hostname: hostname to bind to (defaults to this machine's FQDN).
    :param port: TCP port for the standalone instance.
    :param base_data_dir: base data directory, resolved under the Media Cloud root.
    :param dist_directory: directory that holds the Solr distribution.
    :param solr_version: Solr version to install / run.
    :param jvm_heap_size: JVM heap size passed through to the Solr process.
    :raises McSolrRunException: if the port is already in use.
    """
    if not __solr_is_installed(dist_directory=dist_directory, solr_version=solr_version):
        l.info("Solr is not installed, installing...")
        __install_solr(dist_directory=dist_directory, solr_version=solr_version)

    base_data_dir = resolve_absolute_path_under_mc_root(path=base_data_dir, must_exist=True)
    standalone_data_dir = __standalone_data_dir(base_data_dir=base_data_dir)

    if tcp_port_is_open(port=port):
        # Raise the project-specific exception (was a bare Exception) for
        # consistency with the rest of this module.
        raise McSolrRunException("Port %d is already open on this machine." % port)

    l.info("Starting standalone Solr instance on port %d..." % port)
    # NOTE(review): connect_timeout receives MC_SOLR_STANDALONE_CONNECT_RETRIES
    # (a retries constant) — confirm that __run_solr() expects a retry count here.
    __run_solr(hostname=hostname,
               port=port,
               instance_data_dir=standalone_data_dir,
               jvm_heap_size=jvm_heap_size,
               jvm_opts=MC_SOLR_STANDALONE_JVM_OPTS,
               connect_timeout=MC_SOLR_STANDALONE_CONNECT_RETRIES,
               dist_directory=dist_directory,
               solr_version=solr_version)
def run_solr_standalone(hostname: str = fqdn(),
                        port: int = MC_SOLR_STANDALONE_PORT,
                        base_data_dir: str = MC_SOLR_BASE_DATA_DIR,
                        dist_directory: str = MC_DIST_DIR,
                        solr_version: str = MC_SOLR_VERSION,
                        jvm_heap_size: str = MC_SOLR_STANDALONE_JVM_HEAP_SIZE):
    """Run standalone instance of Solr."""
    # Install the Solr distribution first if it is not there yet.
    solr_installed = __solr_is_installed(dist_directory=dist_directory, solr_version=solr_version)
    if not solr_installed:
        l.info("Solr is not installed, installing...")
        __install_solr(dist_directory=dist_directory, solr_version=solr_version)

    # Resolve where the standalone instance keeps its data.
    base_data_dir = resolve_absolute_path_under_mc_root(path=base_data_dir, must_exist=True)
    instance_data_dir = __standalone_data_dir(base_data_dir=base_data_dir)

    # Refuse to start if something already listens on the target port.
    if tcp_port_is_open(port=port):
        raise McSolrRunException("Port %d is already open on this machine." % port)

    l.info("Starting standalone Solr instance on port %d..." % port)
    __run_solr(hostname=hostname,
               port=port,
               instance_data_dir=instance_data_dir,
               jvm_heap_size=jvm_heap_size,
               jvm_opts=MC_SOLR_STANDALONE_JVM_OPTS,
               connect_timeout=MC_SOLR_STANDALONE_CONNECT_RETRIES,
               dist_directory=dist_directory,
               solr_version=solr_version)
def test_resolve_absolute_path_under_mc_root():
    """Exercise resolve_absolute_path_under_mc_root() with existing and missing paths."""
    # Current directory resolves to a non-empty absolute path.
    cwd_path = mc_paths.resolve_absolute_path_under_mc_root(path='.', must_exist=True)
    assert len(cwd_path) > 0

    # Path that exists
    existing_path = mc_paths.resolve_absolute_path_under_mc_root(path='mediawords.yml', must_exist=True)
    assert len(existing_path) > 0
    assert os.path.isfile(existing_path) is True

    # Path that does not exist
    missing_path = mc_paths.resolve_absolute_path_under_mc_root(
        path='TOTALLY_DOES_NOT_EXIST',
        must_exist=False,
    )
    assert len(missing_path) > 0
    assert os.path.isfile(missing_path) is False
def __solr_path(dist_directory: str = MC_DIST_DIR, solr_version: str = MC_SOLR_VERSION) -> str:
    """Return path to where Solr distribution should be located."""
    # Distribution directory must already exist; the versioned subdirectory
    # ("solr-<version>") is simply joined onto it.
    dist_path = resolve_absolute_path_under_mc_root(path=dist_directory, must_exist=True)
    return os.path.join(dist_path, "solr-%s" % solr_version)
def run_solr_shard(shard_num: int,
                   shard_count: int,
                   hostname: str = None,
                   starting_port: int = MC_SOLR_CLUSTER_STARTING_PORT,
                   base_data_dir: str = MC_SOLR_BASE_DATA_DIR,
                   dist_directory: str = MC_DIST_DIR,
                   solr_version: str = MC_SOLR_VERSION,
                   zookeeper_host: str = MC_SOLR_CLUSTER_ZOOKEEPER_HOST,
                   zookeeper_port: int = MC_SOLR_CLUSTER_ZOOKEEPER_PORT,
                   jvm_heap_size: str = MC_SOLR_CLUSTER_JVM_HEAP_SIZE) -> None:
    """Run Solr shard, install Solr if needed; read configuration from ZooKeeper.

    :param shard_num: 1-based number of the shard to start.
    :param shard_count: total number of shards in the cluster (passed to Solr as -DnumShards).
    :param hostname: hostname to bind to; defaults to this machine's FQDN when None.
    :param starting_port: base port from which the shard's port is derived.
    :param base_data_dir: base data directory, resolved under the Media Cloud root.
    :param dist_directory: directory that holds the Solr distribution.
    :param solr_version: Solr version to install / run.
    :param zookeeper_host: ZooKeeper host to read cluster configuration from.
    :param zookeeper_port: ZooKeeper port.
    :param jvm_heap_size: JVM heap size passed through to the Solr process.
    :raises McSolrRunException: if shard_num or shard_count is less than 1.
    """
    # Shard numbering is 1-based; reject nonsensical parameters up front.
    if shard_num < 1:
        raise McSolrRunException("Shard number must be 1 or greater.")
    if shard_count < 1:
        raise McSolrRunException("Shard count must be 1 or greater.")

    if not __solr_is_installed(dist_directory=dist_directory, solr_version=solr_version):
        log.info("Solr is not installed, installing...")
        __install_solr(dist_directory=dist_directory, solr_version=solr_version)

    # Resolve the FQDN only when the caller did not supply a hostname.
    if hostname is None:
        hostname = fqdn()

    base_data_dir = resolve_absolute_path_under_mc_root(path=base_data_dir, must_exist=True)

    # Each shard gets its own port (derived from starting_port) and data directory.
    shard_port = __shard_port(shard_num=shard_num, starting_port=starting_port)
    shard_data_dir = __shard_data_dir(shard_num=shard_num, base_data_dir=base_data_dir)

    # The shard reads its cluster configuration from ZooKeeper, so block until
    # ZooKeeper is reachable before starting Solr.
    log.info("Waiting for ZooKeeper to start on %s:%d..." % (zookeeper_host, zookeeper_port))
    wait_for_tcp_port_to_open(
        hostname=zookeeper_host,
        port=zookeeper_port,
        retries=MC_SOLR_CLUSTER_ZOOKEEPER_CONNECT_RETRIES)
    log.info("ZooKeeper is up!")

    log.info("Starting Solr shard %d on port %d..." % (shard_num, shard_port))
    # Extra JVM system properties that attach this instance to the SolrCloud cluster.
    # noinspection SpellCheckingInspection
    shard_args = [
        "-DzkHost=%s:%d" % (zookeeper_host, zookeeper_port),
        "-DnumShards=%d" % shard_count,
    ]
    # NOTE(review): connect_timeout receives MC_SOLR_CLUSTER_CONNECT_RETRIES
    # (a retries constant) — confirm that __run_solr() expects a retry count here.
    __run_solr(hostname=hostname,
               port=shard_port,
               instance_data_dir=shard_data_dir,
               jvm_heap_size=jvm_heap_size,
               jvm_opts=MC_SOLR_CLUSTER_JVM_OPTS,
               start_jar_args=shard_args,
               connect_timeout=MC_SOLR_CLUSTER_CONNECT_RETRIES,
               dist_directory=dist_directory,
               solr_version=solr_version)
def run_solr_shard(shard_num: int,
                   shard_count: int,
                   hostname: str = None,
                   starting_port: int = MC_SOLR_CLUSTER_STARTING_PORT,
                   base_data_dir: str = MC_SOLR_BASE_DATA_DIR,
                   dist_directory: str = MC_DIST_DIR,
                   solr_version: str = MC_SOLR_VERSION,
                   zookeeper_host: str = MC_SOLR_CLUSTER_ZOOKEEPER_HOST,
                   zookeeper_port: int = MC_SOLR_CLUSTER_ZOOKEEPER_PORT,
                   jvm_heap_size: str = MC_SOLR_CLUSTER_JVM_HEAP_SIZE) -> None:
    """Run Solr shard, install Solr if needed; read configuration from ZooKeeper."""
    # Guard clauses: shard numbering and shard count are both 1-based.
    if shard_num < 1:
        raise McSolrRunException("Shard number must be 1 or greater.")
    if shard_count < 1:
        raise McSolrRunException("Shard count must be 1 or greater.")

    solr_installed = __solr_is_installed(dist_directory=dist_directory, solr_version=solr_version)
    if not solr_installed:
        log.info("Solr is not installed, installing...")
        __install_solr(dist_directory=dist_directory, solr_version=solr_version)

    if hostname is None:
        hostname = fqdn()

    base_data_dir = resolve_absolute_path_under_mc_root(path=base_data_dir, must_exist=True)

    # Derive this shard's own port and data directory.
    solr_port = __shard_port(shard_num=shard_num, starting_port=starting_port)
    solr_data_dir = __shard_data_dir(shard_num=shard_num, base_data_dir=base_data_dir)

    # Block until ZooKeeper is reachable — the shard needs it for configuration.
    log.info("Waiting for ZooKeeper to start on %s:%d..." % (zookeeper_host, zookeeper_port))
    wait_for_tcp_port_to_open(hostname=zookeeper_host,
                              port=zookeeper_port,
                              retries=MC_SOLR_CLUSTER_ZOOKEEPER_CONNECT_RETRIES)
    log.info("ZooKeeper is up!")

    log.info("Starting Solr shard %d on port %d..." % (shard_num, solr_port))

    # JVM system properties that attach this instance to the cluster.
    # noinspection SpellCheckingInspection
    cluster_jvm_args = [
        "-DzkHost=%s:%d" % (zookeeper_host, zookeeper_port),
        "-DnumShards=%d" % shard_count,
    ]

    __run_solr(hostname=hostname,
               port=solr_port,
               instance_data_dir=solr_data_dir,
               jvm_heap_size=jvm_heap_size,
               jvm_opts=MC_SOLR_CLUSTER_JVM_OPTS,
               start_jar_args=cluster_jvm_args,
               connect_timeout=MC_SOLR_CLUSTER_CONNECT_RETRIES,
               dist_directory=dist_directory,
               solr_version=solr_version)
def upgrade_lucene_standalone_index(base_data_dir: str = MC_SOLR_BASE_DATA_DIR,
                                    dist_directory: str = MC_DIST_DIR,
                                    solr_version: str = MC_SOLR_VERSION):
    """Upgrade Lucene index using the IndexUpgrader tool to standalone instance."""
    base_data_dir = resolve_absolute_path_under_mc_root(path=base_data_dir, must_exist=True)

    # The index must not be in use while it is being upgraded.
    log.info("Making sure standalone instance isn't running...")
    port = MC_SOLR_STANDALONE_PORT
    if tcp_port_is_open(port=port):
        raise McSolrRunException("Solr standalone instance is running on port %d." % port)
    log.info("Made sure standalone instance isn't running.")

    log.info("Upgrading standalone instance indexes...")
    __upgrade_lucene_index(instance_data_dir=__standalone_data_dir(base_data_dir=base_data_dir),
                           dist_directory=dist_directory,
                           solr_version=solr_version)
    log.info("Upgraded standalone instance indexes...")
def upgrade_lucene_standalone_index(
        base_data_dir: str = MC_SOLR_BASE_DATA_DIR,
        dist_directory: str = MC_DIST_DIR,
        solr_version: str = MC_SOLR_VERSION
):
    """Upgrade the standalone instance's Lucene index using the IndexUpgrader tool.

    :param base_data_dir: base data directory, resolved under the Media Cloud root.
    :param dist_directory: directory that holds the Solr distribution.
    :param solr_version: Solr version whose IndexUpgrader is used.
    :raises McSolrRunException: if the standalone Solr instance is currently running.
    """
    base_data_dir = resolve_absolute_path_under_mc_root(path=base_data_dir, must_exist=True)

    # The index must not be in use while being upgraded.
    l.info("Making sure standalone instance isn't running...")
    port = MC_SOLR_STANDALONE_PORT
    if tcp_port_is_open(port=port):
        # Raise the project-specific exception (was a bare Exception) for
        # consistency with the rest of this module.
        raise McSolrRunException("Solr standalone instance is running on port %d." % port)
    l.info("Made sure standalone instance isn't running.")

    l.info("Upgrading standalone instance indexes...")
    standalone_data_dir = __standalone_data_dir(base_data_dir=base_data_dir)
    __upgrade_lucene_index(
        instance_data_dir=standalone_data_dir,
        dist_directory=dist_directory,
        solr_version=solr_version
    )
    l.info("Upgraded standalone instance indexes...")
def upgrade_lucene_shards_indexes(base_data_dir: str = MC_SOLR_BASE_DATA_DIR,
                                  dist_directory: str = MC_DIST_DIR,
                                  solr_version: str = MC_SOLR_VERSION):
    """Upgrade Lucene indexes using the IndexUpgrader tool to all shards."""
    base_data_dir = resolve_absolute_path_under_mc_root(path=base_data_dir, must_exist=True)

    # Try to guess shard count from how many shards are in data directory:
    # count consecutive shard data directories starting from shard 1.
    l.info("Looking for shards...")
    shard_count = 0
    while os.path.isdir(__shard_data_dir(shard_num=shard_count + 1, base_data_dir=base_data_dir)):
        shard_count += 1
    if shard_count < 2:
        raise McSolrRunException("Found less than 2 shards.")
    l.info("Found %d shards." % shard_count)

    # None of the shards may be in use while their indexes are upgraded.
    l.info("Making sure shards aren't running...")
    for cur_shard in range(1, shard_count + 1):
        cur_port = __shard_port(shard_num=cur_shard, starting_port=MC_SOLR_CLUSTER_STARTING_PORT)
        if tcp_port_is_open(port=cur_port):
            raise McSolrRunException("Solr shard %d is running on port %d." % (cur_shard, cur_port))
    l.info("Made sure shards aren't running.")

    l.info("Upgrading shard indexes...")
    for cur_shard in range(1, shard_count + 1):
        cur_data_dir = __shard_data_dir(shard_num=cur_shard, base_data_dir=base_data_dir)
        __upgrade_lucene_index(instance_data_dir=cur_data_dir,
                               dist_directory=dist_directory,
                               solr_version=solr_version)
    l.info("Upgraded shard indexes.")
def upgrade_lucene_shards_indexes(
        base_data_dir: str = MC_SOLR_BASE_DATA_DIR,
        dist_directory: str = MC_DIST_DIR,
        solr_version: str = MC_SOLR_VERSION
):
    """Upgrade Lucene indexes of all shards using the IndexUpgrader tool.

    :param base_data_dir: base data directory, resolved under the Media Cloud root.
    :param dist_directory: directory that holds the Solr distribution.
    :param solr_version: Solr version whose IndexUpgrader is used.
    :raises McSolrRunException: if fewer than 2 shards are found or any shard is running.
    """
    base_data_dir = resolve_absolute_path_under_mc_root(path=base_data_dir, must_exist=True)

    # Try to guess shard count from how many shards are in data directory
    l.info("Looking for shards...")
    shard_num = 0
    shard_count = 0
    while True:
        shard_num += 1
        shard_data_dir = __shard_data_dir(shard_num=shard_num, base_data_dir=base_data_dir)
        if os.path.isdir(shard_data_dir):
            shard_count += 1
        else:
            break
    if shard_count < 2:
        # Raise the project-specific exception (was a bare Exception) for
        # consistency with the rest of this module.
        raise McSolrRunException("Found less than 2 shards.")
    l.info("Found %d shards." % shard_count)

    # None of the shards may be in use while their indexes are upgraded.
    l.info("Making sure shards aren't running...")
    for shard_num in range(1, shard_count + 1):
        shard_port = __shard_port(shard_num=shard_num, starting_port=MC_SOLR_CLUSTER_STARTING_PORT)
        if tcp_port_is_open(port=shard_port):
            raise McSolrRunException("Solr shard %d is running on port %d." % (shard_num, shard_port))
    l.info("Made sure shards aren't running.")

    l.info("Upgrading shard indexes...")
    for shard_num in range(1, shard_count + 1):
        shard_data_dir = __shard_data_dir(shard_num=shard_num, base_data_dir=base_data_dir)
        __upgrade_lucene_index(
            instance_data_dir=shard_data_dir,
            dist_directory=dist_directory,
            solr_version=solr_version
        )
    l.info("Upgraded shard indexes.")
def run_zookeeper(dist_directory: str = MC_DIST_DIR,
                  listen: str = MC_ZOOKEEPER_LISTEN,
                  port: int = MC_ZOOKEEPER_PORT,
                  data_dir: str = MC_SOLR_BASE_DATA_DIR,
                  zookeeper_version: str = MC_ZOOKEEPER_VERSION,
                  solr_version: str = MC_SOLR_VERSION) -> None:
    """Run ZooKeeper, install if needed too.

    Writes zoo.cfg, starts zkServer.sh in the foreground as a child process,
    uploads the initial Solr collection configuration, then blocks forever.

    :param dist_directory: directory that holds the ZooKeeper distribution.
    :param listen: address for ZooKeeper's clientPortAddress.
    :param port: ZooKeeper client port.
    :param data_dir: base data directory, resolved under the Media Cloud root.
    :param zookeeper_version: ZooKeeper version to install / run.
    :param solr_version: Solr version whose collection configuration is uploaded.
    :raises McZooKeeperRunException: if the port is taken, the distribution is
        incomplete, or ZooKeeper fails to start.
    """
    if not __zookeeper_is_installed():
        log.info("ZooKeeper is not installed, installing...")
        __install_zookeeper()

    data_dir = resolve_absolute_path_under_mc_root(path=data_dir, must_exist=True)

    zookeeper_data_dir = os.path.join(data_dir, "mediacloud-cluster-zookeeper")
    if not os.path.isdir(zookeeper_data_dir):
        log.info("Creating data directory at %s..." % zookeeper_data_dir)
        mkdir_p(zookeeper_data_dir)

    if tcp_port_is_open(port=port):
        raise McZooKeeperRunException("Port %d is already open on this machine." % port)

    zookeeper_path = __zookeeper_path(dist_directory=dist_directory, zookeeper_version=zookeeper_version)

    zkserver_path = os.path.join(zookeeper_path, "bin", "zkServer.sh")
    if not os.path.isfile(zkserver_path):
        raise McZooKeeperRunException("zkServer.sh at '%s' was not found." % zkserver_path)

    log4j_properties_path = os.path.join(zookeeper_path, "conf", "log4j.properties")
    if not os.path.isfile(log4j_properties_path):
        # FIX: the original raise was missing the "% log4j_properties_path"
        # argument, so the message contained a literal "%s" placeholder.
        raise McZooKeeperRunException("log4j.properties at '%s' was not found." % log4j_properties_path)

    zoo_cnf_path = os.path.join(zookeeper_data_dir, "zoo.cfg")
    log.info("Creating zoo.cfg in '%s'..." % zoo_cnf_path)
    with open(zoo_cnf_path, 'w') as zoo_cnf:
        zoo_cnf.write("""
#
# This file is autogenerated. Please do not modify it!
#

clientPortAddress=%(listen)s
clientPort=%(port)d
dataDir=%(data_dir)s

# Must be between zkClientTimeout / 2 and zkClientTimeout / 20
tickTime=30000

initLimit=10
syncLimit=10
""" % {
            "listen": listen,
            "port": port,
            "data_dir": zookeeper_data_dir,
        })

    zookeeper_env = os.environ.copy()
    zookeeper_env["ZOOCFGDIR"] = zookeeper_data_dir  # Serves as configuration dir too
    zookeeper_env["ZOOCFG"] = "zoo.cfg"
    zookeeper_env["ZOO_LOG_DIR"] = zookeeper_data_dir
    zookeeper_env["SERVER_JVMFLAGS"] = "-Dlog4j.configuration=file://" + os.path.abspath(log4j_properties_path)

    args = [
        zkserver_path,
        "start-foreground"
    ]

    log.info("Starting ZooKeeper on %s:%d..." % (listen, port))
    log.debug("Running command: %s" % str(args))
    log.debug("Environment variables: %s" % str(zookeeper_env))
    process = subprocess.Popen(args, env=zookeeper_env)

    # Remember the PID so the signal / exit handlers can kill the child.
    global __zookeeper_pid
    __zookeeper_pid = process.pid

    # Declare that we don't care about the exit code of the child process so
    # it doesn't become a zombie when it gets killed in signal handler
    signal.signal(signal.SIGCHLD, signal.SIG_IGN)

    signal.signal(signal.SIGTERM, __kill_zookeeper_process)  # SIGTERM is handled differently for whatever reason
    atexit.register(__kill_zookeeper_process)

    log.info("ZooKeeper PID: %d" % __zookeeper_pid)

    log.info("Waiting for ZooKeeper to start at port %d..." % port)
    zookeeper_started = wait_for_tcp_port_to_open(port=port, retries=MC_ZOOKEEPER_CONNECT_RETRIES)
    if not zookeeper_started:
        raise McZooKeeperRunException("Unable to connect to ZooKeeper at port %d" % port)

    log.info("Uploading initial Solr collection configurations to ZooKeeper...")
    update_zookeeper_solr_configuration(zookeeper_host="localhost",
                                        zookeeper_port=port,
                                        dist_directory=dist_directory,
                                        solr_version=solr_version)

    log.info("ZooKeeper is ready on port %d!" % port)

    # Keep the parent process alive; the child is reaped by the handlers above.
    while True:
        time.sleep(1)
def __solr_home_path(solr_home_dir: str = MC_SOLR_HOME_DIR) -> str:
    """Return path to Solr home (with collection subdirectories)."""
    # Solr home must already exist on disk.
    return resolve_absolute_path_under_mc_root(path=solr_home_dir, must_exist=True)
def __raise_if_old_shards_exist() -> None:
    """Raise exception with migration instructions if old shard directories exist already.

    Scans the Media Cloud root for "mediacloud-shard-*" directories; if any are
    found, builds a step-by-step shell migration recipe and raises it as the
    exception message. Returns silently when there is nothing to migrate.

    :raises McSolrRunException: if old shard directories exist, or if their
        layout cannot be parsed / migrated automatically.
    """
    pwd = resolve_absolute_path_under_mc_root(path=".")
    old_shards = glob.glob(pwd + "/mediacloud-shard-*")
    if len(old_shards) == 0:
        # No old shards to migrate
        return

    # The highest shard number found determines the total shard count.
    num_shards = 0
    for old_shard_path in old_shards:
        old_shard_dir = os.path.basename(old_shard_path)
        old_shard_num = re.search(r"^mediacloud-shard-(\d+?)$", old_shard_dir)
        if old_shard_num is None:
            # Project-specific exceptions (were bare Exceptions) are raised
            # throughout for consistency with the rest of this module.
            raise McSolrRunException(
                "Unable to parse shard number for old shard directory '%s'" % old_shard_dir)
        old_shard_num = int(old_shard_num.group(1))
        num_shards = max(num_shards, old_shard_num)

    exc_message = "Old shards were found at paths:\n\n"
    for old_shard_path in old_shards:
        exc_message += "* %s\n" % old_shard_path
    exc_message += "\n"
    exc_message += "Please migrate them by running:\n"
    exc_message += "\n"
    exc_message += "cd %s\n" % pwd
    exc_message += "\n"

    exc_message += "# Create empty new shard directory structure for each shard:\n"
    for shard_num in range(1, num_shards + 1):
        exc_message += (
            "./run_solr_shard.py --shard_num %(shard_num)d --shard_count %(shard_count)d "
            + '|| echo "It\'s fine to fail at this point."\n'
        ) % {"shard_num": shard_num, "shard_count": num_shards}
    exc_message += "\n"

    exc_message += "# Move data from old shards to new ones\n"
    for shard_num in range(1, num_shards + 1):
        # NOTE(review): shard_solr_path is relative (not joined onto pwd) and
        # already ends with "/" before "/collection*" is appended — confirm
        # both are intended.
        shard_solr_path = "mediacloud-shard-%d/solr/" % shard_num
        shard_collection_paths = glob.glob(shard_solr_path + "/collection*")
        if len(shard_collection_paths) == 0:
            raise McSolrRunException("No collections found in shard '%d'" % shard_num)
        for collection_path in shard_collection_paths:
            collection_name = os.path.basename(collection_path)
            src_collection_data_path = os.path.join(shard_solr_path, collection_name, "data")
            if not os.path.isdir(src_collection_data_path):
                raise McSolrRunException(
                    "Source data directory '%s' does not exist." % src_collection_data_path)
            dst_shard_data_dir = __shard_data_dir(shard_num=shard_num)
            dst_collection_data_path = os.path.join(dst_shard_data_dir, collection_name, "data")
            if os.path.isdir(dst_collection_data_path):
                raise McSolrRunException(
                    "Destination data directory '%s' already exists." % dst_collection_data_path)
            exc_message += "mv %(src_collection_data_dir)s %(dst_collection_data_dir)s\n" % {
                "src_collection_data_dir": src_collection_data_path,
                "dst_collection_data_dir": dst_collection_data_path,
            }
    exc_message += "\n"

    exc_message += "# Remove old shards\n"
    for shard_num in range(1, num_shards + 1):
        exc_message += "rm -rf mediacloud-shard-%d/\n" % shard_num

    raise McSolrRunException(exc_message)
def __raise_if_old_shards_exist() -> None:
    """Raise exception with migration instructions if old shard directories exist already.

    Scans the Media Cloud root for "mediacloud-shard-*" directories; if any are
    found, builds a step-by-step shell migration recipe and raises it as the
    exception message. Returns silently when there is nothing to migrate.

    :raises McSolrRunException: if old shard directories exist, or if their
        layout cannot be parsed / migrated automatically.
    """
    pwd = resolve_absolute_path_under_mc_root(path=".")
    old_shards = glob.glob(pwd + "/mediacloud-shard-*")
    if len(old_shards) == 0:
        # No old shards to migrate
        return

    # The highest shard number found determines the total shard count.
    num_shards = 0
    for old_shard_path in old_shards:
        old_shard_dir = os.path.basename(old_shard_path)
        old_shard_num = re.search(r'^mediacloud-shard-(\d+?)$', old_shard_dir)
        if old_shard_num is None:
            raise McSolrRunException(
                "Unable to parse shard number for old shard directory '%s'" % old_shard_dir)
        old_shard_num = int(old_shard_num.group(1))
        num_shards = max(num_shards, old_shard_num)

    # Build the migration recipe incrementally; filesystem checks below may
    # raise before the message is complete, so statement order matters.
    exc_message = "Old shards were found at paths:\n\n"
    for old_shard_path in old_shards:
        exc_message += "* %s\n" % old_shard_path
    exc_message += "\n"
    exc_message += "Please migrate them by running:\n"
    exc_message += "\n"
    exc_message += "cd %s\n" % pwd
    exc_message += "\n"

    exc_message += "# Create empty new shard directory structure for each shard:\n"
    for shard_num in range(1, num_shards + 1):
        exc_message += (
            "./run_solr_shard.py --shard_num %(shard_num)d --shard_count %(shard_count)d "
            + "|| echo \"It's fine to fail at this point.\"\n") % {
            "shard_num": shard_num,
            "shard_count": num_shards,
        }
    exc_message += "\n"

    exc_message += "# Move data from old shards to new ones\n"
    for shard_num in range(1, num_shards + 1):
        # NOTE(review): shard_solr_path is relative (not joined onto pwd) and
        # already ends with "/" before "/collection*" is appended — confirm
        # both are intended.
        shard_solr_path = "mediacloud-shard-%d/solr/" % shard_num
        shard_collection_paths = glob.glob(shard_solr_path + "/collection*")
        if len(shard_collection_paths) == 0:
            raise McSolrRunException("No collections found in shard '%d'" % shard_num)
        for collection_path in shard_collection_paths:
            collection_name = os.path.basename(collection_path)
            src_collection_data_path = os.path.join(shard_solr_path, collection_name, "data")
            if not os.path.isdir(src_collection_data_path):
                raise McSolrRunException(
                    "Source data directory '%s' does not exist." % src_collection_data_path)
            dst_shard_data_dir = __shard_data_dir(shard_num=shard_num)
            dst_collection_data_path = os.path.join(dst_shard_data_dir, collection_name, "data")
            if os.path.isdir(dst_collection_data_path):
                raise McSolrRunException(
                    "Destination data directory '%s' already exists." % dst_collection_data_path)
            exc_message += "mv %(src_collection_data_dir)s %(dst_collection_data_dir)s\n" % {
                "src_collection_data_dir": src_collection_data_path,
                "dst_collection_data_dir": dst_collection_data_path,
            }
    exc_message += "\n"

    exc_message += "# Remove old shards\n"
    for shard_num in range(1, num_shards + 1):
        exc_message += "rm -rf mediacloud-shard-%d/\n" % shard_num

    raise McSolrRunException(exc_message)
def run_zookeeper(dist_directory: str = MC_DIST_DIR,
                  listen: str = MC_ZOOKEEPER_LISTEN,
                  port: int = MC_ZOOKEEPER_PORT,
                  data_dir: str = MC_SOLR_BASE_DATA_DIR,
                  zookeeper_version: str = MC_ZOOKEEPER_VERSION,
                  solr_version: str = MC_SOLR_VERSION) -> None:
    """Run ZooKeeper, install if needed too.

    Writes zoo.cfg, starts zkServer.sh in the foreground as a child process,
    uploads the initial Solr collection configuration, then blocks forever.

    :param dist_directory: directory that holds the ZooKeeper distribution.
    :param listen: address for ZooKeeper's clientPortAddress.
    :param port: ZooKeeper client port.
    :param data_dir: base data directory, resolved under the Media Cloud root.
    :param zookeeper_version: ZooKeeper version to install / run.
    :param solr_version: Solr version whose collection configuration is uploaded.
    :raises McZooKeeperRunException: if the port is taken, the distribution is
        incomplete, or ZooKeeper fails to start.
    """
    if not __zookeeper_is_installed():
        log.info("ZooKeeper is not installed, installing...")
        __install_zookeeper()

    data_dir = resolve_absolute_path_under_mc_root(path=data_dir, must_exist=True)

    zookeeper_data_dir = os.path.join(data_dir, "mediacloud-cluster-zookeeper")
    if not os.path.isdir(zookeeper_data_dir):
        log.info("Creating data directory at %s..." % zookeeper_data_dir)
        mkdir_p(zookeeper_data_dir)

    if tcp_port_is_open(port=port):
        raise McZooKeeperRunException(
            "Port %d is already open on this machine." % port)

    zookeeper_path = __zookeeper_path(dist_directory=dist_directory, zookeeper_version=zookeeper_version)

    zkserver_path = os.path.join(zookeeper_path, "bin", "zkServer.sh")
    if not os.path.isfile(zkserver_path):
        raise McZooKeeperRunException("zkServer.sh at '%s' was not found." % zkserver_path)

    log4j_properties_path = os.path.join(zookeeper_path, "conf", "log4j.properties")
    if not os.path.isfile(log4j_properties_path):
        # FIX: the original raise was missing the "% log4j_properties_path"
        # argument, so the message contained a literal "%s" placeholder.
        raise McZooKeeperRunException(
            "log4j.properties at '%s' was not found." % log4j_properties_path)

    zoo_cnf_path = os.path.join(zookeeper_data_dir, "zoo.cfg")
    log.info("Creating zoo.cfg in '%s'..." % zoo_cnf_path)
    with open(zoo_cnf_path, 'w') as zoo_cnf:
        zoo_cnf.write("""
#
# This file is autogenerated. Please do not modify it!
#

clientPortAddress=%(listen)s
clientPort=%(port)d
dataDir=%(data_dir)s

# Must be between zkClientTimeout / 2 and zkClientTimeout / 20
tickTime=30000

initLimit=10
syncLimit=10
""" % {
            "listen": listen,
            "port": port,
            "data_dir": zookeeper_data_dir,
        })

    zookeeper_env = os.environ.copy()
    zookeeper_env["ZOOCFGDIR"] = zookeeper_data_dir  # Serves as configuration dir too
    zookeeper_env["ZOOCFG"] = "zoo.cfg"
    zookeeper_env["ZOO_LOG_DIR"] = zookeeper_data_dir
    zookeeper_env["SERVER_JVMFLAGS"] = "-Dlog4j.configuration=file://" + os.path.abspath(log4j_properties_path)

    args = [zkserver_path, "start-foreground"]

    log.info("Starting ZooKeeper on %s:%d..." % (listen, port))
    log.debug("Running command: %s" % str(args))
    log.debug("Environment variables: %s" % str(zookeeper_env))
    process = subprocess.Popen(args, env=zookeeper_env)

    # Remember the PID so the signal / exit handlers can kill the child.
    global __zookeeper_pid
    __zookeeper_pid = process.pid

    # Declare that we don't care about the exit code of the child process so
    # it doesn't become a zombie when it gets killed in signal handler
    signal.signal(signal.SIGCHLD, signal.SIG_IGN)

    signal.signal(signal.SIGTERM, __kill_zookeeper_process)  # SIGTERM is handled differently for whatever reason
    atexit.register(__kill_zookeeper_process)

    log.info("ZooKeeper PID: %d" % __zookeeper_pid)

    log.info("Waiting for ZooKeeper to start at port %d..." % port)
    zookeeper_started = wait_for_tcp_port_to_open(port=port, retries=MC_ZOOKEEPER_CONNECT_RETRIES)
    if not zookeeper_started:
        raise McZooKeeperRunException("Unable to connect to ZooKeeper at port %d" % port)

    log.info("Uploading initial Solr collection configurations to ZooKeeper...")
    update_zookeeper_solr_configuration(zookeeper_host="localhost",
                                        zookeeper_port=port,
                                        dist_directory=dist_directory,
                                        solr_version=solr_version)

    log.info("ZooKeeper is ready on port %d!" % port)

    # Keep the parent process alive; the child is reaped by the handlers above.
    while True:
        time.sleep(1)