def test_tcp_port_is_open():
    """Check that tcp_port_is_open() follows the lifecycle of a listening socket.

    The port is expected to be reported as closed before anything listens on
    it, open while a socket is listening, and closed again after the socket
    has been closed.
    """
    random_port = random_unused_port()
    assert tcp_port_is_open(random_port) is False

    # Open port
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        s.bind(('localhost', random_port))
        s.listen()
        assert tcp_port_is_open(random_port) is True
    finally:
        # Close port even if the assertion above fails, so the listening
        # socket does not leak into whatever runs next
        s.close()

    assert tcp_port_is_open(random_port) is False
def optimize_solr_index(host: str = "localhost", port: int = MC_SOLR_STANDALONE_PORT, collections: List[str] = None):
    """Optimize collection indexes.

    In SolrCloud cluster, optimization command run on one of the shards will trigger optimization on all of them.

    Arguments:
    host - hostname of the Solr instance to send the optimize requests to
    port - port of the Solr instance
    collections - names of collections to optimize; when None, the keys returned by __collections() are used

    Raises Exception if nothing is listening on host:port, or if an optimize request fails.
    """
    if collections is None:
        collections = __collections().keys()
    l.debug("Solr collections to reindex: %s" % ", ".join(collections))

    if not tcp_port_is_open(hostname=host, port=port):
        raise Exception("Solr is not running on %s:%d." % (host, port))

    l.info("Optimizing indexes on %s:%d..." % (host, port))

    for collection_name in sorted(collections):
        l.info("Optimizing collection's '%s' index on %s:%d..." % (collection_name, host, port))
        url = "http://%(host)s:%(port)d/solr/%(collection_name)s/update?optimize=true" % {
            "host": host,
            "port": port,
            "collection_name": collection_name,
        }
        l.debug("Requesting URL %s..." % url)

        try:
            # The response body is not needed; the context manager only makes
            # sure the HTTP response gets closed
            with urlopen(url):
                pass
        except URLError as e:
            raise Exception(
                "Unable to optimize collection '%s' index on %s:%d: %s" % (collection_name, host, port, e.reason)
            ) from e

    l.info("Optimized indexes on %s:%d." % (host, port))
def reload_solr_shard(shard_num: int, host: str = "localhost", starting_port: int = MC_SOLR_CLUSTER_STARTING_PORT):
    """Reload Solr shard after ZooKeeper configuration change.

    Arguments:
    shard_num - shard number, 1 or greater
    host - hostname on which the shard is running
    starting_port - port passed to __shard_port() to work out the port of this shard

    Raises Exception if the shard number is invalid, the shard's port is not open, or a reload request fails.
    """
    if shard_num < 1:
        raise Exception("Shard number must be 1 or greater.")

    shard_port = __shard_port(shard_num=shard_num, starting_port=starting_port)

    if not tcp_port_is_open(hostname=host, port=shard_port):
        raise Exception("Shard %d is not running on %s:%d." % (shard_num, host, shard_port))

    l.info("Reloading shard %d on %s:%d..." % (shard_num, host, shard_port))

    collections = __collections()
    l.debug("Solr collections: %s" % collections)

    # Only collection names are needed to build the reload URL
    for collection_name in sorted(collections):
        l.info("Reloading collection '%s' on shard %d on %s:%d..." % (collection_name, shard_num, host, shard_port))
        url = "http://%(host)s:%(port)d/solr/admin/cores?action=RELOAD&core=%(collection_name)s" % {
            "host": host,
            "port": shard_port,
            "collection_name": collection_name,
        }
        l.debug("Requesting URL %s..." % url)

        try:
            # The response body is not needed; the context manager only makes
            # sure the HTTP response gets closed
            with urlopen(url):
                pass
        except URLError as e:
            raise Exception(
                "Unable to reload shard %d on %s:%d: %s" % (shard_num, host, shard_port, e.reason)
            ) from e

    l.info("Reloaded shard %d on %s:%d." % (shard_num, host, shard_port))
def run_solr_standalone(
    hostname: str = fqdn(),
    port: int = MC_SOLR_STANDALONE_PORT,
    base_data_dir: str = MC_SOLR_BASE_DATA_DIR,
    dist_directory: str = MC_DIST_DIR,
    solr_version: str = MC_SOLR_VERSION,
    jvm_heap_size: str = MC_SOLR_STANDALONE_JVM_HEAP_SIZE,
):
    """Install Solr if it is missing, then start a standalone instance.

    Arguments:
    hostname - hostname handed over to __run_solr()
    port - port the instance should listen on; must not be open yet
    base_data_dir - base data directory, resolved under the Media Cloud root (must exist)
    dist_directory - directory of the Solr distribution
    solr_version - Solr version to install / run
    jvm_heap_size - JVM heap size handed over to __run_solr()

    Raises Exception if the port is already open.
    """
    # Same distribution parameters are used for the install check, the
    # installation itself and for running Solr
    solr_dist = {"dist_directory": dist_directory, "solr_version": solr_version}

    installed = __solr_is_installed(**solr_dist)
    if not installed:
        l.info("Solr is not installed, installing...")
        __install_solr(**solr_dist)

    data_root = resolve_absolute_path_under_mc_root(path=base_data_dir, must_exist=True)
    instance_dir = __standalone_data_dir(base_data_dir=data_root)

    port_taken = tcp_port_is_open(port=port)
    if port_taken:
        raise Exception("Port %d is already open on this machine." % port)

    l.info("Starting standalone Solr instance on port %d..." % port)
    __run_solr(
        hostname=hostname,
        port=port,
        instance_data_dir=instance_dir,
        jvm_heap_size=jvm_heap_size,
        jvm_opts=MC_SOLR_STANDALONE_JVM_OPTS,
        connect_timeout=MC_SOLR_STANDALONE_CONNECT_RETRIES,
        **solr_dist
    )
def stop(self):
    """Stop the webserver.

    Does nothing (apart from logging a warning) if the port is not open or if no server process has been recorded.

    Raises McHashServerException if the port is still open after the server process has been stopped.
    """
    if not tcp_port_is_open(port=self.__port):
        log.warning("Port %d is not open." % self.__port)
        return

    if self.__http_server_thread is None:
        log.warning("HTTP server process is None.")
        return

    log.info('Stopping test web server %s:%d' % (self.__host, self.__port,))

    # HTTP server itself is running in a fork, and it creates forks for every request which, at the point of killing
    # the server, might be in various states. So, we just SIGKILL all those PIDs in the most gruesome way.
    #
    # The lock is held via a context manager so that it gets released even if something unexpected is raised while
    # going through the PIDs.
    with self.__http_server_active_pids_lock:
        for pid, value in self.__http_server_active_pids.items():
            if value is True:
                log.debug("Killing PID %d" % pid)
                try:
                    os.kill(pid, signal.SIGKILL)
                    self.__http_server_active_pids[pid] = False
                except OSError as ex:
                    log.error("Unable to kill PID %d: %s" % (pid, str(ex),))

    # NOTE(review): join() runs before terminate(), so this presumably relies on the server process having been
    # killed in the loop above -- confirm
    self.__http_server_thread.join()
    self.__http_server_thread.terminate()
    self.__http_server_thread = None

    if not wait_for_tcp_port_to_close(port=self.__port, retries=20, delay=0.1):
        raise McHashServerException("Port %d is still open." % self.__port)
def start(self, delay: int = 0):
    """Launch the webserver in a separate daemon process.

    Arguments:
    delay - number of seconds to delay before starting server; it is passed on to the server process, and only
            when it is 0 does this method wait for the port to open

    Raises McHashServerException if the port is already open, or if (with no delay) it does not open in time.
    """
    if tcp_port_is_open(port=self.__port):
        raise McHashServerException("Port %d is already open." % self.__port)

    log.info('Starting test web server %s:%d' % (self.__host, self.__port,))
    log.debug('Pages: %s' % str(self.__pages))

    server_args = (
        self.__host,
        self.__port,
        self.__pages,
        self.__http_server_active_pids,
        self.__http_server_active_pids_lock,
        delay,
    )

    # "threading.Thread()" doesn't work with Perl callers
    server_process = multiprocessing.Process(target=self.__start_http_server, args=server_args)
    self.__http_server_thread = server_process
    server_process.daemon = True
    server_process.start()

    if delay != 0:
        # Delayed start requested, so don't wait for the port
        return

    port_opened = wait_for_tcp_port_to_open(port=self.__port, retries=20, delay=0.1)
    if not port_opened:
        raise McHashServerException("Port %d is not open." % self.__port)
def __init__(self, port: int, pages: dict):
    """Set up the HTTP server (without starting it).

    Arguments:
    port - port to start server on (0 to pick the first port at or above START_RANDOM_PORT that is not open)
    pages - dict describing pages to serve, as described in docstring above
    """
    self.__host = '127.0.0.1'
    self.__http_server_thread = None

    if len(pages) < 1:
        log.warning("Pages dictionary is empty.")

    chosen_port = port
    if chosen_port == 0:
        # Walk upwards from the first "random" port until a free one is found
        chosen_port = START_RANDOM_PORT
        while tcp_port_is_open(chosen_port):
            chosen_port += 1
    self.__port = chosen_port

    # MC_REWRITE_TO_PYTHON: Decode page keys from bytes
    self.__pages = dict(
        (decode_object_from_bytes_if_needed(path), page) for path, page in pages.items()
    )

    # PID registry and its lock get handed over to the server process on start()
    self.__http_server_active_pids = multiprocessing.Manager().dict()
    self.__http_server_active_pids_lock = multiprocessing.Lock()
def __run_solr_zkcli(
    zkcli_args: List[str],
    zookeeper_host: str = MC_SOLR_CLUSTER_ZOOKEEPER_HOST,
    zookeeper_port: int = MC_SOLR_CLUSTER_ZOOKEEPER_PORT,
    dist_directory: str = MC_DIST_DIR,
    solr_version: str = MC_SOLR_VERSION,
) -> None:
    """Run Solr's zkcli.sh helper script.

    The ZkCLI Java class is run directly with a classpath assembled from the Solr distribution.

    Arguments:
    zkcli_args - arguments appended after the "-zkhost" option
    zookeeper_host - ZooKeeper hostname
    zookeeper_port - ZooKeeper port
    dist_directory - directory of the Solr distribution
    solr_version - Solr version

    Raises Exception if log4j.properties can't be found in any of the expected locations, or if ZooKeeper's port
    is not open.
    """
    solr_path = __solr_path(dist_directory=dist_directory, solr_version=solr_version)

    # Computed once; used both for locating log4j.properties and for the classpath
    jetty_home_path = __jetty_home_path(dist_directory=dist_directory, solr_version=solr_version)

    log4j_properties_expected_paths = [
        # Solr 4.6
        os.path.join(jetty_home_path, "cloud-scripts", "log4j.properties"),
        # Solr 4.10+
        os.path.join(jetty_home_path, "scripts", "cloud-scripts", "log4j.properties"),
    ]

    # First expected path that exists wins
    log4j_properties_path = next(
        (path for path in log4j_properties_expected_paths if os.path.isfile(path)),
        None,
    )
    if log4j_properties_path is None:
        raise Exception(
            "Unable to find log4j.properties file for zkcli.sh script in paths: %s"
            % str(log4j_properties_expected_paths)
        )

    if not tcp_port_is_open(hostname=zookeeper_host, port=zookeeper_port):
        raise Exception("ZooKeeper is not running at %s:%d." % (zookeeper_host, zookeeper_port))

    zkhost = "%s:%d" % (zookeeper_host, zookeeper_port)

    java_classpath_dirs = [
        # Solr 4
        os.path.join(solr_path, "dist", "*"),
        os.path.join(jetty_home_path, "solr-webapp", "webapp", "WEB-INF", "lib", "*"),
        os.path.join(jetty_home_path, "lib", "ext", "*"),
    ]

    args = [
        "java",
        "-classpath", ":".join(java_classpath_dirs),
        "-Dlog4j.configuration=file://" + os.path.abspath(log4j_properties_path),
        "org.apache.solr.cloud.ZkCLI",
        "-zkhost", zkhost,
    ] + zkcli_args

    run_command_in_foreground(args)
def _fetch_url(
        db: DatabaseHandler,
        url: str,
        network_down_host: str = DEFAULT_NETWORK_DOWN_HOST,
        network_down_port: int = DEFAULT_NETWORK_DOWN_PORT,
        network_down_timeout: int = DEFAULT_NETWORK_DOWN_TIMEOUT,
        domain_timeout: typing.Optional[int] = None) -> FetchLinkResponse:
    """Fetch a URL, retrying for as long as the network appears to be down.

    URLs with a binary extension are not fetched; a dummy "bypassed" response is returned for them. URLs that are
    not HTTP URLs yield a 400 response with 'bad url' as content without any request being made.

    When a response comes back as a 400 error and network_down_host:network_down_port is not reachable, the
    function sleeps for network_down_timeout seconds and tries again; in every other case the response is returned
    as it is.

    NOTE(review): the previous docstring said that McGetException gets caught here; no such handler is visible in
    this function -- confirm where (if anywhere) that happens.

    Arguments:
    db - db handle
    url - url to fetch
    network_down_host - host to check if network is down on error
    network_down_port - port to check if network is down on error
    network_down_timeout - seconds to wait if the network is down
    domain_timeout - value to pass to ThrottledUserAgent()

    Returns:
    Response object
    """
    if mediawords.tm.stories.url_has_binary_extension(url):
        return _make_dummy_bypassed_response(url)

    bad_request = HTTPStatus.BAD_REQUEST

    while True:
        ua = ThrottledUserAgent(db, domain_timeout=domain_timeout)

        if not mediawords.util.url.is_http_url(url):
            response = FetchLinkResponse(
                url=url,
                is_success=False,
                code=bad_request.value,
                message=bad_request.phrase,
                content='bad url',
                last_requested_url=None,
            )
        else:
            ua_response = ua.get_follow_http_html_redirects(url)
            response = FetchLinkResponse.from_useragent_response(url, ua_response)

        # Successes and failures other than 400 are returned straight away
        if response.is_success or response.code != bad_request.value:
            return response

        # 400 with the network being reachable is a genuine failure
        if tcp_port_is_open(port=network_down_port, hostname=network_down_host):
            return response

        log.warning("Response failed with %s and network is down.  Waiting to retry ..." % (url,))
        time.sleep(network_down_timeout)
def _fetch_url(
        db: DatabaseHandler,
        url: str,
        network_down_host: str = DEFAULT_NETWORK_DOWN_HOST,
        network_down_port: int = DEFAULT_NETWORK_DOWN_PORT,
        network_down_timeout: int = DEFAULT_NETWORK_DOWN_TIMEOUT,
        domain_timeout: typing.Optional[int] = None) -> FetchLinkResponse:
    """Fetch a URL, retrying for as long as the network appears to be down.

    URLs with a binary extension are not fetched; a dummy "bypassed" response is returned for them. URLs that are
    not HTTP URLs yield a 400 response with 'bad url' as content without any request being made.

    When a response comes back as a 400 error and network_down_host:network_down_port is not reachable, the
    function sleeps for network_down_timeout seconds and tries again; in every other case the response is returned
    as it is.

    NOTE(review): the previous docstring said that McGetException gets caught here; no such handler is visible in
    this function -- confirm where (if anywhere) that happens.

    Arguments:
    db - db handle
    url - url to fetch
    network_down_host - host to check if network is down on error
    network_down_port - port to check if network is down on error
    network_down_timeout - seconds to wait if the network is down
    domain_timeout - value to pass to ThrottledUserAgent()

    Returns:
    Response object
    """
    if mediawords.tm.stories.url_has_binary_extension(url):
        return _make_dummy_bypassed_response(url)

    bad_request = HTTPStatus.BAD_REQUEST

    while True:
        ua = ThrottledUserAgent(db, domain_timeout=domain_timeout)

        if not mediawords.util.url.is_http_url(url):
            response = FetchLinkResponse(
                url=url,
                is_success=False,
                code=bad_request.value,
                message=bad_request.phrase,
                content='bad url',
                last_requested_url=None,
            )
        else:
            ua_response = ua.get_follow_http_html_redirects(url)
            response = FetchLinkResponse.from_useragent_response(url, ua_response)

        # Successes and failures other than 400 are returned straight away
        if response.is_success or response.code != bad_request.value:
            return response

        # 400 with the network being reachable is a genuine failure
        if tcp_port_is_open(port=network_down_port, hostname=network_down_host):
            return response

        log.warning("Response failed with %s and network is down.  Waiting to retry ..." % (url,))
        time.sleep(network_down_timeout)
def update_zookeeper_solr_configuration(
        zookeeper_host: str = MC_SOLR_CLUSTER_ZOOKEEPER_HOST,
        zookeeper_port: int = MC_SOLR_CLUSTER_ZOOKEEPER_PORT,
        dist_directory: str = MC_DIST_DIR,
        solr_version: str = MC_SOLR_VERSION) -> None:
    """Update Solr's configuration on ZooKeeper.

    For every collection returned by __collections(), the collection's "conf" directory is uploaded ("upconfig")
    and then linked to the collection ("linkconfig").

    Arguments:
    zookeeper_host - ZooKeeper hostname
    zookeeper_port - ZooKeeper port
    dist_directory - directory of the Solr distribution
    solr_version - Solr version

    Raises McSolrRunException if ZooKeeper's port is not open.
    """
    # Check for / install the very same distribution that zkcli gets run from below
    if not __solr_is_installed(dist_directory=dist_directory, solr_version=solr_version):
        log.info("Solr is not installed, installing...")
        __install_solr(dist_directory=dist_directory, solr_version=solr_version)

    if not tcp_port_is_open(hostname=zookeeper_host, port=zookeeper_port):
        raise McSolrRunException("ZooKeeper is not running at %s:%d." % (zookeeper_host, zookeeper_port))

    collections = __collections()
    log.debug("Solr collections: %s" % collections)

    log.info("Uploading Solr collection configurations to ZooKeeper...")
    for collection_name, collection_path in sorted(collections.items()):
        collection_conf_path = os.path.join(collection_path, "conf")

        # Copy configuration because ZooKeeper's uploader doesn't like symlinks
        log.info("Copying collection's '%s' configuration to a temporary directory..." % collection_name)

        # Temporary copy is needed only for the upload, so it gets removed as soon as the upload command returns
        # (or fails)
        with tempfile.TemporaryDirectory() as temp_root_dir:
            collection_conf_temp_dir = os.path.join(temp_root_dir, collection_name)
            shutil.copytree(collection_conf_path, collection_conf_temp_dir)

            log.info("Uploading collection's '%s' configuration at '%s'..." % (
                collection_name, collection_conf_temp_dir))
            __run_solr_zkcli(
                zkcli_args=["-cmd", "upconfig", "-confdir", collection_conf_temp_dir, "-confname", collection_name],
                zookeeper_host=zookeeper_host,
                zookeeper_port=zookeeper_port,
                dist_directory=dist_directory,
                solr_version=solr_version)

        log.info("Linking collection's '%s' configuration..." % collection_name)
        __run_solr_zkcli(
            zkcli_args=["-cmd", "linkconfig", "-collection", collection_name, "-confname", collection_name],
            zookeeper_host=zookeeper_host,
            zookeeper_port=zookeeper_port,
            dist_directory=dist_directory,
            solr_version=solr_version)

    log.info("Uploaded Solr collection configurations to ZooKeeper.")
def test_random_port() -> None:
    """Test assigning a random port where port = 0.

    Three servers are kept running at the same time, and each one is expected to end up listening on a port at or
    above START_RANDOM_PORT and to serve its page.
    """
    hss = []
    try:
        for _ in range(3):
            hs = HashServer(port=0, pages={'/foo': 'bar'})
            assert hs is not None

            hs.start()
            # Remember the server as soon as it has been started so that it gets stopped even if one of the
            # assertions below fails
            hss.append(hs)

            assert hs.port() >= START_RANDOM_PORT
            assert tcp_port_is_open(hs.port())
            assert str(requests.get(hs.page_url('/foo')).text) == 'bar'
    finally:
        for hs in hss:
            hs.stop()
def update_zookeeper_solr_configuration(
    zookeeper_host: str = MC_SOLR_CLUSTER_ZOOKEEPER_HOST,
    zookeeper_port: int = MC_SOLR_CLUSTER_ZOOKEEPER_PORT,
    dist_directory: str = MC_DIST_DIR,
    solr_version: str = MC_SOLR_VERSION,
) -> None:
    """Update Solr's configuration on ZooKeeper.

    For every collection returned by __collections(), the collection's "conf" directory is uploaded ("upconfig")
    and then linked to the collection ("linkconfig").

    Arguments:
    zookeeper_host - ZooKeeper hostname
    zookeeper_port - ZooKeeper port
    dist_directory - directory of the Solr distribution
    solr_version - Solr version

    Raises Exception if ZooKeeper's port is not open.
    """
    # Check for / install the very same distribution that zkcli gets run from below
    if not __solr_is_installed(dist_directory=dist_directory, solr_version=solr_version):
        l.info("Solr is not installed, installing...")
        __install_solr(dist_directory=dist_directory, solr_version=solr_version)

    if not tcp_port_is_open(hostname=zookeeper_host, port=zookeeper_port):
        raise Exception("ZooKeeper is not running at %s:%d." % (zookeeper_host, zookeeper_port))

    collections = __collections()
    l.debug("Solr collections: %s" % collections)

    l.info("Uploading Solr collection configurations to ZooKeeper...")
    for collection_name, collection_path in sorted(collections.items()):
        collection_conf_path = os.path.join(collection_path, "conf")

        # Copy configuration because ZooKeeper's uploader doesn't like symlinks
        l.info("Copying collection's '%s' configuration to a temporary directory..." % collection_name)

        # Temporary copy is needed only for the upload, so it gets removed as soon as the upload command returns
        # (or fails)
        with tempfile.TemporaryDirectory() as temp_root_dir:
            collection_conf_temp_dir = os.path.join(temp_root_dir, collection_name)
            shutil.copytree(collection_conf_path, collection_conf_temp_dir)

            l.info("Uploading collection's '%s' configuration at '%s'..." % (collection_name, collection_conf_temp_dir))
            __run_solr_zkcli(
                zkcli_args=["-cmd", "upconfig", "-confdir", collection_conf_temp_dir, "-confname", collection_name],
                zookeeper_host=zookeeper_host,
                zookeeper_port=zookeeper_port,
                dist_directory=dist_directory,
                solr_version=solr_version,
            )

        l.info("Linking collection's '%s' configuration..." % collection_name)
        __run_solr_zkcli(
            zkcli_args=["-cmd", "linkconfig", "-collection", collection_name, "-confname", collection_name],
            zookeeper_host=zookeeper_host,
            zookeeper_port=zookeeper_port,
            dist_directory=dist_directory,
            solr_version=solr_version,
        )

    l.info("Uploaded Solr collection configurations to ZooKeeper.")
def upgrade_lucene_standalone_index(
    base_data_dir: str = MC_SOLR_BASE_DATA_DIR, dist_directory: str = MC_DIST_DIR, solr_version: str = MC_SOLR_VERSION
):
    """Run the Lucene index upgrade against the standalone instance's data directory.

    Arguments:
    base_data_dir - base data directory, resolved under the Media Cloud root (must exist)
    dist_directory - directory of the Solr distribution
    solr_version - Solr version

    Raises Exception if something is listening on the standalone instance's port (MC_SOLR_STANDALONE_PORT).
    """
    data_root = resolve_absolute_path_under_mc_root(path=base_data_dir, must_exist=True)

    l.info("Making sure standalone instance isn't running...")
    standalone_port = MC_SOLR_STANDALONE_PORT
    instance_running = tcp_port_is_open(port=standalone_port)
    if instance_running:
        raise Exception("Solr standalone instance is running on port %d." % standalone_port)
    l.info("Made sure standalone instance isn't running.")

    l.info("Upgrading standalone instance indexes...")
    __upgrade_lucene_index(
        instance_data_dir=__standalone_data_dir(base_data_dir=data_root),
        dist_directory=dist_directory,
        solr_version=solr_version,
    )
    l.info("Upgraded standalone instance indexes...")
def upgrade_lucene_standalone_index(base_data_dir: str = MC_SOLR_BASE_DATA_DIR,
                                    dist_directory: str = MC_DIST_DIR,
                                    solr_version: str = MC_SOLR_VERSION):
    """Run the Lucene index upgrade against the standalone instance's data directory.

    Arguments:
    base_data_dir - base data directory, resolved under the Media Cloud root (must exist)
    dist_directory - directory of the Solr distribution
    solr_version - Solr version

    Raises McSolrRunException if something is listening on the standalone instance's port
    (MC_SOLR_STANDALONE_PORT).
    """
    data_root = resolve_absolute_path_under_mc_root(path=base_data_dir, must_exist=True)

    log.info("Making sure standalone instance isn't running...")
    standalone_port = MC_SOLR_STANDALONE_PORT
    instance_running = tcp_port_is_open(port=standalone_port)
    if instance_running:
        raise McSolrRunException("Solr standalone instance is running on port %d." % standalone_port)
    log.info("Made sure standalone instance isn't running.")

    log.info("Upgrading standalone instance indexes...")
    __upgrade_lucene_index(
        instance_data_dir=__standalone_data_dir(base_data_dir=data_root),
        dist_directory=dist_directory,
        solr_version=solr_version,
    )
    log.info("Upgraded standalone instance indexes...")
def __run_solr_zkcli(zkcli_args: List[str],
                     zookeeper_host: str = MC_SOLR_CLUSTER_ZOOKEEPER_HOST,
                     zookeeper_port: int = MC_SOLR_CLUSTER_ZOOKEEPER_PORT,
                     dist_directory: str = MC_DIST_DIR,
                     solr_version: str = MC_SOLR_VERSION) -> None:
    """Run Solr's zkcli.sh helper script.

    The ZkCLI Java class is run directly with a classpath assembled from the Solr distribution.

    Arguments:
    zkcli_args - arguments appended after the "-zkhost" option
    zookeeper_host - ZooKeeper hostname
    zookeeper_port - ZooKeeper port
    dist_directory - directory of the Solr distribution
    solr_version - Solr version

    Raises McSolrRunException if log4j.properties can't be found, or if ZooKeeper's port is not open.
    """
    solr_path = __solr_path(dist_directory=dist_directory, solr_version=solr_version)

    # Computed once; used both for locating log4j.properties and for the classpath
    jetty_home_path = __jetty_home_path(dist_directory=dist_directory, solr_version=solr_version)

    log4j_properties_path = os.path.join(jetty_home_path, "scripts", "cloud-scripts", "log4j.properties")
    if not os.path.isfile(log4j_properties_path):
        raise McSolrRunException(
            "Unable to find log4j.properties file for zkcli.sh script at path: %s" % log4j_properties_path)

    if not tcp_port_is_open(hostname=zookeeper_host, port=zookeeper_port):
        raise McSolrRunException("ZooKeeper is not running at %s:%d." % (zookeeper_host, zookeeper_port))

    zkhost = "%s:%d" % (zookeeper_host, zookeeper_port)

    java_classpath_dirs = [
        os.path.join(solr_path, "dist", "*"),
        os.path.join(jetty_home_path, "solr-webapp", "webapp", "WEB-INF", "lib", "*"),
        os.path.join(jetty_home_path, "lib", "ext", "*"),
    ]

    args = [
        "java",
        "-classpath", ":".join(java_classpath_dirs),
        "-Dlog4j.configuration=file://" + os.path.abspath(log4j_properties_path),
        "org.apache.solr.cloud.ZkCLI",
        "-zkhost", zkhost,
    ] + zkcli_args

    run_command_in_foreground(args)
def upgrade_lucene_shards_indexes(base_data_dir: str = MC_SOLR_BASE_DATA_DIR,
                                  dist_directory: str = MC_DIST_DIR,
                                  solr_version: str = MC_SOLR_VERSION):
    """Run the Lucene index upgrade against the data directory of every shard.

    Arguments:
    base_data_dir - base data directory, resolved under the Media Cloud root (must exist)
    dist_directory - directory of the Solr distribution
    solr_version - Solr version

    Raises McSolrRunException if fewer than two shard data directories are found, or if the port of any of the
    shards is open.
    """
    data_root = resolve_absolute_path_under_mc_root(path=base_data_dir, must_exist=True)

    # Try to guess shard count from how many shards are in data directory: shards are numbered from 1, and
    # counting stops at the first shard data directory that is missing
    l.info("Looking for shards...")
    shard_count = 0
    while os.path.isdir(__shard_data_dir(shard_num=shard_count + 1, base_data_dir=data_root)):
        shard_count += 1

    if shard_count < 2:
        raise McSolrRunException("Found less than 2 shards.")
    l.info("Found %d shards." % shard_count)

    shard_numbers = range(1, shard_count + 1)

    l.info("Making sure shards aren't running...")
    for number in shard_numbers:
        port = __shard_port(shard_num=number, starting_port=MC_SOLR_CLUSTER_STARTING_PORT)
        if tcp_port_is_open(port=port):
            raise McSolrRunException("Solr shard %d is running on port %d." % (number, port))
    l.info("Made sure shards aren't running.")

    l.info("Upgrading shard indexes...")
    for number in shard_numbers:
        __upgrade_lucene_index(
            instance_data_dir=__shard_data_dir(shard_num=number, base_data_dir=data_root),
            dist_directory=dist_directory,
            solr_version=solr_version,
        )
    l.info("Upgraded shard indexes.")
def stop(self):
    """Stop the webserver.

    Does nothing (apart from logging a warning) if the port is not open or if no server process has been recorded.

    Raises McHashServerException if the port is still open after the server process has been stopped.
    """
    if not tcp_port_is_open(port=self.__port):
        log.warning("Port %d is not open." % self.__port)
        return

    if self.__http_server_thread is None:
        log.warning("HTTP server process is None.")
        return

    log.debug('Stopping test web server %s:%d' % (self.__host, self.__port,))

    # HTTP server itself is running in a fork, and it creates forks for every request which, at the point of killing
    # the server, might be in various states. So, we just SIGKILL all those PIDs in the most gruesome way.
    #
    # The lock is held via a context manager so that it gets released even if something unexpected is raised while
    # going through the PIDs.
    with self.__http_server_active_pids_lock:
        for pid, value in self.__http_server_active_pids.items():
            if value is True:
                log.debug("Killing PID %d" % pid)
                try:
                    os.kill(pid, signal.SIGKILL)
                    self.__http_server_active_pids[pid] = False
                except OSError as ex:
                    log.error("Unable to kill PID %d: %s" % (pid, str(ex),))

    # NOTE(review): join() runs before terminate(), so this presumably relies on the server process having been
    # killed in the loop above -- confirm
    self.__http_server_thread.join()
    self.__http_server_thread.terminate()
    self.__http_server_thread = None

    if not wait_for_tcp_port_to_close(port=self.__port, retries=20, delay=0.1):
        raise McHashServerException("Port %d is still open." % self.__port)
def optimize_solr_index(host: str = "localhost", port: int = MC_SOLR_STANDALONE_PORT, collections: List[str] = None):
    """Optimize collection indexes.

    In SolrCloud cluster, optimization command run on one of the shards will trigger optimization on all of them.

    Arguments:
    host - hostname of the Solr instance to send the optimize requests to
    port - port of the Solr instance
    collections - names of collections to optimize; when None, the keys returned by __collections() are used

    Raises McSolrRunException if nothing is listening on host:port, or if an optimize request fails.
    """
    if collections is None:
        collections = __collections().keys()
    l.debug("Solr collections to reindex: %s" % ', '.join(collections))

    if not tcp_port_is_open(hostname=host, port=port):
        raise McSolrRunException("Solr is not running on %s:%d." % (host, port))

    l.info("Optimizing indexes on %s:%d..." % (host, port))

    for collection_name in sorted(collections):
        l.info("Optimizing collection's '%s' index on %s:%d..." % (collection_name, host, port))
        url = "http://%(host)s:%(port)d/solr/%(collection_name)s/update?optimize=true" % {
            "host": host,
            "port": port,
            "collection_name": collection_name,
        }
        l.debug("Requesting URL %s..." % url)

        try:
            # The response body is not needed; the context manager only makes
            # sure the HTTP response gets closed
            with urlopen(url):
                pass
        except URLError as e:
            raise McSolrRunException(
                "Unable to optimize collection '%s' index on %s:%d: %s" % (collection_name, host, port, e.reason)
            ) from e

    l.info("Optimized indexes on %s:%d." % (host, port))
def upgrade_lucene_shards_indexes(
    base_data_dir: str = MC_SOLR_BASE_DATA_DIR, dist_directory: str = MC_DIST_DIR, solr_version: str = MC_SOLR_VERSION
):
    """Run the Lucene index upgrade against the data directory of every shard.

    Arguments:
    base_data_dir - base data directory, resolved under the Media Cloud root (must exist)
    dist_directory - directory of the Solr distribution
    solr_version - Solr version

    Raises Exception if fewer than two shard data directories are found, or if the port of any of the shards is
    open.
    """
    data_root = resolve_absolute_path_under_mc_root(path=base_data_dir, must_exist=True)

    # Try to guess shard count from how many shards are in data directory: shards are numbered from 1, and
    # counting stops at the first shard data directory that is missing
    l.info("Looking for shards...")
    shard_count = 0
    while os.path.isdir(__shard_data_dir(shard_num=shard_count + 1, base_data_dir=data_root)):
        shard_count += 1

    if shard_count < 2:
        raise Exception("Found less than 2 shards.")
    l.info("Found %d shards." % shard_count)

    shard_numbers = range(1, shard_count + 1)

    l.info("Making sure shards aren't running...")
    for number in shard_numbers:
        port = __shard_port(shard_num=number, starting_port=MC_SOLR_CLUSTER_STARTING_PORT)
        if tcp_port_is_open(port=port):
            raise Exception("Solr shard %d is running on port %d." % (number, port))
    l.info("Made sure shards aren't running.")

    l.info("Upgrading shard indexes...")
    for number in shard_numbers:
        __upgrade_lucene_index(
            instance_data_dir=__shard_data_dir(shard_num=number, base_data_dir=data_root),
            dist_directory=dist_directory,
            solr_version=solr_version,
        )
    l.info("Upgraded shard indexes.")
def __run_solr(port: int,
               instance_data_dir: str,
               hostname: str = fqdn(),
               jvm_heap_size: str = None,
               start_jar_args: List[str] = None,
               jvm_opts: List[str] = None,
               connect_timeout: int = 120,
               dist_directory: str = MC_DIST_DIR,
               solr_version: str = MC_SOLR_VERSION) -> None:
    """Run Solr instance.

    Prepares the instance data directory (collection configuration copies, core.properties files, symlinks to
    shared configuration and to the Solr distribution), starts Solr as a child process and then keeps running
    until the process gets terminated.

    Arguments:
    port - port for Solr to listen on; must not be open yet
    instance_data_dir - data directory of the instance; created if it doesn't exist
    hostname - hostname passed to Solr as "-Dhost"; has to resolve
    jvm_heap_size - value for the JVM's "-Xmx" option (option is left out when None)
    start_jar_args - extra arguments placed in front of "-jar start.jar"
    jvm_opts - JVM options; MC_SOLR_STANDALONE_JVM_OPTS is used when None
    connect_timeout - number of retries while waiting for Solr's port to open
    dist_directory - directory of the Solr distribution
    solr_version - Solr version

    Raises McSolrRunException if one of the required directories / files is missing, a symlink destination
    exists but is not a symlink, the hostname doesn't resolve, or the port is open already.
    """
    if jvm_opts is None:
        jvm_opts = MC_SOLR_STANDALONE_JVM_OPTS

    if start_jar_args is None:
        start_jar_args = []

    # Check for / install the very same distribution that is going to be run below
    if not __solr_is_installed(dist_directory=dist_directory, solr_version=solr_version):
        l.info("Solr is not installed, installing...")
        __install_solr(dist_directory=dist_directory, solr_version=solr_version)

    solr_home_dir = __solr_home_path(solr_home_dir=MC_SOLR_HOME_DIR)
    if not os.path.isdir(solr_home_dir):
        raise McSolrRunException("Solr home directory '%s' does not exist." % solr_home_dir)

    solr_path = __solr_path(dist_directory=dist_directory, solr_version=solr_version)

    if not os.path.isdir(instance_data_dir):
        l.info("Creating data directory at %s..." % instance_data_dir)
        mkdir_p(instance_data_dir)

    l.info("Updating collections at %s..." % instance_data_dir)
    collections = __collections(solr_home_dir=solr_home_dir)
    for collection_name, collection_path in sorted(collections.items()):
        l.info("Updating collection '%s'..." % collection_name)

        collection_conf_src_dir = os.path.join(collection_path, "conf")
        if not os.path.isdir(collection_conf_src_dir):
            raise McSolrRunException(
                "Configuration for collection '%s' at %s does not exist" % (collection_name, collection_conf_src_dir))

        collection_dst_dir = os.path.join(instance_data_dir, collection_name)
        mkdir_p(collection_dst_dir)

        # Remove and copy configuration in case it has changed
        # (don't symlink because Solr 5.5+ doesn't like those)
        collection_conf_dst_dir = os.path.join(collection_dst_dir, "conf")
        if os.path.lexists(collection_conf_dst_dir):
            l.debug("Removing old collection configuration in '%s'..." % collection_conf_dst_dir)
            if os.path.islink(collection_conf_dst_dir):
                # Might still be a link from older Solr versions
                os.unlink(collection_conf_dst_dir)
            else:
                shutil.rmtree(collection_conf_dst_dir)

        l.info("Copying '%s' to '%s'..." % (collection_conf_src_dir, collection_conf_dst_dir))
        shutil.copytree(collection_conf_src_dir, collection_conf_dst_dir, symlinks=False)

        l.info("Updating core.properties for collection '%s'..." % collection_name)
        core_properties_path = os.path.join(collection_dst_dir, "core.properties")
        with open(core_properties_path, 'w') as core_properties_file:
            core_properties_file.write("""
#
# This file is autogenerated. Don't bother editing it!
#

name=%(collection_name)s
instanceDir=%(instance_dir)s
""" % {
                "collection_name": collection_name,
                "instance_dir": collection_dst_dir,
            })

    l.info("Symlinking shard configuration...")
    config_items_to_symlink = [
        "contexts",
        "etc",
        "modules",
        "resources",
        "solr.xml",
    ]
    for config_item in config_items_to_symlink:
        config_item_src_path = os.path.join(solr_home_dir, config_item)
        if not os.path.exists(config_item_src_path):
            raise McSolrRunException("Expected configuration item '%s' does not exist" % config_item_src_path)

        # Recreate symlink just in case
        config_item_dst_path = os.path.join(instance_data_dir, config_item)
        if os.path.lexists(config_item_dst_path):
            if not os.path.islink(config_item_dst_path):
                raise McSolrRunException(
                    "Configuration item '%s' exists but is not a symlink." % config_item_dst_path)
            os.unlink(config_item_dst_path)

        l.info("Symlinking '%s' to '%s'..." % (config_item_src_path, config_item_dst_path))
        relative_symlink(config_item_src_path, config_item_dst_path)

    jetty_home_path = __jetty_home_path(dist_directory=dist_directory, solr_version=solr_version)

    l.info("Symlinking libraries and JARs...")
    # Every item is listed once; listing an item twice would only make its symlink get recreated twice
    library_items_to_symlink = [
        "lib",
        "solr-webapp",
        "start.jar",
        "solr",
    ]
    for library_item in library_items_to_symlink:
        library_item_src_path = os.path.join(jetty_home_path, library_item)
        if not os.path.exists(library_item_src_path):
            raise McSolrRunException("Expected library item '%s' does not exist" % library_item_src_path)

        # Recreate symlink just in case
        library_item_dst_path = os.path.join(instance_data_dir, library_item)
        if os.path.lexists(library_item_dst_path):
            if not os.path.islink(library_item_dst_path):
                raise McSolrRunException("Library item '%s' exists but is not a symlink." % library_item_dst_path)
            os.unlink(library_item_dst_path)

        l.info("Symlinking '%s' to '%s'..." % (library_item_src_path, library_item_dst_path))
        relative_symlink(library_item_src_path, library_item_dst_path)

    log4j_properties_path = os.path.join(solr_home_dir, "resources", "log4j.properties")
    if not os.path.isfile(log4j_properties_path):
        # Path has to be interpolated into the message, otherwise a literal '%s' gets reported
        raise McSolrRunException("log4j.properties at '%s' was not found." % log4j_properties_path)

    start_jar_path = os.path.join(jetty_home_path, "start.jar")
    if not os.path.isfile(start_jar_path):
        raise McSolrRunException("start.jar at '%s' was not found." % start_jar_path)

    solr_webapp_path = os.path.abspath(os.path.join(jetty_home_path, "solr-webapp"))
    if not os.path.isdir(solr_webapp_path):
        raise McSolrRunException("Solr webapp dir at '%s' was not found." % solr_webapp_path)

    if not hostname_resolves(hostname):
        raise McSolrRunException("Hostname '%s' does not resolve." % hostname)

    if tcp_port_is_open(port=port):
        raise McSolrRunException("Port %d is already open on this machine." % port)

    __raise_if_old_shards_exist()

    args = ["java"]
    l.info("Starting Solr instance on %s, port %d..." % (hostname, port))

    if jvm_heap_size is not None:
        args += ["-Xmx%s" % jvm_heap_size]
    args += jvm_opts

    # noinspection SpellCheckingInspection
    args += [
        "-server",
        "-Djava.util.logging.config.file=file://" + os.path.abspath(log4j_properties_path),
        "-Djetty.base=%s" % instance_data_dir,
        "-Djetty.home=%s" % instance_data_dir,
        "-Djetty.port=%d" % port,
        "-Dsolr.solr.home=%s" % instance_data_dir,
        "-Dsolr.data.dir=%s" % instance_data_dir,
        "-Dhost=%s" % hostname,
        "-Dmediacloud.luceneMatchVersion=%s" % MC_SOLR_LUCENEMATCHVERSION,

        # write heap dump to data directory on OOM errors
        "-XX:+HeapDumpOnOutOfMemoryError",
        "-XX:HeapDumpPath=%s" % instance_data_dir,

        # needed for resolving paths to JARs in solrconfig.xml
        "-Dmediacloud.solr_dist_dir=%s" % solr_path,
        "-Dmediacloud.solr_webapp_dir=%s" % solr_webapp_path,
    ]
    args += start_jar_args
    args += [
        "-jar", start_jar_path,
        "--module=http",
    ]

    l.debug("Running command: %s" % ' '.join(args))

    process = subprocess.Popen(args)
    global __solr_pid
    __solr_pid = process.pid

    # Declare that we don't care about the exit code of the child process so
    # it doesn't become a zombie when it gets killed in signal handler
    signal.signal(signal.SIGCHLD, signal.SIG_IGN)

    signal.signal(signal.SIGTERM, __kill_solr_process)  # SIGTERM is handled differently for whatever reason
    atexit.register(__kill_solr_process)

    l.info("Solr PID: %d" % __solr_pid)

    l.info("Solr is starting on port %d, will be available shortly..." % port)
    # NOTE(review): the result of the wait is not checked, so "Solr is running" gets logged even if the port
    # didn't open within the given number of retries -- confirm whether that's intended
    wait_for_tcp_port_to_open(port=port, retries=connect_timeout)

    l.info("Solr is running on port %d!" % port)

    # Keep the wrapper process around for as long as Solr is supposed to be running
    while True:
        time.sleep(1)
def test_random_unused_port():
    """Port returned by random_unused_port() must not be reported as open."""
    port = random_unused_port()
    port_is_open = tcp_port_is_open(port)
    assert port_is_open is False
def test_http_hash_server_multiple_clients():
    """Test running hash server with multiple clients.

    Five requests are submitted through a FuturesSession with 10 workers; one of the pages sleeps for longer than
    the client timeout, and the remaining requests are expected to get their responses regardless.
    """

    port = random_unused_port()

    # noinspection PyTypeChecker,PyUnusedLocal
    def __callback_timeout(request: HashServer.Request) -> Union[str, bytes]:
        r = ""
        r += "HTTP/1.0 200 OK\r\n"
        r += "Content-Type: text/html; charset=UTF-8\r\n"
        r += "\r\n"
        r += "And now we wait"
        # Sleep for longer than the 2 second timeout used by the clients below
        time.sleep(10)
        return str.encode(r)

    # Page contents are defined once so that what gets served and what is expected can't drift apart
    page_a = 'ππ©πͺπ΄ πͺπ΄ π±π’π¨π¦ π.'
    page_b = 'πΏπππ ππ ππππ π.'
    page_c = 'ππππ€ ππ€ π‘πππ β.'

    pages = {
        '/a': page_a,
        '/timeout': {'callback': __callback_timeout},
        # '/does-not-exist': '404',
        '/b': page_b,
        '/c': page_c,
    }

    hs = HashServer(port=port, pages=pages)
    assert hs

    hs.start()

    try:
        assert tcp_port_is_open(port=port)

        base_url = 'http://localhost:%d' % port

        session = FuturesSession(max_workers=10)

        future_a = session.get('%s/a' % base_url, timeout=2)
        future_timeout = session.get('%s/timeout' % base_url, timeout=2)
        future_404 = session.get('%s/does-not-exist' % base_url, timeout=2)
        future_b = session.get('%s/b' % base_url, timeout=2)
        future_c = session.get('%s/c' % base_url, timeout=2)

        response_a = future_a.result()

        with pytest.raises(requests.Timeout):
            future_timeout.result()

        response_404 = future_404.result()
        response_b = future_b.result()
        response_c = future_c.result()

        assert response_b.status_code == 200
        assert response_b.text == page_b

        assert response_c.status_code == 200
        assert response_c.text == page_c

        assert response_404.status_code == 404

        assert response_a.status_code == 200
        assert response_a.text == page_a
    finally:
        # Stop the server even if one of the assertions fails
        hs.stop()
def run_zookeeper(dist_directory: str = MC_DIST_DIR,
                  listen: str = MC_ZOOKEEPER_LISTEN,
                  port: int = MC_ZOOKEEPER_PORT,
                  data_dir: str = MC_SOLR_BASE_DATA_DIR,
                  zookeeper_version: str = MC_ZOOKEEPER_VERSION,
                  solr_version: str = MC_SOLR_VERSION) -> None:
    """Run ZooKeeper, install if needed too.

    Writes a fresh zoo.cfg into the ZooKeeper data directory, starts
    "zkServer.sh start-foreground" as a child process, waits for the client
    port to open, uploads the initial Solr collection configuration and then
    blocks forever (the function does not return normally).

    :param dist_directory: directory under which ZooKeeper and Solr are installed
    :param listen: address written to zoo.cfg as clientPortAddress
    :param port: client port for ZooKeeper to listen on
    :param data_dir: base data directory; must exist
    :param zookeeper_version: ZooKeeper version used to locate the installation
    :param solr_version: Solr version whose configuration gets uploaded
    :raises McZooKeeperRunException: if the port is already open, zkServer.sh
        or log4j.properties is missing, or ZooKeeper's port does not open
    """
    if not __zookeeper_is_installed():
        log.info("ZooKeeper is not installed, installing...")
        __install_zookeeper()

    data_dir = resolve_absolute_path_under_mc_root(path=data_dir, must_exist=True)

    zookeeper_data_dir = os.path.join(data_dir, "mediacloud-cluster-zookeeper")
    if not os.path.isdir(zookeeper_data_dir):
        log.info("Creating data directory at %s..." % zookeeper_data_dir)
        mkdir_p(zookeeper_data_dir)

    if tcp_port_is_open(port=port):
        raise McZooKeeperRunException("Port %d is already open on this machine." % port)

    zookeeper_path = __zookeeper_path(dist_directory=dist_directory, zookeeper_version=zookeeper_version)

    zkserver_path = os.path.join(zookeeper_path, "bin", "zkServer.sh")
    if not os.path.isfile(zkserver_path):
        raise McZooKeeperRunException("zkServer.sh at '%s' was not found." % zkserver_path)

    log4j_properties_path = os.path.join(zookeeper_path, "conf", "log4j.properties")
    if not os.path.isfile(log4j_properties_path):
        # The path has to be interpolated; otherwise the message would contain a literal '%s'
        raise McZooKeeperRunException("log4j.properties at '%s' was not found." % log4j_properties_path)

    zoo_cnf_path = os.path.join(zookeeper_data_dir, "zoo.cfg")
    log.info("Creating zoo.cfg in '%s'..." % zoo_cnf_path)

    # The configuration is regenerated on every run
    with open(zoo_cnf_path, 'w') as zoo_cnf:
        zoo_cnf.write("""
#
# This file is autogenerated. Please do not modify it!
#

clientPortAddress=%(listen)s
clientPort=%(port)d
dataDir=%(data_dir)s

# Must be between zkClientTimeout / 2 and zkClientTimeout / 20
tickTime=30000

initLimit=10
syncLimit=10
""" % {
            "listen": listen,
            "port": port,
            "data_dir": zookeeper_data_dir,
        })

    zookeeper_env = os.environ.copy()
    zookeeper_env["ZOOCFGDIR"] = zookeeper_data_dir  # Serves as configuration dir too
    zookeeper_env["ZOOCFG"] = "zoo.cfg"
    zookeeper_env["ZOO_LOG_DIR"] = zookeeper_data_dir
    zookeeper_env["SERVER_JVMFLAGS"] = "-Dlog4j.configuration=file://" + os.path.abspath(log4j_properties_path)

    args = [zkserver_path, "start-foreground"]

    log.info("Starting ZooKeeper on %s:%d..." % (listen, port))
    log.debug("Running command: %s" % str(args))
    log.debug("Environment variables: %s" % str(zookeeper_env))

    process = subprocess.Popen(args, env=zookeeper_env)
    global __zookeeper_pid
    __zookeeper_pid = process.pid

    # Declare that we don't care about the exit code of the child process so
    # it doesn't become a zombie when it gets killed in signal handler
    signal.signal(signal.SIGCHLD, signal.SIG_IGN)

    # atexit callbacks are not run when the process is killed by a signal that
    # Python does not handle, so SIGTERM needs its own handler
    signal.signal(signal.SIGTERM, __kill_zookeeper_process)
    atexit.register(__kill_zookeeper_process)

    log.info("ZooKeeper PID: %d" % __zookeeper_pid)

    log.info("Waiting for ZooKeeper to start at port %d..." % port)
    zookeeper_started = wait_for_tcp_port_to_open(port=port, retries=MC_ZOOKEEPER_CONNECT_RETRIES)
    if not zookeeper_started:
        raise McZooKeeperRunException("Unable to connect to ZooKeeper at port %d" % port)

    log.info("Uploading initial Solr collection configurations to ZooKeeper...")
    update_zookeeper_solr_configuration(zookeeper_host="localhost",
                                        zookeeper_port=port,
                                        dist_directory=dist_directory,
                                        solr_version=solr_version)

    log.info("ZooKeeper is ready on port %d!" % port)

    # Keep the parent process alive for as long as ZooKeeper is supposed to run
    while True:
        time.sleep(1)
def __run_solr(
    port: int,
    instance_data_dir: str,
    hostname: str = fqdn(),
    jvm_heap_size: str = None,
    start_jar_args: List[str] = None,
    jvm_opts: List[str] = None,
    connect_timeout: int = 120,
    dist_directory: str = MC_DIST_DIR,
    solr_version: str = MC_SOLR_VERSION,
) -> None:
    """Run Solr instance.

    Prepares the instance data directory (copies collection configuration,
    writes core.properties, symlinks shared configuration and Jetty
    libraries), starts Solr as a "java -jar start.jar" child process, waits
    for its port and then blocks forever (the function does not return
    normally).

    :param port: port for Jetty / Solr to listen on
    :param instance_data_dir: data directory of this instance; created if missing
    :param hostname: hostname passed to Solr as -Dhost; must resolve
    :param jvm_heap_size: value for the JVM's -Xmx option, or None to not set it
    :param start_jar_args: extra arguments placed before "-jar start.jar"
    :param jvm_opts: JVM options; defaults to MC_SOLR_STANDALONE_JVM_OPTS
    :param connect_timeout: number of retries while waiting for the port to open
    :param dist_directory: directory under which Solr is installed
    :param solr_version: Solr version used to locate the installation
    :raises Exception: if a required directory, file or configuration item is
        missing, an item to be symlinked exists but is not a symlink, the
        hostname does not resolve, or the port is already open
    """
    if jvm_opts is None:
        jvm_opts = MC_SOLR_STANDALONE_JVM_OPTS

    if start_jar_args is None:
        start_jar_args = []

    if not __solr_is_installed():
        l.info("Solr is not installed, installing...")
        __install_solr()

    solr_home_dir = __solr_home_path(solr_home_dir=MC_SOLR_HOME_DIR)
    if not os.path.isdir(solr_home_dir):
        raise Exception("Solr home directory '%s' does not exist." % solr_home_dir)

    solr_path = __solr_path(dist_directory=dist_directory, solr_version=solr_version)

    if not os.path.isdir(instance_data_dir):
        l.info("Creating data directory at %s..." % instance_data_dir)
        mkdir_p(instance_data_dir)

    l.info("Updating collections at %s..." % instance_data_dir)
    collections = __collections(solr_home_dir=solr_home_dir)
    for collection_name, collection_path in sorted(collections.items()):
        l.info("Updating collection '%s'..." % collection_name)

        collection_conf_src_dir = os.path.join(collection_path, "conf")
        if not os.path.isdir(collection_conf_src_dir):
            raise Exception(
                "Configuration for collection '%s' at %s does not exist" % (collection_name, collection_conf_src_dir)
            )

        collection_dst_dir = os.path.join(instance_data_dir, collection_name)
        mkdir_p(collection_dst_dir)

        # Remove and copy configuration in case it has changed
        # (don't symlink because Solr 5.5+ doesn't like those)
        collection_conf_dst_dir = os.path.join(collection_dst_dir, "conf")
        if os.path.lexists(collection_conf_dst_dir):
            l.debug("Removing old collection configuration in '%s'..." % collection_conf_dst_dir)
            if os.path.islink(collection_conf_dst_dir):
                # Might still be a link from older Solr versions
                os.unlink(collection_conf_dst_dir)
            else:
                shutil.rmtree(collection_conf_dst_dir)

        l.info("Copying '%s' to '%s'..." % (collection_conf_src_dir, collection_conf_dst_dir))
        shutil.copytree(collection_conf_src_dir, collection_conf_dst_dir, symlinks=False)

        l.info("Updating core.properties for collection '%s'..." % collection_name)
        core_properties_path = os.path.join(collection_dst_dir, "core.properties")
        with open(core_properties_path, "w") as core_properties_file:
            core_properties_file.write(
                """
#
# This file is autogenerated. Don't bother editing it!
#

name=%(collection_name)s
instanceDir=%(instance_dir)s
"""
                % {"collection_name": collection_name, "instance_dir": collection_dst_dir}
            )

    l.info("Symlinking shard configuration...")
    config_items_to_symlink = ["contexts", "etc", "modules", "resources", "solr.xml"]
    for config_item in config_items_to_symlink:
        config_item_src_path = os.path.join(solr_home_dir, config_item)
        if not os.path.exists(config_item_src_path):
            raise Exception("Expected configuration item '%s' does not exist" % config_item_src_path)

        # Recreate symlink just in case
        config_item_dst_path = os.path.join(instance_data_dir, config_item)
        if os.path.lexists(config_item_dst_path):
            if not os.path.islink(config_item_dst_path):
                raise Exception("Configuration item '%s' exists but is not a symlink." % config_item_dst_path)
            os.unlink(config_item_dst_path)

        l.info("Symlinking '%s' to '%s'..." % (config_item_src_path, config_item_dst_path))
        relative_symlink(config_item_src_path, config_item_dst_path)

    jetty_home_path = __jetty_home_path(dist_directory=dist_directory, solr_version=solr_version)

    l.info("Symlinking libraries and JARs...")
    # Each item is listed once; a repeated entry would only unlink and
    # recreate the very same symlink
    library_items_to_symlink = ["lib", "solr-webapp", "start.jar", "solr"]
    for library_item in library_items_to_symlink:
        library_item_src_path = os.path.join(jetty_home_path, library_item)
        if not os.path.exists(library_item_src_path):
            raise Exception("Expected library item '%s' does not exist" % library_item_src_path)

        # Recreate symlink just in case
        library_item_dst_path = os.path.join(instance_data_dir, library_item)
        if os.path.lexists(library_item_dst_path):
            if not os.path.islink(library_item_dst_path):
                raise Exception("Library item '%s' exists but is not a symlink." % library_item_dst_path)
            os.unlink(library_item_dst_path)

        l.info("Symlinking '%s' to '%s'..." % (library_item_src_path, library_item_dst_path))
        relative_symlink(library_item_src_path, library_item_dst_path)

    log4j_properties_path = os.path.join(solr_home_dir, "resources", "log4j.properties")
    if not os.path.isfile(log4j_properties_path):
        # The path has to be interpolated; otherwise the message would contain a literal '%s'
        raise Exception("log4j.properties at '%s' was not found." % log4j_properties_path)

    start_jar_path = os.path.join(jetty_home_path, "start.jar")
    if not os.path.isfile(start_jar_path):
        raise Exception("start.jar at '%s' was not found." % start_jar_path)

    solr_webapp_path = os.path.abspath(os.path.join(jetty_home_path, "solr-webapp"))
    if not os.path.isdir(solr_webapp_path):
        raise Exception("Solr webapp dir at '%s' was not found." % solr_webapp_path)

    if not hostname_resolves(hostname):
        raise Exception("Hostname '%s' does not resolve." % hostname)

    if tcp_port_is_open(port=port):
        raise Exception("Port %d is already open on this machine." % port)

    __raise_if_old_shards_exist()

    args = ["java"]
    l.info("Starting Solr instance on %s, port %d..." % (hostname, port))

    if jvm_heap_size is not None:
        args += ["-Xmx%s" % jvm_heap_size]

    args += jvm_opts

    # noinspection SpellCheckingInspection
    args += [
        "-server",
        "-Djava.util.logging.config.file=file://" + os.path.abspath(log4j_properties_path),
        "-Djetty.base=%s" % instance_data_dir,
        "-Djetty.home=%s" % instance_data_dir,
        "-Djetty.port=%d" % port,
        "-Dsolr.solr.home=%s" % instance_data_dir,
        "-Dsolr.data.dir=%s" % instance_data_dir,
        "-Dhost=%s" % hostname,
        "-Dmediacloud.luceneMatchVersion=%s" % MC_SOLR_LUCENEMATCHVERSION,
        # write heap dump to data directory on OOM errors
        "-XX:+HeapDumpOnOutOfMemoryError",
        "-XX:HeapDumpPath=%s" % instance_data_dir,
        # needed for resolving paths to JARs in solrconfig.xml
        "-Dmediacloud.solr_dist_dir=%s" % solr_path,
        "-Dmediacloud.solr_webapp_dir=%s" % solr_webapp_path,
    ]
    args += start_jar_args
    args += ["-jar", start_jar_path, "--module=http"]

    l.debug("Running command: %s" % " ".join(args))

    process = subprocess.Popen(args)
    global __solr_pid
    __solr_pid = process.pid

    # Declare that we don't care about the exit code of the child process so
    # it doesn't become a zombie when it gets killed in signal handler
    signal.signal(signal.SIGCHLD, signal.SIG_IGN)

    # atexit callbacks are not run when the process is killed by a signal that
    # Python does not handle, so SIGTERM needs its own handler
    signal.signal(signal.SIGTERM, __kill_solr_process)
    atexit.register(__kill_solr_process)

    l.info("Solr PID: %d" % __solr_pid)

    l.info("Solr is starting on port %d, will be available shortly..." % port)
    wait_for_tcp_port_to_open(port=port, retries=connect_timeout)

    l.info("Solr is running on port %d!" % port)

    # Keep the parent process alive for as long as Solr is supposed to run
    while True:
        time.sleep(1)
def run_zookeeper(dist_directory: str = MC_DIST_DIR,
                  listen: str = MC_ZOOKEEPER_LISTEN,
                  port: int = MC_ZOOKEEPER_PORT,
                  data_dir: str = MC_SOLR_BASE_DATA_DIR,
                  zookeeper_version: str = MC_ZOOKEEPER_VERSION,
                  solr_version: str = MC_SOLR_VERSION) -> None:
    """Run ZooKeeper, install if needed too.

    Writes a fresh zoo.cfg into the ZooKeeper data directory, starts
    "zkServer.sh start-foreground" as a child process, waits for the client
    port to open, uploads the initial Solr collection configuration and then
    blocks forever (the function does not return normally).

    :param dist_directory: directory under which ZooKeeper and Solr are installed
    :param listen: address written to zoo.cfg as clientPortAddress
    :param port: client port for ZooKeeper to listen on
    :param data_dir: base data directory; must exist
    :param zookeeper_version: ZooKeeper version used to locate the installation
    :param solr_version: Solr version whose configuration gets uploaded
    :raises McZooKeeperRunException: if the port is already open, zkServer.sh
        or log4j.properties is missing, or ZooKeeper's port does not open
    """
    if not __zookeeper_is_installed():
        log.info("ZooKeeper is not installed, installing...")
        __install_zookeeper()

    data_dir = resolve_absolute_path_under_mc_root(path=data_dir, must_exist=True)

    zookeeper_data_dir = os.path.join(data_dir, "mediacloud-cluster-zookeeper")
    if not os.path.isdir(zookeeper_data_dir):
        log.info("Creating data directory at %s..." % zookeeper_data_dir)
        mkdir_p(zookeeper_data_dir)

    if tcp_port_is_open(port=port):
        raise McZooKeeperRunException("Port %d is already open on this machine." % port)

    zookeeper_path = __zookeeper_path(dist_directory=dist_directory, zookeeper_version=zookeeper_version)

    zkserver_path = os.path.join(zookeeper_path, "bin", "zkServer.sh")
    if not os.path.isfile(zkserver_path):
        raise McZooKeeperRunException("zkServer.sh at '%s' was not found." % zkserver_path)

    log4j_properties_path = os.path.join(zookeeper_path, "conf", "log4j.properties")
    if not os.path.isfile(log4j_properties_path):
        # The path has to be interpolated; otherwise the message would contain a literal '%s'
        raise McZooKeeperRunException("log4j.properties at '%s' was not found." % log4j_properties_path)

    zoo_cnf_path = os.path.join(zookeeper_data_dir, "zoo.cfg")
    log.info("Creating zoo.cfg in '%s'..." % zoo_cnf_path)

    # The configuration is regenerated on every run
    with open(zoo_cnf_path, 'w') as zoo_cnf:
        zoo_cnf.write("""
#
# This file is autogenerated. Please do not modify it!
#

clientPortAddress=%(listen)s
clientPort=%(port)d
dataDir=%(data_dir)s

# Must be between zkClientTimeout / 2 and zkClientTimeout / 20
tickTime=30000

initLimit=10
syncLimit=10
""" % {
            "listen": listen,
            "port": port,
            "data_dir": zookeeper_data_dir,
        })

    zookeeper_env = os.environ.copy()
    zookeeper_env["ZOOCFGDIR"] = zookeeper_data_dir  # Serves as configuration dir too
    zookeeper_env["ZOOCFG"] = "zoo.cfg"
    zookeeper_env["ZOO_LOG_DIR"] = zookeeper_data_dir
    zookeeper_env["SERVER_JVMFLAGS"] = "-Dlog4j.configuration=file://" + os.path.abspath(log4j_properties_path)

    args = [zkserver_path, "start-foreground"]

    log.info("Starting ZooKeeper on %s:%d..." % (listen, port))
    log.debug("Running command: %s" % str(args))
    log.debug("Environment variables: %s" % str(zookeeper_env))

    process = subprocess.Popen(args, env=zookeeper_env)
    global __zookeeper_pid
    __zookeeper_pid = process.pid

    # Declare that we don't care about the exit code of the child process so
    # it doesn't become a zombie when it gets killed in signal handler
    signal.signal(signal.SIGCHLD, signal.SIG_IGN)

    # atexit callbacks are not run when the process is killed by a signal that
    # Python does not handle, so SIGTERM needs its own handler
    signal.signal(signal.SIGTERM, __kill_zookeeper_process)
    atexit.register(__kill_zookeeper_process)

    log.info("ZooKeeper PID: %d" % __zookeeper_pid)

    log.info("Waiting for ZooKeeper to start at port %d..." % port)
    zookeeper_started = wait_for_tcp_port_to_open(port=port, retries=MC_ZOOKEEPER_CONNECT_RETRIES)
    if not zookeeper_started:
        raise McZooKeeperRunException("Unable to connect to ZooKeeper at port %d" % port)

    log.info("Uploading initial Solr collection configurations to ZooKeeper...")
    update_zookeeper_solr_configuration(zookeeper_host="localhost",
                                        zookeeper_port=port,
                                        dist_directory=dist_directory,
                                        solr_version=solr_version)

    log.info("ZooKeeper is ready on port %d!" % port)

    # Keep the parent process alive for as long as ZooKeeper is supposed to run
    while True:
        time.sleep(1)
def test_http_hash_server():
    """Test HashServer page types: static pages, redirects, auth, callbacks, POST.

    Also covers request path normalization, the 404 page, and
    HashServer.page_url() for both known and unknown paths.
    """
    port = random_unused_port()
    base_url = 'http://localhost:%d' % port

    def __simple_callback(request: HashServer.Request) -> Union[str, bytes]:
        r = ""
        r += "HTTP/1.0 200 OK\r\n"
        r += "Content-Type: application/json; charset=UTF-8\r\n"
        r += "\r\n"
        r += json.dumps({
            'name': 'callback',
            'method': request.method(),
            'url': request.url(),
            'content-type': request.content_type(),
            'params': request.query_params(),
            'cookies': request.cookies(),
        })
        return str.encode(r)

    # noinspection PyUnusedLocal
    def __callback_cookie_redirect(request: HashServer.Request) -> str:
        r = ""
        r += "HTTP/1.0 302 Moved Temporarily\r\n"
        r += "Content-Type: text/html; charset=UTF-8\r\n"
        r += "Location: /check_cookie\r\n"
        r += "Set-Cookie: test_cookie=I'm a cookie and I know it!\r\n"
        r += "\r\n"
        r += "Redirecting to the cookie check page..."
        return r

    def __callback_post(request: HashServer.Request) -> Union[str, bytes]:
        r = ""
        r += "HTTP/1.0 200 OK\r\n"
        r += "Content-Type: application/json; charset=UTF-8\r\n"
        r += "\r\n"
        r += json.dumps({
            'name': 'callback_post',
            'post_data': request.content(),
        })
        return str.encode(r)

    # Defined once so that the served text and the asserted text can never drift apart
    bar_text = 'bar Δ Δ '

    # Keys and values deliberately mix str and bytes
    pages = {
        '/': 'home',
        '/foo': b'foo',
        '/bar': bar_text,
        '/foo-bar': {b'redirect': b'/bar'},
        '/localhost': {'redirect': "http://localhost:%d/" % port},
        b'/127-foo': {b'redirect': "http://127.0.0.1:%d/foo" % port},
        '/auth': {b'auth': b'foo:bar', b'content': b"foo bar \xf0\x90\x28\xbc"},
        '/404': {b'content': b'not found', b'http_status_code': 404},
        '/callback': {b'callback': __simple_callback},

        # Test setting cookies, redirects
        '/callback_cookie_redirect': {'callback': __callback_cookie_redirect},

        # POST data
        '/callback_post': {'callback': __callback_post},
    }

    hs = HashServer(port=port, pages=pages)
    assert hs

    hs.start()

    # Stop the server even when an assertion fails; otherwise a failing test
    # would leave it running on the port
    try:
        assert tcp_port_is_open(port=port)

        # (request path, expected response text), requested in this order
        simple_pages = [
            ('/', 'home'),
            ('/foo', 'foo'),
            ('/bar', bar_text),
            ('/foo-bar', bar_text),
            ('/localhost', 'home'),
            ('/127-foo', 'foo'),

            # Path normalization
            ('//', 'home'),
            ('///', 'home'),
            ('/something/../', 'home'),
            ('/something/..//', 'home'),
            ('/something/..///', 'home'),
            ('/foo/', 'foo'),
            ('/foo//', 'foo'),
            ('/foo///', 'foo'),
            ('/foo', 'foo'),
            ('/bar/../foo', 'foo'),
            ('/bar/../foo/', 'foo'),
            ('/bar/../foo//', 'foo'),
            ('/bar/../foo///', 'foo'),
        ]
        for path, expected_text in simple_pages:
            assert str(requests.get('%s%s' % (base_url, path)).text) == expected_text

        response_json = requests.get('%s/callback?a=b&c=d' % base_url, cookies={'cookie_name': 'cookie_value'}).json()
        assert response_json == {
            'name': 'callback',
            'method': 'GET',
            'url': 'http://%s:%d/callback?a=b&c=d' % (_fqdn(), port),
            'content-type': None,
            'params': {
                'a': 'b',
                'c': 'd',
            },
            'cookies': {
                'cookie_name': 'cookie_value',
            },
        }

        response = requests.get('%s/callback_cookie_redirect' % base_url, allow_redirects=False)
        assert response.status_code == 302
        assert response.headers['Location'] == '/check_cookie'

        response = requests.get("%s/404" % base_url)
        assert response.status_code == HTTPStatus.NOT_FOUND.value
        assert 'Not Found' in response.reason

        auth_url = "%s/auth" % base_url

        assert requests.get(auth_url).status_code == HTTPStatus.UNAUTHORIZED
        assert requests.get(auth_url, auth=('foo', 'foo')).status_code == HTTPStatus.UNAUTHORIZED

        response = requests.get(auth_url, auth=('foo', 'bar'))
        assert response.status_code == HTTPStatus.OK
        assert response.content == b"foo bar \xf0\x90\x28\xbc"

        assert urls_are_equal(url1=hs.page_url('/callback?a=b&c=d'), url2='http://%s:%d/callback' % (_fqdn(), port))
        with pytest.raises(McHashServerException):
            hs.page_url('/does-not-exist')

        response_json = requests.post('%s/callback_post' % base_url, data='abc=def').json()
        assert response_json == {
            'name': 'callback_post',
            'post_data': 'abc=def',
        }
    finally:
        hs.stop()