def test_transform_command_for_large_serverset(self):
    """A value above the long-value threshold is spilled to a temp file and
    the command is rewritten with ``-l <tmp_filepath>``.

    Fixes over the original: ``tmp_filepath`` is initialized before the
    ``try`` so the ``finally`` cleanup cannot raise NameError when
    ``transform_command_with_value`` itself fails, and the temp file is
    read through a context manager so the handle is always closed (the
    original leaked an open file object).
    """
    tmp_filepath = None
    try:
        command = ('/usr/local/bin/zk_download_data.py'
                   ' -f /var/serverset/discovery.stingray_dsl_mapper.prod'
                   ' -p /discovery/stingray_dsl_mapper/prod -m serverset')
        notification_timestamp = 1426859717.707331
        with open(LARGE_SERVERSET_EXAMPLE_FILE_PATH) as f:
            large_serverset_value = f.readline()
        (transformed_command, tmp_filepath) = transform_command_with_value(
            command, large_serverset_value, notification_timestamp)
        santinized_value = large_serverset_value.strip('\n').strip('\r')
        expected_tmp_filepath = ('/tmp/zk_update_largefile_'
                                 + zk_util.get_md5_digest(santinized_value)
                                 + '_' + str(notification_timestamp))
        expected_transformed_command = (
            '/usr/local/bin/zk_download_data.py -l '
            + expected_tmp_filepath
            + ' -f /var/serverset/discovery.stingray_dsl_mapper.prod'
              ' -p /discovery/stingray_dsl_mapper/prod -m serverset')
        self.assertEqual(expected_transformed_command, transformed_command)
        self.assertEqual(expected_tmp_filepath, tmp_filepath)
        # Validate the file content: first line is the sanitized value,
        # second line is its MD5 digest (the format written by
        # transform_command_with_value).
        with open(expected_tmp_filepath) as tmpfile:
            tmpfile_value = tmpfile.readline().rstrip('\n')
            tmpfile_md5 = tmpfile.readline()
        expected_tmpfile_value = santinized_value
        expected_tmpfile_md5 = zk_util.get_md5_digest(expected_tmpfile_value)
        self.assertEqual(expected_tmpfile_value, tmpfile_value)
        self.assertEqual(expected_tmpfile_md5, tmpfile_md5)
    finally:
        if tmp_filepath and os.path.isfile(tmp_filepath):
            os.remove(tmp_filepath)
def test_transform_command_for_large_serverset(self):
    """Verify long serverset values are written to a digest-named temp file
    and the download command gains a ``-l <tmp_filepath>`` option.

    Two defects fixed here: the cleanup in ``finally`` previously raised
    NameError (masking the real failure) whenever
    ``transform_command_with_value`` threw before ``tmp_filepath`` was
    bound, and the temp file opened for validation was never closed.
    """
    tmp_filepath = None  # guard: finally must not hit an unbound name
    try:
        command = ('/usr/local/bin/zk_download_data.py'
                   ' -f /var/serverset/discovery.stingray_dsl_mapper.prod'
                   ' -p /discovery/stingray_dsl_mapper/prod -m serverset')
        notification_timestamp = 1426859717.707331
        with open(LARGE_SERVERSET_EXAMPLE_FILE_PATH) as f:
            large_serverset_value = f.readline()
        (transformed_command, tmp_filepath) = transform_command_with_value(
            command, large_serverset_value, notification_timestamp)
        santinized_value = large_serverset_value.strip('\n').strip('\r')
        expected_tmp_filepath = ('/tmp/zk_update_largefile_'
                                 + zk_util.get_md5_digest(santinized_value)
                                 + '_' + str(notification_timestamp))
        expected_transformed_command = (
            '/usr/local/bin/zk_download_data.py -l '
            + expected_tmp_filepath
            + ' -f /var/serverset/discovery.stingray_dsl_mapper.prod'
              ' -p /discovery/stingray_dsl_mapper/prod -m serverset')
        self.assertEqual(expected_transformed_command, transformed_command)
        self.assertEqual(expected_tmp_filepath, tmp_filepath)
        # The temp file layout is "<sanitized value>\n<md5 digest>".
        with open(expected_tmp_filepath) as tmpfile:
            tmpfile_value = tmpfile.readline().rstrip('\n')
            tmpfile_md5 = tmpfile.readline()
        expected_tmpfile_value = santinized_value
        expected_tmpfile_md5 = zk_util.get_md5_digest(expected_tmpfile_value)
        self.assertEqual(expected_tmpfile_value, tmpfile_value)
        self.assertEqual(expected_tmpfile_md5, tmpfile_md5)
    finally:
        if tmp_filepath and os.path.isfile(tmp_filepath):
            os.remove(tmp_filepath)
def transform_command_with_value(command, value, notification_timestamp):
    """Rewrite a zk_download_data.py command so *value* is passed safely.

    Short values are inlined with ``-v '<value>'``.  Values longer than
    ``_LONG_VALUE_THRESHOLD`` (serversets too large for the OS argv limit)
    are stripped of newlines and written to a temp file whose name embeds
    the value's MD5 digest and *notification_timestamp*; the command is
    rewritten with ``-l <tmp_filepath>`` so the downloader reads the file.

    Returns:
        (transformed_command, tmp_filepath) — ``tmp_filepath`` is None for
        short values, and ``(None, None)`` when the temp file could not be
        written.
    """
    python_download_script = 'zk_download_data.py'
    if len(value) > _LONG_VALUE_THRESHOLD:
        # If the value is too long (serverset is too large), OSError may be
        # thrown when passing it on the command line.  Instead of passing it
        # in the command line, write it to a temp file and let
        # zk_download_data read from it.
        value = value.replace("\n", "").replace("\r", "")
        md5digest = zk_util.get_md5_digest(value)
        tmp_filename = ('zk_update_largefile_' + md5digest + '_'
                        + str(notification_timestamp))
        tmp_dir = tempfile.gettempprefix()
        tmp_filepath = os.path.join('/', tmp_dir, tmp_filename)
        log.info("This is a long value, write it to temp file %s",
                 tmp_filepath)
        try:
            # Second line is the digest so the reader can verify integrity.
            # The `with` block closes the file; the original code also
            # called f.close() in a `finally`, which raised NameError
            # (masking the real error) whenever open() itself failed.
            with open(tmp_filepath, 'w') as f:
                f.write(value + '\n' + md5digest)
        except Exception:
            # log.exception records the traceback; the original formatted
            # `e.message`, which is absent on Python 3 and on many
            # exception types, so the error path could itself crash.
            log.exception(
                "Failed to generate temp file %s for storing large size "
                "values" % tmp_filepath)
            return (None, None)
        transformed_command = command.replace(
            python_download_script,
            "%s -l %s" % (python_download_script, tmp_filepath))
        return transformed_command, tmp_filepath
    else:
        transformed_command = command.replace(
            python_download_script,
            "%s -v '%s'" % (python_download_script, value))
        return transformed_command, None
def main():
    """Entry point for zk_download_data.

    Resolves the data to write from one of four sources — an inline ``-v``
    value, a ``-l`` temp value file, S3 (config mode with ``--from-s3``),
    or ZooKeeper directly — then writes it to the ``-f`` target file via a
    temp file + copy, reporting metadata and modification time back to the
    caller.  Exits with module-level status codes on every failure path.
    """
    global args
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "-f", "--file", dest="file", metavar="FILENAME", required=True,
        help="The target file name for storing the downloaded data.",
    )
    parser.add_argument("-p", "--path", dest="path", metavar="PATH",
                        help="The zookeeper path to download data from")
    parser.add_argument("-m", "--mode", dest="mode", metavar="MODE",
                        required=True,
                        help="The download mode [config | serverset]")
    parser.add_argument(
        "-s", "--show", dest="show", action='store_true',
        help="Flag to indicate whether to show the value in zookeeper only, "
             "no change is really made when it is set.")
    parser.add_argument(
        "--allow-same-content", dest="allow_same_content",
        action='store_true',
        help="Flag to indicate whether the script thinks downloading the same "
             "content is OK, no setting this flag will make the script exit "
             "with non-zero code when the content in zookeeper is the same as "
             "the target file.")
    parser.add_argument(
        "-r", "--serverset-rejection-ratio", dest="rejection_ratio",
        metavar="RATIO", default=DEFAULT_SERVERSET_REJECTION_RATIO,
        type=float,
        help="The ratio to reject a new serverset, if the count of servers "
             "in the new serverset is smaller than the multiplication of the "
             "count of servers in the existing target file and the ratio, the "
             "new serverset will be rejected and not written to the target"
             "file, default is %.2f." % DEFAULT_SERVERSET_REJECTION_RATIO)
    parser.add_argument(
        "--serverset-minimum-size", dest="serverset_minimum_size",
        metavar="MINSIZE", default=DEFAULT_SERVERSET_MINIMUM_SIZE,
        type=int,
        help="The minimum size of a serverset. If the serverset's size ever"
             "falls below this min size, zk_update_monitor will reject the update"
             "and will not write to the target file. Default is %d."
             % DEFAULT_SERVERSET_MINIMUM_SIZE)
    parser.add_argument(
        "-v", "--value", dest="value", metavar="VALUE",
        help="The value in zookeeper, once this is provided, the script will "
             "not try to get data from zookeeper, instead it will use this "
             "provided value to write to the file. For config mode, the value "
             "is the same as the data in zookeeper, for serverset mode, the "
             "value is a list of hosts separated by ','")
    parser.add_argument("--from-s3", dest="s3_key", metavar="S3_KEY",
                        help="The S3 key to download the data from")
    parser.add_argument("--report-metadata", dest="report_metadata",
                        default=False, action='store_true',
                        help="If enabled, the downloader script "
                             "will report metadata of serverset"
                             "or config back to the caller (zk_download_data.py)")
    parser.add_argument("-l", "--from-value-file", dest="from_value_file",
                        help="To get the actual value from a file")
    parser.add_argument("-o", "--port", dest="port", metavar="PORT",
                        help="The port the flask app in ZUM is running on")
    ######## REQUIRED #########
    # ZK / S3 Configurations.
    # ###########################
    parser.add_argument(
        "-z", "--zk-hosts-file-path", dest="zk_hosts_file", metavar="ZKHOSTS",
        required=True,
        help="The path of file which have a list of Zookeeper endpoints "
             "(host:port) which keeps the metaconfig as well as "
             "the config/serversets")
    parser.add_argument(
        "-a", "--aws-key-file", dest="aws_key_file", metavar="AWSKEY",
        help="The path of the file storing AWS access and secret keys",
    )
    parser.add_argument("-b", "--s3-bucket", dest="s3_bucket",
                        metavar="BUCKET",
                        help="The S3 bucket storing metaconfigs / configs")
    parser.add_argument("-e", "--aws-s3-endpoint", dest="s3_endpoint",
                        metavar="ENDPOINT", default="s3.amazonaws.com",
                        help="The S3 endpoint storing metaconfig / configs")
    args = parser.parse_args()
    mode = args.mode.upper()
    # Populate module-level AWS/S3/ZK globals and the ZUM endpoint —
    # defined elsewhere in this module.
    initialize_zookeeper_aws_s3_configs(args)
    initialize_zk_update_monitor_endpoints(args.port)
    version = None
    notification_timestamp = time.time()
    # this is used to calculate MD5 which is consistent with the santinized
    # value in zk_update_monitor
    santinized_zk_data = None
    if mode == 'CONFIG' and args.s3_key:
        # --- Source 1: config stored in S3, located via the zk node value.
        if not AWS_KEY_FILE or not S3_BUCKET or not S3_ENDPOINT:
            log.error(
                "No AWS key file or S3 Config is provided for accessing S3.")
            exit(_NO_AWS_KEY_FILE)
        log.info("Downloading from s3 key %s in bucket %s"
                 % (args.s3_key, S3_BUCKET))
        # We use the s3_key (which is actually a prefix) concatenated with the value from zk node to
        # form the s3 path. Therefore, each time we update new data to s3, we create a new S3
        # key(can be located from the zk_node value), so as to make use of the "almost"
        # read-after-create guarantee from the s3 "special" url.
        zk_value = args.value
        if zk_value:
            log.debug("Use provided value %s" % zk_value)
        else:
            zk_value, version = \
                get_data_from_zk(args.path, mode)
        s3_path = zk_util.construct_s3_path(args.s3_key, zk_value)
        # Download data from the s3 path.
        try:
            zk_data = s3config.S3Config(
                AWS_KEY_FILE, S3_BUCKET,
                s3_endpoint=S3_ENDPOINT).get_config_string(s3_path)
        except ValueError as ve:
            # If the s3 key specified by the zk_node does not exist, this is probably due to s3
            # read-after-write inconsistency. Abort updating the local file.
            log.error(
                "Abort downloading from s3 key %s due to ValueError: %s"
                % (s3_path, str(ve)))
            exit(_S3_VALUE_ERROR_CODE)
        except Exception as e:
            log.error(
                "Abort downloading from s3 key %s due to unexpected s3 exception: %s"
                % (s3_path, str(e)))
            exit(_S3_ERROR_CODE)
    else:
        if args.value:
            # --- Source 2: value passed inline on the command line.
            log.debug("Use provided value %s" % args.value)
            if mode == 'SERVERSET':
                # remove duplicates in the server set value
                endpoints_set = set(args.value.split(','))
                verify_and_correct_endpoints_set(endpoints_set, args.file)
                zk_data = '\n'.join(endpoints_set)
            else:
                zk_data = args.value
        elif args.from_value_file:
            # --- Source 3: large value spilled to a temp file by the
            # caller; format is "<value>\n<md5 digest>" (see
            # transform_command_with_value).
            from_value_file = args.from_value_file
            log.debug("Use provided value in %s" % args.from_value_file)
            try:
                with open(from_value_file, 'r') as f:
                    # verify the file content is good.
                    value = f.readline().rstrip('\n')
                    md5digest = f.readline()
                    calculated_md5digest = zk_util.get_md5_digest(value)
                    if calculated_md5digest != md5digest:
                        log.error("Temp file %s content does not match md5"
                                  % from_value_file)
                        # NOTE(review): f.close() is redundant inside the
                        # `with` block — the context manager closes f.
                        f.close()
                        exit(_BROKEN_VALUE_FILE)
                    if mode == 'SERVERSET':
                        endpoints_set = set(value.split(','))
                        verify_and_correct_endpoints_set(endpoints_set, args.file)
                        zk_data = '\n'.join(endpoints_set)
                    else:
                        zk_data = value
            # NOTE(review): bare `except:` also swallows the SystemExit
            # raised by exit(_BROKEN_VALUE_FILE) above, re-exiting with the
            # same code but logging a misleading "Error reading" message.
            except:
                log.error("Error reading from temp file %s" % from_value_file)
                exit(_BROKEN_VALUE_FILE)
        else:
            # --- Source 4: read straight from ZooKeeper.
            zk_data, version = \
                get_data_from_zk(args.path, mode)
    if zk_data is None:
        log.error("Failed to get data from zookeeper.")
        exit(_FAILED_TO_GET_DATA_FROM_ZK_CODE)
    if args.show:
        # Dry-run: print the resolved value and make no changes.
        print(zk_data)
        return
    log.debug("Target file = %s" % (args.file))
    try:
        with open(args.file, "r") as file:
            existing_content = file.read()
    except IOError:
        # Target file does not exist yet (first download) or is unreadable.
        existing_content = None
    if args.report_metadata:
        if mode == 'SERVERSET':
            serverset_list = zk_data.split('\n')
            serverset_list.sort()
            # NOTE(review): string.join is Python 2 only; on Python 3 this
            # would need ','.join(serverset_list).
            santinized_zk_data = string.join(serverset_list, ',')
        # Get the file modification time
        if args.file and os.path.exists(args.file):
            modification_timestamp = os.path.getmtime(args.file)
        else:
            modification_timestamp = 0
        report_metadata(mode, santinized_zk_data, args.path, version,
                        notification_timestamp, modification_timestamp)
    if _same_content(existing_content, zk_data, mode):
        # NOTE(review): log.warn is a deprecated alias of log.warning.
        log.warn("The data in zookeeper is the same as the target file %s, "
                 "ignore." % args.file)
        if not args.allow_same_content:
            # Needs to exit with the special return code so the daemon can
            # ignore it.
            exit(_SAME_CONTENT_CODE)
        else:
            exit(0)
    found_stale_content = False
    if os.environ.get('__ZK_UPDATE_MONITOR_REFRESH__', False):
        # Refresh mode: detect whether the local file had drifted from ZK.
        found_stale_content = \
            _check_stale_file_content(existing_content, args.file, zk_data, mode)
    log.info("Generating file %s with the data from zookeeper..."
             % (args.file))
    # Write to a temp file first, then copy over the target, so readers
    # never observe a partially written target file.
    tmp_dir = tempfile.gettempprefix()
    tmp_filename = os.path.join('/', tmp_dir, args.file.replace("/", "_"))
    try:
        with open(tmp_filename, 'w') as f:
            f.write(zk_data)
    except:
        log.exception("Failed to generate file %s from config in zookeeper. "
                      "Data to write: %s" % (tmp_filename, zk_data))
        # NOTE(review): constant name is misspelled (_UNHANDLED_EXCPETION)
        # at its definition site elsewhere in the module.
        exit(_UNHANDLED_EXCPETION)
    shutil.copy2(tmp_filename, args.file)
    log.info("File is generated.")
    # Report the file modification time.
    report_file_modification(mode, args.path, time.time())
    if found_stale_content:
        exit(_STALE_LOCAL_FILE_CODE)
def main():
    """Entry point for zk_download_data.

    Resolves the data to write from one of four sources — an inline ``-v``
    value, a ``-l`` temp value file, S3 (config mode with ``--from-s3``),
    or ZooKeeper directly — then writes it to the ``-f`` target file via a
    temp file + copy, reporting metadata and modification time back to the
    caller.  Exits with module-level status codes on every failure path.
    """
    global args
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "-f", "--file", dest="file", metavar="FILENAME", required=True,
        help="The target file name for storing the downloaded data.",
    )
    parser.add_argument("-p", "--path", dest="path", metavar="PATH",
                        help="The zookeeper path to download data from")
    parser.add_argument("-m", "--mode", dest="mode", metavar="MODE",
                        required=True,
                        help="The download mode [config | serverset]")
    parser.add_argument(
        "-s", "--show", dest="show", action='store_true',
        help="Flag to indicate whether to show the value in zookeeper only, "
             "no change is really made when it is set.")
    parser.add_argument(
        "--allow-same-content", dest="allow_same_content",
        action='store_true',
        help="Flag to indicate whether the script thinks downloading the same "
             "content is OK, no setting this flag will make the script exit "
             "with non-zero code when the content in zookeeper is the same as "
             "the target file.")
    parser.add_argument(
        "-r", "--serverset-rejection-ratio", dest="rejection_ratio",
        metavar="RATIO", default=DEFAULT_SERVERSET_REJECTION_RATIO,
        type=float,
        help="The ratio to reject a new serverset, if the count of servers "
             "in the new serverset is smaller than the multiplication of the "
             "count of servers in the existing target file and the ratio, the "
             "new serverset will be rejected and not written to the target"
             "file, default is %.2f." % DEFAULT_SERVERSET_REJECTION_RATIO)
    parser.add_argument(
        "--serverset-minimum-size", dest="serverset_minimum_size",
        metavar="MINSIZE", default=DEFAULT_SERVERSET_MINIMUM_SIZE,
        type=int,
        help="The minimum size of a serverset. If the serverset's size ever"
             "falls below this min size, zk_update_monitor will reject the update"
             "and will not write to the target file. Default is %d."
             % DEFAULT_SERVERSET_MINIMUM_SIZE)
    parser.add_argument(
        "-v", "--value", dest="value", metavar="VALUE",
        help="The value in zookeeper, once this is provided, the script will "
             "not try to get data from zookeeper, instead it will use this "
             "provided value to write to the file. For config mode, the value "
             "is the same as the data in zookeeper, for serverset mode, the "
             "value is a list of hosts separated by ','")
    parser.add_argument("--from-s3", dest="s3_key", metavar="S3_KEY",
                        help="The S3 key to download the data from")
    parser.add_argument("--report-metadata", dest="report_metadata",
                        default=False, action='store_true',
                        help="If enabled, the downloader script "
                             "will report metadata of serverset"
                             "or config back to the caller (zk_download_data.py)")
    parser.add_argument("-l", "--from-value-file", dest="from_value_file",
                        help="To get the actual value from a file")
    parser.add_argument("-o", "--port", dest="port", metavar="PORT",
                        help="The port the flask app in ZUM is running on")
    ######## REQUIRED #########
    # ZK / S3 Configurations.
    # ###########################
    parser.add_argument(
        "-z", "--zk-hosts-file-path", dest="zk_hosts_file", metavar="ZKHOSTS",
        required=True,
        help="The path of file which have a list of Zookeeper endpoints "
             "(host:port) which keeps the metaconfig as well as "
             "the config/serversets")
    parser.add_argument(
        "-a", "--aws-key-file", dest="aws_key_file", metavar="AWSKEY",
        help="The path of the file storing AWS access and secret keys",
    )
    parser.add_argument("-b", "--s3-bucket", dest="s3_bucket",
                        metavar="BUCKET",
                        help="The S3 bucket storing metaconfigs / configs")
    parser.add_argument("-e", "--aws-s3-endpoint", dest="s3_endpoint",
                        metavar="ENDPOINT", default="s3.amazonaws.com",
                        help="The S3 endpoint storing metaconfig / configs")
    args = parser.parse_args()
    mode = args.mode.upper()
    # Populate module-level AWS/S3/ZK globals and the ZUM endpoint —
    # defined elsewhere in this module.
    initialize_zookeeper_aws_s3_configs(args)
    initialize_zk_update_monitor_endpoints(args.port)
    version = None
    notification_timestamp = time.time()
    # this is used to calculate MD5 which is consistent with the santinized
    # value in zk_update_monitor
    santinized_zk_data = None
    if mode == 'CONFIG' and args.s3_key:
        # --- Source 1: config stored in S3, located via the zk node value.
        if not AWS_KEY_FILE or not S3_BUCKET or not S3_ENDPOINT:
            log.error("No AWS key file or S3 Config is provided for accessing S3.")
            exit(_NO_AWS_KEY_FILE)
        log.info("Downloading from s3 key %s in bucket %s"
                 % (args.s3_key, S3_BUCKET))
        # We use the s3_key (which is actually a prefix) concatenated with the value from zk node to
        # form the s3 path. Therefore, each time we update new data to s3, we create a new S3
        # key(can be located from the zk_node value), so as to make use of the "almost"
        # read-after-create guarantee from the s3 "special" url.
        zk_value = args.value
        if zk_value:
            log.debug("Use provided value %s" % zk_value)
        else:
            zk_value, version = \
                get_data_from_zk(args.path, mode)
        s3_path = zk_util.construct_s3_path(args.s3_key, zk_value)
        # Download data from the s3 path.
        try:
            zk_data = s3config.S3Config(
                AWS_KEY_FILE, S3_BUCKET,
                s3_endpoint=S3_ENDPOINT).get_config_string(s3_path)
        except ValueError as ve:
            # If the s3 key specified by the zk_node does not exist, this is probably due to s3
            # read-after-write inconsistency. Abort updating the local file.
            log.error("Abort downloading from s3 key %s due to ValueError: %s"
                      % (s3_path, str(ve)))
            exit(_S3_VALUE_ERROR_CODE)
        except Exception as e:
            log.error("Abort downloading from s3 key %s due to unexpected s3 exception: %s"
                      % (s3_path, str(e)))
            exit(_S3_ERROR_CODE)
    else:
        if args.value:
            # --- Source 2: value passed inline on the command line.
            log.debug("Use provided value %s" % args.value)
            if mode == 'SERVERSET':
                # remove duplicates in the server set value
                endpoints_set = set(args.value.split(','))
                verify_and_correct_endpoints_set(endpoints_set, args.file)
                zk_data = '\n'.join(endpoints_set)
            else:
                zk_data = args.value
        elif args.from_value_file:
            # --- Source 3: large value spilled to a temp file by the
            # caller; format is "<value>\n<md5 digest>" (see
            # transform_command_with_value).
            from_value_file = args.from_value_file
            log.debug("Use provided value in %s" % args.from_value_file)
            try:
                with open(from_value_file, 'r') as f:
                    # verify the file content is good.
                    value = f.readline().rstrip('\n')
                    md5digest = f.readline()
                    calculated_md5digest = zk_util.get_md5_digest(value)
                    if calculated_md5digest != md5digest:
                        log.error("Temp file %s content does not match md5"
                                  % from_value_file)
                        # NOTE(review): f.close() is redundant inside the
                        # `with` block — the context manager closes f.
                        f.close()
                        exit(_BROKEN_VALUE_FILE)
                    if mode == 'SERVERSET':
                        endpoints_set = set(value.split(','))
                        verify_and_correct_endpoints_set(endpoints_set, args.file)
                        zk_data = '\n'.join(endpoints_set)
                    else:
                        zk_data = value
            # NOTE(review): bare `except:` also swallows the SystemExit
            # raised by exit(_BROKEN_VALUE_FILE) above, re-exiting with the
            # same code but logging a misleading "Error reading" message.
            except:
                log.error("Error reading from temp file %s" % from_value_file)
                exit(_BROKEN_VALUE_FILE)
        else:
            # --- Source 4: read straight from ZooKeeper.
            zk_data, version = \
                get_data_from_zk(args.path, mode)
    if zk_data is None:
        log.error("Failed to get data from zookeeper.")
        exit(_FAILED_TO_GET_DATA_FROM_ZK_CODE)
    if args.show:
        # Dry-run: print the resolved value and make no changes.
        print(zk_data)
        return
    log.debug("Target file = %s" % (args.file))
    try:
        with open(args.file, "r") as file:
            existing_content = file.read()
    except IOError:
        # Target file does not exist yet (first download) or is unreadable.
        existing_content = None
    if args.report_metadata:
        if mode == 'SERVERSET':
            serverset_list = zk_data.split('\n')
            serverset_list.sort()
            # NOTE(review): string.join is Python 2 only; on Python 3 this
            # would need ','.join(serverset_list).
            santinized_zk_data = string.join(serverset_list, ',')
        # Get the file modification time
        if args.file and os.path.exists(args.file):
            modification_timestamp = os.path.getmtime(args.file)
        else:
            modification_timestamp = 0
        report_metadata(mode, santinized_zk_data, args.path, version,
                        notification_timestamp, modification_timestamp)
    if _same_content(existing_content, zk_data, mode):
        # NOTE(review): log.warn is a deprecated alias of log.warning.
        log.warn("The data in zookeeper is the same as the target file %s, "
                 "ignore." % args.file)
        if not args.allow_same_content:
            # Needs to exit with the special return code so the daemon can
            # ignore it.
            exit(_SAME_CONTENT_CODE)
        else:
            exit(0)
    found_stale_content = False
    if os.environ.get('__ZK_UPDATE_MONITOR_REFRESH__', False):
        # Refresh mode: detect whether the local file had drifted from ZK.
        found_stale_content = \
            _check_stale_file_content(existing_content, args.file, zk_data, mode)
    log.info("Generating file %s with the data from zookeeper..."
             % (args.file))
    # Write to a temp file first, then copy over the target, so readers
    # never observe a partially written target file.
    tmp_dir = tempfile.gettempprefix()
    tmp_filename = os.path.join('/', tmp_dir, args.file.replace("/", "_"))
    try:
        with open(tmp_filename, 'w') as f:
            f.write(zk_data)
    except:
        log.exception("Failed to generate file %s from config in zookeeper. "
                      "Data to write: %s" % (tmp_filename, zk_data))
        # NOTE(review): constant name is misspelled (_UNHANDLED_EXCPETION)
        # at its definition site elsewhere in the module.
        exit(_UNHANDLED_EXCPETION)
    shutil.copy2(tmp_filename, args.file)
    log.info("File is generated.")
    # Report the file modification time.
    report_file_modification(mode, args.path, time.time())
    if found_stale_content:
        exit(_STALE_LOCAL_FILE_CODE)