Ejemplo n.º 1
0
def main() -> None:
    # Configure command line flags.
    parser = argparse.ArgumentParser(
        description='Validate TZ zone files with ZoneSpecifier.')

    # Extractor flags.
    parser.add_argument('--input_dir',
                        help='Location of the input directory',
                        required=True)

    # Transformer flags.
    parser.add_argument(
        '--scope',
        # basic: 241 of the simpler time zones for BasicZoneSpecifier
        # extended: all 348 time zones for ExtendedZoneSpecifier
        choices=['basic', 'extended'],
        help='Size of the generated database (basic|extended)',
        required=True,
    )
    parser.add_argument(
        '--start_year',
        help='Start year of Zone Eras (default: 2000)',
        type=int,
        default=2000,
    )
    parser.add_argument(
        '--until_year',
        help='Until year of Zone Eras (default: 2038)',
        type=int,
        default=2038,
    )

    parser.add_argument(
        '--granularity',
        help=(
            'If given, overrides the other granularity flags to '
            'truncate UNTIL, AT, STDOFF (offset), SAVE (delta) and '
            'RULES (rulesDelta) fields to this many seconds (default: None)'),
        type=int,
    )
    parser.add_argument(
        '--until_at_granularity',
        help=(
            'Truncate UNTIL and AT fields to this many seconds (default: 60)'),
        type=int,
    )
    parser.add_argument(
        '--offset_granularity',
        help=('Truncate STDOFF (offset) fields to this many seconds'
              '(default: 900 (basic), 60 (extended))'),
        type=int,
    )
    parser.add_argument(
        '--delta_granularity',
        help=('Truncate SAVE (delta) and RULES (rulesDelta) field to this many'
              'seconds (default: 900)'),
        type=int,
    )

    parser.add_argument(
        '--strict',
        help='Remove zones and rules not aligned at granularity time boundary',
        action='store_true',
        default=True,
    )
    parser.add_argument(
        '--nostrict',
        help='Retain zones and rules not aligned at granularity time boundary',
        action='store_false',
        dest='strict',
    )

    # Validator flags.
    parser.add_argument(
        '--zone',
        help='Name of time zone to validate (default: all zones)',
    )
    parser.add_argument(
        '--year',
        help='Year to validate (default: start_year, until_year)',
        type=int,
    )
    parser.add_argument('--validate_buffer_size',
                        help='Validate the transition buffer size',
                        action="store_true")
    parser.add_argument('--validate_test_data',
                        help='Validate the TestDataGenerator with pytz',
                        action="store_true")
    parser.add_argument(
        '--validate_dst_offset',
        # Not enabled by default because pytz DST seems to be buggy.
        help='Validate the DST offset as well as the total UTC offset',
        action="store_true")
    parser.add_argument('--debug_validator',
                        help='Enable debug output from Validator',
                        action="store_true")

    # ZoneSpecifier flags
    parser.add_argument(
        '--viewing_months',
        help='Number of months to use for calculations (13, 14, 36)',
        type=int,
        default=14)
    parser.add_argument('--debug_specifier',
                        help='Enable debug output from ZoneSpecifier',
                        action="store_true")
    parser.add_argument(
        '--in_place_transitions',
        help='Use in-place Transition array to determine Active Transitions',
        action="store_true")
    parser.add_argument('--optimize_candidates',
                        help='Optimize the candidate transitions',
                        action='store_true')

    # TestDataGenerator flag.
    #
    # pytz cannot handle dates after the end of 32-bit Unix time_t type
    # (2038-01-19T03:14:07Z), see
    # https://answers.launchpad.net/pytz/+question/262216, so the
    # validation_until_year cannot be greater than 2038.
    parser.add_argument(
        '--validation_start_year',
        help='Start year of ZoneSpecifier validation (default: start_year)',
        type=int,
        default=0)
    parser.add_argument(
        '--validation_until_year',
        help='Until year of ZoneSpecifier validation (default: 2038)',
        type=int,
        default=0)

    # Parse the command line arguments
    args = parser.parse_args()

    # Configure logging. This should normally be executed after the
    # parser.parse_args() because it allows us set the logging.level using a
    # flag.
    logging.basicConfig(level=logging.INFO)

    # Define scope-dependent granularity if not overridden by flag
    if args.granularity:
        until_at_granularity = args.granularity
        offset_granularity = args.granularity
        delta_granularity = args.granularity
    else:
        if args.until_at_granularity:
            until_at_granularity = args.until_at_granularity
        else:
            until_at_granularity = 60

        if args.offset_granularity:
            offset_granularity = args.offset_granularity
        else:
            if args.scope == 'basic':
                offset_granularity = 900
            else:
                offset_granularity = 60

        if args.delta_granularity:
            delta_granularity = args.delta_granularity
        else:
            delta_granularity = 900

    logging.info('Granularity for UNTIL/AT: %d', until_at_granularity)
    logging.info('Granularity for STDOFF (offset): %d', offset_granularity)
    logging.info(
        'Granularity for RULES (rulesDelta) and SAVE (delta): %d',
        delta_granularity,
    )

    # Extract the TZ files
    logging.info('======== Extracting TZ Data files')
    extractor = Extractor(args.input_dir)
    extractor.parse()
    extractor.print_summary()
    policies_map, zones_map, links_map = extractor.get_data()

    # Create initial TransformerResult
    tresult = TransformerResult(
        zones_map=zones_map,
        policies_map=policies_map,
        links_map=links_map,
        removed_zones={},
        removed_policies={},
        removed_links={},
        notable_zones={},
        notable_policies={},
        notable_links={},
        zone_ids={},
        letters_per_policy={},
        letters_map={},
        formats_map={},
    )

    # Transform the TZ zones and rules
    logging.info('======== Transforming Zones and Rules')
    logging.info('Extracting years [%d, %d)', args.start_year, args.until_year)
    transformer = Transformer(
        tresult=tresult,
        scope=args.scope,
        start_year=args.start_year,
        until_year=args.until_year,
        until_at_granularity=until_at_granularity,
        offset_granularity=offset_granularity,
        delta_granularity=delta_granularity,
        strict=args.strict,
    )
    transformer.transform()
    transformer.print_summary()
    tresult = transformer.get_data()

    # Generate internal versions of zone_infos and zone_policies
    # so that ZoneSpecifier can be created.
    logging.info('======== Generating inlined zone_infos and zone_policies')
    inline_zone_info = InlineZoneInfo(tresult.zones_map, tresult.policies_map)
    zone_infos, zone_policies = inline_zone_info.generate_zonedb()
    logging.info('Inlined zone_infos=%d; zone_policies=%d', len(zone_infos),
                 len(zone_policies))

    # Set the defaults for validation_start_year and validation_until_year
    # if they were not specified.
    validation_start_year = (args.start_year if args.validation_start_year == 0
                             else args.validation_start_year)
    validation_until_year = (args.until_year if args.validation_until_year == 0
                             else args.validation_until_year)

    validate(
        zone_infos=zone_infos,
        zone_policies=zone_policies,
        zone=args.zone,
        year=args.year,
        start_year=validation_start_year,
        until_year=validation_until_year,
        validate_buffer_size=args.validate_buffer_size,
        validate_test_data=args.validate_test_data,
        viewing_months=args.viewing_months,
        validate_dst_offset=args.validate_dst_offset,
        debug_validator=args.debug_validator,
        debug_specifier=args.debug_specifier,
        in_place_transitions=args.in_place_transitions,
        optimize_candidates=args.optimize_candidates,
    )

    logging.info('======== Finished processing TZ Data files.')
Ejemplo n.º 2
0
def main() -> None:
    """
    Main driver for TZ Database compiler which parses the IANA TZ Database files
    located at the --input_dir and generates zoneinfo files and validation
    datasets for unit tests at --output_dir.

    Usage:
        tzcompiler.py [flags...]
    """
    # Configure command line flags.
    parser = argparse.ArgumentParser(description='Generate Zone Info.')

    # Extractor flags.
    parser.add_argument('--input_dir',
                        help='Location of the input directory',
                        required=True)

    # Transformer flags.
    parser.add_argument(
        '--scope',
        # basic: 241 of the simpler time zones for BasicZoneSpecifier
        # extended: all 348 time zones for ExtendedZoneSpecifier
        choices=['basic', 'extended'],
        help='Size of the generated database (basic|extended)',
        required=True)
    parser.add_argument('--start_year',
                        help='Start year of Zone Eras (default: 2000)',
                        type=int,
                        default=2000)
    parser.add_argument('--until_year',
                        help='Until year of Zone Eras (default: 2038)',
                        type=int,
                        default=2038)

    parser.add_argument(
        '--granularity',
        help=(
            'If given, overrides the other granularity flags to '
            'truncate UNTIL, AT, STDOFF (offset), SAVE (delta) and '
            'RULES (rulesDelta) fields to this many seconds (default: None)'),
        type=int,
    )
    parser.add_argument(
        '--until_at_granularity',
        help=(
            'Truncate UNTIL and AT fields to this many seconds (default: 60)'),
        type=int,
    )
    parser.add_argument(
        '--offset_granularity',
        help=('Truncate STDOFF (offset) fields to this many seconds'
              '(default: 900 (basic), 60 (extended))'),
        type=int,
    )
    parser.add_argument(
        '--delta_granularity',
        help=('Truncate SAVE (delta) and RULES (rulesDelta) field to this many'
              'seconds (default: 900)'),
        type=int,
    )

    # Make --strict the default, --nostrict optional.
    parser.add_argument(
        '--strict',
        help='Remove zones and rules not aligned at granularity time boundary',
        action='store_true',
        default=True,
    )
    parser.add_argument(
        '--nostrict',
        help='Retain zones and rules not aligned at granularity time boundary',
        action='store_false',
        dest='strict',
    )

    # Data pipeline selectors. Reduced down to a single 'zonedb' option which
    # is the default.
    parser.add_argument(
        '--action',
        help='Action to perform (zonedb)',
        default='zonedb',
    )

    # Language selector (for --action zonedb).
    parser.add_argument(
        '--language',
        help='Comma-separated list of target languages '
        '(arduino|python|json|zonelist)',
        default='',
    )

    # C++ namespace names for '--language arduino'. If not specified, it will
    # automatically be set to 'zonedb' or 'zonedbx' depending on the 'scope'.
    parser.add_argument(
        '--db_namespace',
        help='C++ namespace for the zonedb files (default: zonedb or zonedbx)',
    )

    # For language=json, specify the output file.
    parser.add_argument(
        '--json_file',
        help='The JSON output file (default: zonedb.json)',
        default='zonedb.json',
    )

    # The tz_version does not affect any data processing. Its value is
    # copied into the various generated files and usually placed in the
    # comments section to describe the source of the data that generated the
    # various files.
    parser.add_argument(
        '--tz_version',
        help='Version string of the TZ files',
        required=True,
    )

    # Target location of the generated files.
    parser.add_argument(
        '--output_dir',
        help='Location of the output directory',
        default='',
    )

    # Flag to ignore max_buf_size check. Needed on ExtendedHinnantDateTest if we
    # want to test the extended year range from 1974 to 2050, because one of the
    # zones requires a buf_size=9, but ExtendedZoneProcessor only supports 8.
    parser.add_argument(
        '--ignore_buf_size_too_large',
        help='Ignore transition buf size too large',
        action='store_true',
    )

    # Parse the command line arguments
    args = parser.parse_args()

    # Manually parse the comma-separated --action.
    languages = set(args.language.split(','))
    allowed_languages = set(['arduino', 'python', 'json', 'zonelist'])
    if not languages.issubset(allowed_languages):
        print(f'Invalid --language: {languages - allowed_languages}')
        sys.exit(1)

    # Configure logging. This should normally be executed after the
    # parser.parse_args() because it allows us set the logging.level using a
    # flag.
    logging.basicConfig(level=logging.INFO)

    # How the script was invoked
    invocation = ' '.join(sys.argv)

    # Define scope-dependent granularity if not overridden by flag
    if args.granularity:
        until_at_granularity = args.granularity
        offset_granularity = args.granularity
        delta_granularity = args.granularity
    else:
        if args.until_at_granularity:
            until_at_granularity = args.until_at_granularity
        else:
            until_at_granularity = 60

        if args.offset_granularity:
            offset_granularity = args.offset_granularity
        else:
            if args.scope == 'basic':
                offset_granularity = 900
            else:
                offset_granularity = 60

        if args.delta_granularity:
            delta_granularity = args.delta_granularity
        else:
            delta_granularity = 900

    logging.info('======== TZ Compiler settings')
    logging.info(f'Scope: {args.scope}')
    logging.info(
        f'Start year: {args.start_year}; Until year: {args.until_year}')
    logging.info(f'Strict: {args.strict}')
    logging.info(f'TZ Version: {args.tz_version}')
    logging.info('Ignore too large transition buf_size: '
                 f'{args.ignore_buf_size_too_large}')
    logging.info('Granularity for UNTIL/AT: %d', until_at_granularity)
    logging.info('Granularity for STDOFF (offset): %d', offset_granularity)
    logging.info(
        'Granularity for RULES (rulesDelta) and SAVE (delta): %d',
        delta_granularity,
    )

    # Extract the TZ files
    logging.info('======== Extracting TZ Data files')
    extractor = Extractor(args.input_dir)
    extractor.parse()
    extractor.print_summary()
    policies_map, zones_map, links_map = extractor.get_data()

    # Create initial TransformerResult
    tresult = TransformerResult(
        zones_map=zones_map,
        policies_map=policies_map,
        links_map=links_map,
        removed_zones={},
        removed_policies={},
        removed_links={},
        notable_zones={},
        notable_policies={},
        notable_links={},
        zone_ids={},
        letters_per_policy={},
        letters_map={},
        formats_map={},
    )

    # Transform the TZ zones and rules
    logging.info('======== Transforming Zones and Rules')
    logging.info('Extracting years [%d, %d)', args.start_year, args.until_year)
    transformer = Transformer(
        tresult=tresult,
        scope=args.scope,
        start_year=args.start_year,
        until_year=args.until_year,
        until_at_granularity=until_at_granularity,
        offset_granularity=offset_granularity,
        delta_granularity=delta_granularity,
        strict=args.strict,
    )
    transformer.transform()
    transformer.print_summary()
    tresult = transformer.get_data()

    # Generate the fields for the Arduino zoneinfo data.
    logging.info('======== Transforming to Arduino Zones and Rules')
    arduino_transformer = ArduinoTransformer(
        tresult=tresult,
        scope=args.scope,
        start_year=args.start_year,
        until_year=args.until_year,
    )
    arduino_transformer.transform()
    arduino_transformer.print_summary()
    tresult = arduino_transformer.get_data()

    # Estimate the buffer size of ExtendedZoneProcessor.TransitionStorage.
    logging.info('======== Estimating transition buffer sizes')
    logging.info('Checking years in [%d, %d)', args.start_year,
                 args.until_year)
    estimator = BufSizeEstimator(
        zones_map=tresult.zones_map,
        policies_map=tresult.policies_map,
        start_year=args.start_year,
        until_year=args.until_year,
    )
    buf_size_info: BufSizeInfo = estimator.estimate()

    # Check if the estimated buffer size is too big
    if buf_size_info['max_buf_size'] > EXTENDED_ZONE_PROCESSOR_MAX_TRANSITIONS:
        msg = (f"Max buffer size={buf_size_info['max_buf_size']} "
               f"is larger than ExtendedZoneProcessor.kMaxTransitions="
               f"{EXTENDED_ZONE_PROCESSOR_MAX_TRANSITIONS}")
        if args.ignore_buf_size_too_large:
            logging.warning(msg)
        else:
            raise Exception(msg)

    # Collect TZ DB data into a single JSON-serializable object.
    zidb = create_zone_info_database(
        tz_version=args.tz_version,
        tz_files=Extractor.ZONE_FILES,
        scope=args.scope,
        start_year=args.start_year,
        until_year=args.until_year,
        until_at_granularity=until_at_granularity,
        offset_granularity=offset_granularity,
        delta_granularity=delta_granularity,
        strict=args.strict,
        tresult=tresult,
        buf_size_info=buf_size_info,
    )

    if args.action == 'zonedb':
        logging.info('======== Generating zonedb files')
        for language in languages:
            generate_zonedb(
                invocation=invocation,
                db_namespace=args.db_namespace,
                language=language,
                output_dir=args.output_dir,
                zidb=zidb,
                json_file=args.json_file,
            )
    else:
        logging.error(f"Unrecognized action '{args.action}'")
        sys.exit(1)

    logging.info('======== Finished processing TZ Data files.')