def handle_declare(self, declare):
    """Register one coding declaration into `self.clusters`.

    `declare` is either a plain string (an official coding name without
    aliases) or a sequence whose first item is the official name and whose
    remaining items are aliases.

    `self.clusters` maps every cleaned name to a list shared by all names
    of the same coding.  The first element of that list is always the
    official name; the other elements are cleaned aliases.
    """
    # Split out the official coding name and its aliases.
    if isinstance(declare, str):
        official = declare
        aliases = ()
    else:
        official = declare[0]
        aliases = [recode.cleaned_alias(alias) for alias in declare[1:]]
    coding = recode.cleaned_alias(official)
    # Use coding's cluster, create it if necessary.
    if coding not in self.clusters:
        self.clusters[coding] = [official]
    cluster = self.clusters[coding]
    # Add aliases or clusters to the current cluster.
    for alias in aliases:
        if alias not in self.clusters:
            # Brand new alias: add it to the current cluster.
            cluster.append(alias)
            self.clusters[alias] = cluster
            continue
        existing = self.clusters[alias]
        if existing is cluster:
            # The alias already belongs to this very cluster.  Merging a
            # cluster into itself would only append duplicates of its own
            # members, so there is nothing to do.
            continue
        # Merge the current cluster into the pre-existing alias' cluster.
        # The first element of the current cluster is cleaned so it becomes
        # a mere alias; the official name of `existing' is kept.
        for element in [recode.cleaned_alias(cluster[0])] + cluster[1:]:
            existing.append(element)
            self.clusters[element] = existing
        cluster = existing
def produce_counts(self):
    """Request every ordered pair of distinct charsets and count the arcs.

    Resets `self.recode_calls`, `self.original` and `self.shrunk`, then,
    for each `before..after' request, feeds the arcs returned by
    `recode.Recodec(request).encoding_arcs()' to both statistics objects.
    Progress is reported on standard error.
    """
    self.recode_calls = 0
    self.original = Stats()
    self.shrunk = Stats()
    # Get the list of charsets, as (sort key, name) pairs.
    if self.charset_options:
        befores = [(charset, charset) for charset in self.charset_options]
        afters = befores
    else:
        befores = {}
        afters = {}
        for before, after in recode.registry.methods:
            if recode.TRIVIAL_SURFACE not in (before, after):
                befores[recode.cleaned_alias(before)] = before
                afters[recode.cleaned_alias(after)] = after
        # sorted() returns a list whether items() yields a list or a view,
        # whereas calling .sort() on the result of items() fails when that
        # result is a dictionary view.
        befores = sorted(befores.items())
        afters = sorted(afters.items())
    # Recode in all combinations.
    total_befores = len(befores)
    total_afters = len(afters)
    sys.stderr.write("Attempting %d (%d x %d) recodings.\n"
                     % (total_befores * total_afters,
                        total_befores, total_afters))
    for index, (_, before) in enumerate(befores):
        sys.stderr.write(" %d/%d. %s..*\n"
                         % (index + 1, total_befores, before))
        for _, after in afters:
            if after != before:
                request = '%s..%s' % (before, after)
                arcs = recode.Recodec(request).encoding_arcs()
                self.recode_calls += 1
                # NOTE(review): both statistics receive the same arcs here;
                # confirm whether `shrunk' was meant to get a reduced list.
                self.original.count_request(arcs)
                self.shrunk.count_request(arcs)
def produce_counts(self):
    """Count recoding arcs over all ordered pairs of distinct charsets.

    The charsets come from `self.charset_options' when it is non-empty,
    otherwise from the endpoints of `recode.registry.methods' (skipping
    any method that involves `recode.TRIVIAL_SURFACE').  The counters
    `self.recode_calls', `self.original' and `self.shrunk' are reset
    before counting starts.
    """
    self.recode_calls = 0
    self.original = Stats()
    self.shrunk = Stats()
    # Get the list of charsets.
    if self.charset_options:
        befores = [(charset, charset) for charset in self.charset_options]
        afters = befores
    else:
        before_names = {}
        after_names = {}
        for before, after in recode.registry.methods:
            if recode.TRIVIAL_SURFACE in (before, after):
                continue
            before_names[recode.cleaned_alias(before)] = before
            after_names[recode.cleaned_alias(after)] = after
        # Build sorted lists of (cleaned name, name) pairs.  Using sorted()
        # avoids relying on items() returning a list with a sort() method.
        befores = sorted(before_names.items())
        afters = sorted(after_names.items())
    # Recode in all combinations.
    sys.stderr.write(
        "Attempting %d (%d x %d) recodings.\n"
        % (len(befores) * len(afters), len(befores), len(afters)))
    count = 0
    for _, before in befores:
        count += 1
        sys.stderr.write(" %d/%d. %s..*\n" % (count, len(befores), before))
        for _, after in afters:
            if after == before:
                # A charset is never recoded into itself.
                continue
            request = '%s..%s' % (before, after)
            arcs = recode.Recodec(request).encoding_arcs()
            self.recode_calls += 1
            self.original.count_request(arcs)
            self.shrunk.count_request(arcs)
def main(self, *arguments):
    """Generate `preset.py' from the codec modules named in `arguments'.

    `arguments' holds an optional `-v' flag (verbose diagnostics) followed
    by module names, each imported as `Recode.<name>'.  The generated file
    contains an `aliases' dictionary and a `methods' dictionary.
    """
    # Decode options.
    import getopt
    options, arguments = getopt.getopt(arguments, 'v')
    for option, value in options:
        if option == '-v':
            # NOTE(review): `verbose' is only assigned when -v is given,
            # yet handle_codec reads it; presumably a default exists
            # elsewhere (e.g. on the class) -- confirm.
            self.verbose = True
    # Import all modules.  __import__ returns the top-level package, so
    # the submodule is fetched from it by attribute.
    modules = [getattr(__import__('Recode.' + module_name), module_name)
               for module_name in arguments]
    # Register aliases into clusters.
    self.clusters = {}
    self.handle_declare(recode.UNICODE_STRING)
    self.handle_declare((recode.TRIVIAL_SURFACE, 'Data'))
    for module in modules:
        try:
            declares = module.declares
        except AttributeError:
            sys.stderr.write("No `declares' in `%s'\n" % module.__file__)
        else:
            for declare in declares:
                self.handle_declare(declare)
    # Register implied surfaces.
    self.implied = {}
    for module in modules:
        if hasattr(module, 'implied_surfaces'):
            for alias, surface in module.implied_surfaces:
                self.implied[recode.cleaned_alias(alias)] = (
                    recode.cleaned_alias(surface))
    # Register recode methods.  Any module attribute carrying both
    # `internal_coding' and `external_coding' is treated as a codec.
    self.methods = {}
    for module, module_name in zip(modules, arguments):
        for name in dir(module):
            codec = getattr(module, name)
            if (hasattr(codec, 'internal_coding')
                    and hasattr(codec, 'external_coding')):
                self.handle_codec(module_name, name, codec)
    # Write out the Python source.
    write = common.Output('preset.py', 'Python').write
    write('\n'
          'aliases = {\n')
    # sorted() works whether items() returns a list or a dictionary view;
    # calling .sort() on the result of items() does not.
    for alias, cluster in sorted(self.clusters.items()):
        write(' %r: (%r, %r),\n'
              % (alias, cluster[0], self.implied.get(alias)))
    write(' }\n'
          '\n'
          'methods = {\n')
    for (before, after), (module_name, codec_name, use_encode) in sorted(
            self.methods.items()):
        write(' (%r, %r): (%r, %r, %r),\n'
              % (before, after, module_name, codec_name, use_encode))
    write(' }\n')
def handle_codec(self, module_name, codec_name, codec):
    """Record the recoding methods offered by `codec' in `self.methods'.

    The internal and external codings of the codec are resolved to their
    official names through `self.clusters'.  Each direction whose function
    is not None is stored under the key (before, after) as the tuple
    (module_name, codec_name, use_encode).  When a key is already taken,
    only the `builtin' module replaces the earlier entry; any other module
    leaves it untouched.  With `self.verbose' set, either outcome is
    reported on standard error.
    """
    internal = self.clusters[recode.cleaned_alias(codec.internal_coding)][0]
    external = self.clusters[recode.cleaned_alias(codec.external_coding)][0]
    directions = (
        (codec.encode, internal, external, True),
        (codec.decode, external, internal, False))
    for converter, source, target, use_encode in directions:
        if converter is None:
            continue
        key = (source, target)
        if key in self.methods:
            previous = self.methods[key][0]
            # The message always reads "Overriding <loser> by <winner>".
            if module_name == 'builtin':
                loser, winner = previous, module_name
            else:
                loser, winner = module_name, previous
            if self.verbose:
                sys.stderr.write(
                    "Overriding `%s' by `%s' for `%s..%s'.\n"
                    % (loser, winner, source, target))
            if module_name != 'builtin':
                # Keep the method which was registered first.
                continue
        self.methods[key] = (module_name, codec_name, use_encode)
def handle_codec(self, module_name, codec_name, codec):
    """Register the encode and decode directions of `codec'.

    Entries of `self.methods' are keyed by (before, after) official coding
    names and hold (module_name, codec_name, direction), where direction
    is True for encoding and False for decoding.  A direction whose
    function is None is skipped.  An already registered pair is replaced
    only when `module_name' is "builtin".
    """
    clusters = self.clusters
    internal = clusters[recode.cleaned_alias(codec.internal_coding)][0]
    external = clusters[recode.cleaned_alias(codec.external_coding)][0]
    is_builtin = module_name == "builtin"
    candidates = [
        (codec.encode, internal, external, True),
        (codec.decode, external, internal, False),
    ]
    for check, before, after, direction in candidates:
        if check is None:
            continue
        registered = (before, after) in self.methods
        if registered and self.verbose:
            holder = self.methods[before, after][0]
            # First name is the module losing the pair, second the winner.
            if is_builtin:
                names = (holder, module_name)
            else:
                names = (module_name, holder)
            sys.stderr.write(
                "Overriding `%s' by `%s' for `%s..%s'.\n"
                % (names + (before, after))
            )
        if registered and not is_builtin:
            # Non-builtin modules never displace an existing method.
            continue
        self.methods[before, after] = (module_name, codec_name, direction)
def save_alias(self, base, alias):
    """Remember `alias' as another name for `base' in `self.aliases'.

    An entry (possibly empty) is always created for `base'.  The alias is
    not recorded when it cleans to the same name as `base' itself.
    """
    known = self.aliases.setdefault(base, [])
    if recode.cleaned_alias(alias) == recode.cleaned_alias(base):
        return
    known.append(alias)
def main(self, *arguments):
    """Generate the C source `strip.c' out of all strip data.

    No argument is accepted.  The output holds a pool of UCS-2 strips
    shared by all charsets, one `struct strip_data' per charset indexing
    into that pool, and a `module_strips' function which declares each
    charset together with its aliases and implied surfaces.
    """
    assert not arguments, arguments
    # Rewrite strip data, merging common strips as we go.  A strip made
    # only of U+FFFF is registered first, before any charset data.
    self.strips = []
    self.strip_index = {}
    self.add_strip(u'\uFFFF' * recode.STRIP_SIZE)
    strip_data = []
    for charset, data, indices in common.all_strip_data():
        strip_data.append(
            (recode.cleaned_alias(charset), charset,
             [self.add_strip(data[index:index+recode.STRIP_SIZE])
              for index in indices]))
    # Write the strip pool, eight values per line.
    write = common.Output('strip.c', 'C').write
    write('\n'
          '#include \"common.h\"\n'
          '\n'
          'const recode_ucs2 ucs2_data_pool[%d] =\n'
          ' {'
          % (len(self.strips) * recode.STRIP_SIZE))
    count = 0
    for strip in self.strips:
        for character in strip:
            if count % 8 == 0:
                if count != 0:
                    write(',')
                write('\n /* %4d */ ' % count)
            else:
                write(', ')
            write('0x%0.4X' % ord(character))
            count += 1
    write('\n'
          ' };\n')
    # Write out all strip codecs, twelve indices per line.  Sorting puts
    # the charsets in cleaned-name order.
    strip_data.sort()
    for ordinal, (key, charset, indices) in enumerate(strip_data):
        write('\n'
              '/* %s */\n'
              '\n'
              'static struct strip_data data_%d =\n'
              ' {\n'
              ' ucs2_data_pool,\n'
              ' {\n'
              % (charset, ordinal))
        for count, indice in enumerate(indices):
            if count % 12 == 0:
                if count != 0:
                    write(',\n')
                write(' ')
            else:
                write(', ')
            write('%4d' % indice)
        write('\n'
              ' }\n'
              ' };\n')
    # Print the collectable initialisation function.
    write('\n'
          'bool\n'
          'module_strips (struct recode_outer *outer)\n'
          '{\n'
          ' RECODE_ALIAS alias;\n')
    # Gather, for each strip charset, its registered (alias, surface) pairs.
    charsets = {}
    for key, charset, indices in strip_data:
        charsets[charset] = []
    # items() is available on every Python version, unlike iteritems().
    for alias, (charset, surface) in recode.registry.aliases.items():
        if charset in charsets:
            charsets[charset].append((alias, surface))
    # The ordinals below match the data_%d names written above, as both
    # loops walk the same sorted list.
    for ordinal, (key, charset, indices) in enumerate(strip_data):
        write('\n'
              ' if (!declare_strip_data (outer, &data_%d, "%s"))\n'
              ' return false;\n'
              % (ordinal, charset))
        for alias, surface in charsets[charset]:
            if surface is None:
                write(' if (!declare_alias (outer, "%s", "%s"))\n'
                      ' return false;\n'
                      % (alias, charset))
            else:
                write(' if (alias = declare_alias (outer, "%s", "%s"),'
                      ' !alias)\n'
                      ' return false;\n'
                      % (alias, charset))
                write(' if (!declare_implied_surface (outer, alias,'
                      ' outer->%s_surface))\n'
                      ' return false;\n'
                      % surface)
    write('\n'
          ' return true;\n'
          '}\n')