def build(self, outfile_name, infile_names, _, context):
    assert infile_names[0].endswith('.handlebars'), infile_names
    assert self.should_compile(infile_names[0]), infile_names[0]
    self._compile_file_to_python(self.abspath(infile_names[0]),
                                 self.abspath(outfile_name))
    log.v3("Compiled handlebars: %s -> %s", infile_names[0], outfile_name)
def _write_pofile(po_entries, filename, write_debug_file_to=None):
    """Write a polib.POFile to filename.

    The po-file format is nicely human-readable, but slow to parse.
    The mo-file format is faster to parse, but loses important
    information.  So we introduce a *third* format: a pickled
    polib.POFile.  Whenever we save a pofile to disk, we save a
    pickled form of the python data structure (polib.POFile).

    We also normalize the po-entries before writing the file, to
    minimize diffs.

    Arguments:
        po_entries: a list of POEntry objects.
        filename: an absolute path to write the pofile to.
        write_debug_file_to: if not None, a filename to write the
            po_entries to as a (human-readable) po-file, rather than
            a po.pickle file.
    """
    from intl import polib_util

    output_pot = polib_util.pofile()
    output_pot.extend(po_entries)

    # Sort the po-entries in a canonical order, to make diff-ing
    # easier, one that also tries to keep content close together in
    # the file if it's close together in real life.  We sort by first
    # occurrence (alphabetically), which is good for most content,
    # but not for datastore entities, which all have the same
    # occurrence (_DATASTORE_FILE:1).  For them, we sort by the first
    # url-they-appear-in.  For entries that match on all of these
    # things, we depend on the fact python's sorts are stable to
    # keep them in input order (that is, the order that we extracted
    # them from the input file).
    url_re = re.compile('<http[^>]*>')
    output_pot.sort(key=lambda e: (e.occurrences[0][0],
                                   int(e.occurrences[0][1]),
                                   sorted(url_re.findall(e.comment))[:1]))

    log.v2('Writing to %s', filename)
    with open(filename, 'w') as f:
        cPickle.dump(output_pot, f, protocol=cPickle.HIGHEST_PROTOCOL)

    if write_debug_file_to:
        log.v2('Also writing to %s', write_debug_file_to)
        with open(write_debug_file_to, 'w') as f:
            polib_util.write_pofile(output_pot, f)

    log.v3('Done!')
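# A minimal sketch (not used in this file) of how a consumer can read
# back the po.pickle that _write_pofile() produces.  The function name
# is hypothetical; the cPickle round-trip mirrors the dump() call above.
def _read_pickled_pofile_sketch(filename):
    """Example only: load a po.pickle written by _write_pofile."""
    with open(filename) as f:
        return cPickle.load(f)    # a polib.POFile, as pickled above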
def build(self, outfile_name, infile_names, _, context):
    assert len(infile_names) == 1, (
        "Each hash should be computed over a single version of Perseus")
    infile_name = infile_names[0]

    log.v3("Reading from Perseus build: %s" % infile_name)
    with open(self.abspath(infile_name)) as f:
        full_content = f.read()

    # We use just the first six characters of the hash.  Per
    # compile_js_css_manifest.py: "Even if every deploy had a new md5,
    # this would give us a good 8 years between collisions."
    perseus_md5sum = md5.new(full_content).hexdigest()[:6]

    log.v3("Writing Perseus hash: %s" % perseus_md5sum)
    with open(self.abspath(outfile_name), 'w') as f:
        f.write(perseus_md5sum)
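# Back-of-the-envelope check of the collision quote above (our
# arithmetic, not from compile_js_css_manifest.py): 6 hex chars give
# 16 ** 6 ~= 16.8M distinct prefixes, so by the birthday bound a
# collision only becomes likely after roughly sqrt(16 ** 6) ~= 4096
# hashes -- about 8 years at one or two deploys per day.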
def build(self, outfile_name, infile_names, changed, context):
    # We import here so the kake system doesn't require these
    # imports unless they're actually used.
    import third_party.babel.messages.extract

    assert len(infile_names) == 1, infile_names

    keywords = third_party.babel.messages.extract.DEFAULT_KEYWORDS.copy()
    keywords['_js'] = keywords['_']      # treat _js() like _()
    # <$_> in jsx expands to $_({varmap}, "string", ...), so kw-index is 2.
    keywords['$_'] = (2,)                # used in .jsx files as <$_>
    keywords['mark_for_translation'] = None               # used in .py files
    keywords['cached_gettext'] = keywords['gettext']      # used in .py files
    keywords['cached_ngettext'] = keywords['ngettext']    # used in .py files

    comment_tags = ['I18N:']
    options = {'newstyle_gettext': 'true',    # used by jinja/ext.py
               'encoding': 'utf-8'}           # used by jinja/ext.py

    extractor = _extractor(infile_names[0])   # fn extracting strings
    log.v3('Extracting from %s (via %s)' % (infile_names[0], extractor))
    with open(self.abspath(infile_names[0])) as fileobj:
        nltext_data = third_party.babel.messages.extract.extract(
            extractor, fileobj,
            keywords=keywords, comment_tags=comment_tags,
            options=options, strip_comment_tags=True)

    # Create 'pseudo' polib entries, with sets instead of lists to
    # make merging easier.  We'll convert to real polib entries later.
    po_entries = collections.OrderedDict()
    for (lineno, message, comments, context) in nltext_data:
        _add_poentry(po_entries, infile_names[0], lineno,
                     message, comments, context)

    # This turns the 'pseudo' polib entries back into real polib
    # entries and writes them as a pickled pofile to disk.
    _write_pofile(po_entries.itervalues(), self.abspath(outfile_name))
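# A minimal sketch (assuming the vendored babel matches upstream
# babel's extract() API) of the tuples that extract() yields and that
# the loop above consumes.  The snippet and names are illustrative only.
def _extract_sketch():
    """Example only: run babel extraction over an in-memory snippet."""
    import StringIO
    import third_party.babel.messages.extract as babel_extract

    snippet = StringIO.StringIO(
        "# I18N: shown on the login page\n"
        "greeting = _('Hello, world')\n")
    for tup in babel_extract.extract('python', snippet,
                                     comment_tags=['I18N:'],
                                     strip_comment_tags=True):
        # tup is (lineno, message, comments, context), e.g.
        # (2, u'Hello, world', [u'shown on the login page'], None)
        print tup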
def _update_image_url_info(css_filename, image_url_info):
    """Given a css_filename relative to ka-root, update _IMAGE_URL_INFO.

    Returns:
        A list of image filenames, relative to ka-root, mentioned in
        this css file.
    """
    # First, we need to delete all old references to css_filename.
    for file_info in image_url_info.itervalues():
        new_files = [f for f in file_info[0] if f != css_filename]
        if len(new_files) < len(file_info[0]):
            # We go through this contortion so we can edit the list in place.
            del file_info[0][:]
            file_info[0].extend(new_files)

    # If the file no longer exists (has been deleted), we're done!
    # (Return an empty list, to honor the documented contract.)
    if not os.path.exists(ka_root.join(css_filename)):
        log.v3("removing image-url info for %s: it's been deleted",
               css_filename)
        return []

    # Then, we need to add updated references, based on the current
    # file contents.
    log.v2('Parsing image-urls from %s', css_filename)
    with open(ka_root.join(css_filename)) as f:
        content = f.read()

    retval = []
    for (img_url, img_relpath, img_size) in (
            _image_urls_and_file_info(content)):
        image_url_info.setdefault(img_url, ([], img_relpath, img_size))
        image_url_info[img_url][0].append(css_filename)
        retval.append(img_relpath)

    log.v4('Image-url info: %s', retval)
    return retval
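# Shape of the image_url_info mapping, as implied by the code above
# (the values shown are illustrative only):
#
#   {'/images/foo.png':                   # image url found in some css
#        (['stylesheets/a.css',           # css files that reference it
#          'stylesheets/b.css'],
#         'images/foo.png',               # image path relative to ka-root
#         <size>)}                        # as computed by
#                                         #   _image_urls_and_file_info()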
def build(self, outfile_name, infile_names, changed, context): """Merge the pofiles and approved pofiles & build pickle and chunks. We export from crowdin twice for each language. One time to get all the translated strings which winds up in intl/translation/pofile/{lang}.(rest|datastore).po files and another time to get just the approved translations which winds up in the intl/translation/approved_pofile/{lang}.(rest|datastore).po files. This merges them all together, preferring an entry in the approved pofile over the unapproved one, and adding a flag to the approved entries. We then create our own specially formatted files that use less space. There is the genfiles/translations/{lang}/index.pickle that gets created, and a bunch of genfiles/translations/{lang}/chunk.# files that the index file points to and holds the actual translations. """ # We import here so the kake system doesn't require these # imports unless they're actually used. import intl.translate from intl import polib_util full_content = '' for infile in sorted([n for n in infile_names if "approved_pofiles" not in n]): with open(self.abspath(infile)) as f: log.v3("Reading %s" % infile) full_content += f.read() approved_full_content = '' for infile in sorted([n for n in infile_names if "approved_pofiles" in n]): with open(self.abspath(infile)) as f: log.v3("Reading %s" % infile) approved_full_content += f.read() log.v3("Calculating md5 to get translation file version for %s" % context['{lang}']) # The output files need a version string. We'll use an # md5sum of the input files. version_md5sum = md5.new( full_content + approved_full_content).hexdigest() version = 'compile_po_%s' % version_md5sum translate_writer = intl.translate.TranslateWriter( os.path.dirname(outfile_name), context['{lang}'], version) # Now lets combine the two po files and add a flag to the approved # pofile entries. log.v3("Creating .index and .chunk translation files for %s" % context['{lang}']) approved_msgids = set() def add_approved_entry(po_entry): po_entry.flags.append('approved') approved_msgids.add(po_entry.msgid) translate_writer.add_poentry(po_entry) def add_unapproved_entry(po_entry): if po_entry.msgid not in approved_msgids: translate_writer.add_poentry(po_entry) _ = polib_util.streaming_pofile( approved_full_content.decode('utf-8'), callback=add_approved_entry) # called on each input POEntry. unapproved_pofile = polib_util.streaming_pofile( full_content.decode('utf-8'), callback=add_unapproved_entry) # This adds in the metadata (and only the metadata). translate_writer.add_pofile(unapproved_pofile) translate_writer.commit()