def printTokenFrequency(self):
    "Print the result generated by computeTokenFrequency in a human-readable manner"
    print(black("Statistics for document index", bold=True))
    self._printFrequencyMap(self.computeTokenFrequency(self.rwDB.iterateDocumentIndex))
    print(black("\nStatistics for entity index", bold=True))
    self._printFrequencyMap(self.computeTokenFrequency(self.rwDB.iterateEntityIndex))
def importRulesForLanguage(lang, basedir="."):
    """Import ruleset from the language-specific python file"""
    moduleName = "rules.{0}".format(lang)
    print(black("Reading rules from {0}".format(moduleName), bold=True))
    langModule = importlib.import_module(moduleName)
    print(black("Found {0} rules for language {1} ({2} in compatibility mode)".format(
        len(langModule.rules), lang, reCompiler.numCompatRegex), bold=True))
    return langModule.rules, langModule.rule_errors
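# Hedged usage sketch (not part of the original source): assumes a package layout
# rules/<lang>.py in which each module defines `rules` and `rule_errors`, as implied by
# importlib.import_module("rules.<lang>") above. The language code "de" is illustrative.
rules, rule_errors = importRulesForLanguage("de")
for rule in rules:
    print(rule)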
def interval_to_string(interval):
    """
    Returns a string representation of an interval.

    :param interval: is the interval to evaluate.
    """
    # turn numbers into a single string
    nums = str(interval.start) + " " + str(interval.duration) + " " + str(interval.amplitude)
    # put brackets on the ends
    bracket_nums = black("[", bold=True) + nums + black("]", bold=True)
    return bracket_nums
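# Hedged usage sketch (not in the original): intervals are accessed via .start,
# .duration and .amplitude above and constructed positionally in read_timeline(),
# so a simple namedtuple with those fields is assumed here purely for illustration.
from collections import namedtuple

Interval = namedtuple("Interval", ["start", "duration", "amplitude"])

print(interval_to_string(Interval(start=0, duration=4, amplitude=2)))  # "[0 4 2]", brackets styled by black()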
def fetchVideoMap(pool, lang):
    "Deprecated: Fetch individual video dubs"
    print(black("Fetching dubbed video list for lang {0}".format(lang), bold=True))
    dubbedVideoIDs = findDubbedVideos(lang)
    print(black("Fetching {0} dubbed video URLs for lang {1}".format(
        len(dubbedVideoIDs), lang), bold=True))
    fn = functools.partial(getTranslatedVideoId, lang=lang)
    videoURLs = pool.map(fn, dubbedVideoIDs)
    # Remap
    return {videoId: url
            for videoId, url in zip(dubbedVideoIDs, videoURLs)
            if url is not None}
def export_lang_to_db(lang, filt):
    count = 0
    for file in findXLIFFFiles("cache/{}".format(lang), filt=filt):
        # e.g. '1_high_priority_platform/about.donate.xliff'
        canonicalFilename = "/".join(file.split("/")[2:])
        section = canonicalFilename.partition("/")[0]
        # Don't index drafts
        if "learn.draft.xliff" in canonicalFilename:
            print(green("Skipping {}".format(canonicalFilename), bold=True))
            continue
        # relevant_for_live
        relevant_for_live = canonicalFilename in relevant_for_live_files
        print(black(file, bold=True))
        soup = parse_xliff_file(file)
        for entry in process_xliff_soup(soup, also_approved=True):
            obj = {
                "id": int(entry.ID),
                "source": entry.Source,
                "target": entry.Translated,
                "source_length": len(entry.Source),
                "is_translated": entry.IsTranslated,
                "is_approved": entry.IsApproved,
                "translation_source": "Crowdin",
                "file": canonicalFilename,
                "fileid": entry.FileID,
                "relevant_for_live": relevant_for_live
            }
            # Async write
            executor.submit(write_entry, obj, lang)
            # Stats
            count += 1
            if count % 1000 == 0:
                print("Processed {} records".format(count))
def print_timeline(timeline, steps):
    """
    Prints a graphical representation of the timeline to the console.

    :param timeline: is the 2D array of intervals to read.
    :param steps: is the number of time steps to print per channel.
    """
    print()  # newline
    # count number of digits of left column to align numbers
    pad_length = len(str(len(timeline)))
    # print the row
    for channel in range(0, len(timeline)):
        current_index = 0
        amp = 0
        print(str(channel).zfill(pad_length), " > ", sep='', end='')
        # print the columns
        for current_time in range(0, steps):
            (amp, current_index) = read_channel(timeline, channel, current_index, current_time)
            # make the zeros stand out. easier to look at that way
            if amp == 0:
                print(black(str(amp), bold=True), " ", sep='', end='')
            else:
                print(amp, " ", sep='', end='')
        print()  # newline
    print()  # newline
def updateLintFromGoogleGroups(lang="de"):
    url = getLatestLintDownloadLink()
    response = requests.get(url)
    filename = os.path.join("cache", "{0}-lint.csv".format(lang))
    with open(filename, "w") as outfile:
        outfile.write(response.text)
    print(black("Updated %s" % filename, bold=True))
def loadTranslations(conn, recordTable=1, indexTable=2):
    """
    Loads all PO strings into the database and builds a Polyglott index
    """
    print(black("Deleting old tables...", bold=True))
    # Delete old tables (implicitly closes tables)
    conn.deleteRange(recordTable, startKey=None, endKey=None)
    conn.deleteRange(indexTable, startKey=None, endKey=None)
    print(black("Opening tables...", bold=True))
    # Table 1 stores msgid => NUL-separated list of records
    #   Record: Langcode (KA-style) + ASCII record separator (0x1D) + msgstr
    conn.openTable(recordTable, mergeOperator="NULAPPEND")
    # Table 2 stores key => msgid
    #   where "key" is any msgid or msgstr.
    #   The appropriate values can be looked up in table 1 using the msgid key
    conn.openTable(indexTable, mergeOperator="REPLACE")
    for lang, langpath in findAvailableLanguages().items():
        print(black("Reading PO files for language {}".format(lang), bold=True))
        for filename in findPOFiles(langpath):
            # NOTE: This loop writes one value dict per file to
            #  avoid tons of Python function calls.
            #  The large values are handled efficiently in C++ code.
            print("\tProcessing {}".format(filename))
            po = polib.pofile(filename)
            # Write table 1
            values = {entry.msgid: lang + "\x1D" + entry.msgstr
                      for entry in po if entry.msgstr.strip()}
            conn.put(recordTable, values)
            # Write table 2 (index)
            values = {entry.msgstr: entry.msgid
                      for entry in po if entry.msgstr.strip()}
            values2 = {entry.msgid: entry.msgid for entry in po}
            conn.put(indexTable, values)
            conn.put(indexTable, values2)
    # Perform anticipatory compaction on both tables
    print(black("Compacting language table...", bold=True))
    conn.compact(recordTable)
    print(black("Compacting index table...", bold=True))
    conn.compact(indexTable)
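# Hedged lookup sketch (not part of the original): based on the table layout described
# in loadTranslations() above, a string in any language is first resolved to its msgid
# via the index table, then the per-language records are read from the record table.
# The conn.read() single-key call and its string return value are assumptions about the
# YakDB client API, used here only to illustrate the index structure.
def lookupTranslations(conn, text, recordTable=1, indexTable=2):
    # Resolve any msgid or msgstr to its canonical msgid (index table)
    msgid = conn.read(indexTable, text)  # assumed single-key read; falsy if absent
    if not msgid:
        return {}
    # The record table maps msgid => NUL-separated "<langcode>\x1D<msgstr>" records
    records = conn.read(recordTable, msgid)
    result = {}
    for record in records.split("\x00"):
        if record:
            lang, _, msgstr = record.partition("\x1D")
            result[lang] = msgstr
    return result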
def buildPolyglottIndex(args):
    import YakDB
    print(black("Connecting to YakDB...", bold=True))
    conn = YakDB.Connection()
    conn.connect("tcp://localhost:7100")
    loadTranslations(conn, args.table, args.table + 1)
def updateTranslationFilemapCache(lang="de"):
    """Re-download the translation filemap cache"""
    print(black("Updating translation filemap for {0}".format(lang), bold=True))
    filename = translationFilemapCacheFilename(lang)
    with open(filename, "w") as outfile:
        translation_filemap = downloadTranslationFilemap(lang)
        json.dump(translation_filemap, outfile)
    return translation_filemap
def findAllLanguages():
    """
    Acquire a dictionary language -> language ID from Crowdin,
    i.e. the list of Crowdin language codes to which KA is translated.
    """
    # Acquire language list
    print(black("Fetching language list...", bold=True))
    response = requests.get("https://crowdin.com/project/khanacademy")
    txt = response.text
    langs = re.findall(r"https?://[a-z0-9]*\.cloudfront\.net/images/flags/([^\.]+)\.png", txt)
    print(black("Fetching language IDs...", bold=True))
    # English is the source language
    if "en-US" in langs:
        langs.remove("en-US")
    # Fetch lang IDs in parallel
    pool = Pool(32)
    return dict(zip(langs, pool.map(fetchLanguageID, langs)))
def updateVideoMap(args):
    """ Generates VideoTranslations.json from Khan Academy data """
    pool = Pool(48)
    languages = list(sorted(findAllLanguages()))
    print(black("Fetching language videomaps", bold=True))
    langresults = pool.map(fetchVideoTranslationsCSV, languages)
    videoMap = defaultdict(dict)
    for language, langresult in zip(languages, langresults):
        # Insert results into main language map
        for slug, url, orig_url in langresult:
            videoMap[slug][language] = url
            videoMap[slug]["en"] = orig_url
    print(black("Writing JSON videomap...", bold=True))
    with open(os.path.join("cache", "VideoMap.json"), "w") as outfile:
        json.dump(videoMap, outfile)
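# Hedged usage sketch (not in the original): reads the cache/VideoMap.json written by
# updateVideoMap() above. The helper name, slug and language code are illustrative only.
import json
import os

def lookupDubbedVideo(slug, lang):
    with open(os.path.join("cache", "VideoMap.json")) as infile:
        videoMap = json.load(infile)
    # Each entry maps a video slug to {language code: dubbed URL, "en": original URL}
    entry = videoMap.get(slug, {})
    return entry.get(lang, entry.get("en"))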
def performRender(args):
    # Download / update if requested
    if args.download:
        download()
    # Create directory
    if not args.outdir:
        args.outdir = "output-{0}".format(args.language)
    if not os.path.isdir(args.outdir):
        os.mkdir(args.outdir)
    renderer = HTMLHitRenderer(args.outdir, args.language)
    # Generate HTML
    if not args.no_lint:
        print(black("Rendering lint...", bold=True))
        success = False
        for i in range(25):
            try:
                renderer.renderLintHTML()
                success = True
                break
            except NoResultException:
                print(red("Lint fetch error, retrying..."))
        if not success:
            print(red("Lint fetch error (retries exhausted)", bold=True))
    if not args.only_lint:
        # Import
        potDir = os.path.join("cache", args.language)
        print(black("Reading files from {0} folder...".format(potDir), bold=True))
        poFiles = readPOFiles(potDir)
        print(black("Read {0} files".format(len(poFiles)), bold=True))
        # Compute hits
        print(black("Computing rules...", bold=True))
        renderer.computeRuleHitsForFileSet(poFiles)
        # Ensure the HUGE po stuff goes out of scope ASAP
        poFiles = None
        # Generate HTML
        print(black("Rendering HTML...", bold=True))
        renderer.hitsToHTML()
        # Generate filestats.json
        print(black("Generating JSON API files...", bold=True))
        renderer.writeStatsJSON()
    # If data is present, generate subtitle information
    videosJSONPath = os.path.join("cache", "videos.json")
    if os.path.isfile(videosJSONPath):
        print(black("Rendering subtitles overview...", bold=True))
        with open(videosJSONPath) as infile:
            exercises = json.load(infile)
        subtitleTemplate = renderer.env.get_template("subtitles.html")
        writeToFile(os.path.join(args.outdir, "subtitles.html"),
                    subtitleTemplate.render(exercises=exercises))
def performRender(args):
    # Download / update if requested
    if args.download:
        download()
    # Create directory
    if not args.outdir:
        args.outdir = "output"
    os.makedirs(args.outdir, exist_ok=True)
    renderer = JSONHitRenderer(args.outdir, args.language, args.num_processes)
    # Import
    potDir = os.path.join("cache", args.language)
    xliffFiles = findXLIFFFiles(potDir, filt=args.filter)
    print(black("Reading {} files from {} folder...".format(
        len(xliffFiles), potDir), bold=True))
    # Compute hits
    print(black("Computing rules...", bold=True))
    renderer.computeRuleHitsForFileSet(xliffFiles)
    # Export hits as JSON
    print(black("Rendering JSON...", bold=True))
    renderer.exportHitsAsJSON()
    # Generate filestats.json
    print(black("Generating JSON API files...", bold=True))
    renderer.writeStatsJSON()
    # If data is present, generate subtitle information
    videosJSONPath = os.path.join("cache", "videos.json")
    if os.path.isfile(videosJSONPath):
        print(black("Rendering subtitles overview...", bold=True))
        with open(videosJSONPath) as infile:
            exercises = json.load(infile)
        subtitleTemplate = renderer.env.get_template("subtitles.html")
        writeToFile(os.path.join(args.outdir, "subtitles.html"),
                    subtitleTemplate.render(exercises=exercises))
def ensureFileIsPresent(asset, directory, forceDownload=False):
    (filename, url) = asset
    filepath = os.path.join(directory, filename)
    if url is None:  # --> no need to download
        return
    if not os.path.isfile(filepath) or forceDownload:
        # Create directory if required
        dirname = os.path.dirname(filepath)
        if not os.path.exists(dirname):
            os.makedirs(dirname)
        # Perform download
        print(black("Downloading %s" % filename, bold=True))
        urllib.request.urlretrieve(url, filepath)
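# Hedged usage sketch (not in the original): assets are (filename, url) pairs as
# unpacked above; the file names, URL and target directory here are placeholders.
assets = [
    ("js/example.min.js", "https://example.com/static/example.min.js"),
    ("local-only.css", None),  # url=None --> nothing to download, silently skipped
]
for asset in assets:
    ensureFileIsPresent(asset, "cache", forceDownload=False)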
def autobuild_python_package(nameorurl, suffix, version=None, depends=[],
                             build_depends=[], py2=True, py3=True, remove_pyc=False):
    """
    Automatically build a python package.

    Takes either a git+http(s) or git:// URL or a package name.
    In case of a package name, the package is downloaded from PyPI.
    """
    if nameorurl.startswith(("git+https://", "git+http://", "git://")) or \
            (nameorurl.startswith(("http://", "https://")) and nameorurl.endswith(".git")):
        # Git package download
        pkgname = nameorurl.rpartition("/")[2]
        if pkgname.endswith(".git"):
            # NOTE: rstrip(".git") would strip individual characters, not the suffix
            pkgname = pkgname[:-len(".git")]
        print(black("Cloning {} {}...".format(
            pkgname, "(branch {})".format(version) if version else ""), bold=True))
        set_name(pkgname.lower())
        git_clone(nameorurl, branch=version)
    else:
        # Normal package download
        set_name(nameorurl)
        remove_old_buildtree()
        print(black("Fetching latest PyPI revision of {}".format(nameorurl), bold=True))
        pkgversion, url = find_latest_pypi_version(get_name(), forced_version=version)
        # Download and extract archive
        print(black("Downloading & extracting {}-{}".format(nameorurl, pkgversion), bold=True))
        wget_download(url)
    # Remove pyc (workaround) if enabled
    if remove_pyc:
        cmd("find . -name '*.pyc' -print0 | xargs -0 rm")
    # Perform build of deb package using stdeb
    print(black("Building deb package", bold=True))
    build_stdeb(suffix, py2, py3, depends=depends, build_depends=build_depends)
    # Move deb packages to current directory
    print(black("Moving build result", bold=True))
    cmd(r"find deb_dist -maxdepth 1 -type f -exec mv {} .. \;")
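# Hedged usage sketch (not in the original): the package names, suffix, dependency list
# and git URL below are illustrative placeholders; the build helpers (set_name, cmd,
# build_stdeb, ...) are assumed to be importable from the same build tooling as above.
if __name__ == "__main__":
    # Build a deb from the latest PyPI release of a package
    autobuild_python_package("requests", suffix="custom1",
                             depends=["python3-urllib3"], remove_pyc=True)
    # Or build straight from a git repository, pinned to a branch or tag
    autobuild_python_package("git+https://github.com/example/example-pkg.git",
                             suffix="custom1", version="v1.2.3")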
def computeRuleHits(self, po, filename="[unknown filename]"):
    """
    Compute all rule hits for a single parsed PO file and return a list of
    futures that each resolve to a (filename, rule, results) tuple.

    po must be a function that returns a pofile object.
    """
    def _wrapper(rule, po, filename):
        # print("   {} => Rule {}".format(filename, rule))
        return (filename, rule, list(rule.apply_to_po(po, filename=filename)))
    # Actually read PO file
    print(black("Reading {} ...".format(filename), bold=True))
    po = po()
    futures = [self.executor.submit(_wrapper, rule, po, filename)
               for rule in self.rules]
    return futures
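# Hedged usage sketch (not in the original): shows one way the returned futures might
# be drained. `renderer` and the lazy `load_po` callable are illustrative names;
# concurrent.futures.as_completed matches the executor-style futures produced above.
import concurrent.futures

def collectRuleHits(renderer, load_po, filename):
    futures = renderer.computeRuleHits(load_po, filename=filename)
    hits = []
    for future in concurrent.futures.as_completed(futures):
        fname, rule, results = future.result()
        if results:  # keep only rules that actually matched something
            hits.append((fname, rule, results))
    return hits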
def delayedIndexPattern(lang, pattern, delay=15):
    if pattern in currently_indexing:
        print(black("Ignoring index request for '{}', already in queue".format(pattern), bold=True))
        return
    # Avoid duplicate indexing
    currently_indexing.append(pattern)
    # Delay
    time.sleep(delay)  # Allow DB to sync
    # Index with both relevant_for_live settings
    executor.submit(index_pattern, client, lang, pattern, True)
    executor.submit(index_pattern, client, lang, pattern, False)
    # Remove from queue
    try:
        currently_indexing.remove(pattern)
    except ValueError:
        pass
def export_lang_to_db(lang, filt):
    count = 0
    indexer = TextTagIndexer(lang)
    for file in findXLIFFFiles("cache/{}".format(lang), filt=filt):
        # e.g. '1_high_priority_platform/about.donate.xliff'
        canonicalFilename = "/".join(file.split("/")[2:])
        print(black(file, bold=True))
        soup = parse_xliff_file(file)
        for entry in process_xliff_soup(soup, also_approved=True):
            indexer.add(entry.Source, entry.Translated,
                        file.rpartition("/")[2], entry.IsApproved)
            # Stats
            count += 1
            if count % 1000 == 0:
                print("Processed {} records".format(count))
    return ttt(lang, list(indexer._convert_to_json()))
def renderLint(outdir, kalangcode):
    "Parse & render lint"
    # Map from KA code to crowdin code
    langMap = {"pt": "pt-BR", "pt-pt": "pt-PT", "ur": "ur-PK",
               "es": "es-ES", "hy": "hy-AM"}
    lang = langMap[kalangcode] if kalangcode in langMap else kalangcode
    print(black("Rendering lint for {0} ({1})".format(lang, kalangcode), bold=True))
    # Generate lint report
    lintFilename = os.path.join("cache", "{0}-lint.csv".format(kalangcode))
    if os.path.isfile(lintFilename):
        lintEntries = list(readAndMapLintEntries(lintFilename, lang))
        # Write JSON
        jsonEntries = list(map(operator.methodcaller("_asdict"), lintEntries))
        os.makedirs(os.path.join(outdir, lang), exist_ok=True)
        writeJSONToFile(os.path.join(outdir, lang, "lint.json"), jsonEntries)
    else:
        print("Skipping lint ({0} does not exist)".format(lintFilename))
def _translate(entry, translator, force=False, tries_left=5):
    engl = entry["english"]
    if not force and entry["translated"] is not None and entry["translated"] == "":
        return entry  # leave as is
    try:
        transl = translator[tries_left].translate(engl)
        if transl is None:
            if tries_left > 0:
                print("Retrying '{}'".format(engl))
                return _translate(entry, translator, force, tries_left - 1)
            else:
                print(red("Won't retry '{}'".format(engl)))
    except:
        print(black("Autotranslate fail for string '{}'".format(engl), bold=True))
        traceback.print_exception(*sys.exc_info())
        return entry
    entry["translated"] = transl
    # print("{} ==> {}".format(engl, entry["translated"]))
    return entry
def checkExcellonMetric(self, filepath):
    "Check if a given file is a metric excellon file"
    filename = os.path.basename(filepath)
    lines = readFileLines(filepath)
    # Check for excellon header
    if lines[0] != "M48":
        print(red("Can't find Excellon drill header (M48) in %s" % filename, bold=True))
    # Check for metric dimension: Line like METRIC,0000.00
    if lines[1].partition(",")[0] != "METRIC":
        print(red("Excellon drill program %s does not seem to be metric" % filename, bold=True))
    #
    # Drill statistics
    #
    toolStats = extractToolStatistics(lines)
    print(black(self.name + ":", bold=True))
    for diameter, numDrills in toolStats.items():
        print("\t%d through holes of diameter %.2fmm" % (numDrills, diameter))
    # Print "None" if there are no holes in this file
    if not toolStats:
        print("\tNone")
def parse_check_format(intv, verbose=False):
    """
    Checks if a string could contain an interval.

    This function just exists to shorten read_timeline() to something that
    Python won't throw warnings about. There's no need to call this outside
    that function.

    :param intv: is the string to check.
    :param verbose: is an optional flag. When true, extra parsing information
        is printed to the console. Defaults to false.
    """
    # An interval [#] [#] [#] needs at least 3 numbers and 2 spaces, so any
    # string shorter than 5 characters cannot be an interval.
    if len(intv) < 5:
        if verbose:
            print(black("\tinvalid length > ", bold=True), end='')
            print(add_quotes(intv))
        return False
    # haven't ruled it out yet
    if verbose:
        print(blue("\tpossible interval > ", bold=True), end='')
        print(add_quotes(intv))
    return True
def performRender(args):
    # Download / update if requested
    if args.download:
        download()
    # Create directory
    if not args.outdir:
        args.outdir = "output"
    os.makedirs(args.outdir, exist_ok=True)
    renderer = JSONHitRenderer(args.outdir, args.language)
    # Import
    potDir = os.path.join("cache", args.language)
    print(black("Reading files from {0} folder...".format(potDir), bold=True))
    poFiles = readPOFiles(potDir)
    print(black("Read {0} files".format(len(poFiles)), bold=True))
    # Compute hits
    print(black("Computing rules...", bold=True))
    renderer.computeRuleHitsForFileSet(poFiles)
    # Ensure the HUGE po stuff goes out of scope ASAP
    del poFiles
    # Export hits as JSON
    print(black("Rendering JSON...", bold=True))
    renderer.exportHitsAsJSON()
    # Generate filestats.json
    print(black("Generating JSON API files...", bold=True))
    renderer.writeStatsJSON()
    # If data is present, generate subtitle information
    videosJSONPath = os.path.join("cache", "videos.json")
    if os.path.isfile(videosJSONPath):
        print(black("Rendering subtitles overview...", bold=True))
        with open(videosJSONPath) as infile:
            exercises = json.load(infile)
        subtitleTemplate = renderer.env.get_template("subtitles.html")
        writeToFile(os.path.join(args.outdir, "subtitles.html"),
                    subtitleTemplate.render(exercises=exercises))
def add_quotes(message):
    """
    Adds grey quotes around text.

    :param message: is the string to put quotes around.
    """
    return black("\"", bold=True) + str(message) + black("\"", bold=True)
        try:
            slug, orig_vid, vid = row[8], row[5], row[6]
            # Ignore non translated videos
            if not vid:
                continue
            url_tpl = "https://www.youtube.com/watch?v={0}"
            url = url_tpl.format(vid)
            orig_url = url_tpl.format(orig_vid)
            result.append((slug, url, orig_url))
        except IndexError:
            continue
    return result


if __name__ == "__main__":
    pool = Pool(48)
    languages = list(sorted(findAllLanguages()))
    print(black("Fetching language videomaps", bold=True))
    langresults = pool.map(fetchVideoTranslationsCSV, languages)
    videoMap = defaultdict(dict)
    for language, langresult in zip(languages, langresults):
        # Insert results into main language map
        for slug, url, orig_url in langresult:
            videoMap[slug][language] = url
            videoMap[slug]["en"] = orig_url
    print(black("Writing JSON videomap...", bold=True))
    with open("VideoMap.json", "w") as outfile:
        json.dump(videoMap, outfile)
def read_timeline(filename, verbose=False):
    """
    Reads a timeline of intervals from a text file.
    Returns the timeline, the step count, and whether there were parse errors.

    :param filename: is the path to the timeline file.
    :param verbose: is an optional flag. When true, extra parsing information
        is printed to the console. Defaults to false.
    """
    # make timeline array
    timeline = []
    steps = 0
    count, total = 0, 0
    # Yes, this is the second place where this is checked. Gotta be sure.
    if not file_exists(filename):
        if verbose:
            print(add_quotes(filename), "is not a file")
        return False
    errors = False
    with open(filename) as lines:
        for num, line in enumerate(lines):
            line = line.strip()
            if verbose:
                print("reading line {} >".format(num + 1), add_quotes(line))
            if not line:
                if verbose:
                    print(black("Skipping blank line\n", bold=True))
                continue
            if line.startswith("#"):
                if verbose:
                    print(black("Skipping comment line\n", bold=True))
                continue
            # if you find the comment symbol, ignore everything after it
            comment_pos = line.find("#")
            if comment_pos != -1:
                if verbose:
                    print(black("\tRemoving comment > ", bold=True), end='')
                    print(add_quotes(line[comment_pos:]))
                    print(black("\tParsing remaining line > ", bold=True), end='')
                    print(add_quotes(line[:comment_pos]))
                line = line[:comment_pos]
            if count == 0 and steps == 0:
                found_steps = False
                try:
                    nums_in_line = re.findall(r'\d+', line)
                    if len(nums_in_line) == 1:
                        steps = int(nums_in_line[0])
                        found_steps = True
                except (ValueError, IndexError):
                    pass
                if verbose:
                    if found_steps:
                        print(green("\tFound step count:", bold=True), steps)
                    else:
                        print(yellow("\tFailed to find step count before first interval.", bold=True))
                        print(black("\t\tSetting step count to default:", bold=True),
                              DEFAULT_STEPS_IN_TIMELINE)
                if found_steps:
                    continue
            intvs = line.split(",")
            timeline.append([])
            for intv in intvs:
                intv = intv.strip()
                # see if it's in the form [#] [#] [#]
                if not parse_check_format(intv, verbose):
                    errors = True
                    continue
                params = intv.split()
                # check that each number is legit
                if not parse_check_numbers(params, verbose):
                    errors = True
                    continue
                # use those valid numbers to make an interval
                new_interval = Interval(int(params[0]), int(params[1]), int(params[2]))
                if verbose:
                    print(green("\t\tinterval >"), interval_to_string(new_interval))
                total = total + 1
                timeline[count].append(new_interval)
            # if it's run through the line and not added any intervals...
            if not timeline[count]:
                if verbose:
                    print(yellow("no intervals found. Skipping line."))
                del timeline[-1]
            else:
                if verbose:
                    print(green("intervals found:"), len(timeline[count]))
                count = count + 1
            if verbose:
                print()  # newline
    if verbose:
        print("reached end of file.")
        print("found {} intervals across {} channels.".format(total, len(timeline)))
    if steps == 0:
        steps = DEFAULT_STEPS_IN_TIMELINE
    return timeline, steps, errors
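# Hedged usage sketch (not in the original): the file format below is inferred from
# read_timeline() above -- an optional single-number step-count line, then one channel
# per line with comma-separated "start duration amplitude" triples, '#' starting a
# comment. The file name is illustrative; file_exists(), parse_check_numbers() and
# DEFAULT_STEPS_IN_TIMELINE are assumed to come from the same module as the parser.
if __name__ == "__main__":
    EXAMPLE_TIMELINE = """\
# example timeline
16          # step count
0 4 2, 6 2 1
2 8 3       # second channel
"""
    with open("example_timeline.txt", "w") as f:
        f.write(EXAMPLE_TIMELINE)
    timeline, steps, errors = read_timeline("example_timeline.txt", verbose=True)
    if not errors:
        print_timeline(timeline, steps)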
    ExpectedFile(".top", "Top copper layer", "RS-274X", checkCopperLayer),
    ExpectedFile(".bot", "Bottom copper layer", "RS-274X", checkCopperLayer),
    ExpectedFile(".smt", "Solder mask top", "RS-274X", checkGerberFile),
    ExpectedFile(".smb", "Solder mask bottom", "RS-274X", checkGerberFile),
    ExpectedFile(".plt", "Silk screen top", "RS-274X", checkGerberFile),
    ExpectedFile(".mil", "Board outline", "RS-274X", checkBoardOutline),
    # Drilling
    ExpectedFile(".pth", "Plated through holes", "Excellon", checkExcellonMetric),
    ExpectedFile(".npth", "Non-plated through holes", "Excellon", checkExcellonMetric),
]

if __name__ == "__main__":
    # Parse commandline arguments
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("directory", help="The directory to scan for project Gerber files")
    parser.add_argument("--gerbv", action="store_true", help="Run gerbv on the files")
    args = parser.parse_args()
    # Perform check
    files = os.listdir(args.directory)
    projectName = extractProjectPrefix(files)
    print(black("Project name: %s" % projectName))
    checkedFiles = [checkFile(args.directory, f, projectName) for f in expectedFiles]
    unknownFiles = set(files) - set(checkedFiles)
    if unknownFiles:
        print(red("Found unknown files: %s" % ",".join(unknownFiles)))
    # Open viewer if enabled
    if args.gerbv:
        filePaths = [os.path.join(args.directory, f) for f in files]
        subprocess.call(["gerbv"] + filePaths)
def checkBoardOutline(self, filepath):
    filename = os.path.basename(filepath)
    # Basic gerber checks
    checkGerberFile(self, filepath)
    # Compute board outline
    millLines = readFileLines(filepath)
    # Find factors to get absolute coordinates:
    x_factor, y_factor = findCoordinateFormat(millLines)
    # Initialize X & Y
    x, y = 0, 0
    # We can only interpret the file if coordinates are absolute
    if "G90*" not in millLines:
        print(yellow("Mill coordinates in %s don't seem to be absolute (G90 missing!)" % filename))
        return
    # Determine coordinate units
    unit = parseGerberUnit(millLines)
    if unit is None:  # Neither inch nor mm found
        print(yellow("Could not find coordinate units (mm/in) in %s" % filename))
        return
    # Parse the aperture list
    apertures = parseGerberApertures(millLines)
    selectApertureRegex = re.compile(r"(D\d+)\*")
    move2DRegex = re.compile(r"X(-?\d+)Y(-?\d+)D(\d+)\*")  # Move (D2) or draw (D1)
    move1DRegex = re.compile(r"([XY])(-?\d+)D(\d+)\*")  # With only one coordinate
    # Try to interpret gerber file
    minCoords = (sys.maxsize, sys.maxsize)
    maxCoords = (0, 0)
    lastCoords = (0, 0)
    currentAperture = None
    apertureUseCount = Counter()
    for line in millLines:
        if selectApertureRegex.match(line):
            apertureCode = selectApertureRegex.match(line).group(1)
            currentAperture = findAperture(apertures, apertureCode)
        elif move2DRegex.match(line):
            match = move2DRegex.match(line)
            x = int(match.group(1)) / x_factor
            y = int(match.group(2)) / y_factor
            apertureUseCount[currentAperture] += 1
        elif move1DRegex.match(line):
            match = move1DRegex.match(line)
            apertureUseCount[currentAperture] += 1
            if match.group(1) == "X":
                x = int(match.group(2)) / x_factor
                y = lastCoords[1]
            elif match.group(1) == "Y":
                x = lastCoords[0]
                y = int(match.group(2)) / y_factor
            else:
                raise Exception("Internal error: Invalid coordinate type in 1D move: %s" % match.group(1))
        else:
            continue
        # Compute min/max coordinates
        lastCoords = (x, y)
        minCoords = (min(minCoords[0], lastCoords[0]), min(minCoords[1], lastCoords[1]))
        maxCoords = (max(maxCoords[0], lastCoords[0]), max(maxCoords[1], lastCoords[1]))
    # Compute board size (minimum enclosing rectangle)
    boardSize = (maxCoords[0] - minCoords[0], maxCoords[1] - minCoords[1])
    # Compute size of most common aperture
    mostCommonAperture = apertureUseCount.most_common(1)[0][0]
    # Print info
    print(black("\tGerber offset: ({1:.2f} {0}, {2:.2f} {0})".format(unit, minCoords[0], minCoords[1])))
    print(black("\tBoard size (minimum rectangle): %.1f %s x %.1f %s" %
                (boardSize[0], unit, boardSize[1], unit)))
def process_xliff_soup(filename, soup, autotranslator, indexer,
                       autotranslate=True, preindex=False, overwrite=False,
                       postproc=identity):
    """
    Remove already-translated trans-units and notes from the given soup.
    For the untranslated elements, an autotranslation is attempted and,
    if successful, stored in the <target> element.
    """
    overall_count = 0
    untranslated_count = 0
    translated_count = 0
    autotranslated_count = 0
    # Iterate over all translatable strings
    body = soup.xliff.file.body
    # Resulting elements
    results = []
    indexFN = indexer.preindex if preindex else indexer.add
    for trans_unit in body.children:  # body.find_all("trans-unit"):
        # Ignore strings
        if not isinstance(trans_unit, bs4.element.Tag):
            continue
        # Ignore other tags
        if trans_unit.name != "trans-unit":
            print("Encountered wrong tag: {}".format(trans_unit.name))
            continue
        overall_count += 1
        source = trans_unit.source
        target = trans_unit.target
        # Broken XLIFF?
        if target is None:
            print(trans_unit.prettify())
            continue
        note = trans_unit.note
        is_untranslated = ("state" in target.attrs and target["state"] == "needs-translation")
        is_approved = ("approved" in trans_unit.attrs and trans_unit["approved"] == "yes")
        can_overwrite = not is_untranslated and not is_approved
        will_overwrite = overwrite and can_overwrite
        engl = source.text
        translated = target.text
        # Index tags in the indexer (e.g. to extract text tags)
        # This is done even if they are translated
        # NOTE: This does index or preindex (chosen outside of the loop)
        indexFN(engl, None if is_untranslated else translated,
                filename=filename, approved=is_approved)
        # For indexing run, ignore autotranslator altogether
        if not autotranslate and not will_overwrite:
            trans_unit.decompose()
            continue
        # Remove entire tag if translated (or suggested)
        if not is_untranslated and not will_overwrite:
            trans_unit.decompose()
            translated_count += 1
            continue
        # Don't try to autotranslate etc.
        untranslated_count += 1
        # Remove HUGE note text to save space
        if note:
            note.decompose()
        # Remove empty text inside the <trans-unit> element to save space
        for c in trans_unit.contents:
            c.extract()
        # Now we can try to autotranslate
        try:
            autotrans = postproc(autotranslator.translate(engl))
        except:
            print(black("Autotranslate fail for string '{}'".format(engl), bold=True))
            traceback.print_exception(*sys.exc_info())
            autotrans = None
        if autotrans is None:
            # Could not translate
            # Remove from output file to conserve space
            trans_unit.decompose()
        else:
            # Could autotranslate
            # Store autotranslation in XML
            target["state"] = "translated"
            target.string = autotrans
            autotranslated_count += 1
            # Add to result list
            results.append(trans_unit.extract())
    # Remove empty text content of the body to conserve space
    body.contents = results
    # Print stats
    if autotranslate:
        if untranslated_count != 0:  # Don't print "0 of 0 strings"
            print(black("Autotranslated {} of {} untranslated strings ({} total) in {}".format(
                autotranslated_count, untranslated_count, overall_count,
                os.path.basename(filename))))
    else:
        print(black("{} {} strings in {}".format(
            "Preindexed" if preindex else "Indexed",
            overall_count, os.path.basename(filename))))
    return autotranslated_count
    print(black("Fetching dubbed video list for lang {0}".format(lang), bold=True))
    dubbedVideoIDs = findDubbedVideos(lang)
    print(black("Fetching {0} dubbed video URLs for lang {1}".format(
        len(dubbedVideoIDs), lang), bold=True))
    fn = functools.partial(getTranslatedVideoId, lang=lang)
    videoURLs = pool.map(fn, dubbedVideoIDs)
    # Remap
    return {videoId: url
            for videoId, url in zip(dubbedVideoIDs, videoURLs)
            if url is not None}


if __name__ == "__main__":
    # Download exercise list
    print(black("Downloading master exercise list...", bold=True))
    exercises = getExercises()
    # Download videos
    print(black("Downloading exercise video list for {0} exercises...".format(
        len(exercises)), bold=True))
    pool = Pool(32)
    exVideos = pool.map(getExerciseVideos, [e["name"] for e in exercises])
    with open("fo.json", "w") as outf:
        json.dump(exVideos, outf)
    # Perform mapping
    print(black("Mapping videos...", bold=True))
    result = []
    allVideoIDs = set()  # Need that for subtitle mapping
    for exercise, videos in zip(exercises, exVideos):
        current_videos = []
        for video in videos:
            current_videos.append({