def launchForACorpusGoogle(inPath, outPath=None, continueWhereWeLeftOf=True):
    """
    reads a corpus file line by line, sends each line through the Google Translate web page
    (driven with a selenium Firefox session) and appends the returned translation to the output file
    :param inPath: path to the input file, one segment per line
    :param outPath: path to the output file; if None it is derived from inPath with the "[._]en"
        substitutions below
    :param continueWhereWeLeftOf: if True, the lines already present in the output file are counted
        and that many input lines are skipped (a missing output file counts as zero lines);
        for any other value the output file is emptied and every line is processed
    :return: None
    """
    geckoDriverPath = u"/u/alfonsda/progs/geckoDriver/geckodriver"
    googleUrl = "https://translate.google.ca/"
    if outPath is None:
        # NOTE(review): the outer substitution also matches the ".en" of the ".en2fr" that the inner
        # one has just inserted, so "x.en" becomes "x.fr2en2fr" - confirm this naming is intended
        outPath = re.sub(r"[._]en", ".fr2en", re.sub(r"[._]en", ".en2fr", inPath))
    with open(inPath) as cn10k:
        if continueWhereWeLeftOf is not True:
            # start over: empty the output file, -inf makes every line index eligible
            with open(outPath, "w") as out10k:
                out10k.write("")
            lastSeenInd = float("-inf")
        else:
            # resume: one output line was written per processed input line
            try:
                with open(outPath) as out10k:
                    lastSeenInd = sum(1 for _ in out10k)
            except FileNotFoundError:
                # nothing has been written yet, there is nothing to skip
                lastSeenInd = 0
        with open(outPath, "a") as out10k:
            session = webdriver.Firefox(executable_path=geckoDriverPath)
            try:
                session.get(googleUrl)
                start = utilsOs.countTime()
                for counter, cnLn in enumerate(cn10k):
                    # skip the lines handled in a previous run
                    if counter < lastSeenInd:
                        continue
                    cnLn = cnLn.replace("\n", "")
                    session = chooseLangGoogleTrans(session)
                    translation, session = writeSrcGetTrgt(session, cnLn)
                    out10k.write("{0}\n".format(translation))
                    # take a coffee break if it's time (600, presumably seconds, since the last start)
                    if utilsOs.countTime(start) >= 600:
                        session.close()
                        # mark the session as closed so the cleanup below does not close it twice
                        session = None
                        time.sleep(random.uniform(20, 60))
                        start = utilsOs.countTime()
                        # reopen the driver, waiting 10 minutes before a single retry on OSError
                        try:
                            session = webdriver.Firefox(executable_path=geckoDriverPath)
                        except OSError:
                            time.sleep(600)
                            session = webdriver.Firefox(executable_path=geckoDriverPath)
                        session.get(googleUrl)
            finally:
                # close the browser window on success and on failure alike
                if session is not None:
                    session.close()
def launchForACorpusDeepL(inPath, outPath=None, continueWhereWeLeftOf=True):
    """
    reads a corpus file line by line, sends each line through the deepL translator web page
    (driven with a selenium Firefox session) and appends the first returned translation to the output file
    :param inPath: path to the input file, one segment per line
    :param outPath: path to the output file; if None it is derived from inPath with the "[._]en"
        substitutions below
    :param continueWhereWeLeftOf: if True, the lines already present in the output file are counted
        and that many input lines are skipped (a missing output file counts as zero lines);
        for any other value the output file is emptied and every line is processed
    :return: None
    """
    geckoDriverPath = u"/u/alfonsda/progs/geckoDriver/geckodriver"
    deepLUrl = "https://www.deepl.com/translator"
    if outPath is None:
        # NOTE(review): the outer substitution also matches the ".en" of the ".en2fr" that the inner
        # one has just inserted, so "x.en" becomes "x.fr2en2fr" - confirm this naming is intended
        outPath = re.sub(r"[._]en", ".fr2en", re.sub(r"[._]en", ".en2fr", inPath))
    with open(inPath) as cn10k:
        if continueWhereWeLeftOf is not True:
            # start over: empty the output file, -inf makes every line index eligible
            with open(outPath, "w") as out10k:
                out10k.write("")
            lastSeenInd = float("-inf")
        else:
            # resume: one output line was written per processed input line
            try:
                with open(outPath) as out10k:
                    lastSeenInd = sum(1 for _ in out10k)
            except FileNotFoundError:
                # nothing has been written yet, there is nothing to skip
                lastSeenInd = 0
        with open(outPath, "a") as out10k:
            session = webdriver.Firefox(executable_path=geckoDriverPath)
            try:
                session.get(deepLUrl)
                start = utilsOs.countTime()
                for counter, cnLn in enumerate(cn10k):
                    # skip the lines handled in a previous run
                    if counter < lastSeenInd:
                        continue
                    cnLn = cnLn.replace("\n", "")
                    # the 4th argument is the number of space-separated tokens of the line
                    session, enFrTranslAndAlt, _timeStampEn = translateOneLang(
                        session, u"en", cnLn, len(cnLn.split(" ")), [])
                    # only the first element of the returned translations is kept
                    out10k.write("{0}\n".format(enFrTranslAndAlt[0]))
                    # take a coffee break if it's time (600, presumably seconds, since the last start)
                    if utilsOs.countTime(start) >= 600:
                        session.close()
                        # mark the session as closed so the cleanup below does not close it twice
                        session = None
                        time.sleep(random.uniform(20, 60))
                        start = utilsOs.countTime()
                        # reopen the driver with the same geckodriver as the initial session,
                        # waiting 10 minutes before a single retry on OSError
                        try:
                            session = webdriver.Firefox(executable_path=geckoDriverPath)
                        except OSError:
                            time.sleep(600)
                            session = webdriver.Firefox(executable_path=geckoDriverPath)
                        session.get(deepLUrl)
            finally:
                # close the browser window on success and on failure alike
                if session is not None:
                    session.close()
dictCount[u"total"] += 1 if prediction is None: dictCount[u"silences"] += 1 elif prediction == 0: dictCount[u"zeros"] += 1 elif prediction == 1: dictCount[u"ones"] += 1 # next line scLn = scFile.readline() print(dictCount) # count the time the algorithm takes to run startTime = utilsOs.countTime() # extract the very problematic # print("PROBLEMATIC - FLAGGED") # extractVeryProblematic(folderPaths=[u'ALIGNMENT-QUALITY', u'MISALIGNED', u'QUALITY']) # print("PROBLEMATIC - NOT-FLAGGED") # extractVeryProblematic(folderPaths=[u'NOT-FLAGGED']) # extract the not problematic at all # print("NOT-PROBLEMATIC - FLAGGED") # extractVeryNonProblematic(folderPaths=[u'ALIGNMENT-QUALITY', u'MISALIGNED', u'QUALITY']) # print("NOT-PROBLEMATIC - NOT-FLAGGED") # extractVeryNonProblematic(folderPaths=[u'NOT-FLAGGED'])
def _openDeepLSession(deepLUrl, user, passw):
    """
    opens a Firefox driver on the deepL page, waits a short random time and logs the user in
    :param deepLUrl: url of the deepL translator page
    :param user: deepL user name
    :param passw: deepL password
    :return: the session returned by the authentication step
    """
    session = webdriver.Firefox()
    try:
        session.get(deepLUrl)
        time.sleep(random.uniform(1.3, 3.1))
        return authentificateBtUseSelenium(user, passw, session)
    except BaseException:
        # do not leave a browser window behind when the page load or the login fails
        session.close()
        raise


def launchForOneDay(tokLimit=4000, outputFolderPath=u"/data/rali5/Tmp/alfonsda/workRali/004tradBureau/017deepLTranslatedCorpus/", coffeeBreak=1650):
    """
    launches the deepL bot for one day's worth
    :param tokLimit: maximum number of tokens to treat in the day, applied to each account separately
        (the loop stops as soon as the count reaches tokLimit-10)
    :param outputFolderPath: path to the folder where will be output the files
    :param coffeeBreak: time in seconds when to take a break and start a new deepL session,
        None to never take a break
    :return: tokCount, iterCount: number of tokens translated and number of sentence pairs treated;
        both counters are reset for each account, so the values are those of the last account
    """
    start = utilsOs.countTime()
    # path to the referencer, indicating where we left off: path and last index worked
    referencerPath = u"/data/rali5/Tmp/alfonsda/workRali/004tradBureau/017deepLTranslatedCorpus/trRef"
    # info
    deepLUrl = u"https://www.deepl.com/translator"
    mUser, mPass, sUser, sPass = b000path.getDeepLProfileInfo()
    # for each user (the "s" account first, then the "m" account)
    for user, passw in ((sUser, sPass), (mUser, mPass)):
        tokCount = 0
        iterCount = 0
        # open the driver and log to deepL
        session = _openDeepLSession(deepLUrl, user, passw)
        try:
            # while we have not gone over the daily limit
            while tokCount < (tokLimit-10):
                # get the sp
                sp, filePath, fileIndex, refLns = getANewSpWhereWeLeftOff(referencerPath)
                session, nbOfTok, enFrTranslAndAlt, frEnTranslAndAlt, timeEn, timeFr = translateSpGetResult(session, sp)
                # dump the referencer lines
                utilsOs.dumpRawLines(refLns, referencerPath, addNewline=False, rewrite=True)
                # dump original sp
                utilsOs.appendLineToFile(sp[0], u"{0}originalSent.en".format(outputFolderPath), addNewLine=True)
                utilsOs.appendLineToFile(sp[1], u"{0}originalSent.fr".format(outputFolderPath), addNewLine=True)
                # dump translation and variants
                utilsOs.appendLineToFile(enFrTranslAndAlt, u"{0}translated.en2fr".format(outputFolderPath),
                                         addNewLine=True)
                utilsOs.appendLineToFile(frEnTranslAndAlt, u"{0}translated.fr2en".format(outputFolderPath),
                                         addNewLine=True)
                # dump reference
                utilsOs.appendLineToFile(u"{0}\t{1}\n".format(filePath, fileIndex),
                                         u"{0}reference.tsv".format(outputFolderPath), addNewLine=False)
                # dump timestamp
                utilsOs.appendLineToFile(u"{0}\tlocal time: {1}".format(timeEn, transformTimeToLocalTime(timeEn)),
                                         u"{0}timestamp.en".format(outputFolderPath), addNewLine=True)
                utilsOs.appendLineToFile(u"{0}\tlocal time: {1}".format(timeFr, transformTimeToLocalTime(timeFr)),
                                         u"{0}timestamp.fr".format(outputFolderPath), addNewLine=True)
                # add number of tokens
                tokCount += nbOfTok
                # add nb of iterations
                iterCount += 1
                # take a coffee break if it's time
                if coffeeBreak is not None and utilsOs.countTime(start) >= coffeeBreak:
                    session.close()
                    # mark the session as closed so the cleanup below does not close it twice
                    session = None
                    time.sleep(random.uniform(60, 80))
                    start = utilsOs.countTime()
                    # open a new driver and log to deepL again
                    session = _openDeepLSession(deepLUrl, user, passw)
                # short random pause between two sentence pairs
                time.sleep(random.uniform(1.0, 1.5))
        finally:
            # close the driver, on success and on failure alike
            if session is not None:
                session.close()
        time.sleep(random.uniform(10.0, 15.0))
    return tokCount, iterCount