def suffixStyleNaming(input_file: Path, style_suffix: str):
    """Append ``_<style_suffix>`` to every style name used in an ASS file.

    Style definition lines (matched by ``ASS_STYLE_RE``) get their ``name``
    field renamed; event lines matched by ``DIALOGUE_RE`` get the same suffix
    inserted after their ``style_name`` group.  The result is written to a
    ``_tmp`` sibling and then moved over ``input_file``.

    Args:
        input_file: subtitle file to rewrite in place.
        style_suffix: text appended (after an underscore) to each style name.

    Returns:
        ``input_file``, now holding the rewritten content.

    Note:
        The process exits with status 1 when a line contains ``Dialogue:`` or
        ``Comment:`` but cannot be parsed by ``DIALOGUE_RE``.
    """
    print(F"adding suffix '{style_suffix}' to styles in: {input_file.name}")
    scratch = input_file.append_stem("_tmp")

    rewritten = []
    with input_file.open('r', encoding='utf-8-sig') as src:
        for raw in src:
            text = raw.strip()
            event = DIALOGUE_RE.search(text)

            if ASS_STYLE_RE.match(text):
                # Style definition: rename it and serialise it back.
                style = mkvstuff.style_to_dict(text)
                style["name"] = style["name"] + "_" + style_suffix
                rewritten.append(mkvstuff.dict_to_style(style))
                continue

            if event:
                # Event line: splice the suffix in right after the style name.
                groups = event.groupdict()
                rewritten.append(
                    groups["part1"] + groups["style_name"] + "_" +
                    style_suffix + groups["part2"])
                continue

            if "Dialogue:" in text or "Comment:" in text:
                # Looks like an event but the regex could not take it apart.
                print("Failed to rx parse: \n " + text)
                print("The subfile is probably f****d, lack of proper QA in fansubs..")
                exit(1)

            rewritten.append(text)

    with scratch.open('w', encoding='utf-8-sig') as dst:
        dst.write("\n".join(rewritten))

    # Swap the rewritten copy into place.
    input_file.unlink()
    scratch.move(input_file)
    return input_file
def replaceSubFileWith(input_file: Path, sub_file: Path, output_folder: Path):
    """Produce a copy of ``input_file`` whose subtitles come from ``sub_file``.

    Two stream-copy ffmpeg passes are run: the first writes ``input_file``
    without subtitle streams (``-sn``) to an intermediate file, the second
    combines that intermediate with ``sub_file``.  The intermediate file is
    deleted afterwards.

    Args:
        input_file: media file whose subtitles are replaced.
        sub_file: subtitle file passed as the second ffmpeg input.
        output_folder: directory receiving both the intermediate and the result.

    Returns:
        Path of the result, named after ``input_file`` with ``_fxd_sub``
        appended to the stem, inside ``output_folder``.
    """
    stripped = output_folder.joinpath(
        input_file.append_stem('_fxd_sub_tmp1').name)
    result = output_folder.joinpath(input_file.append_stem('_fxd_sub').name)

    # Pass 1: everything except subtitles.
    strip_cmd = ['ffmpeg', '-y']
    strip_cmd += ['-i', f'{input_file}']
    strip_cmd += ['-map_metadata', '0', '-sn', '-c', 'copy']
    strip_cmd.append(f'{stripped}')
    common.run_process(strip_cmd, silent=True)

    # Pass 2: add the replacement subtitle file as a second input.
    merge_cmd = ['ffmpeg', '-y']
    merge_cmd += ['-i', f'{stripped}']
    merge_cmd += ['-i', f'{sub_file}']
    merge_cmd += ['-map_metadata', '0', '-c', 'copy']
    merge_cmd.append(f'{result}')
    common.run_process(merge_cmd, silent=True)

    stripped.unlink()
    return result
def buildSegmentList(self, input_file: Path, segmentList: dict, outputDirectory: Path = None, fullOutputDirectory: Path = None):
    """Attach a ``file_path`` to the entries of ``segmentList``.

    Entries whose ``segment_uid`` is ``None`` and that are followed by an
    entry with a truthy ``segment_uid`` (or by nothing) contribute a split
    time; ``input_file`` is then split at those times and the resulting part
    files are assigned to the matching entries.  Entries whose
    ``segment_uid`` is a key of ``self.sourceFiles`` are pointed at that
    source file, re-encoded first when ``self.args.re_encode`` is set.

    Args:
        input_file: file that gets split into parts.
        segmentList: dict keyed by integer index; each value is a dict read
            for ``segment_uid``, ``time_start`` and ``time_end``.  It is
            modified in place.
        outputDirectory: directory for the split parts (and for re-encoded
            files).  Used when ``fullOutputDirectory`` is not given.
        fullOutputDirectory: when set, passed as the output path to the
            splitter instead of one derived from ``outputDirectory``.

    Returns:
        The same ``segmentList`` object, with ``file_path`` filled in where a
        file could be determined.

    Raises:
        Exception: if neither ``outputDirectory`` nor ``fullOutputDirectory``
            is set.
    """
    if not outputDirectory and not fullOutputDirectory:
        raise Exception(
            "buildSegmentList requires either outputDirectory or fullOutputDirectory to be set"
        )

    split_times = []
    for i in sorted(segmentList.keys()):
        seg = segmentList[i]
        if seg["segment_uid"] is None and (
                segmentList[i + 1]["segment_uid"]
                if (i + 1) < len(segmentList) else True):
            if "time_end" in seg:
                split_times.append((i, seg["time_end"]))
            elif i + 1 in segmentList:
                # No explicit end: the part ends where the next one starts.
                split_times.append((i, segmentList[i + 1]["time_start"]))
            elif i - 1 in segmentList and 'time_end' in segmentList[i - 1]:
                split_times.append((i, segmentList[i - 1]["time_end"]))
        elif seg["segment_uid"] in self.sourceFiles:
            linked_file = self.sourceFiles[seg["segment_uid"]]
            if self.args.re_encode:
                print(F'Re-encoding {linked_file}')
                segmentList[i]["file_path"] = mkvstuff.reEncodeFile(
                    linked_file, outputDirectory)
            else:
                segmentList[i]["file_path"] = linked_file

    if fullOutputDirectory:
        output_file = fullOutputDirectory
    else:
        output_file = outputDirectory.joinpath(
            input_file.change_stem("parts").name)

    split_files = mkvstuff.splitFilesByTimeCodes(
        input_file,
        split_times,
        output_file,
        viaFfmpeg=self.args.via_ffmpeg,
        reEncode=self.args.re_encode)
    # split_files maps part id -> path; list the paths, not the ids.
    print(F"Files are: {','.join(str(x) for x in split_files.values())}")

    for i in segmentList:
        if i in split_files:
            segmentList[i]["file_path"] = split_files[i]
    return segmentList
def extract_chapter(input_file: Path, output_folder: Path = Path(".")) -> Path:
    """Extract the chapters of ``input_file`` into an XML file via mkvextract.

    Args:
        input_file: Matroska file to read chapters from.
        output_folder: directory that receives the XML file; defaults to the
            current directory.

    Returns:
        Path of the XML file: ``input_file``'s name with an ``.xml`` suffix,
        placed inside ``output_folder``.
    """
    full_output_path = output_folder.joinpath(
        input_file.with_suffix(".xml").name)
    # Bare executable name (no ".exe"), matching how ffmpeg and mkvmerge are
    # invoked elsewhere, so the call is not tied to Windows.
    cmd = [
        'mkvextract',
        str(input_file),
        'chapters',
        str(full_output_path),
    ]
    common.run_process(cmd, silent=True)
    return full_output_path
class PyMergeMKVLinks():
    """Merge segment-linked MKV files from a source folder into an output folder.

    Constructing an instance does all the work: it indexes the source folder
    by segment UID and then processes every indexed file.
    """

    def __init__(self, args):
        """Store ``args``, prepare the folders and run the whole job.

        Args:
            args: parsed command-line namespace; ``re_encode``, ``via_ffmpeg``,
                ``sourceDir``, ``destDir`` and ``chapters`` are read from it.

        Note:
            Exits the process with status 1 when no file with a segment UID
            is found in the source folder.
        """
        self.args = args
        # Asking for a re-encode switches ffmpeg mode on as well.
        if args.re_encode:
            args.via_ffmpeg = True

        self.sourceFolder: Path = args.sourceDir[0]
        self.outputFolder: Path = args.destDir[0]
        self.outputFolder.mkdir(exist_ok=True)

        self.sourceFiles = self.generateFileList()
        if not self.sourceFiles:
            print(F"Found no files with segments in: {self.sourceFolder}")
            exit(1)

        self.tmpDir = Path("_unlink_temp/")
        self.tmpDir.mkdir(exist_ok=True)
        self.processFiles()

    def generateFileList(self):
        """Return a dict mapping segment UID to file for the source folder.

        Only files whose ``mkvJson`` container info has a ``properties``
        entry are included.
        """
        by_uid = {}
        for candidate in self.sourceFolder.listfiles():
            container = mkvstuff.mkvJson(candidate)["container"]
            if "properties" not in container:
                continue
            by_uid[container["properties"]["segment_uid"]] = candidate
        return by_uid

    def processFiles(self):
        """Process every indexed source file.

        Files without a chapter dict are skipped.  Files whose chapters carry
        no segment UID are copied as-is.  All others have their segments
        resolved in a per-file temp directory and are rebuilt into one MKV in
        the output folder, with a new chapter file when ``args.chapters`` is
        set.
        """
        for index, (_uid, mkv) in enumerate(self.sourceFiles.items()):
            workDir = self.tmpDir.joinpath(str(index))

            chapters = mkvstuff.getChapterDict(mkv)
            if not chapters:
                continue

            destination = self.outputFolder.joinpath(mkv.name)
            if not any(entry["segment_uid"] for entry in chapters.values()):
                # Nothing is linked in: a plain copy is enough.
                self.plainCopy(mkv, destination)
                continue

            workDir.mkdir(parents=True, exist_ok=True)
            segmentList = self.buildSegmentList(mkv, chapters, workDir)
            if not segmentList:
                continue

            chapterFile = None
            if self.args.chapters:
                chapterFile = workDir.joinpath("new_chapter.xml")
                with chapterFile.open('w', encoding='utf-8') as out:
                    out.write(mkvstuff.segmentListToChapterFile(segmentList))

            self.buildMkvFromSegments(segmentList,
                                      destination,
                                      workDir,
                                      chapter=chapterFile)
def __init__(self, args):
    """Store ``args``, set up source/output/temp folders and start processing.

    Args:
        args: parsed command-line namespace; ``re_encode``, ``via_ffmpeg``,
            ``sourceDir`` and ``destDir`` are read here.

    Note:
        Exits the process with status 1 when ``generateFileList`` returns
        nothing.
    """
    self.args = args
    # A requested re-encode also turns ffmpeg mode on.
    if args.re_encode:
        args.via_ffmpeg = True

    source_dir = args.sourceDir[0]
    self.sourceFolder: Path = source_dir
    dest_dir = args.destDir[0]
    self.outputFolder: Path = dest_dir
    dest_dir.mkdir(exist_ok=True)

    found = self.generateFileList()
    self.sourceFiles = found
    if not found:
        print(F"Found no files with segments in: {self.sourceFolder}")
        exit(1)

    scratch_root = Path("_unlink_temp/")
    self.tmpDir = scratch_root
    scratch_root.mkdir(exist_ok=True)

    self.processFiles()
def getStylesFromAssFile(input_file: Path):
    """Collect the style definition lines of an ASS subtitle file.

    Args:
        input_file: subtitle file, read as UTF-8 (a BOM is tolerated).

    Returns:
        List of the lines matched by ``ASS_STYLE_RE``, each stripped of
        surrounding whitespace, in file order.
    """
    with input_file.open("r", encoding="utf-8-sig") as handle:
        # The regex sees the raw line; only the stored copy is stripped.
        return [raw.strip() for raw in handle if ASS_STYLE_RE.match(raw)]
def extract_first_subtitle(input_file: Path, output_folder: Path = None):
    """Write subtitles from ``input_file`` to an ``.ass`` file using ffmpeg.

    ffmpeg is run with video (``-vn``) and audio (``-an``) disabled and an
    ``.ass`` output name.

    Args:
        input_file: media file to read.
        output_folder: optional directory for the ``.ass`` file; when omitted
            the file is written next to ``input_file``.

    Returns:
        Path of the ``.ass`` file.
    """
    ass_path = input_file.change_suffix(".ass")
    target = output_folder.joinpath(ass_path.name) if output_folder else ass_path

    ffmpeg_cmd = ['ffmpeg', '-y']
    ffmpeg_cmd.extend(['-i', f'{input_file}'])
    ffmpeg_cmd.extend(['-vn', '-an'])
    ffmpeg_cmd.append(f'{target}')
    common.run_process(ffmpeg_cmd, silent=True)
    return target
def get_attachment_list(p):
    """Describe the attachments reported by ``mkvstuff.mkvJson`` for ``p``.

    Args:
        p: file handed to ``mkvstuff.mkvJson``.

    Returns:
        One dict per attachment with the keys ``id``, ``type`` (always
        ``"attachments"``), ``ext`` (suffix of the attachment file name),
        ``delay`` (0), ``lang`` (``'eng'``), ``codec`` (the content type) and
        ``name``.  Empty when the info has no ``attachments`` entry.
    """
    info = mkvstuff.mkvJson(p)
    if 'attachments' not in info:
        return []

    described = []
    for attachment in info['attachments']:
        extension = Path(attachment['file_name']).suffix
        described.append(dict(
            id=attachment['id'],
            type="attachments",
            ext=extension,
            delay=0,
            lang='eng',
            codec=attachment['content_type'],
            name=attachment['file_name'],
        ))
    return described
def replaceAssStylesWithList(input_file: Path, style_list: dict):
    """Replace every style definition in an ASS file with ``style_list``.

    Existing style lines (matched by ``ASS_STYLE_RE``) are dropped; the
    styles from ``style_list`` are written directly after the styles
    ``Format:`` header line.  The file is rewritten through a ``_tmp``
    sibling that is then moved over ``input_file``.

    Args:
        input_file: subtitle file to rewrite in place.
        style_list: dict of style dicts; only the values are used, each
            serialised with ``mkvstuff.dict_to_style``.

    Returns:
        ``input_file``, now holding the rewritten content.
    """
    print(F"replacing styles in: {input_file.name}")
    scratch = input_file.append_stem("_tmp")

    # Header line that introduces the style definitions.
    format_header = re.compile(r'^Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding$')

    kept = []
    with input_file.open('r', encoding='utf-8-sig') as src:
        for raw in src:
            text = raw.strip()
            if format_header.match(text):
                kept.append(text)
                kept.extend(
                    mkvstuff.dict_to_style(style)
                    for style in style_list.values())
                continue
            if ASS_STYLE_RE.match(text):
                # Old style definition: superseded by style_list.
                continue
            kept.append(text)

    with scratch.open('w', encoding='utf-8-sig') as dst:
        dst.write("\n".join(kept))

    input_file.unlink()
    scratch.move(input_file)
    return input_file
for i, seg in segmentList.items(): if i in split_files: segmentList[i]["file_path"] = split_files[i] return segmentList if __name__ == "__main__": parser = argparse.ArgumentParser(description="") parser.set_defaults(which="main_p") parser.add_argument("sourceDir", nargs="+", type=common.folderArgument) parser.add_argument("destDir", nargs="*", type=common.folderArgument, default=(Path("./_output/"), )) parser.add_argument("-c", "--with-chapters", action="store_true", dest="chapters", default=False) parser.add_argument("-f", "--via-ffmpeg", action="store_true", dest="via_ffmpeg", default=False) parser.add_argument("-r", "--re-encode", action="store_true", dest="re_encode", default=False)
def splitFilesByTimeCodes(input_file: Path, split_times: list, output_file: Path, viaFfmpeg=True, reEncode=False) -> dict:
    """Split ``input_file`` into consecutive parts at the given time codes.

    Args:
        input_file: media file to split.
        split_times: sequence of ``(part_id, time_code)`` pairs; each part
            runs from the previous time code (or the start of the file) up to
            its own time code.
        output_file: template for the part names.  In ffmpeg mode parts are
            named ``<stem>_<part_id><suffix>`` next to it; in mkvmerge mode
            it is passed as ``--output`` and the parts are expected as
            ``<stem>-001<suffix>``, ``<stem>-002<suffix>``, ...
        viaFfmpeg: split with one ffmpeg call per part instead of a single
            mkvmerge ``--split`` call.
        reEncode: re-encode each part instead of stream-copying it; only
            consulted in ffmpeg mode.

    Returns:
        Dict mapping each part id to the path of its part file.

    Raises:
        Exception: in mkvmerge mode, when an expected part file is missing.
    """
    print(list(split_times))

    if viaFfmpeg:
        if reEncode:
            codec_args = [
                '-map', '0',
                '-g', '1',
                '-pix_fmt', 'yuv420p',
                '-c:v', 'h264_nvenc',
                '-c:a', 'pcm_s16le',
                '-b:v', '2M',
                '-s', '1280x720',
            ]
        else:
            codec_args = ['-c', 'copy']
        action = F"Splitting{' and re-encoding' if reEncode else ''}"

        print(F"Splitting {input_file.name} into parts...")
        output_file.parent.mkdir(exist_ok=True, parents=True)

        parts = {}
        start = ""  # empty: the first part starts at the beginning of the file
        for entry in split_times:
            partId, end = entry[0], entry[1]
            cmd = ["ffmpeg", "-y", "-i", F"{input_file}"]
            if start != "":
                cmd += ["-ss", start]
            cmd += ["-to", end]
            cmd += codec_args
            part_file = output_file.parent.joinpath(
                F"{output_file.stem}_{partId}{output_file.suffix}")
            # argv entries are strings everywhere else; keep this one a string too.
            cmd.append(str(part_file))

            print(F"\r{action} Part ID#{partId}...", end="")
            common.run_process(cmd, silent=True)
            print(F"\r{action} Part ID#{partId}, DONE!", end="\n")

            parts[partId] = part_file
            start = end  # the next part begins where this one ended
        return parts

    ids = [x[0] for x in split_times]
    timeCodes = ",".join(x[1] for x in split_times)
    cmd = [
        "mkvmerge",
        "--ui-language", "en",
        "--output", F"{output_file}",
        "(", F"{input_file}", ")",
        "--split", F"timestamps:{timeCodes}",
    ]
    print(F"Splitting {input_file} into parts...")
    common.run_process(cmd, silent=True)

    # The n-th part id is paired with the n-th numbered output file.
    assumed_files = {}
    for number, partId in enumerate(ids, start=1):
        _af = output_file.parent.joinpath(
            output_file.append_stem(F"-{number:03}").name)
        if not _af.is_file():
            raise Exception(F"Assumed split part:' {_af} does not exist. \nGood luck.")
        assumed_files[partId] = _af
    print(assumed_files)
    return assumed_files
def plainCopy(self, sourceFile: Path, destinationFile: Path):
    """Announce and perform a straight copy of a file.

    Args:
        sourceFile: file to copy.
        destinationFile: where the copy is written.
    """
    announcement = F"Plain copying {sourceFile} to {destinationFile}"
    print(announcement)
    sourceFile.copy(destinationFile)
def build_font_list(folder: Path):
    """Return the ``.ttf`` and ``.otf`` files that ``folder.listfiles`` reports.

    Args:
        folder: directory to look in.
    """
    font_suffixes = ('.ttf', '.otf')
    return folder.listfiles(font_suffixes)
def buildMkvFromSegments(self, segmentList, output_file: Path, tmpDir: Path, chapter=None):
    """Concatenate the segment files into one MKV with unified subtitle styles.

    Steps:
      1. For every segment with a ``file_path``: extract its fonts into
         ``tmpDir/fonts`` and collect its subtitle styles, each renamed with
         a ``partid_<index>`` suffix so styles of different parts cannot
         clash.
      2. Re-extract each segment's subtitles, apply the same renaming,
         replace the style section with the union of all styles and mux the
         result back, producing a fixed copy of the segment.
      3. Concatenate the fixed copies with ffmpeg's concat demuxer.
      4. Remux with mkvmerge, adding the chapter file (if any) and all
         collected fonts, then append the CRC-32 to the output file name.

    Args:
        segmentList: dict keyed by integer index; entries with a
            ``file_path`` take part in the merge.  Their ``file_path`` is
            updated in place to the fixed copy.
        output_file: desired output path (a CRC tag in its stem is stripped
            via ``common.strip_crc`` before the final name is built).
        tmpDir: existing scratch directory for intermediate files.
        chapter: optional chapter file passed to mkvmerge.

    Raises:
        ValueError: if no entry of ``segmentList`` has a ``file_path``.
    """
    part_ids = [
        i for i in sorted(segmentList.keys()) if 'file_path' in segmentList[i]
    ]
    if not part_ids:
        # Nothing to concatenate; fail with a clear message up front.
        raise ValueError("buildMkvFromSegments: no segment has a file_path")

    # NOTE(review): the list file is kept in the working directory; relative
    # segment paths are presumably resolved by ffmpeg against the list file's
    # location - confirm before moving it into tmpDir.
    concat_file = Path("concat.txt")
    font_dir = tmpDir.joinpath("fonts")
    font_dir.mkdir(exist_ok=True)

    # Pass 1: gather fonts and the union of all (suffixed) styles.
    style_list = {}
    for i in part_ids:
        seg_path = segmentList[i]['file_path']
        mkvstuff.ext_all_fonts_to_dir(seg_path, font_dir)
        sub_file = mkvstuff.extract_first_subtitle(seg_path, tmpDir)
        sub_file = mkvstuff.suffixStyleNaming(sub_file, F"partid_{i}")
        for styleStr in mkvstuff.getStylesFromAssFile(sub_file):
            styleDict = mkvstuff.style_to_dict(styleStr)
            style_list[styleDict["name"]] = styleDict
        sub_file.unlink()

    # Pass 2: give every segment the full style list and remux it.
    for i in part_ids:
        seg_path = segmentList[i]["file_path"]
        _fixed_sub = mkvstuff.extract_first_subtitle(seg_path, tmpDir)
        _fixed_sub = mkvstuff.suffixStyleNaming(_fixed_sub, F"partid_{i}")
        _fixed_sub = mkvstuff.replaceAssStylesWithList(_fixed_sub, style_list)
        segmentList[i]["file_path"] = mkvstuff.replaceSubFileWith(
            seg_path, _fixed_sub, tmpDir)
        # The subtitle file is muxed in now; remove it per segment so none
        # of them is left behind.
        _fixed_sub.unlink()

    with concat_file.open("w", encoding="utf-8") as f:
        for i in part_ids:
            # A single quote ends the quoted string in the concat list
            # syntax, so it has to be written as '\'' inside the quotes.
            quoted = str(segmentList[i]['file_path']).replace("'", "'\\''")
            f.write(F"file '{quoted}'\n")

    output_file_tmp = output_file.append_stem('_tmp')
    cmd = [
        'ffmpeg', '-y',
        '-f', 'concat',
        '-safe', '0',
        '-i', F'{concat_file}',
    ]
    if self.args.re_encode:
        cmd.extend([
            '-map', '0',
            '-g', '1',
            '-pix_fmt', 'yuv420p',
            '-c:v', 'h264_nvenc',
            '-c:a', 'pcm_s16le',
            '-b:v', '2M',
            '-s', '1280x720',
        ])
    else:
        cmd.extend(['-c', 'copy'])
    cmd.append(F'{output_file_tmp}')

    fonts_list = mkvstuff.build_font_list(font_dir)
    print(
        F"Merging into{' (with re-encode)' if self.args.re_encode else ''}: \n{output_file_tmp}"
    )
    try:
        common.run_process(cmd, silent=True)
    finally:
        # Do not leave the list file behind, even when ffmpeg fails.
        concat_file.unlink()

    output_file = output_file.parent.joinpath(
        common.strip_crc(output_file.stem) + output_file.suffix)
    print(F"Merging (with chapter & fonts) into: {output_file}")
    cmd = [
        "mkvmerge",
        "--ui-language", "en",
        "--output", F"{output_file}",
        "(", F"{output_file_tmp}", ")",
    ]
    if chapter:
        cmd.extend([
            "--chapter-language", "eng",
            "--chapters", F"{chapter}",
        ])
    for font in fonts_list:
        cmd.extend([
            "--attachment-name", F"{font.name}",
            "--attachment-mime-type", F"{font.mime}",
            "--attach-file", F"{font.resolve()}",
        ])
    common.run_process(cmd, silent=True)
    output_file_tmp.unlink()

    print("\rCalculating and appending CRC-Sum...", end='')
    csum = common.crc32f(output_file)
    output_file.move(output_file.append_stem(F' [{csum}]'))
    print(F"\rCalculating and appending CRC-Sum, OK: {csum}")
def crc32f(p: Path):
    """Return the CRC-32 of a file as eight uppercase hexadecimal digits.

    The file is read in fixed-size chunks, so memory use stays bounded no
    matter how the bytes are laid out, and the handle is closed when done.

    Args:
        p: path of the file to checksum.

    Returns:
        The checksum formatted as ``"%08X"``, e.g. ``"CBF43926"``.
    """
    checksum = 0
    with p.open("rb") as handle:
        # iter() with a sentinel stops at the first empty read (end of file).
        for chunk in iter(lambda: handle.read(1 << 20), b""):
            checksum = zlib.crc32(chunk, checksum)
    return "%08X" % (checksum & 0xFFFFFFFF)
def folderArgument(v) -> Path:
    """argparse ``type=`` callback that accepts only existing directories.

    Args:
        v: command-line value naming a folder.

    Returns:
        ``Path(v)`` when it refers to an existing directory.

    Raises:
        argparse.ArgumentTypeError: if the path does not exist, or exists
            but is not a directory.
    """
    p = Path(v)
    if not p.exists():
        raise argparse.ArgumentTypeError('Folder %s does not exist' % (v))
    if not p.is_dir():
        # An existing regular file is not a usable folder argument.
        raise argparse.ArgumentTypeError('%s is not a folder' % (v))
    return p