Example #1
def parse_metadata(metadatapath):
    _, ext = common.get_extension(metadatapath)
    accepted = common.config['accepted_formats']
    if ext not in accepted:
        raise MetaDataException('"%s" is not an accepted format, convert to: %s' % (
            metadatapath, ', '.join(accepted)))

    app = App()
    app.metadatapath = metadatapath
    app.id, _ = common.get_extension(os.path.basename(metadatapath))

    with open(metadatapath, 'r') as mf:
        if ext == 'txt':
            parse_txt_metadata(mf, app)
        elif ext == 'json':
            parse_json_metadata(mf, app)
        elif ext == 'xml':
            parse_xml_metadata(mf, app)
        elif ext == 'yaml':
            parse_yaml_metadata(mf, app)
        else:
            raise MetaDataException('Unknown metadata format: %s' % metadatapath)

    post_metadata_parse(app)
    return app
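A note on the contract this example assumes: the code unpacks two values from common.get_extension and derives app.id from the first one, so in this project the helper evidently splits a filename into a (base, extension) pair, with a dot-less extension (it is compared against names like 'txt' and 'yaml'). A minimal sketch of that two-value contract, not the project's actual implementation:

import os

def get_extension(filename):
    # Split 'org.example.app.txt' into ('org.example.app', 'txt');
    # a sketch of the (base, ext) pair the example above unpacks.
    base, ext = os.path.splitext(filename)
    return base, ext.lstrip('.').lower()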
Example #2
 def is_forbidden(self, request):
     """Returns whether this request is permitted by checking URL extension and regex
     XXX head request for mime?
     """
     forbidden = False
     url = common.to_unicode(request.url().toString().toUtf8().data()).encode('utf-8')
     if common.get_extension(url) in self.banned_extensions:
         forbidden = True
     elif re.match(self.allowed_regex, url) is None:
         forbidden = True
     return forbidden
Example #3
File: v5.py Project: w4lker/Antix
 def is_forbidden(self, request):
     """Returns whether this request is permitted by checking URL extension and regex
     XXX head request for mime?
     """
     forbidden = False
     url = common.to_unicode(request.url().url())
     if common.get_extension(url) in self.banned_extensions:
         forbidden = True
     elif re.match(self.allowed_regex, url) is None:
         forbidden = True
     return forbidden
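Unlike Example #1, these two crawler variants treat common.get_extension(url) as returning a single extension string that can be tested for membership in banned_extensions. A plausible sketch of that single-value contract, dropping the query string before splitting (an assumption, not the project's code):

import posixpath
from urllib.parse import urlsplit

def get_extension(url):
    # 'http://example.com/a/b.php?x=1' -> 'php'; query and fragment are
    # stripped first so they do not end up in the extension.
    path = urlsplit(url).path
    return posixpath.splitext(path)[1].lstrip('.').lower()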
Example #4
def get_default_app_info(metadatapath=None):
    if metadatapath is None:
        appid = None
    else:
        appid, _ = common.get_extension(os.path.basename(metadatapath))

    app = App()
    app.metadatapath = metadatapath
    if appid is not None:
        app.id = appid

    return app
Example #5
 def save_as(self, url, filename=None, save_dir="images"):
     """Download url and save into disk.
     """
     if url:
         _bytes = self.get(url, num_redirects=0)
         if _bytes:
             if not os.path.exists(save_dir):
                 os.makedirs(save_dir)
             save_path = os.path.join(
                 save_dir, filename or "%s.%s" % (hashlib.md5(url).hexdigest(), common.get_extension(url))
             )
             open(save_path, "wb").write(_bytes)
             return save_path
Example #6
File: download.py Project: w4lker/Antix
 def save_as(self, url, filename=None, save_dir='images'):
     """Download url and save into disk.
     """
     if url:
         _bytes = self.get(url, num_redirects=0)
         if _bytes:
             if not os.path.exists(save_dir):
                 os.makedirs(save_dir)
             save_path = os.path.join(
                 save_dir, filename or '%s.%s' %
                 (hashlib.md5(url).hexdigest(), common.get_extension(url)))
             open(save_path, 'wb').write(_bytes)
             return save_path
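When filename is not given, both save_as variants above derive one from the MD5 of the URL plus its extension, so repeated downloads of the same URL map to the same path. The derivation in isolation (hypothetical URL; note that on Python 3 hashlib.md5 needs bytes, so the URL must be encoded first):

import hashlib
import os

url = 'http://example.com/pics/logo.png'  # hypothetical URL
digest = hashlib.md5(url.encode('utf-8')).hexdigest()
ext = 'png'  # stands in for common.get_extension(url)
save_path = os.path.join('images', '%s.%s' % (digest, ext))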
Example #7
def main():

    global config, options

    # Parse command line...
    parser = ArgumentParser(usage="%(prog)s [options] [APPID [APPID ...]]")
    common.setup_global_opts(parser)
    parser.add_argument("-l", "--list", action="store_true", default=False,
                        help="List files that would be reformatted")
    parser.add_argument("-t", "--to", default=None,
                        help="Rewrite to a specific format")
    parser.add_argument("appid", nargs='*', help="app-id in the form APPID")
    options = parser.parse_args()

    config = common.read_config(options)

    # Get all apps...
    allapps = metadata.read_metadata(xref=True)
    apps = common.read_app_args(options.appid, allapps, False)

    if options.list and options.to is not None:
        parser.error("Cannot use --list and --to at the same time")

    supported = ['txt', 'yaml']

    if options.to is not None and options.to not in supported:
        parser.error("Must give a valid format to --to")

    for appid, app in apps.iteritems():
        base, ext = common.get_extension(app.metadatapath)
        if not options.to and ext not in supported:
            logging.info("Ignoring %s file at '%s'" % (ext, app.metadatapath))
            continue

        to_ext = ext
        if options.to is not None:
            to_ext = options.to

        if options.list:
            if not proper_format(app):
                print app.metadatapath
            continue

        with open(base + '.' + to_ext, 'w') as f:
            metadata.write_metadata(to_ext, f, app)

        if ext != to_ext:
            os.remove(app.metadatapath)

    logging.debug("Finished.")
Example #8
 def valid(link):
     """Check if should crawl this link
     """
     # check if a media file
     if common.get_extension(link) not in common.MEDIA_EXTENSIONS:
         # check if a proper HTTP link
         if link.lower().startswith('http'):
             # only crawl within website
             if common.same_domain(domain, link):
                 # passes regex
                 if self.allowed_urls.match(link) and not self.banned_urls.match(link):
                     # not blocked by robots.txt
                     if not self.robots or self.robots.can_fetch(settings.user_agent, link):
                         # allowed to recrawl
                         if self.crawl_existing or (D.cache and link not in D.cache):
                             return True
     return False
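All the nested ifs above guard a single return True, so the whole check collapses to one conjunction. A flattened sketch with the same tests in the same order (domain, self, D and settings are the same free variables the original closure captures; bool() normalizes match objects and cache truthiness):

def valid(link):
    # Flattened equivalent of the nested checks above.
    return bool(
        common.get_extension(link) not in common.MEDIA_EXTENSIONS
        and link.lower().startswith('http')
        and common.same_domain(domain, link)
        and self.allowed_urls.match(link)
        and not self.banned_urls.match(link)
        and (not self.robots or self.robots.can_fetch(settings.user_agent, link))
        and (self.crawl_existing or (D.cache and link not in D.cache)))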
Example #9
    def save_as(self, url, filename=None, save_dir='images'):
        """Download url and save to disk

        url:
            the webpage to download
        filename:
            Output file to save to. If not set then will save to file based on URL
        """
        _bytes = self.get(url, num_redirects=0)
        if _bytes:
            if not os.path.exists(save_dir):
                os.makedirs(save_dir)
            save_path = os.path.join(
                save_dir, filename or '%s.%s' %
                (hashlib.md5(url).hexdigest(), common.get_extension(url)))
            open(save_path, 'wb').write(_bytes)
            return save_path
Example #10
    def demux(self):
        if self._write_chapters:
            with open(self._chapters_output_path, "w") as output_file:
                output_file.write(chapters.format_ogm_chapters(self.chapters))

        if self._make_keyframes:
            SCXviD.make_keyframes(self._path, self._keyframes_output_path)

        ffargs = {}
        if self._demux_audio:
            ffargs['audio_stream'] = self._audio_stream.id
            ffargs['audio_path'] = self._audio_output_path
            ffargs['audio_rate'] = self._audio_sample_rate
        if self._demux_subs:
            ffargs['script_stream'] = self._script_stream.id
            ffargs['script_path'] = self._script_output_path

        if self._make_timecodes:

            def set_ffmpeg_timecodes():
                ffargs['video_stream'] = self._mi.video[0].id
                ffargs['timecodes_path'] = self._timecodes_output_path

            if get_extension(self._path).lower() == '.mkv':
                try:
                    MkvToolnix.extract_timecodes(
                        self._path,
                        stream_idx=self._mi.video[0].id,
                        output_path=self._timecodes_output_path)
                except OSError as e:
                    if e.errno == 2:
                        set_ffmpeg_timecodes()
                    else:
                        raise
            else:
                set_ffmpeg_timecodes()

        if ffargs:
            FFmpeg.demux_file(self._path, **ffargs)
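In the except OSError branch above, errno 2 is ENOENT: the mkvextract binary itself is missing, so the code degrades to ffmpeg-derived timecodes instead of failing outright, while any other OSError is re-raised. The pattern in isolation, as a generic sketch (run_primary and fallback are hypothetical stand-ins):

import errno

def call_with_fallback(run_primary, fallback):
    # Try the preferred external tool; if its binary is not installed
    # (ENOENT), run the fallback. Any other OS error still propagates.
    try:
        run_primary()
    except OSError as e:
        if e.errno == errno.ENOENT:
            fallback()
        else:
            raise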
Example #11
    def save_as(self, url, filename=None, save_dir='images', override=False):
        """Download url and save to disk if does not already exist

        url:
            the webpage to download
        filename:
            Output file to save to. If not set then will save to file based on URL
        override:
            whether to download if output file already exists
        """
        save_path = os.path.join(
            save_dir, filename or '%s.%s' %
            (hashlib.md5(url).hexdigest(), common.get_extension(url)))
        if not os.path.exists(save_path) or override:
            # need to download
            _bytes = self.get(url, num_redirects=0, write_cache=False)
            if _bytes:
                if not os.path.exists(save_dir):
                    os.makedirs(save_dir)
                open(save_path, 'wb').write(_bytes)
            else:
                return None
        return save_path
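Unlike Examples #5 and #6, this variant computes save_path before touching the network, which is what lets it skip the download entirely when the file already exists and override is False. A hypothetical usage sketch (Downloader stands in for whatever class hosts the method):

d = Downloader()
path = d.save_as('http://example.com/a.png')                 # downloads once
path = d.save_as('http://example.com/a.png')                 # file exists, no request
path = d.save_as('http://example.com/a.png', override=True)  # forces a re-download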
Example #12
File: demux.py Project: shinchiro/Sushi
    def demux(self):
        if self._write_chapters:
            with open(self._chapters_output_path, "w") as output_file:
                output_file.write(chapters.format_ogm_chapters(self.chapters))

        if self._make_keyframes:
            SCXviD.make_keyframes(self._path, self._keyframes_output_path)

        ffargs = {}
        if self._demux_audio:
            ffargs['audio_stream'] = self._audio_stream.id
            ffargs['audio_path'] = self._audio_output_path
            ffargs['audio_rate'] = self._audio_sample_rate
        if self._demux_subs:
            ffargs['script_stream'] = self._script_stream.id
            ffargs['script_path'] = self._script_output_path

        if self._make_timecodes:
            def set_ffmpeg_timecodes():
                ffargs['video_stream'] = self._mi.video[0].id
                ffargs['timecodes_path'] = self._timecodes_output_path

            if get_extension(self._path).lower() == '.mkv':
                try:
                    MkvToolnix.extract_timecodes(self._path,
                                                 stream_idx=self._mi.video[0].id,
                                                 output_path=self._timecodes_output_path)
                except OSError as e:
                    if e.errno == 2:
                        set_ffmpeg_timecodes()
                    else:
                        raise
            else:
                set_ffmpeg_timecodes()

        if ffargs:
            FFmpeg.demux_file(self._path, **ffargs)
Example #13
 def for_paravis(self):
     """Check if file object can be viewed in an editor."""
     return self.valid and not self.is_reference and self.exists \
         and get_extension(self.filename) in ("med", "rmed", "mmed")
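Note the contract drift between projects: this example compares get_extension(...) against dot-less names ('med', 'rmed', 'mmed'), while the Sushi examples below compare it against dotted ones ('.wav', '.mkv', '.ass'). A caller that mixes the two conventions silently never matches. Both variants sketched side by side (hypothetical helpers, not either project's code):

import os

def get_extension_dotless(filename):   # convention used in this example
    return os.path.splitext(filename)[1].lstrip('.').lower()

def get_extension_dotted(filename):    # convention used in the Sushi examples
    return os.path.splitext(filename)[1].lower()

assert get_extension_dotless('mesh.rmed') == 'rmed'
assert get_extension_dotted('video.mkv') == '.mkv'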
Example #14
    def save_as(self, url, filename=None, save_dir='images'):
        """Download url and save to disk

        url:
            the webpage to download
        filename:
            Output file to save to. If not set then will save to file based on URL
        """
        _bytes = self.get(url, num_redirects=0)
        if _bytes:
            if not os.path.exists(save_dir):
                os.makedirs(save_dir)
            save_path = os.path.join(save_dir, filename or '%s.%s' % (hashlib.md5(url).hexdigest(), common.get_extension(url)))
            open(save_path, 'wb').write(_bytes)
            return save_path
Example #15
File: demux.py Project: shinchiro/Sushi
 def __init__(self, path):
     super(Demuxer, self).__init__()
     self._path = path
     self._is_wav = get_extension(self._path) == '.wav'
     self._mi = None if self._is_wav else FFmpeg.get_media_info(self._path)
     self._demux_audio = self._demux_subs = self._make_timecodes = self._make_keyframes = self._write_chapters = False
Example #16
File: sushi.py Project: tp7/Sushi
def run(args):
    ignore_chapters = args.chapters_file is not None and args.chapters_file.lower() == 'none'
    write_plot = plot_enabled and args.plot_path
    if write_plot:
        plt.clf()
        plt.ylabel('Shift, seconds')
        plt.xlabel('Event index')

    # first part should do all possible validation and should NOT take significant amount of time
    check_file_exists(args.source, 'Source')
    check_file_exists(args.destination, 'Destination')
    check_file_exists(args.src_timecodes, 'Source timecodes')
    check_file_exists(args.dst_timecodes, 'Destination timecodes')
    check_file_exists(args.script_file, 'Script')

    if not ignore_chapters:
        check_file_exists(args.chapters_file, 'Chapters')
    if args.src_keyframes not in ('auto', 'make'):
        check_file_exists(args.src_keyframes, 'Source keyframes')
    if args.dst_keyframes not in ('auto', 'make'):
        check_file_exists(args.dst_keyframes, 'Destination keyframes')

    if (args.src_timecodes and args.src_fps) or (args.dst_timecodes and args.dst_fps):
        raise SushiError('Both fps and timecodes file cannot be specified at the same time')

    src_demuxer = Demuxer(args.source)
    dst_demuxer = Demuxer(args.destination)

    if src_demuxer.is_wav and not args.script_file:
        raise SushiError("Script file isn't specified")

    if (args.src_keyframes and not args.dst_keyframes) or (args.dst_keyframes and not args.src_keyframes):
        raise SushiError('Either none or both of src and dst keyframes should be provided')

    create_directory_if_not_exists(args.temp_dir)

    # selecting source audio
    if src_demuxer.is_wav:
        src_audio_path = args.source
    else:
        src_audio_path = format_full_path(args.temp_dir, args.source, '.sushi.wav')
        src_demuxer.set_audio(stream_idx=args.src_audio_idx, output_path=src_audio_path, sample_rate=args.sample_rate)

    # selecting destination audio
    if dst_demuxer.is_wav:
        dst_audio_path = args.destination
    else:
        dst_audio_path = format_full_path(args.temp_dir, args.destination, '.sushi.wav')
        dst_demuxer.set_audio(stream_idx=args.dst_audio_idx, output_path=dst_audio_path, sample_rate=args.sample_rate)

    # selecting source subtitles
    if args.script_file:
        src_script_path = args.script_file
    else:
        stype = src_demuxer.get_subs_type(args.src_script_idx)
        src_script_path = format_full_path(args.temp_dir, args.source, '.sushi' + stype)
        src_demuxer.set_script(stream_idx=args.src_script_idx, output_path=src_script_path)

    script_extension = get_extension(src_script_path)
    if script_extension not in ('.ass', '.srt'):
        raise SushiError('Unknown script type')

    # selecting destination subtitles
    if args.output_script:
        dst_script_path = args.output_script
        dst_script_extension = get_extension(args.output_script)
        if dst_script_extension != script_extension:
            raise SushiError("Source and destination script file types don't match ({0} vs {1})"
                             .format(script_extension, dst_script_extension))
    else:
        dst_script_path = format_full_path(args.temp_dir, args.destination, '.sushi' + script_extension)

    # selecting chapters
    if args.grouping and not ignore_chapters:
        if args.chapters_file:
            if get_extension(args.chapters_file) == '.xml':
                chapter_times = chapters.get_xml_start_times(args.chapters_file)
            else:
                chapter_times = chapters.get_ogm_start_times(args.chapters_file)
        elif not src_demuxer.is_wav:
            chapter_times = src_demuxer.chapters
            output_path = format_full_path(args.temp_dir, src_demuxer.path, ".sushi.chapters.txt")
            src_demuxer.set_chapters(output_path)
        else:
            chapter_times = []
    else:
        chapter_times = []

    # selecting keyframes and timecodes
    if args.src_keyframes:
        def select_keyframes(file_arg, demuxer):
            auto_file = format_full_path(args.temp_dir, demuxer.path, '.sushi.keyframes.txt')
            if file_arg in ('auto', 'make'):
                if file_arg == 'make' or not os.path.exists(auto_file):
                    if not demuxer.has_video:
                        raise SushiError("Cannot make keyframes for {0} because it doesn't have any video!"
                                         .format(demuxer.path))
                    demuxer.set_keyframes(output_path=auto_file)
                return auto_file
            else:
                return file_arg

        def select_timecodes(external_file, fps_arg, demuxer):
            if external_file:
                return external_file
            elif fps_arg:
                return None
            elif demuxer.has_video:
                path = format_full_path(args.temp_dir, demuxer.path, '.sushi.timecodes.txt')
                demuxer.set_timecodes(output_path=path)
                return path
            else:
                raise SushiError('Fps, timecodes or video files must be provided if keyframes are used')

        src_keyframes_file = select_keyframes(args.src_keyframes, src_demuxer)
        dst_keyframes_file = select_keyframes(args.dst_keyframes, dst_demuxer)
        src_timecodes_file = select_timecodes(args.src_timecodes, args.src_fps, src_demuxer)
        dst_timecodes_file = select_timecodes(args.dst_timecodes, args.dst_fps, dst_demuxer)

    # after this point nothing should fail so it's safe to start slow operations
    # like running the actual demuxing
    src_demuxer.demux()
    dst_demuxer.demux()

    try:
        if args.src_keyframes:
            src_timecodes = Timecodes.cfr(args.src_fps) if args.src_fps else Timecodes.from_file(src_timecodes_file)
            src_keytimes = [src_timecodes.get_frame_time(f) for f in keyframes.parse_keyframes(src_keyframes_file)]

            dst_timecodes = Timecodes.cfr(args.dst_fps) if args.dst_fps else Timecodes.from_file(dst_timecodes_file)
            dst_keytimes = [dst_timecodes.get_frame_time(f) for f in keyframes.parse_keyframes(dst_keyframes_file)]

        script = AssScript.from_file(src_script_path) if script_extension == '.ass' else SrtScript.from_file(src_script_path)
        script.sort_by_time()

        src_stream = WavStream(src_audio_path, sample_rate=args.sample_rate, sample_type=args.sample_type)
        dst_stream = WavStream(dst_audio_path, sample_rate=args.sample_rate, sample_type=args.sample_type)

        search_groups = prepare_search_groups(script.events,
                                              source_duration=src_stream.duration_seconds,
                                              chapter_times=chapter_times,
                                              max_ts_duration=args.max_ts_duration,
                                              max_ts_distance=args.max_ts_distance)

        calculate_shifts(src_stream, dst_stream, search_groups,
                         normal_window=args.window,
                         max_window=args.max_window,
                         rewind_thresh=args.rewind_thresh if args.grouping else 0)

        events = script.events

        if write_plot:
            plt.plot([x.shift for x in events], label='From audio')

        if args.grouping:
            if not ignore_chapters and chapter_times:
                groups = groups_from_chapters(events, chapter_times)
                for g in groups:
                    fix_near_borders(g)
                    smooth_events([x for x in g if not x.linked], args.smooth_radius)
                groups = split_broken_groups(groups)
            else:
                fix_near_borders(events)
                smooth_events([x for x in events if not x.linked], args.smooth_radius)
                groups = detect_groups(events)

            if write_plot:
                plt.plot([x.shift for x in events], label='Borders fixed')

            for g in groups:
                start_shift = g[0].shift
                end_shift = g[-1].shift
                avg_shift = average_shifts(g)
                logging.info(u'Group (start: {0}, end: {1}, lines: {2}), '
                             u'shifts (start: {3}, end: {4}, average: {5})'
                             .format(format_time(g[0].start), format_time(g[-1].end), len(g), start_shift, end_shift,
                                     avg_shift))

            if args.src_keyframes:
                for e in (x for x in events if x.linked):
                    e.resolve_link()
                for g in groups:
                    snap_groups_to_keyframes(g, chapter_times, args.max_ts_duration, args.max_ts_distance, src_keytimes,
                                             dst_keytimes, src_timecodes, dst_timecodes, args.max_kf_distance, args.kf_mode)
        else:
            fix_near_borders(events)
            if write_plot:
                plt.plot([x.shift for x in events], label='Borders fixed')

            if args.src_keyframes:
                for e in (x for x in events if x.linked):
                    e.resolve_link()
                snap_groups_to_keyframes(events, chapter_times, args.max_ts_duration, args.max_ts_distance, src_keytimes,
                                         dst_keytimes, src_timecodes, dst_timecodes, args.max_kf_distance, args.kf_mode)

        for event in events:
            event.apply_shift()

        script.save_to_file(dst_script_path)

        if write_plot:
            plt.plot([x.shift + (x._start_shift + x._end_shift)/2.0 for x in events], label='After correction')
            plt.legend(fontsize=5, frameon=False, fancybox=False)
            plt.savefig(args.plot_path, dpi=300)

    finally:
        if args.cleanup:
            src_demuxer.cleanup()
            dst_demuxer.cleanup()
Example #17
def run(args):
    ignore_chapters = args.chapters_file is not None and args.chapters_file.lower() == 'none'
    write_plot = plot_enabled and args.plot_path
    if write_plot:
        plt.clf()
        plt.ylabel('Shift, seconds')
        plt.xlabel('Event index')

    # first part should do all possible validation and should NOT take significant amount of time
    check_file_exists(args.source, 'Source')
    check_file_exists(args.destination, 'Destination')
    check_file_exists(args.src_timecodes, 'Source timecodes')
    check_file_exists(args.dst_timecodes, 'Destination timecodes')
    check_file_exists(args.script_file, 'Script')

    if not ignore_chapters:
        check_file_exists(args.chapters_file, 'Chapters')
    if args.src_keyframes not in ('auto', 'make'):
        check_file_exists(args.src_keyframes, 'Source keyframes')
    if args.dst_keyframes not in ('auto', 'make'):
        check_file_exists(args.dst_keyframes, 'Destination keyframes')

    if (args.src_timecodes and args.src_fps) or (args.dst_timecodes
                                                 and args.dst_fps):
        raise SushiError(
            'Both fps and timecodes file cannot be specified at the same time')

    src_demuxer = Demuxer(args.source)
    dst_demuxer = Demuxer(args.destination)

    if src_demuxer.is_wav and not args.script_file:
        raise SushiError("Script file isn't specified")

    if (args.src_keyframes
            and not args.dst_keyframes) or (args.dst_keyframes
                                            and not args.src_keyframes):
        raise SushiError(
            'Either none or both of src and dst keyframes should be provided')

    create_directory_if_not_exists(args.temp_dir)

    # selecting source audio
    if src_demuxer.is_wav:
        src_audio_path = args.source
    else:
        src_audio_path = format_full_path(args.temp_dir, args.source,
                                          '.sushi.wav')
        src_demuxer.set_audio(stream_idx=args.src_audio_idx,
                              output_path=src_audio_path,
                              sample_rate=args.sample_rate)

    # selecting destination audio
    if dst_demuxer.is_wav:
        dst_audio_path = args.destination
    else:
        dst_audio_path = format_full_path(args.temp_dir, args.destination,
                                          '.sushi.wav')
        dst_demuxer.set_audio(stream_idx=args.dst_audio_idx,
                              output_path=dst_audio_path,
                              sample_rate=args.sample_rate)

    # selecting source subtitles
    if args.script_file:
        src_script_path = args.script_file
    else:
        stype = src_demuxer.get_subs_type(args.src_script_idx)
        src_script_path = format_full_path(args.temp_dir, args.source,
                                           '.sushi' + stype)
        src_demuxer.set_script(stream_idx=args.src_script_idx,
                               output_path=src_script_path)

    script_extension = get_extension(src_script_path)
    if script_extension not in ('.ass', '.srt'):
        raise SushiError('Unknown script type')

    # selecting destination subtitles
    if args.output_script:
        dst_script_path = args.output_script
        dst_script_extension = get_extension(args.output_script)
        if dst_script_extension != script_extension:
            raise SushiError(
                "Source and destination script file types don't match ({0} vs {1})"
                .format(script_extension, dst_script_extension))
    else:
        dst_script_path = format_full_path(args.temp_dir, args.destination,
                                           '.sushi' + script_extension)

    # selecting chapters
    if args.grouping and not ignore_chapters:
        if args.chapters_file:
            if get_extension(args.chapters_file) == '.xml':
                chapter_times = chapters.get_xml_start_times(
                    args.chapters_file)
            else:
                chapter_times = chapters.get_ogm_start_times(
                    args.chapters_file)
        elif not src_demuxer.is_wav:
            chapter_times = src_demuxer.chapters
            output_path = format_full_path(args.temp_dir, src_demuxer.path,
                                           ".sushi.chapters.txt")
            src_demuxer.set_chapters(output_path)
        else:
            chapter_times = []
    else:
        chapter_times = []

    # selecting keyframes and timecodes
    if args.src_keyframes:

        def select_keyframes(file_arg, demuxer):
            auto_file = format_full_path(args.temp_dir, demuxer.path,
                                         '.sushi.keyframes.txt')
            if file_arg in ('auto', 'make'):
                if file_arg == 'make' or not os.path.exists(auto_file):
                    if not demuxer.has_video:
                        raise SushiError(
                            "Cannot make keyframes for {0} because it doesn't have any video!"
                            .format(demuxer.path))
                    demuxer.set_keyframes(output_path=auto_file)
                return auto_file
            else:
                return file_arg

        def select_timecodes(external_file, fps_arg, demuxer):
            if external_file:
                return external_file
            elif fps_arg:
                return None
            elif demuxer.has_video:
                path = format_full_path(args.temp_dir, demuxer.path,
                                        '.sushi.timecodes.txt')
                demuxer.set_timecodes(output_path=path)
                return path
            else:
                raise SushiError(
                    'Fps, timecodes or video files must be provided if keyframes are used'
                )

        src_keyframes_file = select_keyframes(args.src_keyframes, src_demuxer)
        dst_keyframes_file = select_keyframes(args.dst_keyframes, dst_demuxer)
        src_timecodes_file = select_timecodes(args.src_timecodes, args.src_fps,
                                              src_demuxer)
        dst_timecodes_file = select_timecodes(args.dst_timecodes, args.dst_fps,
                                              dst_demuxer)

    # after this point nothing should fail so it's safe to start slow operations
    # like running the actual demuxing
    src_demuxer.demux()
    dst_demuxer.demux()

    try:
        if args.src_keyframes:
            src_timecodes = Timecodes.cfr(
                args.src_fps) if args.src_fps else Timecodes.from_file(
                    src_timecodes_file)
            src_keytimes = [
                src_timecodes.get_frame_time(f)
                for f in parse_keyframes(src_keyframes_file)
            ]

            dst_timecodes = Timecodes.cfr(
                args.dst_fps) if args.dst_fps else Timecodes.from_file(
                    dst_timecodes_file)
            dst_keytimes = [
                dst_timecodes.get_frame_time(f)
                for f in parse_keyframes(dst_keyframes_file)
            ]

        script = AssScript.from_file(
            src_script_path
        ) if script_extension == '.ass' else SrtScript.from_file(
            src_script_path)
        script.sort_by_time()

        src_stream = WavStream(src_audio_path,
                               sample_rate=args.sample_rate,
                               sample_type=args.sample_type)
        dst_stream = WavStream(dst_audio_path,
                               sample_rate=args.sample_rate,
                               sample_type=args.sample_type)

        calculate_shifts(
            src_stream,
            dst_stream,
            script.events,
            chapter_times=chapter_times,
            window=args.window,
            max_window=args.max_window,
            rewind_thresh=args.rewind_thresh if args.grouping else 0,
            max_ts_duration=args.max_ts_duration,
            max_ts_distance=args.max_ts_distance)

        events = script.events

        if write_plot:
            plt.plot([x.shift for x in events], label='From audio')

        if args.grouping:
            if not ignore_chapters and chapter_times:
                groups = groups_from_chapters(events, chapter_times)
                for g in groups:
                    fix_near_borders(g)
                    smooth_events([x for x in g if not x.linked],
                                  args.smooth_radius)
                groups = split_broken_groups(groups, args.min_group_size)
            else:
                fix_near_borders(events)
                smooth_events([x for x in events if not x.linked],
                              args.smooth_radius)
                groups = detect_groups(events, args.min_group_size)

            if write_plot:
                plt.plot([x.shift for x in events], label='Borders fixed')

            for g in groups:
                start_shift = g[0].shift
                end_shift = g[-1].shift
                avg_shift = average_shifts(g)
                logging.info(
                    u'Group (start: {0}, end: {1}, lines: {2}), '
                    u'shifts (start: {3}, end: {4}, average: {5})'.format(
                        format_time(g[0].start), format_time(g[-1].end),
                        len(g), start_shift, end_shift, avg_shift))

            if args.src_keyframes:
                for e in (x for x in events if x.linked):
                    e.resolve_link()
                for g in groups:
                    snap_groups_to_keyframes(
                        g, chapter_times, args.max_ts_duration,
                        args.max_ts_distance, src_keytimes, dst_keytimes,
                        src_timecodes, dst_timecodes, args.max_kf_distance,
                        args.kf_mode)

            if args.write_avs:
                write_shift_avs(dst_script_path + '.avs', groups,
                                src_audio_path, dst_audio_path)
        else:
            fix_near_borders(events)
            if write_plot:
                plt.plot([x.shift for x in events], label='Borders fixed')

            if args.src_keyframes:
                for e in (x for x in events if x.linked):
                    e.resolve_link()
                snap_groups_to_keyframes(events, chapter_times,
                                         args.max_ts_duration,
                                         args.max_ts_distance, src_keytimes,
                                         dst_keytimes, src_timecodes,
                                         dst_timecodes, args.max_kf_distance,
                                         args.kf_mode)

        for event in events:
            event.apply_shift()

        script.save_to_file(dst_script_path)

        if write_plot:
            plt.plot([
                x.shift + (x._start_shift + x._end_shift) / 2.0 for x in events
            ],
                     label='After correction')
            plt.legend(fontsize=5, frameon=False, fancybox=False)
            plt.savefig(args.plot_path, dpi=300)

    finally:
        if args.cleanup:
            src_demuxer.cleanup()
            dst_demuxer.cleanup()
Example #18
def scan_source(build_dir, root_dir, build):

    count = 0

    # Common known non-free blobs (always lower case):
    usual_suspects = {
        exp: re.compile(r'.*' + exp, re.IGNORECASE) for exp in [
            r'flurryagent',
            r'paypal.*mpl',
            r'google.*analytics',
            r'admob.*sdk.*android',
            r'google.*ad.*view',
            r'google.*admob',
            r'google.*play.*services',
            r'crittercism',
            r'heyzap',
            r'jpct.*ae',
            r'youtube.*android.*player.*api',
            r'bugsense',
            r'crashlytics',
            r'ouya.*sdk',
            r'libspen23',
        ]
    }

    def suspects_found(s):
        for n, r in usual_suspects.iteritems():
            if r.match(s):
                yield n

    gradle_mavenrepo = re.compile(r'maven *{ *(url)? *[\'"]?([^ \'"]*)[\'"]?')

    allowed_repos = [re.compile(r'^https?://' + re.escape(repo) + r'/*') for repo in [
        'repo1.maven.org/maven2',  # mavenCentral()
        'jcenter.bintray.com',     # jcenter()
        'jitpack.io',
        'repo.maven.apache.org/maven2',
        'oss.sonatype.org/content/repositories/snapshots',
        'oss.sonatype.org/content/repositories/releases',
        'oss.sonatype.org/content/groups/public',
        'clojars.org/repo',  # Clojure free software libs
        's3.amazonaws.com/repo.commonsware.com',  # CommonsWare
        'plugins.gradle.org/m2',  # Gradle plugin repo
        ]
    ]

    scanignore = common.getpaths_map(build_dir, build.scanignore)
    scandelete = common.getpaths_map(build_dir, build.scandelete)

    scanignore_worked = set()
    scandelete_worked = set()

    def toignore(fd):
        for k, paths in scanignore.iteritems():
            for p in paths:
                if fd.startswith(p):
                    scanignore_worked.add(k)
                    return True
        return False

    def todelete(fd):
        for k, paths in scandelete.iteritems():
            for p in paths:
                if fd.startswith(p):
                    scandelete_worked.add(k)
                    return True
        return False

    def ignoreproblem(what, fd, fp):
        logging.info('Ignoring %s at %s' % (what, fd))
        return 0

    def removeproblem(what, fd, fp):
        logging.info('Removing %s at %s' % (what, fd))
        os.remove(fp)
        return 0

    def warnproblem(what, fd):
        if toignore(fd):
            return
        logging.warn('Found %s at %s' % (what, fd))

    def handleproblem(what, fd, fp):
        if toignore(fd):
            return ignoreproblem(what, fd, fp)
        if todelete(fd):
            return removeproblem(what, fd, fp)
        logging.error('Found %s at %s' % (what, fd))
        return 1

    def is_executable(path):
        return os.path.exists(path) and os.access(path, os.X_OK)

    textchars = bytearray({7, 8, 9, 10, 12, 13, 27} | set(range(0x20, 0x100)) - {0x7f})

    def is_binary(path):
        d = None
        with open(path, 'rb') as f:
            d = f.read(1024)
        return bool(d.translate(None, textchars))

    # False positives patterns for files that are binary and executable.
    safe_paths = [re.compile(r) for r in [
        r".*/drawable[^/]*/.*\.png$",  # png drawables
        r".*/mipmap[^/]*/.*\.png$",    # png mipmaps
        ]
    ]

    def safe_path(path):
        for sp in safe_paths:
            if sp.match(path):
                return True
        return False

    gradle_compile_commands = get_gradle_compile_commands(build)

    def is_used_by_gradle(line):
        return any(command.match(line) for command in gradle_compile_commands)

    # Iterate through all files in the source code
    for r, d, f in os.walk(build_dir, topdown=True):

        # It's topdown, so checking the basename is enough
        for ignoredir in ('.hg', '.git', '.svn', '.bzr'):
            if ignoredir in d:
                d.remove(ignoredir)

        for curfile in f:

            if curfile in ['.DS_Store']:
                continue

            # Path (relative) to the file
            fp = os.path.join(r, curfile)

            if os.path.islink(fp):
                continue

            fd = fp[len(build_dir) + 1:]
            _, ext = common.get_extension(fd)

            if ext == 'so':
                count += handleproblem('shared library', fd, fp)
            elif ext == 'a':
                count += handleproblem('static library', fd, fp)
            elif ext == 'class':
                count += handleproblem('Java compiled class', fd, fp)
            elif ext == 'apk':
                removeproblem('APK file', fd, fp)

            elif ext == 'jar':
                for name in suspects_found(curfile):
                    count += handleproblem('usual suspect \'%s\'' % name, fd, fp)
                warnproblem('JAR file', fd)

            elif ext == 'java':
                if not os.path.isfile(fp):
                    continue
                for line in file(fp):
                    if 'DexClassLoader' in line:
                        count += handleproblem('DexClassLoader', fd, fp)
                        break

            elif ext == 'gradle':
                if not os.path.isfile(fp):
                    continue
                with open(fp, 'r') as f:
                    lines = f.readlines()
                for i, line in enumerate(lines):
                    if is_used_by_gradle(line):
                        for name in suspects_found(line):
                            count += handleproblem('usual suspect \'%s\' at line %d' % (name, i + 1), fd, fp)
                noncomment_lines = [l for l in lines if not common.gradle_comment.match(l)]
                joined = re.sub(r'[\n\r\s]+', ' ', ' '.join(noncomment_lines))
                for m in gradle_mavenrepo.finditer(joined):
                    url = m.group(2)
                    if not any(r.match(url) for r in allowed_repos):
                        count += handleproblem('unknown maven repo \'%s\'' % url, fd, fp)

            elif ext in ['', 'bin', 'out', 'exe']:
                if is_binary(fp):
                    count += handleproblem('binary', fd, fp)

            elif is_executable(fp):
                if is_binary(fp) and not safe_path(fd):
                    warnproblem('possible binary', fd)

    for p in scanignore:
        if p not in scanignore_worked:
            logging.error('Unused scanignore path: %s' % p)
            count += 1

    for p in scandelete:
        if p not in scandelete_worked:
            logging.error('Unused scandelete path: %s' % p)
            count += 1

    return count
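A note on the is_binary helper above: str.translate(None, deletechars) is the Python 2 deletion form (the snippet is Python 2 throughout; see iteritems and file(fp)). The same trick works on bytes in Python 3, where translate likewise accepts None plus a delete argument; a sketch of the equivalent:

textchars = bytearray({7, 8, 9, 10, 12, 13, 27} | set(range(0x20, 0x100)) - {0x7f})

def is_binary(path):
    # Delete every "texty" byte from the first 1 KiB of the file;
    # anything left over means the file is treated as binary.
    with open(path, 'rb') as f:
        chunk = f.read(1024)
    return bool(chunk.translate(None, textchars))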