Example #1
    def parse(self):
        """Parsing nwt videos"""
        self.work_dir = expandpath(self.work_dir)
        self.input = expandpath(self.input)
        self.books = [int(bk) for bk in self.book.split(',')]
        self.chapters = [int(chp) for chp in self.chapter.split(',')]

        self._get_db()
        print('This may take several minutes', flush=True)
        verse_videos = self.get_cutup_verses()
        match_videos = self.get_match_videos()
        self.num_bookname = parse_num_book(
            get_nwt_video_info(match_videos[0], 'lang'))
        add_numeration(self.work_dir, self.num_bookname)
        print(f'Getting chapter marks from {self.input}', end='\t-> ')
        result = []
        for video in match_videos:
            booknum = get_nwt_video_info(video, 'booknum')
            json_markers = probe_markers(video)
            markers = parse_markers_nwt(json_markers,
                                        video,
                                        bookname=self.num_bookname[booknum])
            for mark in markers:
                # Skip this mark if the verse video already exists and the
                # source video is unchanged since the last run.
                if not (self.db.get(woext(video)) == os.stat(video).st_size
                        and verse_videos.get(mark['title'])):
                    result.append(mark)
            self.db[woext(video)] = os.stat(video).st_size
        self.write_json(self.db)
        print(f'{len(result)} found\n')
        return result
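Both parse() and raw_parse() key their size cache on woext(video) and expand user-supplied paths with expandpath(). Those helpers are defined elsewhere in the project; the sketch below only illustrates the behaviour they are assumed to have (plain extension stripping and user/variable expansion), not the project's actual definitions.

import os


def woext(path):
    """Assumed behaviour: drop the file extension, e.g. 'a/b.mp4' -> 'a/b'."""
    return os.path.splitext(path)[0]


def expandpath(path):
    """Assumed behaviour: expand '~' and environment variables to an absolute path."""
    return os.path.abspath(os.path.expandvars(os.path.expanduser(path)))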
Example #2
    def raw_parse(self):
        """Parsing any video"""
        self._get_db()
        result = []
        match_videos = self.get_match_videos()
        verse_videos = self.get_cutup_verses()
        for video in match_videos:
            json_markers = probe_markers(video)
            markers = parse_markers_raw(json_markers, video)

            for mark in markers:
                # Skip this mark if the verse video already exists and the
                # source video is unchanged since the last run.
                if not (self.db.get(woext(video)) == os.stat(video).st_size
                        and verse_videos.get(mark['title'])):
                    result.append(mark)
            self.db[woext(video)] = os.stat(video).st_size
        self.write_json(self.db)
        return result
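Both parsers persist a small {woext(video): file size} map through _get_db() and write_json() so unchanged source videos can be skipped on the next run. Those two methods are not shown in the examples; the sketch below illustrates the assumed JSON-backed pattern, mirroring the ready.json handling in Example #3. The 'db/sizes.json' location is a placeholder, not the project's real path.

import json
import os


class SizeCacheSketch:
    """Hypothetical mixin showing the cache pattern implied by _get_db()/write_json()."""

    def _get_db(self):
        path = os.path.join(self.work_dir, 'db', 'sizes.json')  # placeholder path
        try:
            with open(path, 'r', encoding='utf-8') as jsonfile:
                self.db = json.load(jsonfile)
        except (FileNotFoundError, json.JSONDecodeError):
            self.db = {}

    def write_json(self, db):
        path = os.path.join(self.work_dir, 'db', 'sizes.json')  # placeholder path
        os.makedirs(os.path.dirname(path), exist_ok=True)
        with open(path, 'w', encoding='utf-8') as jsonfile:
            json.dump(db, jsonfile, ensure_ascii=False, indent=4)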
Example #3
    def get_cutup_verses(self):
        print(f'Getting verses videos from {self.work_dir}',
              end='\t-> ',
              flush=True)
        path = pj(self.work_dir, 'db', 'ready.json')
        try:
            with open(path, 'r', encoding='utf-8') as jsonfile:
                self.ready = json.load(jsonfile)

        except (FileNotFoundError, UnsupportedOperation, JSONDecodeError):
            self.ready = {}

        versiculos = {}
        for dirpath, dirnames, filenames in os.walk(self.work_dir):
            # Only look at the top level and one level of subdirectories.
            if dirpath[len(self.work_dir):].count(os.sep) < 2:
                for filename in sorted(filenames):
                    if (not filename.endswith(('.mp4', '.m4v'))
                            or filename.startswith('nwt')):
                        continue
                    filepath = pj(dirpath, filename)
                    size = os.stat(filepath).st_size
                    if self.ready.get(woext(filename)) == size:
                        # Fast path: the cached size matches, no probe needed.
                        versiculos[woext(filename)] = filepath
                    elif 'vbastianpc' in ffprobe_signature(filepath):
                        # Slow path: probe the file's encoder signature and
                        # refresh the cache entry.
                        versiculos[woext(filename)] = filepath
                        self.ready[woext(filename)] = size

        with open(path, 'w', encoding='utf-8') as jsonfile:
            json.dump(self.ready, jsonfile, ensure_ascii=False, indent=4)
        print(f'{len(versiculos)} found')
        return versiculos
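get_cutup_verses() only checks whether the string 'vbastianpc' appears in whatever ffprobe_signature() returns, so the helper presumably exposes some encoder or tag text from the file's metadata. The sketch below shows one way such a probe could work using ffprobe's JSON output; the exact field the real helper reads is an assumption.

import json
import subprocess


def ffprobe_signature(filepath):
    """Sketch: return the container-level tags of *filepath* as one string."""
    cmd = ['ffprobe', '-v', 'quiet', '-print_format', 'json',
           '-show_format', filepath]
    try:
        out = subprocess.run(cmd, capture_output=True, text=True, check=True)
        tags = json.loads(out.stdout).get('format', {}).get('tags', {})
        return ' '.join(str(value) for value in tags.values())
    except (FileNotFoundError, subprocess.CalledProcessError, json.JSONDecodeError):
        return ''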
Example #4
    def download_media(self, media, directory, check_only=False):
        """Download media file and check it.

        Download the file, check its MD5 sum and size, and delete it on mismatch.

        :param media: a Media instance
        :param directory: dir to save the files to
        :param check_only: bool, True means no downloading
        :return: filename, or None if unsuccessful
        """
        if not os.path.exists(directory) and not self.download:
            return None

        os.makedirs(directory, exist_ok=True)

        base = urllib.parse.urlparse(media.url).path
        if self.title:
            file_extension = os.path.splitext(os.path.basename(base))[-1]
            title = media.name.replace('"', "'").replace(':', '.')
            base = ''.join(c for c in title
                           if c.isalnum() or c in ".-_()¡!¿';, ") + file_extension
        else:
            base = os.path.basename(base)

        # Delete files in the main dir that share the basename but are not the
        # exact same file (e.g. an older copy with a different extension)
        if self.type == 'video':
            for path, dirnames, filenames in os.walk(directory):
                for filename in filenames:
                    if filename != base and woext(filename) == woext(base):
                        os.remove(os.path.join(path, filename))
                        print('deleted:', os.path.join(path, filename))
                break
        file = os.path.join(directory, base)
        # Only try resuming and downloading once
        resumed = False
        downloaded = False
        progressbar = not self.subtitles
        while True:

            if os.path.exists(file):

                # Set timestamp to date of publishing
                # NOTE: Do this before checking _checked_files since
                # this is not done for newly renamed .part files!
                if media.date:
                    os.utime(file, (media.date, media.date))

                if os.path.getsize(file) == media.size or not media.size:
                    # File size is OK or unknown - Validate checksum
                    if self.checksums and media.md5 and _md5(
                            file) != media.md5:
                        # Checksum is bad - Remove
                        if self.quiet < 2:
                            msg('checksum mismatch, deleting: {}'.format(base))
                        os.remove(file)
                    else:
                        # Checksum is correct
                        return file
                else:
                    # File size is bad - Delete
                    msg('size mismatch, deleting: {}'.format(base))
                    os.remove(file)

            elif check_only:
                # The rest of this method is only applicable in download mode

                return None

            elif os.path.exists(file + '.part'):

                fsize = os.path.getsize(file + '.part')

                if fsize == media.size or not media.size:
                    # File size is OK - Validate checksum
                    if self.checksums and media.md5 and _md5(
                            file + '.part') != media.md5:
                        # Checksum is bad - Remove
                        if self.quiet < 2:
                            msg('checksum mismatch, deleting: {}'.format(
                                base + '.part'))
                        os.remove(file + '.part')
                    else:
                        # Checksum is correct or unknown - Move and approve
                        os.rename(file + '.part', file)
                        return file
                elif fsize < media.size and not resumed:
                    # File is smaller - Resume download once
                    resumed = True
                    if self.quiet < 2:
                        msg('resuming: {} ({})'.format(base + '.part',
                                                       media.name))
                    _curl(
                        media.url,
                        file + '.part',
                        resume=True,
                        rate_limit=self.rate_limit,
                        curl_path=self.curl_path,
                        progress=progressbar,
                    )
                else:
                    # File size is bad - Remove
                    msg('size mismatch, deleting: {}'.format(base + '.part'))
                    os.remove(file + '.part')

            else:
                # Download whole file once
                if not downloaded:
                    msg('downloading: {} ({})'.format(base, media.name))
                    _curl(
                        media.url,
                        file + '.part',
                        rate_limit=self.rate_limit,
                        curl_path=self.curl_path,
                        progress=progressbar,
                    )
                    downloaded = True
                else:
                    # If we get here, every check has failed and both the
                    # resumed and the fresh download have been attempted;
                    # there is nothing left to do.
                    msg('failed to download: {} ({})'.format(base, media.name))
                    return None
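download_media() delegates the checksum comparison to _md5() and the actual transfer to _curl(), both defined elsewhere. The checksum helper only needs to produce a hex digest comparable to media.md5; a minimal sketch, reading the file in chunks so large videos never have to fit in memory:

import hashlib


def _md5(path, chunk_size=1024 * 1024):
    """Sketch of the checksum helper assumed by download_media()."""
    digest = hashlib.md5()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()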