Esempio n. 1
0
    def _FetchData(self, url, referral_url=None, temp_path=None):
        """Run a GET network job for url and wait for it to finish.

        self._EnsureLoggedIn() is called first. The job is built by
        self._network_job_factory and queued on the client controller's
        network engine.

        Returns the job's content when temp_path is None; otherwise returns
        None (presumably the job has written to temp_path -- confirm against
        the job factory).

        Any exception raised while waiting is printed to the log, together
        with the url, and then re-raised unchanged.
        """
        self._EnsureLoggedIn()

        job = self._network_job_factory(
            'GET', url, referral_url=referral_url, temp_path=temp_path)

        engine = HG.client_controller.network_engine

        engine.AddJob(job)

        try:

            job.WaitUntilDone()

        except Exception as error:

            # log which url failed before letting the caller deal with it
            HydrusData.Print('The url ' + url + ' gave the following problem:')
            HydrusData.PrintException(error)

            raise

        if temp_path is not None:

            return None

        return job.GetContent()
Esempio n. 2
0
def GetMime(path):
    """Work out the mime constant for the file at path.

    The first 256 bytes are compared against each (offset, header, mime)
    entry of header_and_mime. Two placeholder mimes are resolved by asking
    HydrusVideoHandling.HasVideoStream: UNDETERMINED_WM becomes VIDEO_WMV
    when a video stream exists (otherwise matching continues), and
    UNDETERMINED_PNG becomes IMAGE_APNG or IMAGE_PNG.

    If no header settles it, HydrusVideoHandling.GetMime is consulted; any
    failure there is logged and HC.APPLICATION_UNKNOWN is returned.

    Raises HydrusExceptions.SizeException for a zero-length file.
    """
    if os.path.getsize(path) == 0:

        raise HydrusExceptions.SizeException('File is of zero length!')

    with open(path, 'rb') as source:

        source.seek(0)

        head = source.read(256)

    for (offset, header, mime) in header_and_mime:

        if not head[offset:].startswith(header):

            continue

        if mime == HC.UNDETERMINED_WM:

            if HydrusVideoHandling.HasVideoStream(path):

                return HC.VIDEO_WMV

            # no video stream: keep looking, wma is caught and verified later

        elif mime == HC.UNDETERMINED_PNG:

            if HydrusVideoHandling.HasVideoStream(path):

                return HC.IMAGE_APNG

            return HC.IMAGE_PNG

        else:

            return mime

    try:

        ffmpeg_mime = HydrusVideoHandling.GetMime(path)

        if ffmpeg_mime != HC.APPLICATION_UNKNOWN:

            return ffmpeg_mime

    except HydrusExceptions.MimeException:

        HydrusData.Print('FFMPEG couldn\'t figure out the mime for: ' + path)

    except Exception as error:

        HydrusData.Print('FFMPEG couldn\'t figure out the mime for: ' + path)
        HydrusData.PrintException(error, do_wait=False)

    return HC.APPLICATION_UNKNOWN
    def _TryEndModal(self, value):

        try:

            self.EndModal(value)

        except Exception as e:

            HydrusData.ShowText(
                'This dialog seems to have been unable to close for some reason. I am printing the stack to the log. The dialog may have already closed, or may attempt to close now. Please inform hydrus dev of this situation. I recommend you restart the client if you can. If the UI is locked, you will have to kill it via task manager.'
            )

            HydrusData.PrintException(e)

            import traceback

            HydrusData.DebugPrint(''.join(traceback.format_stack()))

            try:

                self.Close()

            except:

                HydrusData.ShowText('The dialog would not close on command.')

            try:

                self.Destroy()

            except:

                HydrusData.ShowText('The dialog would not destroy on command.')
Esempio n. 4
0
 def _ProcessJob( self, job ):
     
     job_type = job.GetType()
     
     ( action, args, kwargs ) = job.GetCallableTuple()
     
     in_transaction = False
     
     try:
         
         if job_type in ( 'read_write', 'write' ):
             
             self._c.execute( 'BEGIN IMMEDIATE;' )
             
             in_transaction = True
             
         
         if job_type in ( 'read', 'read_write' ): result = self._Read( action, *args, **kwargs )
         elif job_type in ( 'write' ): result = self._Write( action, *args, **kwargs )
         
         if in_transaction:
             
             self._c.execute( 'COMMIT;' )
             
             in_transaction = False
             
         
         for ( topic, args, kwargs ) in self._pubsubs:
             
             self._controller.pub( topic, *args, **kwargs )
             
         
         if job.IsSynchronous():
             
             job.PutResult( result )
             
         
     except Exception as e:
         
         if in_transaction:
             
             try:
                 
                 self._c.execute( 'ROLLBACK;' )
                 
             except Exception as rollback_e:
                 
                 HydrusData.Print( 'When the transaction failed, attempting to rollback the database failed.' )
                 
                 HydrusData.PrintException( rollback_e )
                 
             
         
         self._ManageDBError( job, e )
Esempio n. 5
0
    def _CheckWatchableURL(self):
        """Check self._url once through a fresh gallery seed.

        A GallerySeed is created for the url, added to the gallery seed log
        and worked. When new urls were added the files repeating job is
        woken, and a 404 result sets the checking status to
        CHECKER_STATUS_404. A NetworkException delays further work by four
        hours and is printed to the log. In every case the seed's note and
        status are passed on to self._FinishCheck.
        """
        def update_status(text):

            with self._lock:

                self._watcher_status = text

        def update_title(text):

            with self._lock:

                self._subject = text

        seed = ClientImportGallerySeeds.GallerySeed(
            self._url, can_generate_more_pages=False)

        self._gallery_seed_log.AddGallerySeeds((seed, ))

        try:

            network_job_factory = ClientImporting.GenerateWatcherNetworkJobFactory(
                self._watcher_key)

            outcome = seed.WorkOnURL(
                self._gallery_seed_log, self._file_seed_cache, update_status,
                update_title, network_job_factory,
                self._CheckerNetworkJobPresentationContextFactory,
                self._file_import_options)

            (num_urls_added, _added_all_possible_urls, result_404) = outcome

            if num_urls_added > 0:

                ClientImporting.WakeRepeatingJob(self._files_repeating_job)

            if result_404:

                with self._lock:

                    self._checking_status = ClientImporting.CHECKER_STATUS_404

        except HydrusExceptions.NetworkException as e:

            four_hours = 4 * 3600

            self._DelayWork(four_hours, HydrusData.ToUnicode(e))

            HydrusData.PrintException(e)

        # the seed carries the outcome whether or not the work raised
        note = seed.note
        had_error = seed.status == CC.STATUS_ERROR
        note_should_stick = seed.status != CC.STATUS_SUCCESSFUL_AND_NEW

        self._FinishCheck(note, had_error, note_should_stick)
Esempio n. 6
0
    def Parse(self, job_key, data, referral_url, desired_content):
        """Fetch every url parsed from data and gather the children's content.

        Urls come from self.ParseURLs. Each one is fetched with
        ClientNetworking.RequestsGet, sending referral_url as the Referer
        header, and the response text is handed to GetChildrenContent along
        with self._children. A 404 or other network error is reported on
        job_key's 'script_status' variable, followed by a two second pause,
        and that url is skipped.

        Returns the combined list of content.

        Raises HydrusExceptions.CancelledException if job_key reports
        cancellation after a url has been processed.
        """
        collected = []

        for search_url in self.ParseURLs(job_key, data, referral_url):

            try:

                job_key.SetVariable('script_status', 'fetching ' + search_url)

                response = ClientNetworking.RequestsGet(
                    search_url, headers={'Referer': referral_url})

            except HydrusExceptions.NotFoundException:

                job_key.SetVariable('script_status', '404 - nothing found')

                time.sleep(2)

                continue

            except HydrusExceptions.NetworkException as error:

                job_key.SetVariable('script_status',
                                    'Network error! Details written to log.')

                HydrusData.Print('Problem fetching ' + search_url + ':')
                HydrusData.PrintException(error)

                time.sleep(2)

                continue

            collected.extend(
                GetChildrenContent(job_key, self._children, response.text,
                                   search_url, desired_content))

            if job_key.IsCancelled():

                raise HydrusExceptions.CancelledException()

        return collected
Esempio n. 7
0
    def _ProcessJob(self, job):

        job_type = job.GetType()

        (action, args, kwargs) = job.GetCallableTuple()

        try:

            if job_type in ('read_write', 'write'):

                self._BeginImmediate()

            if job_type in ('read', 'read_write'):
                result = self._Read(action, *args, **kwargs)
            elif job_type in ('write'):
                result = self._Write(action, *args, **kwargs)

            if self._in_transaction:

                self._Commit()

            for (topic, args, kwargs) in self._pubsubs:

                self._controller.pub(topic, *args, **kwargs)

            if job.IsSynchronous():

                job.PutResult(result)

        except Exception as e:

            if self._in_transaction:

                try:

                    self._Rollback()

                except Exception as rollback_e:

                    HydrusData.Print(
                        'When the transaction failed, attempting to rollback the database failed.'
                    )

                    HydrusData.PrintException(rollback_e)

            self._ManageDBError(job, e)
Esempio n. 8
0
def CanVacuum(db_path, stop_time=None):
    """Decide whether the sqlite database at db_path can be vacuumed now.

    The live size of the database is computed from its page_size,
    page_count and freelist_count pragmas. When stop_time is given, the
    vacuum is refused if an estimate of its duration (db size divided by a
    fixed 1048576 bytes per second) means it should already have started.
    Otherwise the answer is whatever HydrusPaths.HasSpaceForDBTransaction
    reports for the database's directory.

    Returns True or False. Any exception is printed to the log and treated
    as False.
    """
    try:

        db = sqlite3.connect(db_path,
                             isolation_level=None,
                             detect_types=sqlite3.PARSE_DECLTYPES)

        try:

            c = db.cursor()

            (page_size, ) = c.execute('PRAGMA page_size;').fetchone()
            (page_count, ) = c.execute('PRAGMA page_count;').fetchone()
            (freelist_count, ) = c.execute('PRAGMA freelist_count;').fetchone()

        finally:

            # the connection is only needed for the pragmas; close it so the
            # handle is not leaked on every call
            db.close()

        db_size = (page_count - freelist_count) * page_size

        if stop_time is not None:

            approx_vacuum_speed_mb_per_s = 1048576 * 1

            approx_vacuum_duration = db_size / approx_vacuum_speed_mb_per_s

            time_i_will_have_to_start = stop_time - approx_vacuum_duration

            if HydrusData.TimeHasPassed(time_i_will_have_to_start):

                return False

        db_dir = os.path.dirname(db_path)

        (has_space,
         _reason) = HydrusPaths.HasSpaceForDBTransaction(db_dir, db_size)

        return has_space

    except Exception as e:

        HydrusData.Print('Could not determine whether to vacuum or not:')

        HydrusData.PrintException(e)

        return False
Esempio n. 9
0
    def _CheckWatchableURL(self):
        """Check self._url once through a fresh gallery seed and update state.

        A GallerySeed is created for the url, added to the gallery seed log
        and worked as a 'watcher' job. New urls wake the files repeating
        job. A 404 pauses checking and records CHECKER_STATUS_404. A seed
        left in error status pauses checking and shows its note for five
        seconds. A NetworkException delays work by the
        'downloader_network_error_delay' option and is printed to the log.

        Afterwards the check bookkeeping is refreshed under the lock. Unless
        the seed's status differs from STATUS_SUCCESSFUL_AND_NEW, the
        watcher status text is cleared again after five seconds.
        """
        def file_seeds_callable(file_seeds):

            return ClientImporting.UpdateFileSeedCacheWithFileSeeds(
                self._file_seed_cache, file_seeds)

        def update_status(text):

            with self._lock:

                self._watcher_status = text

        def update_title(text):

            with self._lock:

                self._subject = text

        seed = ClientImportGallerySeeds.GallerySeed(
            self._url, can_generate_more_pages=False)

        self._gallery_seed_log.AddGallerySeeds((seed, ))

        with self._lock:

            self._watcher_status = 'checking'

        try:

            outcome = seed.WorkOnURL(
                'watcher', self._gallery_seed_log, file_seeds_callable,
                update_status, update_title, self._NetworkJobFactory,
                self._CheckerNetworkJobPresentationContextFactory,
                self._file_import_options)

            (num_urls_added, _num_already_in_cache, _num_urls_total,
             result_404, _added_new_gallery_pages, _stop_reason) = outcome

            if num_urls_added > 0:

                ClientImporting.WakeRepeatingJob(self._files_repeating_job)

            if result_404:

                with self._lock:

                    self._checking_paused = True

                    self._checking_status = ClientImporting.CHECKER_STATUS_404

            if seed.status == CC.STATUS_ERROR:

                # the [DEAD] stuff can override watcher status, so give the
                # error a brief time on display

                with self._lock:

                    self._checking_paused = True

                    self._watcher_status = seed.note

                time.sleep(5)

        except HydrusExceptions.NetworkException as e:

            retry_delay = HG.client_controller.new_options.GetInteger(
                'downloader_network_error_delay')

            self._DelayWork(retry_delay, HydrusData.ToUnicode(e))

            HydrusData.PrintException(e)

        final_status = seed.note
        status_should_stick = seed.status != CC.STATUS_SUCCESSFUL_AND_NEW

        with self._lock:

            if self._check_now:

                self._check_now = False

            self._watcher_status = final_status

            self._last_check_time = HydrusData.GetNow()

            self._UpdateFileVelocityStatus()

            self._UpdateNextCheckTime()

            self._Compact()

        if status_should_stick:

            return

        # leave the status up briefly, then clear it
        time.sleep(5)

        with self._lock:

            self._watcher_status = ''
Esempio n. 10
0
    def _ProcessJob(self, job):

        job_type = job.GetType()

        (action, args, kwargs) = job.GetCallableTuple()

        try:

            if job_type in ('read_write', 'write'):

                self._current_status = 'db write locked'

                self._transaction_contains_writes = True

            else:

                self._current_status = 'db read locked'

            self.publish_status_update()

            if job_type in ('read', 'read_write'):

                result = self._Read(action, *args, **kwargs)

            elif job_type in ('write'):

                result = self._Write(action, *args, **kwargs)

            if self._transaction_contains_writes and HydrusData.TimeHasPassed(
                    self._transaction_started + self.TRANSACTION_COMMIT_TIME):

                self._current_status = 'db committing'

                self.publish_status_update()

                self._Commit()

                self._BeginImmediate()

            else:

                self._Save()

            for (topic, args, kwargs) in self._pubsubs:

                self._controller.pub(topic, *args, **kwargs)

            if job.IsSynchronous():

                job.PutResult(result)

        except Exception as e:

            try:

                self._Rollback()

            except Exception as rollback_e:

                HydrusData.Print(
                    'When the transaction failed, attempting to rollback the database failed.'
                )

                HydrusData.PrintException(rollback_e)

            self._ManageDBError(job, e)

        finally:

            self._pubsubs = []

            self._current_status = ''

            self.publish_status_update()
Esempio n. 11
0
    def __init__(self, controller, db_dir, db_name, no_wal=False):
        """Open the database, update it to the current version and start MainLoop.

        controller, db_dir, db_name and no_wal are stored on the instance.
        The main db filename is db_name with '.db' appended when missing.

        The stored version is read from the 'version' table. While it is
        below HC.SOFTWARE_VERSION, one update step at a time is run inside
        its own transaction. Finally self.MainLoop is handed to
        controller.CallToThreadLongRunning and this constructor blocks until
        self._ready_to_serve_requests is set.

        Raises:
            Exception: the stored version is more than 50 behind
                HC.SOFTWARE_VERSION or below 238, an update step failed, or
                self._could_not_initialise was set while waiting.
            HydrusExceptions.DBAccessException: an update transaction could
                not be started.
        """

        self._controller = controller
        self._db_dir = db_dir
        self._db_name = db_name
        self._no_wal = no_wal

        # transaction bookkeeping
        self._transaction_started = 0
        self._in_transaction = False
        self._transaction_contains_writes = False

        self._connection_timestamp = 0

        main_db_filename = db_name

        if not main_db_filename.endswith('.db'):

            main_db_filename += '.db'

        self._db_filenames = {}

        self._db_filenames['main'] = main_db_filename

        self._InitExternalDatabases()

        # the flag is only enabled for sqlite 3.11.0 and later
        if distutils.version.LooseVersion(
                sqlite3.sqlite_version) < distutils.version.LooseVersion(
                    '3.11.0'):

            self._fast_big_transaction_wal = False

        else:

            self._fast_big_transaction_wal = True

        # lifecycle flags; some are polled at the bottom of this method
        self._is_first_start = False
        self._is_db_updated = False
        self._local_shutdown = False
        self._loop_finished = False
        self._ready_to_serve_requests = False
        self._could_not_initialise = False

        self._jobs = Queue.PriorityQueue()
        self._pubsubs = []

        self._currently_doing_job = False
        self._current_status = ''
        self._current_job_name = ''

        self._db = None
        self._c = None

        if os.path.exists(
                os.path.join(self._db_dir, self._db_filenames['main'])):

            # open and close to clean up in case last session didn't close well

            self._InitDB()
            self._CloseDBCursor()

        self._InitDB()

        (version,
         ) = self._c.execute('SELECT version FROM version;').fetchone()

        # NOTE(review): the check uses a gap of 50 versions but the message
        # suggests version + 45 -- confirm this margin is intentional
        if version < HC.SOFTWARE_VERSION - 50:

            raise Exception('Your current database version of hydrus ' +
                            str(version) +
                            ' is too old for this software version ' +
                            str(HC.SOFTWARE_VERSION) +
                            ' to update. Please try updating with version ' +
                            str(version + 45) + ' or earlier first.')

        if version < 238:

            raise Exception(
                'Unfortunately, this software cannot update your database. Please try installing version 238 first.'
            )

        # one update step per iteration, each in its own transaction
        while version < HC.SOFTWARE_VERSION:

            time.sleep(self.UPDATE_WAIT)

            try:

                self._BeginImmediate()

            except Exception as e:

                raise HydrusExceptions.DBAccessException(
                    HydrusData.ToUnicode(e))

            try:

                self._UpdateDB(version)

                self._Commit()

                self._is_db_updated = True

            except:

                # build the error first so format_exc() still describes the
                # update failure rather than any later rollback failure
                e = Exception('Updating the ' + self._db_name +
                              ' db to version ' + str(version + 1) +
                              ' caused this error:' + os.linesep +
                              traceback.format_exc())

                try:

                    self._Rollback()

                except Exception as rollback_e:

                    HydrusData.Print(
                        'When the update failed, attempting to rollback the database failed.'
                    )

                    HydrusData.PrintException(rollback_e)

                raise e

            # re-read the stored version to decide whether to loop again
            (version,
             ) = self._c.execute('SELECT version FROM version;').fetchone()

        self._RepairDB()

        self._CloseDBCursor()

        self._controller.CallToThreadLongRunning(self.MainLoop)

        # poll until the flag is set, or fail if initialisation is reported
        # as impossible (both flags are presumably set by MainLoop -- confirm)
        while not self._ready_to_serve_requests:

            time.sleep(0.1)

            if self._could_not_initialise:

                raise Exception(
                    'Could not initialise the db! Error written to the log!')
Esempio n. 12
0
def CanVacuum(db_path, stop_time=None):
    """Decide whether the sqlite database at db_path can be vacuumed now.

    The live size of the database is computed from its page_size,
    page_count and freelist_count pragmas. When stop_time is given, the
    vacuum is refused if an estimate of its duration (db size divided by a
    fixed 1048576 bytes per second) means it should already have started.

    Free disk space is then checked with psutil. If the temp directory and
    the database directory are on the same device, that device needs 2.2
    times the db size free; otherwise each device needs 1.1 times the db
    size free.

    Returns True or False. Any exception is printed to the log and treated
    as False.
    """
    try:

        db = sqlite3.connect(db_path,
                             isolation_level=None,
                             detect_types=sqlite3.PARSE_DECLTYPES)

        try:

            c = db.cursor()

            (page_size, ) = c.execute('PRAGMA page_size;').fetchone()
            (page_count, ) = c.execute('PRAGMA page_count;').fetchone()
            (freelist_count, ) = c.execute('PRAGMA freelist_count;').fetchone()

        finally:

            # the connection is only needed for the pragmas; close it so the
            # handle is not leaked on every call
            db.close()

        db_size = (page_count - freelist_count) * page_size

        if stop_time is not None:

            approx_vacuum_speed_mb_per_s = 1048576 * 1

            approx_vacuum_duration = db_size / approx_vacuum_speed_mb_per_s

            time_i_will_have_to_start = stop_time - approx_vacuum_duration

            if HydrusData.TimeHasPassed(time_i_will_have_to_start):

                return False

        temp_dir = tempfile.gettempdir()
        db_dir = os.path.dirname(db_path)

        temp_disk_usage = psutil.disk_usage(temp_dir)

        if HydrusPaths.GetDevice(temp_dir) == HydrusPaths.GetDevice(db_dir):

            # one device has to satisfy both requirements at once
            if temp_disk_usage.free < db_size * 2.2:

                return False

        else:

            if temp_disk_usage.free < db_size * 1.1:

                return False

            db_disk_usage = psutil.disk_usage(db_dir)

            if db_disk_usage.free < db_size * 1.1:

                return False

        return True

    except Exception as e:

        HydrusData.Print('Could not determine whether to vacuum or not:')

        HydrusData.PrintException(e)

        return False
Esempio n. 13
0
 def Parse( self, job_key, data, referral_url, desired_content ):
     """Fetch every url parsed from data and gather the children's content.

     Urls come from self.ParseURLs. Each one is fetched with a GET
     NetworkJob that overrides bandwidth limits and is queued on the
     client controller's network engine. The job's content is handed to
     GetChildrenContent along with self._children.

     A CancelledException from the job stops the loop and returns what
     has been gathered so far. Any other NetworkException (a 404 gets its
     own status text) is reported on job_key's 'script_status' variable,
     followed by a two second pause, and that url is skipped.

     Returns the combined list of content.

     Raises HydrusExceptions.CancelledException if job_key reports
     cancellation after a url has been processed.
     """
     
     search_urls = self.ParseURLs( job_key, data, referral_url )
     
     content = []
     
     for search_url in search_urls:
         
         job_key.SetVariable( 'script_status', 'fetching ' + search_url )
         
         network_job = ClientNetworking.NetworkJob( 'GET', search_url, referral_url = referral_url )
         
         network_job.OverrideBandwidth()
         
         HG.client_controller.network_engine.AddJob( network_job )
         
         try:
             
             network_job.WaitUntilDone()
             
         except HydrusExceptions.CancelledException:
             
             break
             
         except HydrusExceptions.NetworkException as e:
             
             # everything caught here is a NetworkException, so only the
             # 404 case needs telling apart; there is nothing to re-raise
             if isinstance( e, HydrusExceptions.NotFoundException ):
                 
                 job_key.SetVariable( 'script_status', '404 - nothing found' )
                 
             else:
                 
                 job_key.SetVariable( 'script_status', 'Network error! Details written to log.' )
                 
                 HydrusData.Print( 'Problem fetching ' + search_url + ':' )
                 HydrusData.PrintException( e )
                 
             
             time.sleep( 2 )
             
             continue
             
         
         linked_data = network_job.GetContent()
         
         children_content = GetChildrenContent( job_key, self._children, linked_data, search_url, desired_content )
         
         content.extend( children_content )
         
         if job_key.IsCancelled():
             
             raise HydrusExceptions.CancelledException()
             
         
     
     return content
Esempio n. 14
0
def THREADDownloadURLs(job_key, urls, title):
    """Download and import each url, reporting progress through job_key.

    Every url becomes a FileSeed that is downloaded and imported with the
    default 'loud' file import options, using network jobs that override
    bandwidth limits. job_key's popup text and gauge are updated as the
    loop advances, and the loop stops early when job_key.WaitIfNeeded()
    says to quit.

    A failing url is counted and printed to the log without stopping the
    run. At the end the popup text summarises the counts, the hashes of
    successfully imported files are attached as 'popup_files' (each hash
    once, in first-seen order), and job_key is finished.
    """
    job_key.SetVariable('popup_title', title)
    job_key.SetVariable('popup_text_1', 'initialising')

    successful_count = 0
    redundant_count = 0
    deleted_count = 0
    failed_count = 0

    # ordered list for presentation, set for fast membership tests
    ordered_hashes = []
    seen_hashes = set()

    file_import_options = HG.client_controller.new_options.GetDefaultFileImportOptions(
        'loud')

    def network_job_factory(*args, **kwargs):

        job = ClientNetworkingJobs.NetworkJob(*args, **kwargs)

        job.OverrideBandwidth()

        return job

    presentation_context_factory = GenerateMultiplePopupNetworkJobPresentationContextFactory(
        job_key)

    for (index, url) in enumerate(urls):

        (_paused, should_quit) = job_key.WaitIfNeeded()

        if should_quit:

            break

        position = index + 1

        job_key.SetVariable(
            'popup_text_1',
            HydrusData.ConvertValueRangeToPrettyString(position, len(urls)))
        job_key.SetVariable('popup_gauge_1', (position, len(urls)))

        file_seed = ClientImportFileSeeds.FileSeed(
            ClientImportFileSeeds.FILE_SEED_TYPE_URL, url)

        try:

            file_seed.DownloadAndImportRawFile(
                url, file_import_options, network_job_factory,
                presentation_context_factory)

            status = file_seed.status

            if status in CC.SUCCESSFUL_IMPORT_STATES:

                if status == CC.STATUS_SUCCESSFUL_AND_NEW:

                    successful_count += 1

                elif status == CC.STATUS_SUCCESSFUL_BUT_REDUNDANT:

                    redundant_count += 1

                if file_seed.HasHash():

                    file_hash = file_seed.GetHash()

                    if file_hash not in seen_hashes:

                        ordered_hashes.append(file_hash)

                        seen_hashes.add(file_hash)

            elif status == CC.STATUS_DELETED:

                deleted_count += 1

        except Exception as error:

            failed_count += 1

            HydrusData.Print(url + ' failed to import!')
            HydrusData.PrintException(error)

    job_key.DeleteVariable('popup_network_job')

    tallies = (
        (successful_count, ' successful'),
        (redundant_count, ' already in db'),
        (deleted_count, ' deleted'),
        (failed_count, ' failed (errors written to log)'),
    )

    summary_parts = [
        HydrusData.ToHumanInt(count) + suffix
        for (count, suffix) in tallies
        if count > 0
    ]

    job_key.SetVariable('popup_text_1', ', '.join(summary_parts))

    if len(ordered_hashes) > 0:

        job_key.SetVariable('popup_files', (ordered_hashes, 'downloads'))

    job_key.DeleteVariable('popup_gauge_1')

    job_key.Finish()
Esempio n. 15
0
    def _WorkOnGallery(self):
        """Work the next unknown-status gallery seed, if there is one.

        Returns None without doing work when no seed is waiting, when the
        file limit has been reached (which also pauses the gallery), or
        when a NetworkException occurs (which delays further work by the
        'downloader_network_error_delay' option). Otherwise returns True.

        On success the url counters and current page index are advanced and
        the files repeating job is woken if new urls were added. Any other
        exception marks the seed as errored with the exception text, is
        printed to the log, and pauses the gallery.
        """
        gallery_seed = self._gallery_seed_log.GetNextGallerySeed(
            CC.STATUS_UNKNOWN)

        if gallery_seed is None:

            return

        with self._lock:

            if self._AmOverFileLimit():

                self._gallery_paused = True

                self._gallery_status = ''

                return

            self._gallery_status = 'checking next page'

        def add_file_seeds(file_seeds):

            # None means no cap on how many new urls may be added
            remaining = None if self._file_limit is None else self._file_limit - self._num_new_urls_found

            return ClientImporting.UpdateFileSeedCacheWithFileSeeds(
                self._file_seed_cache,
                file_seeds,
                max_new_urls_allowed=remaining)

        def show_status(text):

            with self._lock:

                self._gallery_status = text

        def ignore_title(text):

            return

        try:

            outcome = gallery_seed.WorkOnURL(
                'download page', self._gallery_seed_log, add_file_seeds,
                show_status, ignore_title, self._NetworkJobFactory,
                self._GalleryNetworkJobPresentationContextFactory,
                self._file_import_options)

            (num_urls_added, _num_already_in_cache, num_urls_total,
             _result_404, _added_new_gallery_pages, _stop_reason) = outcome

            self._num_new_urls_found += num_urls_added
            self._num_urls_found += num_urls_total

            if num_urls_added > 0:

                ClientImporting.WakeRepeatingJob(self._files_repeating_job)

            self._current_page_index += 1

        except HydrusExceptions.NetworkException as e:

            with self._lock:

                retry_delay = HG.client_controller.new_options.GetInteger(
                    'downloader_network_error_delay')

                self._DelayWork(retry_delay, HydrusData.ToUnicode(e))

            return

        except Exception as e:

            gallery_seed.SetStatus(CC.STATUS_ERROR,
                                   note=HydrusData.ToUnicode(e))

            HydrusData.PrintException(e)

            with self._lock:

                self._gallery_paused = True

        self._gallery_seed_log.NotifyGallerySeedsUpdated((gallery_seed, ))

        with self._lock:

            self._gallery_status = ''

        return True
Esempio n. 16
0
    def _ParseImagePage(self, html, url_base):
        """Extract the file download url and the tags from a booru image page.

        The parsing rules come from self._booru.GetData(), of which image_id,
        image_data and tag_classnames_to_namespaces are used here.

        When image_id is set, the element with that id is looked up. If it
        is missing, a 'Save this file' / 'Save this video' text node's
        parent link is used, then the first anchor whose href ends in
        '.webm', then a url embedded in a document.write("<source src='...
        script. If the element exists, its src (img/video) or href (a) is
        used, preferring the 'image-resize-link' href on pages containing
        'Running Danbooru'.

        When image_data is set, anchors whose text starts or ends with it
        supply the url, with a 'Download PNG' link taking priority. This
        result replaces whatever the image_id step found.

        Tags are read from links inside every element carrying one of the
        configured tag class names and combined with that class's namespace.
        A tag that fails to parse is logged and skipped.

        Returns (image_url, tags), with image_url joined against url_base.

        Raises HydrusExceptions.DataMissing if no download url can be found
        or if looking for it raises.
        """

        (search_url, search_separator, advance_by_page_num, thumb_classname,
         image_id, image_data,
         tag_classnames_to_namespaces) = self._booru.GetData()

        soup = ClientParsing.GetSoup(html)

        image_url = None

        try:

            if image_id is not None:

                image = soup.find(id=image_id)

                if image is None:

                    image_string = soup.find(text=re.compile('Save this file'))

                    if image_string is None:

                        image_string = soup.find(
                            text=re.compile('Save this video'))

                    if image_string is None:

                        # catchall for rule34hentai.net's webms

                        for a_tag in soup.find_all('a'):

                            # .get so an anchor without an href is skipped
                            # instead of raising and aborting the whole parse
                            href = a_tag.get('href')

                            if href is not None and href.endswith('.webm'):

                                image_url = href

                                break

                        # catchall for rule34hentai.net's mp4s, which are loaded in a mickey-mouse flv player

                        if image_url is None:

                            magic_phrase = 'document.write("<source src=\''

                            if magic_phrase in html:

                                # /image/252605' type='video/mp4...

                                image_url_and_gumpf = html.split(
                                    magic_phrase, 1)[1]

                                image_url = image_url_and_gumpf.split('\'',
                                                                      1)[0]

                    else:

                        image = image_string.parent

                        image_url = image['href']

                else:

                    if image.name in ('img', 'video'):

                        image_url = image['src']

                        if 'Running Danbooru' in html:

                            # possible danbooru resized image

                            possible_better_image = soup.find(
                                id='image-resize-link')

                            if possible_better_image is not None:

                                image_url = possible_better_image['href']

                    elif image.name == 'a':

                        image_url = image['href']

            if image_data is not None:

                links = soup.find_all('a')

                ok_link = None
                better_link = None

                for link in links:

                    if link.string is not None:

                        if link.string.startswith(
                                image_data) or link.string.endswith(
                                    image_data):

                            ok_link = link['href']

                        if link.string.startswith('Download PNG'):

                            better_link = link['href']

                            break

                if better_link is not None:

                    image_url = better_link

                else:

                    image_url = ok_link

        except Exception as e:

            raise HydrusExceptions.DataMissing(
                'Could not parse a download link for ' + url_base + '!' +
                os.linesep + HydrusData.ToUnicode(e))

        if image_url is None:

            raise HydrusExceptions.DataMissing(
                'Could not parse a download link for ' + url_base + '!')

        image_url = urlparse.urljoin(url_base, image_url)

        if 'gelbooru.com' in url_base:

            # giving 404 on some content servers for http, no redirect for some reason
            image_url = ClientNetworkingDomain.ConvertHTTPToHTTPS(image_url)

        tags = []

        for (tag_classname, namespace) in tag_classnames_to_namespaces.items():

            tag_list_entries = soup.find_all(class_=tag_classname)

            for tag_list_entry in tag_list_entries:

                links = tag_list_entry.find_all('a')

                # the classed element may itself be the link
                if tag_list_entry.name == 'a':

                    links.append(tag_list_entry)

                for link in links:

                    if link.string is None:

                        continue

                    try:

                        tag_string = HydrusData.ToUnicode(link.string)

                        tag_string = HydrusTags.CleanTag(tag_string)

                        if tag_string in (
                                '?', '-', '+', u'\xe2\x80\x93', u'\u2013'
                        ):  # last two are a couple of amusing encodings of en-dash '-' from danbooru

                            continue

                        tag = HydrusTags.CombineTag(namespace, tag_string)

                        tags.append(tag)

                    except Exception as e:

                        HydrusData.Print('Could not parse tag "' +
                                         repr(link.string) + '":')

                        HydrusData.PrintException(e)

        return (image_url, tags)
Esempio n. 17
0
def GetMime( path ):
    """
    Work out the mime of the file at path.
    
    Three checks run in order and the first one to give an answer wins:
    
    1. The first 256 bytes of the file are compared against every
       ( offset, header, mime ) entry in header_and_mime.
    2. HydrusVideoHandling.GetMimeFromFFMPEG is asked. Any Exception it raises
       is printed and detection carries on.
    3. hsaudiotag parses the file and its parsed type is mapped to an audio mime.
    
    Returns HC.APPLICATION_UNKNOWN if none of the checks recognise the file.
    """
    
    with open( path, 'rb' ) as f:
        
        f.seek( 0 )
        
        file_head = f.read( 256 )
        
    
    for ( offset, header, mime ) in header_and_mime:
        
        if not file_head[ offset: ].startswith( header ):
            
            continue
            
        
        if mime == HC.UNDETERMINED_WM:
            
            if HydrusVideoHandling.HasVideoStream( path ):
                
                return HC.VIDEO_WMV
                
            
            # no video stream, so this could be wma--the later checks are left to catch and verify that
            continue
            
        
        if mime == HC.UNDETERMINED_PNG:
            
            # apng detection is deliberately off: atm (Feb 2016) ffmpeg doesn't report duration for apngs,
            # so anything with a png header is reported as a plain png
            return HC.IMAGE_PNG
            
        
        return mime
        
    
    try:
        
        ffmpeg_mime = HydrusVideoHandling.GetMimeFromFFMPEG( path )
        
        if ffmpeg_mime != HC.APPLICATION_UNKNOWN:
            
            return ffmpeg_mime
            
        
    except Exception as e:
        
        HydrusData.Print( 'FFMPEG couldn\'t figure out the mime for: ' + path )
        
        # a MimeException only gets the one-line note above; anything else also gets its exception printed
        if not isinstance( e, HydrusExceptions.MimeException ):
            
            HydrusData.PrintException( e, do_wait = False )
            
        
    
    hsaudio_object = hsaudiotag.auto.File( path )
    
    if not hsaudio_object.valid:
        
        return HC.APPLICATION_UNKNOWN
        
    
    parsed_audio = hsaudio_object.original
    
    if isinstance( parsed_audio, hsaudiotag.mpeg.Mpeg ):
        
        return HC.AUDIO_MP3
        
    
    if isinstance( parsed_audio, hsaudiotag.flac.FLAC ):
        
        return HC.AUDIO_FLAC
        
    
    if isinstance( parsed_audio, hsaudiotag.ogg.Vorbis ):
        
        return HC.AUDIO_OGG
        
    
    if isinstance( parsed_audio, hsaudiotag.wma.WMADecoder ):
        
        return HC.AUDIO_WMA
        
    
    return HC.APPLICATION_UNKNOWN
Esempio n. 18
0
def GetMime(path):
    """
    Work out the mime of the file at path.

    Three checks run in order and the first one to give an answer wins:

    1. The first 256 bytes of the file are compared against every
       (offset, header, mime) entry in header_and_mime. A PNG header is
       reported as HC.IMAGE_APNG when HydrusVideoHandling.HasVideoStream
       is true for the file, and as HC.IMAGE_PNG otherwise.
    2. HydrusVideoHandling.GetMime is asked. Any Exception it raises is
       printed and detection carries on.
    3. hsaudiotag parses the file and its parsed type is mapped to an
       audio mime.

    Returns HC.APPLICATION_UNKNOWN if none of the checks recognise the file.

    Raises HydrusExceptions.SizeException if os.path.getsize reports the
    file as zero bytes long.
    """

    if os.path.getsize(path) == 0:

        raise HydrusExceptions.SizeException('File is of zero length!')

    with open(path, 'rb') as f:

        f.seek(0)

        file_head = f.read(256)

    for (offset, header, mime) in header_and_mime:

        if not file_head[offset:].startswith(header):

            continue

        if mime == HC.UNDETERMINED_WM:

            if HydrusVideoHandling.HasVideoStream(path):

                return HC.VIDEO_WMV

            # no video stream, so this could be wma--the later checks are
            # left to catch and verify that
            continue

        if mime == HC.UNDETERMINED_PNG:

            return (HC.IMAGE_APNG
                    if HydrusVideoHandling.HasVideoStream(path) else
                    HC.IMAGE_PNG)

        return mime

    try:

        ffmpeg_mime = HydrusVideoHandling.GetMime(path)

        if ffmpeg_mime != HC.APPLICATION_UNKNOWN:

            return ffmpeg_mime

    except Exception as e:

        HydrusData.Print('FFMPEG couldn\'t figure out the mime for: ' + path)

        # a MimeException only gets the one-line note above; anything else
        # also gets its exception printed
        if not isinstance(e, HydrusExceptions.MimeException):

            HydrusData.PrintException(e, do_wait=False)

    hsaudio_object = hsaudiotag.auto.File(path)

    if hsaudio_object.valid:

        # checked top to bottom, first matching parser type wins
        audio_types_and_mimes = (
            (hsaudiotag.mpeg.Mpeg, HC.AUDIO_MP3),
            (hsaudiotag.flac.FLAC, HC.AUDIO_FLAC),
            (hsaudiotag.ogg.Vorbis, HC.AUDIO_OGG),
            (hsaudiotag.wma.WMADecoder, HC.AUDIO_WMA),
        )

        parsed_audio = hsaudio_object.original

        for (audio_type, audio_mime) in audio_types_and_mimes:

            if isinstance(parsed_audio, audio_type):

                return audio_mime

    return HC.APPLICATION_UNKNOWN