def get_plugin_configuration(cls, queryset: QuerySet) -> "PluginConfiguration":
    defaults = cls._get_default_configuration()
    configuration = queryset.get_or_create(name=cls.PLUGIN_NAME, defaults=defaults)[0]
    if configuration.configuration:
        # Let's add translated descriptions and labels
        cls._append_config_structure(configuration.configuration)
    return configuration
def get_plugin_configuration(
    cls, queryset: Optional[QuerySet] = None
) -> "PluginConfiguration":
    if queryset is None:
        queryset = PluginConfiguration.objects.all()
    defaults = cls._get_default_configuration()
    configuration = queryset.get_or_create(name=cls.PLUGIN_NAME, defaults=defaults)[0]
    cls._update_configuration_structure(configuration)
    if configuration.configuration:
        # Let's add translated descriptions and labels
        cls._append_config_structure(configuration.configuration)
    return configuration
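# A minimal usage sketch for the optional-queryset variant above. Hedged:
# "SamplePlugin" is a hypothetical subclass invented for illustration, and the
# `active=True` filter assumes PluginConfiguration exposes an `active` field;
# neither is confirmed by this excerpt. The point is that callers may now omit
# the queryset entirely, or pass a narrowed one for scoped lookups:
#
#     configuration = SamplePlugin.get_plugin_configuration()
#     scoped = SamplePlugin.get_plugin_configuration(
#         queryset=PluginConfiguration.objects.filter(active=True)
#     )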
def wanted_generator(settings: 'Settings', ext_logger: RealLogger, attrs: QuerySet):
    own_settings = settings.providers[constants.provider_name]

    if not own_settings.api_key:
        ext_logger.error("Can't use {} API without an api key. Check {}/API_MANUAL.txt".format(
            constants.provider_name, constants.main_page
        ))
        return False

    queries: DataDict = {}
    queries_slist_params: DataDict = {}

    for attr in attrs.filter(name__startswith='wanted_params_'):

        attr_info = attr.name.replace('wanted_params_', '')
        query_name, attr_name = attr_info.split("_", maxsplit=1)

        if query_name not in queries:
            queries[query_name] = {
                'page': 1, 'S': 'objectSearch', 'match': 0, 'order': 'added', 'flow': 'DESC'
            }

        if attr_name.startswith('slist_'):
            if query_name not in queries_slist_params:
                queries_slist_params[query_name] = []
            queries_slist_params[query_name].append(
                '{}:{}'.format(attr_name.replace('slist_', ''), attr.value)
            )
        else:
            queries[query_name].update({attr_name: attr.value})

    for query_name, slist_params in queries_slist_params.items():
        queries[query_name].update({'slist': '|'.join(slist_params)})

    for query_name, query_values in queries.items():

        while True:
            # Read the values from the newly created Provider Model,
            # which should be created like this (extracted from the form):
            # wanted_params_match: Any, Sounds Like, Start With, End With, Exact -> 0, 4, 1, 2, 3
            # wanted_params_age: 18+ -> blank/Y/N
            # wanted_params_anth: Anthology -> blank/Y/N
            # wanted_params_bcopy: Copybook -> blank/Y/N
            # wanted_params_FREE: Free -> blank/Y/N
            # wanted_params_flist: Type ->
            #     blank: Any
            #     19: Bootleg
            #     18: Calendar
            #     12: Commercial Artbook
            #     8: Commercial CG
            #     7: Commercial Magazine
            #     25: Commercial Mook
            #     11: Commercial Novel
            #     10: Commercial other
            #     13: Commercial other book
            #     9: Commercial Soft
            #     2: Doujin CG
            #     24: Doujin Goods
            #     23: Doujin Movie
            #     22: Doujin Music
            #     21: Doujin Novel
            #     4: Doujin Other
            #     3: Doujin Soft
            #     1: Doujinshi
            #     5: Manga
            #     6: Manga (Part)
            #     17: Postcard
            #     16: Poster
            #     15: Shitajiki
            #     14: Telephone Card
            #     20: Unknown
            # wanted_params_date: Release date from -> yyyy-mm-dd
            # wanted_params_date2: Release date to -> yyyy-mm-dd
            # For slist parameters, here is the list of ALL search terms:
            #     C: Circle
            #     A: Author
            #     P: Parody
            #     H: Character
            #     N: Convention
            #     O: Collections
            #     K: Content
            #     G: Genre
            #     T: Type
            #     L: Publisher
            #     I: Imprint
            # wanted_params_slist_C: Separated by |
            # wanted_params_slist_A: Separated by |
            # wanted_params_slist_P: Separated by |
            # wanted_params_slist_H: Separated by |
            # wanted_params_slist_K: Separated by |
            # wanted_params_slist_G: Separated by |
            # wanted_params_slist_N: One
            # wanted_params_slist_O: One
            # wanted_params_slist_L: One
            # wanted_params_slist_I: One
            # wanted_params_cont: One
            # wanted_params_sub: One
            # wanted_params_scen: Censored -> blank/Y/N
            new_query = urllib.parse.urlencode(query_values, doseq=True)

            ext_logger.info('Querying {} for auto wanted galleries, page: {}, query name: {}, query: {}'.format(
                constants.provider_name, query_values['page'], query_name, new_query
            ))

            link = '{}/api/{}/?{}'.format(
                constants.main_page,
                own_settings.api_key,
                new_query
            )

            provider, provider_created = Provider.objects.get_or_create(
                slug=constants.provider_name,
                defaults={'name': constants.provider_name}
            )

            remaining_queries, int_created = attrs.get_or_create(
                provider=provider,
                name='remaining_queries',
                data_type='int',
                defaults={
                    'value_int': constants.daily_requests,
                }
            )

            last_query_date, date_created = attrs.get_or_create(
                provider=provider,
                name='last_query_date',
                data_type='date',
                defaults={
                    'value_date': django_tz.now(),
                }
            )

            if not date_created:
                # The quota resets at 00:00 GMT+1, so compare dates in that timezone.
                reset_tz = datetime.timezone(datetime.timedelta(hours=1))
                if last_query_date.value.astimezone(reset_tz).date() < django_tz.now().astimezone(reset_tz).date():
                    remaining_queries.value = constants.daily_requests
                    remaining_queries.save()

            if remaining_queries.value <= 0:
                ext_logger.warning("Daily queries quota {} reached for {}. It resets at 00:00 GMT+1".format(
                    constants.daily_requests,
                    constants.provider_name
                ))
                return

            response = request_with_retries(
                link,
                {
                    'headers': settings.requests_headers,
                    'timeout': settings.timeout_timer,
                },
                post=False,
                logger=ext_logger
            )

            remaining_queries.value -= 1
            remaining_queries.save()
            last_query_date.value = django_tz.now()
            last_query_date.save()

            if not response:
                ext_logger.error('Got to page {}, but did not get a response, stopping'.format(query_values['page']))
                break

            response.encoding = 'utf-8'
            # Based on: https://www.doujinshi.org/API_MANUAL.txt
            api_galleries = convert_api_response_text_to_gallery_dicts(response.text)

            if not api_galleries:
                ext_logger.error('Server response: {}'.format(response.text))
                ext_logger.error('Got to page {}, but could not parse the response into galleries, stopping'.format(query_values['page']))
                break

            # Listen to what the server says
            remaining_queries.value = api_galleries[0].queries
            remaining_queries.save()

            used = Gallery.objects.filter(gid__in=[x.gid for x in api_galleries], provider=constants.provider_name)

            # If the amount of galleries present in the database is equal to what we get from the page,
            # we assume we already processed everything. You can force processing everything by using:
            force_process, force_created = attrs.get_or_create(
                provider=provider,
                name='force_process',
                data_type='bool',
                defaults={
                    'value_bool': False,
                }
            )

            ext_logger.info(
                'Page has {} galleries, from which {} are already present in the database.'.format(
                    len(api_galleries),
                    used.count()
                )
            )

            if not force_process.value and used.count() == len(api_galleries):
                ext_logger.info('Got to page {}, it has already been processed entirely, stopping'.format(query_values['page']))
                break

            used_gids = used.values_list('gid', flat=True)

            for gallery_data in api_galleries:
                if gallery_data.gid not in used_gids:
                    if not gallery_data.dl_type:
                        gallery_data.dl_type = 'auto_wanted'
                    gallery_data.reason = attrs.fetch_value('wanted_reason_{}'.format(query_name)) or 'backup'
                    gallery = Gallery.objects.add_from_values(gallery_data)
                    # We match anyway in case there's a previous WantedGallery.
                    # Actually, we don't match, since we only get metadata here, so it should not count as found.
                    publisher_name = ''
                    publisher = gallery.tags.filter(scope='publisher').first()
                    if publisher:
                        publisher_name = publisher.name

                    search_title = format_title_to_wanted_search(gallery.title_jpn)

                    wanted_galleries = WantedGallery.objects.filter(
                        title_jpn=gallery.title_jpn,
                        search_title=search_title
                    )

                    if not wanted_galleries:
                        # Default should_search and keep_searching to True only when the
                        # attribute is unset, so a stored False is respected.
                        should_search = attrs.fetch_value('wanted_should_search_{}'.format(query_name))
                        keep_searching = attrs.fetch_value('wanted_keep_searching_{}'.format(query_name))
                        wanted_gallery = WantedGallery.objects.create(
                            title=gallery.title or gallery.title_jpn,
                            title_jpn=gallery.title_jpn,
                            search_title=search_title,
                            book_type=gallery.category,
                            page_count=gallery.filecount,
                            publisher=publisher_name,
                            add_as_hidden=True,
                            reason=attrs.fetch_value('wanted_reason_{}'.format(query_name)) or '',
                            public=attrs.fetch_value('wanted_public_{}'.format(query_name)) or False,
                            should_search=True if should_search is None else should_search,
                            keep_searching=True if keep_searching is None else keep_searching,
                            provider=attrs.fetch_value('wanted_provider_{}'.format(query_name)) or '',
                            wanted_providers=attrs.fetch_value('wanted_providers_{}'.format(query_name)) or '',
                            category='Manga',
                        )
                        for artist in gallery.tags.filter(scope='artist'):
                            artist_obj = Artist.objects.filter(name_jpn=artist.name).first()
                            if not artist_obj:
                                artist_obj = Artist.objects.create(name=artist.name, name_jpn=artist.name)
                            wanted_gallery.artists.add(artist_obj)
                        ext_logger.info(
                            "Created wanted gallery ({}): {}, search title: {}".format(
                                wanted_gallery.book_type,
                                wanted_gallery.get_absolute_url(),
                                gallery.title_jpn
                            )
                        )
                        wanted_galleries = [wanted_gallery]

                    for wanted_gallery in wanted_galleries:
                        announce, announce_created = wanted_gallery.announces.get_or_create(
                            announce_date=gallery.create_date,
                            release_date=gallery.posted,
                            type='release_date',
                            source=constants.provider_name,
                        )
                        if announce_created and gallery.thumbnail:
                            announce.copy_img(gallery.thumbnail.path)
                        wanted_gallery.calculate_nearest_release_date()

            # galleries.extend(api_galleries)

            # The API manual says 25 results max per query, but in practice we get 50
            # per page, so fewer than 50 results means there are no more pages.
            if len(api_galleries) < 50:
                ext_logger.info(
                    'Got to page {}, and we got less than 50 galleries, '
                    'meaning there are no more pages, stopping'.format(query_values['page'])
                )
                break

            query_values['page'] += 1

    ext_logger.info("{} Auto wanted ended.".format(
        constants.provider_name
    ))
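# A self-contained, runnable sketch of the attribute-to-query mapping the
# generator above performs. Assumptions, stated loudly: the attribute names and
# values below are illustrative only, and a plain dict stands in for the real
# attrs queryset; the mapping logic itself mirrors the loop above.
def _demo_wanted_params_mapping():
    import urllib.parse

    # Hypothetical attributes, following the form described in the comment block above.
    demo_attrs = {
        'wanted_params_q1_match': '0',        # 0 -> Any
        'wanted_params_q1_flist': '1',        # 1 -> Doujinshi
        'wanted_params_q1_slist_C': 'SomeCircle',
        'wanted_params_q1_slist_A': 'SomeAuthor',
    }

    queries = {}
    slist_params = {}
    for name, value in demo_attrs.items():
        # Same split as above: the first token is the query name, the rest is the param.
        query_name, attr_name = name.replace('wanted_params_', '').split('_', maxsplit=1)
        queries.setdefault(query_name, {'page': 1, 'S': 'objectSearch'})
        if attr_name.startswith('slist_'):
            # slist params collect into 'letter:value' entries joined with '|'.
            slist_params.setdefault(query_name, []).append(
                '{}:{}'.format(attr_name.replace('slist_', ''), value))
        else:
            queries[query_name][attr_name] = value
    for query_name, params in slist_params.items():
        queries[query_name]['slist'] = '|'.join(params)

    # -> page=1&S=objectSearch&match=0&flist=1&slist=C%3ASomeCircle%7CA%3ASomeAuthor
    return urllib.parse.urlencode(queries['q1'], doseq=True)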
def wanted_generator(settings: 'Settings', ext_logger: RealLogger, attrs: QuerySet):
    own_settings = settings.providers[constants.provider_name]

    queries: DataDict = {}

    for attr in attrs.filter(name__startswith='wanted_params_'):

        attr_info = attr.name.replace('wanted_params_', '')
        query_name, attr_name = attr_info.split("_", maxsplit=1)

        if query_name not in queries:
            queries[query_name] = {'page': 1}

        queries[query_name].update({attr_name: attr.value})

    provider, provider_created = Provider.objects.get_or_create(
        slug=constants.provider_name,
        defaults={'name': constants.provider_name}
    )

    parser = settings.provider_context.get_parsers(
        settings, ext_logger, filter_name=constants.provider_name
    )[0]

    rounds = 0

    # Values that can be set:
    # subpath: Subpath to search. (books, tags/doujin)
    # container_tag: Tag for the main container for each individual link. (div, span)
    # container_attribute_name: Attribute name for the main container for each individual link. (class)
    # container_attribute_value: Attribute value for the main container for each individual link. (content-meta)
    # link_tag: Tag for the link container inside the container. (a, span)
    # link_attribute_name: Attribute name for the link container inside the container. (class)
    # link_attribute_value: Attribute value for the link container inside the container. (regex matched against the attribute value)
    # url_attribute_name: Attribute name for the URL inside the link container. (href, src)
    # link_attribute_get_text: Boolean to specify if it should get the text inside the tag. (True, False)
    for query_name, query_values in queries.items():

        while True:
            rounds += 1
            if rounds > 1:
                time.sleep(settings.wait_timer)

            ext_logger.info(
                'Querying {} for auto wanted galleries, query name: {}, options: {}'.format(
                    constants.provider_name, query_name, str(query_values)
                )
            )

            if 'subpath' not in query_values:
                ext_logger.error('Cannot query without setting a subpath for {}'.format(query_name))
                break

            subpath = query_values['subpath']

            if not {'container_tag', 'container_attribute_name', 'container_attribute_value'}.issubset(query_values.keys()):
                ext_logger.error('Cannot query without html container definition for {}'.format(query_name))
                break

            container_tag = query_values['container_tag']
            container_attribute_name = query_values['container_attribute_name']
            container_attribute_value = query_values['container_attribute_value']

            get_text_from_container = False
            link_tag = ''
            link_attribute_name = ''
            link_attribute_value = ''
            url_attribute_name = ''

            if 'link_attribute_get_text' in query_values and query_values['link_attribute_get_text']:
                get_text_from_container = True
            else:
                if not {'link_tag', 'link_attribute_name', 'link_attribute_value', 'url_attribute_name'}.issubset(query_values.keys()):
                    ext_logger.error('Cannot query without link container definition for {}'.format(query_name))
                    break

                link_tag = query_values['link_tag']
                link_attribute_name = query_values['link_attribute_name']
                # BeautifulSoup expects 'class_' as the keyword for the 'class' attribute.
                if link_attribute_name == 'class':
                    link_attribute_name = 'class_'
                link_attribute_value = query_values['link_attribute_value']
                url_attribute_name = query_values['url_attribute_name']

            full_url = urllib.parse.urljoin("{}/".format(subpath), "page/{}".format(query_values['page']))
            link = urllib.parse.urljoin(constants.main_url, full_url)

            response = request_with_retries(
                link,
                {
                    'headers': settings.requests_headers,
                    'timeout': settings.timeout_timer,
                    'cookies': own_settings.cookies
                },
                post=False,
                logger=ext_logger
            )

            if not response:
                ext_logger.error('Got to page {}, but did not get a response, stopping'.format(query_values['page']))
                break

            response.encoding = 'utf-8'
            soup = BeautifulSoup(response.text, 'html.parser')

            gallery_containers = soup.find_all(
                container_tag,
                **{container_attribute_name: re.compile(container_attribute_value)}
            )

            gallery_links: typing.List[str] = []
            gallery_gids: typing.List[str] = []

            for gallery_container in gallery_containers:
                if get_text_from_container:
                    gallery_link = gallery_container.get_text()
                else:
                    gallery_url_container = gallery_container.find(
                        link_tag,
                        **{link_attribute_name: re.compile(link_attribute_value)}
                    )
                    if gallery_url_container is None:
                        continue
                    if gallery_url_container.has_attr(url_attribute_name):
                        gallery_link = gallery_url_container[url_attribute_name]
                    else:
                        continue
                gallery_gids.append(gallery_link[1:])

            if not gallery_gids:
                # ext_logger.error('Server response: {}'.format(response.text))
                ext_logger.error('Got to page {}, but could not parse the response into galleries, stopping'.format(query_values['page']))
                break

            used = Gallery.objects.filter(gid__in=gallery_gids, provider=constants.provider_name)

            # If the amount of galleries present in the database is equal to what we get from the page,
            # we assume we already processed everything. You can force processing everything by using:
            force_process, force_created = attrs.get_or_create(
                provider=provider,
                name='force_process',
                data_type='bool',
                defaults={
                    'value_bool': False,
                }
            )

            ext_logger.info(
                'Page has {} galleries, from which {} are already present in the database.'.format(
                    len(gallery_gids), used.count()
                )
            )

            if not force_process.value and used.count() == len(gallery_gids):
                ext_logger.info('Got to page {}, it has already been processed entirely, stopping'.format(query_values['page']))
                break

            used_gids = used.values_list('gid', flat=True)

            for gallery_gid in gallery_gids:
                if gallery_gid not in used_gids:
                    gallery_link = urllib.parse.urljoin(constants.main_url, "/" + gallery_gid)
                    gallery_links.append(gallery_link)

            api_galleries = parser.fetch_multiple_gallery_data(gallery_links)

            if not api_galleries:
                # ext_logger.error('Server response: {}'.format(response.text))
                ext_logger.error('Got to page {}, but could not parse the gallery links into GalleryData instances, stopping'.format(query_values['page']))
                break

            for gallery_data in api_galleries:
                if gallery_data.gid not in used_gids:
                    if not gallery_data.dl_type:
                        gallery_data.dl_type = 'auto_wanted'
                    gallery_data.reason = attrs.fetch_value('wanted_reason_{}'.format(query_name)) or 'backup'
                    gallery = Gallery.objects.add_from_values(gallery_data)
                    # We match anyway in case there's a previous WantedGallery.
                    # Actually, we don't match, since we only get metadata here, so it should not count as found.
                    publisher_name = ''
                    publisher = gallery.tags.filter(scope='publisher').first()
                    if publisher:
                        publisher_name = publisher.name

                    search_title = format_title_to_wanted_search(gallery.title)

                    wanted_galleries = WantedGallery.objects.filter(
                        title=gallery.title,
                        search_title=search_title
                    )

                    if not wanted_galleries:
                        wanted_gallery = WantedGallery.objects.create(
                            title=gallery.title,
                            title_jpn=gallery.title_jpn,
                            search_title=search_title,
                            book_type=gallery.category,
                            page_count=gallery.filecount,
                            publisher=publisher_name,
                            add_as_hidden=True,
                            reason=attrs.fetch_value('wanted_reason_{}'.format(query_name)) or '',
                            public=attrs.fetch_value('wanted_public_{}'.format(query_name)) or False,
                            should_search=attrs.fetch_value('wanted_should_search_{}'.format(query_name)) or False,
                            keep_searching=attrs.fetch_value('wanted_keep_searching_{}'.format(query_name)) or False,
                            notify_when_found=attrs.fetch_value('wanted_notify_when_found_{}'.format(query_name)) or False,
                            provider=attrs.fetch_value('wanted_provider_{}'.format(query_name)) or '',
                            wanted_providers=attrs.fetch_value('wanted_providers_{}'.format(query_name)) or '',
                        )
                        for artist in gallery.tags.filter(scope='artist'):
                            artist_obj = Artist.objects.filter(name=artist.name).first()
                            if not artist_obj:
                                artist_obj = Artist.objects.create(name=artist.name)
                            wanted_gallery.artists.add(artist_obj)
                        ext_logger.info(
                            "Created wanted gallery ({}): {}, search title: {}".format(
                                wanted_gallery.book_type,
                                wanted_gallery.get_absolute_url(),
                                wanted_gallery.search_title
                            )
                        )
                        wanted_galleries = [wanted_gallery]

                    for wanted_gallery in wanted_galleries:
                        announce, announce_created = wanted_gallery.announces.get_or_create(
                            announce_date=gallery.create_date,
                            release_date=gallery.posted,
                            type='release_date',
                            source=constants.provider_name,
                        )
                        if announce_created and gallery.thumbnail:
                            announce.copy_img(gallery.thumbnail.path)
                        wanted_gallery.calculate_nearest_release_date()

            # galleries.extend(api_galleries)

            # If no new galleries were parsed from this page, assume there are no more pages.
            if len(api_galleries) < 1:
                ext_logger.info(
                    'Got to page {}, and we got no new galleries, '
                    'meaning there are no more pages, stopping'.format(query_values['page'])
                )
                break

            query_values['page'] += 1

    ext_logger.info("{} Auto wanted ended.".format(constants.provider_name))
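# A self-contained, runnable sketch of the container/link extraction the
# generator above performs with its query parameters. Assumptions, stated
# loudly: the HTML and the parameter values ('content-meta', 'cover') are
# invented to match the examples in the comment block above, not taken from a
# real provider page.
def _demo_container_link_extraction():
    import re
    from bs4 import BeautifulSoup

    html = (
        '<div class="content-meta"><a class="cover" href="/books/12345">A</a></div>'
        '<div class="content-meta"><a class="cover" href="/books/67890">B</a></div>'
    )
    soup = BeautifulSoup(html, 'html.parser')

    # container_tag='div', container_attribute_name='class',
    # container_attribute_value='content-meta' ('class_' is bs4's keyword
    # for the 'class' attribute).
    containers = soup.find_all('div', class_=re.compile('content-meta'))

    gids = []
    for container in containers:
        # link_tag='a', link_attribute_name='class', link_attribute_value='cover',
        # url_attribute_name='href'
        url_container = container.find('a', class_=re.compile('cover'))
        if url_container is not None and url_container.has_attr('href'):
            gids.append(url_container['href'][1:])  # gid = href minus the leading '/'

    return gids  # ['books/12345', 'books/67890']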