Ejemplo n.º 1
0
def search_zerochan(url):
    erina_log.logcaches(f'Searching for Zerochan Data...')
    zerochan_results = {}
    response = request(url)
    zerochan = BeautifulSoup(response.text, 'html.parser')
    zerochan_results['mangaka'] = 'N/A'
    zerochan_results['series'] = 'N/A'
    zerochan_results['character'] = 'N/A'
    zerochan_results['source'] = 'N/A'

    zerochan_info = json_to_dict(
        zerochan.find('script', attrs={
            'type': 'application/ld+json'
        }).get_text())

    zerochan_results['uploader'] = zerochan_info['author']
    zerochan_results['content_url'] = zerochan_info['contentUrl']
    zerochan_results['thumbnail'] = zerochan_info['thumbnail']
    zerochan_results['format'] = zerochan_info['encodingFormat']
    zerochan_results['post_date'] = zerochan_info['datePublished']
    zerochan_results['name'] = zerochan_info['name']
    zerochan_results['width'] = zerochan_info['width']
    zerochan_results['height'] = zerochan_info['height']
    zerochan_results['content_size'] = zerochan_info['contentSize']

    for tag in zerochan.find('ul', attrs={'id': "tags"}).findChildren('li'):
        if tag.get_text().find(' Mangaka') != -1:
            zerochan_results['mangaka'] = tag.get_text().replace(
                ' Mangaka', '')
        elif tag.get_text().find(' Series') != -1:
            zerochan_results['series'] = tag.get_text().replace(' Series', '')
        elif tag.get_text().find(' Character') != -1:
            zerochan_results['character'] = tag.get_text().replace(
                ' Character', '')
        elif tag.get_text().find(' Source') != -1:
            zerochan_results['source'] = tag.get_text().replace(' Source', '')

    return zerochan_results
Ejemplo n.º 2
0
def search_gelbooru(url):
    erina_log.logcaches(f'Searching for Gelbooru Data...')
    gelbooru_results = {}
    response = request(url)
    gelbooru = BeautifulSoup(response.text, 'html.parser')
    characters_containers = gelbooru.find_all(
        'li', attrs={'class': 'tag-type-character'})
    characters = []
    for character in characters_containers:
        characters.append(character.findChildren('a')[1].get_text())
    gelbooru_results['characters'] = characters

    copyright_containers = gelbooru.find_all(
        'li', attrs={'class': 'tag-type-copyright'})
    copyrights = []
    for copyright_text in copyright_containers:
        copyrights.append(copyright_text.findChildren('a')[1].get_text())
    gelbooru_results['copyrights'] = copyrights

    metadata_containers = gelbooru.find_all(
        'li', attrs={'class': 'tag-type-metadata'})
    metadatas = []
    for metadata in metadata_containers:
        metadatas.append(metadata.findChildren('a')[1].get_text())
    gelbooru_results['metadatas'] = metadatas

    tag_containers = gelbooru.find_all('li',
                                       attrs={'class': 'tag-type-general'})
    tags = []
    for tag in tag_containers:
        tags.append(tag.findChildren('a')[1].get_text())
    gelbooru_results['tags'] = tags

    gelbooru_statistics_html = response.text.split(
        '<h3>Statistics</h3>')[1].split('<h3>Options</h3>')[0]
    gelbooru_statistics = BeautifulSoup(gelbooru_statistics_html,
                                        'html.parser')
    list_items = gelbooru_statistics.find_all('li')
    gelbooru_results['id'] = 'N/A'
    gelbooru_results['size'] = 'N/A'
    gelbooru_results['source'] = 'N/A'
    gelbooru_results['rating'] = 'N/A'
    gelbooru_results['date'] = 'N/A'
    gelbooru_results['uploader'] = 'N/A'
    gelbooru_results['score'] = 'N/A'
    for item in list_items:
        if item.get_text()[:3] == 'Id:':
            gelbooru_results['id'] = item.get_text()[4:]
        elif item.get_text()[:5] == 'Size:':
            gelbooru_results['size'] = item.get_text()[6:]
        elif item.get_text()[:7] == 'Source:':
            gelbooru_results['source'] = item.find('a')['href']
        elif item.get_text()[:7] == 'Rating:':
            gelbooru_results['rating'] = item.get_text()[8:]
        elif item.get_text()[:7] == 'Posted:':
            gelbooru_results['date'] = item.get_text()[8:].split(' by ')[0]
            gelbooru_results['uploader'] = item.get_text()[8:].split(' by ')[1]
        elif item.get_text()[:6] == 'Score:':
            gelbooru_results['score'] = re.sub('[^0123456789]', '',
                                               item.get_text()[7:])
    return gelbooru_results
Ejemplo n.º 3
0
def search_iqdb(image_hash, image_url='', file_io=None):
    """
    Searches and caches IQDB for anime/manga related images.

    Erina Project - 2020\n
    © Anime no Sekai
    """

    erina_log.logcaches(f'Searching for IQDB Data...', 'iqdb', str(image_hash))
    StatsAppend(ExternalStats.iqdbCalls, "New Call")
    results = {}

    ### If a file is given, send the file to iqdb.
    if file_io is not None:
        response = requests.post('https://iqdb.org/',
                                 files={'file': ('image_to_search', file_io)})
    else:
        if image_url == '':
            erina_log.logerror('[ErinaCaches] [IQDB] No file or URL provided')
            return {'error': 'no file or url provided'}
        else:
            response = request(f'https://iqdb.org/?url={image_url}')

    ### If the image format is not supported by IQDB
    if 'Not an image or image format not supported' in response.text:
        print('Format not supported.')
        erina_log.logerror('[ErinaCaches] [IQDB] Format not supported')
        return {'error': 'format not supported'}

###### IQDB SCRAPING
    iqdb = BeautifulSoup(response.text, 'html.parser')

    ##### Search for the IQDB result
    try:
        tables = iqdb.find_all('table')
        search_result = tables[1].findChildren("th")[0].get_text()
    except Exception as e:
        erina_log.logerror(
            f'[ErinaCaches] [IQDB] Client Error, Error details: {str(e)}')
        return {'error': 'client error', 'error_details': e}

##### Verify if the result is relevant or not
    iqdb_tags = []
    if search_result == 'No relevant matches':
        erina_log.logerror('[ErinaCaches] [IQDB] No relevant matches found')
        return {'error': 'not found'}
    else:
        try:
            ### Getting the tags from IQDB
            alt_string = tables[1].findChildren("img")[0]['alt']
            iqdb_tags = alt_string.split('Tags: ')[1].split(' ')
        except:
            iqdb_tags = []

    #### Getting the Database URL from IQDB
    try:
        url = tables[1].find_all('td',
                                 attrs={'class': 'image'
                                        })[0].findChildren('a')[0]['href']
        url = 'https://' + url.split('//')[1]
    except:
        url = ''

    #### Getting the result image size
    try:
        size = tables[1].find_all('tr')[3].get_text().split(' [')[0]
    except:
        size = ''

    #### Getting the image rating (if it is NSFW or not)
    if tables[1].find_all('tr')[3].get_text().split()[1].replace(
            '[', '').replace(']', '').replace(' ', '') == 'Safe':
        is_safe = True
    else:
        is_safe = False

    #### Getting the similarity
    try:
        similarity = tables[1].find_all('tr')[4].get_text().replace(
            '% similarity', '')
    except:
        similarity = ''

    #### Adding the results to the main result variable
    results['iqdb_tags'] = iqdb_tags
    results['url'] = url
    results['size'] = size
    results['is_safe'] = is_safe
    results['similarity'] = similarity

    ############ FUNCTION DEFINITION FOR RESULTS SCRAPING
    if url.find('gelbooru.') != -1:
        results['database'] = 'gelbooru'
        results['gelbooru_results'] = search_gelbooru(url)

    elif url.find('danbooru.') != -1:
        results['database'] = 'danbooru'
        results['danbooru_results'] = search_danbooru(url)

    elif url.find('zerochan.') != -1:
        results['database'] = 'zerochan'
        results['zerochan_results'] = search_zerochan(url)

    elif url.find('konachan.') != -1:
        results['database'] = 'konachan'
        results['konachan_results'] = search_konachan(url)

    elif url.find('yande.re') != -1:
        results['database'] = 'yandere'
        results['yandere_results'] = search_yandere(url)

    elif url.find('anime-pictures.') != -1:
        results['database'] = 'anime_pictures'
        results['anime_pictures_results'] = search_animepictures(url)

    elif url.find('e-shuushuu') != -1:
        results['database'] = 'e_shuushuu'
        results['e_shuushuu_results'] = search_eshuushuu(url)

#################### CACHING ##########

    new_cache_content = []
    new_cache_content.append('   --- IQDB CACHE ---   ')
    new_cache_content.append('')

    new_cache_content.append('IQDB Tags: ' +
                             create_erina_list(results['iqdb_tags']))
    new_cache_content.append('URL: ' + results['url'])
    new_cache_content.append('Size: ' + results['size'])
    new_cache_content.append('isSafe: ' + str(results['is_safe']))
    new_cache_content.append('Similarity: ' + results['similarity'])
    new_cache_content.append('Database: ' + results['database'])
    new_cache_content.append('')

    if results['database'] == 'gelbooru':

        new_cache_content.append(
            'Gelbooru Characters: ' +
            create_erina_list(results['gelbooru_results']['characters']))
        new_cache_content.append(
            'Gelbooru Copyrights: ' +
            create_erina_list(results['gelbooru_results']['copyrights']))
        new_cache_content.append(
            'Gelbooru Metadatas: ' +
            create_erina_list(results['gelbooru_results']['metadatas']))
        new_cache_content.append(
            'Gelbooru Tags: ' +
            create_erina_list(results['gelbooru_results']['tags']))

        new_cache_content.append('Gelbooru ID: ' +
                                 results['gelbooru_results']['id'])
        new_cache_content.append('Gelbooru Size: ' +
                                 results['gelbooru_results']['size'])
        new_cache_content.append('Gelbooru Source: ' +
                                 results['gelbooru_results']['source'])
        new_cache_content.append('Gelbooru Rating: ' +
                                 results['gelbooru_results']['rating'])
        new_cache_content.append('Gelbooru Date: ' +
                                 results['gelbooru_results']['date'])
        new_cache_content.append('Gelbooru Uploader: ' +
                                 results['gelbooru_results']['uploader'])
        new_cache_content.append('Gelbooru Score: ' +
                                 results['gelbooru_results']['score'])

    elif results['database'] == 'danbooru':

        new_cache_content.append(
            'Danbooru Artists: ' +
            create_erina_list(results['danbooru_results']['artists']))
        new_cache_content.append(
            'Danbooru Characters: ' +
            create_erina_list(results['danbooru_results']['characters']))
        new_cache_content.append(
            'Danbooru Copyrights: ' +
            create_erina_list(results['danbooru_results']['copyrights']))
        new_cache_content.append(
            'Danbooru Metadatas: ' +
            create_erina_list(results['danbooru_results']['metadatas']))
        new_cache_content.append(
            'Danbooru Tags: ' +
            create_erina_list(results['danbooru_results']['tags']))

        new_cache_content.append('Danbooru ID: ' +
                                 results['danbooru_results']['id'])
        new_cache_content.append('Danbooru Uploader: ' +
                                 results['danbooru_results']['uploader'])
        new_cache_content.append('Danbooru Date: ' +
                                 results['danbooru_results']['date'])
        new_cache_content.append('Danbooru Content Size: ' +
                                 results['danbooru_results']['content_size'])
        new_cache_content.append('Danbooru Format: ' +
                                 results['danbooru_results']['format'])
        new_cache_content.append('Danbooru Size: ' +
                                 results['danbooru_results']['size'])
        new_cache_content.append('Danbooru Source: ' +
                                 results['danbooru_results']['source'])
        new_cache_content.append('Danbooru Rating: ' +
                                 results['danbooru_results']['rating'])
        new_cache_content.append('Danbooru Score: ' +
                                 results['danbooru_results']['score'])
        new_cache_content.append('Danbooru Favorites: ' +
                                 results['danbooru_results']['favorites'])
        new_cache_content.append('Danbooru Status: ' +
                                 results['danbooru_results']['status'])

    elif results['database'] == 'zerochan':
        new_cache_content.append('Zerochan ID: ' +
                                 results['zerochan_results']['id'])
        new_cache_content.append('Zerochan Uploader: ' +
                                 results['zerochan_results']['uploader'])
        new_cache_content.append('Zerochan Content URL: ' +
                                 results['zerochan_results']['content_url'])
        new_cache_content.append('Zerochan Thumbnail: ' +
                                 results['zerochan_results']['thumbnail'])
        new_cache_content.append('Zerochan Format: ' +
                                 results['zerochan_results']['format'])
        new_cache_content.append('Zerochan Post Date: ' +
                                 results['zerochan_results']['post_date'])
        new_cache_content.append('Zerochan Name: ' +
                                 results['zerochan_results']['name'])
        new_cache_content.append('Zerochan Width: ' +
                                 results['zerochan_results']['width'])
        new_cache_content.append('Zerochan Height: ' +
                                 results['zerochan_results']['height'])
        new_cache_content.append('Zerochan Content Size: ' +
                                 results['zerochan_results']['content_size'])
        new_cache_content.append('Zerochan Mangaka: ' +
                                 results['zerochan_results']['mangaka'])
        new_cache_content.append('Zerochan Series: ' +
                                 results['zerochan_results']['series'])
        new_cache_content.append('Zerochan Character: ' +
                                 results['zerochan_results']['character'])
        new_cache_content.append('Zerochan Source: ' +
                                 results['zerochan_results']['source'])

    elif results['database'] == 'konachan':

        new_cache_content.append(
            'Konachan Copyrights: ' +
            create_erina_list(results['konachan_results']['copyrights']))
        new_cache_content.append(
            'Konachan Styles: ' +
            create_erina_list(results['konachan_results']['styles']))
        new_cache_content.append(
            'Konachan Artists: ' +
            create_erina_list(results['konachan_results']['artists']))
        new_cache_content.append(
            'Konachan Characters: ' +
            create_erina_list(results['konachan_results']['characters']))
        new_cache_content.append(
            'Konachan Tags: ' +
            create_erina_list(results['konachan_results']['tags']))
        new_cache_content.append(
            'Konachan Favorited By: ' +
            create_erina_list(results['konachan_results']['favorited_by']))

        new_cache_content.append('Konachan ID: ' +
                                 results['konachan_results']['id'])
        new_cache_content.append('Konachan Size: ' +
                                 results['konachan_results']['size'])
        new_cache_content.append('Konachan Source: ' +
                                 results['konachan_results']['source'])
        new_cache_content.append('Konachan Rating: ' +
                                 results['konachan_results']['rating'])
        new_cache_content.append('Konachan Date: ' +
                                 results['konachan_results']['date'])
        new_cache_content.append('Konachan Uploader: ' +
                                 results['konachan_results']['uploader'])
        new_cache_content.append('Konachan Score: ' +
                                 results['konachan_results']['score'])

    elif results['database'] == 'yandere':

        new_cache_content.append(
            'Yandere Copyrights: ' +
            create_erina_list(results['yandere_results']['copyrights']))
        new_cache_content.append(
            'Yandere Styles: ' +
            create_erina_list(results['yandere_results']['styles']))
        new_cache_content.append(
            'Yandere Artists: ' +
            create_erina_list(results['yandere_results']['artists']))
        new_cache_content.append(
            'Yandere Characters: ' +
            create_erina_list(results['yandere_results']['characters']))
        new_cache_content.append(
            'Yandere Tags: ' +
            create_erina_list(results['yandere_results']['tags']))
        new_cache_content.append(
            'Yandere Favorited By: ' +
            create_erina_list(results['yandere_results']['favorited_by']))

        new_cache_content.append('Yandere ID: ' +
                                 results['yandere_results']['id'])
        new_cache_content.append('Yandere Size: ' +
                                 results['yandere_results']['size'])
        new_cache_content.append('Yandere Source: ' +
                                 results['yandere_results']['source'])
        new_cache_content.append('Yandere Rating: ' +
                                 results['yandere_results']['rating'])
        new_cache_content.append('Yandere Date: ' +
                                 results['yandere_results']['date'])
        new_cache_content.append('Yandere Uploader: ' +
                                 results['yandere_results']['uploader'])
        new_cache_content.append('Yandere Score: ' +
                                 results['yandere_results']['score'])

    elif results['database'] == 'anime_pictures':

        new_cache_content.append('Anime-Pictures ID: ' +
                                 results['anime_pictures_results']['id'])
        new_cache_content.append('Anime-Pictures Uploader: ' +
                                 results['anime_pictures_results']['uploader'])
        new_cache_content.append(
            'Anime-Pictures Last Editing User: '******'anime_pictures_results']['last_editing_user'])
        new_cache_content.append(
            'Anime-Pictures Post Date: ' +
            results['anime_pictures_results']['post_date'])
        new_cache_content.append(
            'Anime-Pictures Published Date: ' +
            results['anime_pictures_results']['published_date'])
        new_cache_content.append(
            'Anime-Pictures Download Count: ' +
            results['anime_pictures_results']['download_count'])
        new_cache_content.append('Anime-Pictures Size: ' +
                                 results['anime_pictures_results']['size'])
        new_cache_content.append(
            'Anime-Pictures Aspect Ratio: ' +
            results['anime_pictures_results']['aspect_ratio'])
        new_cache_content.append(
            'Anime-Pictures Content Size: ' +
            results['anime_pictures_results']['content_size'])
        new_cache_content.append(
            'Anime-Pictures Artefacts Degree: ' +
            results['anime_pictures_results']['artefacts_degree'])
        new_cache_content.append(
            'Anime-Pictures Smooth Degree: ' +
            results['anime_pictures_results']['smoothness_degree'])
        new_cache_content.append(
            'Anime-Pictures Complexity: ' +
            results['anime_pictures_results']['complexity'])
        new_cache_content.append(
            'Anime-Pictures Copyright: ' +
            results['anime_pictures_results']['copyright'])
        new_cache_content.append('Anime-Pictures Artist: ' +
                                 results['anime_pictures_results']['artist'])
        new_cache_content.append(
            'Anime-Pictures Average Color: ' + create_erina_list(
                results['anime_pictures_results']['average_color']))
        new_cache_content.append(
            'Anime-Pictures References: ' +
            create_erina_list(results['anime_pictures_results']['references']))
        new_cache_content.append(
            'Anime-Pictures Objects: ' +
            create_erina_list(results['anime_pictures_results']['objects']))
        new_cache_content.append(
            'Anime-Pictures Similar Images: ' + create_erina_list(
                results['anime_pictures_results']['similar_images_id']))
        new_cache_content.append(
            'Anime-Pictures Artist Links: ' + create_erina_list(
                results['anime_pictures_results']['artist_links']))

    elif results['database'] == 'e_shuushuu':

        new_cache_content.append('E-Shuushuu Posting Uploader: ' +
                                 results['e_shuushuu_results']['uploader'])
        new_cache_content.append('E-Shuushuu Posting Post Date: ' +
                                 results['e_shuushuu_results']['post_date'])
        new_cache_content.append('E-Shuushuu Posting Filename: ' +
                                 results['e_shuushuu_results']['filename'])
        new_cache_content.append(
            'E-Shuushuu Posting Original Filename: ' +
            results['e_shuushuu_results']['original_filename'])
        new_cache_content.append('E-Shuushuu Posting Content Size: ' +
                                 results['e_shuushuu_results']['content_size'])
        new_cache_content.append('E-Shuushuu Posting Size: ' +
                                 results['e_shuushuu_results']['size'])
        new_cache_content.append('E-Shuushuu Posting Favorites: ' +
                                 results['e_shuushuu_results']['favorites'])
        new_cache_content.append('E-Shuushuu Posting Image Rating: ' +
                                 results['e_shuushuu_results']['image_rating'])

        new_cache_content.append(
            'E-Shuushuu Tags: ' +
            create_erina_list(results['e_shuushuu_results']['tags']))
        new_cache_content.append(
            'E-Shuushuu Sources: ' +
            create_erina_list(results['e_shuushuu_results']['sources']))
        new_cache_content.append(
            'E-Shuushuu Characters: ' +
            create_erina_list(results['e_shuushuu_results']['characters']))
        new_cache_content.append(
            'E-Shuushuu Artists: ' +
            create_erina_list(results['e_shuushuu_results']['artists']))

    new_cache_content.append('')
    new_cache_content.append('Cache Timestamp: ' +
                             str(datetime.timestamp(datetime.today())))
    new_cache_content.append('Cache Timestamp (formatted): ' + today() +
                             ' at ' + current_time())

    new_cache_destination = env_information.erina_dir + '/ErinaCaches/IQDB_Cache/'
    new_cache_filename = str(image_hash) + '.erina'
    erina_log.logcaches(f'Caching IQDB and {results["database"]} data...')
    write_file(file_title=new_cache_filename,
               text=new_cache_content,
               destination=new_cache_destination)
    return results
Ejemplo n.º 4
0
def search_eshuushuu(url):
    erina_log.logcaches(f'Searching for E-Shuushuu Data...')
    e_shuushuu_results = {}
    response = request(url)
    e_shuushuu = BeautifulSoup(response.text, 'html.parser')

    e_shuushuu_results['uploader'] = ''
    e_shuushuu_results['post_date'] = ''
    e_shuushuu_results['filename'] = ''
    e_shuushuu_results['original_filename'] = ''
    e_shuushuu_results['content_size'] = ''
    e_shuushuu_results['size'] = ''
    e_shuushuu_results['favorites'] = ''

    meta_element = e_shuushuu.find('div', attrs={'class': 'meta'})
    for metadata in meta_element.find_all('dt'):

        if metadata.get_text() == 'Submitted By:':
            e_shuushuu_results['uploader'] = metadata.findNext('dd').get_text()

        if metadata.get_text() == 'Submitted On:':
            e_shuushuu_results['post_date'] = metadata.findNext(
                'dd').get_text()

        if metadata.get_text() == 'Filename:':
            e_shuushuu_results['filename'] = metadata.findNext('dd').get_text()

        if metadata.get_text() == 'Original Filename:':
            e_shuushuu_results['original_filename'] = metadata.findNext(
                'dd').get_text()

        if metadata.get_text() == 'File size:':
            e_shuushuu_results['content_size'] = remove_space_before_and_after(
                metadata.findNext('dd').get_text()).replace('\n', '').replace(
                    '\t', '')

        if metadata.get_text() == 'Dimensions:':
            e_shuushuu_results['size'] = metadata.findNext('dd').get_text()

        if metadata.get_text() == 'Favorites:':
            e_shuushuu_results['favorites'] = metadata.findNext(
                'dd').get_text()

    e_shuushuu_results['tags'] = []
    e_shuushuu_results['sources'] = []
    e_shuushuu_results['characters'] = []
    e_shuushuu_results['artists'] = []
    e_shuushuu_results['image_rating'] = ''

    for metadata in meta_element.find_all('dd'):
        previous_sibling = metadata.find_previous_sibling('dt').get_text()

        if previous_sibling.find('Tags:') != -1:
            for tag in metadata.find_all('span', attrs={'class': 'tag'}):
                e_shuushuu_results['tags'].append(
                    remove_space_before_and_after(tag.get_text()).replace(
                        '"', ''))

        elif previous_sibling.find('Source:') != -1:
            for tag in metadata.find_all('span', attrs={'class': 'tag'}):
                e_shuushuu_results['sources'].append(
                    remove_space_before_and_after(tag.get_text()).replace(
                        '"', ''))

        elif previous_sibling.find('Characters:') != -1:
            for tag in metadata.find_all('span', attrs={'class': 'tag'}):
                e_shuushuu_results['characters'].append(
                    remove_space_before_and_after(tag.get_text()).replace(
                        '"', ''))

        elif previous_sibling.find('Artist:') != -1:
            for tag in metadata.find_all('span', attrs={'class': 'tag'}):
                e_shuushuu_results['artists'].append(
                    remove_space_before_and_after(tag.get_text()).replace(
                        '"', ''))

        elif previous_sibling.find('Rating:') != -1:
            e_shuushuu_results['image_rating'] = remove_space_before_and_after(
                metadata.get_text()).replace('"',
                                             '').replace('\t',
                                                         '').replace('\n', '')

    return e_shuushuu_results
Ejemplo n.º 5
0
def search_animepictures(url):
    erina_log.logcaches(f'Searching for Anime-Pictures Data...')
    animepictures_results = {}
    response = request(url)
    animepictures = BeautifulSoup(response.text, 'html.parser')

    animepictures_results['id'] = 'N/A'
    animepictures_results['uploader'] = 'N/A'
    animepictures_results['last_editing_user'] = '******'
    animepictures_results['post_date'] = 'N/A'
    animepictures_results['published_date'] = 'N/A'
    animepictures_results['download_count'] = 'N/A'
    animepictures_results['size'] = 'N/A'
    animepictures_results['aspect_ratio'] = 'N/A'
    animepictures_results['content_size'] = 'N/A'
    animepictures_results['average_color_name'] = 'N/A'
    animepictures_results['average_color'] = ['N/A']
    animepictures_results['artefacts_degree'] = 'N/A'
    animepictures_results['smoothness_degree'] = 'N/A'
    animepictures_results['complexity'] = 'N/A'

    animepictures_results['id'] = remove_space_before_and_after(
        animepictures.find('div', attrs={
            'class': 'post_content'
        }).find('h1').get_text().split('№')[1]).replace('\n',
                                                        '').replace('\t', '')

    all_content_titles = animepictures.find('div',
                                            attrs={
                                                'class': 'post_content'
                                            }).find_all('b')
    all_children = animepictures.find('div', attrs={
        "class": 'post_content'
    }).contents
    for element in all_content_titles:
        try:
            index = all_children.index(element) + 1
        except:
            index = -1
        if element.get_text().find('Posted by:') != -1:
            try:
                animepictures_results['uploader'] = animepictures.find(
                    'div', attrs={
                        'class': 'post_content'
                    }).find('div', attrs={
                        'class': 'post_content_avatar'
                    }).find('a').get_text()
            except:
                animepictures_results['uploader'] = 'N/A'
        elif element.get_text().find('Current status set by:') != -1:
            try:
                animepictures_results['last_editing_user'] = all_children[
                    index + 1].get_text()
            except:
                animepictures_results['last_editing_user'] = '******'
        elif element.get_text().find('Resolution:') != -1:
            try:
                animepictures_results['size'] = all_children[index +
                                                             1].get_text()
                animepictures_results[
                    'aspect_ratio'] = remove_space_before_and_after(
                        all_children[index + 3].get_text()).replace(
                            '\n', '').replace('\t', '')
            except:
                animepictures_results['size'] = 'N/A'
                animepictures_results['aspect_ratio'] = 'N/A'
        elif element.get_text().find('Color:') != -1:
            try:
                animepictures_results['average_color_name'] = all_children[
                    index + 1].get_text().split(' (')[0].replace('\t',
                                                                 '').replace(
                                                                     '\n', '')
                animepictures_results['average_color'] = all_children[
                    index + 1].get_text().split(' (')[1].replace(')',
                                                                 '').split(' ')
            except:
                animepictures_results['average_color_name'] = 'N/A'
                animepictures_results['average_color'] = ['N/A']
        elif element.get_text().find('Date Upload:') != -1:
            try:
                animepictures_results['post_date'] = all_children[index].strip(
                )
            except:
                animepictures_results['post_date'] = 'N/A'
        elif element.get_text().find('Date Published:') != -1:
            try:
                animepictures_results['published_date'] = all_children[
                    index].strip()
            except:
                animepictures_results['published_date'] = 'N/A'
        elif element.get_text().find('Downloads:') != -1:
            try:
                animepictures_results['download_count'] = all_children[
                    index].strip()
            except:
                animepictures_results['download_count'] = 'N/A'
        elif element.get_text().find('Size:') != -1:
            try:
                animepictures_results['content_size'] = all_children[
                    index].strip()
            except:
                animepictures_results['content_size'] = 'N/A'
        elif element.get_text().find('Artefacts Degree:') != -1:
            try:
                animepictures_results['artefacts_degree'] = all_children[
                    index].strip()
            except:
                animepictures_results['artefacts_degree'] = 'N/A'
        elif element.get_text().find('Smoothness Degree:') != -1:
            try:
                animepictures_results['smoothness_degree'] = all_children[
                    index].strip()
            except:
                animepictures_results['smoothness_degree'] = 'N/A'
        elif element.get_text().find('Complexity:') != -1:
            try:
                animepictures_results['complexity'] = all_children[
                    index].strip()
            except:
                animepictures_results['complexity'] = 'N/A'
    """
    unclear_list_of_info = animepictures.find('div', attrs={'class': 'post_content'}).get_text().split('\n')
    new_list_of_info = remove_elements_from_list(unclear_list_of_info, ['', '  ', '   ', '\t', '\t\t', '\t\t\t', '\xa0\xa0\xa0\xa0', '\xa0', '№', 'Downloads:', 'Resolution:', 'Size:', 'Color:','Complexity:', 'Anime picture', 'Posted by: ', 'Current status set by:', 'Date Upload: ', 'Date Published:', 'Artefacts Degree: ', 'Smoothness Degree: '], delete_inside_string=True)
    new_list_of_info = remove_elements_from_list(new_list_of_info, ['', ' '], delete_inside_string=False, remove_space_before_and_after_string=True)

    keys_list = ['id', 'uploader', 'last_editing_user', 'post_date', 'published_date', 'download_count', 'size', 'aspect_ratio', 'content_size', 'average_color', 'artefacts_degree', 'smoothness_degree', 'complexity']
    iteration = 0
    for key in keys_list:
        if key == 'average_color':
            animepictures_results['average_color_name'] = new_list_of_info[iteration]
            animepictures_results['average_color'] = new_list_of_info[iteration]
        else:
            animepictures_results[key] = new_list_of_info[iteration]
        iteration += 1
    """

    tags_list = animepictures.find('ul', attrs={'class': 'tags'})
    animepictures_results['copyright'] = tags_list.find('a',
                                                        attrs={
                                                            'class':
                                                            'copyright'
                                                        }).get_text()
    animepictures_results['artist'] = tags_list.find('a',
                                                     attrs={
                                                         'class': 'artist'
                                                     }).get_text()

    references = []
    reference_tags_html = BeautifulSoup(response.text.split(
        '<span>reference</span>')[1].split('<span>object</span>')[0],
                                        features='html.parser')
    for element in reference_tags_html.find_all('a'):
        references.append(element.get_text())

    animepictures_results['references'] = remove_elements_from_list(
        references, [''])

    objects = []
    object_tags_html = BeautifulSoup(response.text.split(
        '<span>object</span>')[1].split('<div class="sidebar_block">')[0],
                                     features='html.parser')
    for element in object_tags_html.find_all('a'):
        objects.append(element.get_text())

    animepictures_results['objects'] = remove_elements_from_list(objects, [''])

    similar_images_id = []
    for picture in animepictures.find('div', attrs={
            'class': 'image_body'
    }).find_all('a'):
        if str(picture.parent)[:4] != '<div':
            similar_images_id.append(picture['href'].replace(
                '/pictures/view_post/', '').split('?lang=')[0])
    animepictures_results['similar_images_id'] = similar_images_id

    artist_information_html = BeautifulSoup(response.text.split(
        f'<strong>{animepictures_results["artist"]}:</strong>')[1].split(
            '<div class="post_content">')[0],
                                            features='html.parser')
    artist_links = []
    for link in artist_information_html.find_all('a'):
        artist_links.append(link['href'])

    animepictures_results['artist_links'] = artist_links

    return animepictures_results
Ejemplo n.º 6
0
def search_yandere(url):
    erina_log.logcaches(f'Searching for Yande.re Data...')
    yandere_results = {}
    response = request(url)
    yandere = BeautifulSoup(response.text, 'html.parser')

    copyrights = []
    for element in yandere.find_all('li',
                                    attrs={'class': 'tag-type-copyright'}):
        copyrights.append(element.find_all('a')[1].get_text())
    yandere_results['copyrights'] = copyrights

    styles = []
    for element in yandere.find_all('li', attrs={'class': 'tag-type-style'}):
        styles.append(element.find_all('a')[1].get_text())
    yandere_results['styles'] = styles

    artists = []
    for element in yandere.find_all('li', attrs={'class': 'tag-type-artist'}):
        artists.append(element.find_all('a')[1].get_text())
    yandere_results['artists'] = artists

    characters = []
    for element in yandere.find_all('li',
                                    attrs={'class': 'tag-type-character'}):
        characters.append(element.find_all('a')[1].get_text())
    yandere_results['characters'] = characters

    tags = []
    for element in yandere.find_all('li', attrs={'class': 'tag-type-general'}):
        tags.append(element.find_all('a')[1].get_text())
    yandere_results['tags'] = tags

    yandere_statistics_html = response.text.split(
        '<h5>Statistics</h5>')[1].split('<h5>Options</h5>')[0]
    yandere_statistics = BeautifulSoup(yandere_statistics_html, 'html.parser')
    list_items = yandere_statistics.find_all('li')
    yandere_results['id'] = 'N/A'
    yandere_results['size'] = 'N/A'
    yandere_results['source'] = 'N/A'
    yandere_results['rating'] = 'N/A'
    yandere_results['date'] = 'N/A'
    yandere_results['uploader'] = 'N/A'
    yandere_results['score'] = 'N/A'
    yandere_results['favorited_by'] = ['N/A']
    for item in list_items:
        if item.get_text()[:3] == 'Id:':
            yandere_results['id'] = item.get_text()[4:]
        elif item.get_text()[:5] == 'Size:':
            yandere_results['size'] = item.get_text()[6:]
        elif item.get_text()[:7] == 'Source:':
            yandere_results['source'] = item.find('a')['href']
        elif item.get_text()[:7] == 'Rating:':
            yandere_results['rating'] = remove_space_before_and_after(
                item.get_text()[8:])
        elif item.get_text()[:7] == 'Posted:':
            yandere_results['date'] = item.get_text()[8:].split(' by ')[0]
            yandere_results['uploader'] = item.get_text()[8:].split(' by ')[1]
        elif item.get_text()[:6] == 'Score:':
            yandere_results['score'] = re.sub('[^0123456789]', '',
                                              item.get_text()[7:])
        elif item.get_text()[:13] == 'Favorited by:':
            yandere_results['favorited_by'] = item.get_text()[14:].split(', ')

    return yandere_results
Ejemplo n.º 7
0
def search_danbooru(url):
    erina_log.logcaches(f'Searching for Danbooru Data...')
    danbooru_results = {}
    response = request(url)
    danbooru = BeautifulSoup(response.text, 'html.parser')
    artists = []
    for element in danbooru.find('ul', attrs={
            'class': 'artist-tag-list'
    }).findChildren('li'):
        artists.append(
            element.find_all('a', attrs={'class': 'search-tag'})[0].get_text())
    danbooru_results['artists'] = artists

    copyrights = []
    for element in danbooru.find('ul', attrs={
            'class': 'copyright-tag-list'
    }).findChildren('li'):
        copyrights.append(
            element.find_all('a', attrs={'class': 'search-tag'})[0].get_text())
    danbooru_results['copyrights'] = copyrights

    characters = []
    for element in danbooru.find('ul', attrs={
            'class': 'character-tag-list'
    }).findChildren('li'):
        characters.append(
            element.find_all('a', attrs={'class': 'search-tag'})[0].get_text())
    danbooru_results['characters'] = characters

    tags = []
    for element in danbooru.find('ul', attrs={
            'class': 'general-tag-list'
    }).findChildren('li'):
        tags.append(
            element.find_all('a', attrs={'class': 'search-tag'})[0].get_text())
    danbooru_results['tags'] = tags

    metadatas = []
    for element in danbooru.find('ul', attrs={
            'class': 'meta-tag-list'
    }).findChildren('li'):
        metadatas.append(
            element.find_all('a', attrs={'class': 'search-tag'})[0].get_text())
    danbooru_results['metadatas'] = metadatas

    danbooru_results['id'] = remove_space_before_and_after(
        danbooru.find_all('li', attrs={'id':
                                       'post-info-id'})[0].get_text().replace(
                                           'ID: ', '').replace('\n', ''))
    danbooru_results['uploader'] = remove_space_before_and_after(
        danbooru.find_all('li',
                          attrs={'id':
                                 'post-info-uploader'})[0].get_text().replace(
                                     'Uploader: ',
                                     '').replace('\n', '')).replace('»', '')
    danbooru_results['date'] = remove_space_before_and_after(
        danbooru.find_all('li',
                          attrs={'id':
                                 'post-info-date'})[0].get_text().replace(
                                     'Date: ', '').replace('\n', ''))
    danbooru_results['content_size'] = remove_space_before_and_after(
        danbooru.find_all('li',
                          attrs={'id':
                                 'post-info-size'})[0].get_text().replace(
                                     'Size: ',
                                     '').split(' .')[0].replace('\n', ''))
    danbooru_results['format'] = remove_space_before_and_after(
        '.' +
        danbooru.find_all('li', attrs={'id': 'post-info-size'})[0].get_text(
        ).replace('Size: ', '').split(' .')[1].split(' ')[0].replace('\n', ''))
    danbooru_results['size'] = remove_space_before_and_after(
        danbooru.find_all(
            'li', attrs={'id': 'post-info-size'})[0].get_text().replace(
                'Size: ', '').split(' (')[1][:-1].replace('\n',
                                                          '').replace(')', ''))
    danbooru_results['source'] = danbooru.find_all(
        'li', attrs={'id': 'post-info-source'})[0].find('a')['href']
    danbooru_results['rating'] = remove_space_before_and_after(
        danbooru.find_all('li',
                          attrs={'id':
                                 'post-info-rating'})[0].get_text().replace(
                                     'Rating: ', '').replace('\n', ''))
    danbooru_results['score'] = remove_space_before_and_after(
        danbooru.find_all('li',
                          attrs={'id':
                                 'post-info-score'})[0].get_text().replace(
                                     'Score: ', '').replace('\n', ''))
    danbooru_results['favorites'] = remove_space_before_and_after(
        danbooru.find_all('li',
                          attrs={'id':
                                 'post-info-favorites'})[0].get_text().replace(
                                     'Favorites: ', '').replace('\n', ''))
    danbooru_results['status'] = remove_space_before_and_after(
        danbooru.find_all('li',
                          attrs={'id':
                                 'post-info-status'})[0].get_text().replace(
                                     '\n',
                                     '').replace(' ',
                                                 '').replace('Status:', ''))
    return danbooru_results