Esempio n. 1
0
def get_or_create_artist(name):
    """
    Return the Artist matching ``name``, creating one when nothing is chosen.

    The name is split on whitespace: with exactly two parts the first part is
    used as the last name and the second as the first name; with more parts
    the final part is the last name and everything before it is the first
    name; a name without a space is stored as the last name only.

    Candidates are searched in this order: exact (first, last) match, then
    first name only, then last name only. A unique exact match is returned
    directly; any other non-empty candidate set is handed to ``pick_among``.
    When ``pick_among`` returns nothing truthy, a new Artist is saved.

    :param name: full name of the artist
    :return: an existing or newly saved Artist
    """
    if ' ' in name:
        parts = name.split()
        if len(parts) == 2:
            last, first = parts
        else:
            last, first = parts[-1], ' '.join(parts[:-1])  # Diana Wynne Jones
    else:
        last, first = name, ''

    # Querysets are lazy: building them up front costs no database access.
    exact = Artist.objects.filter(first_name=first, last_name=last)
    same_first = Artist.objects.filter(first_name=first)
    same_last = Artist.objects.filter(last_name=last)

    contestants = []
    try:
        if exact.exists():
            contestants = exact
            # get() raises when several rows match; the handler below then
            # leaves the decision to pick_among().
            return exact.get()
        elif first != '' and same_first.exists():
            contestants = same_first
        elif same_last.exists():
            contestants = same_last
    except Exception:
        # Best-effort lookup: keep whatever candidates were found so far, but
        # no longer swallow KeyboardInterrupt/SystemExit like a bare except.
        pass

    if contestants:
        choice = pick_among(contestants)
        if choice:
            return choice

    artist = Artist(first_name=first, last_name=last)
    artist.save()
    return artist
Esempio n. 2
0
def get_or_create_artist(name):
    """
    Look up an Artist by full name and create it if no candidate is selected.

    Name splitting: two whitespace-separated parts are read as
    ``last first``; three or more parts use the final part as the last name
    and the preceding parts as the first name; a name containing no space
    becomes the last name with an empty first name.

    Lookup: an exact (first, last) match that is unique is returned
    immediately. Otherwise the candidates (exact duplicates, else same first
    name, else same last name) are passed to ``pick_among``; if it returns
    nothing truthy, a new Artist is saved and returned.

    :param name: full name of the artist
    :return: an existing or newly saved Artist
    """
    if ' ' in name:
        parts = name.split()
        if len(parts) == 2:
            last, first = parts
        else:
            last, first = parts[-1], ' '.join(parts[:-1])  # Diana Wynne Jones
    else:
        last, first = name, ''

    contestants = []
    try:
        # Each queryset is built once and reused instead of being rebuilt
        # for the count, the candidate list and the final get().
        exact_matches = Artist.objects.filter(first_name=first, last_name=last)
        if exact_matches.exists():
            contestants = exact_matches
            # Raises if several artists share the name; handled below so that
            # pick_among() can arbitrate between them.
            return exact_matches.get()

        first_matches = Artist.objects.filter(first_name=first)
        last_matches = Artist.objects.filter(last_name=last)
        if first != '' and first_matches.exists():
            contestants = first_matches
        elif last_matches.exists():
            contestants = last_matches
    except Exception:
        # Deliberately best-effort, but narrowed from a bare ``except`` so
        # that KeyboardInterrupt and SystemExit still propagate.
        pass

    if contestants:
        choice = pick_among(contestants)
        if choice:
            return choice

    artist = Artist(first_name=first, last_name=last)
    artist.save()
    return artist
Esempio n. 3
0
def run():
    """
    Import artists and mangas from ``../data/manga-news/manga.csv``.

    The file is read twice. The first pass makes sure an Artist exists for
    every writer and mangaka; the second pass creates the missing Manga rows
    and attaches their genres. The first line of the file is skipped
    (presumably a header row). The second pass stops at the first
    IntegrityError, DataError or unknown Genre, after printing the offending
    line and value.
    """
    # "LASTNAME Firstname": an upper-case last name followed by the rest.
    name_pattern = re.compile(r"^([A-ZÔÛÏ'-]+) (.*)$")

    artists = {}
    with open('../data/manga-news/manga.csv') as f:
        next(f)
        for line in f:
            # Unpacking all 11 columns also rejects malformed rows.
            (title, vo_title, writer, mangaka, editor, origin, genre1, genre2,
             manga_type, synopsis, poster) = line.split(';;')
            for artist in (writer, mangaka):
                if artist in artists:
                    continue
                # Parse the name of the artist being processed; matching on
                # `writer` here would store the mangaka under the writer's name.
                m = name_pattern.match(artist)
                if m:
                    last_name, first_name = m.groups()
                    last_name = last_name.lower().capitalize()
                else:
                    first_name = ''
                    last_name = artist
                matches = Artist.objects.filter(first_name=first_name, last_name=last_name)
                if matches.exists():
                    a = matches.get()
                else:
                    a = Artist(first_name=first_name, last_name=last_name)
                    a.save()
                artists[artist] = a

    with open('../data/manga-news/manga.csv') as f:
        next(f)
        for line in f:
            # Strip the line terminator so it does not end up inside `poster`.
            (title, vo_title, writer, mangaka, editor, origin, genre1, genre2,
             manga_type, synopsis, poster) = line.rstrip('\n').split(';;')
            try:
                same_manga = Manga.objects.filter(title=title, vo_title=vo_title)
                if same_manga.exists():
                    manga = same_manga.get()
                else:
                    manga = Manga(title=title, vo_title=vo_title, mangaka=artists[mangaka], writer=artists[writer], editor=editor, origin=origin.lower().replace('hong kong', 'hong-kong').replace('international', 'intl'), manga_type=manga_type.lower(), source='', poster=poster, synopsis=synopsis)
                    manga.save()
                if genre1:
                    manga.genre.add(Genre.objects.get(title=genre1))
                if genre2:
                    manga.genre.add(Genre.objects.get(title=genre2))
            except IntegrityError as err:
                print(line)
                print(writer)
                print(err)
                break
            except DataError as err:
                print(line)
                print(origin)
                print(err)
                break
            except Genre.DoesNotExist as err:
                print(line)
                print('Genres: [%s] [%s]' % (genre1, genre2))
                print(err)
                break
Esempio n. 4
0
    def test_anidb_get_animes(self):
        """
        Retrieve two works through the AniDB client from mocked responses and
        check their tags, metadata and staff, and that a pre-existing artist
        is updated with its AniDB creator ID rather than duplicated.
        """
        # Fake an artist entry with no AniDB creator ID that will be filled by retrieving Sangatsu.
        # Model.save() returns None, so its result is deliberately not kept.
        Artist(name="Shinbou Akiyuki").save()

        # One fixture per mocked GET; all three must be consumed
        # (assert_all_requests_are_fired=True).
        filenames = ['anidb/sangatsu_no_lion.xml', 'anidb/sangatsu_no_lion.xml', 'anidb/hibike_euphonium.xml']
        with responses.RequestsMock(assert_all_requests_are_fired=True) as rsps:
            for filename in filenames:
                rsps.add(
                    responses.GET,
                    AniDB.BASE_URL,
                    body=self.read_fixture(filename),
                    status=200,
                    content_type='application/xml'
                )

            sangatsu = self.anidb.get_or_update_work(11606)
            tags_sangatsu_from_anidb = self.anidb.get_tags(11606)
            tags_diff_sangatsu = diff_between_anidb_and_local_tags(sangatsu, tags_sangatsu_from_anidb)
            hibike = self.anidb.get_or_update_work(10889)

        # Retrieve tags
        tags_sangatsu = set(Work.objects.get(pk=sangatsu.pk).taggedwork_set.all().values_list('tag__title', flat=True))
        tags_hibike = set(Work.objects.get(pk=hibike.pk).taggedwork_set.all().values_list('tag__title', flat=True))
        shared_tags = tags_sangatsu.intersection(tags_hibike)

        # Checks on tags
        self.assertEqual(len(tags_sangatsu), 30)
        self.assertEqual(len(tags_hibike), 38)
        self.assertEqual(len(shared_tags), 18)

        # Check for Sangatsu's informations
        self.assertEqual(sangatsu.title, 'Sangatsu no Lion')
        self.assertEqual(sangatsu.nb_episodes, 22)
        self.assertEqual(sangatsu.studio.title, 'Shaft')
        self.assertEqual(sangatsu.date, datetime(2016, 10, 8, 0, 0))
        self.assertEqual(sangatsu.end_date, datetime(2017, 3, 18, 0, 0))

        # Check for Sangatsu's staff
        staff_sangatsu = Work.objects.get(pk=sangatsu.pk).staff_set.all().values_list('artist__name', flat=True)
        self.assertCountEqual(staff_sangatsu, ['Umino Chika', 'Hashimoto Yukari', 'Shinbou Akiyuki', 'Okada Kenjirou'])

        # Check retrieved tags from AniDB
        self.assertEqual(len(tags_diff_sangatsu["deleted_tags"]), 0)
        self.assertEqual(len(tags_diff_sangatsu["added_tags"]), 0)
        self.assertEqual(len(tags_diff_sangatsu["updated_tags"]), 0)
        self.assertEqual(len(tags_diff_sangatsu["kept_tags"]), len(tags_sangatsu))

        # Check for no artist duplication
        matching_artists = Artist.objects.filter(name="Shinbou Akiyuki")
        self.assertEqual(matching_artists.count(), 1)
        self.assertEqual(matching_artists.first().anidb_creator_id, 59)
Esempio n. 5
0
    def test_insert_into_database(self):
        """
        Insert AniList entries into the database from mocked responses and
        check the resulting work, titles, genres, related works and staff,
        that a pre-existing artist is not duplicated, and that inserting the
        same work twice yields the same Work.
        """
        self.add_fake_auth()
        # Pre-existing artist without AniList id. Model.save() returns None,
        # so its result is deliberately not kept.
        Artist(name='Ishihara Tatsuya').save()

        # Test insert AniListEntry into database
        responses.add(
            responses.GET,
            urljoin(AniList.BASE_URL, 'browse/anime'),
            body=self.read_fixture('anilist/airing_summer_2017_trimmed.json'),
            status=200,
            content_type='application/json')

        seasonal = list(
            self.anilist.list_seasonal_animes(year=2017, season='summer'))
        self.assertEqual(
            len(insert_works_into_database_from_anilist(seasonal)), 7)

        # Test insert AniListRichEntry into database
        responses.add(responses.GET,
                      urljoin(AniList.BASE_URL, 'anime/20912/page'),
                      body=self.read_fixture('anilist/hibike_euphonium.json'),
                      status=200,
                      content_type='application/json')

        hibike_entry = self.anilist.get_work_by_id(AniListWorks.animes, 20912)
        hibike = insert_work_into_database_from_anilist(hibike_entry)

        titles_hibike = WorkTitle.objects.filter(work=hibike)
        genres_hibike = hibike.genre.values_list('title', flat=True)
        related_hibike = RelatedWork.objects.filter(parent_work=hibike)
        staff_hibike = Work.objects.get(
            pk=hibike.pk).staff_set.all().values_list('artist__name',
                                                      flat=True)

        self.assertEqual(hibike.studio.title, 'Kyoto Animation')
        self.assertEqual(len(titles_hibike), 3)
        self.assertEqual(len(related_hibike), 4)
        self.assertCountEqual(genres_hibike,
                              ['Slice of Life', 'Music', 'Drama'])
        self.assertCountEqual(
            staff_hibike,
            ['Ishihara Tatsuya', 'Matsuda Akito', 'Takeda Ayano'])

        # Check for no artist duplication
        matching_artists = Artist.objects.filter(name='Ishihara Tatsuya')
        self.assertEqual(matching_artists.count(), 1)
        self.assertEqual(matching_artists.first().anilist_creator_id, 100055)

        # Try adding this work to the DB again
        hibike_again = insert_work_into_database_from_anilist(hibike_entry)
        self.assertEqual(hibike, hibike_again)
Esempio n. 6
0
def build_staff(work: Work,
                staff: List[AniListStaff]) -> List[Staff]:
    """
    Create the Artist and Staff rows that a Work is still missing.

    Artists already present (same name or same AniList id) are updated in
    place; the others are bulk-created. A Staff row is then bulk-created for
    each creator whose AniList role is mapped and whose (work, artist, role)
    triple is not stored yet. Rows are keyed by creator id, so a later entry
    for the same creator replaces an earlier one.

    :param work: the work whose staff is being recorded
    :param staff: AniList staff entries (artist identity and role)
    :type work: Work
    :type staff: List[AniListStaff]
    :return: the Staff objects bulk-created by this call
    :rtype: List[Staff]
    """

    if not staff:
        return []

    anilist_roles_map = {
        'Director': 'director',
        'Music': 'composer',
        'Original Creator': 'author'
    }

    pending_artists = {}
    artists_by_creator = {}
    for creator in staff:
        last_part = creator.name_last or ''
        first_part = creator.name_first or ''
        name = '{} {}'.format(last_part, first_part).strip()

        try:
            # Known artist: refresh name and AniList id instead of duplicating
            known = Artist.objects.get(Q(name=name) | Q(anilist_creator_id=creator.id))
            known.name = name
            known.anilist_creator_id = creator.id
            known.save()
            artists_by_creator[creator.id] = known
        except Artist.DoesNotExist:
            # Unknown artist: queued for a single bulk insert below
            pending_artists[creator.id] = Artist(name=name, anilist_creator_id=creator.id)

    for created in Artist.objects.bulk_create(list(pending_artists.values())):
        artists_by_creator[created.anilist_creator_id] = created

    artists_ids = []
    for known_or_created in artists_by_creator.values():
        artists_ids.append(known_or_created.id)

    already_stored = Staff.objects.filter(work=work, artist__in=artists_ids)
    existing_staff_artists = set(already_stored.values_list('work__id', 'artist__id', 'role'))

    role_map = staff_roles.role_map

    missing_staff = {}
    for creator in staff:
        # Roles without a mapping are ignored
        if not anilist_roles_map.get(creator.role):
            continue
        role = role_map.get(anilist_roles_map[creator.role])
        artist = artists_by_creator[creator.id]
        if (work.id, artist.id, role.id) in existing_staff_artists:
            continue
        missing_staff[creator.id] = Staff(work=work, role=role, artist=artist)

    new_rows = list(missing_staff.values())
    Staff.objects.bulk_create(new_rows)

    return list(missing_staff.values())
Esempio n. 7
0
def run():
    """
    Fill Anime rows with the data of their paired manga-news entry.

    ``../data/manga-news/anime.csv`` is loaded into a list of ``AnimeData``
    (its first line is skipped, presumably a header row). Each line of
    ``../data/PAIR.log`` is read as ``<mangaki id>::<manga-news id>``, where
    the manga-news id is the 1-based position of the row in the CSV. The
    matching Anime gets its studio, editor, author, type, genres, episode
    count, origin and synopsis set, then is saved.

    :raises ValueError: if a CSV row does not have exactly 12 fields
    """
    # "LASTNAME Firstname": an upper-case last name followed by the rest.
    author_pattern = re.compile(r"^([A-ZÔÛÏ'-]+) (.*)$")

    anime_data = []
    with open('../data/manga-news/anime.csv') as f:
        next(f)
        # Data rows start on line 2 of the file, after the skipped first line.
        for line_number, line in enumerate(f, start=2):
            fields = line.split(';;')
            if len(fields) != 12:
                raise ValueError('anime.csv line %d: expected 12 fields, got %d'
                                 % (line_number, len(fields)))
            anime_data.append(AnimeData(*fields))

    with open('../data/PAIR.log') as f:
        for line in f:
            pair = line.split('::')
            mangaki_id = int(pair[0])
            mn_id = int(pair[1])
            anime = Anime.objects.get(id=mangaki_id)
            data = anime_data[mn_id - 1]
            anime.studio = create_if_not_exists(Studio, data.studio)
            anime.editor = create_if_not_exists(Editor, data.editor)
            m = author_pattern.match(data.author)
            if m:
                last_name, first_name = m.groups()
                last_name = last_name.lower().capitalize()
            else:
                first_name = ''
                last_name = data.author
            same_author = Artist.objects.filter(first_name=first_name,
                                                last_name=last_name)
            if same_author.exists():
                a = same_author.get()
                print('Exists', a)
            else:
                print('NEW')
                a = Artist(first_name=first_name, last_name=last_name)
                a.save()
            anime.author = a
            anime.anime_type = data.anime_type
            genre1 = create_if_not_exists(Genre, data.genre1)
            genre2 = create_if_not_exists(Genre, data.genre2)
            anime.genre.add(genre1)
            anime.genre.add(genre2)
            anime.nb_episodes = data.nb_episodes
            anime.origin = data.origin
            anime.synopsis = data.synopsis  # The most important!
            anime.save()
Esempio n. 8
0
def build_staff(work: Work, staff: List[AniListStaff]) -> List[Staff]:
    """
    Insert Artist and Staff objects for a given Work when required into Mangaki's database.

    Artists are tracked by AniList creator id, so every creator is paired
    with its own artist whatever the mix of existing and new artists, and a
    creator listed several times (one entry per role) is looked up and
    created only once.

    :param work: a work
    :param staff: a list of staff (and artists) informations
    :type work: Work
    :type staff: List[AniListStaff]
    :return: a list of Staff objects that were inserted in Mangaki's database
    :rtype: List[Staff]
    """

    if not staff:
        return []

    anilist_roles_map = {
        'Director': 'director',
        'Music': 'composer',
        'Original Creator': 'author'
    }

    artists = {}         # AniList creator id -> Artist stored in the database
    artists_to_add = {}  # AniList creator id -> Artist waiting for bulk creation
    for creator in staff:
        if creator.id in artists or creator.id in artists_to_add:
            continue  # Same creator with another role: already handled

        name = '{} {}'.format(creator.name_last or '', creator.name_first
                              or '').strip()

        try:
            artist = Artist.objects.get(
                Q(name=name) | Q(anilist_creator_id=creator.id))
        except Artist.DoesNotExist:  # This artist does not yet exist : will be bulk created
            artists_to_add[creator.id] = Artist(name=name,
                                                anilist_creator_id=creator.id)
        else:  # This artist exists : prevent duplicates by updating with the AniList id
            artist.name = name
            artist.anilist_creator_id = creator.id
            artist.save()
            artists[creator.id] = artist

    for artist in Artist.objects.bulk_create(list(artists_to_add.values())):
        artists[artist.anilist_creator_id] = artist

    # Primary keys of the artists that already have a Staff entry on this work
    existing_staff_artist_ids = set(
        Staff.objects.filter(work=work, artist__in=list(
            artists.values())).values_list('artist_id', flat=True))

    role_map = staff_roles.role_map

    missing_staff = [
        Staff(work=work,
              role=role_map.get(anilist_roles_map[creator.role]),
              artist=artists[creator.id]) for creator in staff
        if anilist_roles_map.get(creator.role)
        and artists[creator.id].pk not in existing_staff_artist_ids
    ]

    Staff.objects.bulk_create(missing_staff)

    return missing_staff
Esempio n. 9
0
def run():
    """
    Fill Anime rows with the data of their paired manga-news entry.

    ``../data/manga-news/anime.csv`` is loaded into a list of ``AnimeData``
    (its first line is skipped, presumably a header row). Each line of
    ``../data/PAIR.log`` is read as ``<mangaki id>::<manga-news id>``, where
    the manga-news id is the 1-based position of the row in the CSV. The
    matching Anime gets its studio, editor, author, type, genres, episode
    count, origin and synopsis set, then is saved.

    :raises ValueError: if a CSV row does not have exactly 12 fields
    """
    # "LASTNAME Firstname": an upper-case last name followed by the rest.
    author_pattern = re.compile(r"^([A-ZÔÛÏ'-]+) (.*)$")

    anime_data = []
    with open("../data/manga-news/anime.csv") as f:
        next(f)
        # Data rows start on line 2 of the file, after the skipped first line.
        for line_number, line in enumerate(f, start=2):
            fields = line.split(";;")
            if len(fields) != 12:
                raise ValueError(
                    "anime.csv line %d: expected 12 fields, got %d" % (line_number, len(fields))
                )
            anime_data.append(AnimeData(*fields))

    with open("../data/PAIR.log") as f:
        for line in f:
            pair = line.split("::")
            mangaki_id = int(pair[0])
            mn_id = int(pair[1])
            anime = Anime.objects.get(id=mangaki_id)
            data = anime_data[mn_id - 1]
            anime.studio = create_if_not_exists(Studio, data.studio)
            anime.editor = create_if_not_exists(Editor, data.editor)
            m = author_pattern.match(data.author)
            if m:
                last_name, first_name = m.groups()
                last_name = last_name.lower().capitalize()
            else:
                first_name = ""
                last_name = data.author
            same_author = Artist.objects.filter(first_name=first_name, last_name=last_name)
            if same_author.exists():
                a = same_author.get()
                print("Exists", a)
            else:
                print("NEW")
                a = Artist(first_name=first_name, last_name=last_name)
                a.save()
            anime.author = a
            anime.anime_type = data.anime_type
            genre1 = create_if_not_exists(Genre, data.genre1)
            genre2 = create_if_not_exists(Genre, data.genre2)
            anime.genre.add(genre1)
            anime.genre.add(genre2)
            anime.nb_episodes = data.nb_episodes
            anime.origin = data.origin
            anime.synopsis = data.synopsis  # The most important!
            anime.save()
Esempio n. 10
0
    def test_insert_into_database(self):
        """
        Insert an AniList work into the database from a mocked response and
        check its studio, titles, genres and staff, that a pre-existing
        artist is not duplicated, and that inserting the same work twice
        yields the same Work.
        """
        # Pre-existing artist without AniList id. Model.save() returns None,
        # so its result is deliberately not kept.
        Artist(name='Ishihara Tatsuya').save()

        # Test insert AniListEntry into database
        responses.add(responses.POST,
                      self.anilist.BASE_URL,
                      body=self.read_fixture('anilist/hibike_euphonium.json'),
                      status=200,
                      content_type='application/json')

        hibike_entry = self.anilist.get_work(search_id=20912)
        hibike = insert_work_into_database_from_anilist(hibike_entry,
                                                        build_related=False)

        titles_hibike = WorkTitle.objects.filter(work=hibike).values_list(
            'title', flat=True)
        genres_hibike = hibike.genre.values_list('title', flat=True)
        related_hibike = RelatedWork.objects.filter(parent_work=hibike)
        staff_hibike = Work.objects.get(
            pk=hibike.pk).staff_set.all().values_list('artist__name',
                                                      flat=True)

        self.assertEqual(hibike.studio.title, 'Kyoto Animation')
        self.assertCountEqual(
            titles_hibike,
            ['Hibike! Euphonium', 'Sound! Euphonium', '響け!ユーフォニアム'])
        self.assertCountEqual(genres_hibike,
                              ['Slice of Life', 'Music', 'Drama'])
        self.assertCountEqual(
            staff_hibike,
            ['Ishihara Tatsuya', 'Matsuda Akito', 'Takeda Ayano'])

        # Check for no artist duplication
        matching_artists = Artist.objects.filter(name='Ishihara Tatsuya')
        self.assertEqual(matching_artists.count(), 1)
        self.assertEqual(matching_artists.first().anilist_creator_id, 100055)

        # Try adding this work to the DB again
        hibike_again = insert_work_into_database_from_anilist(
            hibike_entry, build_related=False)
        self.assertEqual(hibike, hibike_again)
Esempio n. 11
0
def build_staff(work: Work, staff: List[Tuple[AniListEntry,
                                              str]]) -> List[Staff]:
    """
    Insert the Artist and Staff objects that a Work is missing.

    Artists are tracked by AniList creator id, so every creator is paired
    with its own artist whatever the mix of existing and new artists, and a
    creator listed several times (one entry per role) is looked up and
    created only once.

    :param work: the work whose staff is being recorded
    :param staff: staff entries; each one is read through its ``id``,
        ``name_last``, ``name_first`` and ``role`` attributes
    :return: the Staff objects that were bulk-created by this call
    """
    if not staff:
        return []

    anilist_roles_map = {
        'Director': 'director',
        'Music': 'composer',
        'Original Creator': 'author'
    }

    artists = {}         # AniList creator id -> Artist stored in the database
    artists_to_add = {}  # AniList creator id -> Artist waiting for bulk creation
    for creator in staff:
        if creator.id in artists or creator.id in artists_to_add:
            continue  # Same creator with another role: already handled

        name = '{} {}'.format(creator.name_last or '', creator.name_first
                              or '').strip()

        try:
            artist = Artist.objects.get(
                Q(name=name) | Q(anilist_creator_id=creator.id))
        except Artist.DoesNotExist:  # This artist does not yet exist : will be bulk created
            artists_to_add[creator.id] = Artist(name=name,
                                                anilist_creator_id=creator.id)
        else:  # This artist exists : prevent duplicates by updating with the AniList id
            artist.name = name
            artist.anilist_creator_id = creator.id
            artist.save()
            artists[creator.id] = artist

    for artist in Artist.objects.bulk_create(list(artists_to_add.values())):
        artists[artist.anilist_creator_id] = artist

    # Primary keys of the artists that already have a Staff entry on this work
    existing_staff_artist_ids = set(
        Staff.objects.filter(work=work, artist__in=list(
            artists.values())).values_list('artist_id', flat=True))

    missing_staff = [
        Staff(work=work,
              role=role_map.get(anilist_roles_map[creator.role]),
              artist=artists[creator.id]) for creator in staff
        if anilist_roles_map.get(creator.role)
        and artists[creator.id].pk not in existing_staff_artist_ids
    ]

    Staff.objects.bulk_create(missing_staff)

    return missing_staff
Esempio n. 12
0
    def _build_staff(self,
                     work: Work,
                     creators: List[Dict[str, Any]],
                     reload_role_cache: bool = False) -> List[Staff]:
        """
        Create the Artist and Staff rows that ``work`` is missing.

        :param work: the work whose staff is being recorded
        :param creators: one dict per (artist, role) pair, read through its
            ``"name"``, ``"anidb_creator_id"`` and ``"role"`` keys
        :param reload_role_cache: when true, ``self.role_map`` is deleted
            before anything else (presumably to force a cached value to be
            rebuilt; confirm against the class definition)
        :return: the Staff objects that were bulk-created by this call
        """
        if reload_role_cache:
            del self.role_map

        processed_anidb_aids = set()
        artists_to_add = []
        artists_list = []
        for nc in creators:
            # Skip before querying: an artist with more than one role only
            # needs a single lookup.
            if nc["anidb_creator_id"] in processed_anidb_aids:
                continue
            processed_anidb_aids.add(nc["anidb_creator_id"])

            artist = Artist.objects.filter(
                Q(name=nc["name"])
                | Q(anidb_creator_id=nc["anidb_creator_id"])).first()

            if not artist:  # This artist does not yet exist : will be bulk created
                artist = Artist(name=nc["name"],
                                anidb_creator_id=nc["anidb_creator_id"])
                artists_to_add.append(artist)
            else:  # This artist exists : prevent duplicates by updating with the AniDB id
                artist.name = nc["name"]
                artist.anidb_creator_id = nc["anidb_creator_id"]
                artist.save()
                artists_list.append(artist)

        artists_list.extend(Artist.objects.bulk_create(artists_to_add))
        artists = {artist.name: artist for artist in artists_list}

        # (work pk, role pk, artist pk) triples that are already stored
        existing_staff = set(
            Staff.objects.filter(
                work=work,
                role__in=[nc["role"] for nc in creators],
                artist__name__in=[nc["name"] for nc in creators]).values_list(
                    'work', 'role', 'artist'))
        missing_staff = [
            Staff(work=work, role=nc["role"], artist=artists[nc["name"]])
            for nc in creators
            if (work.pk, nc["role"].pk,
                artists[nc["name"]].pk) not in existing_staff
        ]

        Staff.objects.bulk_create(missing_staff)
        return missing_staff
Esempio n. 13
0
    def _build_staff(self,
                     work: Work,
                     creators: List[Dict[str, Any]],
                     reload_role_cache: bool = False) -> List[Staff]:
        """
        Create the Artist and Staff rows that ``work`` is missing.

        Artists are tracked by AniDB creator id, so each entry of
        ``creators`` is paired with its own artist whatever the mix of
        existing and new artists, and a creator listed with several roles is
        looked up and created only once. Already stored staff is detected by
        comparing primary keys, matching what ``values_list`` returns.

        :param work: the work whose staff is being recorded
        :param creators: one dict per (artist, role) pair, read through its
            ``"name"``, ``"anidb_creator_id"`` and ``"role"`` keys
        :param reload_role_cache: when true, ``self.role_map`` is deleted
            before anything else (presumably to force a cached value to be
            rebuilt; confirm against the class definition)
        :return: the Staff objects that were bulk-created by this call
        """
        if reload_role_cache:
            del self.role_map

        seen_creator_ids = set()
        artists_to_add = []
        artists = {}  # AniDB creator id -> Artist stored in the database
        for nc in creators:
            creator_id = nc["anidb_creator_id"]
            if creator_id in seen_creator_ids:  # Same artist with another role
                continue
            seen_creator_ids.add(creator_id)

            artist = Artist.objects.filter(
                Q(name=nc["name"])
                | Q(anidb_creator_id=creator_id)).first()

            if not artist:  # This artist does not yet exist : will be bulk created
                artists_to_add.append(
                    Artist(name=nc["name"], anidb_creator_id=creator_id))
            else:  # This artist exists : prevent duplicates by updating with the AniDB id
                artist.name = nc["name"]
                artist.anidb_creator_id = creator_id
                artist.save()
                artists[creator_id] = artist

        for artist in Artist.objects.bulk_create(artists_to_add):
            artists[artist.anidb_creator_id] = artist

        # (work pk, role pk, artist pk) triples that are already stored
        existing_staff = set(
            Staff.objects.filter(work=work,
                                 role__in=[nc["role"] for nc in creators],
                                 artist__in=list(artists.values())
                                 ).values_list('work', 'role', 'artist'))

        missing_staff = [
            Staff(work=work,
                  role=nc["role"],
                  artist=artists[nc["anidb_creator_id"]])
            for nc in creators
            if (work.pk, nc["role"].pk,
                artists[nc["anidb_creator_id"]].pk) not in existing_staff
        ]

        Staff.objects.bulk_create(missing_staff)
        return missing_staff
Esempio n. 14
0
def run():
    """
    Import artists and mangas from ``../data/manga-news/manga.csv``.

    The file is read twice. The first pass makes sure an Artist exists for
    every writer and mangaka; the second pass creates the missing Manga rows
    and attaches their genres. The first line of the file is skipped
    (presumably a header row). The second pass stops at the first
    IntegrityError, DataError or unknown Genre, after printing the offending
    line and value.
    """
    # "LASTNAME Firstname": an upper-case last name followed by the rest.
    name_pattern = re.compile(r"^([A-ZÔÛÏ'-]+) (.*)$")

    artists = {}
    with open('../data/manga-news/manga.csv') as f:
        next(f)
        for line in f:
            # Unpacking all 11 columns also rejects malformed rows.
            (title, vo_title, writer, mangaka, editor, origin, genre1, genre2,
             manga_type, synopsis, poster) = line.split(';;')
            for artist in (writer, mangaka):
                if artist in artists:
                    continue
                # Parse the name of the artist being processed; matching on
                # `writer` here would store the mangaka under the writer's name.
                m = name_pattern.match(artist)
                if m:
                    last_name, first_name = m.groups()
                    last_name = last_name.lower().capitalize()
                else:
                    first_name = ''
                    last_name = artist
                matches = Artist.objects.filter(first_name=first_name,
                                                last_name=last_name)
                if matches.exists():
                    a = matches.get()
                else:
                    a = Artist(first_name=first_name, last_name=last_name)
                    a.save()
                artists[artist] = a

    with open('../data/manga-news/manga.csv') as f:
        next(f)
        for line in f:
            # Strip the line terminator so it does not end up inside `poster`.
            (title, vo_title, writer, mangaka, editor, origin, genre1, genre2,
             manga_type, synopsis, poster) = line.rstrip('\n').split(';;')
            try:
                same_manga = Manga.objects.filter(title=title,
                                                  vo_title=vo_title)
                if same_manga.exists():
                    manga = same_manga.get()
                else:
                    manga = Manga(title=title,
                                  vo_title=vo_title,
                                  mangaka=artists[mangaka],
                                  writer=artists[writer],
                                  editor=editor,
                                  origin=origin.lower().replace(
                                      'hong kong', 'hong-kong').replace(
                                          'international', 'intl'),
                                  manga_type=manga_type.lower(),
                                  source='',
                                  poster=poster,
                                  synopsis=synopsis)
                    manga.save()
                if genre1:
                    manga.genre.add(Genre.objects.get(title=genre1))
                if genre2:
                    manga.genre.add(Genre.objects.get(title=genre2))
            except IntegrityError as err:
                print(line)
                print(writer)
                print(err)
                break
            except DataError as err:
                print(line)
                print(origin)
                print(err)
                break
            except Genre.DoesNotExist as err:
                print(line)
                print('Genres: [%s] [%s]' % (genre1, genre2))
                print(err)
                break