def get_or_create_artist(name):
    """Return the Artist matching ``name``, creating it when none is suitable.

    The name is split on whitespace. With exactly two words the first word is
    used as the last name and the second as the first name; with any other
    number of words the final word is the last name and the remaining words
    form the first name (e.g. "Diana Wynne Jones"). Without any space, or
    with whitespace only, the whole string becomes the last name.

    Lookup order: exact (first_name, last_name) match, then artists sharing
    the first name, then artists sharing the last name. A unique exact match
    is returned directly; any other non-empty set of candidates is handed to
    ``pick_among``. When ``pick_among`` returns a falsy value, or there is no
    candidate at all, a new Artist is saved and returned.

    Only the lookup errors of the exact match (no row / several rows) are
    absorbed; other exceptions now propagate instead of being silently
    swallowed by a bare ``except``.

    :param name: full artist name
    :return: an existing or newly saved Artist
    """
    parts = name.split() if ' ' in name else []
    if len(parts) == 2:
        last, first = parts
    elif parts:
        last, first = parts[-1], ' '.join(parts[:-1])  # Diana Wynne Jones
    else:
        # No space at all, or nothing but whitespace (the latter used to
        # raise IndexError on parts[-1]).
        last, first = name, ''

    contestants = []
    exact = Artist.objects.filter(first_name=first, last_name=last)
    if exact.count():
        contestants = exact
        try:
            return exact.get()
        except (Artist.DoesNotExist, Artist.MultipleObjectsReturned):
            # Ambiguous (or vanished) exact match: fall through and let
            # pick_among decide among the candidates.
            pass
    elif first != '' and Artist.objects.filter(first_name=first).count():
        contestants = Artist.objects.filter(first_name=first)
    elif Artist.objects.filter(last_name=last).count():
        contestants = Artist.objects.filter(last_name=last)

    if contestants:
        choice = pick_among(contestants)
        if choice:
            return choice

    artist = Artist(first_name=first, last_name=last)
    artist.save()
    return artist
def run():
    """Import artists and mangas from ``../data/manga-news/manga.csv``.

    The file is read twice, skipping its first (header) line each time. Every
    row holds eleven ``;;``-separated columns: title, vo_title, writer,
    mangaka, editor, origin, genre1, genre2, manga_type, synopsis, poster.

    First pass: an Artist is fetched or created for every distinct writer and
    mangaka. A name of the form "UPPERCASE rest" is split into a capitalised
    last name and a first name; any other name is stored whole as last name.

    Second pass: a Manga is fetched or created per (title, vo_title) and the
    non-empty genres are attached. The import stops at the first
    IntegrityError, DataError or missing Genre after printing the row.
    """
    artists = {}
    with open('../data/manga-news/manga.csv') as f:
        next(f)  # skip the header line
        for line in f:
            # Drop the line ending so it does not end up inside `poster`.
            (title, vo_title, writer, mangaka, editor, origin, genre1, genre2,
             manga_type, synopsis, poster) = line.rstrip('\n').split(';;')
            for artist in [writer, mangaka]:
                if artist in artists:
                    continue
                # Parse the artist currently processed. Matching `writer`
                # here (as before) stored the mangaka under the writer's name.
                m = re.match('^([A-ZÔÛÏ\'-]+) (.*)$', artist)
                if m:
                    last_name, first_name = m.groups()
                    last_name = last_name.lower().capitalize()
                else:
                    first_name = ''
                    last_name = artist
                if Artist.objects.filter(first_name=first_name, last_name=last_name).count() == 0:
                    a = Artist(first_name=first_name, last_name=last_name)
                    a.save()
                else:
                    a = Artist.objects.get(first_name=first_name, last_name=last_name)
                artists[artist] = a

    with open('../data/manga-news/manga.csv') as f:
        next(f)  # skip the header line
        for line in f:
            (title, vo_title, writer, mangaka, editor, origin, genre1, genre2,
             manga_type, synopsis, poster) = line.rstrip('\n').split(';;')
            try:
                if Manga.objects.filter(title=title, vo_title=vo_title).count() == 0:
                    manga = Manga(
                        title=title,
                        vo_title=vo_title,
                        mangaka=artists[mangaka],
                        writer=artists[writer],
                        editor=editor,
                        origin=origin.lower().replace('hong kong', 'hong-kong').replace('international', 'intl'),
                        manga_type=manga_type.lower(),
                        source='',
                        poster=poster,
                        synopsis=synopsis,
                    )
                    manga.save()
                else:
                    manga = Manga.objects.get(title=title, vo_title=vo_title)
                if genre1:
                    manga.genre.add(Genre.objects.get(title=genre1))
                if genre2:
                    manga.genre.add(Genre.objects.get(title=genre2))
            except IntegrityError as err:
                print(line)
                print(writer)
                print(err)
                break
            except DataError as err:
                print(line)
                print(origin)
                print(err)
                break
            except Genre.DoesNotExist as err:
                print(line)
                print('Genres: [%s] [%s]' % (genre1, genre2))
                print(err)
                break
def test_anidb_get_animes(self):
    """Import two works through mocked AniDB responses and verify what was stored.

    Checks the tag counts of both works, Sangatsu's metadata and staff, the
    tag diff against AniDB, and that the pre-existing artist is reused.
    """
    # Artist stored beforehand without an AniDB creator ID; retrieving
    # Sangatsu is expected to fill that ID in.
    Artist(name="Shinbou Akiyuki").save()

    fixtures = (
        'anidb/sangatsu_no_lion.xml',
        'anidb/sangatsu_no_lion.xml',
        'anidb/hibike_euphonium.xml',
    )
    with responses.RequestsMock(assert_all_requests_are_fired=True) as mocked:
        for fixture in fixtures:
            mocked.add(
                responses.GET,
                AniDB.BASE_URL,
                body=self.read_fixture(fixture),
                status=200,
                content_type='application/xml'
            )

        sangatsu = self.anidb.get_or_update_work(11606)
        anidb_tags = self.anidb.get_tags(11606)
        tags_diff = diff_between_anidb_and_local_tags(sangatsu, anidb_tags)
        hibike = self.anidb.get_or_update_work(10889)

    # Tag titles stored for each work
    sangatsu_tags = set(
        Work.objects.get(pk=sangatsu.pk).taggedwork_set.all().values_list('tag__title', flat=True))
    hibike_tags = set(
        Work.objects.get(pk=hibike.pk).taggedwork_set.all().values_list('tag__title', flat=True))
    common_tags = sangatsu_tags & hibike_tags

    self.assertEqual(len(sangatsu_tags), 30)
    self.assertEqual(len(hibike_tags), 38)
    self.assertEqual(len(common_tags), 18)

    # Sangatsu's metadata
    self.assertEqual(sangatsu.title, 'Sangatsu no Lion')
    self.assertEqual(sangatsu.nb_episodes, 22)
    self.assertEqual(sangatsu.studio.title, 'Shaft')
    self.assertEqual(sangatsu.date, datetime(2016, 10, 8, 0, 0))
    self.assertEqual(sangatsu.end_date, datetime(2017, 3, 18, 0, 0))

    # Sangatsu's staff
    staff_names = Work.objects.get(pk=sangatsu.pk).staff_set.all().values_list('artist__name', flat=True)
    self.assertCountEqual(
        staff_names,
        ['Umino Chika', 'Hashimoto Yukari', 'Shinbou Akiyuki', 'Okada Kenjirou'])

    # Tags retrieved from AniDB: nothing deleted, added or updated, all kept
    for changed in ("deleted_tags", "added_tags", "updated_tags"):
        self.assertEqual(len(tags_diff[changed]), 0)
    self.assertEqual(len(tags_diff["kept_tags"]), len(sangatsu_tags))

    # The artist saved at the start must not have been duplicated
    matching_artists = Artist.objects.filter(name="Shinbou Akiyuki")
    self.assertEqual(matching_artists.count(), 1)
    self.assertEqual(matching_artists.first().anidb_creator_id, 59)
def test_insert_into_database(self):
    """Insert a seasonal AniList listing, then one rich entry, and verify the stored data."""
    self.add_fake_auth()

    # Artist stored beforehand, used below to check it is not duplicated.
    Artist(name='Ishihara Tatsuya').save()

    # Insertion of AniListEntry objects (seasonal listing)
    responses.add(
        responses.GET,
        urljoin(AniList.BASE_URL, 'browse/anime'),
        body=self.read_fixture('anilist/airing_summer_2017_trimmed.json'),
        status=200,
        content_type='application/json')
    seasonal = list(self.anilist.list_seasonal_animes(year=2017, season='summer'))
    inserted_works = insert_works_into_database_from_anilist(seasonal)
    self.assertEqual(len(inserted_works), 7)

    # Insertion of an AniListRichEntry
    responses.add(
        responses.GET,
        urljoin(AniList.BASE_URL, 'anime/20912/page'),
        body=self.read_fixture('anilist/hibike_euphonium.json'),
        status=200,
        content_type='application/json')
    hibike_entry = self.anilist.get_work_by_id(AniListWorks.animes, 20912)
    hibike = insert_work_into_database_from_anilist(hibike_entry)

    titles = WorkTitle.objects.filter(work=hibike)
    genres = hibike.genre.values_list('title', flat=True)
    related = RelatedWork.objects.filter(parent_work=hibike)
    stored_work = Work.objects.get(pk=hibike.pk)
    staff_names = stored_work.staff_set.all().values_list('artist__name', flat=True)

    self.assertEqual(hibike.studio.title, 'Kyoto Animation')
    self.assertEqual(len(titles), 3)
    self.assertEqual(len(related), 4)
    self.assertCountEqual(genres, ['Slice of Life', 'Music', 'Drama'])
    self.assertCountEqual(
        staff_names,
        ['Ishihara Tatsuya', 'Matsuda Akito', 'Takeda Ayano'])

    # The artist saved at the start must not have been duplicated
    matching_artists = Artist.objects.filter(name='Ishihara Tatsuya')
    self.assertEqual(matching_artists.count(), 1)
    self.assertEqual(matching_artists.first().anilist_creator_id, 100055)

    # Inserting the same entry a second time must yield the same work
    hibike_again = insert_work_into_database_from_anilist(hibike_entry)
    self.assertEqual(hibike, hibike_again)
def build_staff(work: Work, staff: List[AniListStaff]) -> List[Staff]:
    """
    Create the Artist and Staff rows a Work is still missing, from AniList staff data.

    Artists found by name or by AniList id are updated in place with the
    AniList name and id; the others are bulk-created. A Staff object is then
    built for each creator whose AniList role is 'Director', 'Music' or
    'Original Creator' and whose (work, artist, role) triple is not stored
    yet. Results are keyed by creator id, so at most one Staff per creator
    is inserted.

    :param work: a work
    :param staff: a list of staff (and artists) informations
    :type work: Work
    :type staff: List[AniListStaff]
    :return: a list of Staff objects that were inserted in Mangaki's database
    :rtype: List[Staff]
    """
    if not staff:
        return []

    anilist_roles_map = {
        'Director': 'director',
        'Music': 'composer',
        'Original Creator': 'author'
    }

    pending_artists = {}
    known_artists = {}
    for creator in staff:
        full_name = '{} {}'.format(creator.name_last or '', creator.name_first or '').strip()
        try:
            # Already stored: avoid a duplicate by refreshing name and AniList id
            found = Artist.objects.get(Q(name=full_name) | Q(anilist_creator_id=creator.id))
            found.name = full_name
            found.anilist_creator_id = creator.id
            found.save()
            known_artists[creator.id] = found
        except Artist.DoesNotExist:
            # Not stored yet: queued for the bulk insert below
            pending_artists[creator.id] = Artist(name=full_name, anilist_creator_id=creator.id)

    for created in Artist.objects.bulk_create(list(pending_artists.values())):
        known_artists[created.anilist_creator_id] = created
    artist_ids = [known.id for known in known_artists.values()]

    stored_triples = set(
        Staff.objects.filter(work=work, artist__in=artist_ids)
        .values_list('work__id', 'artist__id', 'role')
    )

    role_map = staff_roles.role_map
    missing_staff = {}
    for creator in staff:
        role_slug = anilist_roles_map.get(creator.role)
        if not role_slug:
            # AniList role without a local equivalent
            continue
        role = role_map.get(role_slug)
        artist = known_artists[creator.id]
        if (work.id, artist.id, role.id) in stored_triples:
            continue
        missing_staff[creator.id] = Staff(work=work, role=role, artist=artist)

    Staff.objects.bulk_create(list(missing_staff.values()))
    return list(missing_staff.values())
def run():
    """Complete Anime rows with the data of ``../data/manga-news/anime.csv``.

    The CSV (first line skipped) is loaded into a list of AnimeData, one per
    ``;;``-separated row of twelve columns. Each line of ``../data/PAIR.log``
    then gives ``<anime id>::<row number>``: the Anime with that id receives
    the studio, editor, author, type, genres, episode count, origin and
    synopsis of row ``row number - 1`` and is saved.
    """
    anime_data = []
    with open('../data/manga-news/anime.csv') as f:
        next(f)  # skip the first line
        for line in f:
            columns = line.split(';;')
            # Unpacking fails unless the row has exactly twelve columns
            (title, vo_title, studio, author, editor, anime_type, genre1,
             genre2, nb_episodes, origin, synopsis, poster) = columns
            anime_data.append(AnimeData(*columns))

    with open('../data/PAIR.log') as f:
        for line in f:
            pair = line.split('::')
            mangaki_id = int(pair[0])
            mn_id = int(pair[1])
            anime = Anime.objects.get(id=mangaki_id)
            data = anime_data[mn_id - 1]

            anime.studio = create_if_not_exists(Studio, data.studio)
            anime.editor = create_if_not_exists(Editor, data.editor)

            # "UPPERCASE rest" -> capitalised last name + first name;
            # anything else is kept whole as the last name.
            m = re.match('^([A-ZÔÛÏ\'-]+) (.*)$', data.author)
            if m:
                last_name, first_name = m.groups()
                last_name = last_name.lower().capitalize()
            else:
                first_name = ''
                last_name = data.author

            if Artist.objects.filter(first_name=first_name, last_name=last_name).count() != 0:
                a = Artist.objects.get(first_name=first_name, last_name=last_name)
                print('Exists', a)
            else:
                print('NEW')
                a = Artist(first_name=first_name, last_name=last_name)
                a.save()
            anime.author = a

            anime.anime_type = data.anime_type
            genres = [create_if_not_exists(Genre, genre_title)
                      for genre_title in (data.genre1, data.genre2)]
            for genre in genres:
                anime.genre.add(genre)
            anime.nb_episodes = data.nb_episodes
            anime.origin = data.origin
            anime.synopsis = data.synopsis  # The most important!
            anime.save()
def build_staff(work: Work, staff: List[AniListStaff]) -> List[Staff]:
    """
    Insert Artist and Staff objects for a given Work when required into Mangaki's database.

    Artists found by name or by AniList id are updated with the AniList name
    and id; the others are bulk-created. A Staff object is created for every
    creator whose AniList role is 'Director', 'Music' or 'Original Creator',
    unless its artist already has a Staff entry for this work.

    Artists are tracked per AniList creator id. Pairing ``staff[i]`` with the
    i-th resolved artist (as done previously) attached the wrong artist as
    soon as an unknown creator preceded a known one, because newly created
    artists were appended after the existing ones. Resolving each creator id
    once also avoids inserting the same new artist twice when a person is
    listed with several roles.

    :param work: a work
    :param staff: a list of staff (and artists) informations
    :type work: Work
    :type staff: List[AniListStaff]
    :return: a list of Staff objects that were inserted in Mangaki's database
    :rtype: List[Staff]
    """
    if not staff:
        return []

    anilist_roles_map = {
        'Director': 'director',
        'Music': 'composer',
        'Original Creator': 'author'
    }

    artists = {}         # AniList creator id -> stored Artist
    artists_to_add = {}  # AniList creator id -> Artist awaiting bulk creation
    for creator in staff:
        if creator.id in artists or creator.id in artists_to_add:
            # Same person listed again (another role): already resolved
            continue
        name = '{} {}'.format(creator.name_last or '', creator.name_first or '').strip()
        try:
            # This artist exists : prevent duplicates by updating with the AniList id
            artist = Artist.objects.get(
                Q(name=name) | Q(anilist_creator_id=creator.id))
            artist.name = name
            artist.anilist_creator_id = creator.id
            artist.save()
            artists[creator.id] = artist
        except Artist.DoesNotExist:
            # This artist does not yet exist : will be bulk created
            artists_to_add[creator.id] = Artist(name=name, anilist_creator_id=creator.id)

    for artist in Artist.objects.bulk_create(list(artists_to_add.values())):
        artists[artist.anilist_creator_id] = artist

    existing_staff_artists = set(
        s.artist
        for s in Staff.objects.filter(work=work, artist__in=list(artists.values())))

    role_map = staff_roles.role_map
    missing_staff = [
        Staff(work=work,
              role=role_map.get(anilist_roles_map[creator.role]),
              artist=artists[creator.id])
        for creator in staff
        if anilist_roles_map.get(creator.role)
        and artists[creator.id] not in existing_staff_artists
    ]

    Staff.objects.bulk_create(missing_staff)
    return missing_staff
def run():
    """Complete Anime rows with the data of ``../data/manga-news/anime.csv``.

    The CSV (first line skipped) is loaded into a list of AnimeData, one per
    ``;;``-separated row of twelve columns. Each line of ``../data/PAIR.log``
    then gives ``<anime id>::<row number>``: the Anime with that id receives
    the studio, editor, author, type, genres, episode count, origin and
    synopsis of row ``row number - 1`` and is saved.
    """
    anime_data = []
    with open("../data/manga-news/anime.csv") as f:
        next(f)  # skip the first line
        for line in f:
            columns = line.split(";;")
            # Unpacking fails unless the row has exactly twelve columns
            (title, vo_title, studio, author, editor, anime_type, genre1,
             genre2, nb_episodes, origin, synopsis, poster) = columns
            anime_data.append(AnimeData(*columns))

    with open("../data/PAIR.log") as f:
        for line in f:
            pair = line.split("::")
            mangaki_id = int(pair[0])
            mn_id = int(pair[1])
            anime = Anime.objects.get(id=mangaki_id)
            data = anime_data[mn_id - 1]

            anime.studio = create_if_not_exists(Studio, data.studio)
            anime.editor = create_if_not_exists(Editor, data.editor)

            # "UPPERCASE rest" -> capitalised last name + first name;
            # anything else is kept whole as the last name.
            m = re.match("^([A-ZÔÛÏ'-]+) (.*)$", data.author)
            if m:
                last_name, first_name = m.groups()
                last_name = last_name.lower().capitalize()
            else:
                first_name = ""
                last_name = data.author

            if Artist.objects.filter(first_name=first_name, last_name=last_name).count() != 0:
                a = Artist.objects.get(first_name=first_name, last_name=last_name)
                print("Exists", a)
            else:
                print("NEW")
                a = Artist(first_name=first_name, last_name=last_name)
                a.save()
            anime.author = a

            anime.anime_type = data.anime_type
            genres = [create_if_not_exists(Genre, genre_title)
                      for genre_title in (data.genre1, data.genre2)]
            for genre in genres:
                anime.genre.add(genre)
            anime.nb_episodes = data.nb_episodes
            anime.origin = data.origin
            anime.synopsis = data.synopsis  # The most important!
            anime.save()
def test_insert_into_database(self):
    """Insert one AniList entry from a mocked response and verify the stored data."""
    # Artist stored beforehand, used below to check it is not duplicated.
    Artist(name='Ishihara Tatsuya').save()

    # Insertion of an AniListEntry
    responses.add(
        responses.POST,
        self.anilist.BASE_URL,
        body=self.read_fixture('anilist/hibike_euphonium.json'),
        status=200,
        content_type='application/json')
    hibike_entry = self.anilist.get_work(search_id=20912)
    hibike = insert_work_into_database_from_anilist(hibike_entry, build_related=False)

    titles = WorkTitle.objects.filter(work=hibike).values_list('title', flat=True)
    genres = hibike.genre.values_list('title', flat=True)
    related_hibike = RelatedWork.objects.filter(parent_work=hibike)
    stored_work = Work.objects.get(pk=hibike.pk)
    staff_names = stored_work.staff_set.all().values_list('artist__name', flat=True)

    self.assertEqual(hibike.studio.title, 'Kyoto Animation')
    self.assertCountEqual(
        titles,
        ['Hibike! Euphonium', 'Sound! Euphonium', '響け!ユーフォニアム'])
    self.assertCountEqual(genres, ['Slice of Life', 'Music', 'Drama'])
    self.assertCountEqual(
        staff_names,
        ['Ishihara Tatsuya', 'Matsuda Akito', 'Takeda Ayano'])

    # The artist saved at the start must not have been duplicated
    matching_artists = Artist.objects.filter(name='Ishihara Tatsuya')
    self.assertEqual(matching_artists.count(), 1)
    self.assertEqual(matching_artists.first().anilist_creator_id, 100055)

    # Inserting the same entry a second time must yield the same work
    hibike_again = insert_work_into_database_from_anilist(
        hibike_entry, build_related=False)
    self.assertEqual(hibike, hibike_again)
def build_staff(work: Work, staff: List[Tuple[AniListEntry, str]]) -> List[Staff]:
    """
    Insert the Artist and Staff objects a Work is still missing, from AniList staff data.

    Each element of ``staff`` is read through its ``id``, ``name_last``,
    ``name_first`` and ``role`` attributes.
    NOTE(review): the annotation says tuples, the code expects objects with
    those attributes -- confirm against the callers.

    Artists found by name or by AniList id are updated with the AniList name
    and id; the others are bulk-created. A Staff object is created for every
    creator whose AniList role is 'Director', 'Music' or 'Original Creator',
    unless its artist already has a Staff entry for this work.

    Artists are tracked per AniList creator id. Pairing ``staff[i]`` with the
    i-th resolved artist (as done previously) attached the wrong artist as
    soon as an unknown creator preceded a known one, because newly created
    artists were appended after the existing ones. Resolving each creator id
    once also avoids inserting the same new artist twice when a person is
    listed with several roles.

    :param work: the work the staff belongs to
    :param staff: the creators to attach to the work
    :return: the Staff objects that were inserted
    """
    if not staff:
        return []

    anilist_roles_map = {
        'Director': 'director',
        'Music': 'composer',
        'Original Creator': 'author'
    }

    artists = {}         # AniList creator id -> stored Artist
    artists_to_add = {}  # AniList creator id -> Artist awaiting bulk creation
    for creator in staff:
        if creator.id in artists or creator.id in artists_to_add:
            # Same person listed again (another role): already resolved
            continue
        name = '{} {}'.format(creator.name_last or '', creator.name_first or '').strip()
        try:
            # This artist exists : prevent duplicates by updating with the AniList id
            artist = Artist.objects.get(
                Q(name=name) | Q(anilist_creator_id=creator.id))
            artist.name = name
            artist.anilist_creator_id = creator.id
            artist.save()
            artists[creator.id] = artist
        except Artist.DoesNotExist:
            # This artist does not yet exist : will be bulk created
            artists_to_add[creator.id] = Artist(name=name, anilist_creator_id=creator.id)

    for artist in Artist.objects.bulk_create(list(artists_to_add.values())):
        artists[artist.anilist_creator_id] = artist

    existing_staff_artists = set(
        s.artist
        for s in Staff.objects.filter(work=work, artist__in=list(artists.values())))

    missing_staff = [
        Staff(work=work,
              role=role_map.get(anilist_roles_map[creator.role]),
              artist=artists[creator.id])
        for creator in staff
        if anilist_roles_map.get(creator.role)
        and artists[creator.id] not in existing_staff_artists
    ]

    Staff.objects.bulk_create(missing_staff)
    return missing_staff
def _build_staff(self,
                 work: Work,
                 creators: List[Dict[str, Any]],
                 reload_role_cache: bool = False) -> List[Staff]:
    """
    Create the Artist and Staff rows a Work is still missing, from AniDB creators.

    Each creator dict is read through its "name", "anidb_creator_id" and
    "role" keys. An artist found by name or AniDB id is updated with both
    values; an unknown one is bulk-created. Every AniDB id is handled once,
    even when the creator is listed with several roles. A Staff object is
    then inserted for each creator whose (work, role, artist) triple is not
    stored yet.

    :param work: the work the staff belongs to
    :param creators: the creators to attach to the work
    :param reload_role_cache: when true, ``self.role_map`` is deleted first
    :return: the Staff objects that were inserted
    """
    if reload_role_cache:
        del self.role_map

    seen_creator_ids = set()
    pending_artists = []
    resolved_artists = []
    for nc in creators:
        creator_name = nc["name"]
        creator_id = nc["anidb_creator_id"]
        artist = Artist.objects.filter(
            Q(name=creator_name) | Q(anidb_creator_id=creator_id)).first()

        if creator_id in seen_creator_ids:
            # Creator with more than one role: already handled
            continue

        if artist:
            # Already stored: avoid a duplicate by refreshing name and AniDB id
            artist.name = creator_name
            artist.anidb_creator_id = creator_id
            artist.save()
            resolved_artists.append(artist)
        else:
            # Not stored yet: queued for the bulk insert below
            pending_artists.append(
                Artist(name=creator_name, anidb_creator_id=creator_id))
        seen_creator_ids.add(creator_id)

    resolved_artists.extend(Artist.objects.bulk_create(pending_artists))
    artists = {}
    for resolved in resolved_artists:
        artists[resolved.name] = resolved

    stored_triples = set(
        Staff.objects.filter(
            work=work,
            role__in=(nc["role"] for nc in creators),
            artist__name__in=(nc["name"] for nc in creators)
        ).values_list('work', 'role', 'artist'))

    missing_staff = []
    for nc in creators:
        triple = (work.pk, nc["role"].pk, artists[nc["name"]].pk)
        if triple in stored_triples:
            continue
        missing_staff.append(
            Staff(work=work, role=nc["role"], artist=artists[nc["name"]]))

    Staff.objects.bulk_create(missing_staff)
    return missing_staff
def _build_staff(self,
                 work: Work,
                 creators: List[Dict[str, Any]],
                 reload_role_cache: bool = False) -> List[Staff]:
    """
    Insert the Artist and Staff objects a Work is still missing, from AniDB creators.

    Each creator dict is read through its "name", "anidb_creator_id" and
    "role" keys. An artist found by name or AniDB id is updated with both
    values; an unknown one is bulk-created. A Staff object is then inserted
    for each creator whose (work, role, artist) triple is not stored yet.

    Fixes over the previous implementation:

    * artists are tracked per AniDB creator id instead of by list position
      (new artists were appended after existing ones, so ``artists[index]``
      could belong to another creator);
    * each creator id is resolved once, so a person listed with several
      roles is no longer inserted as several artists;
    * the "already stored" check compares primary keys, which is what
      ``values_list('work', 'role', 'artist')`` returns; comparing model
      instances never matched, so every Staff was inserted again.

    NOTE(review): relies on ``bulk_create`` returning artists with their
    primary key set, as the previous code did -- confirm for the database
    backend in use.

    :param work: the work the staff belongs to
    :param creators: the creators to attach to the work
    :param reload_role_cache: when true, ``self.role_map`` is deleted first
    :return: the Staff objects that were inserted
    """
    if reload_role_cache:
        del self.role_map

    artists = {}         # AniDB creator id -> stored Artist
    artists_to_add = {}  # AniDB creator id -> Artist awaiting bulk creation
    for nc in creators:
        creator_id = nc["anidb_creator_id"]
        if creator_id in artists or creator_id in artists_to_add:
            # Creator with more than one role: already resolved
            continue
        artist = Artist.objects.filter(
            Q(name=nc["name"]) | Q(anidb_creator_id=creator_id)).first()
        if artist is None:
            # This artist does not yet exist : will be bulk created
            artists_to_add[creator_id] = Artist(
                name=nc["name"], anidb_creator_id=creator_id)
        else:
            # This artist exists : prevent duplicates by updating with the AniDB id
            artist.name = nc["name"]
            artist.anidb_creator_id = creator_id
            artist.save()
            artists[creator_id] = artist

    for artist in Artist.objects.bulk_create(list(artists_to_add.values())):
        artists[artist.anidb_creator_id] = artist

    existing_staff = set(
        Staff.objects.filter(
            work=work,
            role__in=[nc["role"] for nc in creators],
            artist__in=list(artists.values())
        ).values_list('work', 'role', 'artist'))

    missing_staff = []
    for nc in creators:
        artist = artists[nc["anidb_creator_id"]]
        if (work.pk, nc["role"].pk, artist.pk) not in existing_staff:
            missing_staff.append(
                Staff(work=work, role=nc["role"], artist=artist))

    Staff.objects.bulk_create(missing_staff)
    return missing_staff
def run():
    """Import artists and mangas from ``../data/manga-news/manga.csv``.

    The file is read twice, skipping its first (header) line each time. Every
    row holds eleven ``;;``-separated columns: title, vo_title, writer,
    mangaka, editor, origin, genre1, genre2, manga_type, synopsis, poster.

    First pass: an Artist is fetched or created for every distinct writer and
    mangaka. A name of the form "UPPERCASE rest" is split into a capitalised
    last name and a first name; any other name is stored whole as last name.

    Second pass: a Manga is fetched or created per (title, vo_title) and the
    non-empty genres are attached. The import stops at the first
    IntegrityError, DataError or missing Genre after printing the row.
    """
    artists = {}
    with open('../data/manga-news/manga.csv') as f:
        next(f)  # skip the header line
        for line in f:
            # Drop the line ending so it does not end up inside `poster`.
            (title, vo_title, writer, mangaka, editor, origin, genre1, genre2,
             manga_type, synopsis, poster) = line.rstrip('\n').split(';;')
            for artist in [writer, mangaka]:
                if artist in artists:
                    continue
                # Parse the artist currently processed. Matching `writer`
                # here (as before) stored the mangaka under the writer's name.
                m = re.match('^([A-ZÔÛÏ\'-]+) (.*)$', artist)
                if m:
                    last_name, first_name = m.groups()
                    last_name = last_name.lower().capitalize()
                else:
                    first_name = ''
                    last_name = artist
                if Artist.objects.filter(first_name=first_name,
                                         last_name=last_name).count() == 0:
                    a = Artist(first_name=first_name, last_name=last_name)
                    a.save()
                else:
                    a = Artist.objects.get(first_name=first_name,
                                           last_name=last_name)
                artists[artist] = a

    with open('../data/manga-news/manga.csv') as f:
        next(f)  # skip the header line
        for line in f:
            (title, vo_title, writer, mangaka, editor, origin, genre1, genre2,
             manga_type, synopsis, poster) = line.rstrip('\n').split(';;')
            try:
                if Manga.objects.filter(title=title,
                                        vo_title=vo_title).count() == 0:
                    manga = Manga(title=title,
                                  vo_title=vo_title,
                                  mangaka=artists[mangaka],
                                  writer=artists[writer],
                                  editor=editor,
                                  origin=origin.lower().replace(
                                      'hong kong', 'hong-kong').replace(
                                          'international', 'intl'),
                                  manga_type=manga_type.lower(),
                                  source='',
                                  poster=poster,
                                  synopsis=synopsis)
                    manga.save()
                else:
                    manga = Manga.objects.get(title=title, vo_title=vo_title)
                if genre1:
                    manga.genre.add(Genre.objects.get(title=genre1))
                if genre2:
                    manga.genre.add(Genre.objects.get(title=genre2))
            except IntegrityError as err:
                print(line)
                print(writer)
                print(err)
                break
            except DataError as err:
                print(line)
                print(origin)
                print(err)
                break
            except Genre.DoesNotExist as err:
                print(line)
                print('Genres: [%s] [%s]' % (genre1, genre2))
                print(err)
                break