def get_or_create_artist(name):
    """Return the Artist designated by ``name``, creating it as a last resort.

    ``name`` is split on whitespace when it contains a space:

    * exactly two words: the first word is used as the last name and the
      second as the first name;
    * three or more words: the final word is the last name and everything
      before it is the first name (e.g. "Diana Wynne Jones");
    * no space at all: the whole string is the last name and the first name
      is empty.

    Lookup order: exact (first name, last name) match, then artists sharing
    the first name (only when it is non-empty), then artists sharing the last
    name. A unique exact match is returned directly. Otherwise the candidates
    are passed to ``pick_among`` and its result is returned when truthy. If
    nothing was chosen, a new Artist is saved and returned.

    Only the "ambiguous / vanished exact match" errors are tolerated; any
    other exception (for instance a database failure) propagates to the
    caller instead of being silently swallowed.
    """
    if ' ' in name:
        parts = name.split()
        if len(parts) == 2:
            last, first = parts
        elif parts:
            last, first = parts[-1], ' '.join(parts[:-1])  # Diana Wynne Jones
        else:
            # Name made only of spaces: split() yields nothing, which used to
            # raise IndexError on parts[-1]. Treat it like an empty name.
            last, first = '', ''
    else:
        last, first = name, ''

    contestants = []
    exact_matches = Artist.objects.filter(first_name=first, last_name=last)
    if exact_matches.exists():
        contestants = exact_matches
        try:
            return exact_matches.get()
        except (Artist.DoesNotExist, Artist.MultipleObjectsReturned):
            # Several exact homonyms (or the row vanished meanwhile):
            # fall through and let pick_among decide.
            pass
    elif first != '' and Artist.objects.filter(first_name=first).exists():
        contestants = Artist.objects.filter(first_name=first)
    elif Artist.objects.filter(last_name=last).exists():
        contestants = Artist.objects.filter(last_name=last)

    if contestants:
        choice = pick_among(contestants)
        if choice:
            return choice

    artist = Artist(first_name=first, last_name=last)
    artist.save()
    return artist
def get_or_create_artist(name):
    """Return the Artist designated by ``name``, creating it as a last resort.

    ``name`` is split on whitespace when it contains a space:

    * exactly two words: the first word is used as the last name and the
      second as the first name;
    * three or more words: the final word is the last name and everything
      before it is the first name (e.g. "Diana Wynne Jones");
    * no space at all: the whole string is the last name and the first name
      is empty.

    Lookup order: exact (first name, last name) match, then artists sharing
    the first name (only when it is non-empty), then artists sharing the last
    name. A unique exact match is returned directly. Otherwise the candidates
    are passed to ``pick_among`` and its result is returned when truthy. If
    nothing was chosen, a new Artist is saved and returned.

    Only the "ambiguous / vanished exact match" errors are tolerated; any
    other exception (for instance a database failure) propagates to the
    caller instead of being silently swallowed.
    """
    if ' ' in name:
        parts = name.split()
        if len(parts) == 2:
            last, first = parts
        elif parts:
            last, first = parts[-1], ' '.join(parts[:-1])  # Diana Wynne Jones
        else:
            # Name made only of spaces: split() yields nothing, which used to
            # raise IndexError on parts[-1]. Treat it like an empty name.
            last, first = '', ''
    else:
        last, first = name, ''

    contestants = []
    exact_matches = Artist.objects.filter(first_name=first, last_name=last)
    if exact_matches.exists():
        contestants = exact_matches
        try:
            return exact_matches.get()
        except (Artist.DoesNotExist, Artist.MultipleObjectsReturned):
            # Several exact homonyms (or the row vanished meanwhile):
            # fall through and let pick_among decide.
            pass
    elif first != '' and Artist.objects.filter(first_name=first).exists():
        contestants = Artist.objects.filter(first_name=first)
    elif Artist.objects.filter(last_name=last).exists():
        contestants = Artist.objects.filter(last_name=last)

    if contestants:
        choice = pick_among(contestants)
        if choice:
            return choice

    artist = Artist(first_name=first, last_name=last)
    artist.save()
    return artist
def run():
    """Import ``../data/manga-news/manga.csv`` into Artist and Manga rows.

    The file is read once; its first line is skipped as a header. Every other
    line must hold exactly 11 ``;;``-separated columns (title, vo_title,
    writer, mangaka, editor, origin, genre1, genre2, manga_type, synopsis,
    poster), otherwise the unpacking raises ValueError.

    First pass: an Artist is fetched or created for each distinct writer and
    mangaka string. A value shaped like ``UPPERCASE rest`` is split into a
    capitalised last name and a first name; anything else is stored whole as
    the last name with an empty first name.

    Second pass: a Manga is created for each (title, vo_title) pair not yet
    present, then the non-empty genres are attached. Genres must already
    exist. On IntegrityError, DataError or Genre.DoesNotExist the offending
    line is printed and the import stops.
    """
    artist_pattern = re.compile("^([A-ZÔÛÏ'-]+) (.*)$")

    with open('../data/manga-news/manga.csv') as f:
        next(f, None)  # skip the header row; tolerate an empty file
        lines = list(f)

    # First pass: one Artist per distinct writer / mangaka string.
    artists = {}
    for line in lines:
        (title, vo_title, writer, mangaka, editor, origin, genre1, genre2,
         manga_type, synopsis, poster) = line.rstrip('\n').split(';;')
        for artist in (writer, mangaka):
            if artist in artists:
                continue
            # Match the artist being processed (the pattern used to be
            # applied to `writer` even when handling the mangaka).
            m = artist_pattern.match(artist)
            if m:
                last_name, first_name = m.groups()
                last_name = last_name.lower().capitalize()
            else:
                first_name = ''
                last_name = artist
            artists[artist], _ = Artist.objects.get_or_create(
                first_name=first_name, last_name=last_name)

    # Second pass: the mangas themselves, then their genres.
    for line in lines:
        # Strip the newline so it does not end up inside `poster`.
        (title, vo_title, writer, mangaka, editor, origin, genre1, genre2,
         manga_type, synopsis, poster) = line.rstrip('\n').split(';;')
        try:
            if not Manga.objects.filter(title=title, vo_title=vo_title).exists():
                manga = Manga(
                    title=title,
                    vo_title=vo_title,
                    mangaka=artists[mangaka],
                    writer=artists[writer],
                    editor=editor,
                    origin=origin.lower().replace('hong kong', 'hong-kong').replace('international', 'intl'),
                    manga_type=manga_type.lower(),
                    source='',
                    poster=poster,
                    synopsis=synopsis,
                )
                manga.save()
            else:
                manga = Manga.objects.get(title=title, vo_title=vo_title)
            if genre1:
                manga.genre.add(Genre.objects.get(title=genre1))
            if genre2:
                manga.genre.add(Genre.objects.get(title=genre2))
        except IntegrityError as err:
            print(line)
            print(writer)
            print(err)
            break
        except DataError as err:
            print(line)
            print(origin)
            print(err)
            break
        except Genre.DoesNotExist as err:
            print(line)
            print('Genres: [%s] [%s]' % (genre1, genre2))
            print(err)
            break
def _build_staff(self,
                 work: Work,
                 creators: List[Dict[str, Any]],
                 reload_role_cache: bool = False) -> List[Staff]:
    """Create the Staff rows linking ``work`` to ``creators`` that are missing.

    Each entry of ``creators`` is read through its ``"name"``,
    ``"anidb_creator_id"`` and ``"role"`` keys (``role`` must expose ``.pk``).

    For every distinct ``anidb_creator_id`` the first Artist matching either
    the name or the AniDB id is looked up. A match is updated with the given
    name and id and saved at once; unmatched creators are bulk-created
    afterwards. Staff rows whose (work, role, artist) triple is not already
    in the database are then bulk-created.

    Args:
        work: the work the staff belongs to.
        creators: creator descriptions, one per (creator, role) pair.
        reload_role_cache: when true, ``self.role_map`` is deleted first
            (presumably to force a cached attribute to be recomputed;
            confirm against the class definition).

    Returns:
        The list of Staff objects that were passed to ``bulk_create``.
    """
    if reload_role_cache:
        del self.role_map

    processed_anidb_aids = set()
    artists_to_add = []
    artists_list = []
    for nc in creators:
        anidb_aid = nc["anidb_creator_id"]
        if anidb_aid in processed_anidb_aids:
            # Skip if this artist has more than one role. Checked before
            # querying so a repeated creator costs no database round trip.
            continue
        processed_anidb_aids.add(anidb_aid)

        artist = Artist.objects.filter(
            Q(name=nc["name"]) | Q(anidb_creator_id=anidb_aid)).first()
        if not artist:
            # This artist does not yet exist : will be bulk created
            artist = Artist(name=nc["name"], anidb_creator_id=anidb_aid)
            artists_to_add.append(artist)
        else:
            # This artist exists : prevent duplicates by updating with the AniDB id
            artist.name = nc["name"]
            artist.anidb_creator_id = anidb_aid
            artist.save()
            artists_list.append(artist)

    artists_list.extend(Artist.objects.bulk_create(artists_to_add))
    artists = {artist.name: artist for artist in artists_list}

    # values_list on foreign keys yields raw primary keys, hence the
    # comparison on .pk below.
    existing_staff = set(
        Staff.objects.filter(
            work=work,
            role__in=[nc["role"] for nc in creators],
            artist__name__in=[nc["name"] for nc in creators],
        ).values_list('work', 'role', 'artist'))

    missing_staff = [
        Staff(work=work, role=nc["role"], artist=artists[nc["name"]])
        for nc in creators
        if (work.pk, nc["role"].pk, artists[nc["name"]].pk) not in existing_staff
    ]

    Staff.objects.bulk_create(missing_staff)
    return missing_staff
def _build_staff(self,
                 work: Work,
                 creators: List[Dict[str, Any]],
                 reload_role_cache: bool = False) -> List[Staff]:
    """Create the Staff rows linking ``work`` to ``creators`` that are missing.

    Each entry of ``creators`` is read through its ``"name"``,
    ``"anidb_creator_id"`` and ``"role"`` keys (``role`` must be a saved
    model instance, since its ``.pk`` is compared).

    Every distinct ``anidb_creator_id`` is resolved to exactly one Artist.
    The first Artist matching either the name or the AniDB id is updated and
    saved; otherwise a new Artist is queued and bulk-created once, even if
    the creator appears with several roles. Staff rows whose
    (work, role, artist) triple is not already stored are then bulk-created.

    Args:
        work: the work the staff belongs to.
        creators: creator descriptions, one per (creator, role) pair.
        reload_role_cache: when true, ``self.role_map`` is deleted first
            (presumably to force a cached attribute to be recomputed;
            confirm against the class definition).

    Returns:
        The list of Staff objects that were passed to ``bulk_create``.
    """
    if reload_role_cache:
        del self.role_map

    # AniDB creator id -> Artist. Keying on the id (instead of relying on
    # list positions) keeps each creator paired with its own artist and
    # avoids queueing the same new artist once per role.
    artists_by_aid = {}
    artists_to_add = []
    for nc in creators:
        anidb_aid = nc["anidb_creator_id"]
        if anidb_aid in artists_by_aid:
            continue

        artist = Artist.objects.filter(
            Q(name=nc["name"]) | Q(anidb_creator_id=anidb_aid)).first()
        if not artist:
            # This artist does not yet exist : will be bulk created
            artist = Artist(name=nc["name"], anidb_creator_id=anidb_aid)
            artists_to_add.append(artist)
        else:
            # This artist exists : prevent duplicates by updating with the AniDB id
            artist.name = nc["name"]
            artist.anidb_creator_id = anidb_aid
            artist.save()
        artists_by_aid[anidb_aid] = artist

    # Re-register whatever bulk_create hands back so the mapping holds the
    # objects as they were inserted.
    for created in Artist.objects.bulk_create(artists_to_add):
        artists_by_aid[created.anidb_creator_id] = created

    # values_list on foreign keys yields raw primary keys, so membership has
    # to be tested with .pk values rather than model instances.
    existing_staff = set(
        Staff.objects.filter(
            work=work,
            role__in=[nc["role"] for nc in creators],
            artist__in=list(artists_by_aid.values()),
        ).values_list('work', 'role', 'artist'))

    missing_staff = []
    for nc in creators:
        artist = artists_by_aid[nc["anidb_creator_id"]]
        if (work.pk, nc["role"].pk, artist.pk) not in existing_staff:
            missing_staff.append(
                Staff(work=work, role=nc["role"], artist=artist))

    Staff.objects.bulk_create(missing_staff)
    return missing_staff
def run():
    """Enrich existing Anime rows with the manga-news CSV data.

    ``../data/manga-news/anime.csv`` is loaded first (header line skipped,
    exactly 12 ``;;``-separated columns per line) into a list of AnimeData.
    Each line of ``../data/PAIR.log`` then gives ``<anime id>::<row number>``,
    the row number being 1-based in that list. The matching Anime receives
    its studio, editor, author, type, two genres, episode count, origin and
    synopsis, and is saved.
    """
    anime_data = []
    with open('../data/manga-news/anime.csv') as f:
        next(f)
        for line in f:
            columns = line.split(';;')
            # The unpacking doubles as an arity check: it raises ValueError
            # unless the line holds exactly 12 columns.
            (title, vo_title, studio, author, editor, anime_type,
             genre1, genre2, nb_episodes, origin, synopsis, poster) = columns
            anime_data.append(AnimeData(*columns))

    with open('../data/PAIR.log') as f:
        for line in f:
            pair = line.split('::')
            mangaki_id = int(pair[0])
            mn_id = int(pair[1])

            anime = Anime.objects.get(id=mangaki_id)
            data = anime_data[mn_id - 1]

            anime.studio = create_if_not_exists(Studio, data.studio)
            anime.editor = create_if_not_exists(Editor, data.editor)

            # "LASTNAME Firstname" is split in two; anything else is kept
            # whole as the last name.
            match = re.match('^([A-ZÔÛÏ\'-]+) (.*)$', data.author)
            if match is None:
                first_name, last_name = '', data.author
            else:
                last_name, first_name = match.groups()
                last_name = last_name.lower().capitalize()

            homonyms = Artist.objects.filter(first_name=first_name, last_name=last_name)
            if homonyms.count() > 0:
                a = Artist.objects.get(first_name=first_name, last_name=last_name)
                print('Exists', a)
            else:
                print('NEW')
                a = Artist(first_name=first_name, last_name=last_name)
                a.save()
            anime.author = a

            anime.anime_type = data.anime_type

            first_genre = create_if_not_exists(Genre, data.genre1)
            second_genre = create_if_not_exists(Genre, data.genre2)
            anime.genre.add(first_genre)
            anime.genre.add(second_genre)

            anime.nb_episodes = data.nb_episodes
            anime.origin = data.origin
            anime.synopsis = data.synopsis  # The most important!
            anime.save()
def run():
    """Copy manga-news metadata onto the Anime rows listed in PAIR.log.

    Step 1 reads ``../data/manga-news/anime.csv`` (first line skipped, 12
    ``;;``-separated columns per line) into a list of AnimeData. Step 2 walks
    ``../data/PAIR.log``, whose lines look like ``<anime id>::<row number>``
    with a 1-based row number. It fills studio, editor, author, type, genres,
    episode count, origin and synopsis on the Anime, then saves it.
    """

    def split_author(raw):
        """Return (first_name, last_name) parsed from an author cell."""
        found = re.match("^([A-ZÔÛÏ'-]+) (.*)$", raw)
        if not found:
            # Not of the "LASTNAME Firstname" form: keep it whole.
            return "", raw
        family, given = found.groups()
        return given, family.lower().capitalize()

    anime_data = []
    with open("../data/manga-news/anime.csv") as f:
        next(f)
        for line in f:
            cells = line.split(";;")
            # Raises ValueError when the line does not hold 12 columns.
            (title, vo_title, studio, author, editor, anime_type,
             genre1, genre2, nb_episodes, origin, synopsis, poster) = cells
            anime_data.append(AnimeData(*cells))

    with open("../data/PAIR.log") as f:
        for line in f:
            ids = line.split("::")
            mangaki_id, mn_id = int(ids[0]), int(ids[1])
            anime = Anime.objects.get(id=mangaki_id)
            data = anime_data[mn_id - 1]

            anime.studio = create_if_not_exists(Studio, data.studio)
            anime.editor = create_if_not_exists(Editor, data.editor)

            first_name, last_name = split_author(data.author)
            already_known = Artist.objects.filter(
                first_name=first_name, last_name=last_name).count() != 0
            if not already_known:
                print("NEW")
                a = Artist(first_name=first_name, last_name=last_name)
                a.save()
            else:
                a = Artist.objects.get(first_name=first_name, last_name=last_name)
                print("Exists", a)
            anime.author = a

            anime.anime_type = data.anime_type

            main_genre = create_if_not_exists(Genre, data.genre1)
            other_genre = create_if_not_exists(Genre, data.genre2)
            anime.genre.add(main_genre)
            anime.genre.add(other_genre)

            anime.nb_episodes = data.nb_episodes
            anime.origin = data.origin
            anime.synopsis = data.synopsis  # The most important!
            anime.save()
def run():
    """Import ``../data/manga-news/manga.csv`` into Artist and Manga rows.

    The file is read once; its first line is skipped as a header. Every other
    line must hold exactly 11 ``;;``-separated columns (title, vo_title,
    writer, mangaka, editor, origin, genre1, genre2, manga_type, synopsis,
    poster), otherwise the unpacking raises ValueError.

    First pass: an Artist is fetched or created for each distinct writer and
    mangaka string. A value shaped like ``UPPERCASE rest`` is split into a
    capitalised last name and a first name; anything else is stored whole as
    the last name with an empty first name.

    Second pass: a Manga is created for each (title, vo_title) pair not yet
    present, then the non-empty genres are attached. Genres must already
    exist. On IntegrityError, DataError or Genre.DoesNotExist the offending
    line is printed and the import stops.
    """
    artist_pattern = re.compile("^([A-ZÔÛÏ'-]+) (.*)$")

    with open('../data/manga-news/manga.csv') as f:
        next(f, None)  # skip the header row; tolerate an empty file
        lines = list(f)

    # First pass: one Artist per distinct writer / mangaka string.
    artists = {}
    for line in lines:
        (title, vo_title, writer, mangaka, editor, origin, genre1, genre2,
         manga_type, synopsis, poster) = line.rstrip('\n').split(';;')
        for artist in (writer, mangaka):
            if artist in artists:
                continue
            # Match the artist being processed (the pattern used to be
            # applied to `writer` even when handling the mangaka).
            m = artist_pattern.match(artist)
            if m:
                last_name, first_name = m.groups()
                last_name = last_name.lower().capitalize()
            else:
                first_name = ''
                last_name = artist
            artists[artist], _ = Artist.objects.get_or_create(
                first_name=first_name, last_name=last_name)

    # Second pass: the mangas themselves, then their genres.
    for line in lines:
        # Strip the newline so it does not end up inside `poster`.
        (title, vo_title, writer, mangaka, editor, origin, genre1, genre2,
         manga_type, synopsis, poster) = line.rstrip('\n').split(';;')
        try:
            if not Manga.objects.filter(title=title, vo_title=vo_title).exists():
                manga = Manga(
                    title=title,
                    vo_title=vo_title,
                    mangaka=artists[mangaka],
                    writer=artists[writer],
                    editor=editor,
                    origin=origin.lower().replace('hong kong', 'hong-kong').replace('international', 'intl'),
                    manga_type=manga_type.lower(),
                    source='',
                    poster=poster,
                    synopsis=synopsis,
                )
                manga.save()
            else:
                manga = Manga.objects.get(title=title, vo_title=vo_title)
            if genre1:
                manga.genre.add(Genre.objects.get(title=genre1))
            if genre2:
                manga.genre.add(Genre.objects.get(title=genre2))
        except IntegrityError as err:
            print(line)
            print(writer)
            print(err)
            break
        except DataError as err:
            print(line)
            print(origin)
            print(err)
            break
        except Genre.DoesNotExist as err:
            print(line)
            print('Genres: [%s] [%s]' % (genre1, genre2))
            print(err)
            break