Ejemplo n.º 1
0
def get_or_create_artist(name):
    """Return the Artist best matching ``name``, creating one if needed.

    ``name`` is split on whitespace into a (last name, first name) pair:

    * exactly two tokens are read as ``"<last> <first>"``;
    * otherwise the final token is the last name and the preceding tokens,
      joined by single spaces, are the first name;
    * a name containing no space at all is used as the last name, with an
      empty first name.

    NOTE(review): two-token names are read last-name-first while longer
    names are read last-name-last -- confirm this asymmetry is intended.
    NOTE(review): a name made only of spaces still raises IndexError while
    being split, exactly as before this change.

    Lookup order:

    1. a single exact (first_name, last_name) match is returned as is;
       several exact matches become the candidates;
    2. otherwise artists sharing the (non-empty) first name, or failing
       that the last name, become the candidates;
    3. the candidates are passed to ``pick_among``; a truthy result is
       returned;
    4. otherwise a new Artist is saved and returned.

    Unlike the previous bare ``except``, unexpected errors (database
    failures, KeyboardInterrupt, ...) now propagate instead of silently
    falling through to the creation of a new artist.
    """
    if ' ' in name:
        parts = name.split()
        if len(parts) == 2:
            last, first = parts
        else:
            last, first = parts[-1], ' '.join(parts[:-1])  # Diana Wynne Jones
    else:
        last, first = name, ''

    contestants = []
    exact_matches = Artist.objects.filter(first_name=first, last_name=last)
    if exact_matches.exists():
        try:
            return exact_matches.get()
        except Artist.MultipleObjectsReturned:
            # Ambiguous exact match: let pick_among() arbitrate below.
            contestants = exact_matches
        except Artist.DoesNotExist:
            # The row vanished between the two queries: no candidates,
            # so a fresh artist gets created below.
            pass
    elif first != '' and Artist.objects.filter(first_name=first).exists():
        contestants = Artist.objects.filter(first_name=first)
    elif Artist.objects.filter(last_name=last).exists():
        contestants = Artist.objects.filter(last_name=last)

    if contestants:
        choice = pick_among(contestants)
        if choice:
            return choice

    artist = Artist(first_name=first, last_name=last)
    artist.save()
    return artist
Ejemplo n.º 2
0
def get_or_create_artist(name):
    """Find the Artist designated by ``name`` or create it.

    Name parsing (whitespace-separated tokens):

    * two tokens: first token is the last name, second the first name;
    * any other count (when ``name`` contains a space): the final token is
      the last name, the preceding tokens joined by spaces the first name;
    * no space in ``name``: the whole string is the last name and the first
      name is empty.

    NOTE(review): the two-token and the longer forms use opposite orders for
    first/last name -- confirm that this is deliberate.
    NOTE(review): a ``name`` consisting solely of spaces raises IndexError
    during parsing; this is unchanged.

    Resolution:

    * exactly one artist with this first and last name: returned directly;
    * several such artists: they are submitted to ``pick_among``;
    * none: artists with the same non-empty first name, or else the same
      last name, are submitted to ``pick_among``;
    * if ``pick_among`` yields a truthy value it is returned, otherwise a
      new Artist is saved and returned.

    Only the "several rows" / "no row" outcomes of the exact lookup are
    handled here; any other exception is no longer swallowed and reaches
    the caller.
    """
    if ' ' in name:
        parts = name.split()
        if len(parts) == 2:
            last, first = parts
        else:
            last, first = parts[-1], ' '.join(parts[:-1])  # Diana Wynne Jones
    else:
        last, first = name, ''

    contestants = []
    same_full_name = Artist.objects.filter(first_name=first, last_name=last)
    if same_full_name.exists():
        try:
            return same_full_name.get()
        except Artist.MultipleObjectsReturned:
            # More than one exact match: defer the decision to pick_among().
            contestants = same_full_name
        except Artist.DoesNotExist:
            # Deleted concurrently after the exists() check; create it anew.
            pass
    elif first != '' and Artist.objects.filter(first_name=first).exists():
        contestants = Artist.objects.filter(first_name=first)
    elif Artist.objects.filter(last_name=last).exists():
        contestants = Artist.objects.filter(last_name=last)

    if contestants:
        choice = pick_among(contestants)
        if choice:
            return choice

    artist = Artist(first_name=first, last_name=last)
    artist.save()
    return artist
Ejemplo n.º 3
0
def run():
    """Import artists and manga from ``../data/manga-news/manga.csv``.

    The file is read twice, skipping its first (header) line each time. Every
    data line must hold exactly eleven ``;;``-separated fields: title,
    vo_title, writer, mangaka, editor, origin, genre1, genre2, manga_type,
    synopsis, poster (a different count raises ValueError on unpacking).

    Pass 1 resolves each distinct writer/mangaka string to an Artist. A name
    shaped like ``"LASTNAME Firstname"`` (upper-case last name) is split and
    the last name is capitalized; any other string is stored whole as the
    last name with an empty first name. Missing artists are created.

    Pass 2 creates each Manga that does not exist yet (matched on title and
    vo_title) and attaches the non-empty genres, which must already exist.
    On IntegrityError, DataError or Genre.DoesNotExist the offending line and
    error are printed and the import stops.
    """
    name_pattern = re.compile('^([A-ZÔÛÏ\'-]+) (.*)$')
    artists = {}

    with open('../data/manga-news/manga.csv') as f:
        next(f)  # skip the header line
        for line in f:
            # Drop the line terminator so it does not end up in `poster`.
            title, vo_title, writer, mangaka, editor, origin, genre1, genre2, manga_type, synopsis, poster = line.rstrip('\n').split(';;')
            for artist in (writer, mangaka):
                if artist in artists:
                    continue
                # Parse the name being processed. The previous code always
                # parsed `writer`, so a mangaka could be stored under the
                # writer's name.
                m = name_pattern.match(artist)
                if m:
                    last_name, first_name = m.groups()
                    last_name = last_name.lower().capitalize()
                else:
                    first_name = ''
                    last_name = artist
                if not Artist.objects.filter(first_name=first_name, last_name=last_name).exists():
                    a = Artist(first_name=first_name, last_name=last_name)
                    a.save()
                else:
                    a = Artist.objects.get(first_name=first_name, last_name=last_name)
                artists[artist] = a

    with open('../data/manga-news/manga.csv') as f:
        next(f)  # skip the header line
        for line in f:
            title, vo_title, writer, mangaka, editor, origin, genre1, genre2, manga_type, synopsis, poster = line.rstrip('\n').split(';;')
            try:
                if not Manga.objects.filter(title=title, vo_title=vo_title).exists():
                    manga = Manga(
                        title=title,
                        vo_title=vo_title,
                        mangaka=artists[mangaka],
                        writer=artists[writer],
                        editor=editor,
                        # Normalize the origin labels found in the dump.
                        origin=origin.lower().replace('hong kong', 'hong-kong').replace('international', 'intl'),
                        manga_type=manga_type.lower(),
                        source='',
                        poster=poster,
                        synopsis=synopsis,
                    )
                    manga.save()
                else:
                    manga = Manga.objects.get(title=title, vo_title=vo_title)
                if genre1:
                    manga.genre.add(Genre.objects.get(title=genre1))
                if genre2:
                    manga.genre.add(Genre.objects.get(title=genre2))
            except IntegrityError as err:
                print(line)
                print(writer)
                print(err)
                break
            except DataError as err:
                print(line)
                print(origin)
                print(err)
                break
            except Genre.DoesNotExist as err:
                print(line)
                print('Genres: [%s] [%s]' % (genre1, genre2))
                print(err)
                break
Ejemplo n.º 4
0
    def _build_staff(self,
                     work: Work,
                     creators: List[Dict[str, Any]],
                     reload_role_cache: bool = False) -> List[Staff]:
        """Create the Staff rows that link ``work`` to ``creators``.

        Each entry of ``creators`` is a dict read through the keys ``"name"``,
        ``"anidb_creator_id"`` and ``"role"`` (the role is used as a model
        instance: its ``pk`` is read and it is assigned to ``Staff.role``).

        For every distinct AniDB creator id, the Artist matching either the
        name or the id is updated with both values and saved; when there is
        none, a new Artist is queued and inserted with a single
        ``bulk_create``. Staff rows already present for the work are left
        alone; only the missing (work, role, artist) combinations are
        bulk-created.

        :param work: work the staff belongs to.
        :param creators: creator descriptions, one per (creator, role) pair.
        :param reload_role_cache: when true, ``self.role_map`` is deleted
            first -- presumably a cached attribute rebuilt on next access
            (TODO confirm).
        :return: the Staff objects that were created by this call.
        """
        if reload_role_cache:
            del self.role_map

        # Artists are indexed by AniDB creator id rather than by name, so two
        # creators sharing a name cannot shadow each other.
        artists_by_aid = {}
        artists_to_add = []
        for nc in creators:
            aid = nc["anidb_creator_id"]
            # Skip before querying: a creator with several roles appears
            # several times and only needs to be resolved once.
            if aid in artists_by_aid:
                continue

            artist = Artist.objects.filter(
                Q(name=nc["name"]) | Q(anidb_creator_id=aid)).first()

            if not artist:  # This artist does not yet exist : will be bulk created
                artist = Artist(name=nc["name"], anidb_creator_id=aid)
                artists_to_add.append(artist)
            else:  # This artist exists : prevent duplicates by updating with the AniDB id
                artist.name = nc["name"]
                artist.anidb_creator_id = aid
                artist.save()
            artists_by_aid[aid] = artist

        # Re-index with the objects returned by bulk_create, which are the
        # ones guaranteed to reflect what was inserted.
        for artist in Artist.objects.bulk_create(artists_to_add):
            artists_by_aid[artist.anidb_creator_id] = artist

        existing_staff = set(
            Staff.objects.filter(
                work=work,
                role__in=[nc["role"] for nc in creators],
                artist__name__in=[nc["name"] for nc in creators]).values_list(
                    'work', 'role', 'artist'))
        missing_staff = [
            Staff(work=work,
                  role=nc["role"],
                  artist=artists_by_aid[nc["anidb_creator_id"]])
            for nc in creators
            if (work.pk, nc["role"].pk,
                artists_by_aid[nc["anidb_creator_id"]].pk) not in existing_staff
        ]

        Staff.objects.bulk_create(missing_staff)
        return missing_staff
Ejemplo n.º 5
0
    def _build_staff(self,
                     work: Work,
                     creators: List[Dict[str, Any]],
                     reload_role_cache: bool = False) -> List[Staff]:
        """Create the Staff rows that link ``work`` to ``creators``.

        Each entry of ``creators`` is a dict read through the keys ``"name"``,
        ``"anidb_creator_id"`` and ``"role"`` (the role is assigned to
        ``Staff.role``, so it is expected to be a model instance).

        For every distinct AniDB creator id, the Artist matching either the
        name or the id is updated with both values and saved; when there is
        none, a new Artist is queued and inserted with a single
        ``bulk_create``. Only the (work, role, artist) combinations that are
        not in the database yet are then bulk-created.

        Fixes over the previous implementation:

        * artists were paired with creators by list position although
          existing and newly created artists were appended in different
          phases, which attached staff to the wrong artist;
        * existing staff were fetched as primary-key tuples but compared with
          tuples of model instances, so nothing was ever seen as existing;
        * a creator listed with several roles was inserted once per role.

        :param work: work the staff belongs to.
        :param creators: creator descriptions, one per (creator, role) pair.
        :param reload_role_cache: when true, ``self.role_map`` is deleted
            first -- presumably a cached attribute rebuilt on next access
            (TODO confirm).
        :return: the Staff objects that were created by this call.
        """
        if reload_role_cache:
            del self.role_map

        artists_by_aid = {}
        artists_to_add = []
        for nc in creators:
            aid = nc["anidb_creator_id"]
            if aid in artists_by_aid:  # Same creator, another role: already resolved
                continue

            artist = Artist.objects.filter(
                Q(name=nc["name"]) | Q(anidb_creator_id=aid)).first()

            if not artist:  # This artist does not yet exist : will be bulk created
                artist = Artist(name=nc["name"], anidb_creator_id=aid)
                artists_to_add.append(artist)
            else:  # This artist exists : prevent duplicates by updating with the AniDB id
                artist.name = nc["name"]
                artist.anidb_creator_id = aid
                artist.save()
            artists_by_aid[aid] = artist

        # Re-index with the objects returned by bulk_create, which are the
        # ones guaranteed to reflect what was inserted.
        for artist in Artist.objects.bulk_create(artists_to_add):
            artists_by_aid[artist.anidb_creator_id] = artist

        # values_list() on relations yields primary keys, so the membership
        # test below must be expressed with primary keys as well.
        existing_staff = set(
            Staff.objects.filter(
                work=work,
                role__in=[nc["role"] for nc in creators],
                artist__in=list(artists_by_aid.values())).values_list(
                    'work', 'role', 'artist'))

        missing_staff = [
            Staff(work=work,
                  role=nc["role"],
                  artist=artists_by_aid[nc["anidb_creator_id"]])
            for nc in creators
            if (work.pk, nc["role"].pk,
                artists_by_aid[nc["anidb_creator_id"]].pk) not in existing_staff
        ]

        Staff.objects.bulk_create(missing_staff)
        return missing_staff
Ejemplo n.º 6
0
def run():
    """Copy manga-news metadata onto existing Anime rows.

    Every data line of ``../data/manga-news/anime.csv`` (the first line is
    skipped) is split on ``;;`` and turned into an ``AnimeData`` record.
    Each line of ``../data/PAIR.log`` then provides, separated by ``::``, the
    id of an Anime and the 1-based position of its manga-news record. The
    anime receives the record's studio, editor, author, type, two genres,
    episode count, origin and synopsis, and is saved.

    An author shaped like ``"LASTNAME Firstname"`` is split, with the last
    name capitalized; any other author string is used whole as the last name.
    The matching Artist is fetched, or created when absent.
    """
    records = []
    with open('../data/manga-news/anime.csv') as csv_file:
        next(csv_file)  # header line
        for row in csv_file:
            fields = row.split(';;')
            # The unpacking doubles as a sanity check: a row that does not
            # hold exactly twelve fields raises ValueError here.
            (title, vo_title, studio, author, editor, anime_type, genre1,
             genre2, nb_episodes, origin, synopsis, poster) = fields
            records.append(AnimeData(*fields))

    with open('../data/PAIR.log') as pair_file:
        for pair in pair_file:
            ids = pair.split('::')
            mangaki_id = int(ids[0])
            mn_id = int(ids[1])

            anime = Anime.objects.get(id=mangaki_id)
            data = records[mn_id - 1]
            anime.studio = create_if_not_exists(Studio, data.studio)
            anime.editor = create_if_not_exists(Editor, data.editor)

            parsed = re.match('^([A-ZÔÛÏ\'-]+) (.*)$', data.author)
            if parsed:
                family_name, given_name = parsed.groups()
                family_name = family_name.lower().capitalize()
            else:
                given_name, family_name = '', data.author

            if Artist.objects.filter(first_name=given_name,
                                     last_name=family_name).count():
                author_obj = Artist.objects.get(first_name=given_name,
                                                last_name=family_name)
                print('Exists', author_obj)
            else:
                print('NEW')
                author_obj = Artist(first_name=given_name,
                                    last_name=family_name)
                author_obj.save()

            anime.author = author_obj
            anime.anime_type = data.anime_type
            # Resolve both genres first, then attach them, in that order.
            genres = [create_if_not_exists(Genre, label)
                      for label in (data.genre1, data.genre2)]
            for genre in genres:
                anime.genre.add(genre)
            anime.nb_episodes = data.nb_episodes
            anime.origin = data.origin
            anime.synopsis = data.synopsis  # The most important!
            anime.save()
Ejemplo n.º 7
0
def run():
    """Fill in existing Anime rows from the manga-news dump.

    Step 1 reads ``../data/manga-news/anime.csv`` past its first line and
    converts each ``;;``-separated line into an ``AnimeData`` record.

    Step 2 reads ``../data/PAIR.log``: each line carries an Anime id and the
    1-based position of a manga-news record, separated by ``::``. The record's
    studio, editor, author, type, two genres, episode count, origin and
    synopsis are written to that anime, which is then saved.

    Authors matching ``"LASTNAME Firstname"`` are split (last name
    capitalized); other strings become the last name with an empty first
    name. The Artist is looked up by both names and created when missing.
    """
    entries = []
    with open("../data/manga-news/anime.csv") as dump:
        next(dump)  # header line
        for raw in dump:
            columns = raw.split(";;")
            # Twelve-way unpacking rejects malformed rows with a ValueError.
            (title, vo_title, studio, author, editor, anime_type, genre1,
             genre2, nb_episodes, origin, synopsis, poster) = columns
            entries.append(AnimeData(*columns))

    with open("../data/PAIR.log") as pairs:
        for raw in pairs:
            halves = raw.split("::")
            mangaki_id = int(halves[0])
            mn_id = int(halves[1])

            anime = Anime.objects.get(id=mangaki_id)
            data = entries[mn_id - 1]
            anime.studio = create_if_not_exists(Studio, data.studio)
            anime.editor = create_if_not_exists(Editor, data.editor)

            hit = re.match("^([A-ZÔÛÏ'-]+) (.*)$", data.author)
            if hit:
                surname, forename = hit.groups()
                surname = surname.lower().capitalize()
            else:
                forename, surname = "", data.author

            if Artist.objects.filter(first_name=forename, last_name=surname).count():
                creator = Artist.objects.get(first_name=forename, last_name=surname)
                print("Exists", creator)
            else:
                print("NEW")
                creator = Artist(first_name=forename, last_name=surname)
                creator.save()

            anime.author = creator
            anime.anime_type = data.anime_type
            # Both genres are resolved before either one is attached.
            resolved = [create_if_not_exists(Genre, label) for label in (data.genre1, data.genre2)]
            for genre in resolved:
                anime.genre.add(genre)
            anime.nb_episodes = data.nb_episodes
            anime.origin = data.origin
            anime.synopsis = data.synopsis  # The most important!
            anime.save()
Ejemplo n.º 8
0
def run():
    """Import artists and manga from ``../data/manga-news/manga.csv``.

    The file is read twice, skipping its first (header) line each time. Every
    data line must hold exactly eleven ``;;``-separated fields: title,
    vo_title, writer, mangaka, editor, origin, genre1, genre2, manga_type,
    synopsis, poster (a different count raises ValueError on unpacking).

    Pass 1 resolves each distinct writer/mangaka string to an Artist. A name
    shaped like ``"LASTNAME Firstname"`` (upper-case last name) is split and
    the last name is capitalized; any other string is stored whole as the
    last name with an empty first name. Missing artists are created.

    Pass 2 creates each Manga that does not exist yet (matched on title and
    vo_title) and attaches the non-empty genres, which must already exist.
    On IntegrityError, DataError or Genre.DoesNotExist the offending line and
    error are printed and the import stops.
    """
    name_pattern = re.compile('^([A-ZÔÛÏ\'-]+) (.*)$')
    artists = {}

    with open('../data/manga-news/manga.csv') as f:
        next(f)  # skip the header line
        for line in f:
            # Drop the line terminator so it does not end up in `poster`.
            (title, vo_title, writer, mangaka, editor, origin, genre1,
             genre2, manga_type, synopsis,
             poster) = line.rstrip('\n').split(';;')
            for artist in (writer, mangaka):
                if artist in artists:
                    continue
                # Parse the name being processed. The previous code always
                # parsed `writer`, so a mangaka could be stored under the
                # writer's name.
                m = name_pattern.match(artist)
                if m:
                    last_name, first_name = m.groups()
                    last_name = last_name.lower().capitalize()
                else:
                    first_name = ''
                    last_name = artist
                if not Artist.objects.filter(first_name=first_name,
                                             last_name=last_name).exists():
                    a = Artist(first_name=first_name, last_name=last_name)
                    a.save()
                else:
                    a = Artist.objects.get(first_name=first_name,
                                           last_name=last_name)
                artists[artist] = a

    with open('../data/manga-news/manga.csv') as f:
        next(f)  # skip the header line
        for line in f:
            (title, vo_title, writer, mangaka, editor, origin, genre1,
             genre2, manga_type, synopsis,
             poster) = line.rstrip('\n').split(';;')
            try:
                if not Manga.objects.filter(title=title,
                                            vo_title=vo_title).exists():
                    manga = Manga(title=title,
                                  vo_title=vo_title,
                                  mangaka=artists[mangaka],
                                  writer=artists[writer],
                                  editor=editor,
                                  # Normalize the origin labels of the dump.
                                  origin=origin.lower().replace(
                                      'hong kong', 'hong-kong').replace(
                                          'international', 'intl'),
                                  manga_type=manga_type.lower(),
                                  source='',
                                  poster=poster,
                                  synopsis=synopsis)
                    manga.save()
                else:
                    manga = Manga.objects.get(title=title, vo_title=vo_title)
                if genre1:
                    manga.genre.add(Genre.objects.get(title=genre1))
                if genre2:
                    manga.genre.add(Genre.objects.get(title=genre2))
            except IntegrityError as err:
                print(line)
                print(writer)
                print(err)
                break
            except DataError as err:
                print(line)
                print(origin)
                print(err)
                break
            except Genre.DoesNotExist as err:
                print(line)
                print('Genres: [%s] [%s]' % (genre1, genre2))
                print(err)
                break