import utils
import csv
import re

# Clean the writers export. Each input row describes one writer; columns 1-4
# hold '|'-separated lists (clip ids, work types, roles, additional infos)
# that are walked in parallel below.
#
# newline='' is what the csv module requires for files handed to csv.reader /
# csv.writer; without it the writer produces '\r\r\n' row endings on Windows.
with open('initial/writers.csv', 'r', encoding='utf8', newline='') as data:
    reader = csv.reader(data)
    with open('cleaned/writes_cleaned.csv', 'w', encoding='utf8',
              newline='') as out:
        writer = csv.writer(out)
        staff_map = utils.get_staff_map()
        clips = utils.get_clip_set()
        next(reader)  # skip the header row

        # Checked below to avoid handling the same (staff id, clip id) pair
        # twice. NOTE(review): nothing in the visible code adds to this set.
        added = set()

        for row in reader:
            name = utils.lettres(row[0]).lstrip()
            # [1:-1] drops the first and last character of each cell
            # (presumably enclosing brackets -- confirm against the data)
            # before splitting on the literal '|' separator.
            clipIds = row[1][1:-1].split('|')
            workTypes = row[2][1:-1].split('|')
            roles = row[3][1:-1].split('|')
            addInfos = row[4][1:-1].split('|')
            size = len(clipIds)

            # Skip writers that are not in the staff map.
            if name not in staff_map:
                continue
            # Skip rows whose parallel lists do not line up one-to-one.
            if not (len(workTypes) == size and len(roles) == size
                    and len(addInfos) == size):
                continue

            staffid = staff_map[name]  # same for every clip of this row
            for i in range(size):
                clipId = clipIds[i]
                if clipId not in clips:
                    continue
                pair = (staffid, clipId)
                if pair not in added:
                    workType = workTypes[i]
                    role = roles[i]
                    # NOTE(review): the script appears to be cut off here --
                    # no row is written to `writer` and `pair` is never
                    # recorded in `added` in the code that is visible.
import utils

# Clean the release-dates export: keep one row per (clip id, country id) pair
# for clips present in the clip set and countries present in the country map.
#
# newline='' is what the csv module requires for files handed to csv.reader /
# csv.writer; without it the writer produces '\r\r\n' row endings on Windows.
with open('initial/release_dates.csv', encoding="utf8",
          newline='') as csvfile:
    reader = csv.reader(csvfile)

    with open('cleaned/releasedates_cleaned.csv', 'w', encoding="utf8",
              newline='') as out:
        wr = csv.writer(out)
        country_map = utils.get_country_map()
        clips = utils.get_clip_set()
        next(reader)  # skip the header row

        # (clip id, country id) pairs already written
        added = set()

        for row in reader:
            clipid = row[0]
            if clipid not in clips:
                continue

            # Normalise the country name before looking it up
            # (utils.acc presumably strips accents -- confirm in utils).
            no_accents = utils.acc(row[1])
            only_letters = utils.lettres(no_accents).lstrip()
            if only_letters == 'Democratic Republic of Congo':
                only_letters = 'Democratic Republic of the Congo'

            if only_letters not in country_map:
                continue

            countryId = country_map[only_letters]
            new_row = (clipid, countryId)
            if new_row in added:
                continue

            # Only keep the numbers and the letters in the "ReleaseDate"
            # column; done last so discarded rows are not cleaned needlessly.
            only_numbers_letters = utils.alet(row[2])
            wr.writerow((clipid, countryId, only_numbers_letters))
            added.add(new_row)
Exemple #3
0
# Clean the genres export: assign an id to every distinct cleaned genre name
# and write each (genre id, genre name) pair once.
#
# NOTE(review): `genres` and `genreId` are read and updated below but are not
# defined in the visible part of this script; they are presumably initialised
# just above (e.g. an empty set and a starting id) -- confirm before running.
genre_map = {}

clips = utils.get_clip_set()

# newline='' is what the csv module requires for files handed to csv.reader /
# csv.writer; without it the writer produces '\r\r\n' row endings on Windows.
with open('initial/genres.csv', encoding="utf8", newline='') as csvfile:
    reader = csv.reader(csvfile)
    next(reader)  # skip the header row
    with open('cleaned/genres_cleaned.csv', 'w', encoding="utf8",
              newline='') as out:
        wr = csv.writer(out)
        added = set()
        for row in reader:
            if row[0] not in clips:
                continue

            unaccented = utils.acc(row[1])
            genre_name = utils.lettres(unaccented)

            # Reject values that changed too much during cleaning, as well as
            # empty and placeholder names.
            if utils.diff_letters(unaccented, genre_name) >= 2:
                continue
            if not genre_name or genre_name.lower() in ('null', 'none'):
                continue

            is_new = genre_name not in genres
            new_row = (genreId if is_new else genre_map[genre_name],
                       genre_name)
            if new_row in added:
                continue

            if is_new:
                # First sighting of this genre: register it under the next id.
                genres.add(genre_name)
                genre_map[genre_name] = genreId
                genreId += 1
            wr.writerow(new_row)
            added.add(new_row)
import utils
import csv
import re

# Clean the actors export. Each input row describes one actor; columns 1-4
# hold '|'-separated lists (clip ids, characters, orders, additional infos)
# that are walked in parallel below.
#
# newline='' is what the csv module requires for files handed to csv.reader /
# csv.writer; without it the writer produces '\r\r\n' row endings on Windows.
with open('initial/actors.csv', 'r', encoding='utf8', newline='') as data:
    reader = csv.reader(data)
    with open('cleaned/acts_cleaned.csv', 'w', encoding='utf8',
              newline='') as out:
        writer = csv.writer(out)
        staff_map = utils.get_staff_map()
        clips = utils.get_clip_set()
        next(reader)  # skip the header row

        # Checked below to avoid handling the same (staff id, clip id) pair
        # twice. NOTE(review): nothing in the visible code adds to this set.
        added = set()

        for row in reader:
            name = utils.lettres(row[0]).lstrip()
            # [1:-1] drops the first and last character of each cell
            # (presumably enclosing brackets -- confirm against the data)
            # before splitting on the literal '|' separator.
            clipIds = row[1][1:-1].split('|')
            chars = row[2][1:-1].split('|')
            orders = row[3][1:-1].split('|')
            addInfos = row[4][1:-1].split('|')
            size = len(clipIds)

            # Skip actors that are not in the staff map.
            if name not in staff_map:
                continue
            # Skip rows whose parallel lists do not line up one-to-one.
            if not (len(chars) == size and len(orders) == size
                    and len(addInfos) == size):
                continue

            staffid = staff_map[name]  # same for every clip of this row
            for i in range(size):
                clipId = clipIds[i]
                if clipId not in clips:
                    continue
                pair = (staffid, clipId)
                if pair not in added:
                    cha = utils.lettres(chars[i])
                    order = utils.numbers(orders[i])
                    # NOTE(review): the script appears to be cut off here --
                    # no row is written to `writer` and `pair` is never
                    # recorded in `added` in the code that is visible.
import csv
import unicodedata
import re
import utils

# Clean the clips export. Writes two files: the cleaned clip rows, and a
# one-column list of the clip ids that were kept.
#
# newline='' is what the csv module requires for files handed to csv.reader /
# csv.writer; without it the writer produces '\r\r\n' row endings on Windows.
with open('initial/clips.csv', encoding='utf8', newline='') as csvfile:
    reader = csv.reader(csvfile)
    next(reader)  # skip the header row
    with open('cleaned/clips_set.csv', 'w', encoding='utf8',
              newline='') as fclips_set:
        clip_set = csv.writer(fclips_set)
        with open('cleaned/clips_cleaned.csv', 'w', encoding='utf8',
                  newline='') as out:
            wr = csv.writer(out)
            added = set()  # clip ids already written
            for row in reader:
                clipid = row[0]
                # Skip empty or placeholder ids and ids seen before.
                if not clipid or clipid.lower() == 'null' or clipid in added:
                    continue

                # Only keep the numbers in the "Year" column
                only_numbers = utils.numbers(row[2])
                # Only keep the letters in the "ClipType" column (going by
                # the helper's name -- confirm in utils)
                only_letters = utils.lettres(row[3])

                wr.writerow((clipid, row[1], only_numbers, only_letters))
                added.add(clipid)
                # Wrapped in a list so the id is written as a single column
                # rather than one column per character.
                clip_set.writerow([clipid])