Exemple #1
0
def createTeamsDimension(path='TPD-CSV/LigaRecord/'):
    """Build the teams dimension rows from the LigaRecord CSV exports.

    Reads ``teams.csv`` and ``user_details_logins.csv`` from ``path`` and
    produces one dimension row per row of the cleaned teams table.

    Args:
        path: Prefix that is concatenated directly with the file names, so it
            has to end with a path separator.

    Returns:
        A list of dicts with the keys 'Team Key (PK)' (sequential surrogate
        key starting at 1), 'Team Natural ID', 'Team Name',
        'Team Create Date', 'Team Origin', 'Team Is Paid' and
        'Team In League'.

    Raises:
        KeyError: If a team's 'id_user' does not occur in
            ``user_details_logins.csv``.
    """
    teamsTable = ext.tableCleanup(ext.csvReader(path + 'teams.csv'))

    # The "team in league" information lives in user_details_logins, keyed by
    # user. When a user has several rows, the last one read wins.
    detailsTable = ext.tableCleanup(
        ext.csvReader(path + 'user_details_logins.csv'))
    inLeagueByUser = {row['id_user']: row['in_league'] for row in detailsTable}

    teams = []
    for teamKey, row in enumerate(teamsTable, 1):
        team = {
            'Team Key (PK)': teamKey,
            'Team Natural ID': row['id'],
            'Team Name': row['name'],
            'Team Create Date': row['createdate'],
            'Team Origin': row['origin'],
        }

        # NOTE(review): both flags are compared against the integer 1, which
        # assumes ext.tableCleanup converts these columns to numbers - confirm.
        if row['is_paid'] == 1:
            team['Team Is Paid'] = 'Team is paid'
        else:
            team['Team Is Paid'] = 'Team is free'

        if inLeagueByUser[row['id_user']] == 1:
            team['Team In League'] = 'Team in league'
        else:
            team['Team In League'] = 'Team not in league'

        teams.append(team)

    return teams
Exemple #2
0
def createVisitsFacts(path='TPD-CSV/'):
    """Build the visits fact rows: one row per login record and weekday.

    Every row of ``LigaRecord/user_details_logins.csv`` carries seven
    per-weekday login counters ('logins_monday' ... 'logins_sunday'). Each
    counter becomes its own fact row, linked to the season and to the date
    identified by the row's 'round_order' plus the weekday number.

    Args:
        path: Prefix that is concatenated directly with
            'LigaRecord/user_details_logins.csv', so it has to end with a
            path separator.

    Returns:
        A list of dicts with the keys 'User Key (FK)', 'Season Key (FK)',
        'Date Key (FK)' and 'Visit Count'. Rows for one login record are
        emitted in Monday-to-Sunday order.
    """
    loginRows = ext.tableCleanup(
        ext.csvReader(path + 'LigaRecord/user_details_logins.csv'))

    # Counter columns of user_details_logins.csv paired with the weekday
    # number used for the date lookup (Monday = 1 ... Sunday = 7). A tuple
    # keeps the iteration order fixed on every Python version.
    weekdayColumns = (
        ('logins_monday', 1),
        ('logins_tuesday', 2),
        ('logins_wednesday', 3),
        ('logins_thursday', 4),
        ('logins_friday', 5),
        ('logins_saturday', 6),
        ('logins_sunday', 7),
    )

    # NOTE(review): both dimensions are built with their own default paths;
    # `path` is not forwarded to them - confirm this is intended.
    dateDict = date.createDateDimension()
    seasonDict = season.createSeasonDimension()

    visitsFactsList = []
    for line in loginRows:
        # Resolve the season foreign key once per login record.
        seasonLink = foreignCheck(seasonDict, line['season'], 'Season Name',
                                  'Season Key (PK)')
        user = line['id_user']
        roundOrder = line['round_order']

        for column, weekdayNumber in weekdayColumns:
            # The date is looked up by round number plus the number of the
            # weekday column currently being processed.
            dateLink = foreignCheck(dateDict,
                                    [roundOrder, weekdayNumber],
                                    ['Round Number', 'Weekday'],
                                    'Date Key (PK)')

            # TODO: the durable user key is still missing; the natural user
            # id is stored as the user foreign key for now.
            visitsFactsList.append({
                'User Key (FK)': user,
                'Season Key (FK)': seasonLink,
                'Date Key (FK)': dateLink,
                'Visit Count': line[column],
            })

    return visitsFactsList
Exemple #3
0
def createDateDimension(path='TPD-CSV/'):
    """Build the date dimension: 365 consecutive days of one season.

    The first day is the 'start_date' of the first row of
    ``LigaRecord/rounds.csv``; one dimension row is produced for that day and
    each of the following 364 days.

    Args:
        path: Prefix that is concatenated directly with the file names
            ('LigaRecord/rounds.csv' and 'classicos.csv'), so it has to end
            with a path separator.

    Returns:
        A list of 365 dicts, each holding the surrogate key 'Date Key (PK)'
        (1..365) together with the calendar attributes and the round-related
        attributes computed by the module's helper functions.

    Raises:
        IndexError: If the cleaned rounds table is empty.
    """
    roundsTable = ext.tableCleanup(
        ext.csvReader(path + 'LigaRecord/rounds.csv'))
    classicosTable = ext.tableCleanup(ext.csvReader(path + 'classicos.csv'))

    # Proleptic Gregorian ordinal of the first round's start date; every
    # generated day is an offset from it.
    firstOrdinal = roundsTable[0]['start_date'].toordinal()

    dates = []
    for offset in range(365):
        day = datetime.fromordinal(firstOrdinal + offset)
        weekdayIndex = day.weekday()  # Monday == 0 ... Sunday == 6

        if weekdayIndex <= 4:
            weekendIndicator = 'Weekday'
        else:
            weekendIndicator = 'Weekend'

        if day.month == 2:
            winterIndicator = 'Winter transfer season'
        else:
            winterIndicator = 'Non winter transfer season'

        # No source data is available for the 'Turn' / 'Turn Indicator'
        # attributes, so they are not populated.
        dates.append({
            'Date Key (PK)': offset + 1,
            'Day': day.day,
            'Day Of Month': day.day,
            'Weekday': weekdayIndex + 1,
            'Calendar Weekday': getCalendarWeekday(weekdayIndex),
            'Month': day.month,
            'Calendar Month': getCalendarMonth(day.month),
            'Year': day.year,
            'Date Full': day.date().strftime('%Y-%m-%d'),
            'Weekend Indicator': weekendIndicator,
            'Season Stage Indicator': getStageIndicator(day, roundsTable),
            'Round Number': roundNumber(day, roundsTable),
            'Round Lifecycle Indicator': roundLifecycleIndicator(
                day, roundsTable),
            'Lifecycle Round Number': roundNumber(day, roundsTable),
            'Round Includes Classic Match': classicMatch(day, classicosTable),
            'Is Winter': winterIndicator,
        })

    return dates
Exemple #4
0
def createSeasonDimension(path='TPD-CSV/LigaRecord/'):
    """Build the single season dimension row from ``rounds.csv``.

    The season changes once a year, so one row is enough. The season is taken
    to start on 07-01 of the year of the reference round's 'start_date' and
    to end on 06-30 of the following year.

    Args:
        path: Prefix that is concatenated directly with 'rounds.csv', so it
            has to end with a path separator.

    Returns:
        A dict (not a list) with the keys 'Season Key (PK)' (always 1),
        'Season Name', 'Season Start Date', 'Season End Date',
        'Season Has Updated Game Version',
        'Season Has Variable Weekday Publish Date' and
        'Team Player Transfers Allowed Per Month'.

    Raises:
        IndexError: If the cleaned rounds table has fewer than two rows.
    """
    roundsRows = ext.tableCleanup(ext.csvReader(path + 'rounds.csv'))

    # NOTE(review): the second row (index 1) is used as the reference round -
    # confirm that index 0 was not intended.
    referenceRound = roundsRows[1]
    seasonName = referenceRound['season']
    startYear = referenceRound['start_date'].year

    # Seasons starting in 2015 or later get the "updated" set of attributes.
    if startYear >= 2015:
        gameVersion = 'Updated game version'
        publishDate = "Variable weekday publish date"
        transfersPerMonth = 1
    else:
        gameVersion = "Deprecated game version"
        publishDate = "Fixed weekday publish date"
        transfersPerMonth = 2

    return {
        'Season Key (PK)': 1,
        'Season Name': seasonName,
        'Season Start Date': str('%i-%s' % (startYear, '07-01')),
        'Season End Date': str('%i-%s' % (startYear + 1, '06-30')),
        'Season Has Updated Game Version': gameVersion,
        'Season Has Variable Weekday Publish Date': publishDate,
        'Team Player Transfers Allowed Per Month': transfersPerMonth,
    }
Exemple #5
0
def createUserDimension(path='TPD-CSV/LigaRecord/'):
    """Build the user dimension rows from ``user.csv``.

    Each row of the cleaned user table becomes one dimension row. The
    address-related attributes come from the dict returned by
    ``af.addressFinder`` for the row's 'address' value.

    Args:
        path: Prefix that is concatenated directly with 'user.csv', so it has
            to end with a path separator.

    Returns:
        A list of dicts with the keys 'User Natural ID', 'User Nickname',
        'User Birthdate', 'User Gender', 'User Club', 'User Region',
        'User Zipcode Locality', 'User Zipcode Locality Designation',
        'User Locality', 'User County', 'User District',
        'User Original Start Date' and 'User Agegroup'.
    """
    userTable = ext.tableCleanup(ext.csvReader(path + 'user.csv'))

    # TODO: attributes not populated yet because no source column has been
    # identified: 'User Email', 'User Country', 'User Season Start Date',
    # 'User Premium Date', 'User Is In League', 'Effective Date Row',
    # 'Expiration Date Row', 'Timestamp Row', 'Is Current Row'.
    users = []
    for line in userTable:
        user = dict()
        user['User Natural ID'] = line['id']
        user['User Nickname'] = line['nickname']
        user['User Birthdate'] = str(line['birthdate'])
        user['User Gender'] = line['gender']
        user['User Club'] = line['club']
        user['User Region'] = line['region']

        addressInfo = af.addressFinder(line['address'])
        # The zipcode is stored as "<cod_postal>-<ext_postal>".
        user['User Zipcode Locality'] = (
            addressInfo["cod_postal"] + "-" + addressInfo["ext_postal"])
        user['User Zipcode Locality Designation'] = addressInfo["desig_postal"]
        user['User Locality'] = addressInfo["nome_localidade"]
        user['User County'] = addressInfo["nome_concelho"]
        user['User District'] = addressInfo["nome_distrito"]

        user['User Original Start Date'] = str(line['startdate'])
        user['User Agegroup'] = line['agegroup']

        users.append(user)

    return users