def createTeamsDimension(path='TPD-CSV/LigaRecord/'):
    """Build the teams dimension from the LigaRecord CSV exports.

    Reads ``teams.csv`` and ``user_details_logins.csv`` from *path* and
    returns a list with one dict per row of ``teams.csv``. The surrogate
    key 'Team Key (PK)' is assigned sequentially, starting at 1, in the
    order the rows are read.

    Raises KeyError when a team's ``id_user`` does not appear in
    ``user_details_logins.csv``.
    """
    teamRows = ext.tableCleanup(ext.csvReader(path + 'teams.csv'))

    # The "Team In League" attribute is taken from user_details_logins.csv.
    loginRows = ext.tableCleanup(
        ext.csvReader(path + 'user_details_logins.csv'))

    # id_user -> in_league; if a user shows up in several rows the last
    # row read wins.
    inLeagueByUser = {row['id_user']: row['in_league'] for row in loginRows}

    teams = []
    for surrogateKey, row in enumerate(teamRows, 1):
        # NOTE(review): the == 1 comparisons assume ext.tableCleanup
        # already converted these flags to integers -- confirm.
        teams.append({
            'Team Key (PK)': surrogateKey,
            'Team Natural ID': row['id'],
            'Team Name': row['name'],
            'Team Create Date': row['createdate'],
            'Team Origin': row['origin'],
            'Team Is Paid': (
                'Team is paid' if row['is_paid'] == 1 else 'Team is free'),
            'Team In League': (
                'Team in league'
                if inLeagueByUser[row['id_user']] == 1
                else 'Team not in league'),
        })
    return teams
def createVisitsFacts(path='TPD-CSV/'):
    """Build the visits fact table: one row per login record and weekday.

    Reads ``LigaRecord/user_details_logins.csv`` from *path*. For every
    row and for each of its seven ``logins_<weekday>`` columns a fact dict
    is produced with the keys 'User Key (FK)', 'Season Key (FK)',
    'Date Key (FK)' and 'Visit Count'.

    The season and date foreign keys are resolved with ``foreignCheck``
    against the dimensions returned by ``season.createSeasonDimension()``
    and ``date.createDateDimension()`` (both called with their default
    paths).

    Returns the list of fact dicts, ordered by input row and then by
    weekday from Monday to Sunday.
    """
    loginRows = ext.tableCleanup(
        ext.csvReader(path + 'LigaRecord/user_details_logins.csv'))

    # Column names as found in user_details_logins.csv, paired with the
    # weekday number used for the date lookup (1 = Monday ... 7 = Sunday).
    # A tuple keeps the iteration order fixed regardless of dict ordering.
    weekdayColumns = (
        ('logins_monday', 1),
        ('logins_tuesday', 2),
        ('logins_wednesday', 3),
        ('logins_thursday', 4),
        ('logins_friday', 5),
        ('logins_saturday', 6),
        ('logins_sunday', 7),
    )

    # Dimensions the facts are linked against.
    dateDict = date.createDateDimension()
    seasonDict = season.createSeasonDimension()

    visitsFactsList = []
    for row in loginRows:
        # Resolve the season foreign key from the season name.
        seasonLink = foreignCheck(
            seasonDict, row['season'], 'Season Name', 'Season Key (PK)')
        userId = row['id_user']
        roundOrder = row['round_order']

        for column, weekdayNumber in weekdayColumns:
            # The date is identified by (round number, weekday number);
            # the lookup must use the number of the *current* column.
            dateLink = foreignCheck(
                dateDict,
                [roundOrder, weekdayNumber],
                ['Round Number', 'Weekday'],
                'Date Key (PK)')

            # TODO: the durable user key is still missing; the natural
            # id_user is stored as the user key for now.
            visitsFactsList.append({
                'User Key (FK)': userId,
                'Season Key (FK)': seasonLink,
                'Date Key (FK)': dateLink,
                'Visit Count': row[column],
            })
    return visitsFactsList
def createDateDimension(path='TPD-CSV/'):
    """Build the date dimension: 365 consecutive daily rows.

    Reads ``LigaRecord/rounds.csv`` and ``classicos.csv`` from *path*.
    The calendar starts on the ``start_date`` of the first row of
    ``rounds.csv`` and covers the 365 days from there on. The surrogate
    key 'Date Key (PK)' runs from 1 to 365.

    Returns a list of dicts, one per day. Raises IndexError when
    ``rounds.csv`` yields no rows.
    """
    roundsTable = ext.tableCleanup(
        ext.csvReader(path + 'LigaRecord/rounds.csv'))
    classicosTable = ext.tableCleanup(ext.csvReader(path + 'classicos.csv'))

    # Proleptic Gregorian ordinal of the first round's start date; every
    # generated day is an offset from it.
    firstOrdinal = roundsTable[0]['start_date'].toordinal()

    dates = []
    for offset in range(365):
        day = datetime.fromordinal(firstOrdinal + offset)
        weekdayIndex = day.weekday()  # 0 = Monday ... 6 = Sunday

        # 'Turn' and 'Turn Indicator' are not populated: there is no
        # information available for them.
        dates.append({
            'Date Key (PK)': offset + 1,
            'Day': day.day,
            'Day Of Month': day.day,
            'Weekday': weekdayIndex + 1,
            'Calendar Weekday': getCalendarWeekday(weekdayIndex),
            'Month': day.month,
            'Calendar Month': getCalendarMonth(day.month),
            'Year': day.year,
            'Date Full': day.date().strftime('%Y-%m-%d'),
            'Weekend Indicator': (
                'Weekday' if weekdayIndex <= 4 else 'Weekend'),
            'Season Stage Indicator': getStageIndicator(day, roundsTable),
            'Round Number': roundNumber(day, roundsTable),
            'Round Lifecycle Indicator': roundLifecycleIndicator(
                day, roundsTable),
            'Lifecycle Round Number': roundNumber(day, roundsTable),
            'Round Includes Classic Match': classicMatch(
                day, classicosTable),
            'Is Winter': (
                'Winter transfer season'
                if day.month == 2
                else 'Non winter transfer season'),
        })
    return dates
def createSeasonDimension(path='TPD-CSV/LigaRecord/'):
    """Build the season dimension row from ``rounds.csv``.

    The season changes once a year: it starts on 07-01 and is expected to
    end on 06-30 of the following year. Name and start year are taken
    from ``rounds.csv`` under *path*.

    Returns a single dict (not a list) describing the season, with
    'Season Key (PK)' fixed to 1. Raises IndexError when ``rounds.csv``
    yields fewer than two rows.
    """
    seasonStartDay = '07-01'
    seasonEndDay = '06-30'

    roundRows = ext.tableCleanup(ext.csvReader(path + 'rounds.csv'))

    # NOTE(review): the second row (index 1) is the reference row, not
    # the first -- confirm this is intentional.
    referenceRound = roundRows[1]
    seasonName = referenceRound['season']
    year = referenceRound['start_date'].year

    # Seasons from 2015 onwards share one rule set; earlier seasons
    # share another.
    if year >= 2015:
        gameVersionLabel = 'Updated game version'
        publishDateLabel = "Variable weekday publish date"
        transfersPerMonth = 1
    else:
        gameVersionLabel = "Deprecated game version"
        publishDateLabel = "Fixed weekday publish date"
        transfersPerMonth = 2

    return {
        'Season Key (PK)': 1,
        'Season Name': seasonName,
        'Season Start Date': '%i-%s' % (year, seasonStartDay),
        'Season End Date': '%i-%s' % (year + 1, seasonEndDay),
        'Season Has Updated Game Version': gameVersionLabel,
        'Season Has Variable Weekday Publish Date': publishDateLabel,
        'Team Player Transfers Allowed Per Month': transfersPerMonth,
    }
def createUserDimension(path='TPD-CSV/LigaRecord/'):
    """Build the user dimension from ``user.csv``.

    Reads ``user.csv`` from *path* and returns a list with one dict per
    row. The address attributes (zipcode, locality, county, district) are
    derived by passing the row's ``address`` to ``af.addressFinder``.
    Birthdate and start date are stored as strings via ``str()``.

    Raises KeyError when a row lacks one of the expected columns or when
    the address lookup result lacks one of the expected fields.
    """
    userTable = ext.tableCleanup(ext.csvReader(path + 'user.csv'))

    # TODO: attributes not populated yet -- 'User Email', 'User Country',
    # 'User Season Start Date', 'User Premium Date', 'User Is In League'
    # (an in_league column exists in user_details_logins.csv),
    # 'Effective Date Row', 'Expiration Date Row', 'Timestamp Row' and
    # 'Is Current Row'.
    users = []
    for line in userTable:
        addressInfo = af.addressFinder(line['address'])
        users.append({
            'User Natural ID': line['id'],
            'User Nickname': line['nickname'],
            'User Birthdate': str(line['birthdate']),
            'User Gender': line['gender'],
            'User Club': line['club'],
            'User Region': line['region'],
            # Zipcode is rendered as "<cod_postal>-<ext_postal>".
            'User Zipcode Locality': (
                addressInfo["cod_postal"] + "-" + addressInfo["ext_postal"]),
            'User Zipcode Locality Designation': addressInfo["desig_postal"],
            'User Locality': addressInfo["nome_localidade"],
            'User County': addressInfo["nome_concelho"],
            'User District': addressInfo["nome_distrito"],
            'User Original Start Date': str(line['startdate']),
            'User Agegroup': line['agegroup'],
        })
    return users