Beispiel #1
0
    def parseAndDownloadTeamYearlyStandings(self):
        """Download yearly team data for every team linked from the saved season pages.

        Every ``.p`` file found in ``self.getSeasonDir()`` is loaded and parsed
        as HTML; the file's base name is used as the season.  Each anchor
        carrying a ``data-clubhouse-uid`` attribute contributes one
        (team ID, team name) pair, which is then passed to
        ``self.downloadTeamDataByYear``.

        Raises:
            ValueError: If the same team ID is seen with two different names
                within one season page.
        """
        for seasonFile in findExt(self.getSeasonDir(), ext=".p", debug=False):
            year = getBaseFilename(seasonFile)
            bsdata = getHTML(getFile(seasonFile))

            # team ID -> team name; the name is the basename of the link and
            # the ID is the basename of the link's parent directory.
            teamNames = {}
            for anchor in bsdata.findAll("a"):
                anchorAttrs = anchor.attrs
                if anchorAttrs.get("data-clubhouse-uid") is None:
                    continue

                href = anchorAttrs['href']
                teamName = getBasename(href)
                teamID = getBasename(getDirname(href))

                known = teamNames.get(teamID)
                if known is not None and known != teamName:
                    raise ValueError("Error in ID for this year!")
                teamNames[teamID] = teamName

            for teamID, teamName in teamNames.items():
                self.downloadTeamDataByYear(teamID,
                                            teamName,
                                            season=str(year),
                                            debug=True)
Beispiel #2
0
    def processWikiFilmYearlyData(self, procYear=None, debug=False):
        """Parse the saved WikiFilm pages and store the per-year rankings as JSON.

        Args:
            procYear: When given, only data files matching ``str(procYear)``
                are processed; otherwise every ``.p`` file in the data
                directory is used.
            debug: Print each file name as it is processed.

        For each file, the base file name is used as the year and the mapping
        returned by ``self.parseWikiFilmYearlyData`` is turned into a list of
        ``(key, value)`` pairs sorted ascending by value.  The first five
        entries per year are printed, and the combined result is written to
        ``<results dir>/<self.name>.json``.
        """
        outdir = self.getDataDir()
        if procYear is None:
            files = findExt(outdir, ext=".p")
        else:
            files = findPatternExt(outdir, pattern=str(procYear), ext=".p")

        yearlyData = {}
        for ifile in sorted(files):
            if debug:
                print("Processing {0}".format(ifile))
            year = getBaseFilename(ifile)
            movies = self.parseWikiFilmYearlyData(ifile, debug=False)

            # Ascending order by value (the original passed reverse=False).
            yearlyData[year] = sorted(movies.items(),
                                      key=operator.itemgetter(1))
            print("---->", year,
                  " (Top 5/{0} Movies) <----".format(len(yearlyData[year])))
            for item in yearlyData[year][:5]:
                print(item)
            print('\n')

        savename = setFile(self.getResultsDir(), "{0}.json".format(self.name))
        print("Saving {0} WikiFilm data to {1}".format(len(yearlyData),
                                                       savename))
        saveFile(savename, yearlyData)
Beispiel #3
0
    def processAACTACategoryData(self, debug=False):
        """Combine the per-category AACTA data files into one per-year movie list.

        Each file found in the data directory is parsed with
        ``self.parseAACTACategoryData`` (the file's base name is passed as the
        category).  The movies of all categories are pooled per year,
        de-duplicated, and stored as ``[movie, 10]`` pairs in
        ``<results dir>/<self.name>.json``.

        Raises:
            ValueError: If a file yields no results.
        """
        from collections import OrderedDict

        # NOTE(review): sibling methods pass ext=".p"; the "*.p" form is kept
        # as-is because findExt's matching rules are not visible here.
        files = findExt(self.getDataDir(), ext="*.p")

        movies = OrderedDict()
        print(files)
        for ifile in files:
            if debug:
                print("Processing {0}".format(ifile))
            fileCategory = getBaseFilename(ifile)
            results = self.parseAACTACategoryData(ifile,
                                                  fileCategory,
                                                  debug=debug)

            if len(results) == 0:
                raise ValueError("No results for {0}".format(ifile))

            for year, yearData in results.items():
                for categoryData in yearData.values():
                    # The year entry is only created once a category exists.
                    movies.setdefault(year, []).extend(categoryData)

        # Drop duplicates and attach the fixed value 10 to every movie.
        for year in movies:
            movies[year] = [[movie, 10] for movie in set(movies[year])]

        savename = setFile(self.getResultsDir(), "{0}.json".format(self.name))
        print("Saving {0} Years of AACTA Data to {1}".format(
            len(movies), savename))
        saveFile(savename, movies)
Beispiel #4
0
    def createRawOscarData(self, debug=True):
        """Merge the parsed yearly oscar files with saved manual corrections.

        All ``.json`` files in ``self.wikiData.getResultsDir()`` are loaded and
        keyed by their base file name.  If ``saved.yaml`` exists in the
        corrections directory, its "Winner" / "Nominees" entries replace the
        parsed values for the matching year and title.  The merged data is
        written to ``raw.yaml`` in the corrections directory.

        Args:
            debug: Print the number of oscar files found.
        """
        print("Checking for poorly parsed oscar data.")
        oscarFiles = sorted(findExt(self.wikiData.getResultsDir(),
                                    ext=".json"))
        if debug:
            print("Found {0} oscar files".format(len(oscarFiles)))

        yearlyData = {}
        for oscarFile in oscarFiles:
            yearlyData[getBaseFilename(oscarFile)] = getFile(oscarFile)

        correctionsFile = setFile(self.getCorrectionsDir(), "saved.yaml")
        savedData = getFile(correctionsFile) if isFile(correctionsFile) else {}

        for year, titles in savedData.items():
            for title, fixes in titles.items():
                savedWinner = fixes.get("Winner")
                savedNominees = fixes.get("Nominees")
                if savedWinner is not None:
                    print("Overwritting {0} {1} winner".format(year, title))
                    yearlyData[year][title]["Winner"] = savedWinner
                if savedNominees is not None:
                    print("Overwritting {0} {1} nominees".format(year, title))
                    yearlyData[year][title]["Nominees"] = savedNominees

        rawFile = setFile(self.getCorrectionsDir(), "raw.yaml")
        saveFile(idata=yearlyData, ifile=rawFile)
Beispiel #5
0
 def getMyBoxSetMusic(self, dirval):
     """Collect the box-set albums found below ``dirval``.

     Every entry of ``self.directoryMapping["BoxSet"]`` is joined onto
     ``dirval`` and expanded with ``glob``.  For each match, the base names of
     everything ``findAll`` returns are appended to the album list and the
     file count is updated via ``self.updateFileCount``.

     Returns:
         The populated ``myArtistAlbumData`` object.
     """
     boxsets = myArtistAlbumData()
     for pattern in self.directoryMapping["BoxSet"]:
         for boxsetDir in glob(join(dirval, pattern)):
             albumNames = [getBaseFilename(entry) for entry in findAll(boxsetDir)]
             boxsets.albums += albumNames
             self.updateFileCount(boxsets, boxsetDir)
     return boxsets
Beispiel #6
0
 def getURL(self):
     """Build the kworb iTunes artist URL object for ``self.inputdata``.

     Returns:
         An ``artistDBURLClass`` holding the artist page URL, or one with
         ``url=None`` and ``err="NoInput"`` when no input data is set.
     """
     if self.inputdata is None:
         return artistDBURLClass(url=None, err="NoInput")

     artistName = getBaseFilename(self.inputdata)
     artistURL = "https://kworb.net/itunes/artist/{0}.html".format(artistName)
     return artistDBURLClass(url=artistURL)
Beispiel #7
0
    def parseUltimateMovieRankingsYearlyData(self, procYear=None, debug=False):
        """Parse the saved Ultimate Movie Rankings pages into per-year rankings.

        Args:
            procYear: When given, only data files matching ``str(procYear)``
                are parsed; otherwise every ``.p`` file in the data directory.
            debug: Unused; kept for interface compatibility.

        For every table row with exactly 11 cells, the film title is taken
        from the second cell (with ``span`` tags removed and a trailing
        `` (<year>)`` stripped) and the score from the last cell, falling back
        to the second-to-last cell when the last one is not numeric.  Films
        are sorted by score, highest first, and written to
        ``<results dir>/<self.name>.json``.

        Raises:
            ValueError: If neither of the last two cells of a row is numeric.
        """
        outdir = self.getDataDir()
        if procYear is None:
            files = findExt(outdir, ext=".p")
        else:
            files = findPatternExt(outdir, pattern=str(procYear), ext=".p")

        from collections import OrderedDict
        movieData = OrderedDict()
        for ifile in sorted(files):
            bsdata = getHTML(getFile(ifile))
            year = getBaseFilename(ifile)
            yearSuffix = " ({0})".format(year)

            movies = {}
            for table in bsdata.findAll("table"):
                for tr in table.findAll("tr"):
                    tds = tr.findAll("td")
                    if len(tds) != 11:
                        continue

                    film = removeTag(tds[1], 'span').text
                    film = film.replace(yearSuffix, "")
                    try:
                        rank = float(tds[-1].text)
                    except ValueError:
                        # Last cell is not numeric; try the one before it.
                        try:
                            rank = float(tds[-2].text)
                        except ValueError:
                            raise ValueError(tds[-1], tds[-2], tr)

                    movies[film] = rank

            movieData[year] = movies

        yearlyData = {}
        for year in sorted(movieData.keys()):
            yearlyData[year] = sorted(movieData[year].items(),
                                      key=operator.itemgetter(1),
                                      reverse=True)
            print("---->", year,
                  " (Top 5/{0} Movies) <----".format(len(yearlyData[year])))
            for item in yearlyData[year][:5]:
                print(item)
            print('\n')

        savename = setFile(self.getResultsDir(), "{0}.json".format(self.name))
        print("Saving {0} Years of Ultimate Movie Rankings data to {1}".format(
            len(yearlyData), savename))
        saveFile(savename, yearlyData)
    def parse(self, modVal, expr, force=False, debug=False):
        """Parse new unofficial artist files and merge them into the ModVal DB.

        Args:
            modVal: ModVal bucket whose files and DB data are processed.
            expr: Passed through to ``self.getArtistUnofficialFiles``.
            force: Passed through to the file lookup and the DB load.
            debug: Unused; kept for interface compatibility.

        Artists not yet in the DB are added as-is.  For artists already in the
        DB, each media type present in the new file is merged by ``code``:
        existing entries are kept and entries from the new file overwrite
        those with the same code.  The DB is saved only if at least one file
        was processed.
        """
        ts = timestat("Parsing ModVal={0} Unofficial Files".format(modVal))

        tsFiles = timestat("Finding Files To Parse")
        newFiles = self.getArtistUnofficialFiles(modVal, expr, force)
        tsFiles.stop()

        N = len(newFiles)
        modValue = 50 if N >= 100 else 10
        # Only load the DB when there is something to merge into it.
        dbdata = {}
        if N > 0:
            tsDB = timestat("Loading ModVal={0} DB Data".format(modVal))
            dbdata = self.getDBData(modVal, force)
            tsDB.stop()

        newData = 0
        tsParse = timestat(
            "Parsing {0} New Unofficial Files For ModVal={1}".format(
                N, modVal))
        for i, ifile in enumerate(newFiles):
            if (i + 1) % modValue == 0 or (i + 1) == N:
                print("{0: <15}Parsing {1}".format("{0}/{1}".format(i + 1, N),
                                                   ifile))
            artistID = getBaseFilename(ifile)
            info = self.artist.getData(ifile)

            existing = dbdata.get(artistID)
            if existing is None:
                dbdata[artistID] = info
                newData += 1
                continue

            # Only media types present in the new file can change anything,
            # so there is no need to walk the types that exist only in the DB.
            for mediaType in list(info.media.media.keys()):
                newMedia = info.media.media.get(mediaType)
                if newMedia is None:
                    continue
                merged = {}
                oldMedia = existing.media.media.get(mediaType)
                if oldMedia is not None:
                    merged.update({m.code: m for m in oldMedia})
                merged.update({m.code: m for m in newMedia})
                existing.media.media[mediaType] = list(merged.values())
            newData += 1

        tsParse.stop()

        print("Found {0} Unofficial Artist Records For ModVal={1}".format(
            newData, modVal))
        if newData > 0:
            self.saveDBData(modVal, dbdata, newData)

        # Stop the overall timer, as the sibling primary-file parse() does.
        ts.stop()
Beispiel #9
0
    def parse(self, modVal, expr, force=False, debug=False, quiet=False):
        """Parse new primary artist files and store them in the ModVal DB.

        Args:
            modVal: ModVal bucket whose files and DB data are processed.
            expr: Passed through to ``self.getArtistPrimaryFiles``.
            force: Passed to the file lookup; when ``True`` a fresh, empty DB
                is used instead of loading the existing one.
            debug: Print each file's ID comparison; on an ID mismatch, print
                the error and abort with a ``ZeroDivisionError``.
            quiet: Only reported in the timer message.

        Returns:
            ``None`` when there are no files to parse, otherwise ``True`` if
            at least one record was stored and ``False`` if none were.
        """
        ts = timestat("Parsing Primary ModVal={0} Files(expr=\'{1}\', force={2}, debug={3}, quiet={4})".format(modVal, expr, force, debug, quiet))

        tsFiles  = timestat("Finding Files To Parse")
        newFiles = self.getArtistPrimaryFiles(modVal, expr, force)
        tsFiles.stop()

        N = len(newFiles)
        if N == 0:
            ts.stop()
            return

        # Progress interval: roughly every tenth of the files, in steps of 250.
        modValue = max([250 * round((N/10)/250), 250])

        if force is True or not fileUtil(self.disc.getDBModValFilename(modVal)).exists:
            tsDB = timestat("Creating New DB For ModVal={0}".format(modVal))
            dbdata = {}
            # Stop the DB timer here; the overall timer is stopped at the end.
            tsDB.stop()
        else:
            tsDB = timestat("Loading ModVal={0} DB Data".format(modVal))
            dbdata = self.disc.getDBModValData(modVal)
            tsDB.stop()

        newData  = 0
        tsParse = timestat("Parsing {0} New Files For ModVal={1}".format(N, modVal))
        for i,ifile in enumerate(newFiles):
            if (i+1) % modValue == 0 or (i+1) == N:
                tsParse.update(n=i+1, N=N)

            artistID = getBaseFilename(ifile)
            info     = self.artist.getData(ifile)
            if debug:
                print("\t",ifile,' ==> ',info.ID.ID,' <-> ',artistID)
            if info.ID.ID != artistID:
                # The parsed ID must match the file name; skip mismatches.
                if debug is True:
                    print("Error for {0}  ID={1}  FileID={2}".format(info.meta.title,info.ID.ID,artistID))
                    # Deliberate hard stop in debug mode (ZeroDivisionError).
                    1/0
                continue
            dbdata[artistID] = info
            newData += 1
        tsParse.stop()

        if newData > 0:
            dbdata = Series(dbdata)
            print("Saving [{0}/{1}] {2} Entries To {3}".format(newData, len(dbdata), "ID Data", self.disc.getDBModValFilename(modVal)))
            self.disc.saveDBModValData(modVal=modVal, idata=dbdata)

        ts.stop()

        return newData > 0
Beispiel #10
0
 def parseFilms101Data(self, debug=False):
     """Parse the saved films101 pages and store the per-year movie lists.

     Each ``.p`` file in the data directory is parsed with
     ``self.parseFilms101YearlyData``; the file's base name is used as the
     year.  Every movie is stored as a ``[movie, 10]`` pair and the result is
     written to ``<results dir>/<self.name>.json``.

     Args:
         debug: Passed through to ``self.parseFilms101YearlyData``.
     """
     outdir = self.getDataDir()
     resultsdir = self.getResultsDir()
     movies = {}

     for ifile in sorted(findExt(outdir, ext=".p")):
         year    = getBaseFilename(ifile)
         results = self.parseFilms101YearlyData(ifile, debug=debug)
         movies[year] = [[movie, 10] for movie in results]
         print("Found {0} movies in {1}".format(len(movies[year]),year))

     # Reuse the results directory looked up above instead of asking again.
     savename = setFile(resultsdir, "{0}.json".format(self.name))
     print("Saving {0} Years of films101 Data to {1}".format(len(movies), savename))
     saveFile(savename, movies)
Beispiel #11
0
    def mergeBoxOfficeMojoResults(self, debug=False):
        """Merge the per-year result files into a single ``results.json``.

        Every ``.json`` file in the results directory whose base name parses
        as an integer is loaded and stored under that name.  Files with
        non-numeric names are skipped, which also keeps a previously written
        ``results.json`` from being merged into itself.

        Args:
            debug: Print progress information.
        """
        retval = {}
        files = findExt(self.getResultsDir(), ext=".json")
        if debug:
            print("Found {0} files in the results directory".format(
                len(files)))
        for ifile in sorted(files):
            year = getBaseFilename(ifile)
            try:
                int(year)
            except ValueError:
                # Not a yearly file.
                continue
            data = getFile(ifile)
            retval[year] = data
            if debug:
                print("  Adding {0} entries from {1}".format(len(data), ifile))

        savename = setFile(self.getResultsDir(), "results.json")
        if debug:
            print("Saving", len(retval), "years of movie data to", savename)
        saveFile(savename, retval)
Beispiel #12
0
    def downloadKWorbSpotifyArtists(self, update=False):
        """Download the kworb Spotify page of every artist on the overview page.

        Args:
            update: When ``True``, re-download the artist overview page before
                parsing it.

        The overview page is read from ``kworb_spotifyartists.p``.  Each table
        row is mapped header -> cell; the link in the "Artist" cell gives the
        artist page, which is saved as ``<artists dir>/data/<name>.p``.
        Artists whose file already exists are skipped.  A failed download is
        reported, followed by a one-second pause, and processing continues.
        """
        listURL = "https://kworb.net/spotify/artists.html"
        listFile = "kworb_spotifyartists.p"
        if update is True:
            self.dutils.downloadArtistURL(url=listURL,
                                          savename=listFile,
                                          force=True)

        bsdata = getHTML(listFile)
        data = []
        artistDir = self.disc.getArtistsDir()
        saveDir = setDir(artistDir, "data")
        print(artistDir)
        for table in bsdata.findAll("table"):
            ths = [th.text for th in table.findAll("th")]
            # The first row is skipped; the rest are zipped with the headers.
            for tr in table.findAll("tr")[1:]:
                data.append(dict(zip(ths, tr.findAll("td"))))

        print("Found {0} Spotify Artists".format(len(data)))
        for i, item in enumerate(data):
            link = item["Artist"].find('a')
            artistHref = link.attrs['href']
            name = link.text
            savename = setFile(saveDir,
                               "{0}.p".format(getBaseFilename(artistHref)))
            if isFile(savename):
                continue

            fullURL = "{0}/{1}".format(self.spotifyURL, artistHref)
            print("{0}/{1}".format(i, len(data)), "\t-\t", savename, '\t',
                  fullURL, '\t', name)
            try:
                self.dutils.downloadArtistURL(url=fullURL,
                                              savename=savename,
                                              force=True)
            except Exception:
                # Best effort: report, pause briefly, move on to the next one.
                print("  ---> Error")
                sleep(1)
Beispiel #13
0
    def processWikipediaYearlyData(self, procYear=None, debug=False):
        """Parse the saved Wikipedia oscar pages and save one JSON file per year.

        Args:
            procYear: When given, only data files matching ``str(procYear)``
                are processed; otherwise every ``.p`` file in the data
                directory.
            debug: Print file names and nominees, and pass ``debug`` on to the
                fallback parser.

        Each file is parsed with ``self.parseWikipediaOscarData``; if that
        yields nothing, ``self.parseWikipediaOscarDataSpecial`` is tried.  The
        results are written to ``<results dir>/<year>.json``, where the year
        is the data file's base name.

        Raises:
            ValueError: If neither parser returns any results for a file.
        """
        outdir = self.getDataDir()
        if procYear is None:
            files = findExt(outdir, ext=".p")
        else:
            files = findPatternExt(outdir, pattern=str(procYear), ext=".p")

        for ifile in files:
            if debug:
                print("Processing {0}".format(ifile))
            year = getBaseFilename(ifile)
            results = self.parseWikipediaOscarData(ifile, debug=False)

            if len(results) == 0:
                results = self.parseWikipediaOscarDataSpecial(ifile,
                                                              debug=debug)
            if len(results) == 0:
                raise ValueError("No results for {0}".format(ifile))

            for k, v in results.items():
                print("====>", year, '\t', k)
                print("      Winner  :", v["Winner"])
                if debug:
                    print("      Nominees:", v["Nominees"])
                    print("")

            savename = setFile(self.getResultsDir(), "{0}.json".format(year))
            print("Saving {0} wikipedia oscar data to {1}".format(
                year, savename))
            saveFile(savename, results)
Beispiel #14
0
 def findMyMovies(self, debug=False):
     """Index the movie files under ``/Volumes/*/Movies`` and save the index.

     The index maps each file's base name to its full path and is written to
     ``mymovies.json`` in the data directory.  When two files share a base
     name, the one listed later by ``glob`` wins.

     Args:
         debug: Unused; kept for interface compatibility.
     """
     moviePaths = glob("/Volumes/*/Movies/*.*")
     mine = {getBaseFilename(path): path for path in moviePaths}
     print("Found {0} movies on my disks".format(len(moviePaths)))
     saveFile(idata=mine,
              ifile=setFile(self.getDataDir(), "mymovies.json"),
              debug=True)
Beispiel #15
0
    def collect(self, hist, test=False, debug=False,
                years=(2014, 2015, 2016)):
        """Assign players to teams per season from the parsed game data.

        Args:
            hist: Object providing the games, season and statistics result
                directories.
            test: When ``True``, nothing is written to disk.
            debug: Passed to the file lookup and enables printing of the
                per-team results.
            years: Seasons to process; files for other years are skipped.

        For every game results file the year is taken from the part of the
        base file name before the first ``-``.  All games of all teams of that
        season are passed to the ``get*`` helpers, which are expected to fill
        ``self.runners``, ``self.passers``, ``self.punters``, ``self.kickers``
        and ``self.fgkickers`` with per-player team counters.  A player is
        assigned to his most common team when that team accounts for at least
        75% of his entries and the square root of his total count is at least
        2.  Results are saved to ``<year>-stats-extra.json``.

        Raises:
            ValueError: If no year can be derived from a file name.
        """
        from math import sqrt

        files = findExt(hist.getGamesResultsDir(), ext=".p", debug=debug)
        for ifile in files:
            print(ifile)
            try:
                year = int(getBaseFilename(ifile).split("-")[0])
            except Exception as err:
                raise ValueError(
                    "Could not get year from {0}".format(ifile)) from err

            if year not in years:
                continue

            yearData = getFile(ifile)

            seasonFilename = setFile(hist.getSeasonResultsDir(),
                                     "{0}.p".format(year))
            seasonData = getFile(seasonFilename)

            # Reset the per-season player -> team counters.
            statsData = {}
            self.runners = {}
            self.passers = {}
            self.punters = {}
            self.kickers = {}
            self.fgkickers = {}

            for teamData in seasonData.teams.values():
                games = [x["Game"] for x in teamData.games]
                for game in games:
                    gameID = game.gameID

                    try:
                        gameData = yearData[gameID]
                    except KeyError:
                        # No parsed data for this game.
                        continue

                    teamsMetaData = gameData["Teams"]
                    homeTeamMetaData = teamsMetaData["Home"]
                    awayTeamMetaData = teamsMetaData["Away"]
                    driveData = gameData["Plays"]

                    homeID = homeTeamMetaData["ID"]
                    homeAbbrev = homeTeamMetaData["Abbrev"]
                    awayID = awayTeamMetaData["ID"]
                    awayAbbrev = awayTeamMetaData["Abbrev"]

                    # Two-way lookup between team IDs and abbreviations, plus
                    # the abbreviations of the home and away side.
                    fieldMap = {
                        homeID: homeAbbrev,
                        homeAbbrev: homeID,
                        awayID: awayAbbrev,
                        awayAbbrev: awayID,
                    }
                    fieldMap["Home"] = homeAbbrev
                    fieldMap["Away"] = awayAbbrev

                    # Maps each team ID to its opponent's ID.
                    copMap = {homeID: awayID, awayID: homeID}

                    self.getRunners(driveData, fieldMap, debug=False)
                    self.getPassers(driveData, fieldMap, debug=False)
                    self.getPunters(driveData, fieldMap, debug=False)
                    self.getKickers(driveData, copMap, debug=False)
                    self.getFieldGoalKickers(driveData, fieldMap, debug=False)

            ###
            ### Now Assign Player To A Team
            ###
            mapping = {
                "Passers": self.passers,
                "Runners": self.runners,
                "Punters": self.punters,
                "Kickers": self.kickers,
                "FGKickers": self.fgkickers
            }
            for position, players in mapping.items():
                for name, playerTeams in players.items():
                    bestTeamID, bestCount = playerTeams.most_common(1)[0]
                    total = sum(playerTeams.values())
                    frac = bestCount / total
                    if frac < 0.75:
                        continue
                    sig = sqrt(total)
                    if sig < 2:
                        continue
                    teamStats = statsData.setdefault(bestTeamID, {})
                    teamStats.setdefault(position, {})[name] = [
                        round(frac, 1), round(sig, 1)
                    ]

            ## Show team stats
            if debug:
                for teamID, teamStats in statsData.items():
                    print(teamID)
                    for pos, names in teamStats.items():
                        print('\t', pos, names)

            if test is False:
                augmentedStatsFilename = setFile(
                    hist.getStatisticsResultsDir(),
                    "{0}-stats-extra.json".format(year))
                saveFile(idata=statsData,
                         ifile=augmentedStatsFilename,
                         debug=True)
Beispiel #16
0
 def getURL(self):
     """Build the kworb Spotify artist URL object for ``self.inputdata``.

     Returns:
         An ``artistDBURLClass`` holding the artist page URL, or one with
         ``url=None`` and ``err="NoInput"`` when no input data is set (the
         same convention the iTunes variant of this method uses).
     """
     if self.inputdata is None:
         # Previously this fell through and hit an unbound ``artistURL``.
         return artistDBURLClass(url=None, err="NoInput")

     artistURL = "https://kworb.net/spotify/artist/{0}.html".format(
         getBaseFilename(self.inputdata))
     return artistDBURLClass(url=artistURL)
Beispiel #17
0
    def parseTeamYearlyStandings(self,
                                 startYear=2003,
                                 endYear=2018,
                                 debug=False,
                                 verydebug=False):
        """Parse the saved team schedule pages into one season object per year.

        Args:
            startYear: First season to parse (inclusive).
            endYear: Last season to parse (inclusive).
            debug: Print a summary per team; in this mode the data file of a
                team with zero parsed games is also removed.
            verydebug: Print details for every file, table and game.

        For each year, every ``.p`` file in the yearly season directory is
        parsed as an HTML team schedule page.  The team year and ID are read
        from the page's ``og:url`` meta tag, each completed game row becomes a
        ``game`` object on a ``team`` object, and all teams are collected in a
        ``season`` object saved as ``<season results dir>/<year>.p``.

        Raises:
            ValueError: If a page lacks the expected meta data, or a game row
                lacks a date/opponent/result column, has an unknown location
                marker or outcome, or has a malformed score or game link.
        """
        for year in range(startYear, endYear + 1):
            seasonDir = self.getYearlySeasonDir(year)
            files = findExt(seasonDir, ext=".p", debug=False)

            seasonData = season(year)

            for ifile in files:
                nameyear = getBaseFilename(ifile)
                htmldata = getFile(ifile)
                bsdata = getHTML(htmldata)
                teamName = nameyear.replace("-{0}".format(year), "")

                metadata = bsdata.find("meta", {"property": "og:url"})
                if metadata is None:
                    raise ValueError(
                        "Could not find basic team meta data for this file! {0}"
                        .format(ifile))

                # The URL looks like
                # http://www.espn.com/college-football/team/schedule/_/id/201/season/2005
                # The year is kept in its own name so the loop variable
                # ``year`` (used for the output file) is not overwritten.
                try:
                    content = metadata.attrs['content']
                    teamYear = getBasename(content)
                    teamID = getBasename(getDirname(getDirname(content)))
                except Exception as err:
                    raise ValueError(
                        "Could not get team year and ID from meta data: {0}".
                        format(metadata)) from err

                if verydebug:
                    print(teamYear, '\t', teamID, '\t', ifile)

                ## Create Team Object
                teamData = team(year=teamYear,
                                teamName=teamName,
                                teamMascot=None,
                                teamID=teamID)

                tables = bsdata.findAll("table", {"class": "Table2__table"})
                if verydebug:
                    print("\tFound {0} game tables".format(len(tables)))
                for table in tables:
                    trs = table.findAll("tr")

                    # Second row holds the column headers; games follow it.
                    headers = [
                        x.text for x in trs[1].findAll("td") if x is not None
                    ]

                    gameRows = trs[2:]
                    totalGames = len(gameRows)

                    if verydebug:
                        print("\tFound {0} potential games".format(totalGames))

                    for ig, tr in enumerate(gameRows):
                        tds = tr.findAll("td")
                        rowData = dict(zip(headers, tds))

                        ## Get the Date
                        try:
                            dateCell = rowData["Date"]
                        except KeyError as err:
                            print(ifile)
                            raise ValueError(
                                "No date for this game! {0}".format(
                                    rowData)) from err

                        ## Only Keep Games With Regular Dates
                        try:
                            dateval = "{0} {1}".format(
                                dateCell.text.split(", ")[-1], teamYear)
                            date = getDateTime(dateval)
                        except Exception:
                            date = None

                        if date is None:
                            continue

                        ## Check for January Games (in the following year)
                        if date.month == 1:
                            date = addMonths(date, 12)

                        ## Get the Opponent
                        try:
                            opponent = rowData["Opponent"]
                        except KeyError as err:
                            raise ValueError(
                                "No opponent for this game! {0}".format(
                                    rowData)) from err

                        try:
                            oppolink = opponent.find("a")
                            oppohref = oppolink.attrs['href']
                            opponame = getBasename(oppohref)
                            oppoID = getBasename(getDirname(oppohref))
                        except Exception:
                            # Opponent without a team link: keep the plain
                            # text name and use 0 as a placeholder ID.
                            opponame = opponent.text
                            oppoID = 0

                        try:
                            gamespan = opponent.find("span", {"class": "pr2"})
                            gametype = gamespan.text
                        except Exception as err:
                            raise ValueError(
                                "Could not find game type from {0}".format(
                                    opponent)) from err

                        if gametype == "vs":
                            location = teamID
                        elif gametype == "@":
                            location = oppoID
                        else:
                            raise ValueError(
                                "Location --> {0}".format(gametype))

                        if verydebug:
                            print("\t{0}/{1}\t{2}\t{3: <4}{4: <50}".format(
                                ig, totalGames, printDateTime(date), gametype,
                                opponame),
                                  end="\t")

                        ## Get the Result
                        try:
                            result = rowData["Result"]
                        except KeyError as err:
                            raise ValueError(
                                "No result for this game! {0}".format(
                                    rowData)) from err

                        spans = result.findAll("span")
                        if len(spans) == 0:
                            # No result yet: skip this row.
                            continue
                        if len(spans) != 2:
                            raise ValueError(
                                "There are {0} spans in this row!: {1}".format(
                                    len(spans), result))
                        outcome = spans[0].text.strip()
                        score = spans[1].text.strip()

                        # Strip a trailing overtime marker; overtime is not
                        # recorded on the game object.
                        if score.endswith("OT"):
                            score = score[:-3].strip()

                        try:
                            scores = [int(x) for x in score.split('-')]
                        except ValueError as err:
                            raise ValueError(
                                "Could not create integer scores from {0}".
                                format(spans)) from err

                        if outcome == 'W':
                            teamScore = scores[0]
                            oppoScore = scores[1]
                            teamResult = "W"
                            oppoResult = "L"
                        elif outcome == "L":
                            teamScore = scores[1]
                            oppoScore = scores[0]
                            teamResult = "L"
                            oppoResult = "W"
                        elif outcome == "T":
                            teamScore = scores[0]
                            oppoScore = scores[1]
                            teamResult = "T"
                            oppoResult = "T"
                        else:
                            raise ValueError(
                                "Did not recognize game outcome {0}".format(
                                    outcome))

                        ## Get the Game
                        try:
                            gamelink = result.find("a")
                            gamehref = gamelink.attrs['href']
                        except Exception as err:
                            raise ValueError(
                                "Could not find href in link! {0}".format(
                                    result)) from err

                        # NOTE(review): gameID was never assigned before use.
                        # Assumes the game ID is the last path component of
                        # the game link -- TODO confirm against a saved page.
                        gameID = getBasename(gamehref)

                        if verydebug:
                            print("{0}  {1}".format(
                                teamResult, "-".join(
                                    str(x) for x in [teamScore, oppoScore])))

                        ## Create game object
                        gameObj = game(gameID=gameID,
                                       date=date,
                                       teamA=teamID,
                                       teamB=oppoID,
                                       teamAResult=teamResult,
                                       teamBResult=oppoResult,
                                       teamAScore=teamScore,
                                       teamBScore=oppoScore,
                                       location=location)

                        ## Append game to team data
                        teamData.addGame(gameObj)

                ## Show Summary
                teamData.setStatistics()
                if debug:
                    teamData.summary()
                    # NOTE(review): files are only removed in debug mode.
                    if teamData.ngames == 0:
                        removeFile(ifile, debug=True)

                seasonData.addTeam(teamData)

            savename = setFile(self.getSeasonResultsDir(),
                               "{0}.p".format(year))
            saveFile(idata=seasonData, ifile=savename, debug=True)
Beispiel #18
0
    def parseGameData(self,
                      startYear=2003,
                      endYear=2018,
                      debug=False,
                      verydebug=False):
        """Parse the saved play-by-play pages of each season and save the drives.

        For every year in ``[startYear, endYear]`` the ``.p`` files found in
        ``self.getYearlyGamesDir(year)`` are loaded and parsed.  For each game
        the two team descriptions (the first ``team-container`` is stored as
        the away team, the second as the home team) and every drive
        (headlines, details, scores, possession and the individual plays) are
        collected.  The per-year result, keyed by game ID, is passed to
        ``saveFile`` using the name built by
        ``setFile(self.getGamesResultsDir(), "<year>-games.p")``.

        Games whose ID is in ``self.noGameData`` are skipped.  Games without
        any possession markup are skipped and recorded in the return value.

        Args:
            startYear: First season to parse (inclusive).
            endYear: Last season to parse (inclusive).
            debug: Accepted for interface compatibility; not used here.
            verydebug: Print per-game and per-drive diagnostics.

        Returns:
            dict mapping each year to the list of game IDs for which no
            possession data could be found.

        Raises:
            ValueError: If a drive other than the last one of a game has no
                recognised headline, if the play list of a drive cannot be
                read, or if a play lacks its starting position or play text.
        """
        ## Headlines accepted as a correctly parsed drive summary. They never
        ## change, so the lookup is built once instead of once per drive.
        validFGs = [
            "Missed FG", "Field Goal", "FIELD GOAL", "MISSED FG",
            "Made FG", "Field Goal Good", "Field Goal Missed",
            "Blocked FG"
        ]
        validTDs = [
            "Touchdown", "TOUCHDOWN", "END OF HALF Touchdown",
            "Downs Touchdown", "Missed FG Touchdown",
            "End of Half Touchdown", "End of Game Touchdown",
            "PUNT Touchdown", "FUMBLE Touchdown",
            "INTERCEPTION Touchdown", "FIELD GOAL Touchdown",
            "MISSED FG Touchdown", "Rushing Touchdown",
            "Passing Touchdown", "Kickoff Return Touchdown",
            "Interception Return Touch",
            "Turnover on Downs Touchdown",
            "Field Goal Missed Touchdown", "Field Goal Touchdown",
            "Rushing Touchdown Touchdown",
            "Field Goal Good Touchdown",
            "Passing Touchdown Touchdown",
            "Fumble Return Touchdown Touchdown", "Rushing TD",
            "Passing TD", "Blocked Punt TD", "Punt Return TD",
            "Fumble Ret. TD", "Interception TD", "Fumble TD",
            "Rushing TD Touchdown", "Blocked Punt TD Touchdown",
            "Blocked FG (TD)", "Punt Return TD Touchdown",
            "Kick Return TD", "Kickoff Return Touchdown Touchdown",
            "Missed FG (TD) Touchdown",
            "Blocked FG (TD) Touchdown",
            "Punt Return Touchdown Touchdown",
            "Interception Return Touch Touchdown"
        ]
        validEnds = [
            "End of Half", "End of Game", "END OF HALF",
            "END OF GAME", "End of 4th Quarter"
        ]
        validTOs = [
            "Fumble", "Interception", "FUMBLE", "INTERCEPTION",
            "Kickoff", "KICKOFF", "Blocked Punt"
        ]
        validTOPnts = [
            "Interception Touchdown", "Safety", "Punt Touchdown",
            "Fumble Touchdown", "Punt Return Touchdown",
            "Fumble Return Touchdown", "SAFETY"
        ]
        validDowns = [
            "Punt", "Downs", "PUNT", "Possession (For OT Drives)",
            "DOWNS", "Possession (For OT Drives) Touchdown",
            "Turnover on Downs", "Poss. on downs", "Penalty"
        ]
        validPlay = [
            "Rush", "Pass", "Sack", "Timeout", "Incomplete",
            "Pass Complete"
        ]
        valid2PT = ["2PT Pass failed", "Missed PAT Return"]
        ## NOTE: "on-side kick" used to be listed separately but was never
        ## part of the accepted headlines; that behaviour is kept.
        validHeadlines = frozenset(validFGs + validTDs + validEnds +
                                   validTOs + validTOPnts + validDowns +
                                   validPlay + valid2PT)

        noData = {}
        for year in range(startYear, endYear + 1):

            yearData = {}

            gamesDir = self.getYearlyGamesDir(year)
            files = findExt(gamesDir, ext=".p", debug=False)

            noData[year] = []
            for i, ifile in enumerate(files):
                gameID = getBaseFilename(ifile)

                if gameID in self.noGameData:
                    continue

                htmldata = getFile(ifile)
                bsdata = getHTML(htmldata)

                ###################
                ## Get Team Data
                ###################
                teamData = bsdata.findAll("div", {"class": "team-container"})

                longNames = [
                    x.find("span", {"class": "long-name"}) for x in teamData
                ]
                longNames = [x.text for x in longNames if x is not None]

                shortNames = [
                    x.find("span", {"class": "short-name"}) for x in teamData
                ]
                shortNames = [x.text for x in shortNames if x is not None]

                teamAbbrevs = [
                    x.find("span", {"class": "abbrev"}) for x in teamData
                ]
                teamAbbrevs = [x.text for x in teamAbbrevs]

                ## The team ID is the number in the logo file name
                teamIDs = [
                    x.find("img", {"class": "team-logo"}) for x in teamData
                ]
                teamIDs = [x.attrs['src'] for x in teamIDs if x is not None]
                teamIDs = [re.search(r"(\d+)\.png", x) for x in teamIDs]
                teamIDs = [x.groups()[0] for x in teamIDs]

                awayTeam = {
                    "Name": longNames[0],
                    "Mascot": shortNames[0],
                    "Abbrev": teamAbbrevs[0],
                    "ID": teamIDs[0]
                }
                homeTeam = {
                    "Name": longNames[1],
                    "Mascot": shortNames[1],
                    "Abbrev": teamAbbrevs[1],
                    "ID": teamIDs[1]
                }

                ## The page title is only used in diagnostics
                metadata = bsdata.find("meta", {"property": "og:title"})
                title = None
                if metadata is not None:
                    title = metadata.attrs['content']
                    if verydebug:
                        print("==> {0}".format(title))

                ## Possessions (two page layouts are tried)
                posData = bsdata.find("ul", {"class": "css-accordion"})
                if posData is None:
                    posData = bsdata.find("article", {"class": "play-by-play"})
                if posData is None:
                    noData[year].append(gameID)
                    if verydebug:
                        print("Could not find possession data! {0}".format(
                            gameID))
                    continue

                gameData = {
                    "Teams": {
                        "Away": awayTeam,
                        "Home": homeTeam
                    },
                    "Plays": []
                }

                if i % 10 == 0:
                    print("{0}/{1} with {2} no data games".format(
                        i, len(files), len(noData[year])))

                ###################
                ## Get Full Drive Data
                ###################

                drives = posData.findAll("li", {"class": "accordion-item"})
                if verydebug:
                    print("Drives {0}".format(len(drives)))

                for idr, drive in enumerate(drives):

                    ## Get Drive Summary
                    headlines = [
                        x.text.strip()
                        for x in drive.findAll("span", {"class": "headline"})
                    ]
                    if verydebug:
                        print("Headlines {0}".format(len(headlines)))

                    ## Get Drive Details
                    details = [
                        x.text.strip() for x in drive.findAll(
                            "span", {"class": "drive-details"})
                    ]
                    if verydebug:
                        print("Details {0}".format(len(details)))

                    ## Get Home Score
                    homescores = drive.findAll("span", {"class": "home"})
                    homescores = [
                        x.find("span", {"class": "team-score"})
                        for x in homescores
                    ]
                    homescores = [x.text for x in homescores if x is not None]
                    if verydebug:
                        print("Home Scores {0}".format(len(homescores)))

                    ## Get Away Score
                    awayscores = drive.findAll("span", {"class": "away"})
                    awayscores = [
                        x.find("span", {"class": "team-score"})
                        for x in awayscores
                    ]
                    awayscores = [x.text for x in awayscores if x is not None]
                    if verydebug:
                        print("Away Scores {0}".format(len(awayscores)))

                    ## Get Possession
                    possessions = drive.findAll("span", {"class": "home-logo"})
                    possessions = [
                        x.find("img", {"class": "team-logo"})
                        for x in possessions
                    ]
                    possessions = [
                        x.attrs['src'] for x in possessions if x is not None
                    ]
                    possessions = [x.split('&')[0] for x in possessions]
                    possessions = [getBaseFilename(x) for x in possessions]
                    if verydebug:
                        print("Possessions {0}".format(len(possessions)))

                    ## Check for valid headline (parsed correctly?)
                    if len(headlines) == 0:
                        continue
                    if headlines[0] == '':
                        continue
                    hasValidHeadline = any(x in validHeadlines
                                           for x in headlines)
                    ## The last drive of a game is allowed to be unrecognised
                    if not hasValidHeadline and idr < len(drives) - 1:
                        print(idr, '/', len(drives))
                        print(title)
                        print(ifile)
                        raise ValueError(
                            "No valid headline in {0}".format(headlines))

                    ## Analyze Play-by-Play
                    try:
                        driveList = drive.find("ul", {"class": "drive-list"})
                        plays = driveList.findAll("li")
                    except Exception as err:
                        raise ValueError(
                            "Could not find drive list in drive {0}".format(
                                drive)) from err

                    driveData = []
                    for ip, play in enumerate(plays):

                        ## Check for Starting Position
                        startPos = play.find("h3")
                        if startPos is None:
                            raise ValueError(
                                "Could not find Starting Position in Play! {0}"
                                .format(play))
                        startData = startPos.text.strip()

                        ## Check for Play Text
                        span = play.find("span", {"class": "post-play"})
                        if span is None:
                            raise ValueError(
                                "Could not find post play data! {0}".format(
                                    play))
                        playData = span.text.strip()

                        driveData.append({
                            "Play": ip,
                            "Start": startData,
                            "Data": playData
                        })

                    ## Save Drive Data ("Drive" is the index among the drives
                    ## kept for this game, not the size of the game dict)
                    gameData["Plays"].append({
                        "Drive": len(gameData["Plays"]),
                        "Headline": headlines,
                        "Detail": details,
                        "HomeScore": homescores,
                        "AwayScore": awayscores,
                        "Possession": possessions,
                        "Data": driveData
                    })

                    if verydebug:
                        print(idr, '\t', headlines)
                        print(idr, '\t', details)
                        print(idr, '\t', homescores)
                        print(idr, '\t', awayscores)
                        print(idr, '\t', possessions)
                        print("")

                if verydebug:
                    print("Found {0} drives for gameID {1}".format(
                        len(gameData["Plays"]), gameID))
                yearData[gameID] = gameData

            print("Parsed {0}/{1} games in {2}".format(len(yearData),
                                                       len(files), year))
            savename = setFile(self.getGamesResultsDir(),
                               "{0}-games.p".format(year))
            saveFile(idata=yearData, ifile=savename, debug=True)

        return noData
Beispiel #19
0
def testAlbum(albumDir, artistDir, files):
    """Check the tags of an album's files against its directory and file names.

    The result is a dict of boolean flags:

    * ``"Skip"``  - the artist directory name, the album name or its first
      path component is listed in ``skipDirs()``.
    * ``"Extra"`` - the album path relative to ``artistDir`` contains a "/".
    * ``"Multi"`` - ``testCD(albumName) or testDisc(albumName)``.
    * ``"Mix"``   - ``testMix`` matched the album name or a track title.
    * ``"Album"`` - an album tag is missing/empty or is not one of the
      components returned by ``getDirBasics`` for the file's directory.
    * ``"Track"`` - a track-number tag is missing/empty, or (when no other
      flag is set) the track numbers do not sum to ``1 + 2 + ... + n``.
    * ``"Title"`` - a title tag is missing/unreadable, or neither the title
      nor the base file name contains the other.

    When ``"Extra"`` or ``"Skip"`` is set the files are not inspected at all.
    Per-file checks stop at the first file that fails (except for an album
    name that is merely absent from the directory components).

    Args:
        albumDir: Path of the album directory (expected to start with
            ``artistDir``).
        artistDir: Path of the artist directory.
        files: Paths of the music files in the album.

    Returns:
        dict with the flags described above.

    Note:
        Reads the module-level ``args.debug`` when creating ``MusicID``.
    """
    retval = {
        "Track": False,
        "Album": False,
        "Title": False,
        "Multi": False,
        "Skip": False,
        "Extra": False,
        "Mix": False
    }

    artistName = getDirBasics(artistDir)[-1]
    if artistName in skipDirs():
        retval["Skip"] = True

    ## Album path relative to the artist directory (leading separator dropped)
    albumName = albumDir.replace(artistDir, "")[1:]
    if "/" in albumName:
        retval["Extra"] = True
    albumDirs = albumName.split("/")
    if albumDirs[0] in skipDirs():
        retval["Skip"] = True
    if albumName in skipDirs():
        retval["Skip"] = True

    print("\t-----> Album Info: {0} / {1} \t ==> {2} Songs".format(
        artistName, albumName, len(files)))
    if retval["Extra"] or retval["Skip"]:
        return retval

    ## Collect the tags of every file that is not skipped by MusicID
    tagged = []
    for ifile in files:
        results = MusicID(ifile, debug=args.debug)
        if results.skip is True:
            continue
        tagged.append((ifile, results.getInfo()))
    nfiles = len(tagged)

    ## Track Tests: a complete album has track numbers 1..nfiles
    trackCheckSum = sum(range(1, nfiles + 1))
    trackTrackSum = 0

    ## Album Tests
    retval["Multi"] = testCD(albumName) or testDisc(albumName)
    retval["Mix"] = testMix(albumName)

    tagErrors = (IndexError, KeyError, TypeError)
    for ifile, tag in tagged:

        ###############################################################################################
        ## Album Tests
        ###############################################################################################
        albumTag = tag.get("Album")
        if albumTag is None:
            print("Album Name Error ==> [{0}]".format("No Album Tag"))
            retval["Album"] = True
            break

        try:
            tagAlbumName = albumTag[0].replace("/", " ")
        except tagErrors + (AttributeError, ):
            ## An unreadable tag must count as empty; previously a stale
            ## album name was checked here instead.
            print("Album Name Error ==> [{0}]".format("No Value"))
            tagAlbumName = ""
        if len(tagAlbumName) == 0:
            retval["Album"] = True
            break

        dirvals = getDirBasics(getDirname(ifile))
        if tagAlbumName not in dirvals:
            retval["Album"] = True

        ###############################################################################################
        ## Track Number Tests
        ###############################################################################################
        trkTag = tag.get("TrackNo")
        if trkTag is None:
            print("Track Number Error ==> [{0}]".format("No TrackNo Tag"))
            retval["Track"] = True
            break

        try:
            trackNo = trkTag[0]
        except tagErrors:
            print("Track Number Error ==> [{0}]".format("No Value"))
            trackNo = ""
        if len(trackNo) == 0:
            retval["Track"] = True
            break

        ## Accept "7" as well as "7/12"; every part must be an integer
        trackNumberValue = None
        try:
            trackNumberValue = int(trackNo)
        except (TypeError, ValueError):
            try:
                trackVals = [int(x) for x in trackNo.split("/")]
                trackNumberValue = trackVals[0]
            except (TypeError, ValueError, AttributeError, IndexError):
                print("Track Number Error ==> [{0}]".format(trackNo))
        ## An unparsable number adds nothing, so the final sum check fails
        if trackNumberValue is not None:
            trackTrackSum += trackNumberValue

        ###############################################################################################
        ## Title Number Tests
        ###############################################################################################
        titleTag = tag.get("Title")
        if titleTag is None:
            print("Title Error ==> [{0}]".format("No Title Tag"))
            retval["Title"] = True
            break
        try:
            title = titleTag[0]
        except tagErrors:
            ## Flag the failure; previously it was stored in an unused local
            ## and never reached the result.
            print("Title Error ==> [{0}]".format(titleTag))
            retval["Title"] = True
            break

        if testMix(title):
            print("Possible Mix ==> [{0}]".format(title))
            retval["Mix"] = True
            break

        fileName = getBaseFilename(ifile)
        if title not in fileName and fileName not in title:
            print("Title Error ==> [{0}] not [{1}]".format(title, fileName))
            retval["Title"] = True
            break

    ## Only check the numbering when nothing else has been flagged
    if not any(retval.values()):
        if trackTrackSum != trackCheckSum:
            print("Problem with track numbering...")
            print("  Expected {0} and found {1}".format(
                trackCheckSum, trackTrackSum))
            retval["Track"] = True

    return retval
Beispiel #20
0
    def parseArtistModValFiles(self, modVal, previousDays=5, force=False, debug=False, doExtra=False):
        """Parse the artist files of one ModVal and store them in its DB file.

        The files returned by ``self.getArtistModValFiles`` are parsed with
        ``self.artist.getData`` and stored under their base file name in the
        ModVal database (loaded with ``getFile`` unless ``force`` is True, in
        which case an empty database is used).  If anything was added, the
        database is saved and the two ModVal metadata creators are called.

        Args:
            modVal: ModVal whose files are parsed.
            previousDays: Forwarded to ``self.getArtistModValFiles``.
            force: When True, start from an empty database and print progress.
            debug: Forwarded to the extra parser and the metadata creators.
            doExtra: When True (and something was added), also call
                ``self.parseArtistModValExtraFiles``.

        Returns:
            Number of entries added, or the database size when the extra
            parsing ran.

        Raises:
            ZeroDivisionError: When the ID parsed from a file differs from
                the ID in its file name (deliberate ``1/0``).
        """
        print("-"*100)
        print("Parsing Artist Files For ModVal {0}".format(modVal))
        artistParser = self.artist

        artistDir = self.disc.getArtistsDir()
        maxModVal = self.disc.getMaxModVal()  # fetched but not used below
        artistDBDir = self.disc.getArtistsDBDir()

        dbFileName = "{0}-DB.p".format(modVal)
        dirVal = setDir(artistDir, str(modVal))  # result is not used below
        dbname = setFile(artistDBDir, dbFileName)

        filesToParse = self.getArtistModValFiles(modVal, previousDays=previousDays, force=force)
        if force is False:
            dbdata = getFile(dbname, version=3)
        else:
            print("Forcing Reloads of ModVal={0}".format(modVal))
            print("  Processing {0} files.".format(len(filesToParse)))
            dbdata = {}

        nTotal = len(filesToParse)
        saveIt = 0
        for idx, ifile in enumerate(filesToParse):
            if force is True and idx % 100 == 0:
                print("\tProcessed {0}/{1} files.".format(idx, nTotal))

            artistID = getBaseFilename(ifile)
            isKnown = dbdata.get(artistID)  # looked up but not used below
            info = artistParser.getData(ifile)

            if info.ID.ID == artistID:
                saveIt += 1
                dbdata[artistID] = info
                continue

            ## ID from the file content does not match the file name
            print("File: {0}".format(ifile))
            print(info.show())
            # NOTE(review): this deliberate crash makes the rest of this
            # branch unreachable; confirm whether the crash or the handling
            # below is the intended behaviour.
            1/0

            # Check Profile
            try:
                artistName = info.profile.search
                if artistName is not None:
                    self.creditToDownload[artistID] = [artistName, self.getArtistSavename(artistID, credit=True)]
            except:
                pass

            if debug is False:
                continue
            print("ID From Name: {0}".format(artistID))
            print("ID From File: {0}".format(info.ID.ID))

            print("File: {0}".format(ifile))
            print("Info: {0}".format(info.url.get()))
            continue

        forceSave = False
        if saveIt > 0 and doExtra is True:
            print("\tCalling Extra Parsing")
            dbdata = self.parseArtistModValExtraFiles(modVal, dbdata=dbdata, force=force, debug=debug)
            forceSave = True
            saveIt = len(dbdata)

        nCredit = len(self.creditToDownload)
        if nCredit > 0:
            print("Found {0} artists that need new downloads.".format(nCredit))

        if saveIt > 0 or forceSave is True:
            savename = setFile(artistDBDir, dbFileName)
            print("Saving {0} new artist IDs to {1}".format(saveIt, savename))
            dbNumAlbums = sum(self.getArtistNumAlbums(entry) for entry in dbdata.values())
            print("Saving {0} total artist media".format(dbNumAlbums))
            saveFile(idata=dbdata, ifile=savename)

            self.createArtistModValMetadata(modVal=modVal, db=dbdata, debug=debug)
            self.createArtistAlbumModValMetadata(modVal=modVal, db=dbdata, debug=debug)

        return saveIt
Beispiel #21
0
    def parse(self, modVal, expr, force=False, debug=False):
        """Merge newly found credit files into the database of one ModVal.

        Each file returned by ``self.getArtistCreditFiles`` is handled by
        artist ID (its base file name):

        * an artist whose stored media already has a "Credits" entry is
          skipped without parsing the file;
        * an unknown artist is stored as parsed;
        * for a known artist every media list of the parsed file is merged
          into the stored one (keyed by ``code``, parsed entries win), and
          missing profile "Tabs" are copied over.

        The database is saved with ``self.disc.saveDBModValData`` only when
        at least one entry was added or merged.

        Args:
            modVal: ModVal whose credit files are parsed.
            expr: Forwarded to ``self.getArtistCreditFiles``.
            force: Forwarded to ``self.getArtistCreditFiles``.
            debug: Accepted for interface compatibility; not used here.
        """
        ts = timestat("Parsing ModVal={0} Credit Files".format(modVal))

        tsFiles = timestat("Finding Files To Parse")
        newFiles = self.getArtistCreditFiles(modVal, expr, force)
        tsFiles.stop()

        N = len(newFiles)
        modValue = 500 if N >= 1000 else 100  # progress print interval
        if N > 0:
            tsDB = timestat("Loading ModVal={0} DB Data".format(modVal))
            dbdata = self.disc.getDBModValData(
                modVal).to_dict()  ## We do not want to overwrite other data
            tsDB.stop()

        newData = 0
        newIDs = 0
        tsParse = timestat(
            "Parsing {0} New Credit Files For ModVal={1}".format(N, modVal))
        for i, ifile in enumerate(newFiles):
            if (i + 1) % modValue == 0 or (i + 1) == N:
                print("{0: <15}Parsing {1}".format("{0}/{1}".format(i + 1, N),
                                                   ifile))
            artistID = getBaseFilename(ifile)

            ########################################
            # Test For Previous Entries
            ########################################
            known = dbdata.get(artistID)
            if known is not None:
                if known.media.media.get("Credits") is not None:
                    continue

            info = self.artist.getData(ifile)
            if known is None:
                dbdata[artistID] = info
                newData += 1
                newIDs += 1
                continue

            ########################################
            # Merge Media (parsed entries win on equal code)
            ########################################
            for k, v in list(info.media.media.items()):
                if v is None:
                    continue
                iVal = {v2.code: v2 for v2 in v}
                dVal = known.media.media.get(k)
                if dVal is None:
                    Tretval = iVal
                else:
                    Tretval = {v2.code: v2 for v2 in dVal}
                    Tretval.update(iVal)
                known.media.media[k] = list(Tretval.values())
            newData += 1

            ########################################
            # Update Profile If Needed
            ########################################
            extra = info.profile.extra
            newTabs = extra.get("Tabs", {}) if isinstance(extra, dict) else {}
            currentExtra = known.profile.extra
            currentTabs = currentExtra.get("Tabs", {}) if isinstance(
                currentExtra, dict) else {}
            if len(newTabs) > 0:
                if len(currentTabs) == 0:
                    if isinstance(currentExtra, dict):
                        known.profile.extra["Tabs"] = newTabs
                    else:
                        ## The stored extra is not a dict, so it cannot take
                        ## a "Tabs" key; replace it instead of failing.
                        known.profile.extra = {"Tabs": newTabs}
                else:
                    for tab, tabURL in newTabs.items():
                        if currentTabs.get(tab) is None:
                            known.profile.extra["Tabs"][tab] = tabURL

        if newData > 0:
            dbdata = Series(dbdata)
            print("Saving {0} Credit Entries".format(newData))
            print("Saving {0} New Entries".format(newIDs))
            self.disc.saveDBModValData(
                idata=dbdata,
                modVal=modVal)  ## We do not want to overwrite other data
        else:
            print("Not Saving Any New Entries")

        tsParse.stop()
        ## Stop the overall timer as well; it was previously left running
        ts.stop()
Beispiel #22
0
    def createCreditMetadata(self, modVal=None):
        """Collect title/url metadata of artists missing from DB and credit files.

        For each ModVal the primary files are reduced step by step:

        1. files whose base name is in ``self.creditIgnores`` are dropped;
        2. files whose ID is already in the ModVal database are dropped;
        3. files whose ID already has a credit file are dropped.

        The remaining files are parsed with ``self.artist.getData`` and their
        ``meta.title`` / ``meta.url`` are stored in ``self.metadata[modVal]``
        keyed by artist ID.

        Args:
            modVal: Single ModVal to process; ``None`` processes 0..99.
        """
        modVals = [modVal] if modVal is not None else range(100)

        ts = timestat("Creating AllMusic Credit Metadata")
        for curModVal in modVals:
            tsFiles = timestat(
                "Finding Primary Files For ModVal={0}".format(curModVal))
            modValPrimaryFiles = self.getArtistPrimaryFiles(curModVal,
                                                            expr=None,
                                                            force=True)
            tsFiles.stop()

            ## The second placeholder must be {1}, otherwise the file count
            ## is printed in place of the ModVal
            tsIgnore = timestat(
                "Removing IDs To Ignore From {0} Primary Files For ModVal={1}".
                format(len(modValPrimaryFiles), curModVal))
            modValPrimaryGoodFiles = [
                ifile for ifile in modValPrimaryFiles
                if getBaseFilename(ifile) not in self.creditIgnores
            ]
            tsIgnore.stop()

            tsDBData = timestat(
                "Finding Known Artists From {0} Primary/Good Files For ModVal={1}"
                .format(len(modValPrimaryGoodFiles), curModVal))
            dbData = self.disc.getDBModValData(curModVal)
            ## Continue from the filtered list so that ignored IDs stay out
            missingArtistIDFiles = [
                ifile for ifile in modValPrimaryGoodFiles
                if dbData.get(getBaseFilename(ifile)) is None
            ]
            tsDBData.stop()

            tsCredit = timestat(
                "Finding Known Credit Artists From {0} Unknown Artists For ModVal={1}"
                .format(len(missingArtistIDFiles), curModVal))
            creditFiles = {
                getBaseFilename(ifile): ifile
                for ifile in self.dbCredit.getArtistCreditFiles(
                    curModVal, expr=None, force=True)
            }
            missingCreditIDs = [
                ifile for ifile in missingArtistIDFiles
                if creditFiles.get(getBaseFilename(ifile)) is None
            ]
            tsCredit.stop()

            tsMeta = timestat(
                "Finding Metadata For {0}/{1}/{2}/{3} Missing ArtistIDs for ModVal={4}"
                .format(len(missingCreditIDs), len(missingArtistIDFiles),
                        len(modValPrimaryGoodFiles), len(modValPrimaryFiles),
                        curModVal))
            metaData = {
                getBaseFilename(ifile): self.artist.getData(ifile).meta
                for ifile in missingCreditIDs
            }
            self.metadata[curModVal] = {
                artistID: {
                    "title": meta.title,
                    "url": meta.url
                }
                for artistID, meta in metaData.items()
            }
            tsMeta.stop()
        ts.stop()
    def createUnofficialMetadata(self, modVal=None):
        """Store, per ModVal, the artists with unofficial media but no file yet.

        For each ModVal the artists of ``self.getDBData`` are reduced to those
        that have an 'Unofficial' media count, whose name is not in
        ``self.unofficialIgnores`` and whose ID has no entry among the files
        from ``self.dbUnofficial.getArtistUnofficialFiles``.  The result
        (ID -> {"Name", "URL", "MediaCounts"}) is written to
        ``self.metadata[modVal]``.

        Args:
            modVal: Single ModVal to process; ``None`` processes 0..99.
        """
        if modVal is not None:
            modValsToRun = [modVal]
        else:
            modValsToRun = range(100)

        totalTimer = timestat("Creating Unofficial Files Metadata")
        for thisModVal in modValsToRun:
            ## Step 1: name, URL and unofficial media count of every artist
            dbTimer = timestat(
                "Finding Pages/URL/MediaCounts Data For ModVal={0}".format(
                    thisModVal))
            allArtists = {}
            for artistID, artistData in self.getDBData(thisModVal).items():
                allArtists[artistID] = {
                    "Name": artistData.artist.name,
                    "URL": artistData.url.url,
                    "MediaCounts":
                    artistData.mediaCounts.counts.get('Unofficial')
                }
            dbTimer.stop()

            ## Step 2: keep artists that have an unofficial media count
            mediaTimer = timestat(
                "Finding Artists With Unofficial MediaCounts From {0} Artists For ModVal={1}"
                .format(len(allArtists), thisModVal))
            withUnofficial = {}
            for artistID, summary in allArtists.items():
                if summary["MediaCounts"] is not None:
                    withUnofficial[artistID] = summary
            mediaTimer.stop()

            ## Step 3: drop artists whose name is on the ignore list
            ignoreTimer = timestat(
                "Removing Ignored Artists From {0} Artists For ModVal={1}".
                format(len(withUnofficial), thisModVal))
            notIgnored = {}
            for artistID, summary in withUnofficial.items():
                if summary["Name"] not in self.unofficialIgnores:
                    notIgnored[artistID] = summary
            ignoreTimer.stop()

            ## Step 4: drop artists that already have an unofficial file
            knownTimer = timestat(
                "Finding Known Unofficial Artists From {0} Unofficial Artists For ModVal={1}"
                .format(len(notIgnored), thisModVal))
            existingFiles = {}
            for ifile in self.dbUnofficial.getArtistUnofficialFiles(
                    thisModVal, expr=None, force=True):
                existingFiles[getBaseFilename(ifile)] = ifile
            stillMissing = {}
            for artistID, summary in notIgnored.items():
                if existingFiles.get(artistID) is None:
                    stillMissing[artistID] = summary
            knownTimer.stop()

            ## Step 5: publish the result for this ModVal
            metaTimer = timestat(
                "Saving Metadata From {0}/{1}/{2}/{3} Artists For ModVal={4}".
                format(len(stillMissing), len(notIgnored),
                       len(withUnofficial), len(allArtists), thisModVal))
            self.metadata[thisModVal] = stillMissing
            metaTimer.stop()
        totalTimer.stop()
Beispiel #24
0
    def parseGames(self,
                   gameID=None,
                   test=False,
                   debug=False,
                   verydebug=False):
        self.logger.info("Parsing Games")

        if self.hist is None:
            raise ValueError("Must set historical class!")

        sep = "======================================================"

        if verydebug:
            debug = True

        self.unknownPlays = []

        self.toughParsing = ['400547724']
        self.poorlyParsed = [
            '401012731', '400547781', '400547808', '400548070', '400548428',
            '400610207', '400547822'
        ]
        self.poorlyParsed += [
            '400547970', '400547835', '400548026', '400548167', '400547827',
            '400548458'
        ]
        self.poorlyParsed += [
            '400547901', '400547976', '400548246', '400548278', '400548292',
            '400548448'
        ]

        self.statsToGet = {}
        self.badGames = {}
        self.goodGames = {}

        dc = debugclass()

        files = self.hist.getGamesResultsFiles()
        for ifile in files:
            try:
                year = int(getBaseFilename(ifile).split("-")[0])
            except:
                raise ValueError("Could not get year from {0}".format(ifile))

            if year != 2018:
                continue

            self.logger.info(" Parsing Games from {0}".format(year))

            yearData = getFile(ifile)
            seasonData = self.hist.getSeasonResultsData(year)
            statsData = self.hist.getStatisticsResultsData(year)
            augmentedStatsData = self.hist.getStatisticsAugmentedData(year)

            totalGames = 0
            for gameIdent, gameData in yearData.items():
                if gameID is not None:
                    if gameID != gameIdent:
                        continue

                if gameIdent in self.hist.noGameData or gameIdent in self.poorlyParsed or gameIdent in self.toughParsing:
                    continue

                self.logger.info("  Parsing Game ID {0}".format(gameIdent))

                teamsMetaData = gameData["Teams"]
                homeTeamMetaData = teamsMetaData["Home"]
                awayTeamMetaData = teamsMetaData["Away"]
                driveData = gameData["Plays"]

                ################################################################################
                ### Get maps
                ################################################################################
                fieldMap = self.makeFieldMap(awayTeamMetaData,
                                             homeTeamMetaData)
                copmap = self.makeCopMap(awayTeamMetaData, homeTeamMetaData)

                ################################################################################
                ### Get team data
                ################################################################################
                homeTeamGameData = self.getTeamGameData(
                    gameIdent, seasonData, homeTeamMetaData)
                if homeTeamGameData is None:
                    continue
                awayTeamGameData = self.getTeamGameData(
                    gameIdent, seasonData, awayTeamMetaData)
                if awayTeamGameData is None:
                    continue

                ################################################################################
                ### Learn key plays for use in determining possession
                ################################################################################
                players = gameplayers(teamsMap=fieldMap, statsData=statsData)
                players.augmentData(augmentedStatsData)

                pfp = possessionfromplayer(players)
                ps = playstart()
                pc = playclock()
                pt = playtype()
                ap = analyzepossession(copmap, players)
                ay = analyzeyards()
                ak = analyzekicking()
                apen = analyzepenalties()
                pcc = possessionchangeclass(copmap)

                gameResult = []
                totalPlays = 0

                ################################################################################
                ### Collect Post Drive Scores
                ################################################################################
                postDriveScores = {"Drives": [], "Final": []}
                postDriveScores["Final"] = [
                    awayTeamGameData.teamAScore, homeTeamGameData.teamAScore
                ]

                ################################################################################
                ### Iterate over drives
                ################################################################################
                for idr, drive in enumerate(driveData):
                    self.logger.debug("\n\n{0}".format(2 * sep))
                    self.logger.debug("{0}Parsing Plays for Drive {1}".format(
                        self.ind, idr))

                    ds = drivesummary(drive, fieldMap)
                    drivePlays = ds.getDrivePlays()

                    postDriveScores["Drives"].append(ds.getPostDriveScore())

                    ################################################################################
                    ### Iterate over plays in drive
                    ################################################################################
                    driveResults = []
                    for ipl, drivePlay in enumerate(drivePlays):
                        #playNo        = drivePlay['Play']
                        playStartText = drivePlay['Start']
                        playData = drivePlay['Data']
                        self.logger.debug("\n  {0}".format(sep))
                        self.logger.debug("{0}  Play {1}/{2}: {3}".format(
                            self.ind, ipl, len(drivePlays), playData))

                        ### Determine play starting position
                        startVals = ps.getStart(playStartText)

                        ### Determine play clock and quarter
                        pc.parsePlay(playData)
                        startVals.setClock(pc)
                        playText = pc.getPlayText()

                        ### Determine play type
                        play = pt.getPlay(playText)

                        #
                        ### Determine possession
                        playPossession = pfp.determinePossession(play)
                        if playPossession.isForced() is not None:
                            play.pa.forced = playPossession.isForced()

                        ### Result of play
                        playResult = playsummary(possession=playPossession,
                                                 start=startVals,
                                                 play=play,
                                                 valid=play.valid)

                        ### Save and move on
                        totalPlays += 1
                        driveResults.append(playResult)
                        continue

                        ### Check if we need to insert a play
                        #newPlay = self.addPlay(gameIdent, idr, ipl, playResult, possData, verydebug)
                        #if newPlay is not None:
                        #    totalPlays += 1
                        #    driveResults.append(newPlay)

                    fullDriveData = ds.getFullDrive()
                    fullDriveData.setPlays(driveResults)
                    #fullDriveData = self.insertMissingData(gameIdent, idr, fullDriveData, debug)
                    #fullDriveData = self.augmentPlayWithScore(fullDriveData, fieldMap, debug)
                    gameResult.append(fullDriveData)

                ################################################################################
                ### Show State Before Alterations
                ################################################################################
                self.logger.debug("\n{0}".format(2 * sep))
                self.logger.debug("{0}Found {1} Drives For This Game".format(
                    self.ind, len(gameResult)))
                self.logger.debug("\n{0}\n".format(2 * sep))

                ################################################################################
                ### Analyze Possession
                ################################################################################
                dc.showGame(gameResult)

                gameResult = ap.continuity(gameResult)
                gameResult = pcc.splitChangeOfPossession(gameResult)

                gameResult = ap.continuity(gameResult)
                gameResult = ap.returns(gameResult)
                gameResult = ap.pats(gameResult)
                gameResult = ap.endofgame(gameResult, postDriveScores)
                gameResult = ap.noplays(gameResult)
                gameResult = ap.nextplay(gameResult)
                gameResult = ap.endofdrive(gameResult)

                dc.showGame(gameResult, "Game")
                gameResult = ay.analyze(gameResult)

                gameResult = ak.kickoffs(gameResult)
                gameResult = ak.returns(gameResult)

                #gameResult = apen.penalties(gameResult)
                #gameResult = apen.isPenaltyAdditive(gameResult)

                scoreResult = ap.gamescore(gameResult, postDriveScores)

                if scoreResult is False:
                    if gameID is None:
                        self.badGames[gameIdent] = True
                        continue
                    dc.showGame(gameResult, "Score Is Not Corrent")
                dc.showGame(gameResult, "Good Game")

                totalGames += 1
                if debug:
                    self.logger.info("Found {0} plays in this game {1}".format(
                        totalPlays, gameID))

            self.logger.info("Found {0} total games for {1}".format(
                totalGames, ifile))
Beispiel #25
0
    def getMedia(self):
        """Collect the video entries found in the tables of ``self.bsdata``.

        Every table row after the first is read as one video. The text of the
        row's first ``<td>`` is split at the first ``' - '`` into an artist
        name and a track title, and ``getBaseFilename`` of the cell's link
        ``href`` is used to build the kworb video page URL. Known decorations
        such as "(Official Video)" are removed from the title and runs of
        spaces are collapsed; rows whose title ends up empty are dropped.

        Rows that have no ``<td>`` cell or no link ``href`` are skipped,
        because no title or video URL can be derived from them.

        Returns:
            The ``artistDBMediaClass`` instance whose ``media["Videos"]`` list
            holds one ``artistDBMediaDataClass`` per accepted row.
        """
        mediaType = "Videos"

        # Title decorations to strip; loop-invariant, so built only once.
        removes = (
            "(Official Music Video)", "(Official Lyric Video)",
            "(Official Video (Short Version))", "(Official Video)",
            "[Lyric Video]", "(Video Version)",
            "[Official Music Video]", "(Official Audio)",
            "(Shazam Version)", "(Explicit)", "(Dance Video)",
            "(Lyric Video)", "[Official Video]",
            "(Official Dance Video)", '(Acoustic)', '(Audio)',
            '(Visualizer)', '(Video Commentary)', '(VEVO Footnotes)',
            '(Choir Version)', '(Fan Lip Sync Version)', '(Trailer)',
            '(Teaser)',
        )

        amc = artistDBMediaClass()
        amc.media[mediaType] = []

        for table in self.bsdata.findAll("table"):
            # The first row of each table is skipped (treated as the header).
            for tr in table.findAll("tr")[1:]:
                td = tr.find('td')
                if td is None:
                    # e.g. a row made only of <th> cells: nothing to parse.
                    continue

                ref = td.find("a")
                url = ref.attrs.get('href') if ref is not None else None
                if not url:
                    # Without a link there is no video id for the track URL.
                    continue

                #https://kworb.net/youtube/video/fRh_vgS2dFE.html
                trackURL = "https://kworb.net/youtube/video/{0}.html".format(
                    getBaseFilename(url))

                # "Artist - Title - more" -> ("Artist", "Title - more")
                artistName, _, trackName = td.text.partition(' - ')

                for rmText in removes:
                    trackName = trackName.replace(rmText, "").strip()
                # Removing decorations can leave runs of spaces behind.
                while "  " in trackName:
                    trackName = trackName.replace("  ", " ")

                if not trackName:
                    continue

                code = self.dbUtils.getAlbumCode(name=trackName, url=trackURL)

                amdc = artistDBMediaDataClass(album=trackName,
                                              url=trackURL,
                                              aclass=None,
                                              aformat=None,
                                              artist=artistName,
                                              code=code,
                                              year=None)
                amc.media[mediaType].append(amdc)

        return amc
Beispiel #26
0
def genMIDTags(albumDir, artistDir, files, args):
    """Print ``mid`` commands carrying tag values guessed from file names.

    For every file that ``MusicID`` does not flag as ``skip``, a track number
    and a title are guessed from the base file name. The guesses (and, when
    ``args.tryalbum`` is truthy, the album name derived from the directory
    names) are printed as one ``mid -f "<file>" -<tag> "<value>"`` command
    line per file. Nothing is written to the files and nothing is returned.

    Args:
        albumDir: Album directory; the album name is this path with
            ``artistDir`` removed and the first remaining character dropped.
        artistDir: Artist directory; its last ``getDirBasics`` component is
            used as the artist name in the header line.
        files: Iterable (with ``len``) of music file paths to inspect.
        args: Options object providing ``debug``, ``ignoretitle`` and
            ``tryalbum``.
    """
    artistName = getDirBasics(artistDir)[-1]
    albumName = albumDir.replace(artistDir, "")[1:]

    print("\t-----> Album Info: {0} / {1} \t ==> {2} Songs".format(
        artistName, albumName, len(files)))

    # First pass: keep only the files MusicID does not ask us to skip.
    keptFiles = []
    for path in files:
        musicInfo = MusicID(path, debug=args.debug)
        if musicInfo.skip is True:
            continue
        # getInfo() output is not consulted by the guesses below.
        musicInfo.getInfo()
        keptFiles.append(path)

    # Second pass: guess tag values from each remaining file name.
    fixVals = {}
    for path in keptFiles:
        stem = getBaseFilename(path)

        guessedTrack = None
        best = getBestVal(guessTrackNumber(stem))
        if best is not None and best.get('Val') is not None:
            guessedTrack = best["Val"]
            # Continue with the name that has the track number taken out.
            stem = stripName(best["Rep"])

        guessedTitle = None
        best = getBestVal(guessTitle(stem))
        if best is not None and best.get('Val') is not None:
            guessedTitle = best["Val"]
            stem = stripName(best["Rep"])

        fixes = {}
        if guessedTrack is not None:
            fixes["track"] = guessedTrack
        if args.ignoretitle is False and guessedTitle is not None:
            fixes["title"] = guessedTitle
        if args.tryalbum:
            fixes["album"] = albumName

        if fixes:
            fixVals.setdefault(path, {}).update(fixes)

    # Output: one command per file, framed by blank lines when non-empty.
    if fixVals:
        print("")

    for path, fixes in fixVals.items():
        print('mid -f "{0}" '.format(path), end="")
        for tagName, tagValue in fixes.items():
            print(' -{0} "{1}"'.format(tagName, tagValue), end="")
        print("\n")

    if fixVals:
        print("")
Beispiel #27
0
    def parse(self, modVal, expr='< 0 Days', force=True, debug=False):
        """Merge new artist "extra" files into the DB data for ``modVal``.

        Each file name is expected to have the form ``<artistID>-<suffix>``
        (exactly one ``-`` in the base name); other files are reported and
        skipped. For an artist not yet in the DB data the parsed file becomes
        the artist's entry. For a known artist, each media list in the parsed
        file is merged into the stored list of the same key, keyed by each
        item's ``code``; on a code clash the newly parsed item wins.

        Args:
            modVal: Identifier of the DB shard to load and save.
            expr: Passed through to ``getArtistExtraFiles`` to select files.
            force: Passed through to ``getArtistExtraFiles`` and ``getDBData``.
            debug: Currently unused; kept for interface compatibility.

        Returns:
            True if at least one file contributed data (in which case the DB
            data was saved), otherwise False.
        """
        ts = timestat("Parsing ModVal={0} Extra Files".format(modVal))

        tsFiles = timestat("Finding Files To Parse")
        newFiles = self.getArtistExtraFiles(modVal, expr, force=force)
        tsFiles.stop()

        N = len(newFiles)
        modValue = 50 if N >= 100 else 10

        # The DB data is only needed (and only loaded) when there is work.
        dbdata = None
        if N > 0:
            tsDB = timestat("Loading ModVal={0} DB Data".format(modVal))
            dbdata = self.getDBData(modVal, force)
            tsDB.stop()

        newData = 0
        tsParse = timestat("Parsing {0} New Extra Files For ModVal={1}".format(N, modVal))

        for i, ifile in enumerate(newFiles):
            # Progress line every ``modValue`` files and for the last file.
            if (i+1) % modValue == 0 or (i+1) == N:
                print("{0: <15}Parsing {1}".format("{0}/{1}".format(i+1, N), ifile))

            nameParts = getBaseFilename(ifile).split("-")
            if len(nameParts) != 2:
                print("Error with extra file: {0}".format(ifile))
                continue
            artistID = nameParts[0]

            info = self.artist.getData(ifile)

            known = dbdata.get(artistID)
            if known is None:
                # First data for this artist: take the parsed file as-is.
                dbdata[artistID] = info
                newData += 1
                continue

            # Only media keys present in the new file can change anything.
            for k in info.media.media.keys():
                v = info.media.media.get(k)
                if v is None:
                    continue
                merged = {}
                dVal = known.media.media.get(k)
                if dVal is not None:
                    merged.update({v2.code: v2 for v2 in dVal})
                # New entries override stored ones that share a code.
                merged.update({v2.code: v2 for v2 in v})
                known.media.media[k] = list(merged.values())
            newData += 1

        tsParse.stop()

        if newData > 0:
            self.saveDBData(modVal, dbdata, newData)

        # Close the overall timer, as is done for the per-phase timers above.
        ts.stop()

        return newData > 0