Example #1
    def __construct_bat_ave_csv(self, year): # pragma: no cover
        """
        int -> None
        year: int | the year for which the csv file should be constructed

        Produces a csv with lahmanIDs, corresponding batting averages, 
        and plate appearances in year year, sorted by batting average column. 
        Saves it to file. (Helper function for _calc_players)
        """
        self._check_year(year)

        # Set initial variables
        batAveCol = 'batAve' + str(year) + 'Sorted'
        
        # get series of unique playerIDs corresponding to given year
        df = pd.read_csv(Filepath.get_lahman_file("Batting"), 
                           usecols=['playerID', 'yearID', 'AB'])
        df = df[df.yearID == year]
        uniqueIDArray = Series(df.playerID.values.ravel()).unique()
             # uniqueIDArray is of type ndarray

        # Initialize progress bar
        widgets = ['     Creating batting average csv for year %s ' % year, 
            Timer(), ' ', Percentage()]
        pbar = ProgressBar(maxval=len(uniqueIDArray), widgets=widgets).start()

        # calculate batting averages, firstName, lastName, retrosheetID and plate appearances
        batAveList, plateAppearList = [], []
        firstNameList, lastNameList = [], []
        retrosheetIDList = []
        for index, lahmanID in enumerate(uniqueIDArray):
            player = Player(lahmanID, year)
            batAveList.append(player.get_bat_ave())
            plateAppearList.append(Researcher.num_plate_appearances(year, player))
            firstNameList.append(player.get_first_name())
            lastNameList.append(player.get_last_name())
            retrosheetIDList.append(player.get_retrosheet_id())
            pbar.update(index)
        pbar.finish() # kill the progress bar

        # Write the data to csv
        batAveS = Series(batAveList, name=batAveCol)
        firstNameS = Series(firstNameList, name='FirstName')
        lastNameS = Series(lastNameList, name='LastName')
        retrosheetIDS = Series(retrosheetIDList, name='RetrosheetID')
        plateAppearS = Series(plateAppearList, name='PA')
        uniqueIDS = Series(uniqueIDArray, name='lahmanID')
        df = pd.concat([uniqueIDS, firstNameS, lastNameS, 
                        retrosheetIDS, batAveS, plateAppearS], axis=1)
        df.sort(columns=batAveCol, ascending=False, inplace=True)
        df.to_csv(path_or_buf=Filepath.get_retrosheet_file(folder='persistent', 
            fileF='batAve', year=year), index=False)
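
Note: df.sort(columns=...) is the old pandas 0.x API; pandas 0.17+ spells the same operation sort_values(by=...). A minimal sketch of the sort-and-save step on an invented frame (the column names mirror the method above; the values and output path are placeholders):

import pandas as pd

# invented stand-in for the real batting-average frame
df = pd.DataFrame({'lahmanID': ['aaa01', 'bbb02', 'ccc03'],
                   'batAve2010Sorted': [0.301, 0.275, 0.330],
                   'PA': [550, 600, 480]})

# newer-pandas equivalent of df.sort(columns=batAveCol, ascending=False, inplace=True)
df = df.sort_values(by='batAve2010Sorted', ascending=False)
df.to_csv('batAve2010.csv', index=False)  # illustrative path only
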
def __get_dataframes_for_choose_players(**kwargs):
    """
    kwargs -> (pd.DataFrame, pd.DataFrame)
        sN: int | Strategy Number 
        vMN: int | Virtual Machine Number
        num: int | number of unhandled accounts to return
        activeDate: datetime.date | date for which we should parse the dataframe

    Returns two pandas dataframes. One holds num of the unhandled accounts
    corresponding to strategy sN and virtual machine vMN. The second holds
    all unhandled accounts corresponding to strategy sN and virtual machine vMN
    """
    import os

    ### Type check
    assert type(kwargs['sN']) == int
    assert type(kwargs['vMN']) == int
    assert type(kwargs['num']) == int
    assert type(kwargs['activeDate']) == date

    ### Read in the minion accounts file if available
    minionPath = Filepath.get_minion_account_file(sN=kwargs['sN'],
                                                  vMN=kwargs['vMN'])
    if os.path.isfile(minionPath):
        dfPath = minionPath
    ### Otherwise get the master file
    else:
        dfPath = Filepath.get_accounts_file()
    df = pd.read_excel(dfPath, sheetname='Production')

    ### Let the user know what's up
    print "--> Getting accounts file {}".format(dfPath)

    ### Parse it down to include only what we want
    # If it's the master accounts file, only include
    # those accounts with this strategy number and virtual machine number
    if dfPath != minionPath:
        df = df[(df.Strategy == kwargs['sN']) & (df.VM == kwargs['vMN'])]

    # Only include those accounts that haven't yet been updated
    dateFormatted = __get_date_formatted_for_excel(kwargs['activeDate'])
    if dateFormatted in df.columns:
        df = df[pd.isnull(df[dateFormatted]
                          )]  # pd.isnull checks for NaNs (unhandled accounts)

    # Only include the columns we want
    df = df[['ID', 'Email', 'MLBPassword', 'Strategy', 'VM']]

    return df[0:kwargs['num']], df
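
A hypothetical call site, only to show the expected keyword arguments and the two-frame return value (the strategy number, VM number, and date below are made up):

from datetime import date

# first frame holds at most num rows; second holds every unhandled account
nextBatchDF, allUnhandledDF = __get_dataframes_for_choose_players(
    sN=1, vMN=2, num=5, activeDate=date(2014, 7, 4))
print("{} of {} unhandled accounts selected".format(
    len(nextBatchDF), len(allUnhandledDF)))
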
def __get_dataframes_for_choose_players(**kwargs):
    """
    kwargs -> (pd.DataFrame, pd.DataFrame)
        sN: int | Strategy Number 
        vMN: int | Virtual Machine Number
        num: int | number of unhandled accounts to return
        activeDate: datetime.date | date for which we should parse the dataframe

    Returns two pandas dataframes. One holds num of the unhandled accounts
    corresponding to strategy sN and virtual machine vMN. The second holds
    all unhandled accounts corresponding to strategy sN and virtual machine vMN
    """
    import os 

    ### Type check
    assert type(kwargs['sN']) == int
    assert type(kwargs['vMN']) == int
    assert type(kwargs['num']) == int
    assert type(kwargs['activeDate']) == date

    ### Read in the minion accounts file if available
    minionPath = Filepath.get_minion_account_file(
                     sN=kwargs['sN'], vMN=kwargs['vMN'])
    if os.path.isfile(minionPath):
        dfPath = minionPath
    ### Otherwise get the master file
    else:
        dfPath = Filepath.get_accounts_file()
    df = pd.read_excel( dfPath, sheetname='Production' )

    ### Let the user know what's up
    print "--> Getting accounts file {}".format(dfPath)

    ### Parse it down to include only what we want
    # If it's the master accounts file, only include
    # those accounts with this strategy number and virtual machine number
    if dfPath != minionPath:
        df = df[(df.Strategy == kwargs['sN']) & (df.VM == kwargs['vMN'])]

    # Only include those accounts that haven't yet been updated
    dateFormatted = __get_date_formatted_for_excel(kwargs['activeDate'])
    if dateFormatted in df.columns:
        df = df[pd.isnull(df[dateFormatted])] # pd.isnull checks for NaNs (unhandled accounts)

    # Only include the columns we want
    df = df[['ID', 'Email', 'MLBPassword', 'Strategy', 'VM']]

    return df[0:kwargs['num']], df
Example #4
    def fetch_retrosheet_id_from_name(self):
        """
        None -> string

        Produces retrosheet id of self from self.firstName and self.lastName
        and potentially self.debut
        """
        # open retrosheet id file and get rows corresponding to name
        df = pd.read_csv(Filepath.get_retrosheet_file(
            folder='base', fileF='id'))
        df = df[df.FIRST == self.get_first_name()]
        df = df[df.LAST == self.get_last_name()]

        if len(df) == 0: # if no rows found, raise an exception
            raise NoPlayerException("No player found with name %s %s" % (
                self.get_first_name(), self.get_last_name()))
        if len(df) == 1: # if 1 row found, unique player found. return id
            return df.ID.item()

        # else len(df) > 1. If debut date given, find corresponding id. 
        # Otherwise, prompt user for debut date and find id
        i = 0
        while self.get_debut() not in df.DEBUT.values:
            if i > 0: print "\nYou mistyped. Try again"
            print "\nMultiple ids found. What was " + \
                "%s's debut date? Options:" % self.get_name()
            for debut in df.DEBUT: print debut
            self.set_debut(str(raw_input()))
            i += 1

        for debut in df.DEBUT: # find right debut date and return id
            if datetime.strptime(debut, '%m/%d/%Y') == \
                 datetime.strptime(self.debut, '%m/%d/%Y'):
                return df[df.DEBUT == debut].ID.item()
Example #5
    def __set_lahman_id(self):
        # get lahman master csv with playerID and retroID columns
        df = pd.read_csv(Filepath.get_lahman_file("master"),
                         usecols=["playerID", "retroID"])

        # Get lahmanID corresponding to self's retrosheet id
        return df[df.retroID == self.get_retrosheet_id()]['playerID'].item()
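
.item() assumes the lookup matches exactly one row; with zero or multiple matches it raises. A defensive sketch of the same retroID-to-lahmanID lookup on an invented master table (column names as above, data made up):

import pandas as pd

master = pd.DataFrame({'playerID': ['aaaaa001', 'bbbbb001'],
                       'retroID': ['aaaa101', 'bbbb101']})

matches = master[master.retroID == 'aaaa101']['playerID']
if len(matches) == 1:
    lahmanID = matches.item()  # exactly one row, safe to unwrap
else:
    raise ValueError("expected one playerID, found {}".format(len(matches)))
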
def reportUnusedPlayers(sN, vMN, activeDate):
    """
    int int datetime.date -> None
    Compares the global list "eligiblePlayers" to the listed players
    in the minion Account file for sN and vMN and logs player selection 
    rates to the log file

    For example, if eligiblePlayers = (p1, p2, p3) and 3 accounts chose p1, 
    5 accounts chose p2 and 0 accounts chose p3, then it will write:

    **** Player Selection Rates ****
    p2: 5
    p1: 3
    p3: 0
    """
    global logEligiblePlayers
    global ignorePlayers
    global playerExceptions

    ### Read in the minion accounts file
    minionPath = Filepath.get_minion_account_file(sN=sN, vMN=vMN)
    df = pd.read_excel( minionPath, sheetname='Production' )

    ### Let the user know what's up
    print "--> Reporting Player selection rates for {}".format(minionPath)

    ### Only include column with today's selections
    df = df[__get_date_formatted_for_excel(activeDate)]

    ### Compare to eligible players and construct selection counts
    playerCounts = {}
    for player in logEligiblePlayers:
        playerCounts[player] = 0
    for selection in df:
        for player in logEligiblePlayers:
            if str(player) in selection:
                playerCounts[player] += 1

    ### Organize the player counts
    sortedPlayerCounts = []
    for player, count in playerCounts.iteritems():
        sortedPlayerCounts.append((player, count))
    sortedPlayerCounts.sort(key=lambda x: x[1], reverse=True) # highest counts first, as in the docstring example

    ### Log counts to file
    logger = getLogger(activeDate=activeDate, sN=sN, vMN=vMN)

    ### Tell us what values the global variables had
    logger.info("\n\n**** logEligiblePlayers ****\n" + str(logEligiblePlayers))
    logger.info("\n\n**** ignorePlayers ****\n" + str(ignorePlayers))
    logger.info("\n\n**** playerExceptions ****\n" + str(playerExceptions))

    info = "\n\n**** Player Selection Rates ****\n"
    for player, count in sortedPlayerCounts:
        info = info + "\n --->{}: {}".format(player, count)
    logger.info(info)
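
The counting loop above can also be written with collections.Counter; a sketch that assumes each cell in the day's column is a string naming the chosen players (the player keys and selection strings are invented):

from collections import Counter

eligible = ['p1', 'p2', 'p3']
selections = ['Done. 1: p2, 2: p1', 'Done. 1: p2', 'Done. 1: p2, 2: p1']

counts = Counter({p: 0 for p in eligible})        # keep never-picked players at 0
for cell in selections:
    counts.update(p for p in eligible if str(p) in cell)

for player, count in counts.most_common():        # highest selection rate first
    print("{}: {}".format(player, count))
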
def reportUnusedPlayers(sN, vMN, activeDate):
    """
    int int datetime.date -> None
    Compares the global list "eligiblePlayers" to the listed players
    in the minion Account file for sN and vMN and logs player selection 
    rates to the log file

    For example, if eligiblePlayers = (p1, p2, p3) and 3 accounts chose p1, 
    5 accounts chose p2 and 0 accounts chose p3, then it will write:

    **** Player Selection Rates ****
    p2: 5
    p1: 3
    p3: 0
    """
    global logEligiblePlayers
    global ignorePlayers
    global playerExceptions

    ### Read in the minion accounts file
    minionPath = Filepath.get_minion_account_file(sN=sN, vMN=vMN)
    df = pd.read_excel(minionPath, sheetname='Production')

    ### Let the user know what's up
    print "--> Reporting Player selection rates for {}".format(minionPath)

    ### Only include column with today's selections
    df = df[__get_date_formatted_for_excel(activeDate)]

    ### Compare to eligible players and construct selection counts
    playerCounts = {}
    for player in logEligiblePlayers:
        playerCounts[player] = 0
    for selection in df:
        for player in logEligiblePlayers:
            if str(player) in selection:
                playerCounts[player] += 1

    ### Organize the player counts
    sortedPlayerCounts = []
    for player, count in playerCounts.iteritems():
        sortedPlayerCounts.append((player, count))
    sortedPlayerCounts.sort(key=lambda x: x[1], reverse=True) # highest counts first, as in the docstring example

    ### Log counts to file
    logger = getLogger(activeDate=activeDate, sN=sN, vMN=vMN)

    ### Tell us what values the global variables had
    logger.info("\n\n**** logEligiblePlayers ****\n" + str(logEligiblePlayers))
    logger.info("\n\n**** ignorePlayers ****\n" + str(ignorePlayers))
    logger.info("\n\n**** playerExceptions ****\n" + str(playerExceptions))

    info = "\n\n**** Player Selection Rates ****\n"
    for player, count in sortedPlayerCounts:
        info = info + "\n --->{}: {}".format(player, count)
    logger.info(info)
Example #8
    def fetch_retrosheet_id_from_lahman_ID(self):
        """
        None -> string

        Produces retrosheet id of self from self.lId
        """
        # open lahman master.csv, get right row and return id
        df = pd.read_csv(Filepath.get_lahman_file("master"), 
                usecols=['playerID', 'retroID'])
        return df[df.playerID == self.lId].retroID.item()
def get_num_accounts(sN=None, vMN=None, getRemaining=True, activeDate=None):
    """
    int int bool -> int 
       sN: Strategy Number
       vMN: virtual Machine Number
       getRemaining: Indicates whether to count an account only if it hasn't
           already been assigned to today.

    Returns the number of accounts corresponding to strategy number sN
    and virtual machine vMN. If getRemaining==True, then only returns the number
    of accounts that have yet to be assigned to
    """
    import os

    ## Type check
    assert type(sN) == int
    assert type(vMN) == int
    assert type(getRemaining) == bool

    ## Assign initial variables
    day = __get_date_formatted_for_excel(activeDate)
    minionPath = Filepath.get_minion_account_file(sN=sN, vMN=vMN)

    ## If the minion account File exists, get it
    if os.path.isfile(minionPath):
        minionDF = pd.read_excel( minionPath, 
                                  sheetname="Production")
        # If appropriate, parse out all accounts that have already been handled
        if getRemaining and (day in minionDF.columns): 
            # pd.isnull checks for NaNs
            minionDF = minionDF[pd.isnull(minionDF[day])]

    ## Otherwise get the accounts from the master accounts file
    else:    
        fullDF = pd.read_excel( Filepath.get_accounts_file(), 
                                sheetname='Production',
                                parse_cols= 'A:F' )
        minionDF = fullDF[(fullDF.Strategy == sN) & (fullDF.VM == vMN)]

    ## return the length of the dataframe
    return len(minionDF)
def get_num_accounts(sN=None, vMN=None, getRemaining=True, activeDate=None):
    """
    int int bool -> int 
       sN: Strategy Number
       vMN: virtual Machine Number
       getRemaining: Indicates whether to count an account only if it hasn't
           already been assigned to today.

    Returns the number of accounts corresponding to strategy number sN
    and virtual machine vMN. If getRemaining==True, then only returns the number
    of accounts that have yet to be assigned to
    """
    import os

    ## Type check
    assert type(sN) == int
    assert type(vMN) == int
    assert type(getRemaining) == bool

    ## Assign initial variables
    day = __get_date_formatted_for_excel(activeDate)
    minionPath = Filepath.get_minion_account_file(sN=sN, vMN=vMN)

    ## If the minion account File exists, get it
    if os.path.isfile(minionPath):
        minionDF = pd.read_excel(minionPath, sheetname="Production")
        # If appropriate, parse out all accounts that have already been handled
        if getRemaining and (day in minionDF.columns):
            # pd.isnull checks for NaNs
            minionDF = minionDF[pd.isnull(minionDF[day])]

    ## Otherwise get the accounts from the master accounts file
    else:
        fullDF = pd.read_excel(Filepath.get_accounts_file(),
                               sheetname='Production',
                               parse_cols='A:F')
        minionDF = fullDF[(fullDF.Strategy == sN) & (fullDF.VM == vMN)]

    ## return the length of the dataframe
    return len(minionDF)
def getLogger(activeDate, sN, vMN):
    ## Create logger that handles file logging
    logger = logging.getLogger()
    # handler to write to logs
    fileHandler = logging.FileHandler(Filepath.get_log_file(activeDate, sN, vMN))
    formatter = logging.Formatter('%(asctime)s %(message)s\n')
    fileHandler.setFormatter(formatter)
    # add handlers to logger
    logger.addHandler(fileHandler)

    # Set logger level
    logger.setLevel(20) # info level

    return logger
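
One caveat with this helper: logging.getLogger() with no name returns the shared root logger, so calling the helper more than once in a process attaches an extra FileHandler each time and every message is then written once per handler. A sketch of one way to guard against that; the guard is an addition, not part of the original code:

import logging
import os

def get_run_logger(logPath):
    """Attach at most one FileHandler for logPath to the root logger."""
    logger = logging.getLogger()
    absPath = os.path.abspath(logPath)
    alreadyAttached = any(
        isinstance(h, logging.FileHandler) and h.baseFilename == absPath
        for h in logger.handlers)
    if not alreadyAttached:
        handler = logging.FileHandler(logPath)
        handler.setFormatter(logging.Formatter('%(asctime)s %(message)s\n'))
        logger.addHandler(handler)
    logger.setLevel(logging.INFO)
    return logger
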
Example #12
    def __calc__players(self, year, minPA):
        """
        int int -> List of Player objects

        Calculates the top P players with at least minPA plate appearances
        with respect to batting average in season year
        """
        self._check_year(year)

        # set initial variables
        # minPA: minimum plate appearances to qualify for calculation
        players = []

        # check if the file with batting Averages for year year has
        # already been constructed, if not construct it
        if not os.path.isfile(Filepath.get_retrosheet_file(folder='persistent', 
            fileF='batAve', year=year)):
            self.__construct_bat_ave_csv(year) # pragma: no cover

        # Construct a list of the top P players
        df = DataFrame.from_csv(Filepath.get_retrosheet_file(folder='persistent', 
            fileF='batAve', year=year))
        lenPlayers, P = 0, self.get_p()
        append = players.append
        for lahmanID, firstName, lastName, retrosheetID, batAve, PA in df.itertuples():
            if lenPlayers == P: # we've got all the players
                break
            if PA >= minPA: # make sure the player has enough plate appearances
                player = Player(lahmanID, year, batAve=batAve, 
                                firstName=firstName, lastName=lastName, 
                                retrosheetID=retrosheetID)
                append(player)
                # append(Player(lahmanID, year))
                lenPlayers += 1

        return players
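
DataFrame.from_csv was removed in later pandas releases; pd.read_csv(..., index_col=0) reads the same layout, and itertuples() then yields namedtuples whose first field (Index) carries the lahmanID index. A rough sketch of the read-and-filter step on an invented frame (the 400-PA cutoff and all values are placeholders):

import pandas as pd

df = pd.DataFrame(
    {'FirstName': ['Al', 'Bo'], 'LastName': ['Alpha', 'Beta'],
     'RetrosheetID': ['alpha001', 'betab001'],
     'batAve2010Sorted': [0.330, 0.275], 'PA': [610, 380]},
    index=pd.Index(['alphaal01', 'betabo01'], name='lahmanID'))

batAveCol = 'batAve2010Sorted'
qualified = []
for row in df.itertuples():                     # row.Index is the lahmanID
    if row.PA >= 400:                           # hypothetical minPA
        qualified.append((row.Index, row.FirstName, row.LastName,
                          row.RetrosheetID, getattr(row, batAveCol)))
print(qualified)
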
Example #13
def getLogger(activeDate, sN, vMN):
    ## Create logger that handles file logging
    logger = logging.getLogger()
    # handler to write to logs
    fileHandler = logging.FileHandler(
        Filepath.get_log_file(activeDate, sN, vMN))
    formatter = logging.Formatter('%(asctime)s %(message)s\n')
    fileHandler.setFormatter(formatter)
    # add handlers to logger
    logger.addHandler(fileHandler)

    # Set logger level
    logger.setLevel(20)  # info level

    return logger
Example #14
    def _set_bat_ave(self, year):
        """
        int -> float
        year: int | year as a 4 digit int

        Produces the season batting average of self in year year, rounded
        off to 3 decimal places
        """
        # Read in relevant columns from batting.csv
        df = pd.read_csv(Filepath.get_lahman_file("batting"), 
                         usecols=['playerID', 'yearID', 'AB', 'H'])

        # Getting batting stats for player in given year
        lId = self.get_lahman_id()
        batting_stats_df = df[(df.playerID == lId) & (df.yearID == year)]

        # Sum over all the hits and divide by the sum over all at-bats;
        #   summing accounts for players who were traded mid-season
        #   (float cast avoids integer division under Python 2)
        return round(float(sum(batting_stats_df.H)) / sum(batting_stats_df.AB), 3)
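
In Python 2 the hit and at-bat sums are integers, hence the float() cast above (or a __future__ division import). The same traded-player aggregation can also be expressed as a groupby over all stints; a sketch on invented rows:

import pandas as pd

# two stints for one player in the same season (numbers invented)
batting = pd.DataFrame({'playerID': ['aaaaa001', 'aaaaa001'],
                        'yearID': [2006, 2006],
                        'AB': [339, 209], 'H': [94, 61]})

season = batting.groupby(['playerID', 'yearID'])[['H', 'AB']].sum()
batAve = (season.H / season.AB.astype(float)).round(3)  # float cast keeps Python 2 honest
print(batAve)
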
Example #15
    def report_results(self, test=False, method=None):
        """
        bool int -> None
        test: bool | True if a test run, false otherwise
        method: int | the index of the player selection method used in the simulation

        Produces results of self.npsim in an excel file
        """
        npsim = self.get_npsim()

        ## Initialize variables
        numTopBots = 2 # number of top bot histories to report
        firstBot = npsim.get_bots()[0]
        firstBotHist = firstBot.get_history()
        firstTuple = firstBotHist[0]
        startDate = firstTuple[4]
        endDate = npsim.get_bots()[0].get_history()[-1][4]
        writer = ExcelWriter(Filepath.get_results_file(
            simYear=npsim.get_sim_year(), batAveYear=npsim.get_bat_year(), 
            N=npsim.get_n(), P=npsim.get_p(), startDate=startDate, 
            endDate=endDate, minPA=npsim.minPA, minERA=npsim.minERA, 
            selectionMethodNumber=method, doubleDown=npsim.doubleDown,
            test=test))

        ## calculate best bots
        npsim.get_bots().sort(key=lambda bot: bot.get_max_streak_length())
        npsim.get_bots().reverse()
        bestBots = npsim.get_bots()

        ## report sim metadata
        self.__report_sim_metadata_results_excel(writer, method=method)
        
        ## report results for top performing bots
        for bot in bestBots[0:numTopBots]:
            self.__report_bot_results_to_excel(bot, writer)

        ## report bots metadata
        self.__report_bots_metadata_results_excel(writer)


        ## save everything to file
        writer.save()
Example #16
from scan import Scan, Particleset
import numpy as np
from glob import glob
from filepath import Filepath
from os.path import join

lazfile_path = "/home/dunbar/Research/helheim/data/lazfiles"
lazfiles = glob(join(lazfile_path, "*.laz"))
lazfiles = [Filepath(file) for file in lazfiles]
lazfiles.sort(key=lambda i: i.datetime)

xbounds, ybounds = (535400.00 + 10, 536400.00 + 10), (7358200.00 + 10,
                                                      7359800 + 10)

for filepath in lazfiles:
    scan = Scan(filepath)
    orig = np.max(scan.file.points.shape)
    print(f"\n Down-sampling {scan.filepath.filepath}\n")
    points = np.squeeze(scan.bounds(xbounds, ybounds))
    # np.random.shuffle works in place and returns None, so build a shuffled index array instead
    shuffleinds = np.random.permutation(np.max(points.shape))
    points = np.array(points)[shuffleinds]
    points = np.squeeze(points[::2].transpose())
    print(f"\n Reduction: {np.max(points.shape) / orig}\n")
    scan.writefile(scan.filepath.filepath.replace(".laz", ".dslaz"), points)
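
Since np.random.shuffle shuffles in place and returns None, np.random.permutation (used above) is the call that actually yields a shuffled index array. A compact sketch of the same random 50% down-sampling on a toy point array:

import numpy as np

points = np.random.rand(1000, 3)                   # toy N x 3 point cloud
order = np.random.permutation(points.shape[0])     # shuffled row indices
downsampled = points[order[::2]]                   # keep every other shuffled point
print("reduction: {:.2f}".format(float(downsampled.shape[0]) / points.shape[0]))
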
Example #17
                   sheet_name='Production') 
    writer.save()

if __name__ == '__main__':
    """
    Usage: 
        1) ./accounts.py N
           -> creates and logs N new accounts
        2) ./accounts.py num
           -> returns the number of accounts created and logged to date
    """ 
    assert len(sys.argv) == 2
    
    ## Is this a type 2 call?
    if sys.argv[-1] == 'num':
        df = pd.read_excel(Filepath.get_accounts_file())
        print "Num Accounts: {}".format(len(df)) 
    
    ## Else it's a type 1 call
    else:
        numAccounts = int(sys.argv[1])
        origCount = numAccounts
        # make accounts in small blocks so that if something bad happens
        #  we don't lose e.g. 1,000 accounts
        blockSize = 20
        while numAccounts > 0:
            if numAccounts < blockSize:
                main(numAccounts)
                break
            else: 
                print "********** CREATING IN CHUNKS OF {}:.Completed {} of {} ***********".format(
Example #18
import json
import numpy as np
import geopandas as gpd
import rasterio
from rasterio import mask
from shapely.geometry import box
from glob import glob
from os.path import join
from filepath import Filepath

def bound_box(pointa,pointb):
	minx,miny = np.minimum(pointa[0],pointb[0]), np.minimum(pointa[1],pointb[1])
	maxx,maxy = np.maximum(pointa[0],pointb[0]), np.maximum(pointa[1],pointb[1])
	bbox = box(minx,miny,maxx,maxy)
	geo = gpd.GeoDataFrame({'geometry': bbox}, index=[0],crs=32624) #24N
	return [json.loads(geo.to_json())['features'][0]['geometry']]

DEM_path = "/home/dunbar/Research/helheim/data/interpolated_dem"
lazfile_path = "/home/dunbar/Research/helheim/data/lazfiles"
raster_path = "/home/dunbar/Research/helheim/data/2016_cpd_vels"
lazfiles = glob(join(lazfile_path,"*.laz"))
velorasters = glob(join(raster_path,"*.tif"))
lazfiles = [ Filepath(x) for x in lazfiles ]
lazfiles.sort(key= lambda i: i.datetime)
cpdvels = [Filepath(x) for x in velorasters]
cpdvels.sort(key = lambda i: i.datetime)
diagupp, diaglow = (535000.00,7359250.00),(537000.00,7358250.00)
boundpolygon = bound_box(diagupp,diaglow)



with rasterio.open(cpdvels[0].filepath) as src:
	outimage = np.squeeze(mask.mask(src,boundpolygon,crop=True,filled=False)[0]) #extract mask
	
eastdims = (np.minimum(diagupp[0],diaglow[0]),np.maximum(diagupp[0],diaglow[0]),outimage.shape[1])
northdims = (np.minimum(diagupp[1],diaglow[1]),np.maximum(diagupp[1],diaglow[1]),outimage.shape[0])
easting_interpolation = np.linspace(eastdims[0],eastdims[1],int(eastdims[2]))
northing_interpolation = np.linspace(northdims[0],northdims[1],int(northdims[2]))
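
If the intent is to pair every masked raster cell with projected coordinates, the two interpolation vectors would typically be combined with np.meshgrid; a sketch under that assumption, with a toy raster standing in for the masked velocity image:

import numpy as np

easting = np.linspace(535000.0, 537000.0, 4)                # toy interpolation vectors
northing = np.linspace(7358250.0, 7359250.0, 3)
velocities = np.random.rand(northing.size, easting.size)    # toy masked raster

eastGrid, northGrid = np.meshgrid(easting, northing)        # both shaped like velocities
cells = np.column_stack([eastGrid.ravel(), northGrid.ravel(), velocities.ravel()])
print(cells.shape)  # (rows * cols, 3): easting, northing, velocity
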
def log_updated_accounts(updatedAccounts, sN=None, vMN=None, activeDate=None):
    """
    ListOfTuples -> None
       updatedAccounts: ListOfTuples | A list of the accounts that were
            updated in the choosePlayers function. Format: 
                (username, p1, p2) 
            where p1 and p2 are TuplesOfStrings of format
                (firstName, lastName, teamAbbreviation)
        sN: int | "strategy number" (see strategyNumber.txt)
        vMN: int | virtual Machine Number.

    Writes info about updated accounts to minion account files
    """
    import os

    ## type check
    assert type(updatedAccounts) == list
    assert type(sN) == int
    assert type(vMN) == int
    assert type(activeDate) == date

    ## Let the user know which account file we are updating
    minionAF = Filepath.get_minion_account_file(sN=sN, vMN=vMN)
    print "--> Updating accounts file: {}".format(minionAF)

    ## If the minion spreadsheet hasn't been initialized yet, do so
    if not os.path.isfile(minionAF):
        fullDF = pd.read_excel(Filepath.get_accounts_file(),
                               sheetname='Production',
                               parse_cols='A:F')
        minionDF = fullDF[(fullDF.Strategy == sN) & (fullDF.VM == vMN)]
        minionDF.to_excel(
            minionAF,
            sheet_name='Production',
            index=False  # no extra column of row indices  
        )

    ## Get the minion spreadsheet corresponding to this sN and vMN
    dateFormatted = __get_date_formatted_for_excel(activeDate)
    minionDF = pd.read_excel(minionAF, sheetname='Production')

    ## Create the series corresponding to today's player selections
    if dateFormatted in minionDF.columns:
        accountInfoL = list(minionDF[dateFormatted])
        del minionDF[dateFormatted]
    else:
        accountInfoL = ['' for i in range(len(minionDF))]
    for account in updatedAccounts:
        accountIndex = minionDF.Email[ minionDF.Email == \
                                       account[0]].index[0]
        accountInfoL[accountIndex] = 'Done. 1: {}, 2: {}'.format(
            account[1], account[2])

    ## Put the dataframe together and print it to file
    newDF = pd.concat(
        [minionDF, pd.Series(accountInfoL, name=dateFormatted)], axis=1)
    newDF.to_excel(
        minionAF,
        sheet_name='Production',
        index=False  # no extra column for row indices
    )
Example #20
    writer.save()


if __name__ == '__main__':
    """
    Usage: 
        1) ./accounts.py N
           -> creates and logs N new accounts
        2) ./accounts.py num
           -> returns the number of accounts created and logged to date
    """
    assert len(sys.argv) == 2

    ## Is this a type 2 call?
    if sys.argv[-1] == 'num':
        df = pd.read_excel(Filepath.get_accounts_file())
        print "Num Accounts: {}".format(len(df))

    ## Else it's a type 1 call
    else:
        numAccounts = int(sys.argv[1])
        origCount = numAccounts
        # make accounts in small blocks so that if something bad happens
        #  we don't lose e.g. 1,000 accounts
        blockSize = 20
        while numAccounts > 0:
            if numAccounts < blockSize:
                main(numAccounts)
                break
            else:
                print "********** CREATING IN CHUNKS OF {}:.Completed {} of {} ***********".format(
Example #21
def main(N):
    """
    int -> None

    Creates N unique beatthestreak accounts, and claims mulligans for each account.
    Stores all username and password info in btsAccounts.xlsx, sheetname "Production"

       IMPORTANT: Does not actually make email addresses for the accounts. 
    If any account gets to over 40 hits in a row, we'll go and MANUALLY
    make an email account and validate it. 
    """
    newUsernamesL = []
    newMLBPasswordsL = []
    usernameStarters = [
        'faiyam', 'rahman', 'bts', 'metro', 'williams', 'grassfed', 'daft',
        'fossil', 'water', 'earth'
    ]

    ## read in the production sheet to get the already existing accounts
    # Column A: id
    # Column B: Email
    # Column C: EmailPassword
    # Column D: MLBPassword
    df = pd.io.excel.read_excel(Filepath.get_accounts_file(),
                                sheetname='Production',
                                parse_cols='A,B,C,D')

    ## Create N new fake email addresses. We'll go and ACTUALLY make them
    ## if they reach a certain plateau streak length
    listOfEmails = list(
        df.Email)  # need a list to check if an email has already been used
    i = 0
    while (i < N):
        username = random.choice(usernameStarters) + "." + \
                    random.choice(usernameStarters) + "." + \
                    str(random.randint(1,2014000)) + '@faiyamrahman.com'
        # make sure we don't repeat an address
        if (username in listOfEmails) or (username in newUsernamesL):
            continue
        newUsernamesL.append(username)
        i += 1

    for username in newUsernamesL:
        time.sleep(5)  # give it some time to clean things up
        print "\n--> CREATING ACCOUNT NUMBER: {0} of {1}".format(
            newUsernamesL.index(username) + 1, len(newUsernamesL))

        ## Wrap this in a try except in case selenium fails us
        accountMade, mulliganClaimed = (False, False)
        attemptNum = 0
        while True:
            try:
                attemptNum += 1
                ## Create a beatthestreak account on espn and kill the browser
                if not accountMade:
                    password = '******'
                    make_espn_bts_account(username, password)
                    accountMade = True
                ## Claim the bots mulligan
                if not mulliganClaimed:
                    claim_mulligan(username, password)  # uses its own browser
                    print "--> Mulligan claimed :)"
                    mulliganClaimed = True
            except:
                print "--> Attempt {} of 5 failed".format(attemptNum)
                if attemptNum > 5:  # if we've tried this u and p more than 5 times, tell us what's up
                    raise
                # Otherwise try again
                continue
            else:
                break

        ## Hold on to the data to add to the btsAccounts excel file
        newMLBPasswordsL.append(password)

    ## add to the dataframe and replace the Production sheet
    # make sure the excel file has the column headers we expect
    assert df.columns[0] == 'ID'
    assert df.columns[1] == 'Email'
    assert df.columns[2] == 'EmailPassword'
    assert df.columns[3] == 'MLBPassword'
    # make a dataframe containing the new info
    firstID = len(df.ID)
    idL = [firstID + i for i in range(0, len(newUsernamesL))]
    # outlook 365 aliases don't have their own passwords
    newEmailPasswordsL = ['n/a' for password in newMLBPasswordsL]
    extraDF = pd.concat([
        pd.Series(idL, name='ID'),
        pd.Series(newUsernamesL, name='Email'),
        pd.Series(newEmailPasswordsL, name='EmailPassword'),
        pd.Series(newMLBPasswordsL, name='MLBPassword')
    ],
                        axis=1)
    # create a dataframe with all the info and write it to file
    newDF = pd.concat([df, extraDF])
    writer = pd.ExcelWriter(Filepath.get_accounts_file())
    newDF.to_excel(writer, index=False, sheet_name='Production')
    writer.save()
Example #22
def main(N):
    """
    int -> None

    Creates N unique beatthestreak accounts, and claims mulligans for each account.
    Stores all username and password info in btsAccounts.xlsx, sheetname "Production"

       IMPORTANT: Does not actually make email addresses for the accounts. 
    If any account gets to over 40 hits in a row, we'll go and MANUALLY
    make an email account and validate it. 
    """
    newUsernamesL = []
    newMLBPasswordsL = []
    usernameStarters = [ 'faiyam', 'rahman', 'bts', 'metro', 'williams', 
                         'grassfed', 'daft', 'fossil', 'water', 'earth']

    ## read in the production sheet to get the already existing accounts
       # Column A: id
       # Column B: Email
       # Column C: EmailPassword
       # Column D: MLBPassword
    df = pd.io.excel.read_excel(Filepath.get_accounts_file(), 
                sheetname='Production', parse_cols='A,B,C,D')

    ## Create N new fake email addresses. We'll go and ACTUALLY make them
    ## if they reach a certain plateau streak length
    listOfEmails = list(df.Email) # need a list to check if an email has already been used
    i = 0
    while (i < N):
        username = random.choice(usernameStarters) + "." + \
                    random.choice(usernameStarters) + "." + \
                    str(random.randint(1,2014000)) + '@faiyamrahman.com'
        # make sure we don't repeat an address
        if (username in listOfEmails) or (username in newUsernamesL):
            continue
        newUsernamesL.append(username)
        i += 1

    for username in newUsernamesL:
        time.sleep(5) # give it some time to clean things up
        print "\n--> CREATING ACCOUNT NUMBER: {0} of {1}".format(newUsernamesL.index(username) + 1, len(newUsernamesL))
        
        ## Wrap this in a try except in case selenium fails us
        accountMade, mulliganClaimed = (False, False)
        attemptNum = 0
        while True:
            try:
                attemptNum += 1
                ## Create a beatthestreak account on espn and kill the browser
                if not accountMade:
                    password = '******'
                    make_espn_bts_account(username, password)
                    accountMade = True
                ## Claim the bots mulligan 
                if not mulliganClaimed:
                    claim_mulligan(username, password) # uses its own browser
                    print "--> Mulligan claimed :)"
                    mulliganClaimed = True
            except:
                print "--> Attempt {} of 5 failed".format(attemptNum)
                if attemptNum > 5: # if we've tried this u and p more than 5 times, tell us what's up
                    raise
                # Otherwise try again
                continue
            else:
                break

        

        ## Hold on to the data to add to the btsAccounts excel file
        newMLBPasswordsL.append(password)

    ## add to the dataframe and replace the Production sheet
        # make sure the excel file has the column headers we expect
    assert df.columns[0] == 'ID'
    assert df.columns[1] == 'Email'
    assert df.columns[2] == 'EmailPassword'
    assert df.columns[3] == 'MLBPassword'
        # make a dataframe containing the new info
    firstID = len(df.ID)
    idL = [firstID + i for i in range(0, len(newUsernamesL))]
            # outlook 365 aliases don't have their own passwords
    newEmailPasswordsL = ['n/a' for password in newMLBPasswordsL]
    extraDF = pd.concat([pd.Series(idL, name='ID'), 
                         pd.Series(newUsernamesL, name='Email'), 
                         pd.Series(newEmailPasswordsL, name='EmailPassword'), 
                         pd.Series(newMLBPasswordsL, name='MLBPassword')], 
                         axis=1)
        # create a dataframe with all the info and write it to file
    newDF = pd.concat([df, extraDF])
    writer = pd.ExcelWriter(Filepath.get_accounts_file())
    newDF.to_excel(writer,
                   index=False, 
                   sheet_name='Production') 
    writer.save()
def log_updated_accounts(updatedAccounts, sN=None, vMN=None, activeDate=None):
    """
    ListOfTuples -> None
       updatedAccounts: ListOfTuples | A list of the accounts that were
            updated in the choosePlayers function. Format: 
                (username, p1, p2) 
            where p1 and p2 are TuplesOfStrings of format
                (firstName, lastName, teamAbbreviation)
        sN: int | "strategy number" (see strategyNumber.txt)
        vMN: int | virtual Machine Number.

    Writes info about updated accounts to minion account files
    """
    import os

    ## type check
    assert type(updatedAccounts) == list
    assert type(sN) == int
    assert type(vMN) == int
    assert type(activeDate) == date

    ## Let the user know which account file we are updating
    minionAF = Filepath.get_minion_account_file(sN=sN, vMN=vMN)
    print "--> Updating accounts file: {}".format(minionAF)

    ## If the minion spreadsheet hasn't been initialized yet, do so
    if not os.path.isfile(minionAF): 
        fullDF = pd.read_excel( Filepath.get_accounts_file(), 
                                sheetname='Production',
                                parse_cols= 'A:F' )
        minionDF = fullDF[(fullDF.Strategy == sN) & (fullDF.VM == vMN)]
        minionDF.to_excel( minionAF, 
                           sheet_name='Production', 
                           index=False # no extra column of row indices  
                         ) 

    ## Get the minion spreadsheet corresponding to this sN and vMN
    dateFormatted = __get_date_formatted_for_excel(activeDate)
    minionDF = pd.read_excel( minionAF, 
                              sheetname='Production' )

    ## Create the series corresponding to today's player selections
    if dateFormatted in minionDF.columns: 
        accountInfoL = list(minionDF[dateFormatted])
        del minionDF[dateFormatted]
    else:
        accountInfoL = ['' for i in range(len(minionDF))]
    for account in updatedAccounts:
        accountIndex = minionDF.Email[ minionDF.Email == \
                                       account[0]].index[0]
        accountInfoL[accountIndex] = 'Done. 1: {}, 2: {}'.format(
                                         account[1], account[2])

    ## Put the dataframe together and print it to file
    newDF = pd.concat( [minionDF, 
                        pd.Series(accountInfoL, name=dateFormatted)], 
                        axis=1)
    newDF.to_excel( minionAF, 
                    sheet_name='Production', 
                    index=False # no extra column for row indices
                  )
Example #24
    def report_mass_results(self, **kwargs):
        """
        simYearL: List of simulation years for mass simulation
        batAveYearL: List of batting average years for mass simulation
        NL: List of N values for mass simulation
        PL: List of P values for mass simulation
        minBatAveL: list of minBatAve values for mass simulation
        numSuccessL: List of number of Successes for mass simulation
        percentSuccessL: List of percent of successes for mass simulation
        numFailL: list of number of failures for mass simulation
        percentFailL: list of percent failures for mass simulation
        simYearRange: (lowest_sim_year, highest_sim_year)
        simMinBatRange: (lowest simYear-batAveYear, highest simYear-batAveYear)
        NRange: (lowest N, highest N)
        PRange: (lowest P, highest P)

        Reports mass simulation results to excel spreadsheet
        """
        npsim = self.get_npsim()

        ## Create series corresponding to columns of csv
        simYearS = Series(kwargs['simYearL'], name='Sim Year')
        batAveYearS = Series(kwargs['batAveYearL'], name='Bat Ave Year')
        nS = Series(kwargs['NL'], name='N')
        pS = Series(kwargs['PL'], name='P')
        minBatAveS = Series(kwargs['minBatAveL'], name='Min Bat Ave')
        successesS = Series(kwargs['numSuccessL'], name='Successes')
        perSuccessS = Series(kwargs['percentSuccessL'], 
                             name='Successes (1=100%)')
        failureS = Series(kwargs['numFailL'], name='Failures')
        perFailureS = Series(kwargs['percentFailL'], name='Failures (%) (1=100%)')
        percUniqueBotsS = Series(kwargs['percUniqueBotsL'], name='(%) Unique Bots')
        minPAS = Series(kwargs['minPAL'], name='min PA')
        methodL = [self.selMethods[methodIndex] for methodIndex 
            in kwargs['methodL']]
        methodS = Series(methodL, name='Method')
        topStreakAveS = Series(kwargs['topStreakAveL'], name='Mean(topFiveStreaks)')
        oneStreakS = Series(kwargs['oneStreakL'], name='1 Streak')
        twoStreakS = Series(kwargs['twoStreakL'], name='2 Streak')
        threeStreakS = Series(kwargs['threeStreakL'], name='3 Streak')
        fourStreakS = Series(kwargs['fourStreakL'], name='4 Streak')
        fiveStreakS = Series(kwargs['fiveStreakL'], name='5 Streak')
        doubleDownS = Series(kwargs['doubleDownL'], name='DoubleDown?')
        startDateS = Series(kwargs['startDateL'], name='start date')
        endDateS = Series(kwargs['endDateL'], name='end date')
        minERAS = Series(kwargs['minERAL'], name='min ERA')

        ## construct dataframe
        df = concat([simYearS, batAveYearS, nS, pS, minPAS, minBatAveS], axis=1)
        if len(minERAS) > 0:
            df = concat([df, minERAS], axis=1) 
        df = concat([ df, successesS, perSuccessS, failureS, perFailureS, 
                      percUniqueBotsS, doubleDownS, topStreakAveS, oneStreakS, 
                      twoStreakS, threeStreakS, fourStreakS, fiveStreakS, 
                      startDateS, endDateS, methodS], axis=1)

        ## Write the info to an excel spreadsheet
        if self.test == True: # debugging code
            writer = ExcelWriter(Filepath.get_mass_results_file(
                simYearRange=kwargs['simYearRange'], 
                sMBRange=kwargs['simMinBatRange'], 
                NRange=kwargs['NRange'], PRange=kwargs['PRange'], 
                minPARange=kwargs['minPARange'], 
                minERARange=kwargs['minERARange'], 
                method=npsim.method, test=True))
        else:
            writer = ExcelWriter(Filepath.get_mass_results_file(
                simYearRange=kwargs['simYearRange'], 
                sMBRange=kwargs['simMinBatRange'], 
                NRange=kwargs['NRange'], PRange=kwargs['PRange'], 
                minPARange=kwargs['minPARange'], 
                minERARange=kwargs['minERARange'], 
                method=npsim.method))
        df.to_excel(writer, index=False, sheet_name='Meta')

        writer.save()