def __construct_bat_ave_csv(self, year):  # pragma: no cover
    """
    int -> None
    year: int | the year for which the csv file should be constructed

    Builds one row per unique lahman playerID that appears in the Lahman
    Batting file for the given year (lahmanID, first name, last name,
    retrosheet id, batting average, plate appearances), sorts the rows by
    batting average in descending order and writes them to the persistent
    'batAve' csv for that year.

    (Helper function for _calc_players)
    """
    self._check_year(year)

    # Name of the batting average column in the output file
    batAveCol = "".join(['batAve', str(year), 'Sorted'])

    # Unique playerIDs that have a batting row in the given year
    batting = pd.read_csv(Filepath.get_lahman_file("Batting"),
                          usecols=['playerID', 'yearID', 'AB'])
    battingThisYear = batting[batting.yearID == year]
    idValues = battingThisYear.playerID.values.ravel()
    uniqueIDArray = Series(idValues).unique()  # ndarray

    # Progress bar, advanced once per player
    widgets = [' Creating batting average csv for year %s ' % year,
               Timer(), ' ', Percentage()]
    pbar = ProgressBar(maxval=len(uniqueIDArray), widgets=widgets).start()

    # One list per output column, filled in lockstep
    collected = {'batAve': [], 'PA': [], 'FirstName': [],
                 'LastName': [], 'RetrosheetID': []}
    for position, lahmanID in enumerate(uniqueIDArray):
        player = Player(lahmanID, year)
        collected['batAve'].append(player.get_bat_ave())
        collected['PA'].append(
            Researcher.num_plate_appearances(year, player))
        collected['FirstName'].append(player.get_first_name())
        collected['LastName'].append(player.get_last_name())
        collected['RetrosheetID'].append(player.get_retrosheet_id())
        pbar.update(position)
    pbar.finish()

    # Assemble the columns in their output order
    outputColumns = [
        Series(uniqueIDArray, name='lahmanID'),
        Series(collected['FirstName'], name='FirstName'),
        Series(collected['LastName'], name='LastName'),
        Series(collected['RetrosheetID'], name='RetrosheetID'),
        Series(collected['batAve'], name=batAveCol),
        Series(collected['PA'], name='PA'),
    ]
    table = pd.concat(outputColumns, axis=1)
    table.sort(columns=batAveCol, ascending=False, inplace=True)

    # Write the sorted table to file
    outPath = Filepath.get_retrosheet_file(
        folder='persistent', fileF='batAve', year=year)
    table.to_csv(path_or_buf=outPath, index=False)
def __get_dataframes_for_choose_players(**kwargs):
    """
    kwargs -> (pd.DataFrame, pd.DataFrame)
    sN: int | Strategy Number
    vMN: int | Virtual Machine Number
    num: int | number of unhandled accounts to return
    activeDate: datetime.date | date for which we should parse the dataframe

    Returns two pandas dataframes restricted to the columns
    ID, Email, MLBPassword, Strategy and VM. The first holds the first num
    unhandled accounts for strategy sN and virtual machine vMN; the second
    holds all of them. The minion accounts file is used when it exists,
    otherwise the master accounts file is filtered by sN and vMN. An
    account counts as unhandled when its cell in the activeDate column is
    null (or when that column does not exist yet).
    """
    import os

    sN, vMN = kwargs['sN'], kwargs['vMN']
    num, activeDate = kwargs['num'], kwargs['activeDate']

    ### Type check
    assert type(sN) == int
    assert type(vMN) == int
    assert type(num) == int
    assert type(activeDate) == date

    ### Read in the minion accounts file if available,
    ### otherwise get the master file
    minionPath = Filepath.get_minion_account_file(sN=sN, vMN=vMN)
    if os.path.isfile(minionPath):
        dfPath = minionPath
    else:
        dfPath = Filepath.get_accounts_file()
    df = pd.read_excel(dfPath, sheetname='Production')

    ### Let the user know what's up
    print("--> Getting accounts file {}".format(dfPath))

    ### Parse it down to include only what we want
    # If it's the master accounts file, only include those accounts with
    # this strategy number and virtual machine number. A single combined
    # mask avoids indexing an already-filtered frame with a full-length mask.
    if dfPath != minionPath:
        df = df[(df.Strategy == sN) & (df.VM == vMN)]

    # Only include those accounts that haven't yet been updated.
    # The date comes from kwargs; no local named activeDate existed before.
    dateFormatted = __get_date_formatted_for_excel(activeDate)
    if dateFormatted in df.columns:
        # pd.isnull checks for NaNs (unhandled accounts)
        df = df[pd.isnull(df[dateFormatted])]

    # Only include the columns we want
    df = df[['ID', 'Email', 'MLBPassword', 'Strategy', 'VM']]
    return df[0:num], df
def __get_dataframes_for_choose_players(**kwargs):
    """
    kwargs -> (pd.DataFrame, pd.DataFrame)
    sN: int | Strategy Number
    vMN: int | Virtual Machine Number
    num: int | number of unhandled accounts to return
    activeDate: datetime.date | date for which we should parse the dataframe

    Returns two pandas dataframes restricted to the columns
    ID, Email, MLBPassword, Strategy and VM. The first holds the first num
    unhandled accounts for strategy sN and virtual machine vMN; the second
    holds all of them. The minion accounts file is used when it exists,
    otherwise the master accounts file is filtered by sN and vMN. An
    account counts as unhandled when its cell in the activeDate column is
    null (or when that column does not exist yet).
    """
    import os

    sN, vMN = kwargs['sN'], kwargs['vMN']
    num, activeDate = kwargs['num'], kwargs['activeDate']

    ### Type check
    assert type(sN) == int
    assert type(vMN) == int
    assert type(num) == int
    assert type(activeDate) == date

    ### Read in the minion accounts file if available,
    ### otherwise get the master file
    minionPath = Filepath.get_minion_account_file(sN=sN, vMN=vMN)
    if os.path.isfile(minionPath):
        dfPath = minionPath
    else:
        dfPath = Filepath.get_accounts_file()
    df = pd.read_excel(dfPath, sheetname='Production')

    ### Let the user know what's up
    print("--> Getting accounts file {}".format(dfPath))

    ### Parse it down to include only what we want
    # If it's the master accounts file, only include those accounts with
    # this strategy number and virtual machine number. A single combined
    # mask avoids indexing an already-filtered frame with a full-length mask.
    if dfPath != minionPath:
        df = df[(df.Strategy == sN) & (df.VM == vMN)]

    # Only include those accounts that haven't yet been updated.
    # The date comes from kwargs; no local named activeDate existed before.
    dateFormatted = __get_date_formatted_for_excel(activeDate)
    if dateFormatted in df.columns:
        # pd.isnull checks for NaNs (unhandled accounts)
        df = df[pd.isnull(df[dateFormatted])]

    # Only include the columns we want
    df = df[['ID', 'Email', 'MLBPassword', 'Strategy', 'VM']]
    return df[0:num], df
def fetch_retrosheet_id_from_name(self):
    """
    None -> string

    Looks self up in the retrosheet id file by first and last name and
    returns the matching ID. Raises NoPlayerException when no row matches.
    When several rows match, the debut date is used to pick one; the user
    is prompted on stdin until self's debut equals one of the candidate
    debut dates.
    """
    # Rows of the retrosheet id file that carry self's name
    idFilePath = Filepath.get_retrosheet_file(folder='base', fileF='id')
    candidates = pd.read_csv(idFilePath)
    candidates = candidates[candidates.FIRST == self.get_first_name()]
    candidates = candidates[candidates.LAST == self.get_last_name()]

    numCandidates = len(candidates)
    if numCandidates == 0:
        raise NoPlayerException(
            "No player found with name %s" % self.get_first_name()
            + " " + self.get_last_name())
    if numCandidates == 1:
        # exactly one row: the player is unambiguous
        return candidates.ID.item()

    # More than one candidate: keep asking until we hold a known debut date
    numPrompts = 0
    while self.get_debut() not in candidates.DEBUT.values:
        if numPrompts > 0:
            print("\nYou mistyped. Try again")
        print("\nMultiple ids found. What was "
              + "%s's debut date? Options:" % self.get_name())
        for option in candidates.DEBUT:
            print(option)
        self.set_debut(str(raw_input()))
        numPrompts += 1

    # Return the id of the first candidate whose debut date matches
    for candidateDebut in candidates.DEBUT:
        candidateDate = datetime.strptime(candidateDebut, '%m/%d/%Y')
        ownDate = datetime.strptime(self.debut, '%m/%d/%Y')
        if candidateDate == ownDate:
            sameDebut = candidates[candidates.DEBUT == candidateDebut]
            return sameDebut.ID.item()
def __set_lahman_id(self):
    """
    None -> string

    Returns the lahman playerID whose retroID in the Lahman master file
    equals self's retrosheet id. The value is returned, not stored.
    """
    # Only the two id columns of the master file are needed
    masterPath = Filepath.get_lahman_file("master")
    idTable = pd.read_csv(masterPath, usecols=["playerID", "retroID"])

    # Row(s) whose retroID is ours; .item() expects exactly one of them
    ownRows = idTable[idTable.retroID == self.get_retrosheet_id()]
    return ownRows['playerID'].item()
def reportUnusedPlayers(sN, vMN, activeDate):
    """
    int int date -> None

    Compares the global list "logEligiblePlayers" to the selections
    recorded in the activeDate column of the minion account file for
    sN and vMN, and logs player selection rates to the log file.
    The values of the globals logEligiblePlayers, ignorePlayers and
    playerExceptions are logged as well.

    Counts are logged in ascending order, so unused players come first.
    For example, if logEligiblePlayers = (p1, p2, p3) and 3 accounts chose
    p1, 5 accounts chose p2 and 0 accounts chose p3, then it will write:

        **** Player Selection Rates ****
         --->p3: 0
         --->p1: 3
         --->p2: 5
    """
    global logEligiblePlayers
    global ignorePlayers
    global playerExceptions

    ### Read in the minion accounts file
    minionPath = Filepath.get_minion_account_file(sN=sN, vMN=vMN)
    df = pd.read_excel(minionPath, sheetname='Production')

    ### Let the user know what's up
    print("--> Reporting Player selection rates for {}".format(minionPath))

    ### Only include column with today's selections
    selections = df[__get_date_formatted_for_excel(activeDate)]

    ### Compare to eligible players and construct selection counts
    playerCounts = dict((player, 0) for player in logEligiblePlayers)
    # str(player) does not change between selections, so compute it once
    playerLabels = [(player, str(player)) for player in logEligiblePlayers]
    for selection in selections:
        # Accounts without a selection are read back as NaN; a substring
        # test against NaN would raise TypeError, so skip those cells
        if pd.isnull(selection):
            continue
        for player, label in playerLabels:
            if label in selection:
                playerCounts[player] += 1

    ### Organize the player counts (least selected first)
    sortedPlayerCounts = sorted(playerCounts.items(), key=lambda x: x[1])

    ### Log counts to file
    logger = getLogger(activeDate=activeDate, sN=sN, vMN=vMN)

    ### Tell us what values the global variables had
    logger.info("\n\n**** logEligiblePlayers ****\n" + str(logEligiblePlayers))
    logger.info("\n\n**** ignorePlayers ****\n" + str(ignorePlayers))
    logger.info("\n\n**** playerExceptions ****\n" + str(playerExceptions))

    info = "\n\n**** Player Selection Rates ****\n" + "".join(
        "\n --->{}: {}".format(player, count)
        for player, count in sortedPlayerCounts)
    logger.info(info)
def reportUnusedPlayers(sN, vMN, activeDate):
    """
    int int date -> None

    Compares the global list "logEligiblePlayers" to the selections
    recorded in the activeDate column of the minion account file for
    sN and vMN, and logs player selection rates to the log file.
    The values of the globals logEligiblePlayers, ignorePlayers and
    playerExceptions are logged as well.

    Counts are logged in ascending order, so unused players come first.
    For example, if logEligiblePlayers = (p1, p2, p3) and 3 accounts chose
    p1, 5 accounts chose p2 and 0 accounts chose p3, then it will write:

        **** Player Selection Rates ****
         --->p3: 0
         --->p1: 3
         --->p2: 5
    """
    global logEligiblePlayers
    global ignorePlayers
    global playerExceptions

    ### Read in the minion accounts file
    minionPath = Filepath.get_minion_account_file(sN=sN, vMN=vMN)
    df = pd.read_excel(minionPath, sheetname='Production')

    ### Let the user know what's up
    print("--> Reporting Player selection rates for {}".format(minionPath))

    ### Only include column with today's selections
    selections = df[__get_date_formatted_for_excel(activeDate)]

    ### Compare to eligible players and construct selection counts
    playerCounts = dict((player, 0) for player in logEligiblePlayers)
    # str(player) does not change between selections, so compute it once
    playerLabels = [(player, str(player)) for player in logEligiblePlayers]
    for selection in selections:
        # Accounts without a selection are read back as NaN; a substring
        # test against NaN would raise TypeError, so skip those cells
        if pd.isnull(selection):
            continue
        for player, label in playerLabels:
            if label in selection:
                playerCounts[player] += 1

    ### Organize the player counts (least selected first)
    sortedPlayerCounts = sorted(playerCounts.items(), key=lambda x: x[1])

    ### Log counts to file
    logger = getLogger(activeDate=activeDate, sN=sN, vMN=vMN)

    ### Tell us what values the global variables had
    logger.info("\n\n**** logEligiblePlayers ****\n" + str(logEligiblePlayers))
    logger.info("\n\n**** ignorePlayers ****\n" + str(ignorePlayers))
    logger.info("\n\n**** playerExceptions ****\n" + str(playerExceptions))

    info = "\n\n**** Player Selection Rates ****\n" + "".join(
        "\n --->{}: {}".format(player, count)
        for player, count in sortedPlayerCounts)
    logger.info(info)
def fetch_retrosheet_id_from_lahman_ID(self):
    """
    None -> string

    Returns the retroID that the Lahman master file lists for the
    playerID stored in self.lId.
    """
    # Only the two id columns of the master file are needed
    masterPath = Filepath.get_lahman_file("master")
    idTable = pd.read_csv(masterPath, usecols=['playerID', 'retroID'])

    # Row(s) for our lahman id; .item() expects exactly one of them
    ownRows = idTable[idTable.playerID == self.lId]
    return ownRows.retroID.item()
def get_num_accounts(sN=None, vMN=None, getRemaining=True, activeDate=None):
    """
    int int bool date -> int
    sN: Strategy Number
    vMN: virtual Machine Number
    getRemaining: when True, accounts that already have an entry in the
        activeDate column of the minion account file are not counted
    activeDate: date whose column marks an account as handled

    Returns the number of accounts corresponding to strategy number sN and
    virtual machine vMN. When the minion account file does not exist, the
    count comes from the master accounts file and getRemaining has no
    effect (only columns A:F are read from it).
    """
    import os

    ## Type check
    assert type(sN) == int
    assert type(vMN) == int
    assert type(getRemaining) == bool

    ## Assign initial variables
    day = __get_date_formatted_for_excel(activeDate)
    minionPath = Filepath.get_minion_account_file(sN=sN, vMN=vMN)

    ## If the minion account File exists, get it
    if os.path.isfile(minionPath):
        minionDF = pd.read_excel(minionPath, sheetname="Production")
        # If appropriate, parse out all accounts that have already been handled
        if getRemaining and (day in minionDF.columns):
            # pd.isnull checks for NaNs
            minionDF = minionDF[pd.isnull(minionDF[day])]
    ## Otherwise get the accounts from the master accounts file
    else:
        fullDF = pd.read_excel(Filepath.get_accounts_file(),
                               sheetname='Production',
                               parse_cols='A:F')
        # One combined mask instead of indexing the filtered frame with a
        # second full-length mask (which pandas has to reindex)
        minionDF = fullDF[(fullDF.Strategy == sN) & (fullDF.VM == vMN)]

    ## return the length of the dataframe
    return len(minionDF)
def get_num_accounts(sN=None, vMN=None, getRemaining=True, activeDate=None):
    """
    int int bool date -> int
    sN: Strategy Number
    vMN: virtual Machine Number
    getRemaining: when True, accounts that already have an entry in the
        activeDate column of the minion account file are not counted
    activeDate: date whose column marks an account as handled

    Returns the number of accounts corresponding to strategy number sN and
    virtual machine vMN. When the minion account file does not exist, the
    count comes from the master accounts file and getRemaining has no
    effect (only columns A:F are read from it).
    """
    import os

    ## Type check
    assert type(sN) == int
    assert type(vMN) == int
    assert type(getRemaining) == bool

    ## Assign initial variables
    day = __get_date_formatted_for_excel(activeDate)
    minionPath = Filepath.get_minion_account_file(sN=sN, vMN=vMN)

    ## If the minion account File exists, get it
    if os.path.isfile(minionPath):
        minionDF = pd.read_excel(minionPath, sheetname="Production")
        # If appropriate, parse out all accounts that have already been handled
        if getRemaining and (day in minionDF.columns):
            # pd.isnull checks for NaNs
            minionDF = minionDF[pd.isnull(minionDF[day])]
    ## Otherwise get the accounts from the master accounts file
    else:
        fullDF = pd.read_excel(Filepath.get_accounts_file(),
                               sheetname='Production',
                               parse_cols='A:F')
        # One combined mask instead of indexing the filtered frame with a
        # second full-length mask (which pandas has to reindex)
        minionDF = fullDF[(fullDF.Strategy == sN) & (fullDF.VM == vMN)]

    ## return the length of the dataframe
    return len(minionDF)
def getLogger(activeDate, sN, vMN):
    """
    date int int -> logging.Logger

    Returns the root logger, set to INFO level, with a file handler that
    writes '%(asctime)s %(message)s' records to the log file for
    activeDate, sN and vMN.

    The handler is attached only once per log file: calling this function
    again for the same file reuses the existing handler instead of adding
    a duplicate (which would write every message several times and leave
    extra file handles open).
    """
    import os

    logPath = Filepath.get_log_file(activeDate, sN, vMN)

    ## Create logger that handles file logging
    logger = logging.getLogger()

    # FileHandler stores the absolute path of its file in baseFilename
    targetPath = os.path.abspath(logPath)
    alreadyAttached = any(
        isinstance(handler, logging.FileHandler)
        and handler.baseFilename == targetPath
        for handler in logger.handlers)

    if not alreadyAttached:
        # handler to write to logs
        fileHandler = logging.FileHandler(logPath)
        formatter = logging.Formatter('%(asctime)s %(message)s\n')
        fileHandler.setFormatter(formatter)
        # add handlers to logger
        logger.addHandler(fileHandler)

    # Set logger level
    logger.setLevel(logging.INFO)
    return logger
def __calc__players(self, year, minPA):
    """
    int int -> ListOfPlayers
    year: int | season whose batting averages are used
    minPA: int | minimum plate appearances needed to qualify

    Reads the persistent batting average csv for year (constructing it
    first if the file does not exist) and returns Player objects for the
    first P rows, in file order, whose PA is at least minPA, where P is
    self.get_p().
    """
    self._check_year(year)

    # Build the batting average file for this year if it is missing
    csvExists = os.path.isfile(Filepath.get_retrosheet_file(
        folder='persistent', fileF='batAve', year=year))
    if not csvExists:
        self.__construct_bat_ave_csv(year)  # pragma: no cover

    # from_csv uses the first column (lahmanID) as the index, so
    # itertuples yields it as the first element of each row
    batAveTable = DataFrame.from_csv(Filepath.get_retrosheet_file(
        folder='persistent', fileF='batAve', year=year))

    quota = self.get_p()
    chosen = []
    for row in batAveTable.itertuples():
        lahmanID, firstName, lastName, retrosheetID, batAve, PA = row
        if len(chosen) == quota:
            # we've got all the players
            break
        if not PA >= minPA:
            # not enough plate appearances
            continue
        chosen.append(Player(lahmanID, year,
                             batAve=batAve,
                             firstName=firstName,
                             lastName=lastName,
                             retrosheetID=retrosheetID))
    return chosen
def getLogger(activeDate, sN, vMN):
    """
    date int int -> logging.Logger

    Returns the root logger, set to INFO level, with a file handler that
    writes '%(asctime)s %(message)s' records to the log file for
    activeDate, sN and vMN.

    The handler is attached only once per log file: calling this function
    again for the same file reuses the existing handler instead of adding
    a duplicate (which would write every message several times and leave
    extra file handles open).
    """
    import os

    logPath = Filepath.get_log_file(activeDate, sN, vMN)

    ## Create logger that handles file logging
    logger = logging.getLogger()

    # FileHandler stores the absolute path of its file in baseFilename
    targetPath = os.path.abspath(logPath)
    alreadyAttached = any(
        isinstance(handler, logging.FileHandler)
        and handler.baseFilename == targetPath
        for handler in logger.handlers)

    if not alreadyAttached:
        # handler to write to logs
        fileHandler = logging.FileHandler(logPath)
        formatter = logging.Formatter('%(asctime)s %(message)s\n')
        fileHandler.setFormatter(formatter)
        # add handlers to logger
        logger.addHandler(fileHandler)

    # Set logger level
    logger.setLevel(logging.INFO)
    return logger
def _set_bat_ave(self, year):
    """
    int -> float
    year: int | year as a 4 digit int

    Returns the season batting average of self in year year, rounded off
    to 3 decimal places: total hits divided by total at-bats over all of
    the player's batting rows for that year. The value is returned, not
    stored.
    """
    # Read in relevant columns from batting.csv
    df = pd.read_csv(Filepath.get_lahman_file("batting"),
                     usecols=['playerID', 'yearID', 'AB', 'H'])

    # Getting batting stats for player in given year
    lId = self.get_lahman_id()
    batting_stats_df = df[(df.playerID == lId) & (df.yearID == year)]

    # Sum over all the hits and divide by the sum over all at-bats;
    # summing accounts for players who were traded mid season.
    # The columns are cast to float first: when pandas reads them as
    # integers, "/" on the integer sums floor-divides under Python 2 and
    # every average would come out as 0.
    totalHits = sum(batting_stats_df.H.astype(float))
    totalAtBats = sum(batting_stats_df.AB.astype(float))
    return round(totalHits / totalAtBats, 3)
def report_results(self, test=False, method=None):
    """
    bool int -> None
    test: bool | True if a test run, false otherwise
    method: int | the index of player selection method used in the
        simulation

    Writes the results of self's npsim to an excel file: the simulation
    metadata, the histories of the top bots by max streak length, and the
    bots metadata. The npsim's bot list is sorted in place as a side
    effect.
    """
    numTopBots = 2  # number of top bot histories to report

    npsim = self.get_npsim()
    bots = npsim.get_bots()

    # The date is element 4 of a history entry; the first bot's first and
    # last entries give the date range used in the results file name
    firstBotHistory = bots[0].get_history()
    startDate = firstBotHistory[0][4]
    endDate = firstBotHistory[-1][4]

    resultsPath = Filepath.get_results_file(
        simYear=npsim.get_sim_year(),
        batAveYear=npsim.get_bat_year(),
        N=npsim.get_n(),
        P=npsim.get_p(),
        startDate=startDate,
        endDate=endDate,
        minPA=npsim.minPA,
        minERA=npsim.minERA,
        selectionMethodNumber=method,
        doubleDown=npsim.doubleDown,
        test=test)
    writer = ExcelWriter(resultsPath)

    ## rank the bots: longest max streak first
    bots.sort(key=lambda bot: bot.get_max_streak_length())
    bots.reverse()

    ## report sim metadata
    self.__report_sim_metadata_results_excel(writer, method=method)

    ## report results for top performing bots
    for topBot in bots[0:numTopBots]:
        self.__report_bot_results_to_excel(topBot, writer)

    ## report bots metadata
    self.__report_bots_metadata_results_excel(writer)

    ## save everything to file
    writer.save()
from scan import Scan, Particleset
import numpy as np
from glob import glob
from filepath import Filepath
from os.path import join

# Down-sample every .laz scan: crop to the bounding window, keep a random
# half of the points and write the result next to the input as .dslaz.
lazfile_path = "/home/dunbar/Research/helheim/data/lazfiles"
lazfiles = glob(join(lazfile_path, "*.laz"))
lazfiles = [Filepath(file) for file in lazfiles]
lazfiles.sort(key=lambda i: i.datetime)

# Crop window: (min, max) bounds passed to Scan.bounds for x and for y
xbounds, ybounds = (535400.00 + 10, 536400.00 + 10), (7358200.00 + 10, 7359800 + 10)

for filepath in lazfiles:
    scan = Scan(filepath)
    orig = np.max(scan.file.points.shape)
    print(f"\n Down-sampling {scan.filepath.filepath}\n")

    points = np.array(np.squeeze(scan.bounds(xbounds, ybounds)))

    # The longest axis is treated as the point axis (the same assumption
    # the point count via np.max(shape) makes).
    # NOTE(review): confirm the layout returned by Scan.bounds.
    point_axis = int(np.argmax(points.shape))

    # np.random.shuffle works in place and returns None, so a permutation
    # is used to obtain shuffled indices; every second one is kept.
    shuffleinds = np.random.permutation(points.shape[point_axis])
    points = np.take(points, shuffleinds[::2], axis=point_axis)
    kept = points.shape[point_axis]

    points = np.squeeze(points.transpose())
    # Fraction of the original point count that is written out
    print(f"\n Reduction: {kept / orig}\n")
    scan.writefile(scan.filepath.filepath.replace(".laz", ".dslaz"), points)
sheet_name='Production') writer.save() if __name__ == '__main__': """ Usage: 1) ./accounts.py N -> creates and logs N new accounts 2) ./accounts.py num -> returns the number of accounts created and logged to date """ assert len(sys.argv) == 2 ## Is this a type 2 call? if sys.argv[-1] == 'num': df = pd.read_excel(Filepath.get_accounts_file()) print "Num Accounts: {}".format(len(df)) ## Else its a type 1 call else: numAccounts = int(sys.argv[1]) origCount = numAccounts # make accounts in sets of 50 so that in case something bad happens, # we dont lose e.g 1000 accounts blockSize = 20 while numAccounts > 0: if numAccounts < blockSize: main(numAccounts) break else: print "********** CREATING IN CHUNKS OF {}:.Completed {} of {} ***********".format(
import rasterio
from rasterio import mask


def bound_box(pointa, pointb):
    """
    Return the axis-aligned rectangle spanned by two corner points as a
    one-element list holding a GeoJSON-style geometry dict, the form
    passed to rasterio's mask below.

    pointa, pointb: (x, y) pairs for two opposite corners, in any order.
    The GeoDataFrame is built with crs=32624 (24N).
    """
    xs = (pointa[0], pointb[0])
    ys = (pointa[1], pointb[1])
    minx, maxx = np.minimum(*xs), np.maximum(*xs)
    miny, maxy = np.minimum(*ys), np.maximum(*ys)

    bbox = box(minx, miny, maxx, maxy)
    geo = gpd.GeoDataFrame({'geometry': bbox}, index=[0], crs=32624)  # 24N
    features = json.loads(geo.to_json())['features']
    return [features[0]['geometry']]


# Input locations
DEM_path = "/home/dunbar/Research/helheim/data/interpolated_dem"
lazfile_path = "/home/dunbar/Research/helheim/data/lazfiles"
raster_path = "/home/dunbar/Research/helheim/data/2016_cpd_vels"

# Scans and velocity rasters, each ordered by their datetime attribute
lazfiles = glob(join(lazfile_path, "*.laz"))
velorasters = glob(join(raster_path, "*.tif"))
lazfiles = sorted((Filepath(x) for x in lazfiles), key=lambda i: i.datetime)
cpdvels = sorted((Filepath(x) for x in velorasters), key=lambda i: i.datetime)

# Two opposite corners of the region of interest
diagupp, diaglow = (535000.00, 7359250.00), (537000.00, 7358250.00)
boundpolygon = bound_box(diagupp, diaglow)

# Crop the first velocity raster to the region; element 0 of the mask
# result is the image
with rasterio.open(cpdvels[0].filepath) as src:
    cropped = mask.mask(src, boundpolygon, crop=True, filled=False)
    outimage = np.squeeze(cropped[0])

# (min, max, sample count) per direction; counts come from the cropped image
eastdims = (np.minimum(diagupp[0], diaglow[0]),
            np.maximum(diagupp[0], diaglow[0]),
            outimage.shape[1])
northdims = (np.minimum(diagupp[1], diaglow[1]),
             np.maximum(diagupp[1], diaglow[1]),
             outimage.shape[0])

# Evenly spaced coordinates across the region, one per image column / row
easting_interpolation = np.linspace(*eastdims[:2], num=int(eastdims[2]))
northing_interpolation = np.linspace(*northdims[:2], num=int(northdims[2]))
def log_updated_accounts(updatedAccounts, sN=None, vMN=None, activeDate=None):
    """
    ListOfTuples int int date -> None
    updatedAccounts: ListOfTuples | A list of the accounts that were
        updated in the choosePlayers function. Format: (username, p1, p2)
        where p1 and p2 are TuplesOfStrings of format
        (firstName, lastName, teamAbbreviation)
    sN: int | "strategy number" (see strategyNumber.txt)
    vMN: int | virtual Machine Number.
    activeDate: date | day whose column receives the entries

    Writes info about updated accounts to the minion account file for sN
    and vMN, creating that file from the master accounts file if it does
    not exist yet. Each updated account gets 'Done. 1: p1, 2: p2' in the
    activeDate column; existing entries in that column are kept.

    Raises IndexError if an updated account's username is not among the
    Emails of the minion account file.
    """
    import os

    ## type check
    assert type(updatedAccounts) == list
    assert type(sN) == int
    assert type(vMN) == int
    assert type(activeDate) == date

    ## Let the user know which account file we are updating
    minionAF = Filepath.get_minion_account_file(sN=sN, vMN=vMN)
    print("--> Updating accounts file: {}".format(minionAF))

    ## If the minion spreadsheet hasn't been initialized yet, do so
    if not os.path.isfile(minionAF):
        fullDF = pd.read_excel(Filepath.get_accounts_file(),
                               sheetname='Production',
                               parse_cols='A:F')
        # One combined mask instead of indexing the filtered frame with a
        # second full-length mask (which pandas has to reindex)
        minionDF = fullDF[(fullDF.Strategy == sN) & (fullDF.VM == vMN)]
        minionDF.to_excel(minionAF,
                          sheet_name='Production',
                          index=False)  # no extra column of row indices

    ## Get the minion spreadsheet corresponding to this sN and vMN
    dateFormatted = __get_date_formatted_for_excel(activeDate)
    minionDF = pd.read_excel(minionAF, sheetname='Production')

    ## Create the series corresponding to today's player selections
    if dateFormatted in minionDF.columns:
        accountInfoL = list(minionDF[dateFormatted])
        del minionDF[dateFormatted]
    else:
        accountInfoL = [''] * len(minionDF)

    for account in updatedAccounts:
        matchingRows = minionDF.Email[minionDF.Email == account[0]].index
        if len(matchingRows) == 0:
            raise IndexError("Account {} not found in {}".format(
                account[0], minionAF))
        # The freshly read sheet has the default 0..n-1 index, so the
        # row label doubles as the position in accountInfoL
        accountIndex = matchingRows[0]
        accountInfoL[accountIndex] = 'Done. 1: {}, 2: {}'.format(
            account[1], account[2])

    ## Put the dataframe together and print it to file
    newDF = pd.concat(
        [minionDF, pd.Series(accountInfoL, name=dateFormatted)], axis=1)
    newDF.to_excel(minionAF,
                   sheet_name='Production',
                   index=False)  # no extra column for row indices
writer.save() if __name__ == '__main__': """ Usage: 1) ./accounts.py N -> creates and logs N new accounts 2) ./accounts.py num -> returns the number of accounts created and logged to date """ assert len(sys.argv) == 2 ## Is this a type 2 call? if sys.argv[-1] == 'num': df = pd.read_excel(Filepath.get_accounts_file()) print "Num Accounts: {}".format(len(df)) ## Else its a type 1 call else: numAccounts = int(sys.argv[1]) origCount = numAccounts # make accounts in sets of 50 so that in case something bad happens, # we dont lose e.g 1000 accounts blockSize = 20 while numAccounts > 0: if numAccounts < blockSize: main(numAccounts) break else: print "********** CREATING IN CHUNKS OF {}:.Completed {} of {} ***********".format(
def main(N):
    """
    int -> None

    Creates N unique beatthestreak accounts, and claims mulligans for each
    account. Stores all username and password info in the accounts file,
    sheetname "Production".

    IMPORTANT: Does not actually make email addresses for the accounts. If
    any account gets to over 40 hits in a row, we'll go and MANUALLY make
    an email account and validate it.

    Creating an account and claiming its mulligan is attempted at most 5
    times per username; the last failure is re-raised and nothing is
    written to file in that case.

    NOTE(review): only columns A-D of the sheet are read and the workbook
    is rewritten from them - confirm nothing else needs to be preserved.
    """
    maxAttempts = 5
    password = '******'

    newUsernamesL = []
    newMLBPasswordsL = []
    usernameStarters = [
        'faiyam', 'rahman', 'bts', 'metro', 'williams', 'grassfed', 'daft',
        'fossil', 'water', 'earth'
    ]

    ## read in the production sheet to get the already existing accounts
    # Column A: id
    # Column B: Email
    # Column C: EmailPassword
    # Column D: MLBPassword
    df = pd.io.excel.read_excel(Filepath.get_accounts_file(),
                                sheetname='Production',
                                parse_cols='A,B,C,D')

    ## make sure the excel file has the column headers we expect. Checked
    ## before any account is created so that a bad sheet cannot cost us
    ## the accounts we would otherwise have made first.
    assert df.columns[0] == 'ID'
    assert df.columns[1] == 'Email'
    assert df.columns[2] == 'EmailPassword'
    assert df.columns[3] == 'MLBPassword'

    ## Create N new fake email addresses. We'll go and ACTUALLY make them
    ## if they reach a certain plateau streak length
    takenEmails = set(df.Email)  # existing and newly generated addresses
    while len(newUsernamesL) < N:
        username = (random.choice(usernameStarters) + "." +
                    random.choice(usernameStarters) + "." +
                    str(random.randint(1, 2014000)) + '@faiyamrahman.com')
        # make sure we don't repeat an address
        if username in takenEmails:
            continue
        takenEmails.add(username)
        newUsernamesL.append(username)

    for accountNum, username in enumerate(newUsernamesL, 1):
        time.sleep(5)  # give it some time to clean things up
        print("\n--> CREATING ACCOUNT NUMBER: {0} of {1}".format(
            accountNum, len(newUsernamesL)))

        ## Retry in case selenium fails us; steps that already succeeded
        ## are not repeated
        accountMade, mulliganClaimed = (False, False)
        attemptNum = 0
        while True:
            attemptNum += 1
            try:
                ## Create a beatthestreak account on espn and kill the browser
                if not accountMade:
                    make_espn_bts_account(username, password)
                    accountMade = True
                ## Claim the bots mulligan
                if not mulliganClaimed:
                    claim_mulligan(username, password)  # uses its own browser
                    print("--> Mulligan claimed :)")
                    mulliganClaimed = True
            except Exception:
                # Exception rather than a bare except, so that Ctrl-C
                # still stops the run instead of triggering a retry
                print("--> Attempt {} of {} failed".format(
                    attemptNum, maxAttempts))
                if attemptNum >= maxAttempts:
                    # out of attempts for this username: tell us what's up
                    raise
                continue
            else:
                break

        ## Hold on to the data to add to the btsAccounts excel file
        newMLBPasswordsL.append(password)

    ## add to the dataframe and replace the Production sheet
    # make a dataframe containing the new info
    firstID = len(df.ID)
    idL = [firstID + i for i in range(0, len(newUsernamesL))]
    # outlook 365 aliases don't have their own passwords
    newEmailPasswordsL = ['n/a'] * len(newMLBPasswordsL)
    extraDF = pd.concat([
        pd.Series(idL, name='ID'),
        pd.Series(newUsernamesL, name='Email'),
        pd.Series(newEmailPasswordsL, name='EmailPassword'),
        pd.Series(newMLBPasswordsL, name='MLBPassword')
    ], axis=1)

    # create a dataframe with all the info and write it to file
    newDF = pd.concat([df, extraDF])
    writer = pd.ExcelWriter(Filepath.get_accounts_file())
    newDF.to_excel(writer, index=False, sheet_name='Production')
    writer.save()
def main(N):
    """
    int -> None

    Creates N unique beatthestreak accounts, and claims mulligans for each
    account. Stores all username and password info in the accounts file,
    sheetname "Production".

    IMPORTANT: Does not actually make email addresses for the accounts. If
    any account gets to over 40 hits in a row, we'll go and MANUALLY make
    an email account and validate it.

    Creating an account and claiming its mulligan is attempted at most 5
    times per username; the last failure is re-raised and nothing is
    written to file in that case.

    NOTE(review): only columns A-D of the sheet are read and the workbook
    is rewritten from them - confirm nothing else needs to be preserved.
    """
    maxAttempts = 5
    password = '******'

    newUsernamesL = []
    newMLBPasswordsL = []
    usernameStarters = [
        'faiyam', 'rahman', 'bts', 'metro', 'williams', 'grassfed', 'daft',
        'fossil', 'water', 'earth'
    ]

    ## read in the production sheet to get the already existing accounts
    # Column A: id
    # Column B: Email
    # Column C: EmailPassword
    # Column D: MLBPassword
    df = pd.io.excel.read_excel(Filepath.get_accounts_file(),
                                sheetname='Production',
                                parse_cols='A,B,C,D')

    ## make sure the excel file has the column headers we expect. Checked
    ## before any account is created so that a bad sheet cannot cost us
    ## the accounts we would otherwise have made first.
    assert df.columns[0] == 'ID'
    assert df.columns[1] == 'Email'
    assert df.columns[2] == 'EmailPassword'
    assert df.columns[3] == 'MLBPassword'

    ## Create N new fake email addresses. We'll go and ACTUALLY make them
    ## if they reach a certain plateau streak length
    takenEmails = set(df.Email)  # existing and newly generated addresses
    while len(newUsernamesL) < N:
        username = (random.choice(usernameStarters) + "." +
                    random.choice(usernameStarters) + "." +
                    str(random.randint(1, 2014000)) + '@faiyamrahman.com')
        # make sure we don't repeat an address
        if username in takenEmails:
            continue
        takenEmails.add(username)
        newUsernamesL.append(username)

    for accountNum, username in enumerate(newUsernamesL, 1):
        time.sleep(5)  # give it some time to clean things up
        print("\n--> CREATING ACCOUNT NUMBER: {0} of {1}".format(
            accountNum, len(newUsernamesL)))

        ## Retry in case selenium fails us; steps that already succeeded
        ## are not repeated
        accountMade, mulliganClaimed = (False, False)
        attemptNum = 0
        while True:
            attemptNum += 1
            try:
                ## Create a beatthestreak account on espn and kill the browser
                if not accountMade:
                    make_espn_bts_account(username, password)
                    accountMade = True
                ## Claim the bots mulligan
                if not mulliganClaimed:
                    claim_mulligan(username, password)  # uses its own browser
                    print("--> Mulligan claimed :)")
                    mulliganClaimed = True
            except Exception:
                # Exception rather than a bare except, so that Ctrl-C
                # still stops the run instead of triggering a retry
                print("--> Attempt {} of {} failed".format(
                    attemptNum, maxAttempts))
                if attemptNum >= maxAttempts:
                    # out of attempts for this username: tell us what's up
                    raise
                continue
            else:
                break

        ## Hold on to the data to add to the btsAccounts excel file
        newMLBPasswordsL.append(password)

    ## add to the dataframe and replace the Production sheet
    # make a dataframe containing the new info
    firstID = len(df.ID)
    idL = [firstID + i for i in range(0, len(newUsernamesL))]
    # outlook 365 aliases don't have their own passwords
    newEmailPasswordsL = ['n/a'] * len(newMLBPasswordsL)
    extraDF = pd.concat([
        pd.Series(idL, name='ID'),
        pd.Series(newUsernamesL, name='Email'),
        pd.Series(newEmailPasswordsL, name='EmailPassword'),
        pd.Series(newMLBPasswordsL, name='MLBPassword')
    ], axis=1)

    # create a dataframe with all the info and write it to file
    newDF = pd.concat([df, extraDF])
    writer = pd.ExcelWriter(Filepath.get_accounts_file())
    newDF.to_excel(writer, index=False, sheet_name='Production')
    writer.save()
def log_updated_accounts(updatedAccounts, sN=None, vMN=None, activeDate=None):
    """
    ListOfTuples int int date -> None
    updatedAccounts: ListOfTuples | A list of the accounts that were
        updated in the choosePlayers function. Format: (username, p1, p2)
        where p1 and p2 are TuplesOfStrings of format
        (firstName, lastName, teamAbbreviation)
    sN: int | "strategy number" (see strategyNumber.txt)
    vMN: int | virtual Machine Number.
    activeDate: date | day whose column receives the entries

    Writes info about updated accounts to the minion account file for sN
    and vMN, creating that file from the master accounts file if it does
    not exist yet. Each updated account gets 'Done. 1: p1, 2: p2' in the
    activeDate column; existing entries in that column are kept.

    Raises IndexError if an updated account's username is not among the
    Emails of the minion account file.
    """
    import os

    ## type check
    assert type(updatedAccounts) == list
    assert type(sN) == int
    assert type(vMN) == int
    assert type(activeDate) == date

    ## Let the user know which account file we are updating
    minionAF = Filepath.get_minion_account_file(sN=sN, vMN=vMN)
    print("--> Updating accounts file: {}".format(minionAF))

    ## If the minion spreadsheet hasn't been initialized yet, do so
    if not os.path.isfile(minionAF):
        fullDF = pd.read_excel(Filepath.get_accounts_file(),
                               sheetname='Production',
                               parse_cols='A:F')
        # One combined mask instead of indexing the filtered frame with a
        # second full-length mask (which pandas has to reindex)
        minionDF = fullDF[(fullDF.Strategy == sN) & (fullDF.VM == vMN)]
        minionDF.to_excel(minionAF,
                          sheet_name='Production',
                          index=False)  # no extra column of row indices

    ## Get the minion spreadsheet corresponding to this sN and vMN
    dateFormatted = __get_date_formatted_for_excel(activeDate)
    minionDF = pd.read_excel(minionAF, sheetname='Production')

    ## Create the series corresponding to today's player selections
    if dateFormatted in minionDF.columns:
        accountInfoL = list(minionDF[dateFormatted])
        del minionDF[dateFormatted]
    else:
        accountInfoL = [''] * len(minionDF)

    for account in updatedAccounts:
        matchingRows = minionDF.Email[minionDF.Email == account[0]].index
        if len(matchingRows) == 0:
            raise IndexError("Account {} not found in {}".format(
                account[0], minionAF))
        # The freshly read sheet has the default 0..n-1 index, so the
        # row label doubles as the position in accountInfoL
        accountIndex = matchingRows[0]
        accountInfoL[accountIndex] = 'Done. 1: {}, 2: {}'.format(
            account[1], account[2])

    ## Put the dataframe together and print it to file
    newDF = pd.concat(
        [minionDF, pd.Series(accountInfoL, name=dateFormatted)], axis=1)
    newDF.to_excel(minionAF,
                   sheet_name='Production',
                   index=False)  # no extra column for row indices
def report_mass_results(self, **kwargs):
    """
    kwargs -> None

    Reports mass simulation results to an excel spreadsheet (sheet 'Meta').

    One list per output column, all read from kwargs:
        simYearL, batAveYearL, NL, PL, minPAL, minBatAveL, minERAL,
        numSuccessL, percentSuccessL, numFailL, percentFailL,
        percUniqueBotsL, doubleDownL, topStreakAveL, oneStreakL,
        twoStreakL, threeStreakL, fourStreakL, fiveStreakL, startDateL,
        endDateL, methodL
    methodL holds indices into self.selMethods; the looked-up entries are
    what gets written. The min ERA column is left out when minERAL is
    empty.

    Ranges used to build the output file name:
        simYearRange: (lowest_sim_year, highest_sim_year)
        simMinBatRange: (lowest simYear-batAveYear,
                         highest simYear-batAveYear)
        NRange: (lowest N, highest N)
        PRange: (lowest P, highest P)
        minPARange, minERARange
    """
    npsim = self.get_npsim()

    ## (kwargs key, column header) for every column of the spreadsheet
    columnSpecs = [
        ('simYearL', 'Sim Year'),
        ('batAveYearL', 'Bat Ave Year'),
        ('NL', 'N'),
        ('PL', 'P'),
        ('minBatAveL', 'Min Bat Ave'),
        ('numSuccessL', 'Successes'),
        ('percentSuccessL', 'Successes (1=100%)'),
        ('numFailL', 'Failures'),
        ('percentFailL', 'Failures (%) (1=100%)'),
        ('percUniqueBotsL', '(%) Unique Bots'),
        ('minPAL', 'min PA'),
        ('methodL', 'Method'),
        ('topStreakAveL', 'Mean(topFiveStreaks)'),
        ('oneStreakL', '1 Streak'),
        ('twoStreakL', '2 Streak'),
        ('threeStreakL', '3 Streak'),
        ('fourStreakL', '4 Streak'),
        ('fiveStreakL', '5 Streak'),
        ('doubleDownL', 'DoubleDown?'),
        ('startDateL', 'start date'),
        ('endDateL', 'end date'),
        ('minERAL', 'min ERA'),
    ]
    columns = {}
    for key, header in columnSpecs:
        values = kwargs[key]
        if key == 'methodL':
            # translate method indices into the entries of self.selMethods
            values = [self.selMethods[methodIndex] for methodIndex in values]
        columns[key] = Series(values, name=header)

    ## construct dataframe, column order as it appears in the spreadsheet
    leadingKeys = ['simYearL', 'batAveYearL', 'NL', 'PL', 'minPAL',
                   'minBatAveL']
    trailingKeys = ['numSuccessL', 'percentSuccessL', 'numFailL',
                    'percentFailL', 'percUniqueBotsL', 'doubleDownL',
                    'topStreakAveL', 'oneStreakL', 'twoStreakL',
                    'threeStreakL', 'fourStreakL', 'fiveStreakL',
                    'startDateL', 'endDateL', 'methodL']
    df = concat([columns[key] for key in leadingKeys], axis=1)
    if len(columns['minERAL']) > 0:
        df = concat([df, columns['minERAL']], axis=1)
    df = concat([df] + [columns[key] for key in trailingKeys], axis=1)

    ## Write the info to an excel spreadsheet
    pathArgs = dict(
        simYearRange=kwargs['simYearRange'],
        sMBRange=kwargs['simMinBatRange'],
        NRange=kwargs['NRange'],
        PRange=kwargs['PRange'],
        minPARange=kwargs['minPARange'],
        minERARange=kwargs['minERARange'],
        method=npsim.method)
    if self.test == True:  # debugging code
        pathArgs['test'] = True
    writer = ExcelWriter(Filepath.get_mass_results_file(**pathArgs))
    df.to_excel(writer, index=False, sheet_name='Meta')
    writer.save()