예제 #1
0
def main():
    """Scrape every page with a pool of 8 workers and save the collected ages."""
    # Note the start time, determine how many pages exist, and spin up workers.
    startTime = time.time()
    totalPages = getPageCount()
    workerPool = Pool(processes=8)

    # Each element of pageResults is the per-page output of processPage:
    # index 0 = list of ages, 1 = count with no age, 2 = users processed.
    pageResults = workerPool.map(processPage, range(1, totalPages + 1))

    # Fold the per-page results into run-wide totals.
    ages = []
    noAgeCount = 0
    usersProcessed = 0
    for result in pageResults:
        ages += result[0]
        noAgeCount += result[1]
        usersProcessed += result[2]

    # Elapsed wall-clock time, reported in minutes to one decimal place.
    endTime = time.time()
    elapsedMinutes = round((endTime - startTime) / 60, 1)

    # Persist the age data under a dated filename, then print a summary.
    gen.listToCSV('CD-Ages-' + time.strftime('%Y-%m-%d'), ages)
    print(noAgeCount, 'out of', usersProcessed, 'had no age set. :(')
    print('Scraping run took', elapsedMinutes, 'minutes.')
예제 #2
0
def saveTeamList(year):
    """Fetch every team for *year* from TBA and save them to a CSV file.

    year: competition season to pull teams for.
    """
    fileExists, fullPath = filePathHandler('teams', None, 'teams', year)

    # Force a re-download even when the file already exists.
    # NOTE(review): the original assigned False twice in a row; the duplicate
    # dead assignment was removed, keeping one deliberate override. Confirm
    # that always refreshing (ignoring filePathHandler's result) is intended.
    fileExists = False

    if not fileExists:
        try:
            # TBA paginates its team list; pages 0-15 cover the full range.
            teams = []
            for page in range(0, 16):
                teams += tba.teams(page, year, False, True)

            gen.listToCSV(fullPath, teams)
        except Exception as e:
            # Best-effort save: report the failure rather than crash the caller.
            print(e)
예제 #3
0
# Base URL for each social-media profile type TBA reports.
baseURLs = {
    'facebook-profile': 'www.facebook.com/',
    'twitter-profile': 'www.twitter.com/',
    'youtube-profile': 'www.youtube.com/',
    'github-profile': 'www.github.com/',
    'instagram-profile': 'www.instagram.com/',
    'periscope-profile': 'www.periscope.com/'
}

# Column order for the output rows: one column per profile type.
profileTypes = [
    'facebook-profile', 'twitter-profile', 'youtube-profile', 'github-profile',
    'instagram-profile', 'periscope-profile'
]

teamData = []

# Build one comma-separated row per team at the event: team key first,
# then one column per profile type (empty when the team has no such profile).
for team in tba.event_teams(event, False, True):
    teamMedia = tba.team_profiles(team)
    outString = team + ', '

    for profile in profileTypes:
        # Removed an unused `foundMatch` flag that was set but never read.
        # NOTE(review): if a team has several media entries of the same type,
        # their URLs concatenate into one cell with no separator — confirm
        # whether the first match alone was intended.
        for prof in teamMedia:
            if prof['type'] == profile:
                outString += baseURLs[profile] + prof['foreign_key']
        outString += ', '
    teamData.append(outString)

gen.listToCSV(event + 'socialData', teamData)
예제 #4
0
import gen

tba = gen.setup()

# District championship event key whose team list should be exported.
dcmp = '2019chcmp'

# Team keys look like 'frc1234'; strip the 'frc' prefix, sort the numbers,
# and write them out as '<event> Teams'.
teamNumbers = [int(key[3:]) for key in tba.event_teams(dcmp, keys = True)]
teamNumbers.sort()
gen.listToCSV(dcmp + ' Teams', teamNumbers)
예제 #5
0
import gen
from tqdm import tqdm

tba = gen.setup()

# Season to pull teams for; set this to None for ALL teams.
year = 2019

teamList = []
# TBA paginates the team list; 40 pages is a safe upper bound, and we stop
# early at the first empty page.
for page in tqdm(range(0, 40)):
    currentTeams = tba.teams(page, year, False, True)

    # An empty page means we have run past the last team.
    # (Truthiness test replaces the non-idiomatic `== []` comparison and the
    # redundant `else` after `break`.)
    if not currentTeams:
        break
    teamList += currentTeams

# Name the output file after the year, or 'allTeamKeys' when year is None.
if year is not None:
    fileKey = str(year) + 'TeamKeys'
else:
    fileKey = 'allTeamKeys'
gen.listToCSV(fileKey, teamList)
예제 #6
0
import gen

# Season whose district teams should be exported.
year = 2019

tba = gen.setup()

# Collect the numeric team number (key minus the 'frc' prefix) for every
# team in every district running during the given year.
teams = []
for district in tba.districts(year):
    districtTeamKeys = tba.district_teams(district['key'], False, True)
    for teamKey in districtTeamKeys:
        teams.append(int(teamKey[3:]))

gen.listToCSV(str(year) + ' District Teams', teams)
예제 #7
0
def saveUpdateDate(updateDate):
    """Write *updateDate* as a single-row CSV under the year's data folder.

    Relies on the module-level `baseFolder` and `year` globals for the path.
    """
    outputPath = baseFolder + str(year) + '/UpdateDate'
    gen.listToCSV(outputPath, [updateDate])