def downloadTrailer(trailerUrl, filePath):
    """
    Fetch a trailer's video file and save it to disk.

    The trailer page is scanned with VIDEONUM_REGEX; the first match is
    substituted into FLASH_URL, and the first FLASH_REGEX match in that
    second response is the URL handed to util.downloadFile.

    @param trailerUrl: Trailer URL (ex: /trailer/the-terminal/trailer)
    @param filePath: Path to save trailer
    """
    pageHtml = util.getHtml(trailerUrl)

    # Only the first video number found on the page is used.
    firstVideoNum = re.findall(VIDEONUM_REGEX, pageHtml)[0]
    playerUrl = FLASH_URL.replace('{{videonum}}', firstVideoNum)

    playerResponse = util.getHtml(playerUrl)
    mediaUrl = re.findall(FLASH_REGEX, playerResponse)[0]

    util.downloadFile(mediaUrl, filePath)
def main():
    """
    Write one CSV row per fetched page to marathonbet_basket.csv.

    Pages come from util.getHtml(sys.argv, selenium, BOOKIE_NAME). For each
    page the first two entries of getTeams(), the first entry of getDate()
    (with newlines removed) and the first two entries of getBets() are
    written as a row, prefixed with a 1-based row ID. Everything the three
    parsers return is also accumulated in a list that is printed at the end.

    If the output file cannot be opened, an error is printed and the
    function returns without processing any page.
    """
    htmls = util.getHtml(sys.argv, selenium, BOOKIE_NAME)
    print("*************MAIN print htmls**********")
    players = []

    try:
        fd = open("marathonbet_basket.csv", "w")
    except (IOError, OSError):
        # Without a file there is nothing to write to; stop here instead of
        # failing later on an unbound file handle.
        print("Error while file creation")
        return
    print("----------------------------------------")

    # The context manager closes the file even if a parser raises mid-loop.
    with fd:
        fd.write("ID,Local, Visitor, Date, 1., 2.\n")
        for counter, html in enumerate(htmls, 1):
            print("+++++++++++++++++++++++++++++++++++++++")
            print(html)

            # Parse each page once and reuse the results for both the
            # accumulated list and the CSV row.
            teams = getTeams(html)
            dates = getDate(html)
            bets = getBets(html)
            players.extend(teams)
            players.extend(dates)
            players.extend(bets)

            fields = [
                str(counter),
                str(teams[0]),
                str(teams[1]),
                str(dates[0].replace('\n', '')),
                str(bets[0]),
                str(bets[1]),
            ]
            fd.write(','.join(fields) + '\n')
            print("+++++++++++++++++++++++++++++++++++++++")

    print(players)
def getTrailerUrls(movieUrl):
    """
    List the distinct trailer URLs found on a movie page.

    The last path segment of movieUrl is substituted into MOVIE_REGEX, and
    every match in the page HTML is expanded through TABASE_URL.

    @param movieUrl: URL to movie info on TrailerAddict
    """
    movieTag = movieUrl.rsplit('/', 1)[-1]
    pageHtml = util.getHtml(movieUrl)
    pattern = MOVIE_REGEX.replace('{{tag}}', movieTag)

    # Collect into a set so repeated matches yield a single URL.
    uniqueUrls = set()
    for path in re.findall(pattern, pageHtml):
        uniqueUrls.add(TABASE_URL.replace('{{path}}', path))
    return list(uniqueUrls)
def downloadTrailer(trailerUrl, filePath):
    """
    Fetch a trailer's video file and save it to disk.

    The video id is taken from the trailer URL itself (first element of the
    first ID_REGEX match) and the t parameter from the page HTML (first
    T_PARAM_REGEX match); both are formatted into VIDEO_URL.

    @param trailerUrl: Trailer URL (ex: /trailer/the-terminal/trailer)
    @param filePath: Path to save trailer
    """
    pageHtml = util.getHtml(trailerUrl)

    idMatches = re.findall(ID_REGEX, trailerUrl)
    videoId = idMatches[0][0]

    tMatches = re.findall(T_PARAM_REGEX, pageHtml)
    tParam = tMatches[0]

    util.downloadFile(VIDEO_URL % (videoId, tParam), filePath)
def crawlGooseUserList():
    """
    Fetch the first page of the Weibo expert-user list and print the result.

    Original author's note: this function is useless, need to log in first.
    """
    session = loginWeibo()
    # session = requests
    url_template = 'http://d.weibo.com/230771_-_EXPERTUSER?page=%d#Pl_Core_F4RightUserList__4'
    page_limit = 22
    save_path = 'tmp'
    for page in range(1, page_limit):
        page_url = url_template % page
        response = getHtml(page_url, save_path, req=session, save='db')
        print(response)
        # Only the first page is ever requested.
        break
def search(title):
    """
    Look up a movie by title and return the matching entries.

    @param title: Title of the movie to search for
    @return: List of dicts with 'url', 'title' and 'year' keys, one per
             SEARCH_REGEX match on the results page
    """
    # Build the search URL (spaces become '+') and fetch the results page
    searchUrl = SEARCH_URL.replace('{{query}}', title.replace(" ", "+"))
    pageHtml = util.getHtml(searchUrl)

    # Each match supplies the path, title and year, in that order
    return [
        {
            'url': TABASE_URL.replace('{{path}}', match[0]),
            'title': match[1],
            'year': match[2],
        }
        for match in re.findall(SEARCH_REGEX, pageHtml)
    ]
def getUserWeibo(req=requests):
    """
    Fetch the weibo.cn pages of every user returned by getUserList().

    For each user, pages are requested starting at page 1. The page count
    defaults to 200 and is replaced by getPageNum() of the first response
    that is fetched and parsed successfully. Fetching is best-effort: a
    failure on one page is reported and the next page is tried.

    @param req: Object passed to getHtml() as its ``req`` argument
    @return: The last response returned by getHtml(), or None if no page
             was fetched successfully
    """
    # Bound up front so the final return is valid even when there are no
    # users or every request fails.
    r = None
    for u in getUserList():
        print('%s begin' % (u,))
        u = u.strip()
        base_url = 'http://weibo.cn/%s?filter=1&page=' % u
        print(base_url)
        pagenum = 200
        getnum = False
        i = 1
        # A while loop is required: pagenum shrinks once the real page
        # count is known.
        while i <= pagenum:
            url = base_url + str(i)
            print(url)
            try:
                r = getHtml(url, '', req=req, save='db', tag='second')
                if not getnum:
                    pagenum = getPageNum(r)
                    getnum = True
            except Exception as e:
                # Keep going on failure, but no longer swallow
                # KeyboardInterrupt/SystemExit or hide the error.
                print('failed to fetch %s: %s' % (url, e))
            i += 1
        print('%s done' % (u,))
    return r
def main():
    """Gather the players from every fetched page and pass them to util.output."""
    pages = util.getHtml(sys.argv, selenium, BOOKIE_NAME)
    # Flatten the per-page results into one list, in page order.
    players = [player for page in pages for player in getPlayers(page)]
    util.output(sys.argv, players)
def main():
    """Gather the players from every fetched page and print them via util.printPlayers."""
    pages = util.getHtml(sys.argv, selenium, BOOKIE_NAME)
    allPlayers = []
    for page in pages:
        # In-place concatenation keeps the per-page order.
        allPlayers += getPlayers(page)
    util.printPlayers(allPlayers)