Ejemplo n.º 1
0
def downloadTrailer(trailerUrl, filePath):
    """ Resolve a trailer page to its video file and save that file.
        @param trailerUrl: Trailer URL (ex: /trailer/the-terminal/trailer)
        @param filePath: Path the downloaded file is written to
    """
    pageHtml = util.getHtml(trailerUrl)
    # Only the first VIDEONUM_REGEX match on the trailer page is used.
    videoNum = re.findall(VIDEONUM_REGEX, pageHtml)[0]
    # The page behind FLASH_URL is scanned for the actual file URL.
    flashPage = util.getHtml(FLASH_URL.replace('{{videonum}}', videoNum))
    mediaUrl = re.findall(FLASH_REGEX, flashPage)[0]
    util.downloadFile(mediaUrl, filePath)
Ejemplo n.º 2
0
def main():
    """ Fetch the bookmaker pages and dump one CSV row per page.

        Pages come from util.getHtml(sys.argv, selenium, BOOKIE_NAME). For
        each page the teams, date and bets are parsed once, appended to a
        flat list, and written to "marathonbet_basket.csv" as
        ID, local team, visitor team, date, first bet, second bet.
        The accumulated flat list is printed at the end.

        If the CSV file cannot be opened, an error message is printed and
        the function returns without processing any page.
    """
    htmls = util.getHtml(sys.argv, selenium, BOOKIE_NAME)
    print("*************MAIN print htmls**********")
    players = []
    try:
        fd = open("marathonbet_basket.csv", "w")
    except (IOError, OSError):
        # Without an output file there is nothing useful left to do;
        # bail out instead of failing later on an unbound file handle.
        print("Error while file creation")
        return
    # The context manager closes the file even if a parser raises mid-loop.
    with fd:
        print("----------------------------------------")
        fd.write("ID,Local, Visitor, Date, 1., 2.\n")
        for counter, html in enumerate(htmls, 1):
            print("+++++++++++++++++++++++++++++++++++++++")
            print(html)
            # Parse each page once and reuse the results for both the
            # accumulated list and the CSV row.
            teams = getTeams(html)
            dates = getDate(html)
            bets = getBets(html)
            players.extend(teams)
            players.extend(dates)
            players.extend(bets)
            fields = [
                counter,
                teams[0],
                teams[1],
                # Newlines inside the date would break the one-row-per-page layout.
                dates[0].replace('\n', ''),
                bets[0],
                bets[1],
            ]
            fd.write(','.join(str(field) for field in fields) + '\n')
            print("+++++++++++++++++++++++++++++++++++++++")
    print(players)
Ejemplo n.º 3
0
def getTrailerUrls(movieUrl):
    """ Collect the distinct trailer URLs found on a movie page.
        @param movieUrl: URL to movie info on TrailerAddict
        @return: List of trailer URLs with duplicates removed
    """
    # The last path segment of the movie URL is substituted into the regex.
    movieTag = movieUrl.split('/')[-1]
    pageHtml = util.getHtml(movieUrl)
    pattern = MOVIE_REGEX.replace('{{tag}}', movieTag)
    # Building a set drops duplicate matches before the list is returned.
    uniqueUrls = {
        TABASE_URL.replace('{{path}}', path)
        for path in re.findall(pattern, pageHtml)
    }
    return list(uniqueUrls)
Ejemplo n.º 4
0
def downloadTrailer(trailerUrl, filePath):
    """ Build the video file URL for a trailer and download it.
        @param trailerUrl: Trailer URL (ex: /trailer/the-terminal/trailer)
        @param filePath: Path the downloaded file is written to
    """
    pageHtml = util.getHtml(trailerUrl)
    # The id comes from the URL itself: first element of the first ID_REGEX match.
    firstIdMatch = re.findall(ID_REGEX, trailerUrl)[0]
    videoId = firstIdMatch[0]
    # The second URL parameter is taken from the fetched page.
    tParam = re.findall(T_PARAM_REGEX, pageHtml)[0]
    util.downloadFile(VIDEO_URL % (videoId, tParam), filePath)
Ejemplo n.º 5
0
def crawlGooseUserList():
    """ Fetch the first page of the d.weibo.com EXPERTUSER listing and print it.

        A session object is obtained from loginWeibo() and passed to
        getHtml() together with save='db'. The loop is written for pages
        1..21 but stops after the first one because of the trailing break.
    """
    # NOTE (original author): this function is useless, need to login first.
    req = loginWeibo()
    base_url = 'http://d.weibo.com/230771_-_EXPERTUSER?page=%d#Pl_Core_F4RightUserList__4'
    page_num = 22
    for page_index in range(1, page_num):
        url = base_url % page_index
        path = 'tmp'
        r = getHtml(url, path, req = req, save = 'db')
        # Parenthesised single-argument print works on Python 2 and 3 alike.
        print(r)
        # Only the first page is fetched; the break ends the loop immediately.
        break
Ejemplo n.º 6
0
def search(title):
    """ Look up a movie by title and return the matches found.
        @param title: Title of the movie to search for
        @return: List of dicts with the keys 'url', 'title' and 'year'
    """
    # Spaces in the title become '+' before it is placed into the search URL.
    searchUrl = SEARCH_URL.replace('{{query}}', title.replace(" ", "+"))
    searchHtml = util.getHtml(searchUrl)
    # Each regex match supplies (path, title, year) in that order.
    return [
        {
            'url': TABASE_URL.replace('{{path}}', match[0]),
            'title': match[1],
            'year': match[2],
        }
        for match in re.findall(SEARCH_REGEX, searchHtml)
    ]
Ejemplo n.º 7
0
def getUserWeibo(req=requests):
    """ Fetch the weibo.cn pages of every user returned by getUserList().

        For each user, pages are requested one by one through getHtml()
        with save='db' and tag='second'. The page count starts at 200 and
        is replaced by getPageNum() of the first successfully fetched
        response. A page that fails is reported and skipped so one bad
        page does not abort the whole crawl.

        @param req: Object passed to getHtml as its req argument
                    (defaults to the requests module)
        @return: The last response successfully returned by getHtml, or
                 None if no page could be fetched
    """
    # Pre-bind the result so the final return cannot hit an unbound name
    # when the user list is empty or every request fails.
    r = None
    for u in getUserList():
        print('%s begin' % (u,))
        u = u.strip()
        base_url = 'http://weibo.cn/%s?filter=1&page=' % u
        print(base_url)
        pagenum = 200
        getnum = False
        i = 1
        while i <= pagenum:
            url = base_url + str(i)
            print(url)
            try:
                r = getHtml(url, '', req = req, save = 'db', tag='second')
                if not getnum:
                    # The real page count is read once, from the first
                    # response that was fetched successfully.
                    pagenum = getPageNum(r)
                    getnum = True
            except Exception as err:
                # Best effort: report the failure and move on to the next
                # page. KeyboardInterrupt/SystemExit are no longer swallowed.
                print('failed to fetch %s: %s' % (url, err))
            i += 1
        print('%s done' % (u,))
    return r
Ejemplo n.º 8
0
def main():
  """Gather the players parsed from every fetched page and pass them to util.output."""
  pages = util.getHtml(sys.argv, selenium, BOOKIE_NAME)
  # Flatten the per-page results into a single list, keeping page order.
  players = [player for page in pages for player in getPlayers(page)]
  util.output(sys.argv, players)
Ejemplo n.º 9
0
def main():
  """Gather the players parsed from every fetched page and print them via util.printPlayers."""
  collected = []
  for page in util.getHtml(sys.argv, selenium, BOOKIE_NAME):
    # In-place list concatenation appends this page's players to the running list.
    collected += getPlayers(page)
  util.printPlayers(collected)