Example #1
import random
import time

import myLib  # project-local helper; fetches a URL and returns its lines


def crawl_webPage(data):
    """Download each case filing page and save the raw HTML under ./sample/."""
    url_temp = "http://securities.stanford.edu/filings-case.html?id=%s"
    # data[0] is skipped, so the case ids are expected to start at index 1.
    for i in range(1, len(data)):
        url_path = url_temp % data[i]
        print(i, url_path)
        url_data = myLib.myUrl(url_path)
        with open("./sample/%s" % data[i], "w") as f:
            for line in url_data:
                f.write(line + "\n")

        # Wait 1-3 seconds between requests to stay polite to the server.
        sleep_time = random.randint(1, 3)
        time.sleep(sleep_time)
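
Both examples call myLib.myUrl, which is defined elsewhere in the project. Judging from the call sites, it fetches a page and returns its text as a list of lines; a minimal sketch under that assumption (the module and function names come from the examples, everything else is guessed):

import urllib.request


def myUrl(url_path):
    """Fetch url_path and return the decoded response body as a list of lines."""
    with urllib.request.urlopen(url_path) as resp:
        body = resp.read().decode("utf-8", errors="replace")
    return body.splitlines()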
Example #2
import random
import time

import CaseParser  # project-local state-machine parser for the case pages
import myLib  # project-local helper; fetches a URL and returns its lines


def extractDataFromURL(data):
    """Fetch each case page, parse it, and append the result to ./result."""
    fd = open("./result", "a")
    url_temp = "http://securities.stanford.edu/filings-case.html?id=%s"
    for i in range(1, len(data)):
        url_path = url_temp % data[i]
        print(i, url_path, end=" ")
        url_data = myLib.myUrl(url_path)
        # Drive the parser state machine over the downloaded lines.
        parser = CaseParser.Context(CaseParser.StartState())
        pm = CaseParser.ParserManager(url_data, parser)
        pm.startParse()
        showResult(fd, data[i], pm.result())
        # Wait 1-3 seconds between requests to stay polite to the server.
        sleep_time = random.randint(1, 3)
        time.sleep(sleep_time)

    fd.close()
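
showResult is another helper that is not shown. From the call site it receives the open result file, the case id, and the parser output; a plausible minimal version (the output layout here is purely hypothetical):

def showResult(fd, case_id, result):
    # Hypothetical layout: finish the progress line on stdout, then append
    # a tab-separated row to the result file.
    print(result)
    fd.write("%s\t%s\n" % (case_id, result))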
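
A hypothetical driver wiring the two functions together. The loops in both functions start at index 1, which suggests data[0] is a header or label entry; the case ids below are made up for illustration:

import os

if __name__ == "__main__":
    data = ["id", "106000", "106001", "106002"]  # data[0] is skipped as a header

    os.makedirs("sample", exist_ok=True)  # crawl_webPage writes into ./sample/
    crawl_webPage(data)       # save the raw pages locally
    extractDataFromURL(data)  # fetch each page again, parse it, append to ./result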