def fetch_test1(match_number):
    """Crawl the Asian-handicap odds page for one match and store it.

    match_number: numeric match identifier, interpolated into the page URL.
    Side effects: HTTP fetch via crawl_engine, one DB insert via
    database_conn. Returns None.
    """
    crawl_url_1 = 'http://211.151.108.43/soccer/match/'
    crawl_url_2 = '/ah/handicap/27/'
    webpage = crawl_url_1 + str(match_number) + crawl_url_2
    # capture_matchesdata already returns a populated odds_trend record;
    # the original built a throwaway data_class_test.odds_trend() first
    # and immediately overwrote it — dead store removed.
    entry = crawl_engine.capture_matchesdata(webpage)
    database_conn.db_conn(entry)
def fetch_byterm(seed):
    """Crawl and store odds pages for every match returned for *seed*.

    seed: term passed to crawl_engine.get_zucai to obtain the list of
    match identifiers (assumed already string-like, as in the original).
    Side effects: one HTTP fetch and one DB insert per match.
    Returns None.
    """
    crawl_url_1 = 'http://211.151.108.43/soccer/match/'
    crawl_url_2 = '/ah/handicap/27/'
    mlist = crawl_engine.get_zucai(seed)
    for match_id in mlist:
        webpage = crawl_url_1 + match_id + crawl_url_2
        # Dead store removed: the original created a fresh
        # data_class_test.odds_trend() each iteration and immediately
        # overwrote it with capture_matchesdata's return value.
        entry = crawl_engine.capture_matchesdata(webpage)
        database_conn.db_conn(entry)
def capture_matchesdata(pageurl):
    """Fetch a match's odds page plus its companion page and parse them.

    pageurl: full handicap-page URL. The last 12 characters
    ('/handicap/27/') are stripped to reach the companion match page, and
    characters [35:41] of the URL are the embedded match number —
    # NOTE(review): both offsets assume the exact URL layout built by the
    # fetch_* callers; verify if the host or path ever changes.

    Returns a populated data_class_test.odds_trend when the odds page has
    data (per if_datas_), otherwise None.
    """
    # Read both pages up front and close each handle immediately. The
    # original called close() only after the success-path return, so the
    # sockets leaked whenever data was actually found.
    page1 = urllib2.urlopen(pageurl, timeout=3)
    try:
        pages1 = page1.read()
    finally:
        page1.close()
    page2 = urllib2.urlopen(pageurl[:-12], timeout=3)
    try:
        pages2 = page2.read()
    finally:
        page2.close()
    mnumber = pageurl[35:41]

    if not if_datas_(pages1):
        # Original fell off the end of the function here (implicit None).
        return None

    # Slice out the team table ("matchTeam" ... next "table") and the
    # 115-char window just before it, which holds the match time markup.
    st = pages2.find("matchTeam")
    ed = pages2.find("table", st)
    pages3 = pages2[st:ed]
    pages4 = pages2[st - 300:st - 185]

    # data1/data2 are JS array literals ("data1=[...];") — take the text
    # between '=' and ';' and evaluate it safely with ast.literal_eval.
    list1 = pages1[pages1.find("data1=") + 6:pages1.find(";", pages1.find("data1="))]
    list1 = ast.literal_eval(list1)
    list2 = pages1[pages1.find("data2=") + 6:pages1.find(";", pages1.find("data2="))]
    list2 = ast.literal_eval(list2)

    # homeName/awayName are quoted JS strings; +10 skips past the opening
    # quote, -1 drops the closing one. Page bytes are GBK-encoded.
    str1 = pages1[pages1.find("homeName") + 10:pages1.find(";", pages1.find("homeName")) - 1]
    str2 = pages1[pages1.find("awayName") + 10:pages1.find(";", pages1.find("awayName")) - 1]
    cstr1 = str1.decode("GBK").encode("utf-8")
    cstr2 = str2.decode("GBK").encode("utf-8")

    # Score spans: exactly two <span> values means a played match.
    res_page = SpanParser()
    res_page.feed(pages3)
    res = res_page.get_result()
    if len(res) == 2:
        ress = "%s:%s" % (res[0], res[1])
    else:
        ress = "no scores"

    # Match time is the concatenated text of the <p> elements.
    time_page = PParser()
    time_page.feed(pages4)
    time = "".join(time_page.get_result())

    _particular_match_ = data_class_test.odds_trend()
    _particular_match_._init_(list1, list2, cstr1, cstr2, ress, time, mnumber)
    return _particular_match_
import psycopg2

import crawl_engine
import data_class_test


def db_conn(d_entry):
    """Insert one odds_trend record into _odds_trend and DD_table.

    d_entry: a data_class_test.odds_trend instance; its home/away names,
    odds, result, match time and match number are written to _odds_trend,
    and its ddlist_restruct form to DD_table.
    Side effects: opens and closes a PostgreSQL connection, commits once.
    Returns None.
    """
    # SECURITY(review): credentials and host are hardcoded here; move them
    # to configuration/environment before this leaves development.
    conn = psycopg2.connect(database='postgres',
                            user='******',
                            password='******',
                            host='54.186.47.255',
                            options='-c statement_timeout=100')
    # try/finally added: the original leaked the cursor and connection
    # whenever an execute() raised.
    try:
        cur = conn.cursor()
        try:
            isql = "INSERT INTO _odds_trend(hname,aname,hodds,aodds,result,mtime,mnumber) VALUES(%s,%s,%s,%s,%s,%s,%s);"
            ssql = "INSERT INTO DD_table(matches,ddlist) VALUES(%s,%s);"
            cur.execute(isql, (d_entry.home_name,
                               d_entry.away_name,
                               str(d_entry.home_odds),
                               str(d_entry.away_odds),
                               d_entry.result,
                               d_entry.match_time,
                               d_entry.match_no))
            ddlist = data_class_test.ddlist_restruct(d_entry)
            matches = d_entry.match_no
            cur.execute(ssql, (matches, ddlist))
            conn.commit()
        finally:
            cur.close()
    finally:
        conn.close()


# NOTE(review): this crawl runs as a module-level side effect on import,
# matching the original. The original also built a throwaway
# data_class_test.odds_trend() that was immediately overwritten — removed.
webpage = 'http://211.151.108.43/soccer/match/742165/ah/handicap/27/'
entry = crawl_engine.capture_matchesdata(webpage)
#db_conn(entry)