# Module-level imports assumed by every get_races() variant in this file:
#   import datetime, json, os, uuid
#   import time
#   import time as tim
#   import requests
#   import bs4 as bs
# plus project names used below: get_horses, get_table, Country, Podesavanja,
# date, fontic, headers.

# Variant 1: Equibase (USA) -- saves a Country record and a dated JSON file.
def get_races(eventlist):
    for race in eventlist:
        url = race['url']
        try:
            r = requests.get(url)
        except Exception:
            time.sleep(3)  # brief back-off, then retry once
            r = requests.get(url)
        racelist = []
        soup = bs.BeautifulSoup(r.text, 'lxml')
        for tr in soup.find_all('table'):
            tds = tr.find_all('td')
            length = len(tds)
            # The cells come back as one flat list; each race occupies 8 of them.
            for i in range(0, length, 8):
                x = tds[2 + i].text.strip(' \t\n\r')
                x = x.replace(" ", "")
                url = tds[0 + i].find('a')
                url = 'http://www.equibase.com' + url.get('href')
                tabledic = {
                    'Race: ': tds[0 + i].text,
                    'URL': url,
                    'Purse': tds[1 + i].text,
                    'Race Type': x,
                    'Distance': tds[3 + i].text,
                    'Surface': tds[4 + i].text,
                    'Starters': tds[5 + i].text,
                    'Est. Post': tds[6 + i].text,
                    'Horses': [],
                }
                racelist.append(tabledic)
        race['races'] = get_horses(racelist)
    jsonero = json.dumps(eventlist)
    jsonic = json.loads(jsonero)  # unused round-trip; kept from the original
    print("DATE:", date)  # `date` is a module-level global set elsewhere
    o = Country('1', 'America', jsonero, date)
    o.save()
    datic = datetime.date.today()
    d = str(datic)
    filename = 'USA' + d + '.json'
    path = "USFiles"  # the directory must already exist
    fullpath = os.path.join(path, filename)
    f = open(fullpath, 'w')
    f.write(jsonero)
    f.close()
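# All of the variants in this file share the same "request, sleep, request
# again" recovery pattern. A minimal sketch of how that could be factored into
# one helper -- fetch_with_retry is a hypothetical name, not part of the
# original scripts; it assumes only the `requests` library:
import time

import requests


def fetch_with_retry(url, retries=3, delay=3, **kwargs):
    """GET `url`, sleeping `delay` seconds between failed attempts."""
    for attempt in range(retries):
        try:
            return requests.get(url, **kwargs)
        except requests.RequestException:
            if attempt == retries - 1:
                raise  # out of attempts; let the caller decide
            time.sleep(delay)


# Usage inside the loops above would then collapse to a single call:
# r = fetch_with_retry(race['url'], retries=2, delay=3)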
# Variant 2: Equibase (England) -- PUTs the JSON to the konji API and writes
# inbreds.json. The table here has 7 cells per race instead of 8.
def get_races(eventlist):
    global date
    for race in eventlist:
        url = race['url']
        date = race['date']
        try:
            r = requests.get(url)
        except Exception:
            time.sleep(6)
            r = requests.get(url)
        racelist = []
        soup = bs.BeautifulSoup(r.text, 'lxml')
        for tr in soup.find_all('table'):
            tds = tr.find_all('td')
            length = len(tds)
            for i in range(0, length, 7):
                x = tds[2 + i].text.strip(' \t\n\r')
                x = x.replace(" ", "")
                url = tds[0 + i].find('a')
                url = 'http://www.equibase.com' + url.get('href')
                tabledic = {
                    'Race: ': tds[0 + i].text,
                    'URL': url,
                    'Purse': tds[1 + i].text,
                    'Race Type': x,
                    'Distance': tds[3 + i].text,
                    'Surface': tds[4 + i].text,
                    'Starters': tds[5 + i].text,
                    'Est. Post': tds[6 + i].text,
                    'Horses': [],
                }
                racelist.append(tabledic)
        race['races'] = get_horses(racelist)
    jsonero = json.dumps(eventlist)
    print("DATE:", date)
    o = Country('3', 'England', jsonero, date)
    o.save()
    # NB: jsonero is already a JSON string, so json= here encodes it a second
    # time and the endpoint receives a quoted string, not an object.
    r = requests.put('https://konji-187909.appspot.com/api/regions/uk',
                     json=jsonero)
    f = open('inbreds.json', 'w')
    f.write(jsonero)
    f.close()
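# On the double-encoding noted above: a short sketch of the two ways to send
# the payload encoded exactly once, assuming the endpoint expects a plain JSON
# body (the eventlist literal here is placeholder data):
import json

import requests

eventlist = [{'url': 'http://example.com', 'date': '2017-12-09'}]  # placeholder

# Either let requests do the (single) encoding...
r1 = requests.put('https://konji-187909.appspot.com/api/regions/uk',
                  json=eventlist)

# ...or encode once yourself and send the raw body with an explicit header.
body = json.dumps(eventlist)
r2 = requests.put('https://konji-187909.appspot.com/api/regions/uk',
                  data=body, headers={'Content-Type': 'application/json'})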
# Variant 3: Equibase (USA) -- same scrape as variant 1, but POSTs the JSON
# to a node endpoint instead of writing a file.
def get_races(eventlist):
    for race in eventlist:
        url = race['url']
        try:
            r = requests.get(url)
        except Exception:
            time.sleep(6)
            r = requests.get(url)
        racelist = []
        soup = bs.BeautifulSoup(r.text, 'lxml')
        for tr in soup.find_all('table'):
            tds = tr.find_all('td')
            length = len(tds)
            for i in range(0, length, 8):
                x = tds[2 + i].text.strip(' \t\n\r')
                x = x.replace(" ", "")
                url = tds[0 + i].find('a')
                url = 'http://www.equibase.com' + url.get('href')
                tabledic = {
                    'Race: ': tds[0 + i].text,
                    'URL': url,
                    'Purse': tds[1 + i].text,
                    'Race Type': x,
                    'Distance': tds[3 + i].text,
                    'Surface': tds[4 + i].text,
                    'Starters': tds[5 + i].text,
                    'Est. Post': tds[6 + i].text,
                    'Horses': [],
                }
                racelist.append(tabledic)
        race['races'] = get_horses(racelist)
    jsonero = json.dumps(eventlist)
    print("DATE:", date)
    o = Country('1', 'America', jsonero, date)
    o.save()
    # Placeholder URL left as-is; requests needs a scheme (http://...) here.
    noder = requests.post('replaceme.com', json=jsonero)
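# The `range(0, length, 8)` walk above treats the flat list of <td> cells as
# fixed-width rows. A sketch of the same idea as a small helper (`rows` is a
# hypothetical name), which turns the `tds[n + i]` offset arithmetic into
# plain indices into one row and guards against a trailing partial row:
def rows(cells, width):
    """Yield consecutive slices of `cells`, `width` items each."""
    for i in range(0, len(cells) - width + 1, width):
        yield cells[i:i + width]


# for row in rows(tds, 8):
#     race_type = row[2].text.strip().replace(" ", "")
#     link = row[0].find('a')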
# Variant 4: Equibase (USA) -- variant 1 plus clearing the is_scraping flag
# and scaling the proxy pool back down when the run finishes.
def get_races(eventlist):
    for race in eventlist:
        url = race['url']
        try:
            r = requests.get(url)
        except Exception:
            time.sleep(3)
            r = requests.get(url)
        racelist = []
        soup = bs.BeautifulSoup(r.text, 'lxml')
        for tr in soup.find_all('table'):
            tds = tr.find_all('td')
            length = len(tds)
            for i in range(0, length, 8):
                x = tds[2 + i].text.strip(' \t\n\r')
                x = x.replace(" ", "")
                url = tds[0 + i].find('a')
                url = 'http://www.equibase.com' + url.get('href')
                tabledic = {
                    'Race: ': tds[0 + i].text,
                    'URL': url,
                    'Purse': tds[1 + i].text,
                    'Race Type': x,
                    'Distance': tds[3 + i].text,
                    'Surface': tds[4 + i].text,
                    'Starters': tds[5 + i].text,
                    'Est. Post': tds[6 + i].text,
                    'Horses': [],
                }
                racelist.append(tabledic)
        race['races'] = get_horses(racelist)
    jsonero = json.dumps(eventlist)
    jsonic = json.loads(jsonero)  # unused round-trip; kept from the original
    print("DATE:", date)
    o = Country('1', 'America', jsonero, date)
    o.save()
    datic = datetime.date.today()
    d = str(datic)
    filename = 'USA' + d + '.json'
    path = "USFiles"
    fullpath = os.path.join(path, filename)
    f = open(fullpath, 'w')
    f.write(jsonero)
    f.close()
    # Mark the scrape as finished...
    p = Podesavanja.objects.get(id=1)
    p.is_scraping = 0
    p.save()
    # ...and scale the proxy pool down via the management API.
    headers = {'Authorization': 'Zm9ybXVsYTE='}
    scaling_payload = {
        "min": "0",
        "required": "0",
        "max": "9",
    }
    rer = requests.patch('http://159.65.107.239:8889/api/scaling',
                         json=scaling_payload, headers=headers)
    print(rer)
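# The is_scraping flag above is only cleared if get_races() runs to the end;
# an exception anywhere earlier leaves it stuck at 1. A minimal sketch of
# guarding the flag with try/finally -- run_scrape is a hypothetical wrapper,
# and it assumes the script's Django setup with the Podesavanja model already
# imported:
def run_scrape(scrape):
    """Run `scrape()` with the Podesavanja.is_scraping flag guarded."""
    p = Podesavanja.objects.get(id=1)
    p.is_scraping = 1
    p.save()
    try:
        scrape()  # whatever work get_races() and friends perform
    finally:
        # Runs whether the scrape finished or raised, so the flag never sticks.
        p.is_scraping = 0
        p.save()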
# Variant 5: JAIR (Japan) -- scrapes the race list and card, then looks each
# horse up on equineline.com through a rotating proxy; on a captcha page it
# asks the proxy manager (port 8889) to stop the instance that served the
# request and retries.
def get_races(events):
    global fontic
    formData = {'raceYmd': 20171209, 'command': 'displayRaceList'}
    kk = requests.post('http://210.145.16.108/jair/SelectRace.do', data=formData)
    soup = bs.BeautifulSoup(kk.text, 'lxml')
    soup = soup.find('table', attrs={'width': 584})
    tr = soup.find_all('tr')
    lop = 0
    ev = 0
    for trs in tr[1:len(tr)]:
        tds = trs.find_all('td')
        for i in range(0, len(tds), 3):
            # tds[0] - first post time, tds[1] - parameters for the link
            # tds[2] - second post time, tds[3] - parameters for the link
            time = tds[0 + i].text.replace(" ", "")  # NB: shadows the time module
            inform = tds[1 + i]
            a = inform.find('a')  # carries the parameters for the POST request
            kek = (a.get('href')[19:400].replace("(", "").replace(")", "")
                   .replace(" ", "")).strip(' \t\n\r')  # same as below
            kek = kek.split(',')
            nl = []
            for word in kek:  # strip \n and other leftovers
                word = word.strip(" \t\n\r ' ")
                nl.append(word)
            res = {
                'command': 'dispRaceResult',
                'raceY': nl[0],
                'raceMd': nl[1],
                'raceJoCd': nl[2],
                'raceKai': nl[3],
                'raceHi': nl[4],
                'raceNo': nl[5],
                'dataKbn': nl[6],
            }
            race = {
                'time': time,
                'instr': res,
            }
            uuu = int(i / 3)
            events[uuu]['races'].append(race)
    for me in events:
        for race in me['races']:
            no = race['instr']['raceNo'].replace(" ", "")
            print(race['time'] + " - " + race['instr']['raceNo'])
            if no == '':
                print("no race")
            else:
                url = 'http://210.145.16.108/jair/SelectDenma.do'
                formData = race['instr']
                req = requests.post(url, data=formData)
                soup = bs.BeautifulSoup(req.text, 'lxml')
                tablic = soup.find_all('table', attrs={
                    'cellspacing': 0, 'cellpadding': 1, 'width': 720,
                    'bgcolor': '#ffffff', 'border': 1})
                print(tablic)
                table = soup.find('table', attrs={
                    'cellspacing': 0, 'cellpadding': 0, 'width': 720,
                    'bgcolor': '#ffffff', 'border': 0})
                tr = table.find_all('tr')
                newtr = tablic[1].find_all('tr')
                length = len(tr)
                horselist = []
                for i in range(5, length, 1):
                    print(i)
                    tds = tr[i].find_all('td')
                    newtds = newtr[i - 4].find_all('td')
                    hor = newtds[4].find_all('font')
                    siredam = newtds[2].find_all('font')
                    print(tds[2].text + " " + tds[3].text +
                          " Jockey: " + hor[0].text +
                          " Trainer: " + hor[1].text +
                          " Sire: " + siredam[0].text +
                          " Dam: " + siredam[1].text)
                    horseurl = ('http://www.equineline.com/Free5XPedigreeSearchResults.cfm?horse_name='
                                + tds[3].text
                                + '&page_state=LIST_HITS&foaling_year=&dam_name=&include_sire_line=Y')
                    print(horseurl)
                    while 1:
                        proxies = {
                            'http': 'http://35.231.21.43:8888',
                            'https': 'https://35.231.21.43:8888',
                        }
                        headers = {
                            'user-agent': 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0)',
                            'origin': 'https://www.equibase.com',
                            'x-requested-with': 'XMLHttpRequest',
                        }
                        try:
                            horsereq = requests.get(horseurl, headers=headers,
                                                    timeout=9, proxies=proxies)
                        except Exception:
                            print('error')
                            tim.sleep(12)
                            horsereq = requests.get(horseurl, headers=headers,
                                                    timeout=9, proxies=proxies)
                            continue
                        else:
                            print("mek")
                            break
                    soup = bs.BeautifulSoup(horsereq.text, 'lxml')
                    h4 = soup.find('h4')
                    print(h4)
                    if str(h4) == '<h4><strong>No Matches Found</strong></h4>':
                        print("Horse doesn't exist in DB")
                        inftab = 'n/a'
                    else:
                        try:
                            horsrl = soup.find('a').get('href')
                        except Exception:
                            print("Captcha error")
                            try:
                                ime = horsereq.history[0].headers['x-cache-proxyname']
                            except Exception:
                                print("name error")
                                tim.sleep(3)
                            else:
                                payld = {
                                    'name': ime,
                                }
                                print(ime)
                                # NB: headers now holds the manager auth token,
                                # not the browser headers used above.
                                headers = {'Authorization': 'd2VhcmVzZWN1cmU='}
                                # The manager listens one port up: 8888 -> 8889.
                                ipic = proxies.get('http')
                                ipic = ipic[0:-1] + '9'
                                print(ipic)
                                stop = requests.post(ipic + '/instances/stop',
                                                     json=payld, headers=headers)
                                print("success")
                            while 1:
                                proxies = {
                                    'http': 'http://35.231.21.43:8888',
                                    'https': 'https://35.231.21.43:8888',
                                }
                                try:
                                    horsereq = requests.get(horseurl, headers=headers,
                                                            timeout=9, proxies=proxies)
                                    soup = bs.BeautifulSoup(horsereq.text, 'lxml')
                                    horsrl = soup.find('a').get('href')
                                except Exception:
                                    try:
                                        ime = horsereq.history[0].headers['x-cache-proxyname']
                                        print(ime)
                                        payld = {
                                            'name': ime,
                                        }
                                        headers = {'Authorization': 'd2VhcmVzZWN1cmU='}
                                        ipic = proxies.get('http')
                                        ipic = ipic[0:-1] + '9'
                                        print(ipic)
                                        stop = requests.post(ipic + '/instances/stop',
                                                             json=payld, headers=headers)
                                        continue
                                    except Exception:
                                        tim.sleep(2)
                                else:
                                    break
                        url = 'http://www.equineline.com/' + horsrl
                        start = url.find('reference_number=')
                        end = url.find('&registry')
                        refnum = url[start + 17:end]
                        print(refnum)
                        link = ('http://www.equineline.com/Free5XPedigreeNickingDisplay.cfm'
                                '?page_state=DISPLAY_REPORT&reference_number=' + refnum)
                        while 1:
                            try:
                                maker = requests.get(link, headers=headers,
                                                     timeout=9, proxies=proxies)
                                supica = bs.BeautifulSoup(maker.text, 'lxml')
                                table = supica.find('table')
                                if table is None:
                                    try:
                                        a = supica.find('a').get('href')
                                    except Exception:
                                        a = ''
                                    # Address redacted in the source.
                                    if a == 'mailto:[email protected]':
                                        print("NO horse")
                                        table = 'Notable'
                                    else:
                                        print("created")
                                        maker = requests.get(link, headers=headers,
                                                             timeout=9, proxies=proxies)
                                        supica = bs.BeautifulSoup(maker.text, 'lxml')
                                        table = supica.find('table')
                            except Exception:
                                continue
                            else:
                                break
                        while 1:
                            try:
                                if table is None:
                                    tim.sleep(6)
                                    raise EnvironmentError
                                else:
                                    break
                            except Exception:
                                while 1:
                                    try:
                                        maker = requests.get(link, headers=headers, timeout=9)
                                    except Exception:
                                        continue
                                    else:
                                        break
                                break
                        if table != 'Notable':
                            # Guard added: the original re-parsed unconditionally,
                            # which clobbered the 'Notable' sentinel.
                            supica = bs.BeautifulSoup(maker.text, 'lxml')
                            table = supica.find('table')
                        if table == 'Notable':
                            inftab = 'n/a'
                        else:
                            inftab = get_table(table)
                    ud = str(uuid.uuid4())
                    horsedic = {
                        'P#': tds[2].text,
                        'Name': tds[3].text,
                        'Claim': 'No claim',
                        'Wgt': tds[5].text,
                        'Jockey': hor[0].text,
                        'Trainer': hor[1].text,
                        'Sire': siredam[0].text,
                        'Dam': siredam[1].text,
                        'info': inftab,
                        'uuid': ud,
                    }
                    print(horsedic)
                    horselist.append(horsedic)
                race['horses'] = horselist
    print(events)
    o = Country('4', 'Japan', events, fontic)
    o.save()
    f = open('nippon2.json', 'w')
    jsonero = json.dumps(events)
    f.write(jsonero)
    f.close()
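# The captcha branch above (read `x-cache-proxyname` from the redirect
# history, POST to the manager's /instances/stop, retry) is duplicated twice
# inside the function. A minimal sketch of it as one helper, reusing the
# header, port convention, and endpoint from the code above
# (stop_proxy_instance is a hypothetical name):
import requests


def stop_proxy_instance(horsereq, proxies):
    """Ask the proxy manager to stop the instance that served `horsereq`."""
    name = horsereq.history[0].headers['x-cache-proxyname']
    manager = proxies['http'][:-1] + '9'  # port 8888 -> 8889, as above
    requests.post(manager + '/instances/stop',
                  json={'name': name},
                  headers={'Authorization': 'd2VhcmVzZWN1cmU='})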
# Variant 6: JAIR (Japan) -- same scrape as variant 5, but without the proxy
# pool; captcha recovery is just sleep-and-refetch, up to three times.
# Relies on a module-level `headers` dict.
def get_races(events):
    global fontic
    formData = {'raceYmd': 20171126, 'command': 'displayRaceList'}
    kk = requests.post('http://210.145.16.108/jair/SelectRace.do',
                       headers=headers, data=formData)
    soup = bs.BeautifulSoup(kk.text, 'lxml')
    soup = soup.find('table', attrs={'width': 584})
    tr = soup.find_all('tr')
    lop = 0
    ev = 0
    for trs in tr[1:len(tr)]:
        tds = trs.find_all('td')
        # tds[0] - first post time, tds[1] - parameters for the link
        # tds[2] - second post time, tds[3] - parameters for the link
        time = tds[lop + ev].text.replace(" ", "")
        inform = tds[lop + ev + 1]
        if time == '':
            print("nothing here")
            if ev == 2 and lop == 1:
                time = tds[lop + ev].text.replace(" ", "")
            elif lop == 1 and ev == 0:
                time = tds[0].text.replace(" ", "")
            elif lop == 0 and ev == 2:
                time = tds[lop].text.replace(" ", "")
            inform = tds[lop + ev + 1 + 1]
            ev = 0
        a = inform.find('a')  # carries the parameters for the POST request
        kek = (a.get('href')[19:400].replace("(", "").replace(")", "")
               .replace(" ", "")).strip(' \t\n\r')  # same as below
        kek = kek.split(',')
        nl = []
        for word in kek:  # strip \n and other leftovers
            word = word.strip(" \t\n\r ' ")
            nl.append(word)
        res = {
            'command': 'dispRaceResult',
            'raceY': nl[0],
            'raceMd': nl[1],
            'raceJoCd': nl[2],
            'raceKai': nl[3],
            'raceHi': nl[4],
            'raceNo': nl[5],
            'dataKbn': nl[6],
        }
        race = {
            'time': time,
            'instr': res,
        }
        if ev == 4:
            ev = 0
        else:
            ev += 2
        events[lop]['races'].append(race)
        if lop == 0:
            lop = 1
        else:
            lop = 0
    for me in events:
        for race in me['races']:
            print(race['time'] + " - " + race['instr']['raceNo'])
            url = 'http://210.145.16.108/jair/SelectDenma.do'
            formData = race['instr']
            req = requests.post(url, data=formData)
            soup = bs.BeautifulSoup(req.text, 'lxml')
            tablic = soup.find_all('table', attrs={
                'cellspacing': 0, 'cellpadding': 1, 'width': 720,
                'bgcolor': '#ffffff', 'border': 1})
            table = soup.find('table', attrs={
                'cellspacing': 0, 'cellpadding': 0, 'width': 720,
                'bgcolor': '#ffffff', 'border': 0})
            tr = table.find_all('tr')
            newtr = tablic[1].find_all('tr')
            length = len(tr)
            horselist = []
            for i in range(5, length, 1):
                print(i)
                tds = tr[i].find_all('td')
                newtds = newtr[i - 4].find_all('td')
                hor = newtds[4].find_all('font')
                siredam = newtds[2].find_all('font')
                print(tds[2].text + " " + tds[3].text +
                      " Jockey: " + hor[0].text +
                      " Trainer: " + hor[1].text +
                      " Sire: " + siredam[0].text +
                      " Dam: " + siredam[1].text)
                horseurl = ('http://www.equineline.com/Free5XPedigreeSearchResults.cfm?horse_name='
                            + tds[3].text
                            + '&page_state=LIST_HITS&foaling_year=&dam_name=&include_sire_line=Y')
                print(horseurl)
                try:
                    horsereq = requests.get(horseurl, headers=headers)
                except Exception:
                    tim.sleep(6)
                    horsereq = requests.get(horseurl, headers=headers)
                soup = bs.BeautifulSoup(horsereq.text, 'lxml')
                h4 = soup.find('h4')
                print(h4)
                if str(h4) == '<h4><strong>No Matches Found</strong></h4>':
                    print("Horse doesn't exist in DB")
                    inftab = 'n/a'
                else:
                    try:
                        horsrl = soup.find('a').get('href')
                    except Exception:
                        print("Captcha error")
                        tim.sleep(6)
                        tim.sleep(6)
                        horsereq = requests.get(horseurl, headers=headers)
                        soup = bs.BeautifulSoup(horsereq.text, 'lxml')
                        try:
                            horsrl = soup.find('a').get('href')
                        except Exception:
                            print("-- CHANGE --")
                            tim.sleep(12)
                            horsereq = requests.get(horseurl, headers=headers)
                            soup = bs.BeautifulSoup(horsereq.text, 'lxml')
                            try:
                                horsrl = soup.find('a').get('href')
                            except Exception:
                                print("-- CHANGE --")
                                tim.sleep(12)
                                horsereq = requests.get(horseurl, headers=headers)
                                soup = bs.BeautifulSoup(horsereq.text, 'lxml')
                                horsrl = soup.find('a').get('href')
                    url = 'http://www.equineline.com/' + horsrl
                    start = url.find('reference_number=')
                    end = url.find('&registry')
                    refnum = url[start + 17:end]
                    print(refnum)
                    link = ('http://www.equineline.com/Free5XPedigreeNickingDisplay.cfm'
                            '?page_state=DISPLAY_REPORT&reference_number=' + refnum)
                    try:
                        maker = requests.get(link, headers=headers)
                    except Exception:
                        tim.sleep(6)
                        maker = requests.get(link, headers=headers)
                    supica = bs.BeautifulSoup(maker.text, 'lxml')
                    table = supica.find('table')
                    if table is None:
                        tim.sleep(6)
                        print("how")
                        maker = requests.get(link, headers=headers)
                        supica = bs.BeautifulSoup(maker.text, 'lxml')
                        table = supica.find('table')
                        if table is None:
                            print("how")
                            tim.sleep(6)
                            maker = requests.get(link, headers=headers)
                            supica = bs.BeautifulSoup(maker.text, 'lxml')
                            table = supica.find('table')
                    inftab = get_table(table)
                ud = str(uuid.uuid4())
                horsedic = {
                    'P#': tds[2].text,
                    'Name': tds[3].text,
                    'Claim': 'No claim',
                    'Wgt': tds[5].text,
                    'Jockey': hor[0].text,
                    'Trainer': hor[1].text,
                    'Sire': siredam[0].text,
                    'Dam': siredam[1].text,
                    'info': inftab,
                    'uuid': ud,
                }
                print(horsedic)
                horselist.append(horsedic)
            race['horses'] = horselist
    print(events)
    o = Country('4', 'Japan', events, fontic)
    o.save()
    f = open('nippon2.json', 'w')
    jsonero = json.dumps(events)
    f.write(jsonero)
    f.close()
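# The hand-unrolled "sleep, re-fetch, re-parse" chains above retry a fixed
# number of times by copy-pasting the same block. A minimal sketch of the
# same behaviour as one bounded loop -- get_first_href is a hypothetical
# name; it assumes only requests and bs4:
import time as tim

import bs4 as bs
import requests


def get_first_href(url, headers, attempts=3, delay=12):
    """Fetch `url` and return the first <a> href, retrying on captcha pages."""
    for attempt in range(attempts):
        req = requests.get(url, headers=headers)
        soup = bs.BeautifulSoup(req.text, 'lxml')
        a = soup.find('a')
        if a is not None:
            return a.get('href')
        print("-- CHANGE --")  # captcha page: no links to follow
        tim.sleep(delay)
    raise RuntimeError('no <a> tag after %d attempts: %s' % (attempts, url))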