Example #1
def get_races(eventlist):
    for race in eventlist:
        url = race['url']
        try:
            r = requests.get(url)
        except requests.exceptions.RequestException:
            # back off briefly, then retry once
            time.sleep(3)
            r = requests.get(url)
        racelist = []
        soup = bs.BeautifulSoup(r.text,'lxml')
        for table in soup.find_all('table'):
            tds = table.find_all('td')
            # each race occupies eight consecutive <td> cells
            for i in range(0, len(tds), 8):
                # race type, with surrounding and inner whitespace stripped
                x = tds[2 + i].text.strip(' \t\n\r').replace(" ", "")
                url = 'http://www.equibase.com' + tds[0 + i].find('a').get('href')
                tabledic = {
                    'Race: ': tds[0 + i].text,
                    'URL': url,
                    'Purse': tds[1 + i].text,
                    'Race Type': x,
                    'Distance': tds[3 + i].text,
                    'Surface': tds[4 + i].text,
                    'Starters': tds[5 + i].text,
                    'Est. Post': tds[6 + i].text,
                    'Horses': [],
                }
                racelist.append(tabledic)
        race['races'] = get_horses(racelist)
    jsonero = json.dumps(eventlist)
    print("DATE:", date)  # `date` is expected to be set at module level
    o = Country('1', 'America', jsonero, date)
    o.save()
    # dump the scraped card to a dated JSON file, e.g. USFiles/USA2017-12-09.json
    filename = 'USA' + str(datetime.date.today()) + '.json'
    fullpath = os.path.join("USFiles", filename)
    with open(fullpath, 'w') as f:
        f.write(jsonero)
Example #2
def get_races(eventlist):
    global date
    for race in eventlist:
        url = race['url']
        date = race['date']
        try:
            r = requests.get(url)
        except requests.exceptions.RequestException:
            # back off, then retry once
            time.sleep(6)
            r = requests.get(url)
        racelist = []
        soup = bs.BeautifulSoup(r.text, 'lxml')
        for table in soup.find_all('table'):
            tds = table.find_all('td')
            # each race occupies seven consecutive <td> cells here
            for i in range(0, len(tds), 7):
                x = tds[2 + i].text.strip(' \t\n\r').replace(" ", "")
                url = 'http://www.equibase.com' + tds[0 + i].find('a').get('href')
                tabledic = {
                    'Race: ': tds[0 + i].text,
                    'URL': url,
                    'Purse': tds[1 + i].text,
                    'Race Type': x,
                    'Distance': tds[3 + i].text,
                    'Surface': tds[4 + i].text,
                    'Starters': tds[5 + i].text,
                    'Est. Post': tds[6 + i].text,
                    'Horses': [],
                }
                racelist.append(tabledic)
        race['races'] = get_horses(racelist)
    jsonero = json.dumps(eventlist)
    print("DATE:", date)
    o = Country('3', 'England', jsonero, date)
    o.save()
    # pass the list itself: `json=` serializes its argument, so sending the
    # already dumped string would double-encode the payload
    r = requests.put('https://konji-187909.appspot.com/api/regions/uk',
                     json=eventlist)
    with open('inbreds.json', 'w') as f:
        f.write(jsonero)
Example #3
def get_races(eventlist):
    for race in eventlist:
        url = race['url']
        try:
            r = requests.get(url)
        except requests.exceptions.RequestException:
            # back off, then retry once
            time.sleep(6)
            r = requests.get(url)
        racelist = []
        soup = bs.BeautifulSoup(r.text, 'lxml')
        for table in soup.find_all('table'):
            tds = table.find_all('td')
            # each race occupies eight consecutive <td> cells
            for i in range(0, len(tds), 8):
                x = tds[2 + i].text.strip(' \t\n\r').replace(" ", "")
                url = 'http://www.equibase.com' + tds[0 + i].find('a').get('href')
                tabledic = {
                    'Race: ': tds[0 + i].text,
                    'URL': url,
                    'Purse': tds[1 + i].text,
                    'Race Type': x,
                    'Distance': tds[3 + i].text,
                    'Surface': tds[4 + i].text,
                    'Starters': tds[5 + i].text,
                    'Est. Post': tds[6 + i].text,
                    'Horses': [],
                }
                racelist.append(tabledic)
        race['races'] = get_horses(racelist)
    jsonero = json.dumps(eventlist)
    print("DATE:", date)  # `date` is expected to be set at module level
    o = Country('1', 'America', jsonero, date)
    o.save()

    # 'replaceme.com' is a placeholder endpoint left in the original code;
    # the list is passed so `json=` does not double-encode the payload
    noder = requests.post('replaceme.com', json=eventlist)
Example #4
def get_races(eventlist):
    for race in eventlist:
        url = race['url']
        try:
            r = requests.get(url)
        except requests.exceptions.RequestException:
            # back off briefly, then retry once
            time.sleep(3)
            r = requests.get(url)
        racelist = []
        soup = bs.BeautifulSoup(r.text, 'lxml')
        for table in soup.find_all('table'):
            tds = table.find_all('td')
            # each race occupies eight consecutive <td> cells
            for i in range(0, len(tds), 8):
                x = tds[2 + i].text.strip(' \t\n\r').replace(" ", "")
                url = 'http://www.equibase.com' + tds[0 + i].find('a').get('href')
                tabledic = {
                    'Race: ': tds[0 + i].text,
                    'URL': url,
                    'Purse': tds[1 + i].text,
                    'Race Type': x,
                    'Distance': tds[3 + i].text,
                    'Surface': tds[4 + i].text,
                    'Starters': tds[5 + i].text,
                    'Est. Post': tds[6 + i].text,
                    'Horses': [],
                }
                racelist.append(tabledic)
        race['races'] = get_horses(racelist)
    jsonero = json.dumps(eventlist)
    print("DATE:", date)  # `date` is expected to be set at module level
    o = Country('1', 'America', jsonero, date)
    o.save()
    # dump the scraped card to a dated JSON file
    filename = 'USA' + str(datetime.date.today()) + '.json'
    fullpath = os.path.join("USFiles", filename)
    with open(fullpath, 'w') as f:
        f.write(jsonero)
    # clear the "scraping in progress" flag on the settings record
    p = Podesavanja.objects.get(id=1)
    p.is_scraping = 0
    p.save()
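    # the PATCH below presumably scales a proxy/worker pool back down after
    # the run; this is inferred from the payload keys, as the API itself is
    # not documented on this page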
    headers = {'Authorization': 'Zm9ybXVsYTE='}
    scaling_payload = {
        "min": "0",
        "required": "0",
        "max": "9",
    }
    rer = requests.patch('http://159.65.107.239:8889/api/scaling',
                         json=scaling_payload,
                         headers=headers)
    print(rer)
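All four US variants above share one driver contract: eventlist is a list of
event dicts carrying at least a 'url' (Example #2 also reads a 'date'). A
minimal sketch of driving one of them, with a hypothetical event URL and the
module-level globals the functions assume:

import datetime

date = str(datetime.date.today())  # Examples #1, #3 and #4 read this global

eventlist = [
    # the URL is a stand-in; real entry URLs come from an earlier scraping
    # stage that is not shown on this page
    {'url': 'http://www.equibase.com/static/entry/SAMPLE-entries.html',
     'date': date},  # 'date' is only required by Example #2
]
get_races(eventlist)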
Example #5
def get_races(events):
    global fontic  # race date string, set elsewhere at module level
    formData = {'raceYmd': 20171209, 'command': 'displayRaceList'}
    kk = requests.post('http://210.145.16.108/jair/SelectRace.do',
                       data=formData)
    soup = bs.BeautifulSoup(kk.text, 'lxml')
    soup = soup.find('table', attrs={'width': 584})
    tr = soup.find_all('tr')
    for trs in tr[1:]:
        tds = trs.find_all('td')
        # cells repeat in triples: post time, link carrying the POST
        # parameters, spacer
        for i in range(0, len(tds), 3):
            time = tds[0 + i].text.replace(" ", "")
            inform = tds[1 + i]
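            # note: `tim` below is presumably `import time as tim`; the name
            # `time` is reused here for the post-time string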

            a = inform.find('a')  # the href holds the POST parameters
            # strip the javascript wrapper from the href and split the
            # argument list
            kek = (a.get('href')[19:400].replace("(", "").replace(
                ")", "").replace(" ", "")).strip(' \t\n\r')
            kek = kek.split(',')
            nl = []
            for word in kek:  # strip \n, quotes and other leftovers
                word = word.strip(" \t\n\r ' ")
                nl.append(word)
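            # rebuild the seven positional arguments into the POST payload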
            res = {
                'command': 'dispRaceResult',
                'raceY': nl[0],
                'raceMd': nl[1],
                'raceJoCd': nl[2],
                'raceKai': nl[3],
                'raceHi': nl[4],
                'raceNo': nl[5],
                'dataKbn': nl[6],
            }
            race = {
                'time': time,
                'instr': res,
            }
            uuu = i // 3  # each td triple belongs to one meeting
            events[uuu]['races'].append(race)

    for me in events:
        for race in me['races']:
            no = race['instr']['raceNo'].replace(" ", "")
            print(race['time'] + " - " + race['instr']['raceNo'])
            if no == '':
                print("no race")
            else:
                url = 'http://210.145.16.108/jair/SelectDenma.do'
                # formData shape: {'command': 'dispRaceResult', 'raceY': '2017',
                #                  'raceMd': '1126', 'raceJoCd': '05', ...}
                formData = race['instr']
                req = requests.post(url, data=formData)
                soup = bs.BeautifulSoup(req.text, 'lxml')
                tablic = soup.find_all('table',
                                       attrs={
                                           'cellspacing': 0,
                                           'cellpadding': 1,
                                           'width': 720,
                                           'bgcolor': '#ffffff',
                                           'border': 1
                                       })
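                # tablic[1] carries the jockey/trainer and sire/dam columns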
                table = soup.find('table',
                                  attrs={
                                      'cellspacing': 0,
                                      'cellpadding': 0,
                                      'width': 720,
                                      'bgcolor': '#ffffff',
                                      'border': 0
                                  })
                tr = table.find_all('tr')
                newtr = tablic[1].find_all('tr')
                length = len(tr)
                horselist = []
                for i in range(5, length):
                    tds = tr[i].find_all('td')
                    newtds = newtr[i - 4].find_all('td')
                    hor = newtds[4].find_all('font')      # jockey, trainer
                    siredam = newtds[2].find_all('font')  # sire, dam
                    print(tds[2].text + " " + tds[3].text + " Jockey: " +
                          hor[0].text + " Trainer: " + hor[1].text + " Sire: " +
                          siredam[0].text + " Dam: " + siredam[1].text)
                    horseurl = ('http://www.equineline.com/Free5XPedigreeSearchResults.cfm'
                                '?horse_name=' + tds[3].text +
                                '&page_state=LIST_HITS&foaling_year=&dam_name='
                                '&include_sire_line=Y')
                    print(horseurl)
                    proxies = {
                        'http': 'http://35.231.21.43:8888',
                        'https': 'https://35.231.21.43:8888',
                    }
                    headers = {
                        'user-agent':
                        'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0)',
                        'origin': 'https://www.equibase.com',
                        'x-requested-with': 'XMLHttpRequest'
                    }
                    # keep retrying through the proxy until the request succeeds
                    while 1:
                        try:
                            horsereq = requests.get(horseurl,
                                                    headers=headers,
                                                    timeout=9,
                                                    proxies=proxies)
                        except requests.exceptions.RequestException:
                            print('error')
                            tim.sleep(12)
                            continue
                        else:
                            break
                    soup = bs.BeautifulSoup(horsereq.text, 'lxml')
                    h4 = soup.find('h4')
                    if str(h4) == '<h4><strong>No Matches Found</strong></h4>':
                        print("Horse doesn't exist in DB")
                        inftab = 'n/a'
                    else:
                        try:
                            horsrl = soup.find('a').get('href')
                        except AttributeError:
                            # no <a> on the page means we were served a captcha:
                            # read the proxy instance name from the response
                            # headers and ask the controller to stop it
                            print("Captcha error")
                            try:
                                ime = horsereq.history[0].headers[
                                    'x-cache-proxyname']
                            except (IndexError, KeyError):
                                print("ime error")
                                tim.sleep(3)
                            else:
                                payld = {'name': ime}
                                print(ime)
                                headers = {'Authorization': 'd2VhcmVzZWN1cmU='}
                                # the controller listens on the proxy host one
                                # port up (8888 -> 8889)
                                ipic = proxies.get('http')
                                ipic = ipic[0:-1] + '9'
                                print(ipic)
                                stop = requests.post(ipic + '/instances/stop',
                                                     json=payld,
                                                     headers=headers)
                            print("success")
                            # retry until the proxy serves the real page
                            while 1:
                                proxies = {
                                    'http': 'http://35.231.21.43:8888',
                                    'https': 'https://35.231.21.43:8888',
                                }
                                try:
                                    horsereq = requests.get(horseurl,
                                                            headers=headers,
                                                            timeout=9,
                                                            proxies=proxies)
                                    soup = bs.BeautifulSoup(
                                        horsereq.text, 'lxml')
                                    horsrl = soup.find('a').get('href')
                                except Exception:
                                    # captcha again: stop the proxy instance
                                    # and go around once more
                                    try:
                                        ime = horsereq.history[0].headers[
                                            'x-cache-proxyname']
                                        print(ime)
                                        payld = {'name': ime}
                                        headers = {
                                            'Authorization': 'd2VhcmVzZWN1cmU='
                                        }
                                        ipic = proxies.get('http')
                                        ipic = ipic[0:-1] + '9'
                                        print(ipic)
                                        stop = requests.post(
                                            ipic + '/instances/stop',
                                            json=payld,
                                            headers=headers)
                                        continue
                                    except Exception:
                                        tim.sleep(2)
                                else:
                                    break
                        url = 'http://www.equineline.com/' + horsrl
                        # pull the reference_number query parameter out of the URL
                        start = url.find('reference_number=')
                        end = url.find('&registry')
                        refnum = url[start + 17:end]
                        print(refnum)
                        link = ('http://www.equineline.com/'
                                'Free5XPedigreeNickingDisplay.cfm'
                                '?page_state=DISPLAY_REPORT&reference_number='
                                + refnum)
                        while 1:
                            try:
                                maker = requests.get(link,
                                                     headers=headers,
                                                     timeout=9,
                                                     proxies=proxies)
                                supica = bs.BeautifulSoup(maker.text, 'lxml')
                                table = supica.find('table')
                                if table is None:
                                    # a page whose only link is the (redacted)
                                    # mailto address means there is no report
                                    try:
                                        a = supica.find('a').get('href')
                                    except AttributeError:
                                        a = ''
                                    if a == 'mailto:[email protected]':
                                        print("NO horse")
                                        table = 'Notable'
                                    else:
                                        # transient failure: fetch once more
                                        maker = requests.get(link,
                                                             headers=headers,
                                                             timeout=9,
                                                             proxies=proxies)
                                        supica = bs.BeautifulSoup(
                                            maker.text, 'lxml')
                                        table = supica.find('table')
                            except requests.exceptions.RequestException:
                                continue
                            else:
                                break
                        # if the table is still missing, keep refetching
                        # (without the proxy) until it appears
                        while table is None:
                            tim.sleep(6)
                            try:
                                maker = requests.get(link,
                                                     headers=headers,
                                                     timeout=9)
                            except requests.exceptions.RequestException:
                                continue
                            supica = bs.BeautifulSoup(maker.text, 'lxml')
                            table = supica.find('table')

                        if table == 'Notable':
                            inftab = 'n/a'
                        else:
                            inftab = get_table(table)
                    ud = str(uuid.uuid4())
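                    # assemble the runner's card entry; the pedigree/nicking
                    # table (or 'n/a') lands under 'info'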
                    horsedic = {
                        'P#': tds[2].text,
                        'Name': tds[3].text,
                        'Claim': 'No claim',
                        'Wgt': tds[5].text,
                        'Jockey': hor[0].text,
                        'Trainer': hor[1].text,
                        'Sire': siredam[0].text,
                        'Dam': siredam[1].text,
                        'info': inftab,
                        'uuid': ud,
                    }
                    print(horsedic)
                    horselist.append(horsedic)
                race['horses'] = horselist
    print(events)
    o = Country('4', 'Japan', events, fontic)
    o.save()
    with open('nippon2.json', 'w') as f:
        f.write(json.dumps(events))
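Both Japan variants (Example #5 above and Example #6 below) mutate events in
place, so each event dict must arrive with an empty 'races' list already
attached. A minimal sketch, assuming a hypothetical two-meeting card and the
module-level globals the functions read:

fontic = '2017/12/09'  # hypothetical value; both variants read this global
headers = {'user-agent': 'Mozilla/5.0'}  # Example #6 also needs this global

events = [
    {'name': 'Meeting A', 'races': []},
    {'name': 'Meeting B', 'races': []},
]
get_races(events)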
Example #6
def get_races(events):
    global fontic  # race date string, set elsewhere at module level
    formData = {'raceYmd': 20171126, 'command': 'displayRaceList'}
    kk = requests.post('http://210.145.16.108/jair/SelectRace.do',
                       headers=headers,
                       data=formData)
    soup = bs.BeautifulSoup(kk.text, 'lxml')
    soup = soup.find('table', attrs={'width': 584})
    tr = soup.find_all('tr')
    # lop flips between the two meetings; ev walks the time/link cell pairs
    lop = 0
    ev = 0
    for trs in tr[1:]:
        tds = trs.find_all('td')
        # tds[0] - first post time, tds[1] - link carrying the POST parameters,
        # tds[2] - second post time, tds[3] - its link
        time = tds[lop + ev].text.replace(" ", "")
        inform = tds[lop + ev + 1]
        if time == '':
            print("empty time cell")
            # realign when the row layout shifts the time cell
            if ev == 2 and lop == 1:
                time = tds[lop + ev].text.replace(" ", "")
            elif lop == 1 and ev == 0:
                time = tds[0].text.replace(" ", "")
            elif lop == 0 and ev == 2:
                time = tds[lop].text.replace(" ", "")
                inform = tds[lop + ev + 1 + 1]

        ev = 0
        a = inform.find('a')  # the href holds the POST parameters
        # strip the javascript wrapper from the href and split the argument list
        kek = (a.get('href')[19:400].replace("(", "").replace(")", "").replace(
            " ", "")).strip(' \t\n\r')
        kek = kek.split(',')
        nl = []
        for word in kek:  # strip \n, quotes and other leftovers
            word = word.strip(" \t\n\r ' ")
            nl.append(word)
        res = {
            'command': 'dispRaceResult',
            'raceY': nl[0],
            'raceMd': nl[1],
            'raceJoCd': nl[2],
            'raceKai': nl[3],
            'raceHi': nl[4],
            'raceNo': nl[5],
            'dataKbn': nl[6],
        }
        race = {
            'time': time,
            'instr': res,
        }
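        # advance the cell offset and flip to the other meeting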
        if (ev == 4):
            ev = 0
        else:
            ev += 2
        events[lop]['races'].append(race)
        if (lop == 0):
            lop = 1
        else:
            lop = 0
    for me in events:
        for race in me['races']:
            print(race['time'] + " - " + race['instr']['raceNo'])
            url = 'http://210.145.16.108/jair/SelectDenma.do'
            # formData shape: {'command': 'dispRaceResult', 'raceY': '2017',
            #                  'raceMd': '1126', 'raceJoCd': '05', ...}
            formData = race['instr']
            req = requests.post(url, data=formData)
            soup = bs.BeautifulSoup(req.text, 'lxml')
            tablic = soup.find_all('table',
                                   attrs={
                                       'cellspacing': 0,
                                       'cellpadding': 1,
                                       'width': 720,
                                       'bgcolor': '#ffffff',
                                       'border': 1
                                   })
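            # tablic[1] carries the jockey/trainer and sire/dam columns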
            table = soup.find('table',
                              attrs={
                                  'cellspacing': 0,
                                  'cellpadding': 0,
                                  'width': 720,
                                  'bgcolor': '#ffffff',
                                  'border': 0
                              })
            tr = table.find_all('tr')
            newtr = tablic[1].find_all('tr')
            length = len(tr)
            horselist = []
            for i in range(5, length):
                tds = tr[i].find_all('td')
                newtds = newtr[i - 4].find_all('td')
                hor = newtds[4].find_all('font')      # jockey, trainer
                siredam = newtds[2].find_all('font')  # sire, dam
                print(tds[2].text + " " + tds[3].text + " Jockey: " +
                      hor[0].text + " Trainer: " + hor[1].text + " Sire: " +
                      siredam[0].text + " Dam: " + siredam[1].text)
                horseurl = ('http://www.equineline.com/Free5XPedigreeSearchResults.cfm'
                            '?horse_name=' + tds[3].text +
                            '&page_state=LIST_HITS&foaling_year=&dam_name='
                            '&include_sire_line=Y')
                print(horseurl)
                try:
                    horsereq = requests.get(horseurl, headers=headers)
                except requests.exceptions.RequestException:
                    # back off, then retry once
                    tim.sleep(6)
                    horsereq = requests.get(horseurl, headers=headers)
                soup = bs.BeautifulSoup(horsereq.text, 'lxml')
                h4 = soup.find('h4')
                if str(h4) == '<h4><strong>No Matches Found</strong></h4>':
                    print("Horse doesn't exist in DB")
                    inftab = 'n/a'
                else:
                    try:
                        horsrl = soup.find('a').get('href')
                    except AttributeError:
                        # no <a> means a captcha page: wait and refetch
                        print("Captcha error")
                        tim.sleep(12)
                        horsereq = requests.get(horseurl, headers=headers)
                        soup = bs.BeautifulSoup(horsereq.text, 'lxml')
                    try:
                        horsrl = soup.find('a').get('href')
                    except AttributeError:
                        print("-- CHANGE PROXY --")
                        tim.sleep(12)
                        horsereq = requests.get(horseurl, headers=headers)
                        soup = bs.BeautifulSoup(horsereq.text, 'lxml')
                        try:
                            horsrl = soup.find('a').get('href')
                        except AttributeError:
                            print("-- CHANGE PROXY --")
                            tim.sleep(12)
                            horsereq = requests.get(horseurl, headers=headers)
                            soup = bs.BeautifulSoup(horsereq.text, 'lxml')
                    horsrl = soup.find('a').get('href')
                    url = 'http://www.equineline.com/' + horsrl
                    # pull the reference_number query parameter out of the URL
                    start = url.find('reference_number=')
                    end = url.find('&registry')
                    refnum = url[start + 17:end]
                    print(refnum)
                    link = ('http://www.equineline.com/'
                            'Free5XPedigreeNickingDisplay.cfm'
                            '?page_state=DISPLAY_REPORT&reference_number='
                            + refnum)
                    try:
                        maker = requests.get(link, headers=headers)
                    except requests.exceptions.RequestException:
                        tim.sleep(6)
                        maker = requests.get(link, headers=headers)
                    supica = bs.BeautifulSoup(maker.text, 'lxml')
                    table = supica.find('table')
                    # the nicking report sometimes comes back empty; retry twice
                    if table is None:
                        tim.sleep(6)
                        print("retrying")
                        maker = requests.get(link, headers=headers)
                        supica = bs.BeautifulSoup(maker.text, 'lxml')
                        table = supica.find('table')
                        if table is None:
                            print("retrying")
                            tim.sleep(6)
                            maker = requests.get(link, headers=headers)
                            supica = bs.BeautifulSoup(maker.text, 'lxml')
                            table = supica.find('table')
                    inftab = get_table(table)
                ud = str(uuid.uuid4())
                horsedic = {
                    'P#': tds[2].text,
                    'Name': tds[3].text,
                    'Claim': 'No claim',
                    'Wgt': tds[5].text,
                    'Jockey': hor[0].text,
                    'Trainer': hor[1].text,
                    'Sire': siredam[0].text,
                    'Dam': siredam[1].text,
                    'info': inftab,
                    'uuid': ud,
                }
                print(horsedic)
                horselist.append(horsedic)
            race['horses'] = horselist
    print(events)
    o = Country('4', 'Japan', events, fontic)
    o.save()
    with open('nippon2.json', 'w') as f:
        f.write(json.dumps(events))