Ejemplos de soup en Python

Lenguaje de programación: Python

Namespace/Package Name: djyptestutils

Método / Función: soup

Ejemplos en hotexamples.com: 17

Python soup - 17 ejemplos encontrados. Estos son los ejemplos en Python del mundo real mejor valorados de djyptestutils.soup extraídos de proyectos de código abierto. Puedes valorar ejemplos para ayudarnos a mejorar la calidad de los ejemplos.

Ejemplo n.º 1

Mostrar archivo

def check_user_profile(geocacher):
    """Scrape a geocaching.su profile page and store its fields on ``geocacher``.

    The profile for ``geocacher.pid`` is requested up to 100 times, retrying
    whenever ``BrowserStateError`` is raised.  If it never loads, the URL is
    written to ``cant_open_profile.txt`` and ``False`` is returned.
    Otherwise the ``<th>`` cells of the ``pages`` table are read by position
    into a temporary ``Cacher`` whose attributes are then copied onto
    ``geocacher`` before it is saved.

    Args:
        geocacher: object with a ``pid`` attribute and a ``save()`` method.

    Returns:
        True when the profile was parsed and saved, False when the page
        could not be loaded.
    """
    url = 'http://www.geocaching.su/profile.php?pid=%s' % geocacher.pid
    loaded = False
    cnter = 0
    # Mode 'w' truncates the log file on every call, exactly as before.
    with open('cant_open_profile.txt', 'w') as fh:
        while not loaded and cnter < 100:
            try:
                yplib.get(url)
                loaded = True
            except BrowserStateError:
                cnter += 1

        if not loaded:
            print('cannot go to %s' % url)
            # The write has to happen while the file is still open; the
            # previous code closed the handle first and raised ValueError.
            fh.write(url)
            return False

    soup = yplib.soup()
    tbl = soup.find('table', {'class': 'pages'})
    all_cells = []
    for row in tbl.findAll('tr'):
        for cell in row.findAll('th'):
            all_cells.append(cell.text.encode('utf8'))

    # The fields below are picked by fixed position in the flattened list of
    # <th> cells, so they depend on the exact layout of the profile table.
    user = Cacher()
    user.pid = geocacher.pid
    user.uid = get_uid(tbl)
    user.nickname = text_or_none(all_cells[1])
    user.name = text_or_none(all_cells[2])
    user.birstday = strdate_or_none(all_cells[3])
    user.sex = sex_or_none(all_cells[4])
    user.country = text_or_none(all_cells[5])
    user.oblast = text_or_none(all_cells[6])
    user.town = text_or_none(all_cells[7])
    user.phone = text_or_none(all_cells[9])
    user.icq = text_or_none(all_cells[10])
    if user.icq and not user.icq.isdigit():
        # Only purely numeric ICQ values are kept.
        user.icq = None
    user.web = text_or_none(all_cells[11])
    # The GPS cell is still read (index 15) but its value is deliberately
    # not stored; user.gps is always None.
    gps = text_or_none(all_cells[15])
    user.gps = None
    user.created_caches = int_or_none(all_cells[18])
    user.found_caches = int_or_none(all_cells[19])
    user.photo_albums = int_or_none(all_cells[21])
    if len(all_cells) > 23:
        # The trailing cells are addressed from the end of the list; when
        # the third-from-last cell is not a date, the second-from-last is
        # used for the registration date as well.
        user.register_date = date_or_none(all_cells[-3])
        if user.register_date is None:
            user.register_date = date_or_none(all_cells[-2])
        user.last_login = date_or_none(all_cells[-2])
        user.forum_posts = int_or_none(all_cells[-1])

    geocacher.__dict__.update(user.__dict__)
    print('save', geocacher.pid)
    geocacher.save()

    return True

Ejemplo n.º 2

Mostrar archivo

def main():
    LOAD_ = True
    
    start = time() 
   
    yplib.setUp()
    yplib.set_debugging(False)
    
    
    if LOAD_:
        GeoRUSSubject.objects.all().delete()
        yplib.get('http://ru.wikipedia.org/wiki/%D0%9A%D0%BE%D0%B4%D1%8B_%D1%81%D1%83%D0%B1%D1%8A%D0%B5%D0%BA%D1%82%D0%BE%D0%B2_%D0%A0%D0%BE%D1%81%D1%81%D0%B8%D0%B9%D1%81%D0%BA%D0%BE%D0%B9_%D0%A4%D0%B5%D0%B4%D0%B5%D1%80%D0%B0%D1%86%D0%B8%D0%B8')
        soup=yplib.soup()
        tbl = soup.find('table', {'class': "sortable standard"})
        rows = tbl.findAll('tr')
        for row in rows:
            cells = row.findAll('td')
            print cells
            if cells:
                subject = GeoRUSSubject(country_iso='RU', geoname_id=0)
                cell = cells[0]
                a = cell.find('a')
                if a:
                    subject.name = a.text
                    subject.ascii_name = cells[1].text
                    subject.code = cells[2].text
                    subject.gai_code = cells[3].text
                    subject.iso_3166_2_code = cells[4].text
                    
                    subject.save()

    elapsed = time() - start
    print "Elapsed time -->", elapsed

Ejemplo n.º 3

Mostrar archivo

Archivo: get_UA_admin_sub_from_wiki.py Proyecto: kostin-aleks/gpsfun

def main():
    LOAD_ = True

    start = time()

    yplib.setUp()
    yplib.set_debugging(False)

    if LOAD_:
        GeoUKRSubject.objects.all().delete()
        yplib.get('http://en.wikipedia.org/wiki/ISO_3166-2:UA')
        soup = yplib.soup()
        tbl = soup.find('table', {'class': "wikitable sortable"})
        rows = tbl.findAll('tr')
        for row in rows:
            cells = row.findAll('td')
            if cells:
                subject = GeoUKRSubject(country_iso='UA', geoname_id=0)
                cell = cells[1]
                a = cell.find('a')
                if a:
                    subject.ascii_name = a.text
                    subject.name = ''
                    fullcode = cells[0].text.split('-')
                    subject.code = fullcode[1]

                    subject.save()

    elapsed = time() - start
    print "Elapsed time -->", elapsed

Ejemplo n.º 4

Mostrar archivo

Archivo: update_gcsu_geocachers.py Proyecto: kostin-aleks/gpsfun

def main():
    start = time()

    yplib.setUp()
    yplib.set_debugging(False)

    r = yplib.post2('http://www.geocaching.su/?pn=108',
                    (('Log_In', 'Log_In'), ('email', '*****@*****.**'),
                     ('passwd', 'zaebalixakeryvas'), ('longterm', '1')))

    soup = yplib.soup()

    a = soup.find('a', attrs={'class': "profilelink"}, text='galdor')
    if not a:
        print 'Authorization failed'
        return False

    excluded_id = [118575, 111821, 109578, 96417]

    all_id = []
    for k in range(10):
        r = yplib.post2('http://www.geocaching.su/?pn=108',
                        (('sort', '2'), ('page', str(k)), ('in_page', '1000'),
                         ('updown', '2')))
        soup = yplib.soup()
        a_list = soup.findAll('a', {'class': "profilelink"})
        t = re.compile('\?pid=(\d+)')
        for a in a_list[:-1]:
            if a.get('onclick'):
                user_id = t.findall(a['onclick'])[0]
                #login = a.text.encode('utf8')
                if not (user_id in all_id) and not (user_id in excluded_id):
                    all_id.append(user_id)
        check_id_list(all_id)

    elapsed = time() - start
    print "Elapsed time -->", elapsed
    log('upd_gcsu_cachers', 'OK')

Ejemplo n.º 5

Mostrar archivo

def main():
    if not switch_off_status_updated():
        return False
    
    LOAD_CACHES = True
    
    start = time() 
    
    yplib.setUp()
    yplib.set_debugging(False)

    r = yplib.post2('http://www.geocaching.su/?pn=108',
            (('Log_In','Log_In'), ('email', '*****@*****.**'), ('passwd','zaebalixakeryvas'), ('longterm', '1')))

    soup=yplib.soup()

    a = soup.find('a', attrs={'class':"profilelink"}, text='galdor')
    if not a:
        print 'Authorization failed'
        return False
    
    if LOAD_CACHES:
        #Cach.objects.all().delete()
        cntr_list = []
        t = re.compile('\<td\>(\w\w\d+)\<\/td\>')
        for p in range(120):
            item_list = []
            r = yplib.post2('http://www.geocaching.su/?pn=101',
                (('sort','1'), ('page', str(p)), ('in_page','100'), ('finded','1'), ('y','0'), ('x','0'), ('updown', '1')))
            html = yplib.show()
            code_list = t.findall(html)
            for code in code_list:
                pid = code[2:]
                item_list.append({'id': pid, 'code': code})
            
            if item_list == cntr_list:
                break
            else:
                cntr_list = item_list
                check_cach_list(item_list)
            
    switch_on_status_updated()
    log('gcsu_caches', 'OK')
            
    elapsed = time() - start
    print "Elapsed time -->", elapsed

Ejemplo n.º 6

Mostrar archivo

def main():
    start = time()

    yplib.setUp()
    yplib.set_debugging(False)
    r = yplib.post2('http://www.geocaching.su/?pn=108',
                    (('Log_In', 'Log_In'), ('email', '*****@*****.**'),
                     ('passwd', 'zaebalixakeryvas'), ('longterm', '1')))

    soup = yplib.soup()

    a = soup.find('a', attrs={'class': "profilelink"}, text='galdor')
    if not a:
        print 'Authorization failed'
        return False

    #all_updated = False
    t = re.compile('\<td\>(\w\w\d+)\<\/td\>')

    for p in range(30):
        item_list = []
        r = yplib.post2('http://www.geocaching.su/?pn=101',
                        (('sort', '1'), ('page', str(p)), ('in_page', '1000'),
                         ('finded', '1'), ('y', '0'), ('x', '0'),
                         ('updown', '1')))
        html = yplib.show()
        code_list = t.findall(html)
        for code in code_list:
            pid = code[2:]
            item_list.append({'id': pid, 'code': code})
        print 'count %s' % len(item_list)
        check_cach_list(item_list)

    log('upd_gcsu_caches', 'OK')

    elapsed = time() - start
    print "Elapsed time -->", elapsed

Ejemplo n.º 7

Mostrar archivo

def check_cach(cach_pid):
    """Scrape one geocaching.su cache page and create or update its ``Cach`` row.

    The page ``?pn=101&cid=<cach_pid>`` is fetched and parsed in three steps:
    the ``<h1 class="hdr">`` header (name and ``type/pid`` code), the narrow
    side table (type, class, coordinates, location, ratings, attributes) and
    the bold author block (creator, creation/edit dates, co-authors).  The
    values are collected on a ``TheCach`` object which is then used to update
    an existing ``Cach`` or to populate a newly created one.

    Returns:
        False when fetching the page raises, True otherwise.
    """
    def get_coordinates(cell):
        """Return degree/minute parts plus the full N/S and E/W strings of a cell."""
        coordinates = cell.text
        # t2..t4 are defined further down in the enclosing function; they
        # are bound by the time this helper is called.
        parts = t2.findall(coordinates)[0]
        if len(parts) == 4:
            ns_degree, ns_minute, ew_degree, ew_minute = parts
        parts = t3.findall(coordinates)
        NS = parts[0]
        parts = t4.findall(coordinates)
        EW = parts[0]
        
        return ns_degree, ns_minute, ew_degree, ew_minute, NS, EW
    
    def get_type(cell):
        """Return the cell text."""
        return cell.text
    
    def get_class(cell):
        """Join the cell's text children accepted by ``nottag`` with ';'."""
        class_ = None
        if cell:
            parts = cell.contents
            items = []
            for p in parts:
                txt = p.string
                if txt and nottag(txt):
                    items.append(txt)
            class_ = ';'.join(items)
        return class_
    
    def get_mestnost(cell):
        """Return (country, oblast) taken from the cell's 1st and 3rd children."""
        oblast = country = None
        parts = cell.contents
        if len(parts):
            country = parts[0]
        if len(parts) > 2:
            oblast = parts[2]
        return country, oblast 
    
    def get_dostupnost(cell):
        """Return the values after ':' in the cell's 1st and 3rd children."""
        parts = cell.contents
        dostupnost = parts[0].split(':')[1].strip()
        mestnost = parts[2].split(':')[1].strip()
        return dostupnost, mestnost
    
    def get_town(cell):
        """Return the cell text."""
        return cell.text
        
    def get_grade(cell):
        """Return the ``title`` of the cell's image, or None without an image."""
        grade = None
        if cell.img:
            grade = cell.img.get('title')
        return grade
    
    def get_attributes(element):
        """Join the titles of all attribute icons under ``element`` with ';'."""
        attr = None
        items = []
        imgs =  element.findAll('img')
        for img in imgs:
            # NOTE(review): img.get('src') returns None for an <img> without
            # src, which would make the ``in`` test raise TypeError.
            if 'images/attrib/' in img.get('src'):
                items.append(img.get('title'))
            attr = ';'.join(items)
        return attr 
    
    url = 'http://www.geocaching.su/?pn=101&cid=%d'%int(cach_pid)
    try:
        yplib.get(url)
    except:
        # NOTE(review): bare except — any error (not only network failures)
        # is reported as 'exception' and the cache is skipped.
        print 'exception'
        return False
    soup=yplib.soup()

    h = soup.find('h1', {'class':'hdr'})
    # t  : text before the first '[' of the header (cache name)
    # t1 : bracketed "<type>/<pid>" part of the header
    # t2 : degrees and minutes of both coordinates
    # t3 : full N/S coordinate, t4 : full E/W coordinate
    # t5 : creator pid inside the WinPopup('profile.php?pid=...') handler
    t = re.compile('([^\[]+)\[.+\]')
    t1 = re.compile('[^\[]+\[([^\[\]]+\/[^\[\]]+)\]')
    t2 = re.compile('[N,S]\s(\d+)\&\#176\;\s([\d\.]+).+[E,W]\s(\d+)\&\#176\;\s([\d\.]+)')
    t3 = re.compile('([N,S]\s\d+\&\#176\;\s[\d\.]+.)')
    t4 = re.compile('([E,W]\s\d+\&\#176\;\s[\d\.]+.)')
    t5 = re.compile('WinPopup\(\'profile\.php\?pid\=(\d+)')
    
    name = None
    items = t.findall(h.text)
    if items:
        name = items[0]
    full_code = None
    items = t1.findall(h.text)
    if items:
        full_code = items[0]
        type_code, pid = full_code.split('/')
    # NOTE(review): type_code is only bound when the header contains a
    # "[type/pid]" part; otherwise its use further down raises NameError.
    
    tbl = soup.find('table', attrs={'cellpadding':3, 'width':160})
    rows = tbl.findAll('tr')
    
    ns_degree = ns_minute = ew_degree = ew_minute = NS = EW = None
    country = oblast = town = None
    dostupnost = mestnost = None
    cach_type = cach_class = None
    grade = attr = None
    
    # ``act`` remembers which heading was seen on the previous row, because
    # for those fields the value sits in the row that follows the heading.
    act = None
    for row in rows:
        tds = row.findAll('td')
        ths = row.findAll('th')
        td = None
        if tds:
            td = tds[0]
        
        cell = None
        if act:
            # Value row: prefer the first <th>, else the second <td>.
            if ths:
                cell = ths[0]
            elif tds:
                cell = tds[1]
            if act == 'coord':
                ns_degree, ns_minute, ew_degree, ew_minute, NS, EW = get_coordinates(cell)
            if act == 'mestnost':
                country, oblast = get_mestnost(cell)
            if act == 'dostupnost':
                dostupnost, mestnost = get_dostupnost(cell)
            if act == 'town':
                town = get_town(cell)
            if act == 'grade':
                grade = get_grade(cell)
            act = None
        
        # Heading detection.  The Russian labels mean: "Type:", "Class:",
        # "COORDINATES", "LOCATION", "NEAREST", "RATINGS" (scores),
        # "RATING" and "ATTRIBUTES".
        if td and td.text.startswith(u'Тип:'):
            cach_type = get_type(tds[1])
            act = None
        if td and td.text.startswith(u'Класс:'):
            cach_class = get_class(tds[1])
            act = None
        if td and td.text.startswith(u'КООРДИНАТЫ'):
            act = 'coord'
        if td and td.text.startswith(u'МЕСТНОСТЬ'):
            act = 'mestnost'
        if td and td.text.startswith(u'БЛИЖАЙШИЙ'):
            act = 'town'
        if td and td.text.startswith(u'ОЦЕНКИ'):
            act = 'dostupnost'
        if td and td.text.startswith(u'РЕЙТИНГ'):
            act = 'grade'
        if td and td.text.startswith(u'АТРИБУТЫ'):
            # Attribute icons are searched in the whole table, not the row.
            attr = get_attributes(tbl)
            act = None
    
    created_by = created_date = changed_date = coauthors = None
    # The author block is located by its exact inline style.
    div = soup.findAll('div', attrs={'style':'padding: 5px; font-family: Verdana; font-weight: bold;'})[0]
    a = div.a
    if a:
        onclick = a.get('onclick')
        if onclick:
            pid = t5.findall(onclick)
            if pid:
                created_by = int(pid[0])

    parts = div.contents
    for p in parts:
        txt = p.string
        #if txt:
            #print txt.encode('utf8'), type(txt)
            
        if txt and nottag(txt):
            txt = txt.string.strip()
            # "Создан:" = "Created:"; the date is expected as DD.MM.YYYY.
            if txt.startswith(u'Создан:'):
                items = txt.split()
                if len(items) == 2:
                    created_date = items[1]
                    if created_date:
                        day, month, year = [int(s) for s in created_date.split('.')]
                    created_date = date(year, month, day)

            # "(отредактирован" = "(edited"; surrounding parentheses are
            # stripped before splitting.
            if txt.startswith(u'(отредактирован'):
                txt = txt[1:-1]
                items = txt.split()
                if len(items) == 2:
                    changed_date = items[1]
                    if changed_date:
                        day, month, year = [int(s) for s in changed_date.split('.')]
                    changed_date = date(year, month, day)

            # "Компаньоны:" = "Companions:" (co-authors are present).
            if txt.startswith(u'Компаньоны:'):
                coauthors = 'yes'
                
    the_cach = TheCach()
    the_cach.pid = cach_pid
    the_cach.code = '%s%s' % (type_code, the_cach.pid)
    the_cach.type_code = type_code
    #print    
    #print cach.pid
    #print '|%s|'%the_cach.code.encode('utf8')
    the_cach.name = text_or_none(name)
    the_cach.cach_type = text_or_none(cach_type)
    the_cach.cach_class = text_or_none(cach_class)
    the_cach.loc_NS = char_or_none(NS)
    the_cach.loc_EW = char_or_none(EW)
    the_cach.loc_NS_degree = int_or_none(ns_degree)
    the_cach.loc_EW_degree = int_or_none(ew_degree)
    the_cach.loc_NS_minute = float_or_none(ns_minute)
    the_cach.loc_EW_minute = float_or_none(ew_minute)
    the_cach.country = text_or_none(country)
    the_cach.oblast = text_or_none(oblast)
    the_cach.town = text_or_none(town)
    the_cach.dostupnost = int_or_none(dostupnost)
    the_cach.mestnost = int_or_none(mestnost)
    the_cach.grade = float_or_none(grade)
    the_cach.cach_attr = text_or_none(attr)
    the_cach.created_by = created_by
    the_cach.created_date = created_date
    the_cach.changed_date = changed_date
    the_cach.coauthors = coauthors
    
    # NOTE(review): if text_or_none(name) returns None this print raises
    # AttributeError — confirm against the helper.
    print the_cach.name.encode('utf8')
    geocache = get_object_or_none(Cach, pid=cach_pid)
    if geocache is not None:
        update_geocache(geocache, the_cach)
    else:
        # New cache: create the row first, then copy all scraped attributes.
        cach = Cach.objects.create(pid=cach_pid)
        cach.__dict__.update(the_cach.__dict__)
        print 'save', cach.pid
        cach.save()
        #nc += 1
    #if True:
        #cach.__dict__.update(the_cach.__dict__)
        #print 'save', cach.pid
        #cach.save()
    
    return True

Ejemplo n.º 8

Mostrar archivo

def main():
    """Reload geocachers from geocaching.su, depending on two local switches.

    Does nothing (returns False) unless ``switch_off_status_updated()``
    succeeds.  After logging in, the full user list is optionally reloaded
    page by page and a fixed list of missing profiles is optionally
    re-checked.  Both switches are currently off, so only the login check,
    the timing output and the final status/log calls run.

    Returns:
        False when the status switch or the login fails, otherwise None.
    """
    if not switch_off_status_updated():
        return False

    reload_all_geocachers = False
    reload_absent_geocachers = False

    started_at = time()

    cursor = connection.cursor()

    cursor.execute('select * from geocacher')

    yplib.setUp()
    yplib.set_debugging(False)

    login_form = (('Log_In','Log_In'), ('email', '*****@*****.**'), ('passwd','zaebalixakeryvas'), ('longterm', '1'))
    r = yplib.post2('http://www.geocaching.su/?pn=108', login_form)

    page = yplib.soup()

    # The login is confirmed by the presence of this account's profile link.
    profile_link = page.find('a', attrs={'class':"profilelink"}, text='galdor')
    if not profile_link:
        print('Authorization failed')
        return False

    if reload_all_geocachers:
        Geocacher.objects.all().delete()
        previous_users = []
        known_ids = []
        for page_no in range(2500):
            print('page', page_no + 1)

            page_users = []
            query = (('sort','1'), ('page', str(page_no)),
                     ('in_page','100'), ('updown', '1'))
            r = yplib.post2('http://www.geocaching.su/?pn=108', query)
            page = yplib.soup()
            links = page.findAll('a', {'class':"profilelink"})
            pid_pattern = re.compile('\?pid=(\d+)')
            # The last profile link on the page is left out.
            for link in links[:-1]:
                if not link.get('onclick'):
                    continue
                user_id = pid_pattern.findall(link['onclick'])[0]
                login = link.text.encode('utf8')
                if user_id in known_ids:
                    continue
                page_users.append({'id': user_id, 'login': login})
                known_ids.append(user_id)

            if page_users == previous_users:
                # Nothing new compared with the previous page: stop paging.
                break
            previous_users = page_users
            check_id_list(page_users)

    if reload_absent_geocachers:
        absent_pids = (469, 406, 1224, 4400, 11910,  4456, 13439,  7707, 8887, 3156, 8094)
        check_id_list([{'id': pid, 'login': u''} for pid in absent_pids])

    print("Elapsed time -->", time() - started_at)
    switch_on_status_updated()
    log('gcsu_geocachers', 'OK')

Ejemplo n.º 9

Mostrar archivo

def geocacher_format_insert_string(pid):
    """Build the parenthesised value list for one geocaching.su profile.

    The profile page for ``pid`` is requested up to 100 times, retrying on
    ``BrowserStateError``.  If it never loads, the URL is written to
    ``cant_open_profile.txt`` and ``False`` is returned.  Otherwise the
    ``pages`` table is parsed: labelled rows (one ``<td>`` title plus
    ``<th>`` data) fill a dict keyed by field, while name, birthday, sex and
    phone are still taken by position from the flattened ``<th>`` cells.

    Field order of the result: pid, uid, nickname, name, birthday, sex,
    country, oblast, town, phone, created caches, found caches, photo
    albums, register date, last login, forum posts.

    Args:
        pid: profile id used in the profile URL.

    Returns:
        A string ``"(v1,v2,...)"`` with every ``%`` doubled, or False when
        the profile page could not be loaded.
    """
    # Row title prefix (Russian label on the page) -> key in ``theuser``.
    title_keys = (
        (u'Псевдоним:', 'nickname'),
        (u'Страна:', 'country'),
        (u'Область:', 'oblast'),
        (u'Нас.пункт', 'town'),
        (u'Создал тайников:', 'created'),
        (u'Нашел тайников:', 'found'),
        (u'Рекомендовал тайников:', 'recommended'),
        (u'Фотоальбомы:', 'photo_albums'),
        (u'Был на сайте', 'last_visited'),
        (u'Дата регистрации:', 'registered'),
        (u'Сообщений в форумах:', 'forum_posts'),
    )

    # try open profile
    url = 'http://www.geocaching.su/profile.php?pid={}'.format(pid)
    loaded = False
    cnter = 0

    # The with-block guarantees the log file is closed on the failure path
    # too; previously the early return skipped fh.close().  Mode 'w' still
    # truncates the file on every call, as before.
    with open('cant_open_profile.txt', 'w') as fh:
        while not loaded and cnter < 100:
            try:
                yplib.get(url)
                loaded = True
            except BrowserStateError:
                cnter += 1

        if not loaded:
            print('cannot go to %s' % url)
            fh.write(url)
            return False

    # processing profile
    soup = yplib.soup()
    tbl = soup.find('table', {'class': 'pages'})
    all_cells = []
    theuser = {}
    for row in tbl.findAll('tr'):
        data_cells = row.findAll('th')
        for cell in data_cells:
            all_cells.append(cell.text.encode('utf8'))

        title_cells = row.findAll('td')
        if len(title_cells) != 1:
            continue

        title = title_cells[0].text
        # The value is the last <th> of the row, or '' when there is none.
        data = data_cells[-1].text if data_cells else ''
        for prefix, key in title_keys:
            if title.startswith(prefix):
                theuser[key] = data
                break

    values = [
        int_field(get_uid(tbl)),                        # uid
        text_field(theuser.get('nickname') or ''),      # nickname
        text_field(all_cells[2]),                       # name
        date_field(all_cells[3]),                       # birstday
        sex_field(all_cells[4]),                        # sex
        text_field(theuser.get('country') or ''),       # country
        text_field(theuser.get('oblast') or ''),        # oblast
        text_field(theuser.get('town') or ''),          # town
        text_field(all_cells[9]),                       # phone
        int_field(theuser.get('created') or 0),         # created_caches
        int_field(theuser.get('found') or 0),           # found_caches
        int_field(theuser.get('photo_albums') or 0),    # photo_albums
        date_field(theuser.get('registered') or ''),    # register_date
        date_field(theuser.get('last_visited') or ''),  # last_login
        int_field(theuser.get('forum_posts') or 0),     # forum_posts
    ]
    fields = str(pid) + ''.join(',{}'.format(value) for value in values)

    # '%' is doubled so the result can be embedded in a %-formatted string.
    return "({})".format(fields).replace('%', '%%')

Ejemplo n.º 10

Mostrar archivo

def main():
    """Synchronise opencaching.pl caches with the local ``Geothing`` table.

    Logs in to opencaching.pl, runs a search, follows the OziExplorer
    ``.wpt`` export link from the result page and walks the waypoint lines.
    For every waypoint whose name or location differs from the stored
    record, details are requested from the OKAPI ``geocache`` service and the
    record is updated or created.  Finally the number of duplicated codes is
    counted and a summary is logged.

    Returns:
        False when the login could not be confirmed, None otherwise
        (including when no ``.wpt`` link was found).
    """
    # NOTE(review): LOAD_CACHES is assigned but never read in this function.
    LOAD_CACHES = True

    start = time()

    yplib.setUp()
    yplib.set_debugging(False)

    # log in
    # NOTE(review): the account credentials are hard-coded in the source.
    r = yplib.post2('http://opencaching.pl/login.php',
                    (('LogMeIn', 'zaloguj'), ('email', 'kurianin'),
                     ('password', 'gjhjkjy'), ('action', 'login'),
                     ('target', 'index.php')))

    soup = yplib.soup()

    # The login is confirmed by a link whose text is the user name.
    a = soup.find('a', text='kurianin')
    if not a:
        print 'Authorization failed'
        return False
    print 'OK'

    ## search page
    #r = yplib.get('http://opencaching.pl/search.php')
    #soup = yplib.soup()

    # get wpt file
    r = yplib.get(
        'http://opencaching.pl/search.php?searchto=searchbyname&showresult=1&expert=0&output=HTML&sort=bycreated&f_inactive=1&f_ignored=1&f_userfound=1&f_userowner=1&f_watched=0&f_geokret=0&country=PL&region=&cachetype=1111111110&cache_attribs=&cache_attribs_not=&cachesize_1=1&cachesize_2=1&cachesize_3=1&cachesize_4=1&cachesize_5=1&cachesize_6=1&cachesize_7=1&cachevote_1=-3&cachevote_2=3.000&cachenovote=1&cachedifficulty_1=1&cachedifficulty_2=5&cacheterrain_1=1&cacheterrain_2=5&cacherating=0&cachename=%25&cachename='
    )
    soup = yplib.soup(cp='utf8')
    link_to_wpt = ''

    #the_div = soup.find('div', {'class':"content2-pagetitle"})

    # Pick the first export link whose href looks like the "all results"
    # (count=max) .wpt download.
    wpt_link = re.compile('ocpl\d+\.wpt\?.+count\=max.*')
    a_list = soup.findAll('a', {'class': "links", 'title': "Oziexplorer .wpt"})
    if a_list:
        for a in a_list:
            if a.get('href') and wpt_link.match(a.get('href')):
                link_to_wpt = a.get('href')
                break
    print link_to_wpt

    if link_to_wpt:
        r = yplib.get(link_to_wpt)
        soup = yplib.soup(cp='utf8')
        # One waypoint per line of the downloaded text.
        wpt = soup.text.split('\n')
    else:
        print 'oblom'
        return

    # Positions of the used columns within a comma-separated waypoint line.
    WPT_CODE = 10
    WPT_LAT = 2
    WPT_LON = 3
    WPT_TITLE = 1
    WPT_DATE = 4
    # NOTE(review): the OKAPI consumer key is hard-coded in the source.
    MY_CONSUMER_KEY = 'fky3LF9xvWz9y7Gs3tZ6'
    FIELDS = 'code|name|location|type|status|url|owner|date_created'
    geocach_api_request = 'http://opencaching.pl/okapi/services/caches/geocache?cache_code=%s&consumer_key=%s&fields=%s'

    geosite = Geosite.objects.get(code='OCPL')
    print geosite
    print len(wpt), 'points'
    k = 0   # lines seen
    uc = 0  # records updated
    nc = 0  # records created
    for point in wpt:
        k += 1
        fields = point.split(',')
        # Only lines whose first column is '-1' are processed; other lines
        # are presumably file headers — TODO confirm against the .wpt format.
        if fields[0] == '-1':
            the_geothing = TheGeothing()
            the_geothing.pid = 1
            the_location = TheLocation()

            # Latitude/longitude are stored as decimal degrees.
            lat_degree = float(fields[WPT_LAT])
            the_location.NS_degree = lat_degree
            #the_location.NS_minute = (abs(lat_degree) - abs(the_location.NS_degree)) * 60
            lon_degree = float(fields[WPT_LON])
            the_location.EW_degree = lon_degree
            #the_location.EW_minute = (abs(lon_degree) - abs(the_location.EW_degree)) * 60

            code_str = fields[WPT_CODE]
            parts = code_str.split('/')
            # The code column is expected to hold four '/'-separated parts,
            # the first being the cache code.
            if len(parts) == 4:
                cache_code = parts[0]
                the_geothing.code = cache_code
                the_geothing.name = fields[WPT_TITLE]
                geothing_items = Geothing.objects.filter(
                    code=the_geothing.code, geosite=geosite)
                if geothing_items.count() > 0:
                    geothing = geothing_items[0]
                    # Unchanged name and location: nothing to do.
                    if the_geothing.name == geothing.name and not location_was_changed(
                            geothing.location, the_location):
                        continue

                url = geocach_api_request % (cache_code, MY_CONSUMER_KEY,
                                             FIELDS)
                try:
                    response = urllib2.urlopen(url)
                    json_str = response.read()
                    cache_data = json.loads(json_str)
                    if cache_data.get('status') != 'Available':
                        continue
                    #print cache_data.get('type')
                    the_geothing.type_code = OCPL_TYPES.get(
                        cache_data.get('type'))
                    #print the_geothing.type_code
                    cache_url = cache_data.get('url')
                    if not cache_url:
                        continue
                    # The numeric pid is the hexadecimal suffix after "OP"
                    # at the end of the cache URL.
                    p = re.compile(u'OP([\dA-F]+)$')
                    dgs = p.findall(cache_url)
                    the_geothing.pid = int(dgs[0], 16)
                    owner_name = ''
                    if cache_data.get('owner'):
                        owner_name = cache_data.get('owner').get('username')
                    the_geothing.author = owner_name

                    date_created = cache_data.get('date_created')
                    if date_created:
                        # Keep only the leading YYYY-MM-DD part.
                        date_created = date_created[:10]
                        parts = date_created.split('-')
                        if parts and len(parts) == 3:
                            dt = datetime(int(parts[0]), int(parts[1]),
                                          int(parts[2]))
                            the_geothing.created_date = dt

                except:
                    # NOTE(review): bare except; additionally cache_data is
                    # unbound (first failure) or stale (later failures) when
                    # urlopen/read/json.loads is what raised.
                    print
                    print 'exception.'
                    print url
                    print cache_data
                    #break
                    continue

            # NOTE(review): when the code column did not have four parts,
            # type_code was never assigned here — whether this attribute
            # access is safe depends on TheGeothing; confirm.
            if the_geothing.type_code in GEOCACHING_ONMAP_TYPES:
                geothing = get_object_or_none(Geothing,
                                              pid=the_geothing.pid,
                                              geosite=geosite)
                if geothing is not None:
                    update_geothing(geothing, the_geothing, the_location)
                    uc += 1
                else:
                    create_new_geothing(the_geothing, the_location, geosite)
                    nc += 1
            #break

    # Count the codes that occur more than once in the geothing table.
    sql = """
    select COUNT(*)  
    FROM
    (
    select g.code as code, count(id) as cnt 
    from geothing g 
    group by g.code
    having cnt > 1
    ) as tbl 
    """
    dc = sql2val(sql)
    message = 'OK. updated %s, new %s, doubles %s' % (uc, nc, dc)
    log('map_ocpl_caches', message)
    elapsed = time() - start
    print "Elapsed time -->", elapsed

Ejemplo n.º 11

Mostrar archivo

def main():
    """Export caches from geocaching.su and optionally geocode stored caches.

    After logging in, the ``LOAD_CACHES`` branch reads the region checkboxes
    from the export popup, posts an export request for all of them, prints
    the response text and returns.  The ``LOAD_GEO_LOCATION`` branch (off by
    default) looks up country and subdivision data for every stored ``Cach``
    through the geonames ``countrySubdivision`` service.

    Returns:
        False when the login could not be confirmed, None otherwise.
    """
    #if not switch_off_status_updated():
    #return False

    LOAD_CACHES = True
    LOAD_GEO_LOCATION = False

    start = time()

    yplib.setUp()
    yplib.set_debugging(False)

    # NOTE(review): the account credentials are hard-coded in the source.
    r = yplib.post2('http://www.geocaching.su/?pn=108',
                    (('Log_In', 'Log_In'), ('email', '*****@*****.**'),
                     ('passwd', 'zaebalixakeryvas'), ('longterm', '1')))

    soup = yplib.soup()

    # The login is confirmed by the presence of this account's profile link.
    a = soup.find('a', attrs={'class': "profilelink"}, text='galdor')
    if not a:
        print 'Authorization failed'
        return False
    print 'OK'

    if LOAD_CACHES:

        r = yplib.get('http://www.geocaching.su/site/popup/selex.php')
        soup = yplib.soup()
        #print soup
        #html = yplib.show()

        # Collect the value of every checkbox named 'point[]'.
        chbox_list = soup.findAll('input', type='checkbox')
        regions = []
        #print chbox_list
        print
        for chbox in chbox_list:
            #print chbox.get('value')
            v = chbox.get('value')
            if v and chbox.get('name', '') == 'point[]':
                regions.append(v)
        print
        print regions
        # Export form: fixed options first, then one 'point[]' per region.
        data = [
            ('translit', '0'),
            ('fmt', 'wpt'),
            ('code_to_name', '1'),
            ('finded', '2'),
        ]
        for r in regions:
            data.append(('point[]', r))
        print
        print data
        print
        r = yplib.post2('http://www.geocaching.su/site/popup/export.php', data)
        soup = yplib.soup()
        txt = soup.text
        print txt
        # NOTE(review): this unconditional return makes the rest of the
        # LOAD_CACHES branch (and everything after it) unreachable whenever
        # LOAD_CACHES is true — looks like a debugging leftover; confirm.
        return

        Cach.objects.all().delete()
        cntr_list = []
        t = re.compile('\<td\>(\w\w\d+)\<\/td\>')
        for p in range(100):
            item_list = []
            r = yplib.post2('http://www.geocaching.su/?pn=101',
                            (('sort', '1'), ('page', str(p)),
                             ('in_page', '100'), ('finded', '1'), ('y', '0'),
                             ('x', '0'), ('updown', '1')))
            html = yplib.show()
            code_list = t.findall(html)
            for code in code_list:
                # The id is the code without its two-character prefix.
                pid = code[2:]
                item_list.append({'id': pid, 'code': code})

            # Stop paging when a page repeats the previous page's items.
            if item_list == cntr_list:
                break
            else:
                cntr_list = item_list
                check_cach_list(item_list)
                #check_cach_list([{'id': 2746, 'code': 'EX2746'}])
            #break
    if LOAD_GEO_LOCATION:
        #.filter(pid=5408)
        for cach in Cach.objects.all():
            lat = cach.latitude_degree
            lng = cach.longitude_degree

            if lat is not None and lng is not None:
                url = 'http://ws.geonames.org/countrySubdivision?lat=%s&lng=%s&lang=ru' % (
                    lat, lng)
                print
                print cach.pid, url
                yplib.get(url)
                try:
                    soup = yplib.soup()
                except:
                    # If the Russian-language response cannot be parsed,
                    # the same lookup is repeated with lang=en.
                    # NOTE(review): bare except hides the actual error.
                    url = 'http://ws.geonames.org/countrySubdivision?lat=%s&lng=%s&lang=en' % (
                        lat, lng)
                    yplib.get(url)
                    soup = yplib.soup()
                item = soup.find('countrycode')
                if item:
                    cach.country_code = item.text.encode('utf8')

                if soup.admincode1:
                    cach.admin_code = soup.admincode1.text
                item = soup.find('code', {'type': 'FIPS10-4'})
                if item:
                    cach.code_fips10_4 = item.text
                item = soup.find('code', {'type': 'ISO3166-2'})
                if item:
                    cach.code_iso3166_2 = item.text
                item = soup.find('countryname')
                if item:
                    cach.country_name = item.text.encode('cp1251')
                if soup.adminname1:
                    cach.oblast_name = soup.adminname1.text.encode('cp1251')
                print cach.pid, cach.country_name, cach.oblast_name
                #print soup
                #print
                #print cach.pid
                cach.save()
            else:
                # No coordinates stored: print what is known for diagnosis.
                print cach.pid, lat, lng, cach.loc_NS, cach.loc_NS_degree, cach.loc_NS_minute, cach.loc_EW, cach.loc_EW_degree, cach.loc_EW_minute

            # NOTE(review): these two calls sit inside the per-cache loop,
            # so they run once for every cache — confirm that this is
            # intended and not an indentation slip.
            switch_on_status_updated()
            log('gcsu_caches', 'OK')

    elapsed = time() - start
    print "Elapsed time -->", elapsed

Ejemplo n.º 12

Mostrar archivo

def main():
    if not switch_off_status_updated():
        return False

    LOAD_CREATED_CACHE_LOGS = False
    LOAD_SEEK_CACHE_LOGS = False
    LOAD_RECOMMEND_CACHE_LOGS = False
    LOAD_PHOTOALBUM_LOGS = False

    start = time()

    yplib.setUp()
    yplib.set_debugging(False)


    r = yplib.post2('http://www.geocaching.su/?pn=108',
            (('Log_In', 'Log_In'), ('email', '*****@*****.**'),
             ('passwd', 'zaebalixakeryvas'), ('longterm', '1')))

    soup = yplib.soup()

    a = soup.find('a', attrs={'class': "profilelink"}, text='galdor')
    if not a:
        print 'Authorization failed'
        return False
    print
    print 'BEGIN'
    if LOAD_CREATED_CACHE_LOGS:
        LogCreateCach.objects.all().delete()
        print 'delete create logs'
        cachers = Geocacher.objects.all()
        print cachers.count()
        t = re.compile('\?pn\=101\&cid=(\d+)')
        t1 = re.compile(u'создан\s+(\d\d\.\d\d\.\d\d\d\d)')
        for cacher in cachers:
            if cacher.uid:
                print cacher.pid, cacher.uid
                url = 'http://www.geocaching.su/site/popup/userstat.php?s=1&uid=%s' % cacher.uid
                try:
                    yplib.get(url)
                except BrowserStateError:
                    continue
                soup = yplib.soup()
                tbl = soup.find('table', attrs={'class': 'pages'})
                if tbl:
                    #print tbl
                    rows = tbl.findAll('tr')
                    #print len(rows)
                    for row in rows:
                        cach_pid = created_date = None
                        coauthor = False
                        cell = row.find('td')
                        if cell:
                            #print cell
                            a_list = cell.findAll('a')
                            for a in a_list:
                                cach_pid = None
                                parts = t.findall(a['href'])
                                if len(parts):
                                    cach_pid = int(parts[0])

                            txt = cell.text
                            print cacher.pid, cach_pid, txt.encode('utf8')
                            if u'(соавтор)' in txt:
                                coauthor = True
                            found = t1.findall(txt)
                            if found:
                                created_date = found[0]
                                created_date = date_or_none(created_date)
                            if cach_pid:
                                the_log = LogCreateCach(
                                    author_pid=cacher.pid,
                                    cach_pid=cach_pid)
                                the_log.created_date = created_date
                                the_log.coauthor = coauthor
                                the_log.save()
                                print 'saved'

    if LOAD_SEEK_CACHE_LOGS:
        LogSeekCach.objects.all().delete()
        cachers = Geocacher.objects.all()
        t = re.compile('\?pn\=101\&cid=(\d+)')
        t1 = re.compile(u'создан\s+(\d\d\.\d\d\.\d\d\d\d)')
        t2 = re.compile(u'найден\s+(\d\d\.\d\d\.\d\d\d\d)')
        t3 = re.compile(u'оценен\s+на\s+(\d)')

        fh = open('cant_open_userstat.txt', 'w')
        for cacher in cachers:
            if cacher.uid:
                print cacher.pid, cacher.uid
                url = 'http://www.geocaching.su/site/popup/userstat.php?s=2&uid=%s' % cacher.uid

                loaded = False
                cnter = 0

                while not loaded and cnter < 100:
                    try:
                        yplib.get(url)
                        soup = yplib.soup()
                        loaded = True
                    except BrowserStateError:
                        cnter += 1
                if not loaded:
                    print 'cannot go to %s' % url
                    fh.write(url)

                tbl = soup.find('table', attrs={'class': 'pages'})
                if tbl:
                    rows = tbl.findAll('tr')
                    for row in rows:
                        cach_pid = found_date = grade = None
                        cell = row.find('td')
                        if cell:
                            a_list = cell.findAll('a')
                            for a in a_list:
                                cach_pid = None
                                parts = t.findall(a['href'])
                                if len(parts):
                                    cach_pid = int(parts[0])

                            txt = cell.text
                            found = t3.findall(txt)
                            if found:
                                g = found[0]
                                grade = int_or_none(g)
                            print cacher.pid, cach_pid, txt.encode('utf8')
                            found = t2.findall(txt)
                            if found:
                                found_date = found[0]
                                found_date = date_or_none(found_date)
                            if cach_pid:
                                the_log = LogSeekCach(
                                    cacher_pid=cacher.pid,
                                    cach_pid=cach_pid)
                                the_log.found_date = found_date
                                the_log.grade = grade
                                the_log.save()
                                print 'saved'
        fh.close()

    if LOAD_RECOMMEND_CACHE_LOGS:
        LogRecommendCach.objects.all().delete()
        cachers = Geocacher.objects.all()
        t = re.compile('\?pn\=101\&cid=(\d+)')

        for cacher in cachers:
            if cacher.uid:
                print cacher.pid, cacher.uid
                url = 'http://www.geocaching.su/site/popup/userstat.php?s=3&uid=%s' % cacher.uid
                yplib.get(url)
                soup = yplib.soup()
                tbl = soup.find('table', attrs={'class': 'pages'})
                if tbl:
                    rows = tbl.findAll('tr')
                    for row in rows:
                        cach_pid = found_date = grade = None
                        cell = row.find('td')
                        if cell:
                            a_list = cell.findAll('a')
                            for a in a_list:
                                cach_pid = None
                                parts = t.findall(a['href'])
                                if len(parts):
                                    cach_pid = int(parts[0])

                            txt = cell.text
                            print cacher.pid, cach_pid, txt.encode('utf8')
                            if cach_pid:
                                the_log = LogRecommendCach(
                                    cacher_pid=cacher.pid,
                                    cach_pid=cach_pid)
                                the_log.save()
                                print 'saved'

    if LOAD_PHOTOALBUM_LOGS:
        LogPhotoAlbum.objects.all().delete()
        cachers = Geocacher.objects.all()
        t = re.compile('showmemphotos\.php\?cid=(\d+)')

        for cacher in cachers:
            if cacher.uid:
                print cacher.pid, cacher.uid
                url = 'http://www.geocaching.su/site/popup/userstat.php?s=4&uid=%s' % cacher.uid
                yplib.get(url)
                soup = yplib.soup()
                tbl = soup.find('table', attrs={'class': 'pages'})
                if tbl:
                    rows = tbl.findAll('tr')
                    for row in rows:
                        cach_pid = found_date = grade = None
                        cell = row.find('td')
                        if cell:
                            a_list = cell.findAll('a')
                            for a in a_list:
                                cach_pid = None
                                parts = t.findall(a['href'])
                                if len(parts):
                                    cach_pid = int(parts[0])

                            txt = cell.text
                            print cacher.pid, cach_pid, txt.encode('utf8')
                            if cach_pid:
                                the_log = LogPhotoAlbum(
                                    cacher_pid=cacher.pid, cach_pid=cach_pid)
                                the_log.save()
                                print 'saved'

    elapsed = time() - start
    print "Elapsed time -->", elapsed
    switch_on_status_updated()
    log('gcsu_logs', 'OK')

Ejemplo n.º 13

Mostrar archivo

Archivo: set_geothing_location.py Proyecto: kostin-aleks/gpsfun

def main(processed_pid):
    LOAD_GEO_LOCATION = True
    LOAD_GOOGLE_LOCATION = True
    
    start = time() 
   
    if LOAD_GEO_LOCATION:
        for thing in Geothing.objects.all().extra(where=["country_code IS NULL OR admin_code IS NULL OR admin_code='777'"]).order_by('pid')[:100]:
            lat = thing.latitude_degree
            lng = thing.longitude_degree

            if lat is not None and lng is not None:
                cnt = 1
                r = 10
                admin_code = None
                while cnt < 2:
                    url = 'http://api.geonames.org/countrySubdivision?username=galdor&lat=%s&lng=%s&lang=en&radius=%d' % (lat, lng, r*cnt)
                    yplib.get(url)
                    try:
                        soup=yplib.soup()
                    except:
                        pass
                    if soup:
                        item = soup.find('countrysubdivision')
                        if item:
                            if soup.admincode1:
                                admin_code = soup.admincode1.text

                    if admin_code:
                        break
                    cnt += 1

                item = soup.find('countrycode')
                if item and item.text:
                    thing.country_code = item.text.encode('utf8')

                if soup.admincode1:
                    thing.admin_code = soup.admincode1.text

                item = soup.find('countryname')
                if item:
                    thing.country_name = item.text
                if soup.adminname1:
                    thing.oblast_name = soup.adminname1.text

                if thing.country_code and len(thing.country_code)==2:
                    thing.save()
            else:
                print 'no location', thing.pid, lat, lng, thing.location.NS, thing.location.NS_degree, thing.location.NS_minute, thing.location.EW, thing.location.EW_degree, thing.loc_EW_minute

    if LOAD_GOOGLE_LOCATION:
        for thing in Geothing.objects.all().extra(where=["country_code IS NULL OR country_name IS NULL OR admin_code IS NULL OR admin_code='777'"]).order_by('pid')[:100]:
            lat = thing.latitude_degree
            lng = thing.longitude_degree
            if lat is not None and lng is not None:
                admin_name = None
                country_code = None
                country_name = None
                admin_code = None
                url = 'http://maps.googleapis.com/maps/api/geocode/json?latlng=%s,%s&sensor=false' % (lat, lng)
                f = urllib2.urlopen(url)
                data = f.read()
                try:
                    r = json.loads(data)
                except Exception as e:
                    print type(e)
                    print e
                if r.get('status') == 'OK' and len(r.get('results')):
                    for result in r.get('results'):
                        if len(result.get('address_components')):
                            for address in  result.get('address_components'):
                                types = address.get("types")
                                if "country" in types and "political" in types:
                                    country_code = address.get("short_name")
                                if "administrative_area_level_1" in types and "political" in types:
                                    admin_name = address.get("short_name")
                                    if len(admin_name) < 6:
                                        admin_name = address.get("long_name")

                if country_code:
                    thing.country_code = country_code
                    thing.oblast = admin_name
                    thing.admin_code = get_admin_code_by_name(country_code, admin_name)
                    thing.save()
                else:
                    print  lat, lng, country_code, country_name, admin_name
            else:
                print thing.pid, lat, lng, thing.location.NS, thing.location.NS_degree, thing.location.NS_minute, thing.location.EW, thing.location.EW_degree, thing.loc_EW_minute

    sql = """
    UPDATE geothing gt
    LEFT JOIN oblast_subject os ON (
        gt.country_code=os.country_iso and gt.oblast=os.oblast
        )
    SET gt.admin_code=os.code
    WHERE os.id IS NOT NULL
    """
    r = execute_query(sql)
    
    sql = """
    UPDATE geothing 
    SET admin_code='777', 
    oblast_name='undefined subject' 
    WHERE country_code IS NOT NULL AND admin_code IS NULL
    """
    r = execute_query(sql)
    
    sql = """
    update geothing gt 
    left join geo_country c on gt.country_code=c.iso 
    set gt.country_name=c.name
    """
    r = execute_query(sql)
    
    sql = """
    update geothing gt 
    left join geo_country_subject c on gt.admin_code=c.code and gt.country_code=c.country_iso 
    set gt.oblast_name=c.name
    where gt.admin_code='777'
     """
    r = execute_query(sql)
    
    sql = """
    update geothing
    set country_code='RU',
    admin_code='82',
    country = 'Россия',
    oblast = 'Республика Крым',
    country_name = 'Russia',
    oblast_name = 'Respublika Krym'
    where country_code='UA' and admin_code='11'
     """
    r = execute_query(sql)

    sql = """SELECT COUNT(*) FROM geothing WHERE country_code IS NULL"""
    undefined_country_count = sql2val(sql)
    sql = """SELECT COUNT(*) FROM geothing WHERE admin_code IS NULL OR admin_code = '777'"""
    undefined_subject_count = sql2val(sql)
    undefined_count = '%s/%s' % (undefined_country_count, undefined_subject_count)
    
    log('map_set_location', 'OK %s'%undefined_count)
    elapsed = time() - start
    print "Elapsed time -->", elapsed

Ejemplo n.º 14

Mostrar archivo

def main():
    LOAD_CACHES = True

    start = time()

    yplib.setUp()
    yplib.set_debugging(False)

    r = yplib.post2('http://www.geocaching.su/?pn=108',
                    (('Log_In', 'Log_In'), ('email', '*****@*****.**'),
                     ('passwd', 'zaebalixakeryvas'), ('longterm', '1')))

    soup = yplib.soup()

    a = soup.find('a', attrs={'class': "profilelink"}, text='galdor')
    if not a:
        print 'Authorization failed'
        return False

    r = yplib.get('http://www.geocaching.su/site/popup/selex.php')
    soup = yplib.soup()

    chbox_list = soup.findAll('input', type='checkbox')
    regions = []

    for chbox in chbox_list:
        v = chbox.get('value')
        if v and chbox.get('name', '') == 'point[]':
            regions.append(v)

    data = [
        ('translit', '0'),
        ('fmt', 'wpt'),
        ('code_to_name', '1'),
        ('finded', '2'),
    ]
    for r in regions:
        data.append(('point[]', r))

    r = yplib.post2('http://www.geocaching.su/site/popup/export.php', data)
    soup = yplib.soup()
    wpt = soup.text.split('\n')

    WPT_CODE = 1
    WPT_LAT = 2
    WPT_LON = 3
    WPT_TITLE = 10
    WPT_DATE = 4

    geosite = Geosite.objects.get(code='GC_SU')

    print len(wpt), 'points'
    k = 0
    for point in wpt:
        k += 1
        fields = point.split(',')
        if fields[0].isdigit():
            the_geothing = TheGeothing()
            the_location = TheLocation()

            lat_degree = float(fields[WPT_LAT])
            the_location.NS_degree = lat_degree
            #the_location.NS_minute = (abs(lat_degree) - abs(the_location.NS_degree)) * 60
            lon_degree = float(fields[WPT_LON])
            the_location.EW_degree = lon_degree
            #the_location.EW_minute = (abs(lon_degree) - abs(the_location.EW_degree)) * 60

            p = re.compile('(\D+)(\d+)')
            dgs = p.findall(fields[WPT_CODE])
            if dgs:
                code_data = dgs[0]
                the_geothing.code = fields[WPT_CODE]
                the_geothing.pid = int(code_data[1])
                the_geothing.type_code = code_data[0]

            p = re.compile(u'(.+)от(.+)')
            dgs = p.findall(fields[WPT_TITLE])
            if dgs:
                title = dgs[0]
                the_geothing.name = title[0]
                the_geothing.author = title[1]

            d = float(fields[WPT_DATE])

            the_geothing.created_date = Dephi_date_to_python_date(d)

            if the_geothing.type_code in GEOCACHING_ONMAP_TYPES:
                geothing = get_object_or_none(Geothing,
                                              pid=the_geothing.pid,
                                              geosite=geosite)
                if geothing is not None:
                    update_geothing(geothing, the_geothing, the_location)
                else:
                    create_new_geothing(the_geothing, the_location, geosite)

    log('map_gcsu_caches', 'OK')
    elapsed = time() - start
    print "Elapsed time -->", elapsed

Ejemplo n.º 15

Mostrar archivo

Archivo: set_cach_location.py Proyecto: kostin-aleks/gpsfun

def main(processed_pid):
    #if not switch_off_status_updated():
        #return False
    
    LOAD_GEO_LOCATION = True
    
    start = time() 
   
    if LOAD_GEO_LOCATION:
        #.filter(pid=5408)
        #for cach in Cach.objects.all().filter(pid__gt=processed_pid).order_by('pid')[:1990]:
        for cach in Cach.objects.all().extra(where=["country_code IS NULL OR admin_code IS NULL OR admin_code='777'"]).order_by('pid')[:1000]:
            lat = cach.latitude_degree
            lng = cach.longitude_degree
            
            if lat is not None and lng is not None:
                d = ((0,0), (0.01,0), (-0.01,0), (0,0.01), (0,-0.01))
                cnt = 0
                while cnt < 5:
                    url = 'http://api.geonames.org/countrySubdivision?username=galdor&lat=%s&lng=%s&lang=en' % (lat+d[cnt][0], lng+d[cnt][1])
                    print
                    print cach.pid, url
                    yplib.get(url)
                    try:
                        soup = yplib.soup()
                    except:
                        url = 'http://api.geonames.org/countrySubdivision?username=galdor&lat=%s&lng=%s&lang=en' % (lat+d[cnt][0], lng+d[cnt][1])
                        yplib.get(url)
                        try:
                            soup = yplib.soup()
                        except:
                            soup = None
                    if soup:
                        item = soup.find('countrysubdivision')
                        if item:
                            break
                    cnt += 1
                
                if soup is None:
                    print cach.pid, lat, lng, cach.loc_NS, cach.loc_NS_degree, cach.loc_NS_minute, cach.loc_EW, cach.loc_EW_degree, cach.loc_EW_minute
                    continue
                item = soup.find('countrycode')
                if item and item.text:
                    cach.country_code = item.text.encode('utf8')
                
                if soup.admincode1 and soup.admincode1.text:
                    cach.admin_code = soup.admincode1.text
                item = soup.find('code', {'type':'FIPS10-4'})
                if item:
                    cach.code_fips10_4 = item.text
                item = soup.find('code', {'type':'ISO3166-2'})
                if item:
                    cach.code_iso3166_2 = item.text
                item = soup.find('countryname')
                if item:
                    cach.country_name = item.text.encode('cp1251')
                if soup.adminname1:
                    cach.oblast_name = soup.adminname1.text.encode('cp1251')
                #print cach.pid, cach.country_name, cach.country_code, cach.oblast_name          
                #print soup
                #print
                #print cach.pid
                if cach.country_code and len(cach.country_code) == 2:
                    cach.save()
            else:
                print cach.pid, lat, lng, cach.loc_NS, cach.loc_NS_degree, cach.loc_NS_minute, cach.loc_EW, cach.loc_EW_degree, cach.loc_EW_minute

    count_without_country = Cach.objects.filter(country_code__isnull=True).count()
    count_without_subject = Cach.objects.filter(admin_code__isnull=True).count()
    print '%s have no country' % count_without_country
    print '%s have no country subject' % count_without_subject
    
    sql = "UPDATE cach SET admin_code='777', oblast_name='undefined subject' WHERE country_code IS NOT NULL AND admin_code IS NULL"
    r = execute_query(sql)
    sql = """SELECT COUNT(*) FROM cach WHERE country_code IS NULL"""
    undefined_country_count = sql2val(sql)
    sql = """SELECT COUNT(*) FROM cach WHERE admin_code IS NULL OR admin_code = '777'"""
    undefined_subject_count = sql2val(sql)
    undefined_count = '%s/%s' % (undefined_country_count, undefined_subject_count)
    
    elapsed = time() - start
    print "Elapsed time -->", elapsed
    #switch_on_status_updated()
    log('gcsu_location', 'OK %s'%undefined_count)

Ejemplo n.º 16

Mostrar archivo

Archivo: update_gcsu_geocachers.py Proyecto: kostin-aleks/gpsfun

def add_geocacher(pid):
    url = 'http://www.geocaching.su/profile.php?pid=%s' % pid
    print url
    try:
        yplib.get(url)
    except BrowserStateError:
        pass

    try:
        soup = yplib.soup()
    except UnicodeDecodeError:
        print 'exception, pid=%s' % pid
        return
    tbl = soup.find('table', {'class': 'pages'})
    rows = tbl.findAll('tr')
    all_cells = []
    for row in rows:
        cells = row.findAll('th')
        for cell in cells:
            all_cells.append(cell.text.encode('utf8'))

    user = Cacher()
    user.pid = pid
    user.uid = get_uid(tbl)
    user.nickname = text_or_none(all_cells[1])
    if user.nickname:
        print user.nickname
    if user.nickname:
        user.nickname = user.nickname[:64]
    user.name = text_or_none(all_cells[2])
    user.birstday = strdate_or_none(all_cells[3])
    user.sex = sex_or_none(all_cells[4])
    user.country = text_or_none(all_cells[5])
    user.oblast = text_or_none(all_cells[6])
    user.town = text_or_none(all_cells[7])
    user.phone = text_or_none(all_cells[9])
    user.icq = text_or_none(all_cells[10])
    if user.icq and not user.icq.isdigit():
        user.icq = None
    user.web = text_or_none(all_cells[11])
    if user.web:
        user.web = user.web[:128]
    user.gps = None  #gps[:255].encode if gps else None
    user.created_caches = int_or_none(all_cells[18])
    user.found_caches = int_or_none(all_cells[19])
    user.photo_albums = int_or_none(all_cells[21])
    if len(all_cells) > 23:
        user.register_date = date_or_none(all_cells[-3])
        if user.register_date is None:
            user.register_date = date_or_none(all_cells[-2])
        user.last_login = date_or_none(all_cells[-2])
        user.forum_posts = int_or_none(all_cells[-1])

    geocacher = Geocacher.objects.create(pid=pid)
    geocacher.__dict__.update(user.__dict__)
    print 'save', geocacher.pid
    if user.web:
        print user.web
    geocacher.save()

    return True

Ejemplo n.º 17

Mostrar archivo

def main():
    #if not switch_off_status_updated():
    #return False

    LOAD_CREATED_CACHE_LOGS = True
    LOAD_SEEK_CACHE_LOGS = True
    LOAD_RECOMMEND_CACHE_LOGS = True
    LOAD_PHOTOALBUM_LOGS = True

    start = time()

    yplib.setUp()
    yplib.set_debugging(False)

    r = yplib.post2('http://www.geocaching.su/?pn=108',
                    (('Log_In', 'Log_In'), ('email', '*****@*****.**'),
                     ('passwd', 'zaebalixakeryvas'), ('longterm', '1')))

    soup = yplib.soup()

    a = soup.find('a', attrs={'class': "profilelink"}, text='galdor')
    if not a:
        print 'Authorization failed'
        return False
    print
    print 'BEGIN'
    fh = open('cant_open_user_profile.txt', 'w')
    if LOAD_CREATED_CACHE_LOGS:
        LogCreateCach.objects.all().update(updated=False)
        print 'updating of creating logs'
        cachers = Geocacher.objects.all().values_list('pid', 'uid')
        t = re.compile('\?pn\=101\&cid=(\d+)')
        t1 = re.compile(u'создан\s+(\d\d\.\d\d\.\d\d\d\d)')
        for cacher in cachers:
            if cacher[1]:
                url = 'http://www.geocaching.su/site/popup/userstat.php?s=1&uid=%s' % cacher[
                    1]
                try:
                    yplib.get(url)
                except BrowserStateError:
                    log_error(fh, cacher[1], 'bse')
                    continue
                soup = yplib.soup()
                tbl = soup.find('table', attrs={'class': 'pages'})
                if tbl:
                    rows = tbl.findAll('tr')
                    for row in rows:
                        cach_pid = created_date = None
                        coauthor = False
                        cell = row.find('td')
                        if cell:
                            a_list = cell.findAll('a')
                            for a in a_list:
                                cach_pid = None
                                parts = t.findall(a['href'])
                                if len(parts):
                                    cach_pid = int(parts[0])

                            txt = cell.text
                            if u'(соавтор)' in txt:
                                coauthor = True
                            found = t1.findall(txt)
                            if found:
                                created_date = found[0]
                                created_date = date_or_none(created_date)
                            if cach_pid:
                                print cacher[0], cach_pid, txt.encode('utf8')
                                the_log, created = LogCreateCach.objects.\
                                    get_or_create(
                                        author_pid=cacher[0],
                                        cach_pid=cach_pid)
                                the_log.created_date = created_date
                                the_log.coauthor = coauthor
                                the_log.updated = True
                                the_log.save()
                else:
                    log_error(fh, cacher[1], 'npc')
        LogCreateCach.objects.filter(updated=False).delete()

    if LOAD_SEEK_CACHE_LOGS:
        LogSeekCach.objects.all().update(updated=False)
        cachers = Geocacher.objects.all().values_list(
            'pid', 'uid')  #.filter(pid=18849)
        t = re.compile('\?pn\=101\&cid=(\d+)')
        t1 = re.compile(u'создан\s+(\d\d\.\d\d\.\d\d\d\d)')
        t2 = re.compile(u'найден\s+(\d\d\.\d\d\.\d\d\d\d)')
        t3 = re.compile(u'оценен\s+на\s+(\d)')

        for cacher in cachers:
            if cacher[1]:
                url = 'http://www.geocaching.su/site/popup/userstat.php?s=2&uid=%s' % cacher[
                    1]
                try:
                    yplib.get(url)
                    soup = yplib.soup()
                except BrowserStateError:
                    log_error(fh, cacher[1], 'bse')
                    continue

                tbl = soup.find('table', attrs={'class': 'pages'})
                if tbl:
                    rows = tbl.findAll('tr')
                    for row in rows:
                        cach_pid = found_date = grade = None
                        cell = row.find('td')
                        if cell:
                            a_list = cell.findAll('a')
                            for a in a_list:
                                cach_pid = None
                                parts = t.findall(a['href'])
                                if len(parts):
                                    cach_pid = int(parts[0])

                            txt = cell.text
                            found = t3.findall(txt)
                            if found:
                                g = found[0]
                                grade = int_or_none(g)
                            found = t2.findall(txt)
                            if found:
                                found_date = found[0]
                                found_date = date_or_none(found_date)
                            if cach_pid:
                                print cacher[0], cach_pid, txt.encode('utf8')
                                the_log, created = LogSeekCach.objects.\
                                    get_or_create(
                                        cacher_pid=cacher[0],
                                        cach_pid=cach_pid, )
                                the_log.found_date = found_date
                                the_log.grade = grade
                                the_log.updated = True
                                the_log.save()
                else:
                    log_error(fh, cacher[1], 'npf')
        LogSeekCach.objects.filter(updated=False).delete()

    if LOAD_RECOMMEND_CACHE_LOGS:
        LogRecommendCach.objects.all().update(updated=False)
        cachers = Geocacher.objects.all().values_list('pid', 'uid')
        t = re.compile('\?pn\=101\&cid=(\d+)')
        for cacher in cachers:
            if cacher[1]:
                url = 'http://www.geocaching.su/site/popup/userstat.php?s=3&uid=%s' % cacher[
                    1]
                try:
                    yplib.get(url)
                    soup = yplib.soup()
                except BrowserStateError:
                    log_error(fh, cacher[1], 'bse')
                    continue
                tbl = soup.find('table', attrs={'class': 'pages'})
                if tbl:
                    rows = tbl.findAll('tr')
                    for row in rows:
                        cach_pid = found_date = grade = None
                        cell = row.find('td')
                        if cell:
                            a_list = cell.findAll('a')
                            for a in a_list:
                                cach_pid = None
                                parts = t.findall(a['href'])
                                if len(parts):
                                    cach_pid = int(parts[0])

                            txt = cell.text
                            if cach_pid:
                                print cacher[0], cach_pid, txt.encode('utf8')
                                the_log, created = LogRecommendCach.\
                                    objects.get_or_create(
                                        cacher_pid=cacher[0],
                                        cach_pid=cach_pid)
                                the_log.updated = True
                                the_log.save()
                else:
                    log_error(fh, cacher[1], 'npr')
        LogRecommendCach.objects.filter(updated=False).delete()

    if LOAD_PHOTOALBUM_LOGS:
        LogPhotoAlbum.objects.all().update(updated=False)
        cachers = Geocacher.objects.all().values_list('pid', 'uid')
        t = re.compile('showmemphotos\.php\?cid=(\d+)')

        for cacher in cachers:
            if cacher[1]:
                url = 'http://www.geocaching.su/site/popup/userstat.php?s=4&uid=%s' % cacher[
                    1]
                try:
                    yplib.get(url)
                    soup = yplib.soup()
                except BrowserStateError:
                    log_error(fh, cacher[1], 'bse')
                    continue
                tbl = soup.find('table', attrs={'class': 'pages'})
                if tbl:
                    rows = tbl.findAll('tr')
                    for row in rows:
                        cach_pid = found_date = grade = None
                        cell = row.find('td')
                        if cell:
                            a_list = cell.findAll('a')
                            for a in a_list:
                                cach_pid = None
                                parts = t.findall(a['href'])
                                if len(parts):
                                    cach_pid = int(parts[0])

                            txt = cell.text
                            if cach_pid:
                                print cacher[0], cach_pid, txt.encode('utf8')
                                the_log, created = LogPhotoAlbum.\
                                    objects.get_or_create(
                                        cacher_pid=cacher[0],
                                        cach_pid=cach_pid)
                                the_log.updated = True
                                the_log.save()
                else:
                    log_error(fh, cacher[1], 'npp')
        LogPhotoAlbum.objects.filter(updated=False).delete()

    elapsed = time() - start
    print "Elapsed time -->", elapsed
    #switch_on_status_updated()
    log('gcsu_logs', 'OK')
    fh.close()