def runf(self, url, debug=False):
    """
    Collect the parsed entries of every result page of one listing URL.

    Pages are requested as ``url + '&page=N'`` with N starting at 1. Each
    page is parsed with ``myparser.parse_appstore_catpage_applink_fetcher``
    and the parsed entries are appended to ``self.state.resultlist`` while
    holding ``self.state.resultlist_lock``. Paging stops at the first page
    that yields fewer than 50 entries, at the first page whose fetch or
    parse raises, or after page 499.

    job is given, task_done will be called, but still need to handle various locks

    :param url: listing URL; the page number is appended with ``&``, so the
        URL is expected to already carry a query string.
    :param debug: currently unused; kept so existing callers keep working.
    :return: None. Results are delivered through ``self.state``.
    """
    page_stop = 500       # range(1, 500) -> pages 1..499
    full_page_size = 50   # a shorter page is treated as the last one

    try:
        for np in range(1, page_stop):
            pgurl = url + '&page=%d' % np

            try:
                page = urlutil.getpage_unicode(pgurl)
                res = myparser.parse_appstore_catpage_applink_fetcher(page)
                # The result list is shared state, so only extend it under its lock.
                with self.state.resultlist_lock:
                    self.state.resultlist += res
                if len(res) < full_page_size:
                    break
            except Exception as e:
                # Any per-page failure ends paging for this URL; what was
                # collected so far stays in the result list.
                redprint('broken at page %d, excep msg is %s' % (np, str(e)))
                break

    # NOTE: the per-page handler above already catches Exception, so the two
    # handlers below only fire for errors raised outside the inner try (or
    # for exception types that do not derive from Exception).
    # They previously reported the builtin ``id`` function; the URL being
    # processed is the only identifier this method has, so report that.
    except config.IDError as e:
        print("what is this id error ?")
        print(str(e))
        add_to_faillist(url)

    except IndexError:
        yellowprint('index error happends (app no dev?), id is %s' % url)
        add_to_faillist(url)
        self.state.faillist.append(url)
Esempio n. 2
0
    # Load the category listing: the second whitespace-separated field of
    # each line is used as the category URL.
    with open('ios-home-res.info') as listing:
        rows = listing.readlines()
    urlli = [row.strip().split()[1] for row in rows]

    con = []  # accumulates parsed results across all categories and letters
    for cat_no, caturl in enumerate(urlli):
        blueprint('done with cat %d' % cat_no)
        letter_urls = makeurls_with_letters(caturl)
        for letter_no, lturl in enumerate(letter_urls):
            greenprint('done with letter %d' % letter_no)
            yellowprint('collected ids %d' % len(con))
            # Walk pages 1..499; stop early on a short page or on any error.
            for np in range(1, 500):
                print('processing page %d' % np)
                pgurl = lturl + ('&page=%d' % np)

                try:
                    res = parse_appstore_catpage_applink_fetcher(
                        urlutil.getpage_unicode(pgurl))
                    con.extend(res)
                    # Fewer than 50 entries means there is no further page.
                    is_last_page = len(res) < 50
                    if is_last_page:
                        break
                except Exception as err:
                    redprint('broken at page %d, excep msg is %s' % (np, err))
                    break

    # Persist everything collected as a single pickle file.
    with open('appstore_linkres.pickle', 'wb') as sink:
        pickle.dump(con, sink)

if __name__ == '__main__':
    # Manual run: download one app page and pass it through the app-page
    # parser. The parsed result is kept in `res` but not used further here.
    sample_url = 'https://itunes.apple.com/us/app/kfz-abkurzungen/id463803338?mt=8'
    page = urlutil.getpage_unicode(sample_url)
    res = myparser.parse_appstore_apppage(page)