def check_proxy(p):
    try:
        fetch('http://baidu.com', proxy=p['address'])
    except RequestException:
        p.delete()
        return False
    return True
Example #2
def home():
    header = "Summarize WordPress Blog"
    form = NameForm()
    site_url = request.args.get('url')
    base_url = request.base_url

    if request.method == 'GET' and site_url is not None:
        number_of_pages = request.args.get('pages')
        if number_of_pages is not None:
            try:
                number_of_pages = int(number_of_pages)
            except (TypeError, ValueError):
                number_of_pages = 1
            form.number_of_pages.data = number_of_pages
        form.name.data = site_url
        lines = fetch(site_url, number_of_pages)
        query_url = get_query_url(base_url, site_url, number_of_pages)
        return render_template('search.html', pairs=lines, the_title=header, form=form, query_url=query_url)

    elif request.method == 'POST' and form.validate_on_submit():
        site_url = form.name.data
        number_of_pages = form.number_of_pages.data
        if number_of_pages is None:
            number_of_pages = DEFAULT_MAX_PAGE
        lines = fetch(site_url, number_of_pages)
        query_url = get_query_url(base_url, site_url, number_of_pages)
        return render_template('search.html', pairs=lines, the_title=header, form=form, query_url=query_url)

    return render_template('search.html', the_title=header, form=form)
Example #3
    async def main(future):
        async with aiohttp.ClientSession() as session:

            # get location data from api
            fetch_locations = [fetch(session, url) for url in location_queries]
            location_data = await asyncio.gather(*fetch_locations)

            # get weather data based on woeids
            weather_queries = []
            for row in location_data:
                woeid = str(row[0]['woeid'])
                weather_query = f'https://www.metaweather.com/api/location/{woeid}/'
                weather_queries.append(weather_query)

            fetch_weathers = [fetch(session, url) for url in weather_queries]
            weather_data = await asyncio.gather(*fetch_weathers)

            # create json object from locations and weather data
            results = {}
            for location, city in zip(locations, weather_data):
                results[location] = []
                weather = city['consolidated_weather']
                for day in weather:
                    date = day['applicable_date']
                    temp = day['the_temp']
                    description = day['weather_state_name']
                    forecast = Forecast(date, temp, description)
                    results[location].append(forecast)

            return results
Example #4
def check_proxy(p):
    try:
        fetch('http://baidu.com', proxy=p['address'])
    except RequestException:
        p.delete()
        return False
    return True
Example #5
def parse_source(source, idx, header):
    """
    Import data from a single source based on the data type.
    """
    path = '{}/{}'.format(config.workspace_dir, idx)
    if not os.path.exists(path):
        os.makedirs(path)

    cache_url = source[header.index('cache')]
    cache_filename = re.search('/[^/]*$', cache_url).group()
    fetch(cache_url, path + cache_filename)

    files = rlistdir(path)
    for f in files:
        if re.match(r'.*\.(zip|obj|exe)$', f):  # some files had mislabelled ext
            unzip(f, path)

    shapes = []
    files = rlistdir(path)
    for f in files:
        if re.match(r'.*\.({})$'.format('|'.join(config.fiona_extensions)), f):
            objs = import_with_fiona(f, source[0])
            for obj in objs:
                shapes.append(obj)
        elif re.match(r'.*\.csv$', f):
            objs = import_csv(f, source[0])
            for obj in objs:
                shapes.append(obj)

    shutil.rmtree(path)

    if not shapes:
        _L.warning('failed to parse source. did not find shapes. files in archive: {}'.format(files))

    return shapes
Example #6
def main():
    argvs = sys.argv
    if len(argvs) != 4:
        print(
            'usage:\n    delete_all_your_posts_in_direct_message.py <Slack Web API token> <Your Slack name> <Target user name>\n'
        )
        exit()

    end_point = 'https://slack.com/api/'
    token, your_name, target_user_name = argvs[1:]
    token = '?token=' + token

    # fetch users.list
    users_list = utils.fetch(end_point + 'users.list' + token)
    your_id = [
        member['id'] for member in users_list['members']
        if member.get('name') == your_name
    ][0]
    target_user_id = [
        member['id'] for member in users_list['members']
        if member.get('name') == target_user_name
    ][0]
    print('your_id: ' + your_id)
    print('target_user_id: ' + target_user_id)

    # fetch im.list
    im_list = utils.fetch(end_point + 'im.list' + token)
    target_im_id = [
        im['id'] for im in im_list['ims'] if im.get('user') == target_user_id
    ][0]
    print('target_im_id: ' + target_im_id)

    # fetch im.history
    im_history = utils.fetch_all_history(end_point + 'im.history' + token +
                                         '&channel=' + target_im_id +
                                         '&count=1000')
    your_posts_list = [
        message for message in im_history
        if message.get('user') == your_id and message.get('subtype', '') == ''
    ]

    # show your posts
    for message in your_posts_list:
        print(message['text'].replace('\n', ''), message['ts'])

    # chat.delete
    print('------------------------------')
    print('{0} item(s) will be deleted. Are you sure?'.format(len(your_posts_list)))
    ans = utils.prompt()

    if ans == 'y' or ans == 'Y':
        for message in your_posts_list:
            print(message['text'].replace('\n', ''), message['ts'])
            delete_status = utils.fetch(end_point + 'chat.delete' + token +
                                        '&ts=' + message['ts'] + '&channel=' +
                                        target_im_id)
            print(delete_status)

        print('complete!!')
Example #7
def install():
    fetch("http://ftp.gnome.org/pub/gnome/sources/json-glib/0.16/json-glib-%(json-glib)s.tar.xz")
    extract("json-glib-%(json-glib)s.tar.xz")
    configure(
        "json-glib-%(json-glib)s", ["--prefix=%s" % env.prefix, "--disable-gcov", "--disable-introspection", "CC=clang"]
    )
    make("json-glib-%(json-glib)s")
    make("json-glib-%(json-glib)s", "install")
Example #8
def main():
    argvs = sys.argv
    if len(argvs) != 4:
        print(
            'usage:\n    delete_all_your_files_in_direct_message.py <Slack Web API token> <Your Slack name> <Target user name>\n'
        )
        exit()

    end_point = 'https://slack.com/api/'
    token, your_name, target_user_name = argvs[1:]
    token = '?token=' + token

    # fetch users.list
    users_list = utils.fetch(end_point + 'users.list' + token)
    your_id = [
        member['id'] for member in users_list['members']
        if member.get('name') == your_name
    ][0]
    target_user_id = [
        member['id'] for member in users_list['members']
        if member.get('name') == target_user_name
    ][0]
    print('your_id: ' + your_id)
    print('target_user_id: ' + target_user_id)

    # fetch im.list
    im_list = utils.fetch(end_point + 'im.list' + token)
    target_im_id = [
        im['id'] for im in im_list['ims'] if im.get('user') == target_user_id
    ][0]
    print('target_im_id: ' + target_im_id)

    # fetch files.list
    your_files_list = utils.fetch_all_files(end_point + 'files.list' + token +
                                            '&user=' + your_id)
    target_ims_your_files_list = [
        f for f in your_files_list if target_im_id in f.get('ims', [])
    ]

    # show your files
    for f in target_ims_your_files_list:
        print(f['id'], f['url_private'])

    # files.delete
    print('------------------------------')
    print('{0} item(s) will be deleted. Are you sure?'.format(len(target_ims_your_files_list)))
    ans = utils.prompt()

    if ans == 'y' or ans == 'Y':
        for f in target_ims_your_files_list:
            print(f['id'], f['url_private'])
            delete_status = utils.fetch(end_point + 'files.delete' + token +
                                        '&file=' + f['id'])
            print(delete_status)

        print('complete!!')
Example #9
def crawl(url: str = base_url):
    '''
        Fetch the list of products.
    '''
    data = utils.fetch(url)
    data_list = [
        data,
    ]
    soup = bs(data, 'lxml')

    # Read each listed page in order
    page_list = list(
        set([
            page['href']
            for page in soup.find('p', class_='pagelink').find_all('a')
        ]))
    for page in page_list:
        d = utils.fetch(page)
        if d:
            data_list.append(d)

    # item list
    output = []
    for d in data_list:
        soup = bs(d, 'lxml')
        for item in soup.find('div', class_='item_list').ul.find_all('li'):
            item = item.div.span
            # For JSON output
            o = {'manufacture': 'Anker'}

            # Product name and product image
            o['name'] = item.a.img['alt'].replace('Anker ', '')
            o['image'] = item.a.img['src']
            o['url'] = item.a['href']
            o['price'] = int(
                re.sub(r'\D', '',
                       item.find('p', class_='price').string))

            # Guess the capacity from the product name (e.g. 13400)
            m = re.search(r'[1-9][0-9]*00', o['name'])
            if m:
                o['capacity'] = int(m.group(0))

            # Guess the maximum USB PD output
            m = re.search(r'([1-9][0-9]+)W', o['name'])
            if m:
                o['pd_w'] = int(m.group(1))

            # Details
            o['detail'] = crawl_detail(o['url'])

            output.append(o)

    return output
Example #10
def crawl(url: str = base_url):
    '''
        Fetch the list of products.
    '''
    data = utils.fetch(url)
    data_list = [
        data,
    ]
    soup = bs(data, 'lxml')

    # Read each listed page in order
    page_list = list(
        set([
            a['href']
            for a in soup.find('nav', class_='pagination').ul.find_all('a')
        ]))
    for page in page_list:
        d = utils.fetch(page)
        if d:
            data_list.append(d)

    # item list
    output = []
    for d in data_list:
        soup = bs(d, 'lxml')
        for item in soup.find_all('section'):
            if not item.find('div', class_='_entry-inner') or not item.find(
                    'div', class_='_entry-image'):
                continue

            o = {'manufacture': 'cheero'}

            o['name'] = item.find('h2').string.strip()
            o['url'] = item.a['href']
            o['image'] = item.find('img')['data-src']
            o['price'] = int(
                re.sub(r'\D', '',
                       item.find('p', class_='price').span.string))
            o['detail'] = crawl_detail(item.a['href'])

            # Guess the capacity from the product name
            m = re.search(r'([1-9][0-9]*00)mAh', o['name'])
            if m:
                o['capacity'] = int(m.group(1))

            # Guess the maximum USB PD output
            m = re.search(r'([1-9][0-9]+)W$', o['name'])
            if m:
                o['pd_w'] = int(m.group(1))

            output.append(o)

    return output
Example #11
def install():
    fetch('http://www.pell.portland.or.us/~orc/Code/discount/discount-%(discount)s.tar.bz2')
    extract('discount-%(discount)s.tar.bz2')
    configure('discount-%(discount)s', ['--prefix=%s' % env.prefix,
                                        '--libdir=%s/lib' % env.prefix,
                                        '--mandir=%s/man' % env.prefix,
                                        '--shared',
                                        '--enable-all-features'],
              'configure.sh')
    run('sed -i .bkp -e "/ldconfig/d" %s/%s/librarian.sh' %
        (env.build, 'discount-%(discount)s' % env.versions))
    make('discount-%(discount)s')
    make('discount-%(discount)s', 'install')
Example #12
def fetch_dict(url, dest):
    assert url.endswith(".tar.bz2"), url
    filename = os.path.basename(url)
    utils.fetch(url, os.path.join(DICT_DIR, filename))
    utils.run(["tar", "xjvf", filename], cwd=DICT_DIR)
    name = filename[: -len(".tar.bz2")]
    path = os.path.join(DICT_DIR, name)
    utils.run(["./configure", "--vars", "DESTDIR=tmp"], cwd=path)
    utils.run(["make"], cwd=path)
    utils.run(["make", "install"], cwd=path)
    result_dir = os.path.join(path, "tmp/usr/lib/aspell")
    utils.ensure_path(dest)
    for dict_file in os.listdir(result_dir):
        shutil.copy2(os.path.join(result_dir, dict_file), os.path.join(dest, dict_file))
Example #13
def fetch_dict(url, dest):
    assert url.endswith('.tar.bz2'), url
    filename = os.path.basename(url)
    utils.fetch(url, os.path.join(DICT_DIR, filename))
    utils.run(["tar", "xjvf", filename], cwd=DICT_DIR)
    name = filename[:-len('.tar.bz2')]
    path = os.path.join(DICT_DIR, name)
    utils.run(["./configure", "--vars", "DESTDIR=tmp"], cwd=path)
    utils.run(["make"], cwd=path)
    utils.run(["make", "install"], cwd=path)
    result_dir = os.path.join(path, 'tmp/usr/lib/aspell')
    utils.ensure_path(dest)
    for dict_file in os.listdir(result_dir):
        shutil.copy2(os.path.join(result_dir, dict_file), os.path.join(dest, dict_file))
Example #14
    def _run(self, isbn):
        keywords = '+'.join(['details', 'texts', 'authors', 'subjects'])
        url = 'http://isbndb.com/api/books.xml?access_key=%s&index1=isbn&value1=%s&results=%s' % (ISBNDB_ACCESS_KEY, isbn, keywords)
        soup = BeautifulSoup(fetch(url))

        try:
            result = dict()
            result['title'] = soup.find('titlelong').string or soup.find('title').string
            result['author'] = soup.find('authorstext').string
            bookdata = soup.find('bookdata').attrs
            result['isbn'] = bookdata[2][1]
            result['source'] = 'http://isbndb.com/d/book/%s.html' % bookdata[0][1]
            # publisher info
            pubs = soup.find('publishertext').string
            reg = rx_publisher.search(pubs)
            if reg:
                if len(reg.groups()) == 1:
                    result['publisher'] = reg.group(1)
                elif len(reg.groups()) == 4:
                    result['publisher'] = reg.group(2).strip()
                    result['date'] = reg.group(4).strip()
            # date also can be found in details->edition_info->date
            if 'date' not in result:
                details = soup.find('details').attrs
                for e in details:
                    if u'edition_info' == e[0]:
                        reg = rx_edition.search(e[1])
                        if reg: result['date'] = reg.group(1)
            return result
        except:
            return None
Example #15
    def _run(self, isbn):
        url = 'http://books.iqbuy.ru/categories_offer/%s' % (isbn)
        data = rx_data.sub('', fetch(url).decode('cp1251'))
        soup = BeautifulSoup(data)

        try:
            result = dict()
            result['title'] = soup.find('h2', {'class': 'book-name'}).string
            authors = soup.find('p', {'class': 'book-author'})
            # author is optional
            if authors.strong.string:
                result['author'] = authors.strong.string.replace('  ', ' ')
            # series is optional
            series = authors.findNext('p')
            reg = rx_series.search(unicode(series))
            if reg:
                result['series'] = series.strong.string
                publisher = series.findNext('p')
            else:
                publisher = series
            # continue with publisher
            result['publisher'] = publisher.strong.string.replace('  ', ' ').strip()
            reg = rx_publisher.search(str(publisher))
            if reg: result['date'] = reg.group(1)
            result['source'] = url
            result['isbn'] = isbn
            result['photo'] = soup.find('td', {'class': 'book-image'}).p.img['src']
            return result
        except:
            return None
Example #16
def fetch_issue(issue_key):
    '''There is a Python API for Jira (pip install jira),
    but we wanted to avoid dependencies, and this is simple enough.'''
    query = URL + 'rest/api/2/issue/' + issue_key
    query += '?fields=' + ','.join(f for f, _rep in FIELDS)
    raw_issue = utils.fetch(query)
    return json.loads(raw_issue)
Example #17
def main():
    argvs = sys.argv
    if len(argvs) != 2:
        print('usage:\n    delete_all_old_files.py <Slack Web API token>\n')
        exit()

    end_point = 'https://slack.com/api/'
    token = argvs[1]
    token = '?token=' + token

    # fetch files.list
    last_month_timestamp = (datetime.now() +
                            timedelta(days=-30)).strftime('%s')
    files_list = utils.fetch_all_files(end_point + 'files.list' + token +
                                       '&ts_to=' + last_month_timestamp)

    # show your files
    for f in files_list:
        print(f['id'], f['url_private'])

    # files.delete
    print('------------------------------')
    print('{0} item(s) will be deleted. Are you sure?'.format(len(files_list)))
    ans = utils.prompt()

    if ans == 'y' or ans == 'Y':
        for f in files_list:
            print(f['id'], f['url_private'])
            delete_status = utils.fetch(end_point + 'files.delete' + token +
                                        '&file=' + f['id'])
            print(delete_status)

        print('complete!!')
Example #18
def save_search_result(p, queue, retry=0):
    proxy = Proxy.get_random()['address']
    url = SEARCH_URL.format(SEARCH_TEXT, p)

    try:
        r = fetch(url, proxy=proxy)
    except (Timeout, ConnectionError):
        sleep(0.1)
        retry += 1
        if retry > 5:
            queue.put(url)
            raise GreenletExit()
        try:
            p = Proxy.objects.get(address=proxy)
            if p:
                p.delete()
        except DoesNotExist:
            pass

        return save_search_result(p, queue, retry)
    soup = BeautifulSoup(r.text, 'lxml')
    results = soup.find(class_='results')
    if results is None:
        # This proxy has been blocked; switch to another one
        sleep(0.1)
        retry += 1
        if retry > 5:
            queue.put(url)
            raise GreenletExit()
        return save_search_result(p, queue, retry)
    articles = results.find_all('div', lambda x: 'wx-rb' in x)
    for article in articles:
        save_article(article)
Example #19
    def episode_menu():
        et_tz = pytz.timezone('US/Eastern')
        date_et = common.get_date() if vars.params.get('custom_date', False) else utils.tznow(et_tz).date()

        # Avoid possible caching by using query string
        epg_url = 'https://nlnbamdnyc-a.akamaihd.net/fs/nba/feeds/epg/%d/%d_%d.js?t=%d' % (
            date_et.year, date_et.month, date_et.day, time.time())
        response = utils.fetch(epg_url)
        g_epg = json.loads(response[response.find('['):])

        for epg_item in g_epg:
            entry = epg_item['entry']

            start_et_hours, start_et_minutes = map(int, entry['start'].split(':'))
            duration_hours, duration_minutes = map(int, entry['duration'].split(':'))

            dt_et = et_tz.localize(datetime.datetime(date_et.year, date_et.month, date_et.day, start_et_hours, start_et_minutes))
            dt_utc = dt_et.astimezone(pytz.utc)

            start_timestamp = int((dt_utc - datetime.datetime(1970, 1, 1, tzinfo=pytz.utc)).total_seconds()) * 1000  # in milliseconds
            duration = (duration_hours * 60 + duration_minutes) * 60 * 1000  # in milliseconds

            params = {
                'start_timestamp': start_timestamp,
                'duration': duration,
            }
            utils.log(params, xbmc.LOGDEBUG)

            name = '%s %s: %s' % (
                entry['start'], dt_et.tzname(), entry['showTitle'] if entry['showTitle'] else entry['title'])
            common.addListItem(name, '', 'nba_tv_play_episode', iconimage=entry['image'], customparams=params)
Example #20
def fetchams(ref):
    try:
        dj = json.load(fetch("%s/dossier/%s?format=json" % (settings.PARLTRACK_URL,ref)))
    except:
        raise ValueError
    dossier,_ = Dossier.objects.get_or_create(id=ref,
                                              title=dj['procedure']['title'],
                                              _date=datetime.datetime.now())
    committees={}
    for am in dj['amendments']:
        for committee in am['committee']:
            date=am['date'].split('T')[0]
            id=committee_map.get(committee,committee)
            if not (id,date) in committees:
                c,_=Committee.objects.get_or_create(dossier=dossier,
                                                          title=committee,
                                                          cid=id,
                                                          src = am['src'],
                                                          date=date)
                committees[(id, date)]=c
            else:
                c=committees[(id,date)]
            a,_=Amendment.objects.get_or_create(seq = am['seq'],
                                                dossier = dossier,
                                                committee = c,
                                                lang = am['orig_lang'],
                                                authors = am['authors'],
                                                new = '\n'.join(am.get('new',[])),
                                                old = '\n'.join(am.get('old',[])),
                                                type = am['location'][0][0],
                                                location = am['location'][0][1])
    return dossier
Example #21
def save_search_result(p, queue, retry=0):
    proxy = Proxy.get_random()['address']
    url = SEARCH_URL.format(SEARCH_TEXT, p)

    try:
        r = fetch(url, proxy=proxy)
    except (Timeout, ConnectionError):
        sleep(0.1)
        retry += 1
        if retry > 5:
            queue.put(url)
            raise GreenletExit()
        try:
            p = Proxy.objects.get(address=proxy)
            if p:
                p.delete()
        except DoesNotExist:
            pass

        return save_search_result(p, queue, retry)
    soup = BeautifulSoup(r.text, 'lxml')
    results = soup.find(class_='results')
    if results is None:
        # This proxy has been blocked; switch to another one
        sleep(0.1)
        retry += 1
        if retry > 5:
            queue.put(url)
            raise GreenletExit()
        return save_search_result(p, queue, retry)
    articles = results.find_all(
        'div', lambda x: 'wx-rb' in x)
    for article in articles:
        save_article(article)
Example #22
def crawl_detail(url):
    '''
        Fetch the product details.
    '''
    data = utils.fetch(url)
    soup = bs(data, 'lxml')

    details = {}

    # Link to Amazon
    # cheero embeds the link via JavaScript, so BeautifulSoup cannot be used here
    pat = re.compile(r'https?://amzn\.to/[0-9a-zA-Z]+')
    amazon = list(set([s for s in pat.findall(data)]))
    for a in amazon:
        details['amazon'] = a

    # Specifications
    for h3 in soup.find_all('h3'):
        if h3 and h3.string != 'SPEC':
            continue

        for row in h3.find_next_sibling().find_all('div', class_='table_elem'):
            cols = row.find_all('div')
            if len(cols) != 2:
                continue

            details[cols[0].string] = cols[1].text

    return details
Example #23
def check_proxy(p):
    try:
        res = fetch(
            'http://weixin.sogou.com/weixin?query=python&type=2&page=1',
            proxy=p['address'])
        if len(res.text) < 10000:
            p.delete()
    except RequestException:
        p.delete()
Example #24
def crawl(url: str = base_url):
    '''
        Fetch the list of products.
    '''
    data = utils.fetch(url)
    data_list = [
        data,
    ]
    soup = bs(data, 'lxml')

    # Read each listed page in order
    page_list = list(
        set([
            a['href'] for a in soup.find('ul', class_='page-numbers').find_all(
                'a', class_='page-numbers')
        ]))
    for page in page_list:
        d = utils.fetch(page)
        if d:
            data_list.append(d)

    # item list
    output = []
    for d in data_list:
        soup = bs(d, 'lxml')
        for item in soup.find('ul', class_='products').find_all('li'):
            if not item.find('div', class_='product_details'):
                continue

            o = {'manufacture': 'RAVPower'}
            o['name'] = item.find('h3').string.strip()
            o['url'] = item.find('a', class_='product_item_link')['href']
            o['image'] = item.find('img')['src']
            o['detail'] = crawl_detail(item.a['href'])

            # Guess the capacity from the product name
            m = re.search(r'([1-9][0-9]*00)mAh', o['name'])
            if m:
                o['capacity'] = int(m.group(1))

            output.append(o)

    return output
Example #25
def main():
    argvs = sys.argv
    if len(argvs) != 4:
        print('usage:\n    delete_all_your_files_in_direct_message.py <Slack Web API token> <Your Slack name> <Target user name>\n')
        exit()

    end_point = 'https://slack.com/api/'
    token, your_name, target_user_name = argvs[1:]
    token = '?token=' + token

    # fetch users.list
    users_list = utils.fetch(end_point + 'users.list' + token)
    your_id = [member['id'] for member in users_list['members'] if member.get('name') == your_name][0]
    target_user_id = [member['id'] for member in users_list['members'] if member.get('name') == target_user_name][0]
    print('your_id: ' + your_id)
    print('target_user_id: ' + target_user_id)

    # fetch im.list
    im_list = utils.fetch(end_point + 'im.list' + token)
    target_im_id = [im['id'] for im in im_list['ims'] if im.get('user') == target_user_id][0]
    print('target_im_id: ' + target_im_id)

    # fetch files.list
    your_files_list = utils.fetch_all_files(end_point + 'files.list' + token + '&user=' + your_id)
    target_ims_your_files_list = [f for f in your_files_list if target_im_id in f.get('ims', [])]

    # show your files
    for f in target_ims_your_files_list:
        print(f['id'], f['url_private'])

    # files.delete
    print('------------------------------')
    print('{0} item(s) will be deleted. Are you sure?'.format(len(target_ims_your_files_list)))
    ans = utils.prompt()

    if ans == 'y' or ans == 'Y':
        for f in target_ims_your_files_list:
            print(f['id'], f['url_private'])
            delete_status = utils.fetch(end_point + 'files.delete' + token + '&file=' + f['id'])
            print(delete_status)

        print('complete!!')
Example #26
def main():
    argvs = sys.argv
    if len(argvs) != 4:
        print('usage:\n    delete_all_your_posts_in_direct_message.py <Slack Web API token> <Your Slack name> <Target user name>\n')
        exit()

    end_point = 'https://slack.com/api/'
    token, your_name, target_user_name = argvs[1:]
    token = '?token=' + token

    # fetch users.list
    users_list = utils.fetch(end_point + 'users.list' + token)
    your_id = [member['id'] for member in users_list['members'] if member.get('name') == your_name][0]
    target_user_id = [member['id'] for member in users_list['members'] if member.get('name') == target_user_name][0]
    print('your_id: ' + your_id)
    print('target_user_id: ' + target_user_id)

    # fetch im.list
    im_list = utils.fetch(end_point + 'im.list' + token)
    target_im_id = [im['id'] for im in im_list['ims'] if im.get('user') == target_user_id][0]
    print('target_im_id: ' + target_im_id)

    # fetch im.history
    im_history = utils.fetch_all_history(end_point + 'im.history' + token + '&channel=' + target_im_id + '&count=1000')
    your_posts_list = [message for message in im_history if message.get('user') == your_id and message.get('subtype', '') == '']

    # show your posts
    for message in your_posts_list:
        print(message['text'].replace('\n', ''), message['ts'])

    # chat.delete
    print('------------------------------')
    print('{0} item(s) will be deleted. Are you sure?'.format(len(your_posts_list)))
    ans = utils.prompt()

    if ans == 'y' or ans == 'Y':
        for message in your_posts_list:
            print(message['text'].replace('\n', ''), message['ts'])
            delete_status = utils.fetch(end_point + 'chat.delete' + token + '&ts=' + message['ts'] + '&channel=' + target_im_id)
            print(delete_status)

        print('complete!!')
Example #27
File: daemon.py Project: RBron/sun
 def __init__(self):
     pynotify.uninit()
     pynotify.init("sun")
     self.pkg_count = fetch()[0]
     self.message_added = ""
     self.summary = "{0}Software Updates".format(" " * 14)
     self.message = ("{0}{1} Software updates are available\n".format(
         " " * 3, self.pkg_count))
     self.icon = "{0}{1}.png".format(icon_path, __all__)
     self.n = pynotify.Notification(self.summary, self.message, self.icon)
     self.n.set_timeout(60000 * int(config()["STANDBY"]))
Example #28
 def visit(self, update=False):
     url = self.get_user_feed_url(update=update)
     print 'visit %s' % url
     js = fetch(url)
     d = json.loads(js)
     if d['status'] == 'ok':
         for item in d['items']:
             pk = int(item['pk'])
             if pk not in self.items:
                 self.items[pk] = item
     return d
Example #29
File: models.py Project: gage/proto
    def create_youtube_object_by_id(self, youtube_id):
        try:
            obj = self.get(youtube_id=youtube_id)
            return obj
        except YoutubeVideo.DoesNotExist:
            pass

        youtube_obj = youtube_utils.fetch(youtube_id)
        youtube_obj.pop('thumburl')
        obj, created = self.get_or_create(youtube_id=youtube_obj['youtube_id'], defaults=youtube_obj)
        return obj
Example #30
 def __init__(self):
     notify2.uninit()
     notify2.init("sun")
     self.pkg_count = fetch()[0]
     self.message_added = ""
     self.summary = "{0}Software Updates".format(" " * 14)
     self.message = ("{0}{1} Software updates are available\n".format(
         " " * 3, self.pkg_count))
     self.icon = "{0}{1}.png".format(icon_path, __all__)
     self.n = notify2.Notification(self.summary, self.message, self.icon)
     self.n.set_timeout(60000 * int(config()["STANDBY"]))
Example #31
def save_proxies(url):
    try:
        r = fetch(url)
    except requests.exceptions.RequestException:
        return False
    addresses = re.findall(PROXY_REGEX, r.text)
    for address in addresses:
        proxy = Proxy(address=address)
        try:
            proxy.save()
        except NotUniqueError:
            pass
Example #32
def preprocess(image_path):
    if image_path == "random":
        image = np.random.normal(size=(256, 256, 3)).astype(np.float32)
        image -= image.min()
        image /= image.max()
    else:
        image = Image.open(fetch(image_path)).convert("RGB")

    rgb2bgr = T.Lambda(lambda x: x[th.LongTensor([2, 1, 0])])
    normalize = T.Normalize(mean=[103.939, 116.779, 123.68], std=[1, 1, 1])

    return normalize(rgb2bgr(T.ToTensor()(image) * 255)).unsqueeze(0)
Example #33
def save_proxies(url):
    try:
        r = fetch(url)
    except requests.exceptions.RequestException:
        return False
    addresses = re.findall(PROXY_REGEX, r.text)
    for address in addresses:
        proxy = Proxy(address=address)
        try:
            proxy.save()
        except NotUniqueError:
            pass
Example #34
def unregister():
    fb_uid = request.forms.uid
    access_token = request.forms.access_token
    url = 'https://api.facebook.com/method/fql.query?query=SELECT+uid%2C+name%2C+pic_big%2C+sex+FROM+user+WHERE+uid+%3D'
    url += fb_uid
    url += '&access_token='
    url += access_token
    url += '&format=json'
    fb_user_data = json.loads(utils.fetch(url))    
    if 'error_code' in fb_user_data:
        return 'error'
    else:
        db.users.remove({'fb_uid':fb_uid})
        return 'user removed'
Example #35
def parse_source(source, idx, header):
    """
    Import data from a single source based on the data type.
    """
    path = '{}/{}'.format(config.workspace_dir, idx)
    if not os.path.exists(path):
        os.makedirs(path)

    cache_url = source[header.index('cache')]
    cache_filename = re.search('/[^/]*$', cache_url).group()
    fetch(cache_url, path + cache_filename)

    files = rlistdir(path)
    for f in files:
        if re.match(r'.*\.(zip|obj|exe)$', f):  # some files had mislabelled ext
            unzip(f, path)

    shapes = []
    files = rlistdir(path)
    for f in files:
        if re.match(r'.*\.({})$'.format('|'.join(config.fiona_extensions)), f):
            objs = import_with_fiona(f, source[0])
            for obj in objs:
                shapes.append(obj)
        elif re.match(r'.*\.csv$', f):
            objs = import_csv(f, source[0])
            for obj in objs:
                shapes.append(obj)

    shutil.rmtree(path)

    if not shapes:
        _L.warning(
            'failed to parse source. did not find shapes. files in archive: {}'
            .format(files))

    return shapes
Example #36
 def kuaidaili():
     """
     Kuaidaili proxy list: https://www.kuaidaili.com
     """
     url = "https://www.kuaidaili.com/free/{}"
     items = ["inha/{}/".format(_) for _ in range(1, 21)]
     for proxy_type in items:
         html = fetch(url.format(proxy_type))
         if html:
             doc = pyquery.PyQuery(html)
             for proxy in doc(".table-bordered tr").items():
                 ip = proxy("[data-title=IP]").text()
                 port = proxy("[data-title=PORT]").text()
                 if ip and port:
                     yield "http://{}:{}".format(ip, port)
Example #37
    def data5u():
        """
        Data5U proxy list: http://www.data5u.com/
        """
        url = "http://www.data5u.com/"

        html = fetch(url)
        if html:
            doc = pyquery.PyQuery(html)
            for index, item in enumerate(doc("li ul").items()):
                if index > 0:
                    ip = item("span:nth-child(1)").text()
                    port = item("span:nth-child(2)").text()
                    schema = item("span:nth-child(4)").text()
                    if ip and port and schema:
                        yield "{}://{}:{}".format(schema, ip, port)
Example #38
    def ip89():
        """
        89ip free proxy list: http://www.89ip.cn
        """
        url = "http://www.89ip.cn/index_{}.html"

        items = [p for p in range(1, 8)]
        for proxy_type in items:
            html = fetch(url.format(proxy_type))
            if html:
                doc = pyquery.PyQuery(html)
                for item in doc(".layui-col-md8 tr").items():
                    ip = item("td:nth-child(1)").text()
                    port = item("td:nth-child(2)").text()
                    if ip and port:
                        yield "http://{}:{}".format(ip, port)
                        yield "https://{}:{}".format(ip, port)
Example #39
    def ip3366():
        """
        ip3366 cloud proxy list: http://www.ip3366.net
        """
        url = "http://www.ip3366.net/free/?stype=1&page={}"

        items = [p for p in range(1, 8)]
        for page in items:
            html = fetch(url.format(page))
            if html:
                doc = pyquery.PyQuery(html)
                for proxy in doc(".table-bordered tr").items():
                    ip = proxy("td:nth-child(1)").text()
                    port = proxy("td:nth-child(2)").text()
                    schema = proxy("td:nth-child(4)").text()
                    if ip and port and schema:
                        yield "{}://{}:{}".format(schema.lower(), ip, port)
Example #40
    def _run(self, isbn):
        url = 'http://openlibrary.org/api/books?bibkeys=ISBN:%s&jscmd=data&format=json' % isbn

        try:
            json = simplejson.loads(fetch(url))
            result = dict()
            json = json['ISBN:'+isbn]
            result['title'] = json['title']
            result['author'] = ', '.join([i['name'] for i in json['authors']])
            result['isbn'] = isbn
            result['publisher'] = ', '.join([i['name'] for i in json['publishers']])
            result['date'] = json['publish_date']
            result['source'] = json['url']
            if 'cover' in json:
                result['photo'] = json['cover']['medium']
            return result
        except:
            return None
Example #41
    def iphai():
        """
        IP Hai proxy list: http://www.iphai.com
        """
        url = "http://www.iphai.com/free/{}"

        items = ["ng"]
        for proxy_type in items:
            html = fetch(url.format(proxy_type))
            if html:
                doc = pyquery.PyQuery(html)
                for item in doc(".table-bordered tr").items():
                    ip = item("td:nth-child(1)").text()
                    port = item("td:nth-child(2)").text()
                    schema = item("td:nth-child(4)").text()
                    if not schema:
                        schema = "HTTP"
                    if ip and port and schema:
                        yield "{}://{}:{}".format(schema.lower(), ip, port)
Example #42
def save_search_result(page, queue, retry=0):
    proxy = Proxy.get_random()['address']
    url = SEARCH_URL.format(SEARCH_TEXT, page)

    try:
        r = fetch(url, proxy=proxy)
    except (Timeout, ConnectionError, IOError):
        sleep(0.1)
        retry += 1
        if retry > 5:
            put_new_page(page, queue)
            raise GreenletExit()
        try:
            p = Proxy.objects.get(address=proxy)
            if p:
                p.delete()
        except DoesNotExist:
            pass

        return save_search_result(page, queue, retry)
    soup = BeautifulSoup(r.text, 'lxml')
    results = soup.find(class_='results')
    if results is None:
        # This proxy has been blocked; switch to another one
        sleep(0.1)
        retry += 1
        if retry > 5:
            put_new_page(page, queue)
            print 'retry too much!'
            raise GreenletExit()
        return save_search_result(page, queue, retry)
    articles = results.find_all(
        'div', lambda x: 'wx-rb' in x)
    for article in articles:
        save_article(article)

    page_container = soup.find(id='pagebar_container')
    if page_container and u'下一页' in page_container.text:  # u'下一页' means "next page"
        last_page = int(page_container.find_all('a')[-2].text)
        current_page = int(page_container.find('span').text)
        for page in range(current_page + 1, last_page + 1):
            put_new_page(page, queue)
Example #43
def calcAll():
    """
    Calculate the new macd-coefficients for all MACD-objects

    :return: List of serialized MACD-objects
    """
    global macd_objects
    global data

    for macd in macd_objects:
        try:
            if macd.pair not in data:
                data[macd.pair] = fetch(macd.pair)  # get data
                data[macd.pair] = parse_data(
                    data[macd.pair])  # each pair stores its parsed sdf data

        except Exception as err:
            pass
        sdf = macd.calculate_coefficient(data[macd.pair][macd.time_period])
    data = dict()  # empty data
Example #44
async def get_repositories(user: dict, token: str) -> list:
    private_repos = int(user['owned_private_repos'])
    public_repos = int(user['public_repos'])
    total_repo_count = private_repos + public_repos

    async with aiohttp.ClientSession() as session:
        requests = [
            asyncio.ensure_future(
                fetch(
                    session,
                    f'{API_BASE}/user/repos?type=owner&sort=full_name&page={page}',
                    headers={
                        'Authorization': f'token {token}',
                    },
                )
            )
            for page in range(math.ceil(total_repo_count / PER_PAGE_COUNT))
        ]

        return await asyncio.gather(*requests)
Example #45
def save_search_result(page, queue, retry=0):
    proxy = Proxy.get_random()['address']
    url = SEARCH_URL.format(SEARCH_TEXT, page)

    try:
        r = fetch(url, proxy=proxy)
    except (Timeout, ConnectionError, IOError):
        sleep(0.1)
        retry += 1
        if retry > 5:
            put_new_page(page, queue)
            raise GreenletExit()
        try:
            p = Proxy.objects.get(address=proxy)
            if p:
                p.delete()
        except DoesNotExist:
            pass

        return save_search_result(page, queue, retry)
    soup = BeautifulSoup(r.text, 'lxml')
    results = soup.find(class_='results')
    if results is None:
        # This proxy has been blocked; switch to another one
        sleep(0.1)
        retry += 1
        if retry > 5:
            put_new_page(page, queue)
            print 'retry too much!'
            raise GreenletExit()
        return save_search_result(page, queue, retry)
    articles = results.find_all('div', lambda x: 'wx-rb' in x)
    for article in articles:
        save_article(article)

    page_container = soup.find(id='pagebar_container')
    if page_container and u'下一页' in page_container.text:  # u'下一页' means "next page"
        last_page = int(page_container.find_all('a')[-2].text)
        current_page = int(page_container.find('span').text)
        for page in range(current_page + 1, last_page + 1):
            put_new_page(page, queue)
Example #46
    def xici():
        """
        Xici proxy list: http://www.xicidaili.com
        """
        url = "http://www.xicidaili.com/{}"

        items = []
        for page in range(1, 8):
            items.append(("wt/{}".format(page), "http://{}:{}"))
            items.append(("wn/{}".format(page), "https://{}:{}"))

        for item in items:
            proxy_type, host = item
            html = fetch(url.format(proxy_type))
            if html:
                doc = pyquery.PyQuery(html)
                for proxy in doc("table tr").items():
                    ip = proxy("td:nth-child(2)").text()
                    port = proxy("td:nth-child(3)").text()
                    if ip and port:
                        yield host.format(ip, port)
Example #47
 def ip_66():
     """
     66ip proxy list: http://www.66ip.cn
     """
     from copy import deepcopy
     headers = deepcopy(HEADERS)
     cookie = loop.run_until_complete(get_cookie())
     headers.update({
         "Cookie": cookie,
         "Host": "www.66ip.cn",
         "Referer": "http://www.66ip.cn/nm.html"
     })
     url = 'http://www.66ip.cn/nmtq.php?getnum=100&isp=0&anonymoustype=0&start=&ports=&export=&ipaddress=&area=1&proxytype={}&api=66ip'
     pattern = r"\d+\.\d+\.\d+\.\d+:\d+"
     items = [(0, "http://{}"), (1, "https://{}")]
     for item in items:
         proxy_type, host = item
         html = fetch(url.format(proxy_type), headers=headers)
         if html:
             for proxy in re.findall(pattern, html):
                 yield host.format(proxy)
Example #48
    def _run(self, isbn):
        url = 'http://www.livelib.ru/find/%s' % isbn
        soup = BeautifulSoup(fetch(url))

        try:
            result = dict()
            result['title'] = soup.find('div', {'class': 'title'}).a.string
            result['author'] = soup.find('a', {'class': 'author unnoticeable'}).string
            span_info = soup.find('span', {'class': 'info'})
            result['publisher'] = span_info.string.replace('&laquo;', '').replace('&raquo;', '')
            span_info = span_info.nextSibling
            span_info = span_info.nextSibling
            result['date'] = span_info.string.replace(u' г.', '')
            span_info = span_info.nextSibling
            span_info = span_info.nextSibling
            result['isbn'] = isbn #span_info.string.replace(u'ISBN: ', '').replace('-', '')
            result['source'] = url
            result['photo'] = soup.find('div', {'class': 'thumbnail'}).a.img['src'].replace('/s/','/l/') # small size -> large
            return result
        except:
            return None
Example #49
def register():
    fb_uid = request.forms.uid
    lon = request.forms.lon
    lat = request.forms.lat
    loc = [float(lon), float(lat)]
    access_token = request.forms.access_token
    url = 'https://api.facebook.com/method/fql.query?query=SELECT+uid%2C+name%2C+pic_big%2C+sex+FROM+user+WHERE+uid+%3D'
    url += fb_uid
    url += '&access_token='
    url += access_token
    url += '&format=json'
    fb_user_data = json.loads(utils.fetch(url))
    if 'error_code' in fb_user_data:
        return 'error'
    else:
        fb_user = fb_user_data[0]
        db.users.update({'fb_uid':fb_uid},{'$set':{'name':fb_user['name'], 'pic_big':fb_user['pic_big'], 'loc':loc}}, True)
        distance = 0.250 # 250 meters
        radians_distance = distance / 6371.0 # the radius of the earth is 6371 km
        cursor = db.users.find({"loc":{"$nearSphere":loc, "$maxDistance": radians_distance}},{'name':1,'fb_uid':1,'pic_big':1}).limit(200)
        nearby = list((record) for record in cursor)
        return utils.jsondumps(nearby)
Example #50
def crawl_detail(url):
    '''
        Fetch the product details.
    '''
    data = utils.fetch(url)
    soup = bs(data, 'lxml')

    details = {}

    # Link to Amazon
    amazon = soup.find('a', class_='to_amazon_button')
    if amazon:
        details['amazon'] = amazon['href']

    # Specifications
    for row in soup.find('div', class_='item_detail_product').find_all('tr'):
        cols = row.find_all('td')
        # Skip rows that are not in "label | value" format (two columns)
        if len(cols) != 2:
            continue
        details[cols[0].string] = cols[1].string

    return details
Example #51
    def download(self):
        "Download the best torrent for the current episode"
        torrent = self.best_candidate()
        if not torrent:
            return
        log.info("  - Downloading %s" % torrent.url)
        try:
            req = fetch(torrent.url)
            with open(config.torrents_dir + '/' + torrent.name + '.torrent', 'wb') as f:
                f.write(req.read())
        except Exception as e:
            torrent.failed_download = torrent.get('failed_download', 0) + 1
            print "  - Failed for the %d time(s) [%s, %s]" % (torrent.failed_download, type(e), e)
            self.hits.save()
            return
        guessed_next = numbering_from_str(torrent.next_episode)

        next, _ = self.get_episodes_after(guessed_next)
        self.hits.current = next if next else guessed_next
        log.info("  - Current episode is now %s" % self.hits.current)
        torrent.downloaded = True

        self.hits.save()
Example #52
    def run(self, isbn):
        url = 'http://www.labirint.ru/search/?txt=%s' % (isbn)

        try:
            reg = rx_content.search(fetch(url).decode('cp1251'))
            soup = BeautifulSoup(reg.group(0))

            result = dict()
            result['title'] = soup.find('span', {'class': 'fn'}).next.next.attrs[1][1]
            result['photo'] = soup.find('span', {'class': 'photo'}).next.next.attrs[1][1]
            result['series'] = soup.find('span', {'class': 'category'}).next.next.attrs[1][1]
            result['publisher'] = soup.find('span', {'class': 'brand'}).next.next.attrs[1][1]
            div = soup.find('div', {'class': 'isbn smallbr'})
            result['isbn'] = isbn
            div = div.findNext('div')
            result['author'] = div.next.next.string
            div = div.findNext('div')
            reg = rx_date.search(div.next.next.next.next)
            if reg: result['date'] = reg.group(1)
            result['source'] = url
            return result
        except:
            return None
Example #53
    return state, header


def filter_polygons(state, header):
    """
    Removes any non-polygon sources from the state file.

    We are only interested in parsing parcel data, which is
    marked as Polygon in the state file.
    """
    filtered_state = []

    for source in state:
        if 'Polygon' in source[header.index('geometry type')]:
            filtered_state.append(source)

    return filtered_state


if __name__ == '__main__':
    if not os.path.isfile(config.statefile_path):
        fetch(config.state_url, config.statefile_path)

    if not os.path.exists(config.output_dir):
        os.makedirs(config.output_dir)

    raw_state, header = load_state()
    state = filter_polygons(raw_state, header)

    parse_statefile(state, header)
Example #54
def install():
    fetch('http://piumarta.com/software/peg/peg-%(peg)s.tar.gz')
    extract('peg-%(peg)s.tar.gz')
    make('peg-%(peg)s', 'CC=clang')
    make('peg-%(peg)s', 'PREFIX=%s install' % env.prefix)
Example #55
def install():
    fetch("http://www.fastcgi.com/dist/fcgi-%(fcgi)s.tar.gz")
    extract("fcgi-%(fcgi)s.tar.gz")
    configure("fcgi-%(fcgi)s", ["--prefix=%s" % env.prefix])
    make("fcgi-%(fcgi)s")
    make("fcgi-%(fcgi)s", "install")
     'gtkbin-1.7.3.zip', DRIVE_C),
    ('https://dl.dropboxusercontent.com/u/4780737/pywebkitgtk.zip',
     'pywebkitgtk.zip', SITE_PACKAGES),
]

FILES = [
    # Unneeded. Serves only as an example.
    #('https://dl.dropboxusercontent.com/u/4780737/gtkspell.pyd',
    #      os.path.join(DRIVE_C_REAL, 'Python27', 'Lib', 'site-packages', 'gtkspell.pyd'))
]

print HELP

for url, filename in INSTALLERS:
    path = os.path.join(INSTALLERS_DIR, filename)
    fetch(url, path)
    install(path, use_wine=IS_LINUX)

for url, filename, dest in TARBALLS:
    path = os.path.join(INSTALLERS_DIR, filename)
    fetch(url, path)
    cmd = ['wine'] if IS_LINUX else []
    assert path.endswith('.zip'), path
    cmd.extend([SEVEN_ZIP, 'x', '-o' + dest, path])
    run(cmd)

for url, dest in FILES:
    fetch(url, dest)

# TODO: Remove once pygtkspellcheck lists its dependencies.
run(PYTHON + ['-m', 'pip', 'install', 'pyenchant==1.6.8'])