Code Example #1
File: api_parse.py Project: luckyariane/arthas-bot
def __api_call(self, api, auth=True):
    url = 'https://api.twitch.tv/helix/' + api
    if auth:
        page = get_webpage(url, h=H_AUTH)
    else:
        page = get_webpage(url, h=H_BASIC)
    return convert_json(page)
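None of the examples on this page include the `get_webpage` or `convert_json` helpers themselves; each project defines them in its own `utils` module. As a rough sketch of what the Twitch examples assume (the `requests` usage, the header constants, and the exact signatures are guesses for illustration, not the projects' actual code):

import json
import requests  # assumption: the real helpers may be built on urllib instead

# Placeholder header dicts standing in for H_BASIC / H_AUTH.
H_BASIC = {'Client-ID': 'xxxxxxxx'}
H_AUTH = {'Client-ID': 'xxxxxxxx', 'Authorization': 'Bearer xxxxxxxx'}

def get_webpage(url, h=None):
    """Fetch a URL and return the response body as text."""
    resp = requests.get(url, headers=h)
    resp.raise_for_status()
    return resp.text

def convert_json(page):
    """Parse a JSON string into a Python object."""
    return json.loads(page)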
Code Example #2
File: utils.py Project: teampopong/pokrbot
def fetch_new_bill_ids(assembly_id):
    directory = DIR['meta']
    meta_data = '%s/%d.csv' % (directory, assembly_id)

    # Read the existing meta CSV: collect known bill ids and the most
    # recent proposal date, so only newer bills are requested.
    lines = list(open(meta_data, 'r'))[1:]
    lines = [line.decode('utf-8') for line in lines]
    existing_ids = set(line.split(',', 1)[0].strip('"') for line in lines)
    last_proposed_date = max(line.split('","', 6)[5].strip('"') for line in lines)
    baseurl = BASEURL['list']
    page_size = PAGE_SIZE
    url = '%(baseurl)sPROPOSE_FROM=%(last_proposed_date)s&PAGE_SIZE=%(page_size)d' % locals()

    directory = '%s/%s' % (DIR['list'], assembly_id)
    fn = '%s/tmp.html' % directory

    # Download the bill list page and parse its table rows.
    utils.get_webpage(url, fn)
    p = utils.read_webpage(fn)
    rows = utils.get_elems(p, X['table'])

    # Append rows not seen before to the meta CSV and collect their ids.
    new_bill_ids = []
    with open(meta_data, 'a') as f:
        for r in reversed(rows):
            columns = r.xpath(X['columns'])
            if len(columns) == 8:
                p = parse_columns(columns)
                if p[0] not in existing_ids:
                    list_to_file(p, f)
                    new_bill_ids.append(p[0])
    return new_bill_ids
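The crawler examples call `utils.get_webpage(url, fn)` with a filename as the second argument, writing the fetched page to disk instead of returning it. A minimal sketch of that variant, assuming a `requests`-based implementation (the optional `decoding` parameter, which appears commented out in a later example, is also an assumption):

import requests  # assumption: the projects' utils may use urllib instead

def get_webpage(url, filename, decoding=None):
    """Fetch `url` and write the response body to `filename`.

    If `decoding` is given (e.g. 'euc-kr'), decode the raw bytes with
    that codec and store the result as UTF-8.
    """
    resp = requests.get(url)
    resp.raise_for_status()
    data = resp.content
    if decoding:
        data = data.decode(decoding).encode('utf-8')
    with open(filename, 'wb') as f:
        f.write(data)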
Code Example #3
File: api_parse.py Project: luckyariane/arthas-bot
def __api_call_old(self, api, auth=False):
    url = 'https://api.twitch.tv/kraken/' + api
    if auth:
        page = get_webpage(url, h=H_AUTH_OLD)
    else:
        page = get_webpage(url, h=H_BASIC)
    return convert_json(page)
Code Example #4
File: safety.py Project: StephenEhmann/wrmap
def find(location, bounds, data, safetyMaps):
    for sm in safetyMaps:
        page = utils.get_webpage(sm)

        # host
        m = re.search(r'var assetHost = "(\S+)"', page)
        if (m):
            host = m.group(1)
        else:
            raise Exception('ERROR: Failed to find "var assetHost" in ' + sm)

        # city id
        m = re.search(r'data-id=\'(\d+)\'', page)
        if (m):
            cityId = m.group(1)
        else:
            raise Exception('ERROR: Failed to find "data-id=" in ' + sm)

        # polygon ids
        polyIdsJson = utils.get_webpage('https:' + host +
                                        '/polygons/features/cities/' +
                                        str(cityId) + '.json')
        polyIds = json.loads(polyIdsJson)
        #print('polys = ' + str(polys))

        for polyId in polyIds:
            neighborhoodPoly = json.loads(
                utils.get_webpage('https:' + host +
                                  '/polygons/polygon/neighborhoods/' +
                                  str(polyId) + '.json'))
            #print('neighborhoodPoly for ' + str(polyId) + ' = ' + str(neighborhoodPoly))
            data[polyId] = neighborhoodPoly
Code Example #5
File: get_html.py Project: JH27/crawlers
def get_specifics(assembly_id, bill_id, link_id):
    if assembly_id > 16:
        baseurl = BASEURL['specifics']
    else:
        baseurl = BASEURL['specifics_old']

    outp = '%s/%s/%s.html' % (DIR['specifics'], assembly_id, bill_id)
    url = '%s%s' % (baseurl, link_id)
    utils.get_webpage(url, outp)
Code Example #6
File: crawler.py Project: winnersky/crawlers
def get_pledges(basepath, baseurl, ids):
    if not os.path.exists(basepath):
        os.makedirs(basepath)

    tot = len(ids)
    for i, n in enumerate(ids):
        fn = '%s/%s.html' % (basepath, n)
        utils.get_webpage(baseurl + n, fn)
        #utils.get_webpage(baseurl + n, fn, decoding='euc-kr')
        print 'File written to %s (%s/%s)' % (fn, i, tot)
Code Example #7
File: html.py Project: JH27/crawlers
def get_npages(assembly_id):
    url, directory = convert(assembly_id)
    utils.check_dir(directory)

    fn = '%s/tmp.html' % directory
    utils.get_webpage(url, fn)
    page = utils.read_webpage(fn)
    m = re.search(u'총(.+)건', page.xpath('//span[@class="text3"]/text()')[0])
    nbills = int(m.group(1))
    npages = int(math.ceil(nbills/float(PAGE_SIZE)))
    print 'Total %d bills, %d pages to %s' % (nbills, npages, directory)
    return npages
Code Example #8
File: html.py Project: winnersky/crawlers
def get_npages(assembly_id):
    url, directory = convert(assembly_id)
    utils.check_dir(directory)

    fn = '%s/tmp.html' % directory
    utils.get_webpage(url, fn)
    page = utils.read_webpage(fn)
    m = re.search(u'총(.+)건', page.xpath('//span[@class="text3"]/text()')[0])
    nbills = int(m.group(1))
    npages = int(math.ceil(nbills / float(PAGE_SIZE)))
    print 'Total %d bills, %d pages to %s' % (nbills, npages, directory)
    return npages
Code Example #9
File: bus.py Project: StephenEhmann/wrmap
def findDetails(origAllData, keywords):
    debug = True
    startAt = 0
    limit = 0

    if (0):
        # the first 600
        startAt = 0
        limit = 600
    if (0):
        # the rest
        startAt = 600
        limit = 0
    count = 0
    allData = []
    for d in origAllData:
        if (count >= startAt):
            if ('url' not in d):
                if (debug):
                    print('query: ' + d['place_id'])
                details = gmaps.place(d['place_id'])
                d['url'] = details['result']['url']
            if (d.get('found_keyword') == None):
                print('url = ' + d['url'])
                failed = False
                try:
                    buspage = utils.get_webpage(d['url'])
                except Exception as e:
                    failed = True
                else:
                    for k in keywords:
                        if (re.search(k, buspage)):
                            d['found_keyword'] = 1
                    if (d.get('found_keyword') == None):
                        d['found_keyword'] = 0

                if (failed):
                    print('failed at ' + str(count))
                    allFilename = criterionName + '.all.' + str(startAt) + '_' + str(count-1) + '.new.yml'
                    with open(allFilename, 'w') as yaml_file:
                        dump(allData, yaml_file, default_flow_style=False, Dumper=Dumper)
                    break

            allData.append(d)

        count += 1
        if (limit > 0 and count >= limit):
            break

    return allData
Code Example #10
File: stream_data.py Project: luckyariane/arthas-bot
    def updateStreamData(self):
        if on_cooldown(self.lastUpdate, self.updateInterval): return

        stream_data = convert_json(get_webpage(self.url, h=H_BASIC))        
        if stream_data['stream'] == None: 
            self.live = False
            self.stream_start = None 
            self.game = None
        else:
            self.live = True 
            self.stream_start = convert_timestamp(stream_data['stream']['created_at'])
            self.game = stream_data['stream']['game']

        print self.live 
        print self.stream_start
        print self.game
        print self.lastUpdate
        self.lastUpdate = set_cooldown()
Code Example #11
File: get_html.py Project: JH27/crawlers
def get_summaries(assembly_id, bill_id, link_id, has_summaries):
    outp = '%s/%s/%s.html' % (DIR['summaries'], assembly_id, bill_id)
    try:
        utils.get_webpage('%s%s' % (BASEURL['summaries'], link_id), outp)
    except:
        pass
Code Example #12
from utils import get_webpage, log_error
from pdfdoc import create_pdf
from bs4 import BeautifulSoup
from subprocess import Popen

raw_html = get_webpage('http://poems.com/today.php')
html = BeautifulSoup(raw_html, 'html.parser')
title = html.select('#poem_container #page_title')[0].text
content = html.select('#poem_container #poem p')

try:
    create_pdf(title, content)
    Popen(['poem.pdf'], shell=True)
except PermissionError as ex:
    log_error('Error while generating the PDF document: {0}'.format(str(ex)))
Code Example #13
File: crawler.py Project: winnersky/crawlers
def get_people(url, page):
    utils.get_webpage(url, page)
Code Example #14
File: currency.py Project: luckyariane/arthas-bot
def getCurrentUsers(self):
    self.current_users = list()
    api_dict = convert_json(get_webpage(self.url))
    for user_type in api_dict['chatters']:
        self.current_users += api_dict['chatters'][user_type]
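The loop in getCurrentUsers flattens per-role user lists into a single list. The JSON it expects presumably follows Twitch's old TMI chatters endpoint; roughly like the following, where the field names mirror that endpoint and the values are made up for illustration:

# Illustrative shape of the 'chatters' JSON the loop above iterates over.
api_dict = {
    'chatter_count': 3,
    'chatters': {
        'moderators': ['mod_one'],
        'viewers': ['viewer_one', 'viewer_two'],
    },
}

current_users = []
for user_type in api_dict['chatters']:
    current_users += api_dict['chatters'][user_type]
# current_users -> ['mod_one', 'viewer_one', 'viewer_two']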