def __api_call(self, api, auth=True):
    # Query the Twitch Helix API, with or without user-authorization headers.
    url = 'https://api.twitch.tv/helix/' + api
    if auth:
        page = get_webpage(url, h=H_AUTH)
    else:
        page = get_webpage(url, h=H_BASIC)
    return convert_json(page)

def fetch_new_bill_ids(assembly_id):
    # Compare the bill list page against the existing metadata CSV and
    # return only the bill ids that have not been recorded yet.
    directory = DIR['meta']
    meta_data = '%s/%d.csv' % (directory, assembly_id)
    lines = list(open(meta_data, 'r'))[1:]
    lines = [line.decode('utf-8') for line in lines]
    existing_ids = set(line.split(',', 1)[0].strip('"') for line in lines)
    last_proposed_date = max(line.split('","', 6)[5].strip('"') for line in lines)

    baseurl = BASEURL['list']
    page_size = PAGE_SIZE
    url = '%(baseurl)sPROPOSE_FROM=%(last_proposed_date)s&PAGE_SIZE=%(page_size)d' % locals()

    directory = '%s/%s' % (DIR['list'], assembly_id)
    fn = '%s/tmp.html' % directory
    utils.get_webpage(url, fn)
    p = utils.read_webpage(fn)
    rows = utils.get_elems(p, X['table'])

    new_bill_ids = []
    with open(meta_data, 'a') as f:
        for r in reversed(rows):
            columns = r.xpath(X['columns'])
            if len(columns) == 8:
                p = parse_columns(columns)
                if p[0] not in existing_ids:
                    list_to_file(p, f)
                    new_bill_ids.append(p[0])
    return new_bill_ids

def __api_call_old(self, api, auth=False):
    # Query the legacy Twitch Kraken (v5) API.
    url = 'https://api.twitch.tv/kraken/' + api
    if auth:
        page = get_webpage(url, h=H_AUTH_OLD)
    else:
        page = get_webpage(url, h=H_BASIC)
    return convert_json(page)

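# A minimal sketch of the helpers and header constants the two __api_call
# variants above rely on. The real get_webpage / convert_json and the H_*
# dicts are defined elsewhere in this bot; the names and header values below
# are assumptions, shown only to make the calls above self-explanatory.
import json
import requests

H_BASIC = {'Client-ID': '<client-id>'}                        # assumed shape
H_AUTH = dict(H_BASIC, Authorization='Bearer <oauth-token>')  # assumed shape
H_AUTH_OLD = {'Client-ID': '<client-id>',
              'Authorization': 'OAuth <oauth-token>',
              'Accept': 'application/vnd.twitchtv.v5+json'}   # assumed shape

def get_webpage(url, h=None):
    # Fetch url with optional headers and return the body as text.
    return requests.get(url, headers=h).text

def convert_json(page):
    # Parse a JSON response body into Python objects.
    return json.loads(page)
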
def find(location, bounds, data, safetyMaps):
    for sm in safetyMaps:
        page = utils.get_webpage(sm)

        # host
        m = re.search(r'var assetHost = "(\S+)"', page)
        if m:
            host = m.group(1)
        else:
            raise Exception('ERROR: Failed to find "var assetHost" in ' + sm)

        # city id
        m = re.search(r'data-id=\'(\d+)\'', page)
        if m:
            cityId = m.group(1)
        else:
            raise Exception('ERROR: Failed to find "data-id=" in ' + sm)

        # polygon ids
        polyIdsJson = utils.get_webpage(
            'https:' + host + '/polygons/features/cities/' + str(cityId) + '.json')
        polyIds = json.loads(polyIdsJson)

        for polyId in polyIds:
            neighborhoodPoly = json.loads(utils.get_webpage(
                'https:' + host + '/polygons/polygon/neighborhoods/' + str(polyId) + '.json'))
            data[polyId] = neighborhoodPoly

def fetch_new_bill_ids(assembly_id):
    # Variant of fetch_new_bill_ids above with the page size hard-coded to 100.
    directory = DIR['meta']
    meta_data = '%s/%d.csv' % (directory, assembly_id)
    lines = list(open(meta_data, 'r'))[1:]
    lines = [line.decode('utf-8') for line in lines]
    existing_ids = set(line.split(',', 1)[0].strip('"') for line in lines)
    last_proposed_date = max(line.split('","', 6)[5].strip('"') for line in lines)

    baseurl = BASEURL['list']
    url = '%(baseurl)sPROPOSE_FROM=%(last_proposed_date)s&PAGE_SIZE=100' % locals()

    directory = '%s/%s' % (DIR['list'], assembly_id)
    fn = '%s/tmp.html' % directory
    utils.get_webpage(url, fn)
    p = utils.read_webpage(fn)
    rows = utils.get_elems(p, X['table'])

    new_bill_ids = []
    with open(meta_data, 'a') as f:
        for r in reversed(rows):
            columns = r.xpath(X['columns'])
            if len(columns) == 8:
                p = parse_columns(columns)
                if p[0] not in existing_ids:
                    list_to_file(p, f)
                    new_bill_ids.append(p[0])
    return new_bill_ids

def get_specifics(assembly_id, bill_id, link_id):
    # Assemblies after the 16th use the new specifics base URL.
    if assembly_id > 16:
        baseurl = BASEURL['specifics']
    else:
        baseurl = BASEURL['specifics_old']
    outp = '%s/%s/%s.html' % (DIR['specifics'], assembly_id, bill_id)
    url = '%s%s' % (baseurl, link_id)
    utils.get_webpage(url, outp)

def get_pledges(basepath, baseurl, ids):
    if not os.path.exists(basepath):
        os.makedirs(basepath)
    tot = len(ids)
    for i, n in enumerate(ids):
        fn = '%s/%s.html' % (basepath, n)
        utils.get_webpage(baseurl + n, fn)
        #utils.get_webpage(baseurl + n, fn, decoding='euc-kr')
        print 'File written to %s (%s/%s)' % (fn, i, tot)

def get_npages(assembly_id):
    url, directory = convert(assembly_id)
    utils.check_dir(directory)
    fn = '%s/tmp.html' % directory
    utils.get_webpage(url, fn)
    page = utils.read_webpage(fn)
    # The page reports the total as '총 <n> 건' ('<n> items in total').
    m = re.search(u'총(.+)건', page.xpath('//span[@class="text3"]/text()')[0])
    nbills = int(m.group(1))
    npages = int(math.ceil(nbills / float(PAGE_SIZE)))
    print 'Total %d bills, %d pages to %s' % (nbills, npages, directory)
    return npages

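# Hypothetical shape of the module-level configuration the bill-scraper
# functions above assume (PAGE_SIZE, DIR, BASEURL, X). The real values live in
# the project's settings; every string below is a placeholder, not the actual
# endpoint or selector.
PAGE_SIZE = 100                              # rows requested per list page
DIR = {'meta': 'meta', 'list': 'list',
       'specifics': 'specifics', 'summaries': 'summaries'}
BASEURL = {'list': '<list-endpoint>?',       # placeholder endpoints
           'specifics': '<specifics-endpoint>',
           'specifics_old': '<old-specifics-endpoint>',
           'summaries': '<summaries-endpoint>'}
X = {'table': '<xpath-to-result-rows>',      # placeholder XPath selectors
     'columns': '<xpath-to-row-cells>'}
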
def findDetails(origAllData, keywords):
    # Fill in missing Google Places URLs, fetch each business page, and flag
    # whether any keyword appears on it; dump partial results to YAML if a
    # page fetch fails.
    debug = True
    startAt = 0
    limit = 0
    if (0):  # the first 600
        startAt = 0
        limit = 600
    if (0):  # the rest
        startAt = 600
        limit = 0
    count = 0
    allData = []
    for d in origAllData:
        if (count >= startAt):
            if ('url' not in d):
                if (debug):
                    print('query: ' + d['place_id'])
                details = gmaps.place(d['place_id'])
                d['url'] = details['result']['url']
            if (d.get('found_keyword') == None):
                print('url = ' + d['url'])
                failed = False
                try:
                    buspage = utils.get_webpage(d['url'])
                except Exception as e:
                    failed = True
                else:
                    for k in keywords:
                        if (re.search(k, buspage)):
                            d['found_keyword'] = 1
                    if (d.get('found_keyword') == None):
                        d['found_keyword'] = 0
                if (failed):
                    print('failed at ' + str(count))
                    allFilename = criterionName + '.all.' + str(startAt) + '_' + str(count - 1) + '.new.yml'
                    with open(allFilename, 'w') as yaml_file:
                        dump(allData, yaml_file, default_flow_style=False, Dumper=Dumper)
                    break
            allData.append(d)
        count += 1
        if (limit > 0 and count >= limit):
            break
    return allData

def updateStreamData(self):
    # Refresh cached stream info, but no more often than updateInterval allows.
    if on_cooldown(self.lastUpdate, self.updateInterval):
        return
    stream_data = convert_json(get_webpage(self.url, h=H_BASIC))
    if stream_data['stream'] is None:
        self.live = False
        self.stream_start = None
        self.game = None
    else:
        self.live = True
        self.stream_start = convert_timestamp(stream_data['stream']['created_at'])
        self.game = stream_data['stream']['game']
    print self.live
    print self.stream_start
    print self.game
    print self.lastUpdate
    self.lastUpdate = set_cooldown()

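# A minimal sketch, under assumption, of the cooldown/timestamp helpers used
# by updateStreamData above; the real implementations live elsewhere in this
# bot's code base.
import time
from datetime import datetime

def set_cooldown():
    # Record the moment the data was last refreshed.
    return time.time()

def on_cooldown(last_update, interval):
    # True while fewer than `interval` seconds have passed since last_update.
    return last_update is not None and time.time() - last_update < interval

def convert_timestamp(ts):
    # Parse ISO-8601 'created_at' strings, e.g. '2016-01-01T12:00:00Z'.
    return datetime.strptime(ts, '%Y-%m-%dT%H:%M:%SZ')
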
def get_summaries(assembly_id, bill_id, link_id, has_summaries):
    outp = '%s/%s/%s.html' % (DIR['summaries'], assembly_id, bill_id)
    try:
        utils.get_webpage('%s%s' % (BASEURL['summaries'], link_id), outp)
    except:
        # Ignore download failures (e.g. bills without a summary page).
        pass

from utils import get_webpage, log_error
from pdfdoc import create_pdf
from bs4 import BeautifulSoup
from subprocess import Popen

# Scrape today's poem, render it to a PDF, and open the generated file.
raw_html = get_webpage('http://poems.com/today.php')
html = BeautifulSoup(raw_html, 'html.parser')
title = html.select('#poem_container #page_title')[0].text
content = html.select('#poem_container #poem p')

try:
    create_pdf(title, content)
    Popen(['poem.pdf'], shell=True)
except PermissionError as ex:
    log_error('Error while generating the PDF document: {0}'.format(str(ex)))

def get_people(url, page):
    utils.get_webpage(url, page)

def getCurrentUsers(self):
    # Flatten every user_type bucket in the 'chatters' response into one list.
    self.current_users = list()
    api_dict = convert_json(get_webpage(self.url))
    for user_type in api_dict['chatters']:
        self.current_users += api_dict['chatters'][user_type]