def downloadChart(self):
    """ Fetch the user's online-presence chart and save it to /tmp/chart.png. """
    u = url.url(
        "https://www.odesk.com/api/team/v1/snapshots/"
        + self.settings.company + "/"
        + self.settings.username + "/"
        + str(int(time.time())) + ".xml"
    )
    u.addParam("api_key", self.settings.apiKey)
    u.addParam("api_token", self.settings.token)
    u.addParam("api_sig", self.apiSig(u))
    c = urllib2.urlopen(u.asString())
    xml = minidom.parse(c)
    onlinePresence = xml.getElementsByTagName("online_presence")[0].firstChild.nodeValue
    # Render the presence data as a small Google Chart sparkline.
    chartUrl = (
        "http://chart.apis.google.com/chart?chs=70x22&cht=ls&chco=00FF00"
        + "&chf=bg,lg,90,000000&chm=B,00FF00,0,0,0&chd=t:"
        + onlinePresence
    )
    c = urllib2.urlopen(chartUrl)
    # Open in binary mode: the chart is a PNG image.
    chart = open("/tmp/chart.png", "wb")
    chart.write(c.read())
    chart.close()
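# The url.url wrapper used above (and in the other oDesk snippets) is not
# shown here. The class below is a minimal sketch reconstructed only from
# the calls that appear in this code -- url.url(base), addParam() and
# asString() -- and is an assumption about what url.py might contain,
# not the real module.
import urllib

class url(object):
    def __init__(self, base):
        self.base = base
        self.params = {}

    def addParam(self, name, value):
        # Remember a query parameter to append when the URL is rendered.
        self.params[name] = value

    def asString(self):
        # Render the base URL plus the urlencoded query string.
        if not self.params:
            return self.base
        return self.base + "?" + urllib.urlencode(self.params)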
def crawl_web(seeds, search_type):
    snippet_lookup = dict()
    index = dict()
    graph = dict()
    total_pages = 40
    for item in seeds:
        to_crawl = [item[0]]
        max_pages = item[1]
        crawled = []
        while to_crawl:
            current_domain = ""
            if search_type == "BFS":
                # Breadth-first: take the oldest link from the frontier.
                current_page = to_crawl[0]
                to_crawl = to_crawl[1:]
            if 'https://' in current_page or 'http://' in current_page:
                o = urlparse(current_page)
                current_domain = o.scheme + "://" + o.netloc
            crawled.append(current_page)
            print "crawling " + str(len(crawled))
            print current_page
            if len(crawled) - 1 >= max_pages:
                break
            content = get_page(current_page)
            if content:
                soup = BeautifulSoup(content)
                title = soup.title.string
                snippet = get_snippet(current_page, soup)
                snippet_lookup[current_page] = (title, snippet)
                add_page_to_index(index, current_page, content)
                outlinks = get_all_links(content)
                graph[current_page] = outlinks
                for link in outlinks:
                    if link not in crawled and link != "#":
                        # Absolute links are queued as-is.
                        if link.find('https://') != -1 or link.find('http://') != -1:
                            to_crawl.append(link)
                        # Relative links are resolved against the current domain.
                        if link[0] == "/" and link.find("//") == -1 and url.url(current_domain + link):
                            to_crawl.append(current_domain + link)
    return index, graph, snippet_lookup
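# Hypothetical usage of crawl_web, assuming seeds is a list of
# (start_url, max_pages) pairs as the unpacking of item[0]/item[1] above
# suggests; the seed URLs are placeholders, not from the original code.
if __name__ == "__main__":
    seeds = [("http://example.com/", 10), ("http://example.org/", 5)]
    index, graph, snippet_lookup = crawl_web(seeds, "BFS")
    print "crawled %d pages, indexed %d terms" % (len(graph), len(index))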
def beginProcess(info, identifier, gdbm_files, filter_file, path, category):
    log = logging.getLogger('classify')
    log.debug("classify.beginProcess()")
    if identifier == "w":
        url_obj = url.url(gdbm_files, filter_file, path, category)
        url_obj.processHTML(info)
    elif identifier == "f":
        textfile_obj = textfile.textfile(gdbm_files, filter_file, path, category)
        textfile_obj.processUTFFile(info)
    elif identifier == "h":
        textfile_obj = textfile.textfile(gdbm_files, filter_file, path, category)
        textfile_obj.processHTMLFile(info)
    else:
        log.debug("identifier value is not valid")
        return
    log.debug("program terminated")
    return
def daySnapshots(self, date):
    """ Return the number of snapshots recorded on the specified date. """
    u = url.url(
        "https://www.odesk.com/api/team/v1/workdiaries/"
        + self.settings.company + "/"
        + self.settings.username + "/"
        + date + ".xml"
    )
    u.addParam("api_key", self.settings.apiKey)
    u.addParam("api_token", self.settings.token)
    u.addParam("api_sig", self.apiSig(u))
    c = urllib2.urlopen(u.asString())
    xml = minidom.parse(c)
    return len(xml.getElementsByTagName("snapshot"))
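# apiSig() is referenced above but not shown. The sketch below is an
# assumption: it signs the request by hashing a shared secret together with
# the sorted parameter name/value pairs, a common scheme for this style of
# api_sig parameter. self.settings.apiSecret and u.params are assumed
# attributes, not confirmed by the source.
import hashlib

def apiSig(self, u):
    base = self.settings.apiSecret
    for name in sorted(u.params.keys()):
        base += name + str(u.params[name])
    return hashlib.md5(base).hexdigest()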
def getToken(self):
    # TODO: finish it
    # Step 1: request a frob.
    u = url.url("https://www.odesk.com/api/auth/v1/keys/frobs.xml")
    u.addParam("api_key", self.settings.apiKey)
    u.addParam("api_sig", self.apiSig(u))
    handler = urllib2.urlopen(u.asString())
    xml = minidom.parse(handler)
    frob = xml.getElementsByTagName("frob")[0].firstChild.nodeValue
    # Step 2: open the authorization page for the frob.
    u = url.url("https://www.odesk.com/services/api/auth/")
    u.addParam("api_key", self.settings.apiKey)
    u.addParam("frob", frob)
    u.addParam("api_sig", self.apiSig(u))
    handler = urllib2.urlopen(u.asString())
    # Step 3: exchange the frob for a token.
    u = url.url("https://www.odesk.com/api/auth/v1/keys/tokens.xml")
    u.addParam("api_key", self.settings.apiKey)
    u.addParam("frob", frob)
    u.addParam("api_sig", self.apiSig(u))
    handler = urllib2.urlopen(u.asString())
    xml = minidom.parse(handler)
    return xml.getElementsByTagName("token")[0].firstChild.nodeValue
import re
import time

from selenium import webdriver  # needed for webdriver.Chrome() below

from companyList import company
from url import url

browser = webdriver.Chrome()
# Column headers: time, company name, news title, news body, URL, detailed time, news source
header = ['时间', '公司名称', '新闻标题', '新闻正文', 'URL', '详细时间', '新闻来源']
today = time.strftime('%Y-%m-%d', time.localtime(time.time()))
news_list = []
companyObj = company()
companyListTest = companyObj.testList
companyList = companyObj.list
xpath_title = '/html/body/div[2]/div[4]/div/div[2]/div[3]/div'

# Load the search URL (note: this rebinds the name `url` from the class to the string).
urlObj = url()
url = urlObj.url


def info(x):
    # News title
    if el.xpath(xpath_title + '[{}]/h3/a/text()'.format(x)):
        title = el.xpath(xpath_title + '[{}]/h3/a/text()'.format(x))
        title = ''.join(str(i) for i in title)
        title = re.sub('[\n]', '', title)
        print(title)
    else:
        title = {}
    # News URL
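# `el` used inside info() is not defined in this excerpt. A plausible
# sketch, assuming the page already loaded in the Chrome driver is parsed
# with lxml so the XPath queries above work; load_element is a hypothetical
# helper, not part of the original script.
from lxml import etree

def load_element(driver, page_url):
    driver.get(page_url)
    return etree.HTML(driver.page_source)

# el = load_element(browser, url)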
def setUp(self):
    super(TestURL, self).setUp()
    self.url_obj = url.url(self.bot)
from gpiozero import MotionSensor
import time
import os
import numpy as np
import cv2

from url import url
from facedetection import facedetect

pir = MotionSensor(4)

while True:
    pir.wait_for_motion()
    print("Motion detected")
    url()
    facedetect()
    print("back in main")
    pir.wait_for_no_motion()
def __init__(self):
    self.o_urls = url.url()
    self.o_download = downlload.htmldowmloader()
    self.output = output.htmloutput()
    self.parser = parser_.parse1()
def __init__(self):
    self.url = url.url()
    self.store = store.store()
    self.pursueHtml = pursueHtml.pursueHtml()