def searchVideo():
    temp = input("Input Video Title: ")
    # Convert to valid YouTube search URL
    temp = temp.replace(" ", "+")
    htmlURL = "https://www.youtube.com/results?search_query=" + temp
    # Get link of first page videos search result
    URLS = acc.getVideoURLS(htmlURL)
    if len(URLS) == 0:
        print("No Videos found!")
        return
    print("")
    pick = -1
    while True:
        pick = input("Please Select a Video to Download: ")
        try:
            pick = int(pick)
            if pick >= 0 and pick <= 20:
                break
            else:
                print("Not a valid entry! Try again!")
        except ValueError:
            print("That's not a number! Try again.")
    URL.downloadByURL(URLS[pick])
def GetIndex(id, year_from, year_to):
    indexs = []
    for year in range(year_from, year_to + 1):
        for season in range(1, 5):
            content = URL.request(__index_url_date.format(str(id), str(year), str(season)))
            search_begin = content.find('<table class=')
            if search_begin == -1:
                continue
            search_begin = content.find('<tr class=\'\'>', search_begin)
            search_end = content.find('</table>', search_begin)
            content = content[search_begin:search_end]
            search_end = 0
            index = 0
            while True:
                search_begin = content.find('<tr class=', search_end)
                if search_begin == -1:
                    break
                record = []
                for i in range(1, 10):
                    search_begin = content.find('<td>', search_end)
                    search_end = content.find('</td>', search_begin + 4)
                    value = content[search_begin + 4:search_end]
                    value = value.replace(',', '')
                    record.append(value)
                indexs.append(record)
    indexs.sort(key=lambda x: x[0])
    return indexs
def list(url):
    """
    Returns a tuple (directories, files) providing a list of directory and
    file URLs which can be found at the given URL. The directory and file
    names are made absolute to URL's network location. Supported schemes
    are '' (no scheme = local file), 'file' and 'ftp'.
    """
    url = URL.URL(url)
    scheme = url.scheme
    if scheme == 'ftp':
        ftp = ftp_open(url)
        try:
            values = ftp_listdir(ftp, url)
        finally:
            ftp_close(ftp)
        return values
    elif scheme == 'file' or scheme == '':
        return file_listdir(url)
    else:
        raise ValueError('unsupported scheme "%s"' % scheme)
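A minimal usage sketch for the listing helper above; the FTP host and local path are placeholders, and the (directories, files) return convention is taken from the docstring.

# Hypothetical usage of list(); ftp.example.com and /tmp are placeholders.
dirs, files = list('ftp://ftp.example.com/pub/')
for d in dirs:
    print(d)  # absolute directory URLs under /pub/
local_dirs, local_files = list('/tmp')  # no scheme means a local file listing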
def click_me(self):
    # Passing in the current class instance (self)
    bq.writeToScrol(self)
    sleep(2)
    htmlData = url.getHtml()
    print(htmlData)
    self.scr.insert(tk.INSERT, htmlData)
def clickMe(self):
    self.action.configure(text='Hello ' + self.name.get())
    print(self)
    bq.writeToScrol(self)
    sleep(2)
    htmlData = url.getHtml()
    print(htmlData)
    self.scr.insert(tk.INSERT, htmlData)
def clickMe(self):
    # self.action.configure(text='Hello ' + self.name.get() + ' ' + self.numberChosen.get())
    # aLabel.configure(foreground='red')
    # self.createThread(8)
    qs.writeToScrol(self)
    time.sleep(2)
    htmlData = url.getHtml()
    print(htmlData)
    self.scr.insert(tk.INSERT, htmlData)
def click_me(self):
    self.action.configure(text='Hello ' + self.name.get())
    # print(self)
    # self.create_thread()  # now called from imported module
    bq.write_to_scrol(self)
    sleep(2)
    html_data = url.get_html()
    print(html_data)
    self.scrol.insert(tk.INSERT, html_data)
def search_URL_question(content, url_list):
    # get a question url
    content = {'': content}
    url = "https://www.zhihu.com/search?type=content&q" + urllib.parse.urlencode(content)
    # search url in this page
    url_list = URL.get_content(url, [])
    # duplicate removal
    url_list = list(set(url_list))
    return url_list
def loading_data_from_jason(begin):
    url = URL.URLclass(
        location=str(begin[0]) + "," + str(begin[1]),
        type=get_type(),
        rad=5000,
        Yure_key=YOUR_API_KEY,
        BASE_URL="https://maps.googleapis.com/maps/api/place/radarsearch/json?"
    )
    BASE_URL = url.get_url_with_lat_and_lng_by_hend()
    data = json.loads(get_data_from_URL(BASE_URL))
    return data
def echo(bot):
    """Echo the message the user sent."""
    global update_id
    # Request updates after the last update_id
    for update in bot.get_updates(offset=update_id, timeout=10):
        update_id = update.update_id + 1
        if update.message:  # your bot can receive updates without messages
            # Reply to the message
            URLreply = URL.shortenURL(update.message.text)
            update.message.reply_text(str(URLreply))
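For context, a minimal polling loop that could drive echo() above — a sketch assuming the synchronous (pre-v20) python-telegram-bot API; the 'TOKEN' string is a placeholder and update_id is the same module-level global the handler relies on.

import telegram
from telegram.error import NetworkError, Unauthorized
from time import sleep

update_id = None

def main():
    global update_id
    bot = telegram.Bot('TOKEN')  # placeholder bot token
    try:
        # Resume after the most recent update, if any exist
        update_id = bot.get_updates()[0].update_id
    except IndexError:
        update_id = None
    while True:
        try:
            echo(bot)
        except NetworkError:
            sleep(1)
        except Unauthorized:
            # The user has blocked the bot; skip past this update
            update_id += 1

if __name__ == '__main__':
    main()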
def addURL(self, url):
    proto, user, password, server, port, path = URL.splitURL(url)
    if not server:
        print('Invalid URL: {}'.format(url))
        return False
    action = self.db.addURL((proto, user, password, server, port, path))
    if action != True:
        self.logger.log('Can’t add “{}”: {}'.format(url, action))
    else:
        self.logger.log('Added URL “{}”'.format(url))
def run(self):
    datastore = DB()
    # First pass over the input file: count the lines for progress reporting
    readFile = open(self.infile, 'r')
    count = 0
    for line in readFile:
        count = count + 1
    # Second pass: look up each author and store the result
    readFile = open(self.infile, 'r')
    i = 0
    for line in readFile:
        i = i + 1
        authorId = line[:line.find('\t')].strip()
        if not datastore.exists(authorId):
            authorName = line[line.find('\t') + 1:].strip()
            surname = authorName[authorName.rfind(' ') + 1:].strip()
            u = URL('http://forebears.io/surnames/' + surname, authorId)
            datastore.insert(authorId, authorName, u.fetch())
        print 'Thread ' + self.infile[8:] + ' is ' + str(i * 100.0 / count) + '% completed..'
def clickme(self):
    self.action.configure(text='Hello ' + self.name.get() + ' ' + 'you are' + ' ' + self.age.get() + ' years old')
    print(self)
    bq.writeToScrol(self)
    # self.createThread()
    # for idx in range(3):
    #     time.sleep(5)
    #     self.scr.insert(tk.INSERT, str(idx) + '\n')
    sleep(2)
    htmlData = url.getHtml()
    print(htmlData)
    self.scr.insert(tk.INSERT, htmlData)
def search_URL_topic(content, url_list, num_page):
    # get a topic url
    url = "https://www.zhihu.com/search?type=topic&q=" + content
    # search url in this page
    url_temp = URL.get_topic_id(url, [])
    if len(url_temp) > 0:
        url_list = url_list + url_temp
        # duplicate removal
        url_list = list(set(url_list))
    # search url based on BFS
    if len(url_list) > 0:
        url = url_list[0]
        while '/topic' in url and len(url_list) > 0:
            url = url_list.pop(0)
            url_temp = URL.get_question_id(url, [], num_page)
            if len(url_temp) > 0:
                url_list = url_list + url_temp
                # duplicate removal
                url_list = list(set(url_list))
    return url_list
def clickMe(self):
    self.action.configure(text='Hello ' + self.name.get())
    # # Non-threaded code with sleep freezes the GUI
    # for idx in range(10):
    #     sleep(5)
    #     self.scr.insert(tk.INSERT, str(idx) + '\n')
    # # Threaded method does not freeze our GUI
    # self.createThread()
    # Passing in the current class instance (self)
    print(self)
    bq.writeToScrol(self)
    sleep(2)
    htmlData = url.getHtml()
    print(htmlData)
    self.scr.insert(tk.INSERT, htmlData)
def downLoad(self, URL, key, args=None):
    url = URL.get_urlTable().get(key)
    if url is None:
        return
    if args is not None:
        if isinstance(args, list):
            url = url + args[0] + '/' + args[1]
        else:
            url = url + args
    try:
        f = urlopen(url)
    except:
        return -1
    g = f.read()
    json_data = json.loads(g)
    return json_data
def UpdateCodes():
    db = SQL.sql(__index_db)
    db.set("CREATE TABLE IF NOT EXISTS {}(code char(6) unique, name char(36))".format(__index_table))
    for index in range(__begin_index, __end_index):
        index_str = '{:0>6}'.format(index)
        content = URL.request(__index_url.format(index_str))
        index_begin = content.find('var STOCKSYMBOL = \'')
        if index_begin == -1:
            continue
        index_end = content.find('\'', index_begin + 19)
        index_code = content[index_begin + 19:index_end]
        index_begin = content.find('var STOCKNAME = \'')
        if index_begin == -1:
            continue
        index_end = content.find('\'', index_begin + 17)
        index_name = content[index_begin + 17:index_end]
        LOG.info('%s %s' % (index_code, index_name))
        __indexs.append([index_code, index_name])
        db.set("REPLACE INTO {} VALUES('{}','{}')".format(__index_table, index_code, index_name))
    db.close()
def processa_feeds(self):
    # Look up the active feeds in the database
    cursor_feeds = self.bd.procura_feeds()
    # Alchemy API
    api = Alchemy()
    # Class used to insert news items
    gn = GestorNoticias(self.bd, api)
    # For each Atom feed, process the links it returns
    for (id_feed, link) in cursor_feeds:
        try:
            # Fetch the latest news items from the feed
            posts = self.le_feed(link)
            # For each post, strip the HTML and add it to the database
            for post in posts.entries:
                try:
                    # Check whether the news item still needs to be imported
                    if URL.url_importada(post.link, self.bd):
                        continue
                    # Call the AlchemyAPI to clean up the text
                    texto_processado = api.processa_html(post.link)
                    # Add the news item to the database
                    gn.adiciona_noticia(post.link, post.title, texto_processado, None, id_feed, None)
                except:
                    self.log.registra_erro('Erro ao extrair informacoes do link' + post.link + ' do feed ' + str(id_feed) + ':' + traceback.format_exc())
        except:
            self.log.registra_erro('Erro ao processar feed' + str(id_feed) + ': ' + traceback.format_exc())
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import sys, os
sys.path.append(os.path.abspath('./Modules/'))
import DevNux, RW, URL

# Configuration
path_store = 'store/'

# Load the modules that are used
DevNux = DevNux()
RW = RW(path_store)
URL = URL()

while True:
    DevNux.clear()
    print('############## M E N U ###############')
    print('1) Encurtar URL')
    print('2) Ver Tabela Hash')
    print('3) Salvar Tabela Hash')
    print('4) Carregar Arquivo')
    print('5) Buscar por Hash')
    print('6) Base64')
    print('0) Sair')
    acao = input()
import json
from URL import *
from Git import *

url = URL('https://api.github.com/users/aedorado/repos?per_page=1000')
all_repos = json.loads(url.fetch())
# print urllib2.urlopen('http://www.google.com').read()
for repo in all_repos:
    # print repo['html_url']
    Git.clone(repo['html_url'])
def Browser():
    import URL
    return URL.Browser()
def Cookie():
    import URL
    return URL.CookieJar()
import URL

ekeitho = URL.Url("http://www.ekeitho.com?toast=pb")

if ekeitho.getValue("toast") == "pb":
    print "Passed Test 1: Got Correct Value for ekeitho"
else:
    print "Failed Test 1: Wrong Value"

try:
    ekeitho.getValue("pb")
except KeyError:
    print "Passed Test 2: URL class passed the test with an unknown field"
else:
    print "Failed Test 2: returned an unknown field somehow."
def excel_process(filedata, projectname):
    """ Read excel file and gets data """
    filename = '%s/%s' % (UPLOAD_PATH, filedata)
    book = xlrd.open_workbook(filename)
    data = book_tree(book)
    topics = {}
    reference = None
    priority_cnt = db((db.project_tree.priority > 0) &
                      (db.project_tree.project == projectname)).select(db.project_tree.priority)
    if len(priority_cnt) < 1:
        order = 1
        norder = 1
    else:
        order = priority_cnt.last().priority
        norder = priority_cnt.last().priority
    # order = 1
    # norder = 1
    # Row in Data
    for line in data:
        kind = True
        last_topic = None
        pos_col = 0
        # Cols in Row
        for col in line:
            # If col type is INT then BREAK: that's a QUESTION
            if type(col) is int:
                kind = False
                break
            if kind:
                if col not in topics:
                    if last_topic in topics:
                        value_topic = {
                            'name': col,
                            'dependence': topics[last_topic],
                            'priority': order,
                            'project': projectname
                        }
                    else:
                        value_topic = {
                            'name': col,
                            'dependence': last_topic,
                            'priority': order,
                            'project': projectname
                        }
                    reference = db.topics.update_or_insert(**value_topic)
                    if reference is None:
                        reference = db.topics((db.topics.name == col)).id
                    topics[col] = reference
                    order += 5
                last_topic = col
            pos_col += 1
        # We get last_topic as LAST TOPIC used, obvious?
        question = line[pos_col:]
        elems = len(question)
        value_question = None
        if elems == 2:
            value_question = {
                'priority': question[0],
                'name': question[1],
                'project': projectname
            }
        elif elems == 3:
            if question[2] == '-':
                question[2] = ''
            value_question = {
                'priority': question[0],
                'name': question[1],
                'option_data': question[2],
                'project': projectname
            }
        elif elems == 4:
            if question[2] == '-':
                question[2] = ''
            if question[3] == '-':
                question[3] = ''
            value_question = {
                'priority': question[0],
                'name': question[1],
                'option_data': question[2],
                'score_data': question[3],
                'project': projectname
            }
        elif elems == 5:
            if question[2] == '-':
                question[2] = ''
            if question[3] == '-':
                question[3] = ''
            value_question = {
                'priority': question[0],
                'name': question[1],
                'option_data': question[2],
                'score_data': question[3],
                'tags': [],
                'project': projectname
            }
            for tag in question[4].split(','):
                temp = {'name': tag}
                tag_id = db.tag.update_or_insert(**temp)
                if tag_id is None:
                    tag_id = db.tag(db.tag.name == tag).id
                value_question['tags'].append(tag_id)
        if value_question:
            reference = db.activities.insert(**value_question)
            # reference = db.activities.update_or_insert(**value_question)
            # if reference is None:
            #     reference = db.activities((db.activities.name == question[1])).id
            value_tree = {
                'project': projectname,
                'topic': topics[last_topic],
                'activity': reference,
                'priority': norder
            }
            db.project_tree.update_or_insert(**value_tree)
            norder += 5
    redirect(URL('project_tree'))
import json
from urllib2 import urlopen  # assumed source of urlopen (Python 2); downLoad() needs urlopen and json

import URL


class DownLoad:

    def __init__(self):
        pass

    def downLoad(self, URL, key, args=None):
        url = URL.get_urlTable().get(key)
        if url is None:
            return
        if args is not None:
            if isinstance(args, list):
                url = url + args[0] + '/' + args[1]
            else:
                url = url + args
        try:
            f = urlopen(url)
        except:
            return -1
        g = f.read()
        json_data = json.loads(g)
        return json_data


if __name__ == '__main__':
    url = URL.URL()
    d = DownLoad()
    args = ['ltc_btc', '1']
    print d.downLoad(url, "trade_root", args)
def main(query, articles):
    dict_of_info = URL.main(query, articles)  # Gets info from PubMed
    ID_paper_obj_dict = make_paper_objects(dict_of_info)
    ID_sentence_lists = sent_with_cooccur(ID_paper_obj_dict, query)
    return ID_sentence_lists
def __init__(self, baseUrl, params_obj):
    # self.baseUrl = baseUrl
    # self.params = self._makeParams(params_obj)
    self._url = URL.createInstance(baseUrl, params_obj)
import random

from PersistentQueue import define_record, RecordFIFO, b64

if __name__ == '__main__':
    import URL

    UrlParts = define_record("UrlParts", "scheme hostname port relurl")
    f = RecordFIFO(UrlParts, (str, str, str, b64), "url_parts")
    for line in random.sample(open("urls").readlines(), 100000):
        line = line.strip()
        try:
            parts = URL.get_parts(line)
        except URL.BadFormat, exc:
            print exc
            continue
        f.put(*parts)
    f.close()
def settle_down(self, url):
    global global_url_counter
    self.text = []
    self.link = []
    html_str = self.get_html_str(url)

    def get_title(html_str):
        i = html_str.find('<title>')
        j = html_str.find('</title>')
        if i == -1 or j == -1:
            i = html_str.find('<TITLE>')
            j = html_str.find('</TITLE>')
        return html_str[i + 7:j]

    origin_title = get_title(html_str)

    def get_time(html_str):
        import re
        grouppattern = re.compile(r'(\d+)-(\d+)-(\d+)')
        biggest = 0
        record = (0, 0, 0)
        target = grouppattern.findall(html_str)
        if not target:
            return record
        for year, month, day in target:
            tmp = (int(year) - 2000) * 365 + 12 * int(month) + int(day)
            if tmp > biggest:
                biggest, record = tmp, (year, month, day)
        return record

    time = get_time(html_str)

    # replace every '\n' with a space
    s = ''
    for char in html_str:
        adder = ' ' if char == '\n' else char
        s += adder

    re_cdata = re.compile('//<!\[CDATA\[[^>]*//\]\]>', re.DOTALL)  # match CDATA
    # re_script = re.compile('<\s*script[^>]*>[^<]*<\s*/\s*script\s*>', re.DOTALL)  # Script
    # (that pattern cannot handle a '<' inside the script body)
    re_script = re.compile('<script.*?/script>', re.DOTALL)  # Script
    re_style = re.compile('<\s*style[^>]*>[^<]*<\s*/\s*style\s*>', re.DOTALL)  # style
    re_style_upper = re.compile('<\s*STYLE[^>]*>[^<]*<\s*/\s*STYLE\s*>', re.DOTALL)  # uppercase STYLE
    re_br = re.compile('<br\s*?/?>')  # line breaks
    re_h = re.compile('</?\w+[^>]*>')  # HTML tags (compiled but not applied below)
    re_comment = re.compile('<!--[^>]*-->')  # HTML comments
    s = re_cdata.sub('', s)        # strip CDATA
    s = re_script.sub('', s)       # strip script blocks
    s = re_style.sub('', s)        # strip style blocks
    s = re_style_upper.sub('', s)  # strip STYLE blocks
    s = re_br.sub('\n', s)         # turn <br> into newlines
    s = re_comment.sub('', s)      # strip HTML comments

    self.feed(s)
    self.close()

    links_to = []
    for link in self.link:
        link = formuler(link)
        if urlfilter(link):
            links_to.append(link)

    # The body text is stored as multiple lines rather than a single line
    global_cache.append(URL(url, global_url_counter, origin_title, self.text, links_to, time))
    global_url_counter += 1
    return links_to