Example #1
def searchVideo():
    temp = input("Input Video Title: ")
    #Convert to valid YouTube search URL
    temp = temp.replace(" ", "+")
    htmlURL = "https://www.youtube.com/results?search_query=" + temp

    #Get link of first page videos search result
    URLS = acc.getVideoURLS(htmlURL)
    if len(URLS) == 0:
        print("No Videos found!")
        return

    print("")
    pick = -1
    while True:
        pick = input("Please Select a Video to Download: ")
        try:
            pick = int(pick)
            if 0 <= pick < len(URLS):  # bound by the actual number of results
                break
            else:
                print("Not a valid entry! Try again!")
        except ValueError:
            print("That's not a number! Try again.")

    URL.downloadByURL(URLS[pick])
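The bounded-integer prompt above recurs in several of these snippets; a minimal, reusable sketch of the same loop (the helper name is ours, not the project's):

def prompt_index(prompt, count):
    # Keep asking until the user enters an integer in [0, count)
    while True:
        try:
            pick = int(input(prompt))
        except ValueError:
            print("That's not a number! Try again.")
            continue
        if 0 <= pick < count:
            return pick
        print("Not a valid entry! Try again!")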
Example #2
def GetIndex(stock_id, year_from, year_to):
    """Scrape quarterly index records for stock_id between the given years."""
    indexes = []
    for year in range(year_from, year_to + 1):
        for season in range(1, 5):
            content = URL.request(__index_url_date.format(str(stock_id), str(year), str(season)))
            search_begin = content.find('<table class=')
            if search_begin == -1:
                continue
            # Narrow the content down to the rows of the result table
            search_begin = content.find("<tr class=''>", search_begin)
            search_end = content.find('</table>', search_begin)
            content = content[search_begin:search_end]
            search_end = 0
            while True:
                search_begin = content.find('<tr class=', search_end)
                if search_begin == -1:
                    break
                # Each row holds nine <td> cells; strip thousands separators
                record = []
                for _ in range(9):
                    search_begin = content.find('<td>', search_end)
                    search_end = content.find('</td>', search_begin + 4)
                    value = content[search_begin + 4:search_end]
                    record.append(value.replace(',', ''))
                indexes.append(record)
    indexes.sort(key=lambda x: x[0])
    return indexes
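The cell extraction above is plain string scanning with str.find. Run over a literal table row, the same loop behaves like this (a self-contained demo, independent of the project's URL module):

row = "<tr class=''><td>1,234</td><td>5.6</td></tr>"
cells, end = [], 0
while True:
    begin = row.find('<td>', end)
    if begin == -1:
        break
    end = row.find('</td>', begin + 4)
    cells.append(row[begin + 4:end].replace(',', ''))
print(cells)  # ['1234', '5.6']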
Example #3
def list(url):
    """ Returns a tuple (directories, files) providing a list of
        directory and file URLs which can be found at the
        given URL.

        The directory and file names are made absolute to URL's
        network location.

        Supported schemes are '' (no scheme = local file), 'file' and
        'ftp'.

    """
    url = URL.URL(url)
    scheme = url.scheme

    if scheme == 'ftp':
        ftp = ftp_open(url)
        try:
            values = ftp_listdir(ftp, url)
        finally:
            ftp_close(ftp)
        return values

    elif scheme == 'file' or scheme == '':
        return file_listdir(url)

    else:
        raise ValueError('unsupported scheme "%s"' % scheme)
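Assuming the ftp_open, ftp_listdir, ftp_close, and file_listdir helpers defined elsewhere in this module, usage looks like:

dirs, files = list('ftp://ftp.example.com/pub/')  # remote listing
dirs, files = list('/usr/local/lib/')             # no scheme = local directory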
Example #4
    def click_me(self):
        # Passing in the current class instance (self)
        bq.writeToScrol(self)
        sleep(2)
        htmlData = url.getHtml()
        print(htmlData)
        self.scr.insert(tk.INSERT, htmlData)
Example #5
def clickMe(self):
    self.action.configure(text='Hello ' + self.name.get())
    print(self)
    bq.writeToScrol(self)
    sleep(2)
    htmlData = url.getHtml()
    print(htmlData)
    self.scr.insert(tk.INSERT, htmlData)
Example #6
    def clickMe(self):
        # self.action.configure(text='Hello ' + self.name.get() + ' ' + self.numberChosen.get())
        # aLabel.configure(foreground='red')
        # self.createThread(8)
        qs.writeToScrol(self)
        time.sleep(2)
        htmlData = url.getHtml()
        print(htmlData)
        self.scr.insert(tk.INSERT, htmlData)
Example #7
    def click_me(self):
        self.action.configure(text='Hello ' + self.name.get())
        # print(self)
        # self.create_thread()                # now called from imported module
        bq.write_to_scrol(self)
        sleep(2)
        html_data = url.get_html()
        print(html_data)
        self.scrol.insert(tk.INSERT, html_data)
Example #8
def search_URL_question(content, url_list):
    # Build a question-search URL; urlencode({'': content}) renders as
    # '=<escaped content>', which completes the trailing '&q' into 'q=...'
    content = {'': content}
    url = "https://www.zhihu.com/search?type=content&q" + urllib.parse.urlencode(
        content)
    # Search for question URLs on this page (the url_list argument is
    # replaced, not extended)
    url_list = URL.get_content(url, [])
    # Duplicate removal
    url_list = list(set(url_list))
    return url_list
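The {'': content} dict looks odd but is deliberate: urlencode renders an empty key as '=<escaped value>', which completes the literal '&q' into a proper 'q=...' parameter. A quick standalone check:

import urllib.parse

print("https://www.zhihu.com/search?type=content&q"
      + urllib.parse.urlencode({'': 'python url'}))
# https://www.zhihu.com/search?type=content&q=python+url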
Example #9
def loading_data_from_json(begin):
    url = URL.URLclass(
        location=str(begin[0]) + "," + str(begin[1]),
        type=get_type(),
        rad=5000,
        Yure_key=YOUR_API_KEY,
        BASE_URL="https://maps.googleapis.com/maps/api/place/radarsearch/json?"
    )
    BASE_URL = url.get_url_with_lat_and_lng_by_hend()
    data = json.loads(get_data_from_URL(BASE_URL))
    return data
Example #10
def echo(bot):
    """Echo the message the user sent."""
    global update_id
    # Request updates after the last update_id
    for update in bot.get_updates(offset=update_id, timeout=10):
        update_id = update.update_id + 1

        if update.message:  # your bot can receive updates without messages
            # Reply to the message
            URLreply = URL.shortenURL(update.message.text)
            update.message.reply_text(str(URLreply))
Example #11
    def addURL(self, url):

        proto, user, password, server, port, path = URL.splitURL(url)
        if not server:
            print('Invalid URL: {}'.format(url))
            return False
        action = self.db.addURL((proto, user, password, server, port, path))
        if action is not True:
            self.logger.log('Can’t add “{}”: {}'.format(url, action))
        else:
            self.logger.log('Added URL “{}”'.format(url))
Example #12
    def run(self):
        datastore = DB()

        # First pass: count the lines so progress can be reported
        with open(self.infile, 'r') as readFile:
            count = sum(1 for line in readFile)

        # Second pass: look up each author and store the result
        with open(self.infile, 'r') as readFile:
            i = 0
            for line in readFile:
                i = i + 1
                authorId = line[:line.find('\t')].strip()

                if not datastore.exists(authorId):
                    authorName = line[line.find('\t') + 1:].strip()
                    surname = authorName[authorName.rfind(' ') + 1:].strip()
                    u = URL('http://forebears.io/surnames/' + surname, authorId)
                    datastore.insert(authorId, authorName, u.fetch())
                print 'Thread ' + self.infile[8:] + ' is ' + str(i * 100.0 / count) + '% completed..'
Example #13
    def clickme(self):
        self.action.configure(text='Hello ' + self.name.get() + ' ' +
                              'you are' + ' ' + self.age.get() + ' years old')
        print(self)
        bq.writeToScrol(self)
        # self.createThread()
        # for idx in range(3):
        #     time.sleep(5)
        #     self.scr.insert(tk.INSERT, str(idx) + '\n')
        sleep(2)
        htmlData = url.getHtml()
        print(htmlData)
        self.scr.insert(tk.INSERT, htmlData)
Example #14
def search_URL_topic(content, url_list, num_page):
    # get a topic url
    url = "https://www.zhihu.com/search?type=topic&q=" + content

    # search url in this page
    url_temp = URL.get_topic_id(url, [])
    if len(url_temp) > 0:
        url_list = url_list + url_temp

    # duplicate removal
    url_list = list(set(url_list))

    # Search URLs breadth-first (see the deque sketch after this example);
    # guard against an empty list so url is always bound
    url = url_list[0] if url_list else ''
    while '/topic' in url and len(url_list) > 0:
        url = url_list.pop(0)
        url_temp = URL.get_question_id(url, [], num_page)
        if len(url_temp) > 0:
            url_list = url_list + url_temp
    # duplicate removal
    url_list = list(set(url_list))
    return url_list
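The pop-from-the-front loop above is a breadth-first traversal, and popping index 0 of a Python list costs O(n) per pop. A generic sketch of the same BFS shape built on collections.deque (fetch_links is a hypothetical stand-in for URL.get_question_id):

from collections import deque

def bfs_collect(seed_urls, fetch_links, max_urls=1000):
    # Breadth-first URL expansion with de-duplication
    queue = deque(seed_urls)
    seen = set(seed_urls)
    while queue and len(seen) < max_urls:
        for link in fetch_links(queue.popleft()):
            if link not in seen:
                seen.add(link)
                queue.append(link)
    return list(seen)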
Example #15
    def clickMe(self):
        self.action.configure(text='Hello ' + self.name.get())
        #         # Non-threaded code with sleep freezes the GUI
        #         for idx in range(10):
        #             sleep(5)
        #             self.scr.insert(tk.INSERT, str(idx) + '\n')

        #         # Threaded method does not freeze our GUI
        #         self.createThread()

        # Passing in the current class instance (self)
        print(self)
        bq.writeToScrol(self)
        sleep(2)
        htmlData = url.getHtml()
        print(htmlData)
        self.scr.insert(tk.INSERT, htmlData)
Example #16
    def downLoad(self, URL, key, args=None):
        # Note: the URL parameter shadows the URL module; here it is the
        # URL table object passed in by the caller
        url = URL.get_urlTable().get(key)
        if url is None:
            return

        if args is not None:
            if isinstance(args, list):
                url = url + args[0] + '/' + args[1]
            else:
                url = url + args
        try:
            f = urlopen(url)
        except IOError:
            return -1

        g = f.read()
        json_data = json.loads(g)
        return json_data
Example #17
def UpdateCodes():
    db = SQL.sql(__index_db)
    db.set("CREATE TABLE IF NOT EXISTS {}(code char(6) unique, name char(36))".format(__index_table))
    # The offsets below are the lengths of these marker strings
    symbol_marker = "var STOCKSYMBOL = '"
    name_marker = "var STOCKNAME = '"
    for index in range(__begin_index, __end_index):
        index_str = '{:0>6}'.format(index)
        content = URL.request(__index_url.format(index_str))
        # Pull the quoted values out of the page's JavaScript variables
        index_begin = content.find(symbol_marker)
        if index_begin == -1:
            continue
        index_end = content.find("'", index_begin + len(symbol_marker))
        index_code = content[index_begin + len(symbol_marker):index_end]
        index_begin = content.find(name_marker)
        if index_begin == -1:
            continue
        index_end = content.find("'", index_begin + len(name_marker))
        index_name = content[index_begin + len(name_marker):index_end]
        LOG.info('%s %s' % (index_code, index_name))
        __indexs.append([index_code, index_name])
        db.set("REPLACE INTO {} VALUES('{}','{}')".format(__index_table, index_code, index_name))
    db.close()
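The marker arithmetic above can also be done with one regex per JavaScript variable; an equivalent sketch, assuming the page embeds the same var NAME = '...' assignments:

import re

def extract_js_var(content, name):
    # Pull the quoted value out of e.g. "var STOCKSYMBOL = '600000'"
    match = re.search(r"var %s = '([^']*)'" % re.escape(name), content)
    return match.group(1) if match else None

index_code = extract_js_var(content, 'STOCKSYMBOL')
index_name = extract_js_var(content, 'STOCKNAME')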
Example #18
    def processa_feeds(self):

        # Look up the active feeds in the database
        cursor_feeds = self.bd.procura_feeds()

        # Alchemy API
        api = Alchemy()

        # Class for inserting news items
        gn = GestorNoticias(self.bd, api)

        # For each Atom feed, process the returned links
        for (id_feed, link) in cursor_feeds:

            try:
                # Fetch the latest posts from the feed
                posts = self.le_feed(link)

                # For each post, strip the HTML and add it to the database
                for post in posts.entries:

                    try:
                        # Skip the item if it has already been imported
                        if URL.url_importada(post.link, self.bd):
                            continue

                        # Call the AlchemyAPI to clean up the text
                        texto_processado = api.processa_html(post.link)

                        # Add the news item to the database
                        gn.adiciona_noticia(
                            post.link, post.title, texto_processado, None, id_feed, None)

                    except Exception:
                        self.log.registra_erro(
                            'Error extracting data from link ' + post.link + ' of feed ' + str(id_feed) + ': ' + traceback.format_exc())

            except Exception:
                self.log.registra_erro(
                    'Error processing feed ' + str(id_feed) + ': ' + traceback.format_exc())
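le_feed is not shown, but the posts.entries / post.link / post.title attributes match the feedparser API; if that is the library in use, the fetch step reduces to a one-liner (an assumption, not the project's confirmed implementation):

import feedparser

def le_feed(self, link):
    # Fetch and parse an Atom/RSS feed; the result's .entries carry
    # the .link and .title attributes used above
    return feedparser.parse(link)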
Example #19
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import sys, os

sys.path.append(os.path.abspath('./Modules/'))

import DevNux, RW, URL

# Configuration
path_store = 'store/'

# Load the modules in use (each class name is rebound to its instance)
DevNux = DevNux()
RW = RW(path_store)
URL = URL()

while True:

    DevNux.clear()
    print('##############  M E N U ###############')
    print('1) Shorten URL')
    print('2) View hash table')
    print('3) Save hash table')
    print('4) Load file')
    print('5) Search by hash')
    print('6) Base64')
    print('0) Quit')

    acao = input()
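The snippet ends after reading acao; a dispatch dict is the usual way to route the menu choice without a long if/elif chain (the handler wiring below is a sketch, not the project's code):

import sys

def not_implemented():
    print('Option not implemented')

handlers = {
    '0': sys.exit,           # 0) Quit
    # '1': shorten_url, ...  # wire the real handlers here
}
handlers.get(acao, not_implemented)()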
Example #20
import json
from URL import *
from Git import *

url = URL('https://api.github.com/users/aedorado/repos?per_page=1000')
all_repos = json.loads(url.fetch())

# print urllib2.urlopen('http://www.google.com').read()
for repo in all_repos:
	# print repo['html_url']
	Git.clone(repo['html_url'])
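Git.clone is project code; if it simply shells out to the git CLI, a minimal stand-in (an assumption, not the project's implementation) would be:

import subprocess

def clone(repo_url):
    # Shallow-clone a repository into the current directory
    subprocess.check_call(['git', 'clone', '--depth', '1', repo_url])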
Example #21
def Browser():
    import URL
    return URL.Browser()
Example #22
def Cookie():
    import URL
    return URL.CookieJar()
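Both factories above defer import URL to call time. This lazy-import pattern avoids paying the import cost, and sidesteps import cycles, unless the object is actually requested; after the first call the module is cached in sys.modules, so repeat calls are cheap. The general shape (heavy_module is hypothetical):

def make_widget():
    import heavy_module  # imported on first call, cached in sys.modules afterwards
    return heavy_module.Widget()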
Example #23
import URL

ekeitho = URL.Url("http://www.ekeitho.com?toast=pb")
if ekeitho.getValue("toast") == "pb":
    print "Passed Test 1: Got Correct Value for ekeitho"
else:
    print "Failed Test 1: Wrong Value"

try:
    ekeitho.getValue("pb")
except KeyError:
    print "Passed Test 2: URL class passed the test with an unknown field"
else:
    print "Failed Test 2: returned an unknown field somehow."
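The test exercises query-string lookup plus a KeyError on unknown fields; a minimal URL.Url that would pass both tests can be built on the standard library (an illustrative sketch, not the real class):

from urllib.parse import urlparse, parse_qs

class Url:
    def __init__(self, url):
        self._params = parse_qs(urlparse(url).query)

    def getValue(self, field):
        return self._params[field][0]  # raises KeyError for unknown fields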
Example #24
def excel_process(filedata, projectname):
    """
    Read the uploaded Excel workbook and load its topics and questions
    """
    filename = '%s/%s' % (UPLOAD_PATH, filedata)
    book = xlrd.open_workbook(filename)
    data = book_tree(book)
    topics = {}
    reference = None
    priority_cnt = db((db.project_tree.priority > 0)
                      & (db.project_tree.project == projectname)).select(
                          db.project_tree.priority)
    if len(priority_cnt) < 1:
        order = 1
        norder = 1
    else:
        order = priority_cnt.last().priority
        norder = priority_cnt.last().priority
    #order = 1
    #norder = 1
    # Row in Data
    for line in data:
        kind = True
        last_topic = None
        pos_col = 0
        # Cols in Row
        for col in line:
            # If the col type is int, stop: the rest of the row is the question
            if type(col) is int:
                kind = False
                break
            if kind:
                if col not in topics:
                    if last_topic in topics:
                        value_topic = {
                            'name': col,
                            'dependence': topics[last_topic],
                            'priority': order,
                            'project': projectname
                        }
                    else:
                        value_topic = {
                            'name': col,
                            'dependence': last_topic,
                            'priority': order,
                            'project': projectname
                        }
                    reference = db.topics.update_or_insert(**value_topic)
                    if reference is None:
                        reference = db.topics((db.topics.name == col)).id
                    topics[col] = reference
                    order += 5
                last_topic = col
            pos_col += 1
        # last_topic now holds the last topic seen in this row
        question = line[pos_col:]
        elems = len(question)
        value_question = None
        if elems == 2:
            value_question = {
                'priority': question[0],
                'name': question[1],
                'project': projectname
            }
        elif elems == 3:
            if question[2] == '-':
                question[2] = ''
            value_question = {
                'priority': question[0],
                'name': question[1],
                'option_data': question[2],
                'project': projectname
            }
        elif elems == 4:
            if question[2] == '-':
                question[2] = ''
            if question[3] == '-':
                question[3] = ''
            value_question = {
                'priority': question[0],
                'name': question[1],
                'option_data': question[2],
                'score_data': question[3],
                'project': projectname
            }
        elif elems == 5:
            if question[2] == '-':
                question[2] = ''
            if question[3] == '-':
                question[3] = ''
            value_question = {
                'priority': question[0],
                'name': question[1],
                'option_data': question[2],
                'score_data': question[3],
                'tags': [],
                'project': projectname
            }
            for tag in question[4].split(','):
                temp = {'name': tag}
                tag_id = db.tag.update_or_insert(**temp)
                if tag_id is None:
                    tag_id = db.tag(db.tag.name == tag).id
                value_question['tags'].append(tag_id)
        if value_question:
            reference = db.activities.insert(**value_question)
            #reference = db.activities.update_or_insert(**value_question)
            #if reference is None:
            #reference = db.activities(
            #(db.activities.name == question[1])).id
            value_tree = {
                'project': projectname,
                'topic': topics[last_topic],
                'activity': reference,
                'priority': norder
            }
            db.project_tree.update_or_insert(**value_tree)
            norder += 5
    redirect(URL('project_tree'))
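book_tree is project code; the raw rows it wraps come straight from the xlrd API, roughly like this (a sketch assuming the first sheet holds the data; note xlrd returns numeric cells as floats, so the real book_tree presumably converts whole numbers to int for the type(col) is int check above):

import xlrd

def book_tree(book):
    # Yield each row of the first sheet as a list of cell values
    sheet = book.sheet_by_index(0)
    for row_idx in range(sheet.nrows):
        yield sheet.row_values(row_idx)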
Example #25
import json
import URL
from urllib2 import urlopen  # Python 2 source, matching the print statement below

class DownLoad:
    def __init__(self):
        pass

    def downLoad(self, URL, key, args=None):
        # Note: the URL parameter shadows the module imported above;
        # here it is the URL instance passed in from __main__
        url = URL.get_urlTable().get(key)
        if url is None:
            return

        if args is not None:
            if isinstance(args, list):
                url = url + args[0] + '/' + args[1]
            else:
                url = url + args
        try:
            f = urlopen(url)
        except IOError:
            return -1

        g = f.read()
        json_data = json.loads(g)
        return json_data


if __name__ == '__main__':
    url = URL.URL()
    d = DownLoad()
    args = ['ltc_btc','1']
    print d.downLoad(url,"trade_root",args)
Example #26
def main(query, articles):
    dict_of_info = URL.main(query, articles)    #Gets info from PubMed
    ID_paper_obj_dict = make_paper_objects(dict_of_info)
    ID_sentence_lists = sent_with_cooccur(ID_paper_obj_dict, query)
    return ID_sentence_lists
Example #27
    def __init__(self, baseUrl, params_obj):
        # self.baseUrl = baseUrl
        # self.params = self._makeParams(params_obj)
        self._url = URL.createInstance(baseUrl, params_obj)
Example #28
def main(query, articles):
    dict_of_info = URL.main(query, articles)  #Gets info from PubMed
    ID_paper_obj_dict = make_paper_objects(dict_of_info)
    ID_sentence_lists = sent_with_cooccur(ID_paper_obj_dict, query)
    return ID_sentence_lists
Example #29
import random
from PersistentQueue import define_record, RecordFIFO, b64

if __name__ == '__main__':
    import URL

    UrlParts = define_record("UrlParts", "scheme hostname port relurl")
    f = RecordFIFO(UrlParts, (str, str, str, b64), "url_parts")

    for line in random.sample(open("urls").readlines(), 100000):
        line = line.strip()
        try:
            parts = URL.get_parts(line)
        except URL.BadFormat, exc:
            print exc
            continue
        f.put(*parts)

    f.close()
Example #30
    def settle_down(self, url):
        import re  # needed by the tag-stripping below, not just inside get_time

        global global_url_counter
        self.text = []
        self.link = []
        html_str = self.get_html_str(url)

        def get_title(html_str):
            i = html_str.find('<title>')
            j = html_str.find('</title>')
            if i == -1 or j == -1:
                i = html_str.find('<TITLE>')
                j = html_str.find('</TITLE>')
            return html_str[i + 7:j]

        origin_title = get_title(html_str)

        def get_time(html_str):
            # Find yyyy-mm-dd dates and keep the most recent one
            grouppattern = re.compile(r'(\d+)-(\d+)-(\d+)')
            biggest = 0
            record = (0, 0, 0)
            target = grouppattern.findall(html_str)
            if not target:
                return record
            for year, month, day in target:
                # Rough ordering key only; the month/day weights are not calendar-exact
                tmp = (int(year) - 2000) * 365 + 12 * int(month) + int(day)
                if tmp > biggest:
                    biggest, record = tmp, (year, month, day)
            return record

        time = get_time(html_str)

        # Replace newlines with spaces so the regexes below see one long string
        s = html_str.replace('\n', ' ')

        re_cdata = re.compile('//<!\[CDATA\[[^>]*//\]\]>',
                              re.DOTALL)  # match CDATA blocks
        # re_script = re.compile('<\s*script[^>]*>[^<]*<\s*/\s*script\s*>', re.DOTALL)  # Script
        # (the pattern above cannot handle script bodies that contain a '<')
        re_script = re.compile('<script.*?/script>', re.DOTALL)  # Script
        re_style = re.compile('<\s*style[^>]*>[^<]*<\s*/\s*style\s*>',
                              re.DOTALL)  # style
        re_style_upper = re.compile('<\s*STYLE[^>]*>[^<]*<\s*/\s*STYLE\s*>',
                                    re.DOTALL)  # uppercase STYLE
        re_br = re.compile('<br\s*?/?>')  # line breaks
        re_h = re.compile('</?\w+[^>]*>')  # HTML tags
        re_comment = re.compile('<!--[^>]*-->')  # HTML comments

        s = re_cdata.sub('', s)  # strip CDATA
        s = re_script.sub('', s)  # strip scripts
        s = re_style.sub('', s)  # strip style blocks
        s = re_style_upper.sub('', s)  # strip uppercase STYLE blocks
        s = re_br.sub('\n', s)  # convert <br> to newlines
        s = re_comment.sub('', s)  # strip HTML comments

        self.feed(s)
        self.close()

        links_to = []
        for link in self.link:
            link = formuler(link)
            if urlfilter(link):
                links_to.append(link)
        # Store the body text as multiple lines rather than one long line
        global_cache.append(
            URL(url, global_url_counter, origin_title, self.text, links_to,
                time))
        global_url_counter += 1
        return links_to
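The compiled patterns above amount to a hand-rolled HTML-to-text pass (re_h is compiled but the remaining tags are handled by the HTML parser via self.feed). Applied to a small sample, the same substitutions reduce to this self-contained demo:

import re

sample = "<html><script>var x = 1;</script><p>Hello<br/>world</p><!-- note --></html>"
s = re.sub(r'<script.*?/script>', '', sample, flags=re.DOTALL)  # drop scripts
s = re.sub(r'<br\s*?/?>', '\n', s)                              # <br> -> newline
s = re.sub(r'<!--[^>]*-->', '', s)                              # drop comments
s = re.sub(r'</?\w+[^>]*>', '', s)                              # drop remaining tags
print(s)  # prints 'Hello' and 'world' on two lines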