def test_download_to_download_folder(self):
     os.remove('settings.json')
     self.settings = sad.Settings(download_folder='Downloaded_Test')
     self.downloader = sad.Downloader()
     self.downloader.run()
     self.assertEqual(2, len(os.listdir('Downloaded_Test')))
     shutil.rmtree('Downloaded_Test')
Example #2
def main():
    # Use a context manager so the CSV file is flushed and closed properly;
    # newline='' stops csv from writing blank lines on Windows (Python 3).
    with open('countries.csv', 'w', newline='') as fp:
        writer = csv.writer(fp)
        D = downloader.Downloader()
        html = D('http://example.webscraping.com/ajax/search.json?page=0&page_size=1000&search_term=.')
        ajax = json.loads(html)
        for record in ajax['records']:
            writer.writerow([record['country']])
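Most snippets on this page treat Downloader as a callable object, sometimes with a cache (see examples #9 and #22, which call `downloader.Downloader(...)(url)`). Below is a minimal sketch of that assumed interface using only the standard library; the class body is an illustration of the pattern, not the actual downloader module:

# Minimal sketch of the callable-Downloader interface these examples assume.
# This is NOT the real downloader module, only the __call__-with-cache pattern.
from urllib import request


class Downloader:
    def __init__(self, cache=None, headers=None):
        self.cache = cache if cache is not None else {}
        self.headers = headers or {'User-Agent': 'wswp'}

    def __call__(self, url):
        # Serve from the cache when possible; otherwise fetch and remember.
        if url in self.cache:
            return self.cache[url]
        req = request.Request(url, headers=self.headers)
        with request.urlopen(req) as resp:
            html = resp.read().decode('utf-8', errors='replace')
        self.cache[url] = html
        return html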
Example #3
def main(url_to_m3u8, download_dir, verbose, ignore_ssl):
    """
    :type url_to_m3u8: str
    :type download_dir: str
    :type verbose: bool
    :type ignore_ssl: bool
    :rtype: None
    """
    http_settings = dict(headers={
        "User-Agent":
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6)"
        " AppleWebKit/602.4.8 (KHTML, like Gecko)"
        " Version/10.0.3 Safari/602.4.8"
    }, )
    if ignore_ssl:
        http_settings["verify"] = False

    global DOWNLOADER
    DOWNLOADER = downloader.Downloader(
        download_dir=download_dir,
        http_settings=http_settings,
    )
    logging.basicConfig(level=logging.INFO if verbose else logging.WARNING)

    process_main_playlist(url_to_m3u8)
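The http_settings dict above is requests-style (headers plus an optional verify=False when SSL errors are ignored). A hedged sketch of how a downloader might pass it through; the real Downloader internals are not shown on this page, so this is an assumption:

# Assumption: a requests-based Downloader that forwards http_settings
# (headers, verify, ...) straight to requests.get.
import requests


class Downloader:
    def __init__(self, download_dir, http_settings=None):
        self.download_dir = download_dir
        self.http_settings = http_settings or {}

    def get(self, url):
        # verify=False (set when ignore_ssl is given) disables certificate checks.
        return requests.get(url, **self.http_settings)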
Example #4
    def main(self):
        """Main program that converts the prime ministers CSV file into HTML pages."""

        # Create a PrimeMinisters directory under the Desktop directory
        # directly beneath the home directory. If it already exists,
        # remove it and create it anew; otherwise just create it.
        home_directory = os.environ['HOME']
        base_directory = home_directory + '/Desktop/PrimeMinisters/'
        if os.path.isdir(base_directory):
            shutil.rmtree(base_directory)
        os.makedirs(base_directory)

        # Have the downloader fetch all the required files and
        # obtain the input table.
        a_downloader = downloader.Downloader(base_directory)
        a_table = a_downloader.download_all()
        print(a_table)

        # Pass the input table to the translator for conversion and
        # obtain the output table.
        a_translator = translator.Translator(a_table)
        a_table = a_translator.table()
        print(a_table)

        # Pass the output table to the writer and have it build the web page.
        a_writer = writer.Writer(base_directory, a_table)
        a_writer.write()

        return 0
Example #5
    def __init__(self):
        # Instantiate helpers first; self.u is needed for the OS check below.
        self.dl = downloader.Downloader()
        self.r = run.Run()
        self.u = utils.Utils()

        # Check the OS first
        if not str(sys.platform) == "darwin":
            self.u.head("Incompatible System")
            print(" ")
            print("This script can only be run from macOS/OS X.")
            print(" ")
            print("The current running system is \"{}\".".format(sys.platform))
            print(" ")
            self.grab("Press [enter] to quit...")
            print(" ")
            exit(1)

        self.web_drivers = None
        self.os_build_number = None
        self.os_number = None
        self.wd_loc = None
        self.sip_checked = False
        self.installed_version = "Not Installed!"
        self.get_manifest()
        self.get_system_info()
Example #6
 def test_get_next_season(self):
     self.wl.update_watchlist('The.Big.Bang.Theory', 'S01E30')
     self.downloader = sad.Downloader()
     self.downloader.run()
     folder = os.listdir(os.getcwd())
     downloaded_list = [f for f in folder if '.torrent' in f]
     self.assertEqual(2, len(downloaded_list))
     self.assertIn('The.Big.Bang.Theory.S02E01.torrent', downloaded_list)
Example #7
 def execute_server_command(self, server_message):
     validation_error = self.validate_command(server_message)
     if validation_error:
         if validation_error['number'] is not None:
             return validation_error
     error = server_message.get('error')
     if error:
         self.logger.warning("@ Server return error: %s\t%s" %
                             (error.get('code'), error.get('message')))
     command, number = server_message.get('command'), server_message.get(
         'number')
     if command:
         if self.errors:
             self.logger.error(
                 "! Server returned a command for a request that contained an error - this is itself an error."
             )
             return {"number": number, "result": False}
         self.logger.info("Executing command number %i : %s" %
                          (number, str(command)))
         method = getattr(self, command, None)
         if not method:
             method = getattr(self.printer, command)
         payload = server_message.get('payload')
         if server_message.get('is_link'):
             if self.downloader and self.downloader.is_alive():
                 self.register_error(
                     108,
                     "Can't start new download, because previous download isn't finished."
                 )
                 result = False
             else:
                 if command == 'gcodes':
                     self.printer.set_filename(
                         server_message.get('filename'))
                 self.downloader = downloader.Downloader(
                     self,
                     payload,
                     method,
                     is_zip=bool(server_message.get('zip')))
                 self.downloader.start()
                 result = True
         else:
             if payload:
                 arguments = [payload]
             else:
                 arguments = []
             try:
                 result = method(*arguments)
                 # To avoid needless 'return True' in handlers, a return
                 # value of None is treated as a successful call.
                 result = result or result is None
             except Exception as e:
                 message = "! Error while executing command %s, number %d.\t%s" % (
                     command, number, e.message)
                 self.register_error(109, message, is_blocking=False)
                 self.logger.exception(message)
                 result = False
         ack = {"number": number, "result": result}
         return ack
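The method lookup in execute_server_command falls back from the handler object to its printer via getattr. Here is a self-contained sketch of that dispatch pattern; the class and method names below are illustrative, not taken from the source:

# Illustrative getattr-based dispatch with a fallback target.
class Printer:
    def pause(self):
        return True


class Handler:
    def __init__(self):
        self.printer = Printer()

    def status(self):
        return {"state": "ready"}

    def dispatch(self, command, *args):
        # Look for the command on the handler first, then on the printer.
        method = getattr(self, command, None)
        if method is None:
            method = getattr(self.printer, command)  # AttributeError if unknown
        return method(*args)


handler = Handler()
print(handler.dispatch("status"))  # handled by Handler itself
print(handler.dispatch("pause"))   # falls back to Printer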
Example #8
    def __init__(self):
        super().__init__()

        self.state = {
            "current_url": None
        }
        self.settings = {
            "geometry": "600x250+400+300",
            "treeview": [
                ["Uploader", 100],
                ["Title", 190],
                ["Progress", 70],
                ["ETA (s)", 50],
                ["Speed", 70]
            ]
        }

        try:
            with open("settings.json") as fp:
                self.settings.update(json.load(fp))
        except FileNotFoundError:
            with open("settings.json", "w") as fp:
                json.dump(self.settings, fp)

        self.title("Video Downloader")
        self.attributes("-topmost", True)
        self.geometry(self.settings["geometry"])
        self.columnconfigure(0, weight=1)
        self.rowconfigure(1, weight=1)
        self.minsize(600, 250)

        self.preview_frame = PreviewFrame(self)
        self.preview_frame.grid(padx=5, pady=5, sticky="nwe")

        self.tv = Tree(self, self.settings["treeview"])
        self.tv.grid(column=0, row=1, padx=5, pady=5, sticky="nswe")

        self.bottom_frame = BottomFrame(self)
        self.bottom_frame.grid(column=0, row=2, sticky="w")

        self.menu = PopupMenu(self, "cut", "copy", "paste")
        self.tv_menu = PopupMenu(self, "cancel", "pause", "download_speed")

        self.pv_thread = downloader.Preview(self.callback)
        self.dl_thread = downloader.Downloader(self.callback)
        self.cv_thread = downloader.Converter(self.callback)

        try:
            self.pv_thread.add(self.clipboard_get())
        except TclError:
            pass

        self.bind("<Button-3>", self.popup)

        self.after(100, self.check_clipboard)

        self.protocol("WM_DELETE_WINDOW", self.end)
        self.mainloop()
Example #9
def subpaer_urls(seed_url):
    # Downloader is callable: instantiate it (with the page cache) and
    # call it with the URL to get the HTML, as in example #22.
    html = downloader.Downloader(cache=pageShe)(seed_url)
    urls = [
        re.findall(r'href="(.*)">', line)[0] for line in re.findall(
            r'<div class="List2">(.*?)</div>', html, re.DOTALL)[0].split('\n')
        if re.search(r'href', line)
    ]
    return [urljoin(seed_url, url) for url in urls]
Example #10
 def __init__(self, url, count):
     # Initialize the download URL
     self.url = url
     # Initialize the total number of items to crawl
     self.count = count
     # Initialize the downloader
     self.downloader = downloader.Downloader()
     # Initialize the outputer
     self.outputer = outputer.Outputer()
Example #11
 def _parser_html_(self, content, filename):
     try:
         # os.path.exists is the public API (os._exists is private);
         # binary mode matches the encoded bytes being written, and the
         # with-block closes the file automatically.
         if not os.path.exists(filename):
             with open(filename, 'wb') as fp:
                 fp.write(content.encode('utf-8'))
     except Exception as e:
         self.log.error(e)
         downloader.Downloader()._logout_banker()
Example #12
    def __handle_upload_idb_start(self, client, idb_name, idb_hash, idb_size):
        self.__logger.info(
            "Client {} sent an upload request, starting the upload of file {}, size {}"
            .format(client.addr, idb_name, idb_size))

        local_file_path = os.path.join(self.__idbs_path, idb_name)
        client.downloader = downloader.Downloader(self.__logger,
                                                  local_file_path, idb_name,
                                                  idb_hash, idb_size)
Example #13
    def saveIntoMyHbase(self, link, soup=None):
        """
        Save the info from MySQL into HBase; both rows share the same 'id'.
        :param link: the link whose data is to be saved
        :param soup: the soup of the link; if not given, the link is downloaded first
        :return:
        """

        sql = "url = '%s'" % (link)
        sql = "select * from %s where %s" % (MySQL_TABLE, sql)
        sql_data = self.mdb.selectSQL(MySQL_TABLE, sql)[0]
        # hbase_save = sql_data["hbase"]
        # hbase_save = hbase_save if isinstance(hbase_save,str) else hbase_save.encode('utf-8')
        hbase_save = DEFUALT_FALSE
        if hbase_save == DEFUALT_TRUE:
            # Already saved into HBase, so return.
            return
        elif hbase_save == DEFUALT_FALSE:
            # Not yet saved into HBase, so save it now.
            download = downloader.Downloader()
            soup = soup if soup else BeautifulSoup(download(link), htmlparser)
            urls = getInnerPageURLs(soup)[0][0]
            url = "URL:%s%s" % (mainHTTP, urls.encode('utf-8'))
            dhtml = download(url)
            hbase_save = DEFUALT_TRUE if dhtml else DEFUALT_FALSE
            dsoup = BeautifulSoup(dhtml, htmlparser)
            saveTxt = ""
            for string in dsoup.stripped_strings:
                dammit = UnicodeDammit(string)
                saveTxt = saveTxt + "%s\n" % dammit.unicode_markup.encode(
                    'utf-8')
            # print saveTxt
            #self.hdb = hbase_mar.HbaseClient()
            #if HBASE_TABLE not in self.hdb.get_tables():
            #    self.hdb.create_table(HBASE_TABLE, "page")

            for key in sql_data.keys():
                if key == 'id':
                    continue
                v = sql_data[key] if isinstance(
                    sql_data[key], str) else sql_data[key].encode('utf-8')
                id = "%d" % sql_data['id']
                self.hdb.put(HBASE_TABLE, "%s" % id,
                             {"page:%s" % (key): "%s" % v})
            else:
                # The for/else clause runs once after the loop finishes.
                self.hdb.put(HBASE_TABLE, "%s" % id,
                             {"page:data": "%s" % (saveTxt)})
                # show the info about the put
                #self.hdb.ggetrow(HBASE_TABLE, "%d" % sql_data['id'])
            print("%s saved into hbase" % url)
Example #14
    def handleService(self):
        params = self.parser.getParams()
        name = self.parser.getParam(params, "name")
        title = self.parser.getParam(params, "title")
        category = self.parser.getParam(params, "category")
        page = self.parser.getParam(params, "page")
        url = self.parser.getParam(params, "url")
        vtitle = self.parser.getParam(params, "vtitle")
        service = self.parser.getParam(params, "service")
        action = self.parser.getParam(params, "action")
        path = self.parser.getParam(params, "path")
        
        if name is None:
            self.listsABCMenu(self.cm.makeABCList())
        if name == 'abc-menu':
            self.showSerialTitles(category)
        elif name == 'serial-title':
            self.showSeason(page, category)
        elif name == 'serial-season' and title is not None and page is not None:
            self.showSerialParts(page, title, category)

        if name == 'playSelectedMovie':
            nUrl = mainUrl + page
            linkVideo = ''
            ID = ''
            ID = self.getVideoID(nUrl)
            #print str (ID)
            if (ID!=False):
                if ID != '':
                    linkVideo = self.up.getVideoLink(ID)
                    if linkVideo != False:
                        self.LOAD_AND_PLAY_VIDEO(linkVideo, title)
                else:
                    d = xbmcgui.Dialog()
                    d.ok('Brak linku', SERVICE + ' - tymczasowo wyczerpałeś limit ilości uruchamianych seriali.', 'Zapraszamy za godzinę.')

        if service == SERVICE and action == 'download' and url != '':
            self.cm.checkDir(os.path.join(dstpath, SERVICE))
            if dbg == 'true':
                log.info(SERVICE + ' - handleService()[download][0] -> title: ' + urllib.unquote_plus(vtitle))
                log.info(SERVICE + ' - handleService()[download][0] -> url: ' + urllib.unquote_plus(url))
                log.info(SERVICE + ' - handleService()[download][0] -> path: ' + path)
            if urllib.unquote_plus(url).startswith('/'):
                urlTempVideo = self.getVideoID(mainUrl + urllib.unquote_plus(url))
                linkVideo = self.up.getVideoLink(urlTempVideo)
                if dbg == 'true':
                    log.info(SERVICE + ' - handleService()[download][1] -> title: ' + urllib.unquote_plus(vtitle))
                    log.info(SERVICE + ' - handleService()[download][1] -> temp url: ' + urlTempVideo)
                    log.info(SERVICE + ' - handleService()[download][1] -> url: ' + linkVideo)
                if linkVideo != False:
                    if dbg == 'true':
                        log.info(SERVICE + ' - handleService()[download][2] -> title: ' + urllib.unquote_plus(vtitle))
                        log.info(SERVICE + ' - handleService()[download][2] -> url: ' + linkVideo)
                        log.info(SERVICE + ' - handleService()[download][2] -> path: ' + path)
                    dwnl = downloader.Downloader()
                    dwnl.getFile({ 'title': urllib.unquote_plus(vtitle), 'url': linkVideo, 'path': path })
Example #15
 def setUp(self):
     test_dir = os.listdir(os.getcwd())
     if 'settings.json' in test_dir:
         os.rename('settings.json', 'settings.json.bkp')
     if 'watchlist.json' in test_dir:
         os.rename('watchlist.json', 'watchlist.json.bkp')
     self.settings = sad.Settings()
     self.wl = sad.Watchlist(
         series_list=['Breaking.Bad', 'The.Big.Bang.Theory'])
     self.downloader = sad.Downloader()
Example #16
 def __init__(self, **kwargs):
     self.dl = downloader.Downloader()
     self.r = run.Run()
     self.iasl_url = "https://bitbucket.org/RehabMan/acpica/downloads/iasl.zip"
     self.iasl = self.check_iasl()
     if not self.iasl:
         return None
     self.dsdt = None
     self.dsdt_raw = None
     self.dsdt_lines = None
Example #17
 def __init__(self, master=None):
     super().__init__(master)
     self.master = master
     self.master.geometry("500x250")
     self.grid()
     self.row = 0
     self.quality = tk.StringVar(self, value="480p")
     self.isAudio = tk.BooleanVar(self)
     self.descargador = downloader.Downloader(
         pathlib.Path().absolute().joinpath('descargas'))
     self.create_widgets()
Example #18
 def gcodes(self, gcodes_or_link, is_link = False):
     if is_link:
         if self.is_downloading():
             self.logger.error('Download command received while a download is in progress. Aborting...')
             return False
         else:
             self.downloader = downloader.Downloader(self, gcodes_or_link)
             self.downloader.start()
     else:
         gcodes = base64.b64decode(gcodes_or_link)
         self.unbuffered_gcodes(gcodes)
Example #19
 def test_fetched_download_returns_the_searched_tv_series(self):
     '''When the downloader searches for a TV series, it must guarantee
        that the fetcher returns that same series'''
     os.remove('watchlist.json')
     name = 'The.100'
     self.wl = sad.Watchlist(series_list=[name])
     self.wl.update_watchlist(name, 'S03E00')
     self.downloader = sad.Downloader()
     self.downloader.action = 'show_magnets'
     magnet_list = self.downloader.run()
     self.assertIn(name.replace('.', '+').lower(), magnet_list)
Example #20
File: engine.py Project: zymITsky/ants
 def __init__(self, spider, node_manager, schedule):
     self.settings = node_manager.settings
     self.node_manager = node_manager
     self.status = EngineStatusClient()
     self.stats = load_object(self.settings['STATS_CLASS'])(self)
     self.spider = spider
     self.scheduler = schedule
     self.signals = SignalManager(self)
     self.downloader = downloader.Downloader(self)
     self.extension_manager = ExtensionManager(self)
     self.scraper = scrapy.Scraper(self, spider)
Example #21
def main(url_to_m3u8, download_dir, verbose):
    """
    :type url_to_m3u8: str 
    :type download_dir: str 
    :type verbose: bool 
    :rtype: None 
    """
    global DOWNLOADER
    DOWNLOADER = downloader.Downloader(download_dir=download_dir)
    logging.basicConfig(level=logging.INFO if verbose else logging.WARNING)

    process_main_playlist(url_to_m3u8)
Example #22
    def download(self):
        if self.cache and self.seed_url in self.cache:
            self.html = self.cache[self.seed_url]
            print("Get", self.seed_url, "from cache...")
        else:
            self.html = downloader.Downloader()(self.seed_url)
            self.cache[self.seed_url] = self.html
            print("Download", self.seed_url, "...")
Example #23
 def __init__(self, **kwargs):
     self.dl = downloader.Downloader()
     self.r = run.Run()
     self.u = utils.Utils("SSDT Time")
     self.iasl_url_macOS = "https://bitbucket.org/RehabMan/acpica/downloads/iasl.zip"
     self.iasl_url_linux = "http://amdosx.kellynet.nl/iasl.zip"
     self.iasl_url_windows = "https://acpica.org/sites/acpica/files/iasl-win-20200528.zip"
     self.iasl = self.check_iasl()
     if not self.iasl:
         raise Exception("Could not locate or download iasl!")
     self.dsdt = None
     self.dsdt_raw = None
     self.dsdt_lines = None
Example #24
def target_depot():
    max_urls = 5
    D = downloader.Downloader()
    zipped_data = D('http://www.luckyegg.net/places/top-1mcsv.zip')
    urls = []  # collected URLs (capped at max_urls)
    with zipfile.ZipFile(io.BytesIO(zipped_data)) as zf:
        csv_filename = zf.namelist()[0]
        for _, website in csv.reader(io.TextIOWrapper(zf.open(csv_filename))):
            urls.append('http://' + website)
            # Print before the cap check so the last URL is also reported.
            print('Target site:', len(urls), website)
            if len(urls) == max_urls:
                break
    return urls
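The zipfile/io combination in target_depot processes the downloaded archive entirely in memory. Here is a self-contained sketch of the same pattern using a locally built archive instead of a download; the file and member names are illustrative:

# Build a small ZIP in memory, then stream its CSV member without touching disk.
import csv
import io
import zipfile

buf = io.BytesIO()
with zipfile.ZipFile(buf, 'w') as zf:
    zf.writestr('sites.csv', '1,example.com\n2,example.org\n')

with zipfile.ZipFile(io.BytesIO(buf.getvalue())) as zf:
    member = zf.namelist()[0]
    for rank, website in csv.reader(io.TextIOWrapper(zf.open(member))):
        print(rank, 'http://' + website)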
Example #25
File: main.py Project: socialpercon/Cojo
    def __init__(self):
        self.config = configparser.ConfigParser()
        self.downloader = downloader.Downloader()
        # os.path.join avoids the invalid "\D" escape in the original literal.
        self.download_location = os.path.join(os.getcwd(), "Downloads") + os.sep

        root = self.root = Gui(title="URL Link Grabber")
        root.set_version("Version 0.2 6-11-2017")

        root.window.protocol("WM_DELETE_WINDOW", self.quit)

        root.button_get_links.config(command=self.get_links)
        root.button_filter.config(command=self.filter)
        root.button_save.config(command=self.save_links)
        root.button_show_in_explorer.config(command=self.open_explorer)
        root.window.bind('<Return>', self.get_links)
Example #26
 def __init__(self):
     self.settings = settings.Settings()
     list.__init__(
         self,
         [
             Channel("BBC TV", "tv", self.settings),
             Channel("ITV", "itv", self.settings),
             #                             Channel("Channel 4", "ch4", self.settings),
             #                             Channel("Five", "five", self.settings),
             #                             Channel("BBC Podcasts", "podcast", self.settings),
             Channel("BBC Radio", "radio", self.settings),
             #                             Channel("Hulu", "hulu", self.settings),
         ])
     self.downloader = downloader.Downloader(self.settings)
     self.streamer = streamer.Streamer(self.settings)
Example #27
    def __init__(self, url, ext):

        #~ parser = argsparser.ArgsParser()
        #~ args = sys.argv  # takes the arguments from the console
        if (url.startswith("www.")):
            url = "http://" + url
        if (not ext.startswith(".")):
            ext = "." + ext

        if (url == "" or ext == ""):
            print("You must provide url and extension with --url= and --ext= .")

        contentDownl = downloader.Downloader()
        count = contentDownl.downloadAll(url, ext)
        print("Downloaded " + str(count) + " files with extension "
              + ext + " from the url \"" + url + "\"")
Example #28
 def _get_result_(self, soup, point):
     try:
         self.log.info('get result')
         tables = soup.find_all('table')
         for table in tables:
             if isinstance(table, bs4.element.Tag):
                 for tr in table.find_all('tr'):
                     string = ''
                     if isinstance(tr, bs4.element.Tag):
                         for td in tr.find_all('td'):
                             if isinstance(td, bs4.element.Tag):
                                 string = string + ':\t' + td.text
                     print(string)
     except Exception as e:
         self.log.error(e)
         downloader.Downloader()._logout_banker()
Example #29
def download_file():
    user_id = get_user_id(request.remote_addr)
    print('ip: %s downloading' % (user_id))
    url = request.args.get('url', None)
    if not test_url(url):
        return error_json('Please Enter A Valid YouTube URL')
    print(url)
    if user_id in DOWNLOADS:
        return error_json('Already downloading')
    yt = downloader.Downloader(url, user_id)
    DOWNLOADS[user_id] = yt
    title = yt.title + '.mp4'
    yt.start()
    fp = '%s.mp4' % (user_id)
    _json = {"fp": fp, "title": title}

    return json.dumps(_json)
Example #30
def link_crawler(
    start_url,
    link_regex,
    delay=5,
    robots_url_suffix="robots.txt",
    user_agent="wswp",
    max_depth=5,
    scrape_callback=None,
    num_retries=3,
):
    seen = {}
    crawler_queue = queue.Queue()
    crawler_queue.put(start_url)

    headers = {"User-Agent": user_agent}

    D = downloader.Downloader(headers)

    protocol, domain, *_ = parse.urlsplit(start_url)
    robots_url = parse.urlunsplit(
        (protocol, domain, robots_url_suffix, "", ""))
    rp = parse_robots(robots_url)

    while not crawler_queue.empty():
        url = crawler_queue.get()

        if rp and not rp.can_fetch(user_agent, url):
            print(f"blocked by robots.txt {url}")
            continue

        html = D(url, num_retries)

        if scrape_callback:
            scrape_callback(url, html)

        depth = seen.get(url, 0)
        if depth == max_depth:
            print(f"touch max depth {url}")
            continue

        for link in get_links(html):
            if link and re.match(link_regex, link):
                abs_link = parse.urljoin(url, link)
                if abs_link not in seen:
                    crawler_queue.put(abs_link)
                    seen[abs_link] = depth + 1
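A hedged usage sketch for link_crawler; the start URL and regex are placeholders, and parse_robots, get_links, and downloader.Downloader are assumed to be the helpers defined alongside it:

# Illustrative invocation only; URL and regex are placeholders.
if __name__ == "__main__":
    link_crawler(
        "http://example.webscraping.com/",  # assumed start page
        r"/(index|view)/",                  # follow index and detail links only
        delay=5,
        user_agent="wswp",
        max_depth=2,
        scrape_callback=lambda url, html: print(url, len(html or "")),
    )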