def say_with_google(word, autoremove=True, background=False, debug=False):
    """Say a word with Google.

    https://ubuntuincident.wordpress.com/2012/03/27/audio-pronunciation-of-words-from-google/

    The audio is fetched from the URL built with the module-level
    ``template``, stored as /tmp/<word>.mp3 and handed to ``play()``.

    Parameters:
        word: the word to pronounce; it is substituted into both the URL
            template and the name of the local file.
        autoremove: if True, the mp3 is deleted before returning. Set it
            to False if you want to work with the mp3 later, after this
            function has returned.
        background: passed through to ``play()``.
        debug: if True, the mp3 is saved but not played.

    Return value: a tuple (found, mp3_file), where found is True if the
    word was retrieved successfully (False otherwise), and mp3_file is the
    path of the locally saved mp3 (or None if it was not kept).
    """
    url = template.format(word=word)
    content = web.get_page(url, user_agent=True)
    if not content:
        # Nothing was retrieved, so there is nothing to save or play.
        return (False, None)

    fname = '/tmp/{word}.mp3'.format(word=word)
    fs.store_content_in_file(content, fname, overwrite=True)
    try:
        if not debug:
            play(fname, background=background)
    finally:
        # Honour autoremove even when playback raises, so that a failed
        # call does not leave a stray mp3 behind in /tmp.
        if autoremove:
            os.unlink(fname)

    return (True, None if autoremove else fname)
def say_with_google(word, autoremove=True, background=False, debug=False):
    """Say a word with Google.

    https://ubuntuincident.wordpress.com/2012/03/27/audio-pronunciation-of-words-from-google/

    The mp3 is downloaded from the URL produced by the module-level
    ``template``, written to /tmp/<word>.mp3 and passed to ``play()``.

    Parameters:
        word: the word to pronounce; used in the URL and in the file name.
        autoremove: when True the mp3 is removed before the function
            returns. Set it to False if you want to work with the mp3
            later, after this function has returned.
        background: forwarded to ``play()``.
        debug: when True the mp3 is stored but playback is skipped.

    Return value: a tuple (found, mp3_file), where found is True if the
    word was retrieved successfully (False otherwise), and mp3_file is the
    path of the locally saved mp3 (or None if it was not kept).
    """
    content = web.get_page(template.format(word=word), user_agent=True)
    if not content:
        # The download yielded nothing: report "not found" and no file.
        return (False, None)

    fname = "/tmp/{word}.mp3".format(word=word)
    fs.store_content_in_file(content, fname, overwrite=True)
    mp3_file = fname
    try:
        if not debug:
            play(fname, background=background)
    finally:
        # Remove the file even if play() raised; otherwise autoremove=True
        # would still leave the mp3 behind on errors.
        if autoremove:
            os.unlink(fname)
            mp3_file = None

    return (True, mp3_file)
def open_in_browser(html, test=False):
    """Save an HTML source to a temp. file and open it in the browser.

    The file is created in /tmp with delete=False and is not removed here,
    so it still exists after the function returns.

    Parameters:
        html: the HTML source to store.
        test: if True, the file is written but no browser tab is opened.

    Return value: name of the temp. file.
    """
    temp = tempfile.NamedTemporaryFile(prefix='tmp', suffix='.html',
                                       dir='/tmp', delete=False)
    # Only the unique name is needed: the content is written by path below,
    # so close the handle right away instead of leaking it.
    temp.close()
    fs.store_content_in_file(html, temp.name, overwrite=True)
    if not test:
        webbrowser.open_new_tab(temp.name)
    return temp.name
def get_page_with_cookies_using_wget(url):
    """Get the content of a cookies-protected page.

    The page is downloaded with wget. The cookies obtained from
    get_cookies_in_text() for the host of the URL are written to
    cfg.COOKIES_TXT, that file is passed to wget, and it is removed again
    once the command has run (or failed).

    Return value: whatever process.get_simple_cmd_output() returns for the
    wget command line.
    """
    cookies = get_cookies_in_text(get_host(url))
    fs.store_content_in_file(cookies, cfg.COOKIES_TXT, overwrite=True)
    try:
        options = "--cookies=on --load-cookies={0} --keep-session-cookies".format(cfg.COOKIES_TXT)
        # The URL is embedded between single quotes. Escape any single
        # quote it contains (close quote, escaped quote, reopen quote) so
        # it cannot terminate the argument early.
        # NOTE(review): assumes the command string is parsed with POSIX
        # quoting rules by process.get_simple_cmd_output -- confirm.
        quoted_url = url.replace("'", "'\\''")
        cmd = "{wget} {options} '{url}' -qO-".format(wget=cfg.WGET, options=options, url=quoted_url)
        return process.get_simple_cmd_output(cmd)
    finally:
        # The cookies file must not outlive the call, even on failure.
        os.unlink(cfg.COOKIES_TXT)
def open_in_browser(html, test=False):
    """Save an HTML source to a temp. file and open it in the browser.

    The temp. file lives in /tmp, is created with delete=False and is not
    deleted by this function.

    Parameters:
        html: the HTML source to write to the file.
        test: when True, skip opening the browser tab.

    Return value: name of the temp. file.
    """
    temp = tempfile.NamedTemporaryFile(prefix='tmp', suffix='.html',
                                       dir='/tmp', delete=False)
    path = temp.name
    # The open handle is never written to (the content is stored by path),
    # so release it immediately rather than leaking a file descriptor.
    temp.close()
    fs.store_content_in_file(html, path, overwrite=True)
    if not test:
        webbrowser.open_new_tab(path)
    return path
def download_to(url, local_file, user_agent=False, referer=False, timeout=None,
                overwrite=False):
    """Download a URL and save what was fetched to local_file.

    user_agent, referer and timeout are forwarded to get_page();
    overwrite is forwarded to fs.store_content_in_file().
    """
    fetch_options = dict(user_agent=user_agent, referer=referer, timeout=timeout)
    fs.store_content_in_file(get_page(url, **fetch_options), local_file,
                             overwrite=overwrite)
def get_page_with_cookies_using_wget(url):
    """Get the content of a cookies-protected page.

    The page is downloaded with wget. Cookies for the host of the URL
    (from get_cookies_in_text()) are saved to cfg.COOKIES_TXT and passed
    to wget; the cookies file is deleted afterwards, also when the
    command raises.

    Return value: the result of process.get_simple_cmd_output() for the
    wget command line.
    """
    cookies = get_cookies_in_text(get_host(url))
    fs.store_content_in_file(cookies, cfg.COOKIES_TXT, overwrite=True)

    options = "--cookies=on --load-cookies={0} --keep-session-cookies".format(
        cfg.COOKIES_TXT)
    # A single quote inside the URL would end the quoted argument early;
    # rewrite each one as: close quote, escaped quote, reopen quote.
    # NOTE(review): assumes process.get_simple_cmd_output parses the
    # command with POSIX quoting rules -- confirm.
    safe_url = url.replace("'", "'\\''")
    cmd = "{wget} {options} '{url}' -qO-".format(wget=cfg.WGET,
                                                 options=options,
                                                 url=safe_url)
    try:
        page = process.get_simple_cmd_output(cmd)
    finally:
        # Never leave the cookies file behind, even on failure.
        os.unlink(cfg.COOKIES_TXT)
    return page
def html_to_text(html, method=cfg.LYNX):
    """Convert an HTML source to text format.

    Two methods are available: (1) with lynx (method == cfg.LYNX),
    (2) with html2text.py (method == cfg.HTML2TEXT). The HTML is written
    to a temp. file in /tmp, the chosen external command is run on it, and
    the temp. file is removed afterwards.

    Return value: the output of the external command, or None (after a
    warning on stderr) if the method is unknown.
    """
    temp = tempfile.NamedTemporaryFile(prefix='tmp', suffix='.html',
                                       dir='/tmp', delete=False)
    # Only the name is used (the content is stored by path), so close the
    # handle right away instead of leaking it.
    temp.close()
    try:
        fs.store_content_in_file(html, temp.name, overwrite=True)
        if method == cfg.LYNX:
            cmd = "{lynx} {html} -dump".format(lynx=cfg.LYNX, html=temp.name)
        elif method == cfg.HTML2TEXT:
            cmd = "python {html2text} {html}".format(html2text=cfg.HTML2TEXT, html=temp.name)
        else:
            # sys.stderr.write emits the same text as the print statement
            # did, and also works where print is a function.
            sys.stderr.write("Warning! Unknown method is used in web.html_to_text.\n")
            return None
        return process.get_simple_cmd_output(cmd)
    finally:
        # Remove the temp. file on every exit path, including errors.
        os.unlink(temp.name)
def download(self, warning=True):
    """Download yourself.

    Nothing is done if the skip path (self.get_skip_path()) exists.
    Otherwise, when the local file does not exist yet and
    self.make_dirs() succeeds, self.file_url is fetched and stored at
    self.get_local_path(). If self.readme is set, self.save_readme() is
    called as well.

    Parameters:
        warning: if True, print a warning to stderr when the file still
            does not exist after the download attempt.

    Return value: False if the skip path exists; otherwise the result of
    self.exists() after the download attempt.
    """
    if os.path.exists(self.get_skip_path()):
        return False

    if not self.exists() and self.make_dirs():
        obj = web.get_page(self.file_url, user_agent=True, referer=True)
        fs.store_content_in_file(obj, self.get_local_path())

    ok = self.exists()
    if not ok and warning:
        # sys.stderr.write produces the same output as the old print
        # statement and keeps working where print is a function.
        sys.stderr.write("# warning: couldn't download {url}.\n".format(url=self.file_url))

    if self.readme:
        self.save_readme()

    return ok
def download(self, warning=True):
    """Download yourself.

    If the skip path (self.get_skip_path()) exists, return immediately.
    Otherwise, if the local file is missing and self.make_dirs()
    succeeds, fetch self.file_url and store it at self.get_local_path().
    self.save_readme() is called afterwards when self.readme is set.

    Parameters:
        warning: when True, a warning is written to stderr if the file
            does not exist after the download attempt.

    Return value: False if the skip path exists; otherwise whatever
    self.exists() reports after the download attempt.
    """
    if os.path.exists(self.get_skip_path()):
        return False

    needs_fetch = not self.exists()
    if needs_fetch and self.make_dirs():
        obj = web.get_page(self.file_url, user_agent=True, referer=True)
        fs.store_content_in_file(obj, self.get_local_path())

    ok = self.exists()
    if warning and not ok:
        # Written via sys.stderr.write so the message is emitted the same
        # way whether print is a statement or a function.
        message = "# warning: couldn't download {url}.".format(
            url=self.file_url)
        sys.stderr.write(message + "\n")

    if self.readme:
        self.save_readme()

    return ok
def html_to_text(html, method=cfg.LYNX):
    """Convert an HTML source to text format.

    Two methods are available: (1) with lynx (method == cfg.LYNX),
    (2) with html2text.py (method == cfg.HTML2TEXT). The HTML is saved to
    a temp. file in /tmp, the selected external command is run on that
    file, and the file is deleted afterwards.

    Return value: the output of the external command; None if the method
    is unknown (a warning is written to stderr in that case).
    """
    temp = tempfile.NamedTemporaryFile(prefix='tmp', suffix='.html',
                                       dir='/tmp', delete=False)
    html_path = temp.name
    # The handle itself is never used (the content is written by path),
    # so release it immediately rather than leaking a file descriptor.
    temp.close()
    try:
        fs.store_content_in_file(html, html_path, overwrite=True)
        if method == cfg.LYNX:
            cmd = "{lynx} {html} -dump".format(lynx=cfg.LYNX, html=html_path)
        elif method == cfg.HTML2TEXT:
            cmd = "python {html2text} {html}".format(html2text=cfg.HTML2TEXT,
                                                     html=html_path)
        else:
            # Same text as the former print statement, written in a way
            # that also works where print is a function.
            sys.stderr.write("Warning! Unknown method is used in web.html_to_text.\n")
            return None
        return process.get_simple_cmd_output(cmd)
    finally:
        # Delete the temp. file on success, on unknown method and on error.
        os.unlink(html_path)
def download_to(url, local_file, user_agent=False, referer=False,
                timeout=None, overwrite=False):
    """Retrieve the content behind a URL and write it to a local file.

    The user_agent, referer and timeout arguments are handed to
    get_page(); overwrite is handed to fs.store_content_in_file().
    """
    fetched = get_page(
        url,
        user_agent=user_agent,
        referer=referer,
        timeout=timeout,
    )
    fs.store_content_in_file(fetched, local_file, overwrite=overwrite)
def test_store_content_in_file(self):
    """Store a fetched page in cfg.TEST_TMP_FILE and check it is not empty."""
    content = web.get_page(GOOGLE)
    # Precondition: the target file must not exist before the test runs.
    assert not os.path.exists(cfg.TEST_TMP_FILE)
    try:
        fs.store_content_in_file(content, cfg.TEST_TMP_FILE)
        assert os.path.getsize(cfg.TEST_TMP_FILE) > 0
    finally:
        # Clean up even when an assertion fails; a leftover file would trip
        # the "must not exist yet" precondition on the next run.
        if os.path.exists(cfg.TEST_TMP_FILE):
            os.unlink(cfg.TEST_TMP_FILE)
def test_store_content_in_file(self):
    """Check that a fetched page stored in cfg.TEST_TMP_FILE is non-empty."""
    target = cfg.TEST_TMP_FILE
    content = web.get_page(GOOGLE)
    # The test starts from a clean slate: the target must not exist yet.
    assert not os.path.exists(target)
    try:
        fs.store_content_in_file(content, target)
        assert os.path.getsize(target) > 0
    finally:
        # Always remove the file, so that a failure here does not make the
        # precondition above fail on every later run.
        if os.path.exists(target):
            os.unlink(target)