def import_url (url, rd, progress=None, add_webpage_source=True, threaded=False, interactive=True):
    """Import information from URL.

    HTML content is handed to a WebPageImporter; zip archives are
    expanded into a file list; anything else is downloaded to a
    temporary file whose name is returned in a one-element list.
    (It would be more elegant to always hand back a class, but the
    importer machinery is entangled with GUI code, so this stays
    ugly for now.)

    :param url: URL to fetch.
    :param rd: recipe database, passed through to WebPageImporter.
    :param progress: optional callable(fraction, message) used to
        report progress to the UI.
    :param add_webpage_source: passed through to WebPageImporter.
    :param threaded: passed through to WebPageImporter.
    :param interactive: passed through to WebPageImporter.
    :return: a WebPageImporter for HTML, otherwise a list of local
        filenames.
    """
    if progress:
        progress(0.01, 'Fetching webpage')
    sock = urllib.request.urlopen(url)
    # Default to text/html when the server sends no content-type.
    header = sock.headers.get('content-type', 'text/html')
    if progress:
        progress(0.02, 'Reading headers')
    if header.find('html') >= 0:
        #return scrape_url(url,progress)
        return WebPageImporter(rd,
                               url,
                               prog=progress,
                               add_webpage_source=add_webpage_source,
                               threaded=threaded,
                               interactive=interactive)
    elif header == 'application/zip':
        import zip_importer
        return zip_importer.zipfile_to_filelist(
            sock, progress, os.path.splitext(url.split('/')[-1])[0])
    else:
        # BUGFIX: tempfile.tempdir is None until the tempfile module
        # lazily initializes it, which would make os.path.join raise
        # TypeError; gettempdir() always returns a real directory.
        fn = os.path.join(tempfile.gettempdir(), url.split('/')[-1])
        # Avoid clobbering an existing file by mangling the name.
        while os.path.exists(fn):
            fn = add_to_fn(fn)
        # BUGFIX: context manager guarantees the file is closed even if
        # get_url raises mid-download (the original leaked the handle).
        # NOTE(review): mode 'w' assumes get_url returns str — confirm;
        # binary payloads would need 'wb'.
        with open(fn, 'w') as ofi:
            ofi.write(get_url(sock, progress))
        return [fn]
# NOTE(review): this is a duplicate definition of import_url — at import
# time it silently shadows the earlier copy in this file. Confirm which
# one is intended and delete the other.
def import_url (url, rd, progress=None, add_webpage_source=True, threaded=False, interactive=True):
    """Import information from URL.

    HTML content is handed to a WebPageImporter; zip archives are
    expanded into a file list; anything else is downloaded to a
    temporary file whose name is returned in a one-element list.
    (It would be more elegant to always hand back a class, but the
    importer machinery is entangled with GUI code, so this stays
    ugly for now.)

    :param url: URL to fetch.
    :param rd: recipe database, passed through to WebPageImporter.
    :param progress: optional callable(fraction, message) used to
        report progress to the UI.
    :param add_webpage_source: passed through to WebPageImporter.
    :param threaded: passed through to WebPageImporter.
    :param interactive: passed through to WebPageImporter.
    :return: a WebPageImporter for HTML, otherwise a list of local
        filenames.
    """
    if progress:
        progress(0.01, 'Fetching webpage')
    # BUGFIX: urllib.urlopen is Python 2 only; Python 3 moved it to
    # urllib.request.urlopen (which the sibling copy of this function
    # already uses, so the module import is present).
    sock = urllib.request.urlopen(url)
    # Default to text/html when the server sends no content-type.
    header = sock.headers.get('content-type', 'text/html')
    if progress:
        progress(0.02, 'Reading headers')
    if header.find('html') >= 0:
        #return scrape_url(url,progress)
        return WebPageImporter(rd,
                               url,
                               prog=progress,
                               add_webpage_source=add_webpage_source,
                               threaded=threaded,
                               interactive=interactive)
    elif header == 'application/zip':
        import zip_importer
        return zip_importer.zipfile_to_filelist(
            sock, progress, os.path.splitext(url.split('/')[-1])[0])
    else:
        # BUGFIX: tempfile.tempdir is None until the tempfile module
        # lazily initializes it, which would make os.path.join raise
        # TypeError; gettempdir() always returns a real directory.
        fn = os.path.join(tempfile.gettempdir(), url.split('/')[-1])
        # Avoid clobbering an existing file by mangling the name.
        while os.path.exists(fn):
            fn = add_to_fn(fn)
        # BUGFIX: context manager guarantees the file is closed even if
        # get_url raises mid-download (the original leaked the handle).
        # NOTE(review): mode 'w' assumes get_url returns str — confirm;
        # binary payloads would need 'wb'.
        with open(fn, 'w') as ofi:
            ofi.write(get_url(sock, progress))
        return [fn]