def setUp(self):
    """Silence planet logging and create an empty work directory.

    The current ``planet.logger`` is saved on ``self.original_logger``
    before a 'CRITICAL'-level logger is requested.  If ``workdir`` cannot
    be created (presumably because it is left over from an earlier run),
    ``self.tearDown()`` is invoked and the creation is retried once; a
    second failure propagates to the caller.
    """
    # silence errors
    self.original_logger = planet.logger
    planet.getLogger('CRITICAL',None)

    try:
        os.makedirs(workdir)
    except OSError:
        # Only a filesystem error justifies the clean-up-and-retry path; a
        # bare ``except`` would also swallow KeyboardInterrupt and genuine
        # programming errors such as NameError.
        self.tearDown()
        os.makedirs(workdir)
def apply(doc):
    """Render every configured template and refresh the bill of materials.

    Creates the configured output directory when it is missing, passes
    *doc* to ``shell.run`` once per configured template file, and then
    copies each bill-of-materials file from the first template directory
    that contains it into the output directory.  A file is copied only
    when the destination is missing or older than the source; a file that
    cannot be found in any template directory is logged and skipped.
    """
    out_root = config.output_dir()
    if not os.path.exists(out_root):
        os.makedirs(out_root)
    log = planet.getLogger(config.log_level(), config.log_format())

    # Go-go-gadget-template
    for template_file in config.template_files():
        shell.run(template_file, doc)

    # Process bill of materials
    for copy_file in config.bill_of_materials():
        dest = os.path.join(out_root, copy_file)

        # First template directory holding the file wins.
        source = None
        for template_dir in config.template_directories():
            candidate = os.path.join(template_dir, copy_file)
            if os.path.exists(candidate):
                source = candidate
                break
        if source is None:
            log.error('Unable to locate %s', copy_file)
            continue

        source_mtime = os.stat(source).st_mtime
        if os.path.exists(dest) and os.stat(dest).st_mtime >= source_mtime:
            # Destination is already at least as new as the source.
            continue

        dest_dir = os.path.dirname(dest)
        if not os.path.exists(dest_dir):
            os.makedirs(dest_dir)

        log.info("Copying %s to %s", source, dest)
        shutil.copyfile(source, dest)
        shutil.copystat(source, dest)
def run(template_file, doc, mode='template'): """ select a template module based on file extension and execute it """ log = planet.getLogger(planet.config.log_level(),planet.config.log_format()) if mode == 'template': dirs = planet.config.template_directories() else: dirs = planet.config.filter_directories() # parse out "extra" options if template_file.find('?') < 0: extra_options = {} else: import cgi template_file, extra_options = template_file.split('?',1) extra_options = dict(cgi.parse_qsl(extra_options)) # see if the template can be located for template_dir in dirs: template_resolved = os.path.join(template_dir, template_file) if os.path.exists(template_resolved): break else: log.error("Unable to locate %s %s", mode, template_file) if not mode in logged_modes: log.info("%s search path:", mode) for template_dir in dirs: log.info(" %s", os.path.realpath(template_dir)) logged_modes.append(mode) return template_resolved = os.path.realpath(template_resolved) # Add shell directory to the path, if not already there shellpath = os.path.join(sys.path[0],'planet','shell') if shellpath not in sys.path: sys.path.append(shellpath) # Try loading module for processing this template, based on the extension base,ext = os.path.splitext(os.path.basename(template_resolved)) module_name = ext[1:] try: module = __import__(module_name) except Exception, inst: return log.error("Skipping %s '%s' after failing to load '%s': %s", mode, template_resolved, module_name, inst)
exact output produced by upstream tasks. This script captures such output. It should be run whenever there is a major change in the contract between stages """ import shutil, os, sys # move up a directory sys.path.insert(0, os.path.split(sys.path[0])[0]) os.chdir(sys.path[0]) # copy spider output to splice input import planet from planet import spider, config planet.getLogger('CRITICAL',None) config.load('tests/data/spider/config.ini') spider.spiderPlanet() if os.path.exists('tests/data/splice/cache'): shutil.rmtree('tests/data/splice/cache') shutil.move('tests/work/spider/cache', 'tests/data/splice/cache') source=open('tests/data/spider/config.ini') dest1=open('tests/data/splice/config.ini', 'w') dest1.write(source.read().replace('/work/spider/', '/data/splice/')) dest1.close() source.seek(0) dest2=open('tests/work/apply_config.ini', 'w') dest2.write(source.read().replace('[Planet]', '''[Planet]
sys.path[0] = os.getcwd()

# determine verbosity
verbosity = 1
for arg,value in (('-q',0),('--quiet',0),('-v',2),('--verbose',2)):
    # list.remove() drops only the first match, so strip every occurrence:
    # a repeated flag left in sys.argv would be taken for a test file
    # pattern below and suppress the default 'test_*.py'.
    while arg in sys.argv:
        verbosity = value
        sys.argv.remove(arg)

# find all of the planet test modules
modules = []
for pattern in sys.argv[1:] or ['test_*.py']:
    modules.extend(map(fullmodname, glob.glob(os.path.join('tests', pattern))))

# enable logging; verbosity is always 0, 1 or 2 at this point
import planet
planet.getLogger({0: "FATAL", 1: "WARNING", 2: "DEBUG"}[verbosity], None)

# load all of the tests into a suite
try:
    suite = unittest.TestLoader().loadTestsFromNames(modules)
except Exception:
    # attempt to produce a more specific message: importing each module
    # directly surfaces the original import error, if there is one
    for module in modules:
        __import__(module)
    raise

# run test suite
unittest.TextTestRunner(verbosity=verbosity).run(suite)
def setUp(self):
    """Save the current planet logger and request a 'CRITICAL'-level one.

    The saved logger is kept on ``self.original_logger``; raising the
    level is what silences errors while the test runs.
    """
    previous_logger = planet.logger
    self.original_logger = previous_logger
    planet.getLogger('CRITICAL', None)
no_publish = 1 elif arg.startswith("-"): print >> sys.stderr, "Unknown option:", arg sys.exit(1) else: config_file.append(arg) import locale locale.setlocale(locale.LC_ALL, "") from planet import config config.load(config_file or 'config.ini') if verbose: import planet planet.getLogger('DEBUG', config.log_format()) if not offline: from planet import spider try: spider.spiderPlanet(only_if_new=only_if_new) except Exception, e: print e from planet import splice doc = splice.splice() if debug_splice: from planet import logger logger.info('writing debug.atom') debug = open('debug.atom', 'w')
def splice():
    """ Splice together a planet from a cache of entries

    Builds an Atom ``feed`` document carrying the configured title,
    generator, owner and links, appends one ``planet:source`` element for
    every subscription that has cached feed data, and then appends cached
    entries, most recently modified file first, until the largest
    ``items_per_page`` value among the configured templates is reached.

    Entries are skipped when their source id is not among the ids of the
    current subscriptions.  A cache file that fails to parse is logged and
    skipped.

    Returns the ``minidom`` document.
    """
    import planet
    log = planet.getLogger(config.log_level(),config.log_format())

    log.info("Loading cached data")
    cache = config.cache_directory()
    # (mtime, path) for every non-directory in the cache, newest first
    cached = [(os.stat(path).st_mtime, path)
        for path in glob.glob(cache+"/*") if not os.path.isdir(path)]
    cached.sort()
    cached.reverse()

    max_items = max([config.items_per_page(templ)
        for templ in config.template_files() or ['Planet']])

    doc = minidom.parseString('<feed xmlns="http://www.w3.org/2005/Atom"/>')
    feed = doc.documentElement

    # insert feed information
    createTextElement(feed, 'title', config.name())
    date(feed, 'updated', time.gmtime())
    gen = createTextElement(feed, 'generator', config.generator())
    gen.setAttribute('uri', config.generator_uri())

    author = doc.createElement('author')
    createTextElement(author, 'name', config.owner_name())
    createTextElement(author, 'email', config.owner_email())
    feed.appendChild(author)

    if config.feed():
        createTextElement(feed, 'id', config.feed())
        link = doc.createElement('link')
        link.setAttribute('rel', 'self')
        link.setAttribute('href', config.feed())
        if config.feedtype():
            link.setAttribute('type', "application/%s+xml" % config.feedtype())
        feed.appendChild(link)

    if config.link():
        link = doc.createElement('link')
        link.setAttribute('rel', 'alternate')
        link.setAttribute('href', config.link())
        feed.appendChild(link)

    # insert subscription information
    sub_ids = []
    feed.setAttribute('xmlns:planet',planet.xmlns)
    sources = config.cache_sources_directory()
    for sub in config.subscriptions():
        data = feedparser.parse(filename(sources,sub))
        if data.feed.has_key('id'):
            sub_ids.append(data.feed.id)
        if not data.feed:
            continue
        xdoc = minidom.parseString('''<planet:source xmlns:planet="%s" xmlns="http://www.w3.org/2005/Atom"/>\n''' % planet.xmlns)
        reconstitute.source(xdoc.documentElement, data.feed, None, None)
        feed.appendChild(xdoc.documentElement)

    index = idindex.open()

    # insert entry information
    items = 0
    for mtime, path in cached:
        if index is not None:
            # The index maps a cache file's basename to an id; skip the
            # file early when that id is not currently subscribed to.
            base = os.path.basename(path)
            if index.has_key(base) and index[base] not in sub_ids:
                continue

        try:
            entry = minidom.parse(path)

            # verify that this entry is currently subscribed to
            entry.normalize()
            sources = entry.getElementsByTagName('source')
            if sources:
                ids = sources[0].getElementsByTagName('id')
                if ids and ids[0].childNodes[0].nodeValue not in sub_ids:
                    # fall back to the planet:id element before giving up
                    ids = sources[0].getElementsByTagName('planet:id')
                    if not ids:
                        continue
                    if ids[0].childNodes[0].nodeValue not in sub_ids:
                        continue

            # add entry to feed
            feed.appendChild(entry.documentElement)
            items = items + 1
            if items >= max_items:
                break
        except Exception:
            # Deliberately best-effort: one bad cache file must not abort
            # the whole splice.  KeyboardInterrupt/SystemExit still
            # propagate, unlike with a bare ``except``.
            log.error("Error parsing %s", path)

    # Same None test as above, so an index that was opened is always
    # closed even if it happens to evaluate as false.
    if index is not None:
        index.close()

    return doc
#!/usr/bin/env python import os, sys, ConfigParser, shutil, glob venus_base = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) sys.path.insert(0,venus_base) if __name__ == "__main__": import planet planet.getLogger('WARN',None) hide_planet_ns = True while len(sys.argv) > 1: if sys.argv[1] == '-v' or sys.argv[1] == '--verbose': del sys.argv[1] elif sys.argv[1] == '-p' or sys.argv[1] == '--planet': hide_planet_ns = False del sys.argv[1] else: break parser = ConfigParser.ConfigParser() parser.add_section('Planet') parser.add_section(sys.argv[1]) work = reduce(os.path.join, ['tests','work','reconsititute'], venus_base) output = os.path.join(work, 'output') filters = os.path.join(venus_base,'filters') parser.set('Planet','cache_directory',work) parser.set('Planet','output_dir',output) parser.set('Planet','filter_directories',filters) if hide_planet_ns: parser.set('Planet','template_files','themes/common/atom.xml.xslt')
def load(config_file):
    """ initialize and load a configuration

    Rebinds the module-level ``parser`` to a fresh ``ConfigParser`` read
    from *config_file*, then applies three optional layers:

    * theme: when an output theme is configured, its ``config.ini`` is
      read first and *config_file* is re-read on top of it, so the user's
      settings override the theme's; an error is logged if the theme
      cannot be found in either search location.
    * filters: the ``filters`` directory under ``sys.path[0]`` is appended
      to the filter search path when it exists and is not already listed.
    * reading lists: each configured reading list is handed to
      ``downloadReadingList`` together with a converter callback.
    """
    global parser
    parser = ConfigParser()
    parser.read(config_file)

    import config, planet
    from planet import opml, foaf, csv_config
    # reuse the logger that is already set up, if there is one
    log = planet.logger
    if not log:
        log = planet.getLogger(config.log_level(),config.log_format())

    # Theme support
    theme = config.output_theme()
    if theme:
        # look for the theme relative to the current directory first, then
        # under the 'themes' directory next to the running script
        for path in ("", os.path.join(sys.path[0],'themes')):
            theme_dir = os.path.join(path,theme)
            theme_file = os.path.join(theme_dir,'config.ini')
            if os.path.exists(theme_file):
                # initial search list for theme directories
                dirs = config.template_directories()
                if theme_dir not in dirs:
                    dirs.append(theme_dir)
                if os.path.dirname(config_file) not in dirs:
                    dirs.append(os.path.dirname(config_file))

                # read in the theme
                # NOTE(review): the config.* accessors used below presumably
                # read the module-level parser, so from here until
                # config_file is re-read they report the theme's values.
                parser = ConfigParser()
                parser.read(theme_file)
                bom = config.bill_of_materials()

                # complete search list for theme directories
                dirs += [os.path.join(theme_dir,dir) for dir in
                    config.template_directories() if dir not in dirs]

                # merge configurations, allowing current one to override theme
                template_files = config.template_files()
                # blank the theme's template list so that the second
                # config.template_files() call below yields only what
                # config_file itself contributes
                parser.set('Planet','template_files','')
                parser.read(config_file)
                for file in config.bill_of_materials():
                    if not file in bom: bom.append(file)
                parser.set('Planet', 'bill_of_materials', ' '.join(bom))
                parser.set('Planet', 'template_directories', ' '.join(dirs))
                parser.set('Planet', 'template_files',
                   ' '.join(template_files + config.template_files()))
                break
        else:
            # neither candidate location held a config.ini for the theme
            log.error('Unable to find theme %s', theme)

    # Filter support
    dirs = config.filter_directories()
    filter_dir = os.path.join(sys.path[0],'filters')
    if filter_dir not in dirs and os.path.exists(filter_dir):
        parser.set('Planet', 'filter_directories', ' '.join(dirs+[filter_dir]))

    # Reading list support
    reading_lists = config.reading_lists()
    if reading_lists:
        if not os.path.exists(config.cache_lists_directory()):
            os.makedirs(config.cache_lists_directory())

        def data2config(data, cached_config):
            # Convert downloaded reading-list data into cached_config,
            # dispatching on what content_type() reports for the list.
            # `list` is the loop variable of the `for` statement below (it
            # shadows the builtin) and is looked up when this callback runs.
            if content_type(list).find('opml')>=0:
                opml.opml2config(data, cached_config)
            elif content_type(list).find('foaf')>=0:
                foaf.foaf2config(data, cached_config)
            elif content_type(list).find('csv')>=0:
                csv_config.csv2config(data, cached_config)
            elif content_type(list).find('config')>=0:
                cached_config.readfp(data)
            else:
                # any other content type is treated as the name of a filter
                # whose output is read as a config file
                from planet import shell
                import StringIO
                cached_config.readfp(StringIO.StringIO(shell.run(
                    content_type(list), data.getvalue(), mode="filter")))

            # reject a result with no sections, or with only a section
            # named after the reading list itself
            if cached_config.sections() in [[], [list]]:
                raise Exception

        for list in reading_lists:
            downloadReadingList(list, parser, data2config)
# more python 2.2 accommodations
if not hasattr(unittest.TestCase, 'assertTrue'):
    unittest.TestCase.assertTrue = unittest.TestCase.assert_
if not hasattr(unittest.TestCase, 'assertFalse'):
    unittest.TestCase.assertFalse = unittest.TestCase.failIf

# try to start in a consistent, predictable location
if sys.path[0]:
    os.chdir(sys.path[0])
sys.path[0] = os.getcwd()

# find all of the planet test modules; built as a real list so that the
# fallback loop below can walk it again after the loader has consumed it
modules = [fullmodname(path)
    for path in glob.glob(os.path.join('tests', 'test_*.py'))]

# enable warnings
import planet
planet.getLogger("WARNING",None)

# load all of the tests into a suite
try:
    suite = unittest.TestLoader().loadTestsFromNames(modules)
except Exception:
    # attempt to produce a more specific message: importing each module
    # directly surfaces the original import error, if there is one
    for module in modules:
        __import__(module)
    raise

# -v/--verbose takes precedence when both kinds of flag are given
verbosity = 1
if "-q" in sys.argv or '--quiet' in sys.argv:
    verbosity = 0
if "-v" in sys.argv or '--verbose' in sys.argv:
    verbosity = 2
def spiderPlanet(only_if_new = False):
    """ Spider (fetch) an entire planet

    Sets the default socket timeout from the configuration, optionally
    starts ``spider_threads`` worker threads running ``httpThread``, and
    queues every subscription: HTTP URIs go to the fetch queue when workers
    exist, everything else goes straight to the parse queue.  Results are
    then parsed with ``feedparser`` and handed to ``writeCache`` as they
    arrive, until the queues are empty and all workers have exited.

    only_if_new -- when true, subscriptions that already have cached feed
                   information are skipped.

    Feeds whose cached ``planet_http_status`` is '410' are always skipped.
    An error while processing one feed is logged with its traceback and
    does not stop the remaining feeds.
    """
    log = planet.getLogger(config.log_level(),config.log_format())

    global index
    index = True

    timeout = config.feed_timeout()
    try:
        seconds = float(timeout)
        socket.setdefaulttimeout(seconds)
        # log the converted number: '%d' cannot format a numeric string
        log.info("Socket timeout set to %d seconds", seconds)
    except Exception:
        # either the value is not a number or this socket module has no
        # setdefaulttimeout; try the bundled fallback before giving up
        try:
            from planet import timeoutsocket
            seconds = float(timeout)
            timeoutsocket.setDefaultSocketTimeout(seconds)
            log.info("Socket timeout set to %d seconds", seconds)
        except Exception:
            log.warning("Timeout set to invalid value '%s', skipping", timeout)

    from Queue import Queue
    from threading import Thread

    fetch_queue = Queue()
    parse_queue = Queue()

    threads = {}
    http_cache = config.http_cache_directory()
    # Should this be done in config?
    if http_cache and not os.path.exists(http_cache):
        os.makedirs(http_cache)

    def reap_finished_threads():
        """Forget workers that have exited; log when the last one goes."""
        # Iterate over a copy of the keys because entries are deleted, and
        # use a name other than `index` so the module-level flag set above
        # is not overwritten.
        for thread_id in list(threads.keys()):
            if not threads[thread_id].isAlive():
                del threads[thread_id]
                if not threads:
                    log.info("Finished threaded part of processing.")

    if int(config.spider_threads()):
        # Start all the worker threads
        for i in range(int(config.spider_threads())):
            threads[i] = Thread(target=httpThread,
                args=(i,fetch_queue, parse_queue, log))
            threads[i].start()
    else:
        log.info("Building work queue")

    # Load the fetch and parse work queues
    for uri in config.subscriptions():
        # read cached feed info
        sources = config.cache_sources_directory()
        feed_source = filename(sources, uri)
        feed_info = feedparser.parse(feed_source)

        if feed_info.feed and only_if_new:
            log.info("Feed %s already in cache", uri)
            continue
        if feed_info.feed.get('planet_http_status',None) == '410':
            log.info("Feed %s gone", uri)
            continue

        if threads and _is_http_uri(uri):
            fetch_queue.put(item=(uri, feed_info))
        else:
            parse_queue.put(item=(uri, feed_info, uri))

    # Mark the end of the fetch queue: one (None, None) item per worker
    for thread in threads.keys():
        fetch_queue.put(item=(None, None))

    # Process the results as they arrive
    while fetch_queue.qsize() or parse_queue.qsize() or threads:
        while parse_queue.qsize() == 0 and threads:
            time.sleep(0.1)
            # Without pruning here the wait could never end once every
            # worker had exited without queueing a result (for instance
            # when there are no subscriptions at all).
            reap_finished_threads()

        while parse_queue.qsize():
            (uri, feed_info, feed) = parse_queue.get(False)
            try:
                if not hasattr(feed,'headers') or int(feed.headers.status)<300:
                    options = {}
                    if hasattr(feed_info,'feed'):
                        options['etag'] = \
                            feed_info.feed.get('planet_http_etag',None)
                        # NOTE(review): the original also ran
                        # time.strptime() on 'planet_http_last_modified'
                        # here but discarded the result; that dead code was
                        # dropped rather than wired into `options`, which
                        # would change what is sent to feedparser.
                    data = feedparser.parse(feed, **options)
                else:
                    # a response with status >= 300 carries nothing to
                    # parse; record headers and status only
                    data = feedparser.FeedParserDict({'version': None,
                        'headers': feed.headers, 'entries': [], 'feed': {},
                        'bozo': 0, 'status': int(feed.headers.status)})

                writeCache(uri, feed_info, data)

            except Exception:
                import sys, traceback
                exc_type, exc_value, exc_tb = sys.exc_info()
                log.error('Error processing %s', uri)
                for line in (traceback.format_exception_only(exc_type, exc_value) +
                        traceback.format_tb(exc_tb)):
                    log.error(line.rstrip())

        reap_finished_threads()
def setUp(self):
    """Drop any existing planet logger and request a 'CRITICAL'-level one.

    Clearing ``planet.logger`` first and then asking for the higher level
    is what silences errors while the test runs.
    """
    import planet

    planet.logger = None
    planet.getLogger('CRITICAL', None)
verbosity = 1
for arg, value in (('-q', 0), ('--quiet', 0), ('-v', 2), ('--verbose', 2)):
    # list.remove() drops only the first match, so strip every occurrence:
    # a repeated flag left in sys.argv would be taken for a test file
    # pattern below and suppress the default 'test_*.py'.
    while arg in sys.argv:
        verbosity = value
        sys.argv.remove(arg)

# find all of the planet test modules
modules = []
for pattern in sys.argv[1:] or ['test_*.py']:
    modules.extend(map(fullmodname, glob.glob(os.path.join('tests', pattern))))

# enable logging; verbosity is always 0, 1 or 2 at this point
import planet
planet.getLogger({0: "FATAL", 1: "WARNING", 2: "DEBUG"}[verbosity], None)

# load all of the tests into a suite
try:
    suite = unittest.TestLoader().loadTestsFromNames(modules)
except Exception:
    # attempt to produce a more specific message: importing each module
    # directly surfaces the original import error, if there is one
    for each_module in modules:
        __import__(each_module)
    raise

# run test suite
def setUp(self):
    """Save the current planet logger and request a 'CRITICAL'-level one.

    The saved logger is kept on ``self.original_logger``; raising the
    level is what silences errors while the test runs.
    """
    saved = planet.logger
    self.original_logger = saved
    planet.getLogger('CRITICAL', None)
elif arg == "-d" or arg == "--debug-splice": debug_splice = 1 elif arg == "--no-publish": no_publish = 1 elif arg.startswith("-"): print >>sys.stderr, "Unknown option:", arg sys.exit(1) else: config_file.append(arg) from planet import config config.load(config_file or 'config.ini') if verbose: import planet planet.getLogger('DEBUG',config.log_format()) if not offline: from planet import spider try: spider.spiderPlanet(only_if_new=only_if_new) except Exception, e: print e from planet import splice doc = splice.splice() if debug_splice: from planet import logger logger.info('writing debug.atom') debug=open('debug.atom','w')
#!/usr/bin/env python import os, sys, ConfigParser, shutil, glob venus_base = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) sys.path.insert(0, venus_base) if __name__ == "__main__": import planet planet.getLogger('WARN', None) hide_planet_ns = True while len(sys.argv) > 1: if sys.argv[1] == '-v' or sys.argv[1] == '--verbose': del sys.argv[1] elif sys.argv[1] == '-p' or sys.argv[1] == '--planet': hide_planet_ns = False del sys.argv[1] else: break parser = ConfigParser.ConfigParser() parser.add_section('Planet') parser.add_section(sys.argv[1]) work = reduce(os.path.join, ['tests', 'work', 'reconsititute'], venus_base) output = os.path.join(work, 'output') filters = os.path.join(venus_base, 'filters') parser.set('Planet', 'cache_directory', work) parser.set('Planet', 'output_dir', output) parser.set('Planet', 'filter_directories', filters) if hide_planet_ns: parser.set('Planet', 'template_files', 'themes/common/atom.xml.xslt')
#!/usr/bin/env python import os, sys, ConfigParser, shutil, glob venus_base = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) sys.path.insert(0,venus_base) if __name__ == "__main__": hide_planet_ns = True while len(sys.argv) > 1: if sys.argv[1] == '-v' or sys.argv[1] == '--verbose': import planet planet.getLogger('DEBUG',None) del sys.argv[1] elif sys.argv[1] == '-p' or sys.argv[1] == '--planet': hide_planet_ns = False del sys.argv[1] else: break parser = ConfigParser.ConfigParser() parser.add_section('Planet') parser.add_section(sys.argv[1]) work = reduce(os.path.join, ['tests','work','reconsititute'], venus_base) output = os.path.join(work, 'output') filters = os.path.join(venus_base,'filters') parser.set('Planet','cache_directory',work) parser.set('Planet','output_dir',output) parser.set('Planet','filter_directories',filters) if hide_planet_ns: parser.set('Planet','template_files','themes/common/atom.xml.xslt')
This script captures such output. It should be run whenever there is a major change in the contract between stages """ import shutil, os, sys # move up a directory sys.path.insert(0, os.path.split(sys.path[0])[0]) os.chdir(sys.path[0]) # copy spider output to splice input import planet from planet import spider, config planet.getLogger('CRITICAL', None) config.load('tests/data/spider/config.ini') spider.spiderPlanet() if os.path.exists('tests/data/splice/cache'): shutil.rmtree('tests/data/splice/cache') shutil.move('tests/work/spider/cache', 'tests/data/splice/cache') source = open('tests/data/spider/config.ini') dest1 = open('tests/data/splice/config.ini', 'w') dest1.write(source.read().replace('/work/spider/', '/data/splice/')) dest1.close() source.seek(0) dest2 = open('tests/work/apply_config.ini', 'w') dest2.write(source.read().replace(
def load(config_files):
    """ initialize and load a configuration

    Rebinds the module-level ``parser`` to a fresh ``ConfigParser`` read
    from *config_files* (a single path or a list of paths), then applies
    three optional layers:

    * theme: when an output theme is configured, its ``config.ini`` is
      read first and *config_files* are re-read on top of it, so the
      user's settings override the theme's; an error is logged if the
      theme cannot be found in either search location.
    * filters: the ``filters`` directory under ``sys.path[0]`` is appended
      to the filter search path when it exists and is not already listed.
    * reading lists: each configured reading list is handed to
      ``downloadReadingList`` together with a converter callback.
    """
    global parser
    parser = ConfigParser()
    parser.read(config_files)

    import config, planet
    from planet import opml, foaf, csv_config
    # reuse the logger that is already set up, if there is one
    log = planet.logger
    if not log:
        log = planet.getLogger(config.log_level(),config.log_format())

    # Theme support
    theme = config.output_theme()
    if theme:
        # look for the theme relative to the current directory first, then
        # under the 'themes' directory next to the running script
        for path in ("", os.path.join(sys.path[0],'themes')):
            theme_dir = os.path.join(path,theme)
            theme_file = os.path.join(theme_dir,'config.ini')
            if os.path.exists(theme_file):
                # initial search list for theme directories
                dirs = config.template_directories()
                if theme_dir not in dirs:
                    dirs.append(theme_dir)
                # anything without an 'append' attribute is treated as a
                # single path and wrapped in a list
                if not hasattr(config_files, 'append'):
                    config_files = [config_files]
                for config_file in config_files:
                    if os.path.dirname(config_file) not in dirs:
                        dirs.append(os.path.dirname(config_file))

                # read in the theme
                # NOTE(review): the config.* accessors used below presumably
                # read the module-level parser, so from here until
                # config_files are re-read they report the theme's values.
                parser = ConfigParser()
                parser.read(theme_file)
                bom = config.bill_of_materials()

                # complete search list for theme directories
                dirs += [os.path.join(theme_dir,dir) for dir in
                    config.template_directories() if dir not in dirs]

                # merge configurations, allowing current one to override theme
                template_files = config.template_files()
                # blank the theme's template list so that the second
                # config.template_files() call below yields only what the
                # user's configuration itself contributes
                parser.set('Planet','template_files','')
                parser.read(config_files)
                for file in config.bill_of_materials():
                    if not file in bom: bom.append(file)
                parser.set('Planet', 'bill_of_materials', ' '.join(bom))
                parser.set('Planet', 'template_directories', ' '.join(dirs))
                parser.set('Planet', 'template_files',
                   ' '.join(template_files + config.template_files()))
                break
        else:
            # neither candidate location held a config.ini for the theme
            log.error('Unable to find theme %s', theme)

    # Filter support
    dirs = config.filter_directories()
    filter_dir = os.path.join(sys.path[0],'filters')
    if filter_dir not in dirs and os.path.exists(filter_dir):
        parser.set('Planet', 'filter_directories', ' '.join(dirs+[filter_dir]))

    # Reading list support
    reading_lists = config.reading_lists()
    if reading_lists:
        if not os.path.exists(config.cache_lists_directory()):
            os.makedirs(config.cache_lists_directory())

        def data2config(data, cached_config):
            # Convert downloaded reading-list data into cached_config,
            # dispatching on what content_type() reports for the list.
            # `list` is the loop variable of the `for` statement below (it
            # shadows the builtin) and is looked up when this callback runs.
            if content_type(list).find('opml')>=0:
                opml.opml2config(data, cached_config)
            elif content_type(list).find('foaf')>=0:
                foaf.foaf2config(data, cached_config)
            elif content_type(list).find('csv')>=0:
                csv_config.csv2config(data, cached_config)
            elif content_type(list).find('config')>=0:
                cached_config.readfp(data)
            else:
                # any other content type is treated as the name of a filter
                # whose output is read as a config file
                from planet import shell
                import StringIO
                cached_config.readfp(StringIO.StringIO(shell.run(
                    content_type(list), data.getvalue(), mode="filter")))

            # reject a result with no sections, or with only a section
            # named after the reading list itself
            if cached_config.sections() in [[], [list]]:
                raise Exception

        for list in reading_lists:
            downloadReadingList(list, parser, data2config)
# determine verbosity
verbosity = 1
for arg, value in (('-q', 0), ('--quiet', 0), ('-v', 2), ('--verbose', 2)):
    # list.remove() drops only the first match, so strip every occurrence:
    # a repeated flag left in sys.argv would be taken for a test file
    # pattern below and suppress the default 'test_*.py'.
    while arg in sys.argv:
        verbosity = value
        sys.argv.remove(arg)

# find all of the planet test modules
modules = []
for pattern in sys.argv[1:] or ['test_*.py']:
    modules.extend(map(fullmodname, glob.glob(os.path.join('tests', pattern))))

# enable logging; verbosity is always 0, 1 or 2 at this point
import planet
planet.getLogger({0: "FATAL", 1: "WARNING", 2: "DEBUG"}[verbosity], None)

# load all of the tests into a suite
try:
    suite = unittest.TestLoader().loadTestsFromNames(modules)
except Exception:
    # attempt to produce a more specific message: importing each module
    # directly surfaces the original import error, if there is one
    for module in modules:
        __import__(module)
    raise

# run test suite
unittest.TextTestRunner(verbosity=verbosity).run(suite)