def apply(doc):
    """ Run the configured templates over doc and refresh the bill of materials

    Creates the output directory when it is missing, passes every configured
    template file together with doc to shell.run, and then copies each bill of
    materials file from the first template directory that contains it into
    the output directory. A file is only copied when the destination is
    missing or older than the source.
    """
    output_dir = config.output_dir()
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    log = planet.getLogger(config.log_level(), config.log_format())

    # Hand each template to the shell along with the document
    for template in config.template_files():
        shell.run(template, doc)

    # Bring the static files listed in the bill of materials up to date
    for material in config.bill_of_materials():
        target = os.path.join(output_dir, material)

        # The first template directory that holds the file wins
        origin = None
        for directory in config.template_directories():
            candidate = os.path.join(directory, material)
            if os.path.exists(candidate):
                origin = candidate
                break
        if origin is None:
            log.error('Unable to locate %s', material)
            continue

        # Nothing to do when the destination is at least as new as the source
        origin_mtime = os.stat(origin).st_mtime
        if os.path.exists(target) and os.stat(target).st_mtime >= origin_mtime:
            continue

        target_dir = os.path.dirname(target)
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)

        log.info("Copying %s to %s", origin, target)
        shutil.copyfile(origin, target)
        # copystat carries the source mtime over, which the freshness check
        # above compares against on the next run
        shutil.copystat(origin, target)
def info():
    """ Describe the running application

    Returns a two-item tuple: the result of jsonify() applied to a dict holding
    the fixed app name, the git version and commit sha, and an "environment"
    list with the service port and log level, followed by the integer 200.
    """
    # Keys are added in the same order the values are read from config
    payload = {"app_name": "flask_app"}
    payload["version"] = config.git_version()
    payload["git_commit_sha"] = config.git_commit_sha()
    payload["environment"] = [
        {"service_port": config.port()},
        {"log_level": config.log_level()},
    ]
    return jsonify(payload), 200
def load(config_files):
    """ initialize and load a configuration

    config_files may be a single path or a list of paths. The module-level
    parser is rebuilt from them, the selected theme (if any) is merged in
    underneath, the 'filters' directory under sys.path[0] is added to the
    filter search path when it exists, and every configured reading list is
    downloaded.
    """
    global parser
    parser = ConfigParser()
    parser.read(config_files)

    import config, planet
    from planet import opml, foaf, csv_config
    log = planet.logger
    if not log:
        log = planet.getLogger(config.log_level(), config.log_format())

    # Theme support
    theme = config.output_theme()
    if theme:
        for base in ("", os.path.join(sys.path[0], 'themes')):
            theme_dir = os.path.join(base, theme)
            theme_file = os.path.join(theme_dir, 'config.ini')
            if not os.path.exists(theme_file):
                continue

            # initial search list for theme directories
            dirs = config.template_directories()
            if theme_dir not in dirs:
                dirs.append(theme_dir)
            # anything without an append method is treated as a single path
            if not hasattr(config_files, 'append'):
                config_files = [config_files]
            for config_file in config_files:
                config_dir = os.path.dirname(config_file)
                if config_dir not in dirs:
                    dirs.append(config_dir)

            # read in the theme
            # NOTE(review): presumably the config.* accessors read the
            # module-level parser, which is why it is replaced before the
            # theme values are queried -- confirm
            parser = ConfigParser()
            parser.read(theme_file)
            bom = config.bill_of_materials()

            # complete search list for theme directories; the additions are
            # computed in full before dirs is extended
            dirs.extend([os.path.join(theme_dir, name)
                         for name in config.template_directories()
                         if name not in dirs])

            # merge configurations, allowing current one to override theme
            template_files = config.template_files()
            parser.set('Planet', 'template_files', '')
            parser.read(config_files)
            for material in config.bill_of_materials():
                if material not in bom:
                    bom.append(material)
            parser.set('Planet', 'bill_of_materials', ' '.join(bom))
            parser.set('Planet', 'template_directories', ' '.join(dirs))
            parser.set('Planet', 'template_files',
                       ' '.join(template_files + config.template_files()))
            break
        else:
            # neither candidate location held a config.ini for the theme
            log.error('Unable to find theme %s', theme)

    # Filter support
    dirs = config.filter_directories()
    filter_dir = os.path.join(sys.path[0], 'filters')
    if filter_dir not in dirs and os.path.exists(filter_dir):
        parser.set('Planet', 'filter_directories', ' '.join(dirs + [filter_dir]))

    # Reading list support
    reading_lists = config.reading_lists()
    if reading_lists:
        if not os.path.exists(config.cache_lists_directory()):
            os.makedirs(config.cache_lists_directory())

        def data2config(data, cached_config):
            # reading_list is the loop variable of the download loop below;
            # it is looked up when this callback runs
            if content_type(reading_list).find('opml') >= 0:
                opml.opml2config(data, cached_config)
            elif content_type(reading_list).find('foaf') >= 0:
                foaf.foaf2config(data, cached_config)
            elif content_type(reading_list).find('csv') >= 0:
                csv_config.csv2config(data, cached_config)
            elif content_type(reading_list).find('config') >= 0:
                cached_config.readfp(data)
            else:
                # any other content type is run through the shell in filter
                # mode and the output is read as a configuration
                from planet import shell
                import StringIO
                cached_config.readfp(StringIO.StringIO(shell.run(
                    content_type(reading_list), data.getvalue(),
                    mode="filter")))

            # reject a result with no sections, or only the list's own one
            if cached_config.sections() in [[], [reading_list]]:
                raise Exception

        for reading_list in reading_lists:
            downloadReadingList(reading_list, parser, data2config)
def load(config_file):
    """ initialize and load a configuration

    The module-level parser is rebuilt from config_file, the selected theme
    (if any) is merged in underneath, the 'filters' directory under
    sys.path[0] is added to the filter search path when it exists, and every
    configured reading list is downloaded.
    """
    global parser
    parser = ConfigParser()
    parser.read(config_file)

    import config, planet
    from planet import opml, foaf, csv_config
    log = planet.logger
    if not log:
        log = planet.getLogger(config.log_level(), config.log_format())

    # Theme support
    theme = config.output_theme()
    if theme:
        for base in ("", os.path.join(sys.path[0], 'themes')):
            theme_dir = os.path.join(base, theme)
            theme_file = os.path.join(theme_dir, 'config.ini')
            if not os.path.exists(theme_file):
                continue

            # initial search list for theme directories
            dirs = config.template_directories()
            if theme_dir not in dirs:
                dirs.append(theme_dir)
            config_dir = os.path.dirname(config_file)
            if config_dir not in dirs:
                dirs.append(config_dir)

            # read in the theme
            # NOTE(review): presumably the config.* accessors read the
            # module-level parser, which is why it is replaced before the
            # theme values are queried -- confirm
            parser = ConfigParser()
            parser.read(theme_file)
            bom = config.bill_of_materials()

            # complete search list for theme directories; the additions are
            # computed in full before dirs is extended
            dirs.extend([os.path.join(theme_dir, name)
                         for name in config.template_directories()
                         if name not in dirs])

            # merge configurations, allowing current one to override theme
            template_files = config.template_files()
            parser.set('Planet', 'template_files', '')
            parser.read(config_file)
            for material in config.bill_of_materials():
                if material not in bom:
                    bom.append(material)
            parser.set('Planet', 'bill_of_materials', ' '.join(bom))
            parser.set('Planet', 'template_directories', ' '.join(dirs))
            parser.set('Planet', 'template_files',
                       ' '.join(template_files + config.template_files()))
            break
        else:
            # neither candidate location held a config.ini for the theme
            log.error('Unable to find theme %s', theme)

    # Filter support
    dirs = config.filter_directories()
    filter_dir = os.path.join(sys.path[0], 'filters')
    if filter_dir not in dirs and os.path.exists(filter_dir):
        parser.set('Planet', 'filter_directories', ' '.join(dirs + [filter_dir]))

    # Reading list support
    reading_lists = config.reading_lists()
    if reading_lists:
        if not os.path.exists(config.cache_lists_directory()):
            os.makedirs(config.cache_lists_directory())

        def data2config(data, cached_config):
            # reading_list is the loop variable of the download loop below;
            # it is looked up when this callback runs
            if content_type(reading_list).find('opml') >= 0:
                opml.opml2config(data, cached_config)
            elif content_type(reading_list).find('foaf') >= 0:
                foaf.foaf2config(data, cached_config)
            elif content_type(reading_list).find('csv') >= 0:
                csv_config.csv2config(data, cached_config)
            elif content_type(reading_list).find('config') >= 0:
                cached_config.readfp(data)
            else:
                # any other content type is run through the shell in filter
                # mode and the output is read as a configuration
                from planet import shell
                import StringIO
                cached_config.readfp(StringIO.StringIO(shell.run(
                    content_type(reading_list), data.getvalue(),
                    mode="filter")))

            # reject a result with no sections, or only the list's own one
            if cached_config.sections() in [[], [reading_list]]:
                raise Exception

        for reading_list in reading_lists:
            downloadReadingList(reading_list, parser, data2config)
def _reap_finished_threads(threads, log):
    """ Remove worker threads that are no longer alive from the threads dict """
    # Walk a snapshot of the keys because entries are deleted along the way.
    for thread_id in list(threads.keys()):
        if not threads[thread_id].isAlive():
            del threads[thread_id]
            if not threads:
                log.info("Finished threaded part of processing.")


def spiderPlanet(only_if_new = False):
    """ Spider (fetch) an entire planet

    only_if_new -- when true, subscriptions that already have cached feed
    information are skipped.

    Sets the module-level ``index`` flag to True, applies the configured
    socket timeout, optionally starts worker threads running httpThread, and
    then parses every queued feed and hands the result to writeCache.
    Errors while processing one feed are logged and do not stop the others.
    """
    log = planet.getLogger(config.log_level(),config.log_format())

    global index
    index = True

    timeout = config.feed_timeout()
    try:
        socket.setdefaulttimeout(float(timeout))
        # Log the converted number: %d cannot format the raw value when the
        # configuration hands it back as a string.
        log.info("Socket timeout set to %d seconds", float(timeout))
    except Exception:
        try:
            from planet import timeoutsocket
            timeoutsocket.setDefaultSocketTimeout(float(timeout))
            log.info("Socket timeout set to %d seconds", float(timeout))
        except Exception:
            log.warning("Timeout set to invalid value '%s', skipping", timeout)

    from Queue import Queue
    from threading import Thread

    fetch_queue = Queue()
    parse_queue = Queue()

    threads = {}
    http_cache = config.http_cache_directory()
    # Should this be done in config?
    if http_cache and not os.path.exists(http_cache):
        os.makedirs(http_cache)

    if int(config.spider_threads()):
        # Start all the worker threads
        for i in range(int(config.spider_threads())):
            threads[i] = Thread(target=httpThread,
                args=(i,fetch_queue, parse_queue, log))
            threads[i].start()
    else:
        log.info("Building work queue")

    # Load the fetch and parse work queues
    for uri in config.subscriptions():
        # read cached feed info
        sources = config.cache_sources_directory()
        feed_source = filename(sources, uri)
        feed_info = feedparser.parse(feed_source)

        if feed_info.feed and only_if_new:
            log.info("Feed %s already in cache", uri)
            continue
        if feed_info.feed.get('planet_http_status',None) == '410':
            log.info("Feed %s gone", uri)
            continue

        if threads and _is_http_uri(uri):
            fetch_queue.put(item=(uri, feed_info))
        else:
            # no workers (or not an http uri): the uri itself is queued as
            # the thing to parse
            parse_queue.put(item=(uri, feed_info, uri))

    # Mark the end of the fetch queue: one (None, None) entry per worker
    for _ in threads.keys():
        fetch_queue.put(item=(None, None))

    # Process the results as they arrive
    while fetch_queue.qsize() or parse_queue.qsize() or threads:
        # Wait for something to parse. Finished workers are pruned inside
        # the wait so that it ends once every worker has exited, even if
        # nothing further arrives on the parse queue.
        while parse_queue.qsize() == 0 and threads:
            time.sleep(0.1)
            _reap_finished_threads(threads, log)

        while parse_queue.qsize():
            (uri, feed_info, feed) = parse_queue.get(False)
            try:
                if not hasattr(feed,'headers') or int(feed.headers.status)<300:
                    options = {}
                    if hasattr(feed_info,'feed'):
                        options['etag'] = \
                            feed_info.feed.get('planet_http_etag',None)
                        # NOTE(review): planet_http_last_modified used to be
                        # parsed here with time.strptime but the result was
                        # never passed on; only the etag is forwarded.

                    data = feedparser.parse(feed, **options)
                else:
                    # status of 300 or above: record the headers and status
                    # without parsing a body
                    data = feedparser.FeedParserDict({'version': None,
                        'headers': feed.headers, 'entries': [], 'feed': {},
                        'bozo': 0, 'status': int(feed.headers.status)})

                writeCache(uri, feed_info, data)

            except Exception:
                import sys, traceback
                exc_type, exc_value, exc_tb = sys.exc_info()
                log.error('Error processing %s', uri)
                for line in (traceback.format_exception_only(exc_type,
                        exc_value) + traceback.format_tb(exc_tb)):
                    log.error(line.rstrip())

        # A separate name is used for the thread key inside the helper so
        # the module-level ``index`` flag set above is left untouched.
        _reap_finished_threads(threads, log)
def splice():
    """ Splice together a planet from a cache of entries

    Builds an Atom feed document holding the planet's own metadata, one
    planet:source element per subscription with cached feed data, and the
    most recently modified cached entries that belong to a current
    subscription, up to the largest items_per_page among the configured
    templates.

    Returns the xml.dom.minidom document. Cache files that fail to parse are
    logged and skipped.
    """
    import planet
    log = planet.getLogger(config.log_level(),config.log_format())

    log.info("Loading cached data")
    cache = config.cache_directory()
    # (mtime, path) pairs for every non-directory in the cache, newest first
    cached = [(os.stat(path).st_mtime, path)
        for path in glob.glob(cache+"/*") if not os.path.isdir(path)]
    cached.sort()
    cached.reverse()

    # 'Planet' stands in when no template files are configured
    max_items=max([config.items_per_page(templ)
        for templ in config.template_files() or ['Planet']])

    doc = minidom.parseString('<feed xmlns="http://www.w3.org/2005/Atom"/>')
    feed = doc.documentElement

    # insert feed information
    createTextElement(feed, 'title', config.name())
    date(feed, 'updated', time.gmtime())
    gen = createTextElement(feed, 'generator', config.generator())
    gen.setAttribute('uri', config.generator_uri())

    author = doc.createElement('author')
    createTextElement(author, 'name', config.owner_name())
    createTextElement(author, 'email', config.owner_email())
    feed.appendChild(author)

    if config.feed():
        createTextElement(feed, 'id', config.feed())
        link = doc.createElement('link')
        link.setAttribute('rel', 'self')
        link.setAttribute('href', config.feed())
        if config.feedtype():
            link.setAttribute('type', "application/%s+xml" % config.feedtype())
        feed.appendChild(link)

    if config.link():
        link = doc.createElement('link')
        link.setAttribute('rel', 'alternate')
        link.setAttribute('href', config.link())
        feed.appendChild(link)

    # insert subscription information
    sub_ids = []
    feed.setAttribute('xmlns:planet',planet.xmlns)
    sources = config.cache_sources_directory()
    for sub in config.subscriptions():
        data=feedparser.parse(filename(sources,sub))
        if data.feed.has_key('id'):
            sub_ids.append(data.feed.id)
        if not data.feed:
            continue
        xdoc=minidom.parseString('''<planet:source xmlns:planet="%s" xmlns="http://www.w3.org/2005/Atom"/>\n''' % planet.xmlns)
        reconstitute.source(xdoc.documentElement, data.feed, None, None)
        feed.appendChild(xdoc.documentElement)

    index = idindex.open()

    try:
        # insert entry information
        items = 0
        for mtime, path in cached:
            if index is not None:
                # skip entries the index maps to an id that is not currently
                # subscribed, without parsing the file
                base = os.path.basename(path)
                if index.has_key(base) and index[base] not in sub_ids:
                    continue

            try:
                entry=minidom.parse(path)

                # verify that this entry is currently subscribed to
                entry.normalize()
                source_nodes = entry.getElementsByTagName('source')
                if source_nodes:
                    ids = source_nodes[0].getElementsByTagName('id')
                    if ids and ids[0].childNodes[0].nodeValue not in sub_ids:
                        # fall back to the planet:id of the source
                        ids = source_nodes[0].getElementsByTagName('planet:id')
                        if not ids:
                            continue
                        if ids[0].childNodes[0].nodeValue not in sub_ids:
                            continue

                # add entry to feed
                feed.appendChild(entry.documentElement)
                items = items + 1
                if items >= max_items:
                    break
            except Exception:
                # one unreadable cache file must not abort the whole splice
                log.error("Error parsing %s", path)
    finally:
        # Same None test as in the loop above, so an index object that
        # happens to evaluate as false is still closed.
        if index is not None:
            index.close()

    return doc