def apply(doc):
    """Render *doc* through every configured template and copy the
    bill-of-materials files into the output directory.

    shell.run(template_file, doc) renders a template and returns the path
    of the file it wrote (the path is re-opened/unlinked below).
    """
    output_dir = config.output_dir()
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    log = planet.logger

    # Filters configured globally for [Planet]; anything beyond these in a
    # template's own filter list is treated as template-specific.
    planet_filters = config.filters('Planet')

    # Go-go-gadget-template
    for template_file in config.template_files():
        output_file = shell.run(template_file, doc)

        # run any template specific filters
        if config.filters(template_file) != planet_filters:
            output = open(output_file).read()
            for filter in config.filters(template_file):
                # NOTE: `filter` shadows the builtin; kept as-is.
                if filter in planet_filters:
                    continue
                if filter.find('>') > 0:
                    # tee'd output: "cmd > dest" writes the filter result to
                    # dest without replacing the main template output
                    filter, dest = filter.split('>', 1)
                    tee = shell.run(filter.strip(), output, mode="filter")
                    if tee:
                        output_dir = planet.config.output_dir()
                        dest_file = os.path.join(output_dir, dest.strip())
                        dest_file = open(dest_file, 'w')
                        dest_file.write(tee)
                        dest_file.close()
                else:
                    # pipe'd output: the filter replaces the template output;
                    # an empty result deletes the output file entirely
                    output = shell.run(filter, output, mode="filter")
                    if not output:
                        os.unlink(output_file)
                        break
            else:
                # for/else: only rewrite the file if no filter emptied it
                handle = open(output_file, 'w')
                handle.write(output)
                handle.close()

    # Process bill of materials
    for copy_file in config.bill_of_materials():
        dest = os.path.join(output_dir, copy_file)
        for template_dir in config.template_directories():
            source = os.path.join(template_dir, copy_file)
            if os.path.exists(source):
                break
        else:
            # not found in any template directory
            log.error('Unable to locate %s', copy_file)
            log.info("Template search path:")
            for template_dir in config.template_directories():
                log.info(" %s", os.path.realpath(template_dir))
            continue

        # copy only when the source is newer than the destination
        mtime = os.stat(source).st_mtime
        if not os.path.exists(dest) or os.stat(dest).st_mtime < mtime:
            dest_dir = os.path.split(dest)[0]
            if not os.path.exists(dest_dir):
                os.makedirs(dest_dir)

            log.info("Copying %s to %s", source, dest)
            if os.path.exists(dest):
                # make a previously-copied read-only file writable first
                os.chmod(dest, 0644)
            shutil.copyfile(source, dest)
            shutil.copystat(source, dest)
def apply(doc):
    """Run every configured template over *doc* and refresh any
    bill-of-materials files that are out of date in the output directory."""
    output_dir = config.output_dir()
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    log = planet.getLogger(config.log_level(),config.log_format())

    # Go-go-gadget-template
    for template_file in config.template_files():
        shell.run(template_file, doc)

    def _locate(name):
        # First template directory containing `name`, or None if absent.
        for search_dir in config.template_directories():
            candidate = os.path.join(search_dir, name)
            if os.path.exists(candidate):
                return candidate
        return None

    # Process bill of materials
    for copy_file in config.bill_of_materials():
        source = _locate(copy_file)
        if source is None:
            log.error('Unable to locate %s', copy_file)
            continue

        dest = os.path.join(output_dir, copy_file)
        source_mtime = os.stat(source).st_mtime
        # Skip files whose destination copy is already current.
        if os.path.exists(dest) and os.stat(dest).st_mtime >= source_mtime:
            continue

        parent = os.path.split(dest)[0]
        if not os.path.exists(parent):
            os.makedirs(parent)
        log.info("Copying %s to %s", source, dest)
        shutil.copyfile(source, dest)
        shutil.copystat(source, dest)
def apply(doc):
    """Render *doc* through every configured template, apply any
    template-specific filters, and copy bill-of-materials files into the
    output directory.

    shell.run(template_file, doc) renders a template and returns the path
    of the file it wrote.
    """
    output_dir = config.output_dir()
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    log = planet.logger

    # Global [Planet] filters; a template's extra filters run per-template.
    planet_filters = config.filters('Planet')

    # Go-go-gadget-template
    for template_file in config.template_files():
        output_file = shell.run(template_file, doc)

        # run any template specific filters
        if config.filters(template_file) != planet_filters:
            output = open(output_file).read()
            for filter in config.filters(template_file):
                # NOTE: `filter` shadows the builtin; kept as-is.
                if filter in planet_filters:
                    continue
                if filter.find('>')>0:
                    # tee'd output: "cmd > dest" writes the filtered copy to
                    # dest, leaving the main output untouched
                    filter,dest = filter.split('>',1)
                    tee = shell.run(filter.strip(), output, mode="filter")
                    if tee:
                        output_dir = planet.config.output_dir()
                        dest_file = os.path.join(output_dir, dest.strip())
                        dest_file = open(dest_file,'w')
                        dest_file.write(tee)
                        dest_file.close()
                else:
                    # pipe'd output: filter replaces the output; empty result
                    # removes the file and stops filtering
                    output = shell.run(filter, output, mode="filter")
                    if not output:
                        os.unlink(output_file)
                        break
            else:
                # for/else: rewrite only when no filter emptied the output
                handle = open(output_file,'w')
                handle.write(output)
                handle.close()

    # Process bill of materials
    for copy_file in config.bill_of_materials():
        dest = os.path.join(output_dir, copy_file)
        for template_dir in config.template_directories():
            source = os.path.join(template_dir, copy_file)
            if os.path.exists(source):
                break
        else:
            # not found anywhere on the template search path
            log.error('Unable to locate %s', copy_file)
            log.info("Template search path:")
            for template_dir in config.template_directories():
                log.info(" %s", os.path.realpath(template_dir))
            continue

        # copy only when the source is newer than the destination
        mtime = os.stat(source).st_mtime
        if not os.path.exists(dest) or os.stat(dest).st_mtime < mtime:
            dest_dir = os.path.split(dest)[0]
            if not os.path.exists(dest_dir):
                os.makedirs(dest_dir)

            log.info("Copying %s to %s", source, dest)
            if os.path.exists(dest):
                # ensure an existing read-only copy can be overwritten
                os.chmod(dest, 0644)
            shutil.copyfile(source, dest)
            shutil.copystat(source, dest)
def load(config_files):
    """ initialize and load a configuration

    *config_files* may be a single path or a list of paths.  Populates the
    module-global `parser`, layering in theme settings, the bundled filter
    directory, and any configured reading lists.
    """
    global parser
    parser = ConfigParser()
    parser.read(config_files)

    # imported here (not at module top) — presumably to avoid an import
    # cycle with the config module; TODO confirm
    import config, planet
    from planet import opml, foaf, csv_config
    log = planet.logger
    if not log:
        log = planet.getLogger(config.log_level(),config.log_format())

    # Theme support
    theme = config.output_theme()
    if theme:
        # search the current directory, then the bundled themes directory
        for path in ("", os.path.join(sys.path[0],'themes')):
            theme_dir = os.path.join(path,theme)
            theme_file = os.path.join(theme_dir,'config.ini')
            if os.path.exists(theme_file):
                # initial search list for theme directories
                dirs = config.template_directories()
                if theme_dir not in dirs:
                    dirs.append(theme_dir)
                # normalize a single path into a list
                if not hasattr(config_files, 'append'):
                    config_files = [config_files]
                for config_file in config_files:
                    if os.path.dirname(config_file) not in dirs:
                        dirs.append(os.path.dirname(config_file))

                # read in the theme (replaces the parser: theme first,
                # user config layered back on top below)
                parser = ConfigParser()
                parser.read(theme_file)
                bom = config.bill_of_materials()

                # complete search list for theme directories
                dirs += [os.path.join(theme_dir,dir) for dir in
                    config.template_directories() if dir not in dirs]

                # merge configurations, allowing current one to override theme
                template_files = config.template_files()
                parser.set('Planet','template_files','')
                parser.read(config_files)
                for file in config.bill_of_materials():
                    if not file in bom:
                        bom.append(file)
                parser.set('Planet', 'bill_of_materials', ' '.join(bom))
                parser.set('Planet', 'template_directories', ' '.join(dirs))
                parser.set('Planet', 'template_files',
                    ' '.join(template_files + config.template_files()))
                break
        else:
            # for/else: no candidate theme directory had a config.ini
            log.error('Unable to find theme %s', theme)

    # Filter support: add the bundled filters directory to the search path
    dirs = config.filter_directories()
    filter_dir = os.path.join(sys.path[0],'filters')
    if filter_dir not in dirs and os.path.exists(filter_dir):
        parser.set('Planet', 'filter_directories', ' '.join(dirs+[filter_dir]))

    # Reading list support
    reading_lists = config.reading_lists()
    if reading_lists:
        if not os.path.exists(config.cache_lists_directory()):
            os.makedirs(config.cache_lists_directory())

        def data2config(data, cached_config):
            # Convert a downloaded reading list into config sections, keyed
            # by its content type.  NOTE: `list` here is the loop variable
            # from the enclosing `for list in reading_lists` (shadows the
            # builtin) — the closure reads the current list's URL.
            if content_type(list).find('opml')>=0:
                opml.opml2config(data, cached_config)
            elif content_type(list).find('foaf')>=0:
                foaf.foaf2config(data, cached_config)
            elif content_type(list).find('csv')>=0:
                csv_config.csv2config(data, cached_config)
            elif content_type(list).find('config')>=0:
                cached_config.readfp(data)
            else:
                # unknown type: run it through a filter named after the type
                from planet import shell
                import StringIO
                cached_config.readfp(StringIO.StringIO(shell.run(
                    content_type(list), data.getvalue(), mode="filter")))

            # an empty (or self-only) result means the conversion failed
            if cached_config.sections() in [[], [list]]:
                raise Exception

        for list in reading_lists:
            downloadReadingList(list, parser, data2config)
def load(config_file):
    """ initialize and load a configuration

    *config_file* is a single path.  Populates the module-global `parser`,
    layering in theme settings, the bundled filter directory, and any
    configured reading lists.
    """
    global parser
    parser = ConfigParser()
    parser.read(config_file)

    # imported here (not at module top) — presumably to avoid an import
    # cycle with the config module; TODO confirm
    import config, planet
    from planet import opml, foaf, csv_config
    log = planet.logger
    if not log:
        log = planet.getLogger(config.log_level(),config.log_format())

    # Theme support
    theme = config.output_theme()
    if theme:
        # search the current directory, then the bundled themes directory
        for path in ("", os.path.join(sys.path[0],'themes')):
            theme_dir = os.path.join(path,theme)
            theme_file = os.path.join(theme_dir,'config.ini')
            if os.path.exists(theme_file):
                # initial search list for theme directories
                dirs = config.template_directories()
                if theme_dir not in dirs:
                    dirs.append(theme_dir)
                if os.path.dirname(config_file) not in dirs:
                    dirs.append(os.path.dirname(config_file))

                # read in the theme (replaces the parser: theme first,
                # user config layered back on top below)
                parser = ConfigParser()
                parser.read(theme_file)
                bom = config.bill_of_materials()

                # complete search list for theme directories
                dirs += [os.path.join(theme_dir,dir) for dir in
                    config.template_directories() if dir not in dirs]

                # merge configurations, allowing current one to override theme
                template_files = config.template_files()
                parser.set('Planet','template_files','')
                parser.read(config_file)
                for file in config.bill_of_materials():
                    if not file in bom:
                        bom.append(file)
                parser.set('Planet', 'bill_of_materials', ' '.join(bom))
                parser.set('Planet', 'template_directories', ' '.join(dirs))
                parser.set('Planet', 'template_files',
                    ' '.join(template_files + config.template_files()))
                break
        else:
            # for/else: no candidate theme directory had a config.ini
            log.error('Unable to find theme %s', theme)

    # Filter support: add the bundled filters directory to the search path
    dirs = config.filter_directories()
    filter_dir = os.path.join(sys.path[0],'filters')
    if filter_dir not in dirs and os.path.exists(filter_dir):
        parser.set('Planet', 'filter_directories', ' '.join(dirs+[filter_dir]))

    # Reading list support
    reading_lists = config.reading_lists()
    if reading_lists:
        if not os.path.exists(config.cache_lists_directory()):
            os.makedirs(config.cache_lists_directory())

        def data2config(data, cached_config):
            # Convert a downloaded reading list into config sections, keyed
            # by its content type.  NOTE: `list` is the loop variable from
            # the enclosing `for list in reading_lists` (shadows the
            # builtin) — the closure reads the current list's URL.
            if content_type(list).find('opml')>=0:
                opml.opml2config(data, cached_config)
            elif content_type(list).find('foaf')>=0:
                foaf.foaf2config(data, cached_config)
            elif content_type(list).find('csv')>=0:
                csv_config.csv2config(data, cached_config)
            elif content_type(list).find('config')>=0:
                cached_config.readfp(data)
            else:
                # unknown type: run it through a filter named after the type
                from planet import shell
                import StringIO
                cached_config.readfp(StringIO.StringIO(shell.run(
                    content_type(list), data.getvalue(), mode="filter")))

            # an empty (or self-only) result means the conversion failed
            if cached_config.sections() in [[], [list]]:
                raise Exception

        for list in reading_lists:
            downloadReadingList(list, parser, data2config)
def splice():
    """ Splice together a planet from a cache of entries

    Builds one Atom document from the per-entry files in the cache
    directory, newest first, capped at the largest items_per_page across
    all configured templates.  Returns the xml.dom.minidom document.
    """
    import planet
    log = planet.logger

    log.info("Loading cached data")
    cache = config.cache_directory()
    # (mtime, path) pairs for every cached entry file, sorted newest first
    dir = [(os.stat(file).st_mtime, file) for file in glob.glob(cache + "/*")
           if not os.path.isdir(file)]
    dir.sort()
    dir.reverse()

    max_items = max([config.items_per_page(templ)
                     for templ in config.template_files() or ['Planet']])

    doc = minidom.parseString('<feed xmlns="http://www.w3.org/2005/Atom"/>')
    feed = doc.documentElement

    # insert feed information
    createTextElement(feed, 'title', config.name())
    date(feed, 'updated', time.gmtime())
    gen = createTextElement(feed, 'generator', config.generator())
    gen.setAttribute('uri', config.generator_uri())

    author = doc.createElement('author')
    createTextElement(author, 'name', config.owner_name())
    createTextElement(author, 'email', config.owner_email())
    feed.appendChild(author)

    if config.feed():
        createTextElement(feed, 'id', config.feed())
        link = doc.createElement('link')
        link.setAttribute('rel', 'self')
        link.setAttribute('href', config.feed())
        if config.feedtype():
            link.setAttribute('type', "application/%s+xml" % config.feedtype())
        feed.appendChild(link)

    if config.link():
        link = doc.createElement('link')
        link.setAttribute('rel', 'alternate')
        link.setAttribute('href', config.link())
        feed.appendChild(link)

    # insert subscription information
    sub_ids = []
    feed.setAttribute('xmlns:planet', planet.xmlns)
    sources = config.cache_sources_directory()
    for sub in config.subscriptions():
        data = feedparser.parse(filename(sources, sub))
        if data.feed.has_key('id'):
            sub_ids.append(data.feed.id)
        if not data.feed:
            continue
        xdoc = minidom.parseString('''<planet:source xmlns:planet="%s" xmlns="http://www.w3.org/2005/Atom"/>\n''' % planet.xmlns)
        reconstitute.source(xdoc.documentElement, data.feed, None, None)
        feed.appendChild(xdoc.documentElement)

    index = idindex.open()

    # insert entry information
    items = 0
    count = {}
    new_feed_items = config.new_feed_items()
    for mtime, file in dir:
        if index is not None:  # identity test; was the non-idiomatic `!= None`
            base = os.path.basename(file)
            # fast path: skip entries whose indexed feed id is unsubscribed
            if index.has_key(base) and index[base] not in sub_ids:
                continue

        try:
            entry = minidom.parse(file)

            # verify that this entry is currently subscribed to and that the
            # number of entries contributed by this feed does not exceed
            # config.new_feed_items
            entry.normalize()
            sources = entry.getElementsByTagName('source')
            if sources:
                ids = sources[0].getElementsByTagName('id')
                if ids:
                    id = ids[0].childNodes[0].nodeValue
                    count[id] = count.get(id, 0) + 1

                    if new_feed_items and count[id] > new_feed_items:
                        continue

                    if id not in sub_ids:
                        # fall back to the planet:id recorded at fetch time
                        ids = sources[0].getElementsByTagName('planet:id')
                        if not ids:
                            continue
                        id = ids[0].childNodes[0].nodeValue
                        if id not in sub_ids:
                            continue

            # add entry to feed
            feed.appendChild(entry.documentElement)
            items = items + 1
            if items >= max_items:
                break
        except Exception:
            # FIX: was a bare `except:`, which also swallowed SystemExit and
            # KeyboardInterrupt; only real parse/processing errors are skipped
            log.error("Error parsing %s", file)

    if index:
        index.close()

    return doc
def splice():
    """ Splice together a planet from a cache of entries

    Builds one Atom document from the cached entry files (newest first,
    capped at the largest items_per_page over all templates) and returns
    the xml.dom.minidom document.
    """
    import planet
    log = planet.logger

    log.info("Loading cached data")
    cache = config.cache_directory()
    # (mtime, path) pairs for every cached entry file, sorted newest first
    dir=[(os.stat(file).st_mtime,file) for file in glob.glob(cache+"/*")
        if not os.path.isdir(file)]
    dir.sort()
    dir.reverse()

    max_items=max([config.items_per_page(templ)
        for templ in config.template_files() or ['Planet']])

    doc = minidom.parseString('<feed xmlns="http://www.w3.org/2005/Atom"/>')
    feed = doc.documentElement

    # insert feed information
    createTextElement(feed, 'title', config.name())
    date(feed, 'updated', time.gmtime())
    gen = createTextElement(feed, 'generator', config.generator())
    gen.setAttribute('uri', config.generator_uri())

    author = doc.createElement('author')
    createTextElement(author, 'name', config.owner_name())
    createTextElement(author, 'email', config.owner_email())
    feed.appendChild(author)

    if config.feed():
        createTextElement(feed, 'id', config.feed())
        link = doc.createElement('link')
        link.setAttribute('rel', 'self')
        link.setAttribute('href', config.feed())
        if config.feedtype():
            link.setAttribute('type', "application/%s+xml" % config.feedtype())
        feed.appendChild(link)

    # advertise a PubSubHubbub hub when one is configured
    if config.pubsubhubbub_hub():
        hub = doc.createElement('link')
        hub.setAttribute('rel', 'hub')
        hub.setAttribute('href', config.pubsubhubbub_hub())
        feed.appendChild(hub)

    if config.link():
        link = doc.createElement('link')
        link.setAttribute('rel', 'alternate')
        link.setAttribute('href', config.link())
        feed.appendChild(link)

    # insert subscription information
    sub_ids = []
    feed.setAttribute('xmlns:planet',planet.xmlns)
    sources = config.cache_sources_directory()
    for sub in config.subscriptions():
        data=feedparser.parse(filename(sources,sub))
        if data.feed.has_key('id'):
            sub_ids.append(data.feed.id)
        if not data.feed:
            continue

        # warn on missing links (unless the feed carries a planet_message)
        if not data.feed.has_key('planet_message'):
            if not data.feed.has_key('links'):
                data.feed['links'] = []
            for link in data.feed.links:
                if link.rel == 'self':
                    break
            else:
                # for/else: no rel="self" link found
                log.debug('missing self link for ' + sub)
            for link in data.feed.links:
                if link.rel == 'alternate' and 'html' in link.type:
                    break
            else:
                # for/else: no html alternate link found
                log.debug('missing html link for ' + sub)

        xdoc=minidom.parseString('''<planet:source xmlns:planet="%s" xmlns="http://www.w3.org/2005/Atom"/>\n''' % planet.xmlns)
        reconstitute.source(xdoc.documentElement, data.feed, None, None)
        feed.appendChild(xdoc.documentElement)

    index = idindex.open()

    # insert entry information
    items = 0
    count = {}
    atomNS='http://www.w3.org/2005/Atom'
    new_feed_items = config.new_feed_items()
    for mtime,file in dir:
        if index != None:
            base = os.path.basename(file)
            # fast path: skip entries whose indexed feed id is unsubscribed
            if index.has_key(base) and index[base] not in sub_ids:
                continue

        try:
            entry=minidom.parse(file)

            # verify that this entry is currently subscribed to and that the
            # number of entries contributed by this feed does not exceed
            # config.new_feed_items
            entry.normalize()
            sources = entry.getElementsByTagNameNS(atomNS, 'source')
            if sources:
                ids = sources[0].getElementsByTagName('id')
                if ids:
                    id = ids[0].childNodes[0].nodeValue
                    count[id] = count.get(id,0) + 1

                    if new_feed_items and count[id] > new_feed_items:
                        continue

                    if id not in sub_ids:
                        # fall back to the planet:id recorded at fetch time
                        ids = sources[0].getElementsByTagName('planet:id')
                        if not ids:
                            continue
                        id = ids[0].childNodes[0].nodeValue
                        if id not in sub_ids:
                            log.warn('Skipping: ' + id)
                        if id not in sub_ids:
                            continue

            # add entry to feed
            feed.appendChild(entry.documentElement)
            items = items + 1
            if items >= max_items:
                break
        except:
            # NOTE(review): bare except also catches KeyboardInterrupt —
            # consider narrowing to Exception
            log.error("Error parsing %s", file)

    if index:
        index.close()

    return doc
def splice():
    """ Splice together a planet from a cache of entries

    Builds one Atom document from the cached entry files (newest first,
    capped at the largest items_per_page over all templates).  When
    post_to_twitter is enabled, also tweets each newly seen entry URL,
    persisting the set of posted URLs via pickle in posted_urls_file.
    Returns the xml.dom.minidom document.
    """
    import planet
    log = planet.logger

    log.info("Loading cached data")
    cache = config.cache_directory()
    # (mtime, path) pairs for every cached entry file, sorted newest first
    dir = [(os.stat(file).st_mtime, file) for file in glob.glob(cache + "/*")
           if not os.path.isdir(file)]
    dir.sort()
    dir.reverse()

    max_items = max([
        config.items_per_page(templ)
        for templ in config.template_files() or ['Planet']
    ])

    doc = minidom.parseString('<feed xmlns="http://www.w3.org/2005/Atom"/>')
    feed = doc.documentElement

    # insert feed information
    createTextElement(feed, 'title', config.name())
    date(feed, 'updated', time.gmtime())
    gen = createTextElement(feed, 'generator', config.generator())
    gen.setAttribute('uri', config.generator_uri())

    author = doc.createElement('author')
    createTextElement(author, 'name', config.owner_name())
    createTextElement(author, 'email', config.owner_email())
    feed.appendChild(author)

    if config.feed():
        createTextElement(feed, 'id', config.feed())
        link = doc.createElement('link')
        link.setAttribute('rel', 'self')
        link.setAttribute('href', config.feed())
        if config.feedtype():
            link.setAttribute('type', "application/%s+xml" % config.feedtype())
        feed.appendChild(link)

    # advertise a PubSubHubbub hub when one is configured
    if config.pubsubhubbub_hub():
        hub = doc.createElement('link')
        hub.setAttribute('rel', 'hub')
        hub.setAttribute('href', config.pubsubhubbub_hub())
        feed.appendChild(hub)

    if config.link():
        link = doc.createElement('link')
        link.setAttribute('rel', 'alternate')
        link.setAttribute('href', config.link())
        feed.appendChild(link)

    # insert subscription information
    sub_ids = []
    feed.setAttribute('xmlns:planet', planet.xmlns)
    sources = config.cache_sources_directory()
    for sub in config.subscriptions():
        data = feedparser.parse(filename(sources, sub))
        if data.feed.has_key('id'):
            sub_ids.append(data.feed.id)
        if not data.feed:
            continue

        # warn on missing links (unless the feed carries a planet_message)
        if not data.feed.has_key('planet_message'):
            if not data.feed.has_key('links'):
                data.feed['links'] = []
            for link in data.feed.links:
                if link.rel == 'self':
                    break
            else:
                # for/else: no rel="self" link found
                log.debug('missing self link for ' + sub)
            for link in data.feed.links:
                if link.rel == 'alternate' and 'html' in link.type:
                    break
            else:
                # for/else: no html alternate link found
                log.debug('missing html link for ' + sub)

        xdoc = minidom.parseString('''<planet:source xmlns:planet="%s" xmlns="http://www.w3.org/2005/Atom"/>\n''' % planet.xmlns)
        reconstitute.source(xdoc.documentElement, data.feed, None, None)
        feed.appendChild(xdoc.documentElement)

    index = idindex.open()

    # insert entry information
    items = 0
    count = {}
    atomNS = 'http://www.w3.org/2005/Atom'
    new_feed_items = config.new_feed_items()

    # URLs already tweeted in a previous run, restored from pickle;
    # best-effort: a corrupt/missing file just means an empty set
    posted_urls = set()
    if config.post_to_twitter():
        if os.path.exists(posted_urls_file):
            try:
                with open(posted_urls_file, 'rb') as f:
                    posted_urls = pickle.load(f)
            except Exception as ex:
                log.error("Error reading posted_urls %s", ex)
    # print(posted_urls)

    for mtime, file in dir:
        if index != None:
            base = os.path.basename(file)
            # fast path: skip entries whose indexed feed id is unsubscribed
            if index.has_key(base) and index[base] not in sub_ids:
                continue

        try:
            entry = minidom.parse(file)

            # verify that this entry is currently subscribed to and that the
            # number of entries contributed by this feed does not exceed
            # config.new_feed_items
            entry.normalize()
            sources = entry.getElementsByTagNameNS(atomNS, 'source')
            if sources:
                ids = sources[0].getElementsByTagName('id')
                if ids:
                    id = ids[0].childNodes[0].nodeValue
                    count[id] = count.get(id, 0) + 1

                    if new_feed_items and count[id] > new_feed_items:
                        continue

                    if id not in sub_ids:
                        # fall back to the planet:id recorded at fetch time
                        ids = sources[0].getElementsByTagName('planet:id')
                        if not ids:
                            continue
                        id = ids[0].childNodes[0].nodeValue
                        if id not in sub_ids:
                            log.warn('Skipping: ' + id)
                        if id not in sub_ids:
                            continue

            # Twitter integration
            if config.post_to_twitter():
                url = None
                twitter = None
                title = "Untitled post..."
                # first text/html alternate link is the entry's URL
                links = entry.getElementsByTagName('link')
                if links:
                    for link in links:
                        if link.hasAttribute('rel') and link.hasAttribute(
                                'type') and link.hasAttribute('href'):
                            if (link.getAttribute('rel') == 'alternate'
                                    and link.getAttribute('type') == 'text/html'):
                                url = link.getAttribute('href')
                                break
                titles = entry.getElementsByTagName('title')
                if titles:
                    title = unicode(
                        titles[0].firstChild.nodeValue.encode('utf-8'),
                        'utf-8').strip()
                # optional author Twitter handle carried in the entry
                handles = entry.getElementsByTagName('planet:twitter')
                if (handles):
                    twitter = unicode(
                        handles[0].firstChild.nodeValue.encode('utf-8'),
                        "utf-8")

                if url is not None and url not in posted_urls:
                    # log.debug("Going to post URL to Twitter: twitter='{}' title='{}', url='{}'".format(twitter, title, url))
                    txt_append = u''
                    if twitter:
                        txt_append = u" (by @" + twitter.encode(
                            'utf-8').strip() + u")"
                    # 280-char tweet limit, reserving 20 chars for the
                    # (t.co-shortened) URL plus the attribution suffix
                    max_title_len = 280 - 20 - len(txt_append)
                    if (len(title) > max_title_len):
                        title = title[:max_title_len]
                    txt = title + txt_append + u"\n" + url

                    log.debug(u"Text to post '{}'".format(txt))
                    try:
                        # mark as posted first so a failing API call is not
                        # retried forever on every run
                        posted_urls.add(url)
                        config.twitter_api.update_status(txt)
                    except Exception as ex:
                        log.error(u"Error posting to Twitter: %s", ex)

            # add entry to feed
            feed.appendChild(entry.documentElement)
            items = items + 1
            if items >= max_items:
                break
        except Exception as ex:
            log.error("Error parsing %s: %s", file, ex)
            exc_type, exc_value, exc_traceback = sys.exc_info()
            traceback.print_exception(exc_type, exc_value, exc_traceback,
                                      limit=2, file=sys.stdout)

    # persist the posted-URL set for the next run
    if config.post_to_twitter():
        with open(posted_urls_file, 'wb') as f:
            pickle.dump(posted_urls, f, protocol=pickle.HIGHEST_PROTOCOL)

    if index:
        index.close()

    return doc
def splice():
    """ Splice together a planet from a cache of entries

    Builds one Atom document from the cached entry files (newest first,
    capped at the largest items_per_page over all templates) and returns
    the xml.dom.minidom document.
    """
    import planet
    log = planet.getLogger(config.log_level(),config.log_format())

    log.info("Loading cached data")
    cache = config.cache_directory()
    # (mtime, path) pairs for every cached entry file, sorted newest first
    dir = [(os.stat(file).st_mtime, file) for file in glob.glob(cache + "/*")
           if not os.path.isdir(file)]
    dir.sort()
    dir.reverse()

    max_items = max([config.items_per_page(templ)
                     for templ in config.template_files() or ['Planet']])

    doc = minidom.parseString('<feed xmlns="http://www.w3.org/2005/Atom"/>')
    feed = doc.documentElement

    # insert feed information
    createTextElement(feed, 'title', config.name())
    date(feed, 'updated', time.gmtime())
    gen = createTextElement(feed, 'generator', config.generator())
    gen.setAttribute('uri', config.generator_uri())

    author = doc.createElement('author')
    createTextElement(author, 'name', config.owner_name())
    createTextElement(author, 'email', config.owner_email())
    feed.appendChild(author)

    if config.feed():
        createTextElement(feed, 'id', config.feed())
        link = doc.createElement('link')
        link.setAttribute('rel', 'self')
        link.setAttribute('href', config.feed())
        if config.feedtype():
            link.setAttribute('type', "application/%s+xml" % config.feedtype())
        feed.appendChild(link)

    if config.link():
        link = doc.createElement('link')
        link.setAttribute('rel', 'alternate')
        link.setAttribute('href', config.link())
        feed.appendChild(link)

    # insert subscription information
    sub_ids = []
    feed.setAttribute('xmlns:planet', planet.xmlns)
    sources = config.cache_sources_directory()
    for sub in config.subscriptions():
        data = feedparser.parse(filename(sources, sub))
        if data.feed.has_key('id'):
            sub_ids.append(data.feed.id)
        if not data.feed:
            continue
        xdoc = minidom.parseString('''<planet:source xmlns:planet="%s" xmlns="http://www.w3.org/2005/Atom"/>\n''' % planet.xmlns)
        reconstitute.source(xdoc.documentElement, data.feed, None, None)
        feed.appendChild(xdoc.documentElement)

    index = idindex.open()

    # insert entry information
    items = 0
    for mtime, file in dir:
        if index is not None:  # identity test; was the non-idiomatic `!= None`
            base = os.path.basename(file)
            # fast path: skip entries whose indexed feed id is unsubscribed
            if index.has_key(base) and index[base] not in sub_ids:
                continue

        try:
            entry = minidom.parse(file)

            # verify that this entry is currently subscribed to
            entry.normalize()
            sources = entry.getElementsByTagName('source')
            if sources:
                ids = sources[0].getElementsByTagName('id')
                if ids and ids[0].childNodes[0].nodeValue not in sub_ids:
                    # fall back to the planet:id recorded at fetch time
                    ids = sources[0].getElementsByTagName('planet:id')
                    if not ids:
                        continue
                    if ids[0].childNodes[0].nodeValue not in sub_ids:
                        continue

            # add entry to feed
            feed.appendChild(entry.documentElement)
            items = items + 1
            if items >= max_items:
                break
        except Exception:
            # FIX: was a bare `except:`, which also swallowed SystemExit and
            # KeyboardInterrupt; only real parse/processing errors are skipped
            log.error("Error parsing %s", file)

    if index:
        index.close()

    return doc