def test_config():
    parser = AtomisatorConfig(test_conf)
    sources = parser.sources
    sources.sort()
    dir = os.path.dirname(test_conf)
    wanted = [('rss', ('%s/digg.xml' % dir,)),
              ('rss', ('%s/digg.xml' % dir,)),
              ('rss', ('%s/pp.xml' % dir,)),
              ('rss', ('%s/pp.xml' % dir,)),
              ('rss', ('%s/tarek.xml' % dir,)),
              ('rss', ('%s/tarek.xml' % dir,))]
    assert_equals(sources, wanted)
    wanted = os.path.join(dir, 'atomisator.db')
    assert_equals(parser.database, 'sqlite:///%s' % wanted)
    # getting readers
    assert_equals(parser.get_reader('xxx'), None)
    assert_equals(parser.get_reader('xml'), 'atomisator.reader.xml')
    names = [f[0] for f in parser.filters]
    names.sort()
    assert_equals(names, ['autotags', 'buzzwords', 'doublons', 'spam',
                          'stopwords'])
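# The fixture file behind this test is not shown in this section. A plausible
# sketch of the ini-style file AtomisatorConfig appears to parse, inferred
# from the attributes exercised above; the section and option names are
# assumptions, not the project's documented format:
#
#   [atomisator]
#   database = sqlite:///atomisator.db
#   sources =
#       rss digg.xml
#       rss tarek.xml
#       rss pp.xml
#   filters =
#       autotags
#       buzzwords
#       doublons
#       spam
#       stopwords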
def test_quotes():
    parser = AtomisatorConfig(cfg)
    rss = ('rss', ('output.xml', 'http://link.xml', 'This is the output',
                   'This is the description'))
    parser.outputs = [rss]
    assert_equals(parser.outputs, [rss])
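# The round-trip above implies that multi-word output arguments are quoted
# when written and split back into separate values when read. A hypothetical
# companion test sketching that behavior; shlex is an assumption about the
# splitting rules, not a documented implementation detail of
# AtomisatorConfig:
def test_shlex_style_quoting():
    import shlex
    line = ('rss output.xml http://link.xml '
            '"This is the output" "This is the description"')
    parts = shlex.split(line)
    assert_equals((parts[0], tuple(parts[1:])),
                  ('rss', ('output.xml', 'http://link.xml',
                           'This is the output',
                           'This is the description')))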
def index(self):
    parser = AtomisatorConfig(CONFIG)
    # getting parameters for the rss output
    rss = dict(parser.outputs)['rss']
    # getting the target xml file
    rss_file = rss[0]
    xml = os.path.realpath(rss_file)
    if not os.path.exists(xml):
        xml = os.path.realpath(join(root, rss_file))
        if not os.path.exists(xml):
            raise ValueError('File %s not found' % xml)
    # if not under public, we need to copy it to public/rss.xml
    if xml != PUBLIC_RSS:
        shutil.copyfile(xml, PUBLIC_RSS)
    doc = etree.XML(open(xml).read())
    items = doc.xpath('/rss/channel/item')

    def _date(value):
        # drop the microseconds, then reformat as dd/mm/yyyy
        d = time.strptime(value.split('.')[0], '%Y-%m-%d %H:%M:%S')
        d = datetime.datetime(*d[:6])
        return d.strftime('%d/%m/%Y')

    def _extract(entry):
        if entry.tag == 'pubDate':
            return entry.tag, _date(entry.text)
        if entry.tag == 'title':
            if len(entry.text) > TITLESIZE:
                return 'title', entry.text[:TITLESIZE] + '...'
            return 'title', entry.text
        return entry.tag, entry.text

    items = [dict([_extract(x) for x in e.getchildren()])
             for e in items]

    # building an extract (renamed from a second `_extract` that
    # shadowed the helper above)
    def _make_extract(html, title):
        if isinstance(html, unicode):
            try:
                # encode, not decode: the value is already unicode
                html = html.encode('utf8')
            except UnicodeError:
                html = str(type(html))
        parser = Html2Txt()
        parser.reset()
        parser.feed(html)
        parser.close()
        res = parser.output().strip()
        size = MAXSIZE - len(title)
        if size < 0:
            return ''
        return res[:size] + '...'

    for i in items:
        i['extract'] = _make_extract(i['description'], i['title'])
    c.entries = items
    c.title = doc.xpath('/rss/channel/title')[0].text
    return render('/front.mako')
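# `Html2Txt` is used in index() above but not defined in this section. A
# minimal sketch of an HTMLParser-based implementation consistent with the
# reset()/feed()/close()/output() calls made there; the real class may do
# more (entity and charref handling, whitespace normalization):
from HTMLParser import HTMLParser

class Html2Txt(HTMLParser):
    """Strips markup and keeps only the text content."""
    def reset(self):
        HTMLParser.reset(self)
        self._pieces = []

    def handle_data(self, data):
        self._pieces.append(data)

    def output(self):
        return ''.join(self._pieces)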
def update(self):
    parser = AtomisatorConfig(CONFIG)
    # getting parameters for the rss output
    file_, link, title, description = self._get_values(parser)
    # TODO make atomisator cfg file read/writeable by text
    # to avoid all this crappy parsing (see the helper sketched below)
    _get = request.GET.get
    title = _get('title', title)
    link = _get('link', link)
    description = _get('description', description)
    self._set_values(parser, file_, link, title, description)
    parser.database = _get('database', parser.database)

    # strip the *formatted* line, not the format string
    current = [('%s %s' % (p[0], ' '.join(p[1]))).strip()
               for p in parser.filters]
    filters = _get('filters', '\n'.join(current))
    filters = [f for f in [s.strip() for s in filters.split('\n')]
               if f != '']
    filters = [(u[0], tuple(u[1:]))
               for u in [f.split() for f in filters]]
    if filters != parser.filters:
        parser.filters = filters

    current = [('%s %s' % (p[0], ' '.join(p[1]))).strip()
               for p in parser.enhancers]
    enhancers = _get('enhancers', '\n'.join(current))
    enhancers = [f for f in [s.strip() for s in enhancers.split('\n')]
                 if f != '']
    enhancers = [(u[0], tuple(u[1:]))
                 for u in [f.split() for f in enhancers]]
    if enhancers != parser.enhancers:
        parser.enhancers = enhancers

    current = [('%s %s' % (p[0], ' '.join(p[1]))).strip()
               for p in parser.sources]
    sources = _get('sources', '\n'.join(current))
    sources = [f for f in [s.strip() for s in sources.split('\n')]
               if f != '']
    sources = [(u[0], tuple(u[1:]))
               for u in [f.split() for f in sources]]
    parser.sources = sources

    parser.write()
    redirect_to(action='index')
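# update() repeats the same parse for filters, enhancers and sources. A
# hypothetical helper, not part of the original module, that captures the
# shared "name arg1 arg2" line format:
def _parse_plugin_lines(text):
    """Turns newline-separated 'name arg1 arg2' lines into
    [(name, (arg1, arg2)), ...], skipping blank lines."""
    lines = [l.strip() for l in text.split('\n')]
    return [(parts[0], tuple(parts[1:]))
            for parts in [l.split() for l in lines if l != '']]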
def test_config():
    parser = AtomisatorConfig(cfg)
    s = parser.sources
    waited = [('rss', ('gdigg.xml',)), ('rss', ('gtarek.xml',)),
              ('rss', ('gpp.xml',)), ('rss', ('gdigg.xml',)),
              ('rss', ('gtarek.xml',)), ('rss', ('gpp.xml',))]
    assert_equals(s, waited)
    parser.sources = (('rss', ('ok.xml',)),)
    assert_equals(parser.sources, [('rss', (u'ok.xml',))])
    assert_equals(parser.database, 'sqlite:///gatomisator.db')
    parser.database = 'sqlite://here'
    assert_equals(parser.database, 'sqlite://here')
    assert_equals(parser.timeout, 5.0)
    parser.timeout = 7.0
    assert_equals(parser.timeout, 7.0)
    assert_equals(parser.store_entries, True)
    parser.store_entries = False
    assert_equals(parser.store_entries, False)
    assert_equals(parser.max_age, 30)
    parser.max_age = 35
    assert_equals(parser.max_age, 35)
    # write() must change the file on disk; restore it afterwards
    old = open(cfg).read()
    parser.write()
    new = open(cfg).read()
    assert new != old
    open(cfg, 'w').write(old)
    assert_equals(parser.processes, PROCESSES)
    parser.processes = 1
    assert_equals(parser.processes, 1)
def index(self):
    c.title = 'Backoffice'
    parser = AtomisatorConfig(CONFIG)
    c.atomisator = {}
    # getting parameters for the rss output
    file_, link, title, description = self._get_values(parser)
    c.atomisator['title'] = title
    s = [s[0] + ' ' + ' '.join(s[1]) for s in parser.sources]
    c.atomisator['sources'] = '\n'.join(s)
    c.atomisator['database'] = parser.database
    c.atomisator['description'] = description
    c.atomisator['link'] = link
    s = [s[0] + ' ' + ' '.join(s[1]) for s in parser.filters]
    c.atomisator['filters'] = '\n'.join(s)
    s = [s[0] + ' ' + ' '.join(s[1]) for s in parser.enhancers]
    c.atomisator['enhancers'] = '\n'.join(s)
    return render('/backoffice.mako')
def test_defaults():
    parser = AtomisatorConfig(cfg2)
    assert_equals(parser.enhancers, [])
    assert_equals(parser.filters, [])
    assert_equals(parser.outputs, [])
def __init__(self, conf):
    self.parser = AtomisatorConfig(conf)
    self.existing_entries = []
    self.filter_chain = None
    create_session(self.parser.database)
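# A usage sketch for the class this __init__ belongs to; its name is not
# shown in this section, so `Atomisator` below is an assumption. The
# constructor binds a configuration file and opens a database session:
#
#   app = Atomisator('/path/to/atomisator.cfg')
#   print app.parser.database   # e.g. 'sqlite:///atomisator.db'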