Beispiel #1
0
def test_config():
    """Check the sources, database, readers and filters read from ``test_conf``."""
    parser = AtomisatorConfig(test_conf)
    base = os.path.dirname(test_conf)

    # every feed file is expected twice once the sources are sorted
    patterns = ('%s/digg.xml', '%s/pp.xml', '%s/tarek.xml')
    expected_sources = [('rss', (pattern % base,))
                        for pattern in patterns
                        for _ in range(2)]
    found = parser.sources
    found.sort()
    assert_equals(found, expected_sources)

    # the database URL points at atomisator.db next to the config file
    db_file = os.path.join(base, 'atomisator.db')
    assert_equals(parser.database, 'sqlite:///%s' % db_file)

    # getting readers
    assert_equals(parser.get_reader('xxx'), None)
    assert_equals(parser.get_reader('xml'), 'atomisator.reader.xml')

    # only the filter names are compared, not their arguments
    filter_names = sorted([entry[0] for entry in parser.filters])
    assert_equals(filter_names,
                  ['autotags', 'buzzwords', 'doublons', 'spam', 'stopwords'])
Beispiel #2
0
def test_quotes():
    """Check that an rss output whose arguments contain spaces reads back unchanged."""
    rss_args = ('output.xml', 'http://link.xml', 'This is the output',
                'This is the description')
    rss_output = ('rss', rss_args)

    parser = AtomisatorConfig(cfg)
    parser.outputs = [rss_output]

    assert_equals(parser.outputs, [rss_output])
Beispiel #3
0
    def index(self):
        """Render the front page from the RSS file named in the configuration.

        The first argument of the ``rss`` output is taken as the feed file.
        It is looked up as given and, failing that, relative to ``root``.
        The file is copied to ``PUBLIC_RSS`` when it is not already that
        path, then parsed; every ``/rss/channel/item`` becomes a dict of
        ``tag -> text`` with a shortened title, a reformatted ``pubDate``
        and an added ``extract`` entry.

        Raises:
            ValueError: when the feed file exists at neither location.
        """
        parser = AtomisatorConfig(CONFIG)

        # getting parameters for the rss output
        rss = dict(parser.outputs)['rss']

        # getting the target xml file
        rss_file = rss[0]
        xml = os.path.realpath(rss_file)
        if not os.path.exists(xml):
            xml = os.path.realpath(join(root, rss_file))
            if not os.path.exists(xml):
                raise ValueError('File %s not found' % xml)

        # if not under public, we need to copy it to public/rss.xml
        if xml != PUBLIC_RSS:
            shutil.copyfile(xml, PUBLIC_RSS)

        # close the feed file as soon as it has been read
        with open(xml) as feed:
            doc = etree.XML(feed.read())
        items = doc.xpath('/rss/channel/item')

        def _date(value):
            # drop anything after the first '.', then turn
            # 'YYYY-MM-DD HH:MM:SS' into 'DD/MM/YYYY'
            d = time.strptime(value.split('.')[0], '%Y-%m-%d %H:%M:%S')
            d = datetime.datetime(*d[:6])
            return d.strftime('%d/%m/%Y')

        def _field(entry):
            # convert one child element of an item into a (key, value) pair
            if entry.tag == 'pubDate':
                return entry.tag, _date(entry.text)
            if entry.tag == 'title':
                # an empty <title/> has no text; treat it as an empty string
                text = entry.text or ''
                if len(text) > TITLESIZE:
                    return 'title', text[:TITLESIZE] + '...'
                return 'title', text
            return entry.tag, entry.text

        items = [dict([_field(x) for x in e.getchildren()]) for e in items]

        # building an extract
        def _summary(html, title):
            if isinstance(html, unicode):
                # NOTE(review): calling decode() on a unicode object and
                # falling back to the name of its type looks like a
                # debugging leftover -- confirm what Html2Txt expects.
                try:
                    html = html.decode('utf8')
                except UnicodeError:
                    html = str(type(html))
            converter = Html2Txt()
            converter.reset()
            converter.feed(html)
            converter.close()
            res = converter.output().strip()
            # title and extract together are limited to MAXSIZE characters
            size = MAXSIZE - len(title)
            if size < 0:
                return ''
            # the ellipsis is appended whether or not the text was cut
            return res[:size] + '...'

        for item in items:
            # a missing or empty description/title counts as an empty string
            item['extract'] = _summary(item.get('description') or '',
                                       item.get('title') or '')

        c.entries = items
        c.title = doc.xpath('/rss/channel/title')[0].text
        return render('/front.mako')
Beispiel #4
0
    def update(self):
        """Apply the settings found in the query string and save them.

        Title, link, description, database, filters, enhancers and sources
        are each read from ``request.GET``; a parameter that is absent falls
        back to the value currently held by the configuration.  Filters,
        enhancers and sources are exchanged as text, one plugin per line in
        the form ``name arg1 arg2 ...``.  Finally ``parser.write()`` is
        called and the request is redirected to the ``index`` action.
        """
        def _to_text(plugins):
            # Serialise (name, args) pairs, one "name arg1 arg2 ..." line
            # each.  The strip applies to the formatted line so a plugin
            # without arguments does not end with a space.
            return '\n'.join([
                ('%s %s' % (p[0], ' '.join(p[1]))).strip() for p in plugins
            ])

        def _from_text(text):
            # Parse the text form back into (name, args) pairs, ignoring
            # blank lines and surrounding whitespace.
            lines = [line.strip() for line in text.split('\n')]
            words = [line.split() for line in lines if line != '']
            return [(w[0], tuple(w[1:])) for w in words]

        parser = AtomisatorConfig(CONFIG)
        # getting parameters for the rss output
        file_, link, title, description = self._get_values(parser)

        # TODO make atomisator cfg file read/writeable by text
        # to avoid all this crappy parsing
        _get = request.GET.get
        title = _get('title', title)
        link = _get('link', link)
        description = _get('description', description)
        self._set_values(parser, file_, link, title, description)

        parser.database = _get('database', parser.database)

        # filters and enhancers are only assigned when they differ from
        # the configured value
        filters = _from_text(_get('filters', _to_text(parser.filters)))
        if filters != parser.filters:
            parser.filters = filters

        enhancers = _from_text(_get('enhancers', _to_text(parser.enhancers)))
        if enhancers != parser.enhancers:
            parser.enhancers = enhancers

        # sources are always assigned
        parser.sources = _from_text(_get('sources', _to_text(parser.sources)))

        parser.write()
        redirect_to(action='index')
Beispiel #5
0
def test_config():
    """Check that the options read from ``cfg`` can be read and overwritten.

    Each option is compared with its expected initial value, assigned a new
    value and read back.  ``parser.write()`` is then expected to change the
    content of ``cfg``; the original content is put back afterwards, even
    when writing or re-reading fails.
    """
    parser = AtomisatorConfig(cfg)
    expected = [('rss', ('gdigg.xml', )), ('rss', ('gtarek.xml', )),
                ('rss', ('gpp.xml', )), ('rss', ('gdigg.xml', )),
                ('rss', ('gtarek.xml', )), ('rss', ('gpp.xml', ))]

    assert_equals(parser.sources, expected)
    parser.sources = (('rss', ('ok.xml', )), )
    assert_equals(parser.sources, [('rss', (u'ok.xml', ))])

    assert_equals(parser.database, 'sqlite:///gatomisator.db')
    parser.database = 'sqlite://here'
    assert_equals(parser.database, 'sqlite://here')

    assert_equals(parser.timeout, 5.0)
    parser.timeout = 7.0
    assert_equals(parser.timeout, 7.0)

    assert_equals(parser.store_entries, True)
    parser.store_entries = False
    assert_equals(parser.store_entries, False)

    assert_equals(parser.max_age, 30)
    parser.max_age = 35
    assert_equals(parser.max_age, 35)

    with open(cfg) as handle:
        old = handle.read()
    try:
        parser.write()
        with open(cfg) as handle:
            new = handle.read()
    finally:
        # always put the original file back so a failure here cannot leave
        # a modified configuration on disk
        with open(cfg, 'w') as handle:
            handle.write(old)
    assert new != old

    assert_equals(parser.processes, PROCESSES)
    parser.processes = 1
    assert_equals(parser.processes, 1)
Beispiel #6
0
    def index(self):
        """Expose the current settings to the template and render the backoffice page."""
        def _as_text(plugins):
            # one "name arg1 arg2 ..." line per (name, args) entry
            return '\n'.join(
                [plugin[0] + ' ' + ' '.join(plugin[1]) for plugin in plugins])

        c.title = 'Backoffice'
        parser = AtomisatorConfig(CONFIG)
        c.atomisator = {}

        # getting parameters for the rss output (the file name is not shown)
        _unused_file, link, title, description = self._get_values(parser)

        c.atomisator['title'] = title
        c.atomisator['sources'] = _as_text(parser.sources)
        c.atomisator['database'] = parser.database
        c.atomisator['description'] = description
        c.atomisator['link'] = link
        c.atomisator['filters'] = _as_text(parser.filters)
        c.atomisator['enhancers'] = _as_text(parser.enhancers)

        return render('/backoffice.mako')
Beispiel #7
0
def test_defaults():
    """Check that enhancers, filters and outputs are empty lists for ``cfg2``."""
    parser = AtomisatorConfig(cfg2)
    for option in ('enhancers', 'filters', 'outputs'):
        assert_equals(getattr(parser, option), [])
Beispiel #8
0
 def __init__(self, conf):
     """Load the configuration from ``conf`` and call ``create_session`` with its database."""
     config = AtomisatorConfig(conf)
     self.parser = config
     # starts with no known entries and no filter chain
     self.filter_chain = None
     self.existing_entries = []
     create_session(config.database)