Example #1
0
 def save_info(self):
     """Create, store and return a ``ParserInfo`` record for this parser.

     The record is built from ``self.name`` and the JSON-encoded result of
     ``as_dict(self.config)``. It is kept on ``self.parser_info`` and its
     ``save()`` method is called before it is returned.
     """
     settings = as_dict(self.config)
     record = ParserInfo(
         name=self.name,
         config=json.dumps(settings),
     )
     # Expose the record on the instance before saving, as callers may read it.
     self.parser_info = record
     record.save()
     return record
Example #2
0
class RSSParser:
    """Read events, places and sessions from a local XML document.

    Keyword arguments passed to the constructor override the defaults
    declared on ``Config``. ``parse_all`` returns the parsed data without
    saving anything; ``save_all`` saves every parsed item together with a
    ``ParserInfo`` record describing this run.
    """

    name = 'RSS'

    # Default settings. Documented with comments rather than a docstring so
    # that the attributes of this class stay exactly as they were.
    class Config:
        # Filesystem path or ``file://`` URL of the XML document to parse.
        resource = 'test.xml'

    def __init__(self, **config):
        """Build the parser configuration.

        Args:
            **config: Values that override the ``Config`` defaults on this
                instance only (the class-level defaults are not modified).

        Raises:
            NotImplementedError: If ``resource`` has a URL scheme other
                than ``file``.
        """
        self.config = self.Config()
        vars(self.config).update(config)

        url = urlparse(self.config.resource)
        if url.scheme == 'file':
            # Reduce a file:// URL to the plain path that open() expects.
            self.config.resource = url.path
        elif url.scheme:
            raise NotImplementedError('Only local documents are supported')

    def get_document(self):
        """Parse the configured resource and return the parsed tree.

        The file is opened in binary mode so that the XML parser decodes
        the bytes itself, using the encoding declared in the document.
        Opening it in text mode would decode with the platform's default
        encoding first, which breaks documents in any other encoding.
        """
        with open(self.config.resource, 'rb') as f:
            return etree.parse(f)

    def iter_events(self, doc):
        """Yield a parsed ``ParsedEvent`` for each ``events/event`` element."""
        for element in doc.findall('events/event'):
            event = ParsedEvent(element)
            event.parse()
            yield event

    def iter_places(self, doc):
        """Yield a parsed ``ParsedPlace`` for each ``places/place`` element."""
        for element in doc.findall('places/place'):
            place = ParsedPlace(element)
            place.parse()
            yield place

    def iter_sessions(self, doc):
        """Yield a parsed ``Session`` for each ``schedule/session`` element."""
        for element in doc.findall('schedule/session'):
            session = Session(element)
            session.parse()
            yield session

    def save_info(self):
        """Create, save and return the ``ParserInfo`` record for this run.

        The record holds the parser name and the JSON-encoded result of
        ``as_dict(self.config)``; it is also kept on ``self.parser_info``.
        """
        config = as_dict(self.config)
        self.parser_info = ParserInfo(name=self.name, config=json.dumps(config))
        self.parser_info.save()
        return self.parser_info

    def parse_all(self):
        """Parse the document and return the data without saving anything.

        Returns:
            dict: Lists of each item's ``parsed_data`` under the keys
            ``'events'``, ``'places'`` and ``'sessions'``.
        """
        doc = self.get_document()
        return {
            'events': [e.parsed_data for e in self.iter_events(doc)],
            'places': [p.parsed_data for p in self.iter_places(doc)],
            'sessions': [s.parsed_data for s in self.iter_sessions(doc)],
        }

    def save_all(self):
        """Parse the document and save every item; the main entry point.

        The document is read first, so a missing or unreadable file fails
        before any ``ParserInfo`` record is saved. Sessions are saved
        first, then events, then places, each receiving that record.

        Returns:
            dict: ``{'parser_info': <id of the saved ParserInfo record>}``.
        """
        doc = self.get_document()
        info = self.save_info()
        for session in self.iter_sessions(doc):
            session.save(info)
        for event in self.iter_events(doc):
            event.save(info)
        for place in self.iter_places(doc):
            place.save(info)
        return {
            'parser_info': info.id
        }