def save_info(self):
    '''Store a ParserInfo describing this parser and return it.

    The configuration object is converted with as_dict and serialised to
    JSON. The resulting ParserInfo (built from self.name and that JSON
    string) is kept on self.parser_info, saved, and returned.
    '''
    settings = as_dict(self.config)
    info = ParserInfo(name=self.name, config=json.dumps(settings))
    self.parser_info = info
    info.save()
    return info
class RSSParser:
    '''Parser that reads events, places and sessions from a local document.

    Keyword arguments given to the constructor override the attributes of
    Config. The resource may be a plain path or a file:// URL; any other
    URL scheme is rejected.
    '''

    name = 'RSS'

    class Config:
        # Path (or file:// URL) of the document to parse.
        resource = 'test.xml'

    def __init__(self, **config):
        '''Build the configuration and normalise the resource to a path.

        Raises NotImplementedError when the resource carries a URL scheme
        other than file.
        '''
        self.config = self.Config()
        self.config.__dict__.update(config)
        url = urlparse(self.config.resource)
        if url.scheme == 'file':
            # Keep only the filesystem path of a file:// URL.
            self.config.resource = url.path
        elif url.scheme:
            raise NotImplementedError('Only local documents are supported')

    def get_document(self):
        '''Open the configured resource and return the parsed tree.'''
        # Binary mode hands the raw bytes to the parser so that the
        # encoding declared inside the document is honoured, instead of
        # decoding the file with the locale's default encoding first.
        with open(self.config.resource, 'rb') as f:
            return etree.parse(f)

    @staticmethod
    def _iter_parsed(doc, path, wrapper):
        '''Wrap every element matching path, call parse() on it, yield it.'''
        for element in doc.findall(path):
            item = wrapper(element)
            item.parse()
            yield item

    def iter_events(self, doc):
        '''Yield a parsed ParsedEvent for each 'events/event' element.'''
        return self._iter_parsed(doc, 'events/event', ParsedEvent)

    def iter_places(self, doc):
        '''Yield a parsed ParsedPlace for each 'places/place' element.'''
        return self._iter_parsed(doc, 'places/place', ParsedPlace)

    def iter_sessions(self, doc):
        '''Yield a parsed Session for each 'schedule/session' element.'''
        return self._iter_parsed(doc, 'schedule/session', Session)

    def save_info(self):
        '''Store a ParserInfo describing this parser and return it.

        The ParserInfo is built from self.name and the JSON-serialised
        as_dict(self.config), kept on self.parser_info, and saved.
        '''
        config = as_dict(self.config)
        self.parser_info = ParserInfo(name=self.name,
                                      config=json.dumps(config))
        self.parser_info.save()
        return self.parser_info

    def parse_all(self):
        '''Parse the document and return the parsed data of every item.

        Nothing is saved. Returns a dict with the keys 'events', 'places'
        and 'sessions', each holding a list of parsed_data values.
        Ideally every parser would implement this method.
        '''
        doc = self.get_document()
        return {
            'events': [e.parsed_data for e in self.iter_events(doc)],
            'places': [p.parsed_data for p in self.iter_places(doc)],
            'sessions': [s.parsed_data for s in self.iter_sessions(doc)],
        }

    def save_all(self):
        '''Main entry point: parse the document and save everything.

        Saves the parser info first, then every session, event and place
        (in that order), passing the parser info to each save() call.
        Returns a dict holding the id of the saved parser info.
        '''
        doc = self.get_document()
        info = self.save_info()
        for session in self.iter_sessions(doc):
            session.save(info)
        for event in self.iter_events(doc):
            event.save(info)
        for place in self.iter_places(doc):
            place.save(info)
        return {
            'parser_info': info.id
        }