def processRecord(self, rawtext):
    """Build a ``Company`` from one raw scraped record and commit the session.

    ``rawtext`` is handed to ``self.scrapeData``, whose result is read as a
    mapping with the keys ``nombredelasociedad``, ``nodeficha``,
    ``registerdate``, ``suscriptores``, ``agent``, ``directors`` and
    ``titles``.

    The registration date is expected as ``DD-MM-YYYY`` and is stored on the
    company as a ``YYYY-MM-DD`` string; when it cannot be parsed the problem
    is logged and ``date_founded`` is not assigned here.

    People are attached in this order: subscribers, the agent (only when
    truthy), directors, then every entry of ``titles``.  Title roles are
    lower-cased and translated through ``self.officials``; a role with no
    entry there is used as-is (lower-cased).

    Returns the ``Company`` object after ``session.commit()`` has been
    called.
    """
    session = Session()
    fields = self.scrapeData(rawtext)

    company = Company(
        name=fields['nombredelasociedad'],
        recordid=int(fields['nodeficha']),
        scrape_date=None,
        scrape_source=None,
        is_current=None,
        data=None
    )

    def attach(role, name):
        # Every person is linked through the same session as the company.
        company.addPerson(role=role, name=name, session=session)

    raw_date = fields['registerdate']
    try:
        # XXX does this really need to be a register thing
        parsed = time.strptime(raw_date, '%d-%m-%Y')
        company.date_founded = time.strftime('%Y-%m-%d', parsed)
    except ValueError:
        log('invalid date: %s' % raw_date)

    for person in fields['suscriptores']:
        attach('subscriber', person)

    agent = fields['agent']
    if agent:
        attach('agent', agent)

    for person in fields['directors']:
        attach('director', person)

    for raw_role, person in fields['titles'].items():
        key = raw_role.lower()
        # Unknown roles fall back to their own lower-cased name.
        attach(self.officials.get(key, key), person)

    session.commit()
    return company