Ejemplo n.º 1
0
 def addNewRecords(self):
     """Import every record between ``self.highnum`` and ``self.lownum``.

     Record ids are visited in descending order, starting at
     ``self.highnum`` and stopping before ``self.lownum`` (exclusive).
     For each id:

     * if ``Company.byRecordid`` finds it, the record is reported as
       already handled and skipped;
     * otherwise the file named by ``self.filename(recordid)`` is read and
       its text is handed to ``self.processRecord``.

     Empty files, and records whose processing raises ``AttributeError``,
     are logged and skipped.

     Raises:
         IOError: if a record file cannot be read and
             ``options.skipmissing`` is false.

     XXX: should this be a classmethod?
     """
     for recordid in xrange(self.highnum, self.lownum, -1):
         try:
             # Only the success of the lookup matters; the row is not used.
             Company.byRecordid(recordid)
         except SQLObjectNotFound:
             filename = self.filename(recordid)
         else:
             print('already handled record %s' % recordid)
             continue
         try:
             # Disabled alternative that decoded the page explicitly:
             # pagetext = codecs.open(filename, 'r', 'windows-1252').read()
             # The context manager closes the handle on every iteration
             # instead of leaving it to the garbage collector.
             with open(filename, 'r') as recordfile:
                 pagetext = recordfile.read()
         except IOError:
             if options.skipmissing:
                 print('skipping missing record %s' % recordid)
                 continue
             print('halting on missing record %s' % recordid)
             raise
         if not pagetext:
             log('%s is empty' % recordid)
             continue
         try:
             self.processRecord(pagetext)
         except AttributeError:
             # Print the traceback but keep going with the next record.
             traceback.print_exc()
             log('failed to process record %s' % recordid)
             continue
         print('added record %s' % recordid)
Ejemplo n.º 2
0
 def addNewRecords(self):
     """Import every record between ``self.highnum`` and ``self.lownum``.

     Record ids are visited in descending order, starting at
     ``self.highnum`` and stopping before ``self.lownum`` (exclusive).
     For each id:

     * if ``Company.byRecordid`` finds it, the record is reported as
       already handled and skipped;
     * otherwise the file named by ``self.filename(recordid)`` is read and
       its text is handed to ``self.processRecord``.

     Empty files, and records whose processing raises ``AttributeError``,
     are logged and skipped.

     Raises:
         IOError: if a record file cannot be read and
             ``options.skipmissing`` is false.

     XXX: should this be a classmethod?
     """
     for recordid in xrange(self.highnum, self.lownum, -1):
         try:
             # Only the success of the lookup matters; the row is not used.
             Company.byRecordid(recordid)
         except SQLObjectNotFound:
             filename = self.filename(recordid)
         else:
             print('already handled record %s' % recordid)
             continue
         try:
             # Disabled alternative that decoded the page explicitly:
             # pagetext = codecs.open(filename, 'r', 'windows-1252').read()
             # The context manager closes the handle on every iteration
             # instead of leaving it to the garbage collector.
             with open(filename, 'r') as recordfile:
                 pagetext = recordfile.read()
         except IOError:
             if options.skipmissing:
                 print('skipping missing record %s' % recordid)
                 continue
             print('halting on missing record %s' % recordid)
             raise
         if not pagetext:
             log('%s is empty' % recordid)
             continue
         try:
             self.processRecord(pagetext)
         except AttributeError:
             # Print the traceback but keep going with the next record.
             traceback.print_exc()
             log('failed to process record %s' % recordid)
             continue
         print('added record %s' % recordid)
Ejemplo n.º 3
0
 def processRecord(self, rawtext):
     """Create a Company and its associated people from one raw page.

     ``rawtext`` is passed to ``self.scrapeData``; the mapping it returns
     supplies the company name, the record id, the registration date and
     the subscribers, agent, directors and titled officials.

     The registration date is converted from ``dd-mm-YYYY`` to
     ``YYYY-mm-dd`` and stored as ``date_founded``; if that raises
     ``ValueError`` the value is logged and the method carries on.

     Every person is attached with ``addPerson`` using one shared
     session, which is committed before the new Company is returned.
     """
     session = Session()
     scraped = self.scrapeData(rawtext)
     company = Company(name=scraped['nombredelasociedad'],
                       recordid=int(scraped['nodeficha']),
                       scrape_date=None,
                       scrape_source=None,
                       is_current=None,
                       data=None)

     def attach(role, name):
         # Single funnel for every addPerson call on this record.
         company.addPerson(role=role, name=name, session=session)

     try:  # XXX does this really need to be a register thing
         parsed = time.strptime(scraped['registerdate'], '%d-%m-%Y')
         company.date_founded = time.strftime('%Y-%m-%d', parsed)
     except ValueError:
         log('invalid date: %s' % scraped['registerdate'])

     for subscriber in scraped['suscriptores']:
         attach('subscriber', subscriber)

     agent = scraped['agent']
     if agent:
         attach('agent', agent)

     for director in scraped['directors']:
         attach('director', director)

     for position, holder in scraped['titles'].items():
         position = position.lower()
         # Map known positions through self.officials; unknown ones are
         # stored under their lower-cased name.
         attach(self.officials.get(position, position), holder)

     session.commit()
     return company
Ejemplo n.º 4
0
 def processRecord(self, rawtext):
     """Build a Company from the raw text of one record page.

     The text is run through ``self.scrapeData`` and the resulting
     mapping is used to construct the Company (name and record id), to
     set ``date_founded`` (reformatted from ``dd-mm-YYYY`` to
     ``YYYY-mm-dd``; a ``ValueError`` is logged and otherwise ignored),
     and to add people in this order: subscribers, agent (if truthy),
     directors, then titled officials.

     Title roles are lower-cased and translated through
     ``self.officials``, falling back to the lower-cased role itself.
     The session shared by all ``addPerson`` calls is committed at the
     end, and the new Company is returned.
     """
     session = Session()
     fields = self.scrapeData(rawtext)

     def people():
         # Lazy on purpose: each lookup in ``fields`` happens right before
         # the addPerson call that consumes it.
         for subscriber in fields['suscriptores']:
             yield 'subscriber', subscriber
         agent = fields['agent']
         if agent:
             yield 'agent', agent
         for director in fields['directors']:
             yield 'director', director
         for position, holder in fields['titles'].items():
             position = position.lower()
             yield self.officials.get(position, position), holder

     company = Company(
         name=fields['nombredelasociedad'],
         recordid=int(fields['nodeficha']),
         scrape_date=None,
         scrape_source=None,
         is_current=None,
         data=None)

     try:  # XXX does this really need to be a register thing
         founded = time.strptime(fields['registerdate'], '%d-%m-%Y')
         company.date_founded = time.strftime('%Y-%m-%d', founded)
     except ValueError:
         log('invalid date: %s' % fields['registerdate'])

     for role, name in people():
         company.addPerson(role=role, name=name, session=session)

     session.commit()
     return company