# Find the resume point: ask the store for a single record ordered by '-no'
# (presumably the highest person number -- verify against Person.find).
# last_no stays 0 when nothing is returned.
last_no = 0
current_persons = Person.find(order=['-no'], limit=1)
if len(current_persons) > 0:
    newest = current_persons[-1]
    last_date = newest['release_date']
    last_no = newest['no']
# NOTE(review): within this section last_date is assigned only in the branch
# above; confirm it has a default earlier in the file for the empty-store case.

releases = read_aichi_release(last_date=last_date)
for release in releases:
    pdf_info = release['pdf']

    # The local file name is built from the numeric stem of the PDF URL.
    # re.sub returns its input unchanged when the pattern does not match, so
    # a URL of another shape would end up verbatim inside the file name.
    pdf_id = re.sub(r'^.*\/(\d+)\.pdf$', r'\1', pdf_info['url'])
    filepath = os.path.join(
        config.DATA_DIR, 'aichi/releases',
        'aichi_release_{}.pdf'.format(pdf_id))

    # Download only when there is no file at that path yet.
    if os.path.exists(filepath):
        logger.info('File %s already exists.', filepath)
    else:
        logger.info('Download PDF file %s', filepath)
        util.download_file(pdf_info['url'], filepath=filepath)
    pdf_info['filepath'] = filepath

    # Parse the PDF and keep only entries numbered above the resume point.
    result = read_release_pdf(pdf_info['filepath'], debug=True)
    persons = [entry for entry in result['persons'] if entry['no'] > last_no]
    logger.info('Add %s persons', len(persons))
    for person in persons:
        print(person)
        Person.insert(person)

# NOTE(review): the original nesting of this call could not be recovered from
# the collapsed source; it is placed after the release loop here -- confirm it
# was not meant to stop after the first release.
exit()