def run(config, limit=None):
    # Read all existing ids first so we don't send a save request for data
    # that is already there.
    url = config.get('careerleaf', 'url')
    key_secret = get_auth_key(config)
    file_name = config.get('employers-import-csv', 'file')

    client = EmpClient(url, key_secret)
    reader = CsvReader(file_name)

    total = 0
    success_count = 0
    skipped = 0
    existing = client.get_existing_ids()
    processed_ids = set()

    for node in reader.read():
        import_logger.debug(node['id'])
        parser = Parser(node)
        record_id = parser.id
        if record_id in existing:
            import_logger.debug('skipping: %s' % record_id)
            skipped += 1
            continue

        # Guard against processing the same record more than once.
        assert record_id not in processed_ids
        processed_ids.add(record_id)

        data = parser.get_data()
        is_successful = client.save(data)
        if is_successful:
            success_count += 1
            import_logger.info('successful for: {}'.format(data['name']))
        else:
            import_logger.error('failed, data problem for: {}'.format(
                parser.record_identity()))

        total += 1
        if limit and total >= limit:
            import_logger.info('reached the limit: {}, stopping'.format(limit))
            break
        if total % 10 == 0:
            import_logger.info('processing record %s' % total)

    import_logger.info('parsed {} records: {} successful, {} failed, {} skipped'.format(
        total, success_count, total - success_count, skipped))
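# Usage sketch (hypothetical, not part of this module): the importer expects
# a configparser-style config with the sections read above; the file name
# 'import.ini' and the limit value are illustrative.
#
#     import configparser
#
#     config = configparser.ConfigParser()
#     config.read('import.ini')
#     run(config, limit=100)  # stop after 100 newly imported records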
def __init__(self, config):
    self.url_base = config.get('careerleaf', 'url')
    if self.url_base.startswith('https'):
        disable_ssl_warning()
    self.key_secret = get_auth_key(config)
    self.save_dir = config.get('jobseekers-export', 'save_dir')
    self.list_url = '{}/app/api/v1/candidates'.format(self.url_base)
    self.save_profile_data = config_get_safe(
        config, 'jobseekers-export', 'save_profile_data', True)
    # Falls back to 0 when the option is missing or empty.
    self.import_limit = int(config_get_safe(
        config, 'jobseekers-export', 'import_limit') or 0)
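# For reference, a config file satisfying the options read above might look
# like this sketch (all values illustrative; 'save_profile_data' and
# 'import_limit' are optional and read via config_get_safe):
#
#     [careerleaf]
#     url = https://example.careerleaf.com
#
#     [jobseekers-export]
#     save_dir = /tmp/jobseekers
#     save_profile_data = true
#     import_limit = 500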
def run(config, limit=None):
    # Read all existing ids first so we don't send a save request for data
    # that is already there.
    url = config.get('careerleaf', 'url')
    key_secret = get_auth_key(config)
    file_name = config.get('employers', 'file')

    client = EmpClient(url, key_secret)
    reader = XmlReader(file_name)
    reader = cleanup_data(reader)

    total = 0
    success_count = 0
    skipped = 0
    existing = client.get_existing_ids()
    processed_ids = set()

    for node in reader.read():
        parser = Parser(node, reader)
        record_id = parser.id
        if record_id in existing:
            logger.debug('skipping: %s' % record_id)
            skipped += 1
            continue

        # Guard against processing the same record more than once.
        assert record_id not in processed_ids
        processed_ids.add(record_id)

        data = parser.get_data()
        if data:
            is_successful = client.save(data)
            if is_successful:
                success_count += 1
                logger.info('successful for: {}'.format(data['name']))
            else:
                logger.error('failed, data problem for: {}'.format(
                    parser.record_identity()))

        total += 1
        if limit and total >= limit:
            logger.info('reached the limit: {}, stopping'.format(limit))
            break
        if total % 10 == 0:
            logger.info('processing record %s' % total)

    logger.info('parsed {} records: {} successful, {} failed, {} skipped'.format(
        total, success_count, total - success_count, skipped))
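# Usage sketch (hypothetical): same driver shape as the CSV importer, but
# reading the XML export named in the [employers] section. The logging setup
# is illustrative; this module only assumes 'logger' is configured by the
# caller.
#
#     import configparser
#     import logging
#
#     logging.basicConfig(level=logging.INFO)
#     config = configparser.ConfigParser()
#     config.read('import.ini')
#     run(config)  # no limit: import every record not already present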