class RemoteOAIRELoader(BaseOAIRELoader):
    """Remote OpenAIRE dataset loader.

    Fetch the OpenAIRE records from a remote OAI-PMH endpoint.
    """

    def __init__(self, source=None, setspec=None, **kwargs):
        """Init the loader for remote OAI-PMH access."""
        super(RemoteOAIRELoader, self).__init__(
            source or current_app.config['OPENAIRE_OAIPMH_ENDPOINT'], **kwargs)
        self.client = Sickle(self.source)
        # Note: no trailing comma here, which would turn the value into a tuple.
        self.setspec = setspec or \
            current_app.config['OPENAIRE_OAIPMH_DEFAULT_SET']

    def iter_grants(self, as_json=True):
        """Fetch grants from a remote OAI-PMH endpoint.

        Return the Sickle-provided generator object.
        """
        records = self.client.ListRecords(metadataPrefix='oaf',
                                          set=self.setspec)
        for rec in records:
            try:
                grant_out = rec.raw  # rec.raw is XML
                if as_json:
                    grant_out = self.grantxml2json(grant_out)
                yield grant_out
            except FunderNotFoundError as e:
                current_app.logger.warning(
                    "Funder '{0}' not found.".format(e.funder_id))
def download_oai_dc(outfile=None,
                    base_url='http://bibliotheque-numerique.inha.fr/oai',
                    sets=16800,
                    force_refresh=False):
    """Download a set from an OAI-PMH repository and return it.

    If given an outfile, save the results to it and use it as a cache on
    subsequent calls (unless force_refresh is True).
    """
    if outfile and os.path.exists(outfile) and not force_refresh:
        return read_json_gzip(outfile)
    sickle = Sickle(base_url)
    records = sickle.ListRecords(**{
        'metadataPrefix': 'oai_dc',
        'set': "oai:sets:%d" % sets
    })
    records_fetched = []
    for i, record in enumerate(tqdm(records)):
        if i == 100:
            break
        records_fetched.append(record.metadata)
    if outfile:
        write_json_gzip(outfile, records_fetched)
    return records_fetched
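# Minimal usage sketch for download_oai_dc; the cache file name below is
# hypothetical, and the first call will hit the OAI-PMH endpoint while later
# calls read the gzip cache.
if __name__ == '__main__':
    recs = download_oai_dc(outfile='inha_set_16800.json.gz')
    print('fetched %d records' % len(recs))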
def descriptions(self):
    """Retrieve descriptions and dump them to the cache file."""
    s = Sickle(self.__url_api__)
    records = [record for record in s.ListRecords(metadataPrefix='oai_dc')]
    descr = OrderedDict()
    subjects = OrderedDict()
    cats = []
    for record in records:
        item = record.metadata['source'][0].split(',')[1].strip()
        descr[item] = record.metadata
        self.__log__.info('%s', item)
        if 'subject' in record.metadata:
            item_subjects = record.metadata['subject']
            for t in item_subjects:
                if t in subjects:
                    subjects[t] = subjects[t] + 1
                else:
                    subjects[t] = 1
                cats.append(self.subject_to_category(t))
        else:
            self.__log__.warning(' no subject for %s', item)
    self.__log__.info('Parsed %s items', len(records))
    self.__log__.info('Subjects: %s', json.dumps(subjects, indent=2))
    for cat in cats:
        self.__log__.info(' [[%s]]', cat)
        page = self.__site__.pages[cat]
        if not page.exists:
            page.save('[[{}]]'.format(self.__category__), 'Upload cat')
    self.__log__.info('Dumping metadata %s', self.__cache_meta__)
    with open(self.__cache_meta__, 'w') as f:
        json.dump(descr, f, indent=4, ensure_ascii=False)
def main():
    sickle = Sickle('http://arizona.openrepository.com/arizona/oai/request?')
    # sets = sickle.ListSets()
    recs = sickle.ListRecords(**{
        'metadataPrefix': 'oai_dc',
        'set': 'com_10150_129649',
        'from': '2017-04-05'
    })
    # `newFile` is assumed to be a writable file handle opened elsewhere in
    # the original module.
    newFile.write('<?xml version="1.0" encoding="utf-8"?>')
    newFile.write('<OAI-PMH xmlns:dc="http://purl.org/dc/elements/1.1/" '
                  'xmlns:OAI-PMH="http://www.openarchives.org/OAI/2.0/" '
                  'xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" '
                  'xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" '
                  'xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/ '
                  'http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd">')
    for r in recs:
        newFile.write(str(r))
    newFile.write('</OAI-PMH>')
def run(self):
    timestamp = datetime.utcnow()
    sickle = Sickle('http://invenio.nusl.cz/oai2d/')
    sickle.class_mapping['ListRecords'] = MarcXMLParser
    sickle.class_mapping['GetRecord'] = MarcXMLParser
    oai_logger.info("Loading records")
    records = sickle.ListRecords(metadataPrefix='marcxml')
    for idx, record in enumerate(records):
        print(f"{idx}. {record.id}")
        oai_logger.info(f"{idx}. {record.id}")
        try:
            current_search_client.index(
                index=self.index,
                id=record.marc_dict["001"],
                body=record.marc_dict
            )
        except Exception:
            exc_traceback = traceback.format_exc()
            print(exc_traceback)
            print("\n\n\n")
            file_name = f'{timestamp.strftime("%Y%m%dT%H%M%S")}.err'
            file_path = os.path.join(self.path, file_name)
            with open(file_path, "a") as f:
                f.write(
                    f"Dictionary: {record.marc_dict}\n\n"
                    f"{exc_traceback}\n\n\n\n")
            continue
def fetch_oai_recs_day(self, date):
    """
    Generator that yields the key and full record of works deposited on a
    particular day.

    :param date: the date of the deposits to fetch
    :return: (str key, str rec) tuples
    """
    api = Sickle(self.endpoint_url)
    date_str = date.isoformat()
    # The dict unpacking below works around 'from' being a reserved Python
    # keyword, as recommended by the Sickle docs.
    try:
        records = api.ListRecords(
            **{
                'metadataPrefix': self.metadata_prefix,
                'from': date_str,
                'until': date_str,
            })
    except oaiexceptions.NoRecordsMatch:
        records = []
        _LOGGER.info('OAI request produced no records.')
    for item in records:
        yield item.header.identifier.encode('utf-8'), item.raw.encode('utf-8')
def fetch_iter(self):
    try:
        sickle = Sickle(self._oaiconfig['OAI']['url'])
        records_iter = sickle.ListRecords(**self.dic)
        for record in records_iter:
            yield record
    except BadArgument as ba:
        self._exception_logger.error(
            "bad argument exception {EXCEPTION}".format(EXCEPTION=str(ba)))
    except OAIError as oaiError:
        self._exception_logger.error(
            "OAIError exception {EXCEPTION}".format(
                EXCEPTION=str(oaiError)))
    except NoRecordsMatch as noRecordsmatch:
        self._summary_logger.error("no records matched {EXCEPTION}".format(
            EXCEPTION=str(noRecordsmatch)))
    except Exception as baseException:
        self._summary_logger.error(
            "base exception occurred - not directly related to OAI {EXCEPTION}"
            .format(EXCEPTION=str(baseException)))
    else:
        print("oai fetching finished successfully")
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-f', '--from_date', required=True)
    params = parser.parse_args()

    logging.basicConfig(level=logging.INFO,
                        format='[%(asctime)s] %(levelname)s %(message)s',
                        datefmt='%d/%b/%Y %H:%M:%S')

    oai_client = Sickle(endpoint=OAI_PMH_PREPRINT_ENDPOINT,
                        max_retries=3,
                        verify=False)

    records = oai_client.ListRecords(**{
        'metadataPrefix': OAI_METADATA_PREFIX,
        'from': params.from_date
    })

    logging.info('Fetching OAI-PMH Preprints data for date >= %s'
                 % params.from_date)

    data = {}
    for r in records:
        data.update(parse(r))

    filename = ''.join([PREPRINT_DICTIONARY_PREFIX, params.from_date, '.json'])
    save(data, filename)
def _get_database(self, number):
    """
    Make an API call to the Neliti OAI-PMH endpoint and iteratively process
    each record entry, keeping track of the total number of records that
    have been successfully processed.
    """
    sickle = Sickle(self.url)
    records = sickle.ListRecords(metadataPrefix='oai_dc', ignore_deleted=True)
    no_of_records = 0
    while True:
        try:
            record = next(records)
        except StopIteration:
            break
        data = self._get_record_data(record)
        if data:
            self.database.append(data)
            no_of_records += 1
        else:
            continue
        if no_of_records % 100 == 0 and no_of_records != 0:
            print("Progress : {no_of_records} records Downloaded".format(
                no_of_records=no_of_records))
        if no_of_records == number:
            break
def ssoarharvest(filename='support_data/data_harvest1.json'):
    sickle = Sickle('https://www.ssoar.info/OAIHandler/request')
    records = sickle.ListRecords(metadataPrefix='oai_genios')
    counter = 0
    listofcounter = []
    for r in records:
        counter += 1
        listofcounter.append(r)
        if counter % 10000 == 0:
            print(counter)
    llt = []
    errorls = []
    for index, item in enumerate(listofcounter):
        try:
            # Round-trip through json (instead of eval) to get plain dicts.
            llt.append(
                json.loads(json.dumps(xmltodict.parse(etree.tostring(item.xml)))))
        except Exception:
            errorls.append(index)
    a = {}
    a["result"] = llt
    with open(filename, 'w') as fp:
        json.dump(a, fp, indent=4)
def harvest_oai_collection_records_sickle(self, collection):
    sickle = Sickle(collection.community.repository.base_url)
    sickle.class_mapping['ListRecords'] = LltRecord
    sickle.class_mapping['GetRecord'] = LltRecord
    records = sickle.ListRecords(metadataPrefix='dim',
                                 ignore_deleted=True,
                                 set=collection.identifier)
    return records
def harvest_oai(**kwargs):
    """Create OAI ListRecords Iterator for Harvesting Data."""
    oai_endpoint = kwargs.get("oai_endpoint")
    harvest_params = kwargs.get("harvest_params")
    logging.info("Harvesting from %s", oai_endpoint)
    logging.info("Harvesting %s", harvest_params)
    request = Sickle(oai_endpoint, retry_status_codes=[500, 503])
    data = request.ListRecords(**harvest_params)
    return data
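# Minimal usage sketch for harvest_oai; the endpoint URL below is a placeholder,
# and harvest_params takes any keyword arguments accepted by ListRecords.
example_records = harvest_oai(
    oai_endpoint="https://example.org/oai",             # hypothetical endpoint
    harvest_params={"metadataPrefix": "oai_dc"})
for example_record in example_records:
    print(example_record.header.identifier)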
def _fetch_records(endpoint, count):
    subset = []
    sickle = Sickle(endpoint)
    records = sickle.ListRecords(metadataPrefix='oai_dc', ignore_deleted=True)
    for idx, rec in enumerate(records):
        if idx == count:
            break
        subset.append(rec.metadata)
    return subset
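# Minimal usage sketch for _fetch_records, grabbing the first few Dublin Core
# records; the endpoint is borrowed from the arXiv snippets elsewhere in this
# listing and serves only as an example.
sample = _fetch_records('http://export.arxiv.org/oai2', 5)
for metadata in sample:
    print(metadata.get('title'))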
def test_pass_request_args(self):
    mock_response = Mock(text=u'<xml/>', content='<xml/>')
    mock_get = Mock(return_value=mock_response)
    with patch('sickle.app.requests.get', mock_get):
        sickle = Sickle('url', timeout=10, proxies=dict(),
                        auth=('user', 'password'))
        sickle.ListRecords()
        mock_get.assert_called_once_with('url',
                                         params={'verb': 'ListRecords'},
                                         timeout=10,
                                         proxies=dict(),
                                         auth=('user', 'password'))
def crawl_metadata(output_file, oai_params=None, fetch_limit=None):
    """
    Crawl records and flush them regularly to a temporary JSON file:
    low memory footprint, no loss of intermediate results.
    """
    sickle = Sickle(URL_OAI2)
    oai_params = oai_params if oai_params else {}
    logger.info("{} - retrieving records from {} with params {}".format(
        str(datetime.now()), URL_OAI2, str(oai_params)))
    t0 = time.time()
    t_last = t0

    raw_records = sickle.ListRecords(**oai_params)  # type: OAIItemIterator
    metadata_list = []
    records_size = int(raw_records._get_resumption_token().complete_list_size)
    batch_counter = 0
    batch_size = raw_records.oai_response.http_response.content.decode().count("</record>")
    batch_sum = int(math.ceil(records_size / float(batch_size)))
    counter = 0
    for raw_record in raw_records:  # type: Record
        # parse element and append
        try:
            identifier, record = parse_raw_record(raw_record)
            if record:
                metadata_list.append(record)
            else:
                logger.debug("Record `{}` was deleted and will therefore not "
                             "appear in the results.".format(identifier))
        except Exception:
            logger.warning("Failed to parse record %s", str(raw_record), exc_info=1)

        # write batch to file and write log
        counter += 1
        if counter % batch_size == 0:
            # write batch to file
            batch_counter += 1
            util.json_write_lines(metadata_list, output_file, append=(batch_counter > 1))
            metadata_list = []

            # log event
            t_current = time.time()
            t_remaining = ((1 / (counter / records_size)) - 1) * (t_current - t0)
            logger.info("Batch {}/{}: fetched {} of {} records (took {}s, "
                        "remaining: {} min, resumption token: {})".format(
                            batch_counter, batch_sum, counter, records_size,
                            round(t_current - t_last, 2),
                            round(t_remaining / 60, 1),
                            raw_records._get_resumption_token().token))
            t_last = t_current

        if fetch_limit and counter >= fetch_limit:
            break

    # write last batch
    if len(metadata_list) > 0:
        logger.info("Batch {}/{}: fetched the remaining {} records".format(
            batch_sum, batch_sum, len(metadata_list)))
        util.json_write_lines(metadata_list, output_file, append=True)

    logger.info("All {} entries were retrieved in {}s and written to {}".format(
        counter, round(time.time() - t0), output_file))
def download_records(input_file: str):
    with open(input_file, "r") as f:
        reader = csv.reader(f, delimiter=",")
        lines = [line[0] for line in reader]
    out_dir = lines[0]
    names = lines[1:]

    if os.path.exists(out_dir):
        shutil.rmtree(out_dir)
    os.mkdir(out_dir)

    sickle = Sickle(URL)
    recs = sickle.ListRecords(metadataPrefix="oai_dc",
                              set=DATASET,
                              ignore_deleted=True)
    infos = []
    for rec in recs:
        metadata = rec.metadata
        desc = metadata["description"][0]
        find_base_name = re.search("base_name : (.*)\n", desc)
        if find_base_name is not None:
            name = find_base_name.group(1)
            if name in names:
                identifier = metadata["identifier"][0]
                infos.append((name, identifier))

    for name, identifier in infos:
        record_path = os.path.join(out_dir, name)
        if not os.path.exists(record_path):
            os.mkdir(record_path)
        filename = name + ".hdf5"
        features_filename = name + "_features.hdf5"
        wcon_filename = name + ".wcon.zip"
        if not os.path.exists(os.path.join(record_path, filename)):
            url = identifier + "/files/" + urllib.parse.quote(filename)
            urllib.request.urlretrieve(url, os.path.join(record_path, filename))
            url = identifier + "/files/" + urllib.parse.quote(features_filename)
            urllib.request.urlretrieve(
                url, os.path.join(record_path, features_filename))
            url = identifier + "/files/" + urllib.parse.quote(wcon_filename)
            urllib.request.urlretrieve(
                url, os.path.join(record_path, wcon_filename))
            logger.info(f"Downloaded {name}")
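# Minimal usage sketch for download_records; the CSV layout is inferred from
# the code above (first row: output directory, following rows: base names),
# and both the file name and base names are hypothetical.
#
# records_to_download.csv:
#   downloads
#   EXAMPLE_BASE_NAME_1
#   EXAMPLE_BASE_NAME_2
download_records("records_to_download.csv")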
def test_no_retry(self):
    mock_response = Mock(status_code=503,
                         headers={'retry-after': '10'},
                         raise_for_status=Mock(side_effect=HTTPError))
    mock_get = Mock(return_value=mock_response)
    with patch.object(Session, 'get', mock_get):
        sickle = Sickle('url')
        try:
            sickle.ListRecords()
        except HTTPError:
            pass
        self.assertEqual(1, mock_get.call_count)
def list_records():
    sik = Sickle(URL_PREPRINTS_OAI, verify=False)
    records = sik.ListRecords(
        **{
            'metadataPrefix': 'oai_dc',
            'from': '2021-04-01',
            'until': '2021-04-07',
            'set': 'scielo'
        })
    for r in records:
        doc = doc_raw_attrs(r)
        print(doc)
def run(self, update_all=False, override=False):
    arxiv = Sickle('http://export.arxiv.org/oai2')
    # date = datetime.date(2014, 5, 14)
    # records = arxiv.ListRecords(**{'metadataPrefix': 'arXiv', 'from': str(date)})
    last_update = Synchronization.query.order_by(
        Synchronization.id.desc()).first()
    # Skip the sync if the last one ran less than a day ago.
    if last_update is not None and \
            (datetime.datetime.utcnow() - last_update.date).days < 1:
        return 0
    if last_update is None or update_all:
        date = None
        records = arxiv.ListRecords(metadataPrefix='arXiv')
    else:
        date = last_update.date.date()
        records = arxiv.ListRecords(**{
            'metadataPrefix': 'arXiv',
            'from': str(date)
        })
    count = 0
    badrecords = []
    for r in records:
        count += 1
        if count % 1000 == 0:
            print(count)
        try:
            a = self.add_article(r.metadata)
        except Exception as e:
            badrecords.append(r)
            print("Exception: ", e)
        # print(a.title)
    db.session.commit()
    db.session.add(Synchronization(date=datetime.datetime.now()))
    db.session.commit()
    print("all done!")
    return count
def get_direct_records(context, params):
    records = []
    i = 0
    root = OAIBridge.data["contexts"][context]
    for name in root:
        sickle = Sickle(root[name]['url'])
        sets = root[name]['sets'] if 'sets' in root[name] else None
        if not sets:
            try:
                for record in sickle.ListRecords(**params):
                    i += 1
                    if not record.deleted:
                        records.append(record.metadata)
            except NoRecordsMatch:
                pass
            except Exception:
                print(traceback.format_exc())
                break
        else:
            unknown_error = False
            for set_name in sets:
                new_params = dict(params)
                new_params['set'] = set_name
                try:
                    for record in sickle.ListRecords(**new_params):
                        i += 1
                        if not record.deleted:
                            records.append(record.metadata)
                except NoRecordsMatch:
                    pass
                except Exception:
                    print(traceback.format_exc())
                    unknown_error = True
                    break
            if unknown_error:
                break
    return i, records
def test_retry_on_custom_code(self):
    mock_response = Mock(status_code=500,
                         raise_for_status=Mock(side_effect=HTTPError))
    mock_get = Mock(return_value=mock_response)
    with patch.object(Session, 'get', mock_get):
        sickle = Sickle('url', max_retries=3, default_retry_after=0,
                        retry_status_codes=(503, 500))
        try:
            sickle.ListRecords()
        except HTTPError:
            pass
        mock_get.assert_called_with('url', params={'verb': 'ListRecords'})
        self.assertEqual(4, mock_get.call_count)
def get_events(self, **kwargs):
    LOG.debug(f"Executing {PORTAL_NAME} get events")
    if not self.users:
        LOG.debug("no users. exiting.")
        return False
    records_url = self.portal.get("event_urls", {}).get("oai_pmh_url")
    last_run = datetime.now()
    most_recent_datetime = self.get_most_recent_date(self.users)
    if most_recent_datetime:
        LOG.debug("start date value found in tracker state db entry.")
        from_datetime_str = most_recent_datetime.strftime("%Y-%m-%dT%H:%M:%SZ")
        from_datetime = most_recent_datetime
        LOG.debug("earliest date allowed: {}".format(from_datetime_str))
    else:
        until = tracker_app.app.config.get("DISALLOW_EVENTS_BEFORE")
        if until:
            from_datetime = datetime.strptime(until, "%Y-%m-%dT%H:%M:%SZ")
            from_datetime_str = from_datetime.strftime("%Y-%m-%dT%H:%M:%SZ")
        else:
            from_datetime = datetime.now() - timedelta(days=1)
            from_datetime_str = from_datetime.strftime("%Y-%m-%dT%H:%M:%SZ")
    LOG.debug("searching oai-pmh interface: %s" % records_url)
    try:
        sickle = Sickle(records_url)
        records = sickle.ListRecords(**{
            'metadataPrefix': 'oai_dc',
            'from': from_datetime_str
        })
        if records.oai_response.http_response.status_code != 200:
            LOG.debug("non-200 response code received. "
                      "updating tracker status and exiting.")
            self.complete_tracker(
                records.oai_response.http_response.status_code)
            return False
    except oaiexceptions.NoRecordsMatch:
        LOG.debug("end of records in oai-pmh response")
        # NoRecordsMatch is raised from a successful OAI-PMH response, and
        # `records` is not bound in this branch, so report a 200 status here.
        self.complete_tracker(200)
        return False
    self.parse_records(records, from_datetime, last_run)
def main():
    # inputs
    sleep_ct = 900     # number of records between sleeps
    sleep_time = 30    # secs
    base_url = 'http://export.arxiv.org/oai2'
    fname_prefix = "./raw_data/arXiv_oai_dc_"
    fname_log = "./raw_data/harvest.log"

    # create sickle
    sickle = Sickle(base_url)

    # get list of setSpecs
    ls_setSpec = get_ls_setSpec(sickle)
    ct_sets = len(ls_setSpec)

    # read log file to get last harvest date
    dt_last_harvest = get_dt_last_harvest(fname_log)

    # append records
    ct_records = 0
    for setSpec in ls_setSpec:
        print(setSpec)

        # get data file
        fname_data = fname_prefix + setSpec.replace(":", "_") + ".oai"
        f_data = open(fname_data, 'a')

        # append records
        records = sickle.ListRecords(**{
            "metadataPrefix": "oai_dc",
            "set": setSpec,
            "from": dt_last_harvest
        })
        for record in records:
            ct_records += 1
            f_data.write(str(record.metadata) + '\n')
            f_data.write(str(record.header) + '\n')
            if ct_records % sleep_ct == 0:
                print("sleep for %d secs" % sleep_time)
                time.sleep(sleep_time)
        f_data.close()

    # log harvest
    logger = csv.writer(open(fname_log, 'a'))
    dt_prev = dt_last_harvest
    dt_curr = datetime.datetime.today().date() - relativedelta(days=1)
    logger.writerow([dt_curr, dt_prev, ct_sets, ct_records])
def list_set_records(setSpec):
    set_recs = []
    sickle = Sickle(admin.get_repository_url())
    try:
        recs = sickle.ListRecords(metadataPrefix='oai_dc', set=setSpec)
        for rec in recs:
            set_recs.append({
                "identifier": rec.header.identifier,
                "datestamp": rec.header.datestamp,
                "setSpec": rec.header.setSpecs,
                "dc": rec.metadata,
            })
    except Exception:
        pass
    return set_recs
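# Minimal usage sketch for list_set_records; the setSpec is a placeholder,
# and the repository URL comes from admin.get_repository_url() inside the
# function above.
example_set_records = list_set_records("com_10150_129649")  # hypothetical setSpec
print(len(example_set_records), "records in set")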
def run(self):
    """
    Run the process to update pre-prints in Solr.
    """
    if self.args.delete:
        self.solr.delete(self.args.delete, commit=True)
    else:
        print("Indexing in {0}".format(self.solr.url))

        sickle = Sickle(self.args.oai_url, verify=False)

        filters = {'metadataPrefix': 'oai_dc'}
        if self.args.time:
            filters['from'] = self.from_date.strftime("%Y-%m-%dT%H:%M:%SZ")

        try:
            records = sickle.ListRecords(**filters)
        except NoRecordsMatch as e:
            print(e)
            sys.exit(0)
        else:
            for i, record in enumerate(records):
                try:
                    xml = self.pipeline_to_xml(record.xml)
                    print("Indexing record %s with oai id: %s" % (
                        i, record.header.identifier))
                    self.solr.update(xml, commit=True)
                except ValueError as e:
                    print("ValueError: {0}".format(e))
                    continue
                except Exception as e:
                    print("Error: {0}".format(e))
                    continue

        # optimize the index
        self.solr.commit()
        self.solr.optimize()
def test_retry_on_503(self):
    mock_response = Mock(status_code=503,
                         headers={'retry-after': '10'},
                         raise_for_status=Mock(side_effect=HTTPError))
    mock_get = Mock(return_value=mock_response)
    sleep_mock = Mock()
    with patch('time.sleep', sleep_mock):
        with patch.object(Session, 'get', mock_get):
            sickle = Sickle('url', max_retries=3, default_retry_after=0)
            try:
                sickle.ListRecords()
            except HTTPError:
                pass
            mock_get.assert_called_with('url', params={'verb': 'ListRecords'})
            self.assertEqual(4, mock_get.call_count)
            self.assertEqual(3, sleep_mock.call_count)
            sleep_mock.assert_called_with(10)
def parse_list(self, response):
    sickle = Sickle(self.url)
    params = {
        'metadataPrefix': self.format,
        'set': response.meta['set'],
        'from': response.meta['from_date'],
        'until': self.until_date,
    }
    try:
        records = sickle.ListRecords(**params)
    except NoRecordsMatch as err:
        LOGGER.warning(err)
        # Return instead of raising StopIteration: under PEP 479, raising
        # StopIteration inside a generator turns into a RuntimeError.
        return

    # Avoid timing out the resumption token
    # TODO: implement a storage-based solution, to be able to handle large
    # amounts of records.
    records = list(records)
    LOGGER.info(
        'Harvested %s records for params %s',
        len(records),
        params,
    )

    for record in records:
        rec_identifier = self.get_record_identifier(record)
        if rec_identifier in self._crawled_records:
            # avoid cross-set repeated records
            LOGGER.info('Skipping duplicated record %s', rec_identifier)
            continue

        LOGGER.debug('Not skipping non-duplicated record %s', rec_identifier)
        self._crawled_records[rec_identifier] = record
        response = XmlResponse(self.url, encoding='utf-8', body=record.raw)
        selector = Selector(response, type='xml')
        try:
            yield self.parse_record(selector)
        except Exception as err:
            LOGGER.error(err)
class DOABOAIClient():
    def __init__(self):
        self._sickle = Sickle(const.DOAB_OAI_ENDPOINT)

    def fetch_records_for_publisher_id(self, publisher_id):
        return self._fetch_records(publisher_id=publisher_id)

    def fetch_all_records(self):
        return self._fetch_records()

    def _fetch_records(self, publisher_id=None):
        kwargs = {
            "metadataPrefix": "oai_dc",
        }
        if publisher_id is not None:
            kwargs["set"] = f"publisher_{publisher_id}"
        return (DOABRecord(record)
                for record in self._sickle.ListRecords(**kwargs)
                if record_is_active_book(record))
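# Minimal usage sketch for DOABOAIClient; the publisher id below is a
# placeholder, and records are yielded lazily from the generator.
client = DOABOAIClient()
for doab_record in client.fetch_records_for_publisher_id(1234):  # hypothetical id
    print(doab_record)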
def run(self):
    """
    Run the process to update pre-prints in Solr.
    """
    if self.args.delete:
        self.solr.delete(self.args.delete, commit=True)
    else:
        logger.info("Indexing in {0}".format(self.solr.url))

        sickle = Sickle(self.args.oai_url)
        records = sickle.ListRecords(
            **{
                'metadataPrefix': 'oai_dc',
                'from': self.from_date.strftime("%Y-%m-%dT%H:%M:%SZ")
            })

        for record in records:
            try:
                xml = self.pipeline_to_xml(record.xml)
                self.solr.update(xml, commit=True)
            except ValueError as e:
                logger.error("ValueError: {0}".format(e))
                logger.exception(e)
                continue
            except Exception as e:
                logger.error("Error: {0}".format(e))
                logger.exception(e)
                continue

        # optimize the index
        self.solr.commit()
        self.solr.optimize()
# force the output of print to be flushed immediately, see:
# http://stackoverflow.com/questions/230751/how-to-flush-output-of-python-print
sys.stdout.flush()

runningFromWithinStabi = False

# main PPN harvesting
savedRecords = []

if runningFromWithinStabi:
    proxy = urllib.request.ProxyHandler({})
    opener = urllib.request.build_opener(proxy)
    urllib.request.install_opener(opener)

# create an OAI-PMH reader pointing to the Stabi OAI-PMH endpoint of the digitized collections
sickle = Sickle('http://digital.staatsbibliothek-berlin.de/oai')
records = sickle.ListRecords(metadataPrefix='oai_dc', set='DC_all')

if True:
    printLog("Starting OAI-PMH record download...")
    # initialize some variables for counting and saving the metadata records
    savedDocs = 0
    # 100 is just for testing, for more interesting results increase this value to 1000.
    # ATTENTION! this will also take more time for reading data.
    maxDocs = 146000

    # save the records locally as we don't want to have to rely on a connection
    # to the OAI-PMH server all the time
    # iterate over all records until maxDocs is reached
    # ATTENTION! if you re-run this cell, the contents of the savedRecords array will be altered!
    for record in records:
        # check if we reached the maximum document value
        if savedDocs < maxDocs:
            savedDocs = savedDocs + 1
            # keep the current record's metadata in the savedRecords array
            savedRecords.append(record.metadata)
        else:
            break