async def get_thing(
    identifier: str,
    full: bool = False,
    format: isb_format.ISBFormat = isb_format.ISBFormat.ORIGINAL,
    session: Session = Depends(get_session),
):
    """Record for the specified identifier"""
    item = sqlmodel_database.get_thing_with_id(session, identifier)
    if item is None:
        raise fastapi.HTTPException(
            status_code=404, detail=f"Thing not found: {identifier}"
        )
    # FULL (or the explicit flag) returns the stored Thing itself.
    if full or format == isb_format.ISBFormat.FULL:
        return item
    if format == isb_format.ISBFormat.CORE:
        authority_id = item.authority_id
        # Table of per-authority transformers for the CORE representation.
        core_transformers = {
            "SESAR": SESARTransformer,
            "GEOME": GEOMETransformer,
            "OPENCONTEXT": OpenContextTransformer,
            "SMITHSONIAN": SmithsonianTransformer,
        }
        transformer_cls = core_transformers.get(authority_id)
        if transformer_cls is None:
            raise fastapi.HTTPException(
                status_code=400,
                detail=f"Core format not available for authority_id: {authority_id}",
            )
        content = transformer_cls(item.resolved_content).transform()
    else:
        # ORIGINAL format: hand back the resolved content untouched.
        content = item.resolved_content
    return fastapi.responses.JSONResponse(
        content=content, media_type=item.resolved_media_type
    )
async def _load_open_context_entries(session, max_count, start_from):
    """Iterate OpenContext records and persist each one as a Thing.

    Existing Things (matched by the record's ``uri``) are updated in place;
    new ones are inserted, with an IntegrityError treated as "already exists".

    Args:
        session: database session used for lookups and commits.
        max_count: maximum number of records to pull from OpenContext.
        start_from: earliest record date to request, or None.
    """
    L = get_logger()
    records = isb_lib.opencontext_adapter.OpenContextRecordIterator(
        max_entries=max_count, date_start=start_from, page_size=200
    )
    num_ids = 0
    for record in records:
        L.info("got next id from open context %s", record)
        num_ids += 1
        # "id" would shadow the builtin; the record's URI is the Thing id.
        record_id = record["uri"]
        existing_thing = sqlmodel_database.get_thing_with_id(session, record_id)
        if existing_thing is not None:
            L.info("Already have %s", record_id)
            isb_lib.opencontext_adapter.update_thing(
                existing_thing, record, datetime.datetime.now(), records.last_url_str()
            )
            session.commit()
            L.info("Just saved existing thing")
        else:
            L.debug("Don't have %s", record_id)
            thing = isb_lib.opencontext_adapter.load_thing(
                record, datetime.datetime.now(), records.last_url_str()
            )
            try:
                L.debug("Going to add thing to session")
                session.add(thing)
                L.debug("Added thing to session")
                session.commit()
                L.debug("committed session")
            except sqlalchemy.exc.IntegrityError:
                # Another writer beat us to the insert; roll back and move on.
                session.rollback()
                L.error("Item already exists: %s", record)
    L.info("total num records %d", num_ids)
def test_get_thing_with_id_thing(session: Session):
    """A Thing committed to the session is retrievable by its id."""
    thing_id = "123456"
    session.add(
        Thing(
            id=thing_id,
            authority_id="test",
            resolved_url="http://foo.bar",
            resolved_status=200,
            resolved_content={"foo": "bar"},
        )
    )
    session.commit()
    fetched = get_thing_with_id(session, thing_id)
    assert fetched is not None
    assert fetched.primary_key is not None
    assert fetched.id == thing_id
def _save_record_to_db(session, file_path, record):
    """Insert one Smithsonian record as a Thing unless it is already stored.

    Args:
        session: database session used for the lookup and the insert.
        file_path: source file the record came from (recorded on the Thing).
        record: dict with at least an "id" key identifying the sample.
    """
    # "id" would shadow the builtin.
    record_id = record["id"]
    logging.info("got next id from smithsonian %s", record_id)
    existing_thing = get_thing_with_id(session, record_id)
    if existing_thing is not None:
        # Guard clause: nothing to do for records we already hold.
        logging.info("Already have %s", record_id)
        return
    logging.debug("Don't have %s", record_id)
    thing = isb_lib.smithsonian_adapter.load_thing(
        record, datetime.datetime.now(), file_path
    )
    try:
        logging.debug("Going to add thing to session")
        session.add(thing)
        logging.debug("Added thing to session")
        session.commit()
        logging.debug("committed session")
    except sqlalchemy.exc.IntegrityError:
        # Concurrent insert of the same id; roll back and keep going.
        session.rollback()
        logging.error("Item already exists: %s", record)
async def _loadSesarEntries(session, max_count, start_from=None):
    """Concurrently fetch SESAR records and persist them as Things.

    Identifiers come from the SESAR sitemap; downloads are fanned out over a
    thread pool (``CONCURRENT_DOWNLOADS`` workers, ``BACKLOG_SIZE`` queued
    jobs). Failed downloads are retried up to 3 times per IGSN.

    Args:
        session: database session used for lookups and commits.
        max_count: stop after this many identifiers have been requested.
        start_from: earliest sitemap date to consider, or None for all.
    """
    L = getLogger()
    futures = []
    working = {}  # igsn -> retry count for in-flight downloads
    ids = isb_lib.sesar_adapter.SESARIdentifiersSitemap(
        max_entries=countThings(session) + max_count, date_start=start_from
    )
    total_requested = 0
    total_completed = 0
    more_work = True
    num_prepared = BACKLOG_SIZE  # Number of jobs to prepare for execution
    with concurrent.futures.ThreadPoolExecutor(
        max_workers=CONCURRENT_DOWNLOADS
    ) as executor:
        while more_work:
            # populate the futures list with work until the list is full
            # or there is no more work to get.
            while (
                len(futures) < BACKLOG_SIZE
                and total_requested < max_count
                and num_prepared > 0
            ):
                try:
                    _id = next(ids)
                    igsn = igsn_lib.normalize(_id[0])
                    existing_thing = sqlmodel_database.get_thing_with_id(
                        session, isb_lib.sesar_adapter.fullIgsn(igsn)
                    )
                    if existing_thing is not None:
                        logging.info("Already have %s at %s", igsn, existing_thing)
                        future = executor.submit(
                            wrapLoadThing, igsn, _id[1], existing_thing
                        )
                    else:
                        future = executor.submit(wrapLoadThing, igsn, _id[1])
                    futures.append(future)
                    working[igsn] = 0
                    total_requested += 1
                except StopIteration:
                    L.info("Reached end of identifier iteration.")
                    num_prepared = 0
                if total_requested >= max_count:
                    num_prepared = 0
            L.debug("%s", working)
            try:
                # Short timeout so we regularly fall through to refill the
                # backlog even while downloads are still running.
                for fut in concurrent.futures.as_completed(futures, timeout=1):
                    igsn, tc, _thing = fut.result()
                    futures.remove(fut)
                    if _thing is not None:
                        try:
                            session.add(_thing)
                            session.commit()
                        except sqlalchemy.exc.IntegrityError:
                            session.rollback()
                            # NOTE(review): _id holds the most recently
                            # prepared identifier, which is not necessarily
                            # the one for this completed future — this log
                            # line may name the wrong id. Confirm intent.
                            logging.error("Item already exists: %s", _id[0])
                        working.pop(igsn)
                        total_completed += 1
                    else:
                        # Download failed; retry up to 3 times per IGSN.
                        if working.get(igsn, 0) < 3:
                            if igsn not in working:
                                working[igsn] = 1
                            else:
                                working[igsn] += 1
                            L.info(
                                "Failed to retrieve %s. Retry = %s",
                                igsn,
                                working[igsn],
                            )
                            future = executor.submit(wrapLoadThing, igsn, tc)
                            futures.append(future)
                        else:
                            L.error("Too many retries on %s", igsn)
                            working.pop(igsn)
            except concurrent.futures.TimeoutError:
                # Nothing completed within the timeout; loop to refill/retry.
                pass
            if len(futures) == 0 and num_prepared == 0:
                more_work = False
            if total_completed >= max_count:
                more_work = False
            L.info(
                "requested, completed, current = %s, %s, %s",
                total_requested,
                total_completed,
                len(futures),
            )
def test_get_thing_with_id_no_things(session: Session):
    """Looking up an id in an empty database returns None."""
    result = get_thing_with_id(session, "12345")
    assert result is None