def process_staged_urls():
    """Download all staged URLs and update Catalog and Document.

    For every ``UrlStage`` row this looks up the ``Place`` with the same
    ``ocd_division_id`` and the ``Event`` with the same division, record
    date and name. Then:

    * if a ``Catalog`` row with the same ``url_hash`` already exists, a
      document built by ``map_document`` is saved that references it;
    * otherwise the URL is fetched with ``Media(url_record).gather()``.
      When that returns a truthy value it is stored as the catalog
      ``location``, the catalog row is saved, and a document referencing
      it is saved. When it returns a falsy value nothing is saved.

    Staged URLs with no matching place or event are skipped with a
    message, so one incomplete record does not abort the whole run.

    Returns:
        None. Progress is reported with ``print``.
    """
    engine = db_connect()
    create_tables(engine)
    Session = sessionmaker(bind=engine)
    session = Session()

    try:
        # for event in session.query(EventStage).all():
        #     copy_event_from_stage(event)
        for url_record in session.query(UrlStage).all():
            place_record = (
                session.query(Place)
                .filter(Place.ocd_division_id == url_record.ocd_division_id)
                .first()
            )
            event_record = (
                session.query(Event)
                .filter(
                    Event.ocd_division_id == url_record.ocd_division_id,
                    Event.record_date == url_record.event_date,
                    Event.name == url_record.event,
                )
                .first()
            )

            # .first() yields None when nothing matches; without both ids
            # a document cannot be mapped, so skip instead of crashing on
            # the attribute access below.
            if place_record is None or event_record is None:
                print(
                    f'Skipping {url_record.url_hash}: '
                    'no matching place or event'
                )
                continue

            print(f'place id: {place_record.id}\n event_id:{event_record.id}')

            catalog_entry = (
                session.query(Catalog)
                .filter(Catalog.url_hash == url_record.url_hash)
                .first()
            )

            # Document already exists in catalog
            if catalog_entry:
                catalog_id = catalog_entry.id
                print(f'catalog_id---------{catalog_id}')
                document = map_document(
                    url_record, place_record.id, event_record.id, catalog_id)
                save_record(document)
                print("existing in catalog adding reference to document")
                continue

            print("Does not exist")
            # Download and save document
            catalog = Catalog(
                url=url_record.url,
                url_hash=url_record.url_hash,
                location='placeholder',
                filename=f'{url_record.url_hash}.pdf',
            )
            doc = Media(url_record)

            # download
            result = doc.gather()
            if not result:
                # Nothing was downloaded, so there is no catalog entry
                # for a document to reference.
                continue

            # Add to doc catalog
            catalog.location = result
            # NOTE(review): save_record's return value is used as the new
            # row's id here -- confirm against its definition.
            catalog_id = save_record(catalog)

            # Add document reference
            document = map_document(
                url_record, place_record.id, event_record.id, catalog_id)
            doc_id = save_record(document)
            print(f'Added {url_record.url_hash} doc_id: {doc_id}')
    finally:
        # Release the session's connection even if a record raised.
        session.close()