def cleanup_index_batch(hepsubmission_record_ids, index):
    """Delete indexed data records belonging to a batch of HEPSubmissions.

    Queries ``DataSubmission`` rows whose ``publication_recid`` and
    ``version`` match a ``HEPSubmission`` whose id is in the batch, keeping
    only rows for which no other ``DataSubmission`` with the same
    ``associated_recid`` and a higher ``version`` exists.  The
    ``associated_recid`` values of those rows are then used as document
    ``_id`` values for a delete on ``index``.

    :param hepsubmission_record_ids: sequence of ``HEPSubmission`` ids to
        process.  The first and last elements are only used for the log
        message.  An empty sequence is a no-op.
    :param index: name of the search index handed to ``RecordsSearch``.
    :return: ``None``
    """
    # Nothing to look up for an empty batch; this also avoids an IndexError
    # when building the log message below.
    if not hepsubmission_record_ids:
        return

    first_id = hepsubmission_record_ids[0]
    last_id = hepsubmission_record_ids[-1]
    log.info(
        f'Cleaning up index for data records for hepsubmission IDs {first_id} to {last_id}'
    )

    # Two aliases of the same table: ``current`` is the row we may delete
    # from the index, ``newer`` is a potential higher-version row sharing
    # the same associated_recid.
    current = aliased(DataSubmission)
    newer = aliased(DataSubmission)

    submission_match = and_(
        HEPSubmission.publication_recid == current.publication_recid,
        HEPSubmission.version == current.version,
    )
    newer_version_match = and_(
        current.associated_recid == newer.associated_recid,
        current.version < newer.version,
    )

    qry = (
        db.session.query(current.associated_recid)
        .join(HEPSubmission, submission_match, isouter=True)
        .join(newer, newer_version_match, isouter=True)
        # ``newer.id IS NULL`` after the outer join means no higher-version
        # row with the same associated_recid was found.
        .filter(
            HEPSubmission.id.in_(hepsubmission_record_ids),
            newer.id.is_(None),
        )
        .order_by(current.id)
    )
    res = qry.all()

    # Each result row is a 1-tuple holding the associated_recid.
    ids = [row[0] for row in res]
    if ids:
        log.info(f'Deleting entries from index with ids {ids}')
        s = RecordsSearch(index=index).filter('terms', _id=ids)
        s.delete()