def export_matches(collection, authz):
    """Render the cross-reference matches of a collection as an Excel export.

    Every match yielded by ``index.iter_matches(collection, authz)`` is
    collected into batches; a batch is handed to ``_iter_match_batch`` as soon
    as it holds ``BULK_PAGE`` matches, and any remainder is flushed at the end.

    Returns the result of ``ExcelWriter.get_bytesio()`` for the filled
    workbook.
    """
    writer = ExcelWriter()
    columns = [
        "Score",
        "Entity Name",
        "Entity Date",
        "Entity Countries",
        "Candidate Collection",
        "Candidate Name",
        "Candidate Date",
        "Candidate Countries",
        "Entity Link",
        "Candidate Link",
    ]
    worksheet = excel_sheet = writer.make_sheet("Cross-reference", columns)

    pending = []
    for match in index.iter_matches(collection, authz):
        pending.append(match)
        if len(pending) < BULK_PAGE:
            continue
        # Page is full: write it out and start a fresh list.
        _iter_match_batch(writer, worksheet, pending)
        pending = []

    # Flush whatever did not fill a whole page.
    if pending:
        _iter_match_batch(writer, excel_sheet, pending)

    return writer.get_bytesio()
def export_matches(export_id):
    """Write the cross-reference matches for an export to an Excel file.

    The export record is looked up by ``export_id``; its creator's role
    provides the authorization and its collection provides the matches
    (via ``iter_matches``). The workbook is written into a fresh temporary
    directory and then handed to ``complete_export`` together with the
    export id.

    Any exception raised while building or handing over the file is logged
    and the export is marked ``Status.FAILED``; it is not re-raised. The
    temporary directory is removed in every case.

    :param export_id: identifier accepted by ``Export.by_id``.
    """
    import os  # local import: only needed to sanitise the file name

    export = Export.by_id(export_id)
    export_dir = ensure_path(mkdtemp(prefix="aleph.export."))
    try:
        role = Role.by_id(export.creator_id)
        authz = Authz.from_role(role)
        collection = Collection.by_id(export.collection_id)

        file_name = "%s - Crossreference.xlsx" % collection.label
        # The label is free text. A path separator in it would make joinpath
        # point into a non-existent sub-directory (or, for a leading
        # separator, outside export_dir altogether), so neutralise them.
        for separator in ("/", os.sep):
            file_name = file_name.replace(separator, "_")
        file_path = export_dir.joinpath(file_name)

        excel = ExcelWriter()
        headers = [
            "Score",
            "Entity Name",
            "Entity Date",
            "Entity Countries",
            "Candidate Collection",
            "Candidate Name",
            "Candidate Date",
            "Candidate Countries",
            "Entity Link",
            "Candidate Link",
        ]
        sheet = excel.make_sheet("Cross-reference", headers)

        # Write matches to the sheet one page (BULK_PAGE items) at a time.
        batch = []
        for match in iter_matches(collection, authz):
            batch.append(match)
            if len(batch) >= BULK_PAGE:
                _iter_match_batch(excel, sheet, batch)
                batch = []
        if batch:
            _iter_match_batch(excel, sheet, batch)

        with open(file_path, "wb") as fp:
            # Chunked binary copy; iterating the buffer would split the
            # workbook bytes on newline characters, which is meaningless here.
            # NOTE(review): assumes get_bytesio() returns a readable
            # file-like object, as its name suggests - confirm.
            shutil.copyfileobj(excel.get_bytesio(), fp)

        complete_export(export_id, file_path)
    except Exception:
        log.exception("Failed to process export [%s]", export_id)
        # Re-fetch the record before flagging it as failed.
        export = Export.by_id(export_id)
        export.set_status(status=Status.FAILED)
        db.session.commit()
    finally:
        shutil.rmtree(export_dir)
def test_xref(self):
    """Cross-referencing ``coll_a`` takes its match count from 0 to 3."""
    # Nothing has been cross-referenced yet, so no matches are expected.
    initial_count = len(list(iter_matches(self.coll_a, self.authz)))
    assert 0 == initial_count, initial_count

    xref_collection(self.stage, self.coll_a)

    # After the xref run the fixture data should yield exactly three matches.
    final_count = len(list(iter_matches(self.coll_a, self.authz)))
    assert 3 == final_count, final_count