def test_scrub_pii(self):
    """After scrubbing, no row of the participant table contains "PII"."""
    data_dir = os.path.join("tests", "datasets", "pii")
    dallinger.data._scrub_participant_table(data_dir)
    csv_path = os.path.join(data_dir, "participant.csv")
    with open_for_csv(csv_path, 'r') as csv_file:
        rows = csv.reader(csv_file, delimiter=',')
        next(rows)  # discard the header row
        for data_row in rows:
            assert "PII" not in data_row
def test_exported_database_includes_headers(self):
    """The exported network.csv begins with a header row naming its columns."""
    destination = tempfile.mkdtemp()
    dallinger.data.copy_db_to_csv("dallinger", destination)
    csv_path = os.path.join(destination, "network.csv")
    assert os.path.isfile(csv_path)
    with open_for_csv(csv_path, "r") as csv_file:
        header_row = next(csv.reader(csv_file, delimiter=","))
    assert "creation_time" in header_row
def test_copy_db_to_csv_includes_participant_data(self, db_session):
    """An unscrubbed export keeps the original worker_id in participant.csv."""
    dallinger.data.ingest_zip(self.bartlett_export)
    destination = tempfile.mkdtemp()
    dallinger.data.copy_db_to_csv("dallinger", destination, scrub_pii=False)
    csv_path = os.path.join(destination, "participant.csv")
    assert os.path.isfile(csv_path)
    with open_for_csv(csv_path, "r") as csv_file:
        rows = csv.reader(csv_file, delimiter=",")
        header_row = next(rows)
        first_data_row = next(rows)
    assert first_data_row[header_row.index("worker_id")] == "SM6DMD"
def _scrub_participant_table(path_to_data):
    """Scrub PII from the participant table in ``path_to_data``.

    Rewrites ``participant.csv`` in place: each row's ``worker_id`` is
    replaced by the participant's numeric ``id``, and ``unique_id`` is
    rebuilt as ``"<id>:<assignment_id>"``, so no real worker identifiers
    remain in the exported data.
    """
    path = os.path.join(path_to_data, "participant.csv")
    scrubbed_path = "{}.0".format(path)
    # Write to a sibling temp file, then rename over the original, so a
    # failure mid-scrub never leaves a half-written participant.csv.
    # (Renamed `input` -> `infile`: the original shadowed the builtin.)
    with open_for_csv(path, 'r') as infile, open(scrubbed_path, 'w') as outfile:
        reader = csv.reader(infile)
        writer = csv.writer(outfile)
        headers = next(reader)
        writer.writerow(headers)
        # Hoist the column lookups out of the loop; headers.index() is a
        # linear scan and was previously repeated for every row.
        id_col = headers.index("id")
        worker_col = headers.index("worker_id")
        unique_col = headers.index("unique_id")
        assignment_col = headers.index("assignment_id")
        for row in reader:
            row[worker_col] = row[id_col]
            row[unique_col] = "{}:{}".format(row[id_col], row[assignment_col])
            writer.writerow(row)
    os.rename(scrubbed_path, path)