コード例 #1
0
ファイル: test_data.py プロジェクト: istresearch/Dallinger
 def test_scrub_pii(self):
     """After scrubbing the "pii" dataset, no data row has a cell equal to "PII"."""
     dataset_dir = os.path.join("tests", "datasets", "pii")
     dallinger.data._scrub_participant_table(dataset_dir)
     participant_csv = os.path.join(dataset_dir, "participant.csv")
     with open_for_csv(participant_csv, 'r') as handle:
         rows = csv.reader(handle, delimiter=',')
         next(rows)  # skip the header row
         # `in` on a parsed row is an exact cell match, not a substring search.
         assert all("PII" not in cells for cells in rows)
コード例 #2
0
 def test_exported_database_includes_headers(self):
     """The exported network.csv exists and its header row has "creation_time"."""
     import shutil  # local import: only needed for temp-dir cleanup

     export_dir = tempfile.mkdtemp()
     try:
         dallinger.data.copy_db_to_csv("dallinger", export_dir)
         network_table_path = os.path.join(export_dir, "network.csv")
         assert os.path.isfile(network_table_path)
         with open_for_csv(network_table_path, "r") as f:
             reader = csv.reader(f, delimiter=",")
             header = next(reader)
             assert "creation_time" in header
     finally:
         # mkdtemp() leaves removal to the caller; clean up so repeated
         # test runs do not accumulate exported CSV directories.
         shutil.rmtree(export_dir, ignore_errors=True)
コード例 #3
0
 def test_copy_db_to_csv_includes_participant_data(self, db_session):
     """With scrub_pii=False, the exported participant.csv keeps the worker_id.

     Ingests ``self.bartlett_export``, exports the database to a temporary
     directory, and checks the ``worker_id`` cell of the first data row.
     """
     import shutil  # local import: only needed for temp-dir cleanup

     dallinger.data.ingest_zip(self.bartlett_export)
     export_dir = tempfile.mkdtemp()
     try:
         dallinger.data.copy_db_to_csv("dallinger", export_dir, scrub_pii=False)
         participant_table_path = os.path.join(export_dir, "participant.csv")
         assert os.path.isfile(participant_table_path)
         with open_for_csv(participant_table_path, "r") as f:
             reader = csv.reader(f, delimiter=",")
             header = next(reader)
             row1 = next(reader)
             # Look the column up by name so the check does not depend on
             # the column order of the export.
             assert row1[header.index("worker_id")] == "SM6DMD"
     finally:
         # mkdtemp() leaves removal to the caller; clean up so repeated
         # test runs do not accumulate exported CSV directories.
         shutil.rmtree(export_dir, ignore_errors=True)
コード例 #4
0
def _scrub_participant_table(path_to_data):
    """Scrub PII from the given participant table."""
    path = os.path.join(path_to_data, "participant.csv")
    with open_for_csv(path, 'r') as input, open("{}.0".format(path),
                                                'w') as output:
        reader = csv.reader(input)
        writer = csv.writer(output)
        headers = next(reader)
        writer.writerow(headers)
        for i, row in enumerate(reader):
            row[headers.index("worker_id")] = row[headers.index("id")]
            row[headers.index("unique_id")] = "{}:{}".format(
                row[headers.index("id")], row[headers.index("assignment_id")])
            writer.writerow(row)

        os.rename("{}.0".format(path), path)