def test_valueholder_negpos():
    neg, zero, pos = ValueHolder(-1), ValueHolder(0), ValueHolder(1)
    assert -neg == pos
    assert -pos == neg
    assert -zero == zero
    assert +pos == pos
    assert +neg == neg
def test_valueholder_ordering(rlo, rhi):
    vlo, vhi = ValueHolder(rlo), ValueHolder(rhi)
    for lo in (rlo, vlo):
        for hi in (rhi, vhi):
            assert lo < hi
            assert hi > lo
            assert lo <= lo
            assert not (lo < lo)
            assert lo >= lo
def test_valueholders_containers():
    x = ValueHolder({1, 2, 3, 5, 8, 13})
    assert 5 in x
    assert 42 not in x

    y = ValueHolder({"foo": "bar", "corp": "acme"})
    assert "foo" in y
    assert y["foo"] == "bar"
    with pytest.raises(KeyError):
        y["no"]
    y["no"] = "oh, wait"
    assert "no" in y
    assert "oh, wait" == y["no"]
def test_valueholder_integer_operations(x, y, operation, inplace_operation):
    v = ValueHolder(x)
    is_supported = operation not in unsupported_operations.get(type(x), set())
    isdiv = ("div" in operation.__name__) or ("mod" in operation.__name__)

    # forward...
    with optional_contextmanager(pytest.raises(TypeError), ignore=is_supported):
        with optional_contextmanager(pytest.raises(ZeroDivisionError), ignore=y or not isdiv):
            assert operation(x, y) == operation(v, y)

    # backward...
    with optional_contextmanager(pytest.raises(TypeError), ignore=is_supported):
        with optional_contextmanager(pytest.raises(ZeroDivisionError), ignore=x or not isdiv):
            assert operation(y, x) == operation(y, v)

    # in place...
    if inplace_operation is not None:
        with optional_contextmanager(pytest.raises(TypeError), ignore=is_supported):
            with optional_contextmanager(pytest.raises(ZeroDivisionError), ignore=y or not isdiv):
                inplace_operation(v, y)
                assert v == operation(x, y)
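# The test above relies on an optional_contextmanager helper that only applies the
# wrapped context manager when `ignore` is falsy. A minimal sketch of such a helper
# is shown below, assuming that behaviour; the project's actual implementation may
# differ in details.
from contextlib import contextmanager


@contextmanager
def optional_contextmanager(cm, *, ignore=False):
    # When ignore is truthy, behave as a no-op context; otherwise delegate to cm,
    # e.g. to a pytest.raises(...) block that is only expected conditionally.
    if ignore:
        yield
    else:
        with cm:
            yield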
def writer(self, context, fs, file, lineno):
    writer = csv.writer(file, delimiter=self.delimiter, quotechar=self.quotechar, lineterminator=self.eol)
    headers = ValueHolder(list(self.headers) if self.headers else None)
    yield writer, headers
def acc(self, context, **kwargs):
    centres = yield ValueHolder(set())
    for centre in sorted(centres.get()):
        print(centre)

    if not NO_OUTPUT_FILE:
        with open("/tmp/message.txt", "w") as f:
            for centre in sorted(centres.get()):
                print(centre, file=f)
def test_valueholder():
    x = ValueHolder(42)
    assert x == 42
    x += 1
    assert x == 43
    assert x + 1 == 44
    assert x == 43

    y = ValueHolder(44)
    assert y == 44
    y -= 1
    assert y == 43
    assert y - 1 == 42
    assert y == 43

    assert y == x
    assert y is not x
    assert repr(x) == repr(y) == repr(43)
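# For orientation, the behaviour exercised above only needs a thin mutable wrapper
# around a value. The sketch below illustrates that idea under that assumption; the
# real ValueHolder used in these snippets forwards many more operators (unary,
# ordering, containment, item access), so this is illustrative, not the actual class.
class MinimalValueHolder:
    def __init__(self, value):
        self._value = value

    def get(self):
        return self._value

    def set(self, value):
        self._value = value

    def __eq__(self, other):
        # Compare against another holder's value or a plain value.
        other_value = other._value if isinstance(other, MinimalValueHolder) else other
        return self._value == other_value

    def __add__(self, other):
        return self._value + other

    def __iadd__(self, other):
        # In-place operators mutate the held value and keep the holder's identity.
        self._value += other
        return self

    def __sub__(self, other):
        return self._value - other

    def __isub__(self, other):
        self._value -= other
        return self

    def __repr__(self):
        return repr(self._value)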
def test_read_from_opendatasoft_api():
    extract = OpenDataSoftAPI(dataset="test-a-set")
    with patch(
        "requests.get",
        return_value=ResponseMock([{"fields": {"foo": "bar"}}, {"fields": {"foo": "zab"}}]),
    ):
        for line in extract("http://example.com/", ValueHolder(0)):
            assert "foo" in line
def lineno(self, context, fs, file):
    lineno = ValueHolder(0, type=int)
    yield lineno
def acc(self, context):
    centres = yield ValueHolder(set())
    for centre in sorted(centres.get()):
        print(centre)
def start(self, context, base_url):
    yield ValueHolder(0)
def _count_counter(self, context):
    counter = ValueHolder(0)
    yield counter
    # Resumed after the node has finished: forward the final count downstream.
    context.send(Bag(counter._value))
def buffer(self, context):
    buffer = yield ValueHolder([])
    if len(buffer):
        # Pad the last buffered row to the configured length with None, then emit it.
        last_value = buffer.get()
        last_value += [None] * (self.length - len(last_value))
        context.send(*last_value)
def test_valueholder_notequal():
    x = ValueHolder(42)
    assert x != 41
    assert not (x != 42)
def counter(self, context):
    yield ValueHolder(0)
def _count(self, context):
    counter = yield ValueHolder(0)
    context.send(counter.get())
def csv_headers(self, context, fs, file):
    yield ValueHolder(self.headers)
def unique_set(self, context):
    yield ValueHolder(set())
def pickle_headers(self, context, fs, file):
    yield ValueHolder(self.item_names)
def acc(self, context, *, s3client):
    records = yield ValueHolder(dict())
    # At the end of the processing of all previous nodes,
    # execution continues from here.
    values = records.get()
    images = []
    csv_settings = dict(index=False, header=True)
    collection = {"archive": [], "path": []}
    batch_date = datetime.now().isoformat()

    for modality in ["ct", "xray", "mri"]:
        if modality in values:
            df = pd.DataFrame.from_dict(values[modality], orient="index")
            images += [df]
            file_name = f"{modality}.csv"
            output_path = file_name
            df.to_csv(output_path, **csv_settings)
            collection["archive"] += [modality]
            if not LOCAL_ONLY:
                output_path = f"{batch_date}/{file_name}"
                s3client.upload_file(output_path, file_name)
            collection["path"] += [output_path]

    patient = pd.DataFrame.from_dict(values["patient"], orient="index")
    file_name = "patient.csv"
    output_path = file_name
    patient.to_csv(output_path, **csv_settings)
    collection["archive"] += ["patient"]
    if not LOCAL_ONLY:
        output_path = f"{batch_date}/{file_name}"
        s3client.upload_file(output_path, file_name)
    collection["path"] += [output_path]

    patient = clean_data_df(patient, patient_df_pipeline)

    # This should really be called "enriched".
    patient_clean = patient_data_dicom_update(patient, images)
    file_name = "patient_clean.csv"
    output_path = file_name
    patient_clean.to_csv(output_path, **csv_settings)
    collection["archive"] += ["patient_clean"]
    if not LOCAL_ONLY:
        output_path = f"{batch_date}/{file_name}"
        s3client.upload_file(output_path, file_name)
    collection["path"] += [output_path]

    # Save storage stats.
    storage_stats = (
        pd.DataFrame.from_dict(values["stats"])
        .reset_index()
        .rename(columns={"index": "prefix", 0: "storage"})
    )
    file_name = "storage.csv"
    output_path = file_name
    storage_stats.to_csv(output_path, **csv_settings)
    collection["archive"] += ["storage"]
    if not LOCAL_ONLY:
        output_path = f"{batch_date}/{file_name}"
        s3client.upload_file(output_path, file_name)
    collection["path"] += [output_path]

    # Save a list of the latest files.
    file_name = "latest.csv"
    output_path = file_name
    pd.DataFrame.from_dict(collection, orient="columns").to_csv(
        output_path, **csv_settings
    )
    if not LOCAL_ONLY:
        s3client.upload_file(output_path, file_name)