def setup_example(self):
    """
    Setup required params for the example:
        - create cerebralcortex-kernel object
        - generate sample phone-battery data/metadata
        - create a DataStream object
        - save sample stream using cerebralcortex-kernel
    """
    # create cerebralcortex object
    self.CC = Kernel("../../conf/")

    # sample data params
    self.stream_name = "BATTERY--org.md2k.phonesensor--PHONE"
    self.user_id = "00000000-afb8-476e-9872-6472b4e66b68"

    # generate sample phone-battery data/metadata
    data = gen_phone_battery_data(user_id=self.user_id)
    metadata = gen_phone_battery_metadata(stream_name=self.stream_name)

    # create a DataStream object
    ds = DataStream(data, metadata)

    # save sample data using cerebralcortex-kernel.
    # now we have some sample data stored in CerebralCortex format to play with!
    self.CC.save_stream(ds)
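
# --- Added example (assumption, not part of the original code) ---
# A minimal sketch showing how the stream saved above could be read back by
# name through the kernel's get_stream API and previewed. The method name
# `example_read_saved_stream` is hypothetical, and the `metadata` property
# access is an assumption about the DataStream object.
def example_read_saved_stream(self):
    ds = self.CC.get_stream(stream_name=self.stream_name)
    ds.show()            # preview the saved phone-battery data
    print(ds.metadata)   # inspect the metadata attached at save time (assumption)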
def test_01_read_file(self):
    """
    Test functionality related to reading a saved stream file.
    """
    config_filepath = "./../../conf/"
    self.CC = Kernel(config_filepath, new_study=True)
    # NOTE: the original code passed Python's builtin `format` as the read format;
    # "csv" is assumed here based on the .csv.bz2 file extension.
    df = self.CC.sparkSession.read.format("csv").load(
        "/Users/ali/IdeaProjects/MD2K_DATA/cc1/data/NU/00465b72-18db-3541-9698-56584da2ff2a/00465b72-18db-3541-9698-56584da2ff2a+17284+org.md2k.datakit+PRIVACY+PHONE.csv.bz2")
    df.show()
def setUp(self):
    """
    Setup test params to begin testing with.

    Notes:
        DO NOT CHANGE PARAMS DEFINED UNDER TEST-PARAMS! OTHERWISE TESTS WILL FAIL.
        These values are hardcoded in the util/data_helper file as well.
    """
    warnings.simplefilter("ignore")
    config_filepath = "./../../conf/"

    # create sample_data directory. Note: make sure this path is the same as the filesystem path in cerebralcortex.yml
    pathlib.Path("./sample_data/").mkdir(parents=True, exist_ok=True)

    self.study_name = "default"
    self.CC = Kernel(config_filepath, study_name=self.study_name, new_study=True)
    self.cc_conf = self.CC.config

    # TEST-PARAMS
    # sql/nosql params
    self.stream_name = "battery--org.md2k.phonesensor--phone"
    self.stream_version = 1
    self.metadata_hash = "48e367f8-12f1-32d5-8d18-d17a8dae82ef"
    self.username = "******"
    self.user_id = "bfb2ca0c-e19c-3956-9db2-5459ccadd40c"
    self.user_password = "******"
    self.user_password_encrypted = "10a6e6cc8311a3e2bcc09bf6c199adecd5dd59408c343e926b129c4914f3cb01"
    self.user_role = "test_role"
    self.auth_token = "xxx"
    self.user_metadata = {"study_name": self.study_name}
    self.user_settings = {
        'mcerebrum': 'confs',
        'username': self.username,
        'user_id': self.user_id
    }

    # object test params
    self.bucket_name = "test_bucket"
    self.obj_file_path = os.getcwd() + "/sample_data/objects/test_obj.zip"
    self.obj_metadata_file_path = os.getcwd() + "/sample_data/objects/test_obj.json"

    # kafka test params
    self.test_topic_name = "test_topic"
    self.test_message = "{'msg1':'some test message'}"
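
# --- Added example (assumption, not part of the original test suite) ---
# A minimal sketch of a save/read round trip built on the params above:
# generate the sample phone-battery stream with the data_helper utilities,
# save it through the kernel, and read it back by name. The method name
# `example_round_trip` is hypothetical; get_stream usage is an assumption.
def example_round_trip(self):
    data = gen_phone_battery_data(user_id=self.user_id)
    metadata = gen_phone_battery_metadata(stream_name=self.stream_name)
    ds = DataStream(data, metadata)

    self.CC.save_stream(ds)                                   # persist the sample stream
    saved = self.CC.get_stream(stream_name=self.stream_name)  # read it back by name
    saved.show()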
def test_00(self):
    warnings.simplefilter("ignore")
    config_filepath = "./../../conf/"

    # create sample_data directory. Note: make sure this path is the same as the filesystem path in cerebralcortex.yml
    pathlib.Path("./sample_data/").mkdir(parents=True, exist_ok=True)

    self.study_name = "dbdp"
    self.CC = Kernel(config_filepath, study_name=self.study_name)
    spark = self.CC.sparkSession

    # load the sample CGM glucose data and wrap it in a DataStream
    df = spark.read.load(
        "./sample_data/data/study=dbdp/stream=cgm_glucose_variability_metrics/"
    )
    df.show()
    ds = DataStream(data=df, metadata=Metadata())

    # compute glucose variability metrics on the sample stream
    results = glucose_var(ds)
    results.show()
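
# --- Added example (assumption, not part of the original test) ---
# A minimal sketch showing how the computed metrics could be persisted back
# through the kernel so they can be retrieved later like any other stream.
# The method name `example_save_glucose_metrics` is hypothetical, and it
# assumes glucose_var attaches usable metadata to its output DataStream.
def example_save_glucose_metrics(self):
    df = self.CC.sparkSession.read.load(
        "./sample_data/data/study=dbdp/stream=cgm_glucose_variability_metrics/"
    )
    results = glucose_var(DataStream(data=df, metadata=Metadata()))
    self.CC.save_stream(results)  # stored like any other CerebralCortex stream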
from cerebralcortex.core.datatypes import DataStream
from cerebralcortex.core.metadata_manager.stream import Metadata
from cerebralcortex.kernel import Kernel
from cerebralcortex.test_suite.util.data_helper import gen_phone_battery_data, gen_phone_battery_metadata


class ImportData:

    def test_01_read_file(self):
        """
        Test functionality related to reading/importing raw data files.
        """
        config_filepath = "./../../conf/"
        self.CC = Kernel(config_filepath, new_study=True)
        # NOTE: the original code passed Python's builtin `format` as the read format;
        # "csv" is assumed here based on the .csv.bz2 file extension.
        df = self.CC.sparkSession.read.format("csv").load(
            "/Users/ali/IdeaProjects/MD2K_DATA/cc1/data/NU/00465b72-18db-3541-9698-56584da2ff2a/00465b72-18db-3541-9698-56584da2ff2a+17284+org.md2k.datakit+PRIVACY+PHONE.csv.bz2")
        df.show()

        config_filepath = "./../../conf/"
        CC = Kernel(config_filepath, new_study=True)
        column_names = ["timestamp", "user", "version", "latitude", "longitude", "altitude", "speed", "bearing", "accuracy"]
        # column_names = ["timestamp", "localtime", "ecg", "version", "user"]
        df = CC.read_csv("/Users/ali/IdeaProjects/MD2K_DATA/demo/csv_data/gps_with_timestamp_column.csv",
                         stream_name="sample-stream-data", column_names=column_names)
        # df = CC.sparkSession.read.format("csv").schema("ts Date, offset Integer, data String").load("/Users/ali/IdeaProjects/MD2K_DATA/cc1/data/NU/00465b72-18db-3541-9698-56584da2ff2a/00465b72-18db-3541-9698-56584da2ff2a+17284+org.md2k.datakit+PRIVACY+PHONE.csv.bz2")
        # CC.save_data_to_influxdb(df)
        ss = df.window(groupByColumnName=["user"])
        print(type(ss))
        df.show(truncate=False)
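
    # --- Added example (assumption, not part of the original test) ---
    # A minimal sketch showing how the same GPS CSV could be read and
    # aggregated with plain PySpark (average speed and point count per user).
    # The method name `example_aggregate_gps` is hypothetical; it reuses the
    # CSV path and column names from test_01_read_file above.
    def example_aggregate_gps(self):
        from pyspark.sql import functions as F

        config_filepath = "./../../conf/"
        CC = Kernel(config_filepath, new_study=True)
        column_names = ["timestamp", "user", "version", "latitude", "longitude",
                        "altitude", "speed", "bearing", "accuracy"]

        # read with plain PySpark and assign the expected column names
        df = CC.sparkSession.read.csv(
            "/Users/ali/IdeaProjects/MD2K_DATA/demo/csv_data/gps_with_timestamp_column.csv"
        ).toDF(*column_names)

        # average speed and number of GPS points per user
        summary = df.groupBy("user").agg(
            F.avg(F.col("speed").cast("double")).alias("avg_speed"),
            F.count("*").alias("num_points")
        )
        summary.show(truncate=False)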