def setup_example(self):
    """
    Prepare everything the example needs.

    Steps:
        - instantiate the cerebralcortex-kernel object
        - build sample phone-battery data and its metadata
        - wrap both in a DataStream object
        - persist the sample stream through cerebralcortex-kernel
    """
    # kernel object, configured from the relative conf directory
    self.CC = Kernel("../../conf/")

    # identifiers of the sample stream and of the user owning it
    self.user_id = "00000000-afb8-476e-9872-6472b4e66b68"
    self.stream_name = "BATTERY--org.md2k.phonesensor--PHONE"

    # sample phone-battery readings plus the metadata describing them
    battery_data = gen_phone_battery_data(user_id=self.user_id)
    battery_metadata = gen_phone_battery_metadata(stream_name=self.stream_name)

    # bundle data and metadata into one DataStream and store it, so that
    # later steps have a stream to read back
    sample_stream = DataStream(battery_data, battery_metadata)
    self.CC.save_stream(sample_stream)
class TestCerebralCortex(unittest.TestCase, NoSqlStorageTest, SqlStorageTest):
    """Test case that sets up a Kernel object and the shared test parameters.

    Inherits from NoSqlStorageTest and SqlStorageTest in addition to
    unittest.TestCase.
    """

    def setUp(self):
        """
        Setup test params to begin testing with.

        Notes:
            DO NOT CHANGE PARAMS DEFINED UNDER TEST-PARAMS! OTHERWISE TESTS WILL FAIL.
            These values are hardcoded in util/data_helper file as well.
        """
        warnings.simplefilter("ignore")
        config_filepath = "./../../conf/"

        # create sample_data directory.
        # Note: make sure this path is same as the filesystem path in cerebralcortex.yml
        pathlib.Path("./sample_data/").mkdir(parents=True, exist_ok=True)

        self.study_name = "default"
        self.CC = Kernel(config_filepath, study_name=self.study_name, new_study=True)
        self.cc_conf = self.CC.config

        # TEST-PARAMS
        # sql/nosql params
        self.stream_name = "battery--org.md2k.phonesensor--phone"
        self.stream_version = 1
        self.metadata_hash = "48e367f8-12f1-32d5-8d18-d17a8dae82ef"
        self.username = "******"
        self.user_id = "bfb2ca0c-e19c-3956-9db2-5459ccadd40c"
        self.user_password = "******"
        self.user_password_encrypted = "10a6e6cc8311a3e2bcc09bf6c199adecd5dd59408c343e926b129c4914f3cb01"
        self.user_role = "test_role"
        self.auth_token = "xxx"
        self.user_metadata = {"study_name": self.study_name}
        self.user_settings = {"mcerebrum": "confs"}

        # object test params
        self.bucket_name = "test_bucket"
        self.obj_file_path = os.getcwd() + "/sample_data/objects/test_obj.zip"
        self.obj_metadata_file_path = os.getcwd() + "/sample_data/objects/test_obj.json"

        # kafka test params
        self.test_topic_name = "test_topic"
        self.test_message = "{'msg1':'some test message'}"

    def test_00(self):
        """
        Create the configured filesystem directory and the test user.

        Both steps are best-effort: a failure is logged instead of being
        raised, so this method never fails the run by itself.
        """
        import logging

        log = logging.getLogger(__name__)

        # Directory creation and user creation are attempted independently so
        # that a filesystem problem does not silently skip the user creation.
        try:
            os.makedirs(self.cc_conf["filesystem"]["filesystem_path"], exist_ok=True)
        except Exception as err:
            log.warning("Could not create filesystem path: %s", err)

        try:
            self.CC.create_user(self.username, self.user_password, self.user_role,
                                self.user_metadata, self.user_settings)
        except Exception as err:
            # presumably raised when the user already exists from an earlier
            # run; verify against Kernel.create_user
            log.warning("Could not create test user: %s", err)
class Examples:
    def __init__(self):
        """
        Load/set example params/data and run the GPS clustering example.

        This example performs the following operations:
            - create a sample location data stream
            - group the stream by user and compute GPS clusters
            - print the resulting cluster centroids

        Notes:
            this example generates some random gps coordinates of Memphis, TN
        """
        self.setup_example()
        self.gps_clustering()

    def setup_example(self):
        """
        Setup required params for the example:
            - create cerebralcortex-kernel object
            - generate a sample location DataStream
            - save the sample stream using cerebralcortex-kernel.
        """
        # create cerebralcortex object
        self.CC = Kernel("../../conf/")

        # sample data params
        self.stream_name = "LOCATION--org.md2k.phonesensor--PHONE"
        self.user_id = "00000000-afb8-476e-9872-6472b4e66b68"

        # generate sample location datastream
        ds = gen_location_datastream(user_id=self.user_id, stream_name=self.stream_name)

        # save sample data using cerebralcortex-kernel.
        # now we have some sample data stored in CerebralCortex format to play with!!!
        self.CC.save_stream(ds)

    def gps_clustering(self):
        """
        Read the sample location stream back, compute GPS clusters per user
        and print the result.
        """
        # get stream data; reuse the name stored by setup_example instead of
        # repeating the literal, so the two cannot drift apart
        gps_stream = self.CC.get_stream(self.stream_name)

        # apply GPS clustering algorithm
        centroids = gps_stream.groupby("user").compute(gps_clusters)

        # print the centroids
        print("*" * 10, " CLUSTER CENTROIDS COORDINATES ", "*" * 10)
        centroids.show(truncate=False)
def test_01_read_file(self):
    """
    Read a bz2-compressed CSV file through the kernel's Spark session and
    display its content.
    """
    config_filepath = "./../../conf/"
    self.CC = Kernel(config_filepath, new_study=True)

    # NOTE(review): machine-specific absolute path; this only runs where the
    # file exists at exactly this location.
    csv_path = "/Users/ali/IdeaProjects/MD2K_DATA/cc1/data/NU/00465b72-18db-3541-9698-56584da2ff2a/00465b72-18db-3541-9698-56584da2ff2a+17284+org.md2k.datakit+PRIVACY+PHONE.csv.bz2"

    # The reader needs the format name as a string. The previous code passed
    # the Python builtin `format` function; the file is a CSV, so use "csv".
    df = self.CC.sparkSession.read.format("csv").load(csv_path)
    df.show()
def test_00(self):
    """
    Load the stored glucose-variability sample data, wrap it in a DataStream
    and display the output of glucose_var for it.
    """
    warnings.simplefilter("ignore")

    # create sample_data directory.
    # Note: make sure this path is same as the filesystem path in cerebralcortex.yml
    sample_dir = pathlib.Path("./sample_data/")
    sample_dir.mkdir(parents=True, exist_ok=True)

    self.study_name = "dbdp"
    self.CC = Kernel("./../../conf/", study_name=self.study_name)

    # read the stored stream data straight through the kernel's Spark session
    raw_df = self.CC.sparkSession.read.load(
        "./sample_data/data/study=dbdp/stream=cgm_glucose_variability_metrics/"
    )
    raw_df.show()

    # wrap the loaded data with a default-constructed Metadata object
    blank_metadata = Metadata()
    stream = DataStream(data=raw_df, metadata=blank_metadata)

    variability = glucose_var(stream)
    variability.show()
from cerebralcortex.core.datatypes import DataStream
from cerebralcortex.core.metadata_manager.stream import Metadata
from cerebralcortex.test_suite.util.data_helper import gen_phone_battery_data, gen_phone_battery_metadata

# NOTE(review): `Kernel` is used below but is not imported in the visible part
# of this file - confirm where it is expected to come from.


class ImportData:
    def test_01_read_file(self):
        """
        Read a bz2-compressed CSV file through the kernel's Spark session and
        display its content.
        """
        config_filepath = "./../../conf/"
        self.CC = Kernel(config_filepath, new_study=True)

        # NOTE(review): machine-specific absolute path; this only runs where
        # the file exists at exactly this location.
        csv_path = "/Users/ali/IdeaProjects/MD2K_DATA/cc1/data/NU/00465b72-18db-3541-9698-56584da2ff2a/00465b72-18db-3541-9698-56584da2ff2a+17284+org.md2k.datakit+PRIVACY+PHONE.csv.bz2"

        # The reader needs the format name as a string. The previous code
        # passed the Python builtin `format` function; the file is a CSV.
        df = self.CC.sparkSession.read.format("csv").load(csv_path)
        df.show()


# Script section: read a GPS CSV file as a stream, window it by user and
# display the stream.
config_filepath = "./../../conf/"
CC = Kernel(config_filepath, new_study=True)

column_names = ["timestamp", "user", "version", "latitude", "longitude", "altitude", "speed", "bearing", "accuracy"]
#column_names = ["timestamp","localtime","ecg","version", "user"]
df = CC.read_csv("/Users/ali/IdeaProjects/MD2K_DATA/demo/csv_data/gps_with_timestamp_column.csv", stream_name="sample-stream-data", column_names=column_names)
#df=CC.sparkSession.read.format("csv").schema("ts Date, offset Integer, data String").load("/Users/ali/IdeaProjects/MD2K_DATA/cc1/data/NU/00465b72-18db-3541-9698-56584da2ff2a/00465b72-18db-3541-9698-56584da2ff2a+17284+org.md2k.datakit+PRIVACY+PHONE.csv.bz2")
#CC.save_data_to_influxdb(df)

# window the stream grouped by user and report the type of the result
ss = df.window(groupByColumnName=["user"])
print(type(ss))
df.show(truncate=False)
class Examples:
    def __init__(self, example_name):
        """
        Prepare the sample data, then run the requested example.

        The windowing example performs the following operations:
            - create sample phone battery data stream
            - perform windowing operation on the stream
            - store windowed data as a new stream

        Args:
            example_name: name of the example to run; only "window" starts an example
        """
        self.setup_example()

        wants_window = example_name == "window"
        if wants_window:
            self.window_example()

    def setup_example(self):
        """
        Prepare everything the example needs:
            - instantiate the cerebralcortex-kernel object
            - build sample phone-battery data and its metadata
            - wrap both in a DataStream object
            - persist the sample stream through cerebralcortex-kernel
        """
        # kernel object, configured from the relative conf directory
        self.CC = Kernel("../../conf/")

        # sample data params
        self.stream_name = "BATTERY--org.md2k.phonesensor--PHONE"
        self.user_id = "00000000-afb8-476e-9872-6472b4e66b68"

        # sample phone-battery readings plus the metadata describing them
        battery_data = gen_phone_battery_data(user_id=self.user_id)
        battery_metadata = gen_phone_battery_metadata(stream_name=self.stream_name)

        # bundle both into a DataStream and store it, so that the example has
        # a stream to read back
        sample_stream = DataStream(battery_data, battery_metadata)
        self.CC.save_stream(sample_stream)

    def window_example(self):
        """
        Window the sample phone battery stream with windowDuration=60, show
        the first rows and store the windowed data as a new stream.
        """
        # read the sample stream back and window it
        source_stream = self.CC.get_stream(self.stream_name)
        windowed = source_stream.window(windowDuration=60)
        windowed.show(5)

        # describe the new stream step by step; each call is applied to the
        # value returned by the previous one, exactly like a chained call
        windowed_name = "BATTERY--org.md2k.phonesensor--PHONE-windowed-data"
        stream_meta = windowed.metadata.set_name(windowed_name)
        stream_meta = stream_meta.set_description("1 minute windowed data of phone battery with average battery levels of each window.")

        window_bounds = DataDescriptor().set_attribute("description", "start/end time of a window")
        stream_meta = stream_meta.add_dataDescriptor(window_bounds)

        window_average = DataDescriptor().set_attribute("description", "average battery values of a window")
        stream_meta = stream_meta.add_dataDescriptor(window_average)

        module_info = ModuleMetadata().set_name("cerebralcortex.examples.main")
        module_info = module_info.set_version("0.1.2")
        module_info = module_info.set_attribute("description", "CerebralCortex-kernel example code to window phone battery data")
        module_info = module_info.set_author("test_user", "test_user@test_email.com")
        stream_meta.add_module(module_info)

        # save newly created data as a new stream in cerebralcortex
        stored = self.CC.save_stream(windowed)
        if stored:
            print(windowed_name, "has been stored.\n\n")
class Examples:
    def __init__(self):
        """
        Load/set example params/data and run the basic DataStream operations example.

        This example performs the following operations:
            - create sample phone battery data stream
            - list and search the stored streams
            - read the stream back and show, filter and window it
        """
        self.setup_example()
        self.basic_datastream_operations()

    def setup_example(self):
        """
        Setup required params for the example:
            - create cerebralcortex-kernel object
            - generate sample phone-battery data/metadata
            - create a DataStream object
            - save sample stream using cerebralcortex-kernel.
        """
        # create cerebralcortex object
        self.CC = Kernel("../../conf/")

        # sample data params
        self.stream_name = "BATTERY--org.md2k.phonesensor--PHONE"
        self.user_id = "00000000-afb8-476e-9872-6472b4e66b68"

        # generate sample phone-battery data/metadata
        data = gen_phone_battery_data(user_id=self.user_id)
        metadata = gen_phone_battery_metadata(stream_name=self.stream_name)

        # create a DataStream object
        ds = DataStream(data, metadata)

        # save sample data using cerebralcortex-kernel.
        # now we have some sample data stored in CerebralCortex format to play with!!!
        self.CC.save_stream(ds)

    def basic_datastream_operations(self):
        """
        Walk through basic operations on the sample phone battery stream:
        listing and searching streams, reading metadata, showing rows,
        filtering by value/user/version and windowing.

        The result of every filter/window call is kept and displayed, so the
        printed rows correspond to the sample output shown in the comments.
        """
        streams = self.CC.list_streams()
        print(streams)
        # [{'name': 'BATTERY--org.md2k.phonesensor--PHONE', 'version': 1, 'description': [''], 'metadata_hash':.......]

        stream_names = self.CC.search_stream("battery")
        print(stream_names)
        # ['BATTERY--org.md2k.phonesensor--PHONE', 'BATTERY--org.md2k.phonesensor--PHONE-windowed-data']

        # get sample stream data
        ds = self.CC.get_stream(self.stream_name)

        metadata = ds.get_metadata(version=1)
        print(metadata)
        # {'name': 'BATTERY--org.md2k.phonesensor--PHONE', 'version': 1, 'description': [''], 'metadata_ha.........

        ds.show(3, False)
        # +-------------------+--------+-------------+---+-------+------------------------------------+
        # |timestamp |offset |battery_level|ver|version|user |
        # +-------------------+--------+-------------+---+-------+------------------------------------+
        # |2019-01-09 11:49:28|21600000|92 |1 |1 |00000000-afb8-476e-9872-6472b4e66b68|
        # |2019-01-09 11:49:29|21600000|92 |1 |1 |00000000-afb8-476e-9872-6472b4e66b68|
        # |2019-01-09 11:49:30|21600000|92 |1 |1 |00000000-afb8-476e-9872-6472b4e66b68|
        # +-------------------+--------+-------------+---+-------+------------------------------------+

        # keep the returned stream: the call's result was previously discarded,
        # so the following show() displayed the unfiltered data
        ds = ds.filter("battery_level", ">", 97)
        ds.show(3, False)
        # +-------------------+--------+-------------+---+-------+------------------------------------+
        # |timestamp |offset |battery_level|ver|version|user |
        # +-------------------+--------+-------------+---+-------+------------------------------------+
        # |2019-01-09 11:39:08|21600000|98 |1 |1 |00000000-afb8-476e-9872-6472b4e66b68|
        # |2019-01-09 11:39:09|21600000|98 |1 |1 |00000000-afb8-476e-9872-6472b4e66b68|
        # |2019-01-09 11:39:10|21600000|98 |1 |1 |00000000-afb8-476e-9872-6472b4e66b68|
        # +-------------------+--------+-------------+---+-------+------------------------------------+

        ds = ds.filter_user("00000000-afb8-476e-9872-6472b4e66b68")
        ds.show(3, False)
        # +-------------------+--------+-------------+---+-------+------------------------------------+
        # |timestamp |offset |battery_level|ver|version|user |
        # +-------------------+--------+-------------+---+-------+------------------------------------+
        # |2019-01-09 11:39:08|21600000|98 |1 |1 |00000000-afb8-476e-9872-6472b4e66b68|
        # |2019-01-09 11:39:09|21600000|98 |1 |1 |00000000-afb8-476e-9872-6472b4e66b68|
        # |2019-01-09 11:39:10|21600000|98 |1 |1 |00000000-afb8-476e-9872-6472b4e66b68|
        # +-------------------+--------+-------------+---+-------+------------------------------------+

        ds = ds.filter_version("1")
        ds.show(3, False)
        # +-------------------+--------+-------------+---+-------+------------------------------------+
        # |timestamp |offset |battery_level|ver|version|user |
        # +-------------------+--------+-------------+---+-------+------------------------------------+
        # |2019-01-09 11:39:08|21600000|98 |1 |1 |00000000-afb8-476e-9872-6472b4e66b68|
        # |2019-01-09 11:39:09|21600000|98 |1 |1 |00000000-afb8-476e-9872-6472b4e66b68|
        # |2019-01-09 11:39:10|21600000|98 |1 |1 |00000000-afb8-476e-9872-6472b4e66b68|
        # +-------------------+--------+-------------+---+-------+------------------------------------+

        ds = ds.window(windowDuration=60)
        ds.show(3, False)
        # +------------------------------------+------------------------------------------+---------------------------------
        # |user |window |collect_list(battery_level)
        # +------------------------------------+------------------------------------------+---------------------------------
        # |00000000-afb8-476e-9872-6472b4e66b68|[2019-01-09 11:35:00, 2019-01-09 11:36:00]|[100, 100, 100, 100, 100.........
        # |00000000-afb8-476e-9872-6472b4e66b68|[2019-01-09 11:38:00, 2019-01-09 11:39:00]|[99, 99, 99, 99, 99, 99, 99,.....
        # |00000000-afb8-476e-9872-6472b4e66b68|[2019-01-09 11:39:00, 2019-01-09 11:40:00]|[98, 98, 98, 98, 98, 98, 98,.....
        # +------------------------------------+------------------------------------------+---------------------------------

        # NOTE(review): the return value of to_pandas() is discarded here as
        # well; its return type is not visible from this file, so the call is
        # left unchanged - confirm whether the result should be printed instead.
        ds.to_pandas()
        print(ds.data)