class DataSource(object):
    """Abstract representation of a data source fed by the XOMBIE stream.

    Handles pushing data to possibly multiple listeners in a thread-safe
    manner.

    Class variables (declared elsewhere — TODO confirm):
        sources - a mapping from signal-names to all live data sources
    Class methods (declared elsewhere — TODO confirm):
        find - either finds the existing data source for some signal name,
               or creates a new one for that signal

    Instance variables:
        name  - the signal name that this data source tracks, in the format
                id-in-hex:message-name.  For example, the identifier for the
                Tritium Motor Drive Command Motor current is
                "0x501:Motor Current"
        queue - the internal data queue that the data source uses to pull
                data from the stream in a thread-safe manner
        data  - the GraphData object that handles filtering (not used right
                now) and storing the data for use with collections

    Method summary:
        put  - adds one (time, datum) point from the stream to the internal
               queue and tracks the newest timestamp seen
        pull - drains the internal queue into the GraphData storage
    """

    def __init__(self, identifier, desc=None):
        self.name = identifier
        self.queue = PriorityQueue()
        self.data = GraphData([])
        self.descriptor = desc
        # Far-past sentinel so the first real timestamp always wins max().
        self.last_received = datetime.datetime(1993, 6, 20)

    def __hash__(self):
        return hash(self.name)

    def __eq__(self, other):
        # Fix: comparing against a non-DataSource used to raise
        # AttributeError; defer to the other operand instead.
        if not isinstance(other, DataSource):
            return NotImplemented
        return self.name == other.name

    def put(self, point):
        "Add data from the stream to the internal data queue"
        time, datum = point
        self.queue.put(point)
        self.last_received = max(self.last_received, time)

    def pull(self):
        "Adds all of the data from the stream's queue to its internal queue"
        # Fix: empty()-then-get_nowait() is racy when another thread drains
        # the queue concurrently; catch Empty instead of pre-checking.
        from queue import Empty  # local import: file's import block not visible here
        while True:
            try:
                point = self.queue.get_nowait()
            except Empty:
                break
            self.data.addPoint(point)

    def __repr__(self):
        return "DataSource(%r)" % self.name
def __init__(self, args):
    """Build the controller: store CLI flags, create model/view, pick default mode."""
    self.animate_alarm = None
    # Output flags come straight from the parsed command-line arguments.
    self.save_csv = args.csv
    self.terminal = args.terminal
    self.json = args.json
    # Model objects, then the view (which gets a back-reference to us).
    self.mode = GraphMode()
    self.data = GraphData(is_admin=is_admin)
    self.view = GraphView(self)
    # Fall back to the first available mode and tell the view about it.
    default_mode = self.get_modes()[0]
    self.mode.set_mode(default_mode)
    self.view.on_mode_change(default_mode)
    self.view.update_displayed_information()
def __init__(self, identifier, desc=None):
    """Create a data source tracking *identifier*, optionally tagged with *desc*."""
    self.name = identifier
    self.descriptor = desc
    # Thread-safe buffer between the stream and the stored data.
    self.queue = PriorityQueue()
    self.data = GraphData([])
    # Far-past sentinel timestamp so any real point compares newer.
    self.last_received = datetime.datetime(1993, 6, 20)
class GraphClusterer(object):
    """Orchestrates the G-MKNN graph clustering algorithm.

    Class attributes hold the shared configuration (ConfigData) and the input
    graph (GraphData); an instance runs phase 1 and phase 2 of the algorithm
    for each K in [K_min, K_max] and collects evaluation results.
    """

    CODE_DIR = '/Users/divya/work/repo/Dissertation'
    DATA_DIR = '/Users/divya/Documents/input/Dissertation/data'
    LOG_DIR = '/Users/divya/Documents/logs/Dissertation'
    configdata = ConfigData(CODE_DIR, DATA_DIR, LOG_DIR)
    graphdata = GraphData()

    def __init__(self, K):
        # Per-run phase state; populated by MKNN_worker.
        self.phase1data = None
        self.phase2data = None
        # Accumulated evaluation results across all values of K.
        self.phase1_allKevaluationdata = None
        self.phase2_allKevaluationdata = None
        self.helper = MKNN_Helper()

    def MKNN_worker_wrapper(self):
        """Initialize MKNN, run it for every K in [K_min, K_max], then plot results."""
        self.MKNN_init()
        logger = GraphClusterer.configdata.logger
        logger.info("Initialization phase of GMKNN ends.")
        logger.info("Working of G-MKNN begins")
        logger.info("Running G-MKNN for different values of K.")
        # Fixes: int(...) instead of C-style (int)(...) casts, and no need to
        # materialize the range into a list just to iterate it.
        for K in range(int(GraphClusterer.configdata.K_min),
                       int(GraphClusterer.configdata.K_max) + 1):
            # Lazy logging args instead of two eager info() calls.
            logger.info("Running G-MKNN for the value of K: %s", K)
            self.MKNN_worker(K)
        # Plot evaluation measures across all values of K for both phases.
        self.phase1_allKevaluationdata.plot_evaluation_measures_for_all_K()
        self.phase2_allKevaluationdata.plot_evaluation_measures_for_all_K()

    def MKNN_init(self):
        """Set up configuration and build the input matrices for the algorithm."""
        GraphClusterer.configdata.do_config()
        # Fix: removed the no-op expression statement
        # `GraphClusterer.configdata.data_dir` that followed do_config().
        logger = GraphClusterer.configdata.logger
        logger.debug("Debugging from inside MKNN_init method")
        # Create SM / SM_orig from the relation file, expand SM, build edges.
        GraphClusterer.graphdata.create_SM_from_relfile(
            GraphClusterer.configdata.inp_rel_file)
        GraphClusterer.graphdata.setup_expanded_SM(
            GraphClusterer.configdata.nhops,
            GraphClusterer.configdata.inp_rel_file)
        GraphClusterer.graphdata.create_edge_objects()
        # All-K evaluation collectors for phase 1 and phase 2 (consistency
        # fix: access the class attribute the same way as everywhere else).
        self.phase1_allKevaluationdata = AllKEvaluationData(
            GraphClusterer.configdata, 1)  # 1 for phase=1
        self.phase2_allKevaluationdata = AllKEvaluationData(
            GraphClusterer.configdata, 2)  # 2 for phase=2

    def MKNN_worker(self, K):
        """Run both phases of MKNN for a single value of K."""
        self.phase1data = Phase1Data(GraphClusterer.graphdata,
                                     GraphClusterer.configdata, K)
        self.MKNN_Phase1()
        # Phase 2 continues from the cluster state produced by phase 1.
        self.phase2data = Phase2Data(GraphClusterer.graphdata,
                                     GraphClusterer.configdata,
                                     K,
                                     self.phase1data.cnodes_dict,
                                     self.phase1data.next_cluster_label,
                                     self.phase1data.num_clusters)
        self.MKNN_Phase2()

    def MKNN_Phase1(self):
        """Initialize, execute, visualize and evaluate phase 1."""
        self.phase1data.initialize_phase()
        # Debug output for node 10's MKNN list and degree.
        self.helper.print_list(self.phase1data.graphdata.node_dict[10].MKNN_list)
        print('Degree')
        print(self.phase1data.graphdata.node_dict[10].degree)
        print('CI_list')
        # Consistency fix: graphdata is a class attribute; reference it via
        # the class like the rest of this file does.
        self.helper.convert_list_ids_to_codes(
            GraphClusterer.graphdata, self.phase1data.cluster_initiator_list)
        self.phase1data.execute_phase()
        self.phase1data.visualize_phase()
        self.phase1data.evaluate_phase()
        self.phase1_allKevaluationdata.add_evaluation_for_K(
            self.phase1data.phase1_evaluation_data)

    def MKNN_Phase2(self):
        """Initialize, execute, visualize and evaluate phase 2, then print metrics."""
        self.phase2data.initialize_phase()
        self.phase2data.execute_phase()
        self.phase2data.visualize_phase()
        self.phase2data.evaluate_phase()
        self.phase2_allKevaluationdata.add_evaluation_for_K(
            self.phase2data.phase2_evaluation_data)
        # Debug dump of the evaluation metrics for this K.
        self.helper.print_list(
            self.phase2data.phase2_evaluation_data.contingency_matrix)
        print("sensitivity:")
        print(self.phase2data.phase2_evaluation_data.sensitivity)
        print("PPV")
        print(self.phase2data.phase2_evaluation_data.PPV)
        print("accuracy")
        print(self.phase2data.phase2_evaluation_data.accuracy)
class GraphController:
    """
    A class responsible for setting up the model and view and running
    the application.
    """

    def __init__(self, args):
        self.animate_alarm = None
        # Output flags from the parsed command-line arguments.
        self.save_csv = args.csv
        self.terminal = args.terminal
        self.json = args.json
        self.mode = GraphMode()
        self.data = GraphData(is_admin=is_admin)
        self.view = GraphView(self)
        # use the first mode as the default
        mode = self.get_modes()[0]
        self.mode.set_mode(mode)
        # update the view
        self.view.on_mode_change(mode)
        self.view.update_displayed_information()

    def get_modes(self):
        """Allow our view access to the list of modes."""
        return self.mode.get_modes()

    def set_mode(self, m):
        """Allow our view to set the mode."""
        rval = self.mode.set_mode(m)
        self.view.update_displayed_information()
        return rval

    def main(self):
        """Start the urwid main loop; in terminal/json mode only one update runs."""
        self.loop = MainLoop(self.view, DEFAULT_PALETTE)
        self.animate_graph()
        if not (self.terminal or self.json):
            self.loop.run()

    def animate_graph(self, loop=None, user_data=None):
        """update the graph and schedule the next update"""
        # Width of bar graph is needed to know how long of a list of data
        # to keep.
        self.data.update_data()
        if self.terminal:
            self.data.output_to_terminal()
        if self.json:
            self.data.output_json()
        if self.save_csv:
            self.data.output_to_csv(DEFAULT_CSV_FILE)
        self.view.update_displayed_information()
        self.animate_alarm = self.loop.set_alarm_in(
            UPDATE_INTERVAL, self.animate_graph)

    def start_stress(self):
        """Kill any running stress process and start one for the current mode.

        Fix applied throughout: bare `except:` narrowed to `except Exception:`
        so KeyboardInterrupt/SystemExit are no longer swallowed; logging uses
        lazy %-style arguments instead of eager string concatenation.
        """
        mode = self.mode
        if mode.get_current_mode() == 'Stress Operation':
            try:
                kill_child_processes(mode.get_stress_process())
            except Exception:
                logging.debug('Could not kill process')
            # Build the stress(1) command line from the menu settings.
            stress_cmd = [stress_program]
            if int(self.view.stress_menu.sqrt_workers) > 0:
                stress_cmd.append('-c')
                stress_cmd.append(self.view.stress_menu.sqrt_workers)
            if int(self.view.stress_menu.sync_workers) > 0:
                stress_cmd.append('-i')
                stress_cmd.append(self.view.stress_menu.sync_workers)
            if int(self.view.stress_menu.memory_workers) > 0:
                stress_cmd.append('--vm')
                stress_cmd.append(self.view.stress_menu.memory_workers)
                stress_cmd.append('--vm-bytes')
                stress_cmd.append(self.view.stress_menu.malloc_byte)
                stress_cmd.append('--vm-stride')
                stress_cmd.append(self.view.stress_menu.byte_touch_cnt)
            if self.view.stress_menu.no_malloc:
                stress_cmd.append('--vm-keep')
            if int(self.view.stress_menu.write_workers) > 0:
                stress_cmd.append('--hdd')
                stress_cmd.append(self.view.stress_menu.write_workers)
                stress_cmd.append('--hdd-bytes')
                stress_cmd.append(self.view.stress_menu.write_bytes)
            if self.view.stress_menu.time_out != 'none':
                stress_cmd.append('-t')
                stress_cmd.append(self.view.stress_menu.time_out)
            with open(os.devnull, 'w') as DEVNULL:
                try:
                    stress_proc = subprocess.Popen(
                        stress_cmd, stdout=DEVNULL, stderr=DEVNULL,
                        shell=False)
                    mode.set_stress_process(psutil.Process(stress_proc.pid))
                except Exception:
                    logging.debug("Unable to start stress")
            # Reset the performance counters for the new stress run.
            self.data.max_perf_lost = 0
            self.data.samples_taken = 0
        elif mode.get_current_mode() == 'FIRESTARTER':
            logging.debug('Started FIRESTARTER mode')
            try:
                kill_child_processes(mode.get_stress_process())
            except Exception:
                logging.debug('Could not kill process')
            stress_cmd = [os.path.join(os.getcwd(), fire_starter)]
            with open(os.devnull, 'w') as DEVNULL:
                try:
                    stress_proc = subprocess.Popen(
                        stress_cmd, stdout=DEVNULL, stderr=DEVNULL,
                        shell=False)
                    mode.set_stress_process(psutil.Process(stress_proc.pid))
                    logging.debug('Started process%s',
                                  mode.get_stress_process())
                except Exception:
                    logging.debug("Unable to start stress")
        else:
            logging.debug('Regular operation mode')
            try:
                kill_child_processes(mode.get_stress_process())
            except Exception:
                # get_stress_process() itself may raise while formatting
                # the message; fall back to a static message if so.
                try:
                    logging.debug('Could not kill process%s',
                                  mode.get_stress_process())
                except Exception:
                    logging.debug('Could not kill process FIRESTARTER')
def NormalizeData(rootDir, codeDir, uniqueDayArray):
    """
    Written by Kevin L. Turner

    Purpose: filter and resample data based on dataType

    Inputs: raw data array
    Outputs: processed data array

    Last Revised: April 2nd, 2019
    """
    import numpy as np
    import pandas as pd
    import sys
    import os

    sys.path.append(codeDir)
    from GraphData import GraphData

    # Load the baseline csv file
    baseFileStr = ("baselineInfo.csv")

    # Determine the number of unique file dates used in baseline calculations.
    # NOTE(review): uniqueDays is computed but never used below — the per-day
    # baselines actually come from the uniqueDayArray parameter. Kept (the
    # read_csv doubles as a sanity check that the file exists); confirm
    # before deleting.
    allDays = pd.read_csv(rootDir + baseFileStr)
    allDays['fileIDs'] = allDays['fileIDs'].str[2:8]
    uniqueDays = np.sort(list(set(allDays.iloc[:, 0])))  # ascending order

    # Build the sorted list of all processed csv data files (15-char stems).
    allProcDataFiles = sorted(
        files[0:15] for files in os.listdir(rootDir)
        if files.endswith('ProcData.csv'))

    for a, procDataFile in enumerate(allProcDataFiles):
        print("\n\nNormalizing data from file number", a, "->", procDataFile)
        procData = pd.read_csv(rootDir + procDataFile + '_ProcData.csv')
        procData = procData.drop(columns='Unnamed: 0')
        day = procDataFile[0:6]  # date prefix of the file name
        baseData = uniqueDayArray.loc[[day]]
        dataTypes = list(baseData.columns.values)

        normData = pd.DataFrame()
        for dataType in dataTypes:
            if dataType in ('forceSensor', 'whiskerAngle'):
                # These channels are passed through without normalization.
                normArray = pd.Series.to_frame(procData.loc[:, dataType])
            else:
                # Normalize to the per-day baseline: (x - base) / base.
                baseVal = float(baseData.loc[:, dataType])
                dataArray = procData.loc[:, dataType]
                normArray = pd.Series.to_frame((dataArray - baseVal) / baseVal)
            normData = pd.concat([normData, normArray], axis=1)

        GraphData(normData, procDataFile, rootDir, 'Norm')
        # NOTE(review): '_Normata.csv' looks like a typo for '_NormData.csv',
        # but downstream readers may depend on this exact name — left as-is.
        normData.to_csv(rootDir + procDataFile + '_Normata.csv')
    return
def ConvMAT2CSV(rootDir, codeDir):
    """
    Written by Christina Echagarruga and Kevin L. Turner
    to work with macOS/Unix-based systems

    Purpose: Extract data from .mat files and format into DataFrames
             Export as csv file

    Inputs: PythonData.mat files, animalNotes_baselines.mat file
    Outputs: .csv files

    Last Revised: April 2nd, 2019
    """
    from scipy.io import loadmat
    import numpy as np
    import pandas as pd
    import sys
    import os

    sys.path.append(codeDir)
    from PreProcData import ResampFiltData
    from GraphData import GraphData

    # Load the baseline file
    baseFileStr = ("baselineInfo.mat")
    baseData = loadmat(rootDir + baseFileStr)

    # Keys and values of the baseline struct
    baseVals = baseData['animalNotes_baselines'][0, 0]
    baseKeys = baseData['animalNotes_baselines'][0, 0].dtype.descr

    # Assemble the baseline file keys and values into one DataFrame.
    # dtype.descr entries are (name, format[, shape]) tuples.
    baseResultsArray = pd.DataFrame()
    for baseKey, *_ in baseKeys:
        df = pd.DataFrame(baseVals[baseKey][:])
        baseResultsArray = pd.concat([baseResultsArray, df], axis=1,
                                     ignore_index=True)
    # Fix: one rename call with a full mapping instead of a rename per column.
    baseResultsArray = baseResultsArray.rename(
        {b: key for b, (key, *_) in enumerate(baseKeys)}, axis='columns')
    baseResultsArray.to_csv(rootDir + "baselineInfo.csv", encoding='utf-8',
                            index=False)

    # Creating list of mat files to read
    allMatFiles = [f for f in os.listdir(rootDir)
                   if f.endswith("PythonData.mat")]

    # Which resampled channels each raw key expands to.  This table replaces
    # the five copy-pasted elif branches of the original; keys not listed
    # here are skipped, as before.
    keyToDataTypes = {
        'rawNeural_LH': ['deltaBandPower_LH', 'thetaBandPower_LH',
                         'gammaBandPower_LH'],
        'rawNeural_RH': ['deltaBandPower_RH', 'thetaBandPower_RH',
                         'gammaBandPower_RH'],
        'EMG': ['EMG'],
        'forceSensor': ['forceSensor'],
        'whiskerAngle': ['whiskerAngle'],
    }

    # Create the matlab data csv file with the relevant information
    for c, matFile in enumerate(allMatFiles):
        fileStr = str(rootDir + matFile)
        print("\n\nPulling data from file number", c, "->", fileStr[51:])
        matData = loadmat(fileStr)

        # Keys and values for each entry in the structure
        matVals = matData['PythonData'][0, 0]
        matKeys = matData['PythonData'][0, 0].dtype.descr

        resultsArray = np.empty((0, 9000))
        dataTypeArray = []
        for matKey, *_ in matKeys:
            # squeeze converts matlab (1, n) arrays into 1-D numpy arrays
            matVal = np.squeeze(matVals[matKey][0][:])
            for dataType in keyToDataTypes.get(matKey, []):
                result = list(ResampFiltData(dataType, matVal))
                resultsArray = np.append(resultsArray, [result], axis=0)
                dataTypeArray.append(dataType)

        # Transpose rows -> columns and emit one ProcData csv per mat file.
        resultsArray = [*zip(*resultsArray)]
        allData = pd.DataFrame.from_records(resultsArray,
                                            columns=dataTypeArray)
        GraphData(allData, fileStr[51:66], rootDir, 'Proc')
        allData.to_csv(rootDir + fileStr[51:66] + '_ProcData.csv')
    return