Example #1
import datetime
from queue import PriorityQueue  # Queue.PriorityQueue on Python 2

# GraphData (used below) is defined elsewhere in this codebase


class DataSource(object):
    """
    DataSource acts as an abstract representation of the data source,
    though in reality it also pulls its data from the XOMBIE stream.
    Handles pushing data to possibly multiple listeners in a thread-safe manner.

    class variables:
        sources - a mapping from signal-names to all live data sources

    class methods:
        find - Either finds the existing data source for some signal name,
               or creates a new one for that signal

    instance variables:
        name  - the signal name that this data source tracks, in the format
                id-in-hex:message-name. For example, the identifier for
                the Tritium Motor Drive Command Motor current is
                "0x501:Motor Current"
        queue - the internal data queue that the data source uses to pull
                data from the stream in a thread-safe manner
        data  - the GraphData object that handles filtering (not used right now)
                and storing the data for use with collections

    method summary:
        put   - adds a single point from the stream to the internal
                queue and updates the last-received timestamp
        pull  - drains all pending points from the internal queue into
                the GraphData storage. Intended for initializing with
                accumulated data
    """
    def __init__(self, identifier, desc=None):
        self.name = identifier
        self.queue = PriorityQueue()
        self.data = GraphData([])
        self.descriptor = desc

        # Sentinel: an arbitrarily early timestamp, so any real point
        # will update last_received on arrival.
        self.last_received = datetime.datetime(1993, 6, 20)

    def __hash__(self):
        return hash(self.name)

    def __eq__(self, other):
        return self.name == other.name

    def put(self, point):
        "Add data from the stream to the internal data queue"
        time, _ = point  # only the timestamp is needed here
        self.queue.put(point)
        self.last_received = max(self.last_received, time)

    def pull(self):
        "Drains all pending points from the internal queue into GraphData"
        while not self.queue.empty():
            self.data.addPoint(self.queue.get_nowait())

    def __repr__(self):
        return "DataSource(%r)" % self.name
Example #2
    def __init__(self, args):
        self.animate_alarm = None
        self.save_csv = args.csv
        self.terminal = args.terminal
        self.json = args.json
        self.mode = GraphMode()
        # is_admin is a module-level flag set elsewhere in this codebase
        self.data = GraphData(is_admin=is_admin)
        self.view = GraphView(self)
        # use the first mode as the default
        mode = self.get_modes()[0]
        self.mode.set_mode(mode)
        # update the view
        self.view.on_mode_change(mode)
        self.view.update_displayed_information()
Example #3
    def __init__(self, identifier, desc=None):
        self.name = identifier
        self.queue = PriorityQueue()
        self.data = GraphData([])
        self.descriptor = desc

        # Sentinel: an arbitrarily early timestamp, so any real point
        # will update last_received on arrival.
        self.last_received = datetime.datetime(1993, 6, 20)
Example #4
class GraphClusterer(object):
    CODE_DIR = '/Users/divya/work/repo/Dissertation'
    #CODE_DIR = '/Users/divya/Documents/Dissertation/Dissertation'
    DATA_DIR = '/Users/divya/Documents/input/Dissertation/data'
    LOG_DIR = '/Users/divya/Documents/logs/Dissertation'
    configdata = ConfigData(CODE_DIR, DATA_DIR, LOG_DIR)
    graphdata = GraphData()

    def __init__(self, K):
        # Note: this K is unused; the working K range is read from
        # configdata inside MKNN_worker_wrapper.
        self.phase1data = None
        self.phase2data = None
        self.phase1_allKevaluationdata = None
        self.phase2_allKevaluationdata = None
        self.helper = MKNN_Helper()

    ##############################################################
    #1. Initialize MKNN
    #2. As a wrapper, call MKNN_worker for different values of K
    ##############################################################
    def MKNN_worker_wrapper(self):
        #GraphClusterer.configdata.logger.info("Initialization phase of GMKNN begins")
        self.MKNN_init()
        GraphClusterer.configdata.logger.info("Initialization phase of GMKNN ends.")

        GraphClusterer.configdata.logger.info("Working of G-MKNN begins")
        GraphClusterer.configdata.logger.info("Running G-MKNN for different values of K.")

        K_range = list(range(int(GraphClusterer.configdata.K_min), int(GraphClusterer.configdata.K_max) + 1))

        for K in K_range:
            GraphClusterer.configdata.logger.info("Running G-MKNN for the value of K: ")
            GraphClusterer.configdata.logger.info(K)
            #Call MKNN_worker for the value of K
            self.MKNN_worker(K)
            #(CL_List_P2, CL_List_P1, SM, SM_orig, num_clusters_P2, num_clusters_P1, num_nodes) = MKNN_worker(K, max_num_clusters, SM, SM_orig, num_nodes, node_codes, currentdate_str, dataset_name, eval_results_dir, log)

        #Plot evaluation measures for all values of K for phase 1
        self.phase1_allKevaluationdata.plot_evaluation_measures_for_all_K()

        #Plot evaluation measures for all values of K for phase 2
        self.phase2_allKevaluationdata.plot_evaluation_measures_for_all_K()

    ############################################
    #Set up configuration
    #Set up the input matrices for the algorithm
    ############################################
    def MKNN_init(self):

        #setting up configuration
        GraphClusterer.configdata.do_config()
        logger = GraphClusterer.configdata.logger
        logger.debug("Debugging from inside MKNN_init method")

        #setting up the input matrices
        #Create SM and SM_orig
        GraphClusterer.graphdata.create_SM_from_relfile(GraphClusterer.configdata.inp_rel_file)

        #Expand SM
        GraphClusterer.graphdata.setup_expanded_SM(GraphClusterer.configdata.nhops, GraphClusterer.configdata.inp_rel_file)

        #Create Edge Objects
        GraphClusterer.graphdata.create_edge_objects()
        #self.helper.print_dict(GraphClusterer.graphdata.edge_dict)
        #self.helper.print_set(GraphClusterer.graphdata.node_dict[1].node_edges_dict[EdgeType.secondary])
        #print(GraphClusterer.graphdata.edge_dict[011].edge_id)

        #Initialize all K evaluation objects for both phases
        self.phase1_allKevaluationdata = AllKEvaluationData(GraphClusterer.configdata, 1) #1 for phase=1
        self.phase2_allKevaluationdata = AllKEvaluationData(GraphClusterer.configdata, 2) #2 for phase=2

    #############################
    #Run MKNN for one value of K
    #############################
    def MKNN_worker(self, K):
        self.phase1data = Phase1Data(GraphClusterer.graphdata,
                                     GraphClusterer.configdata,
                                     K)
        self.MKNN_Phase1()
        self.phase2data = Phase2Data(GraphClusterer.graphdata,
                                     GraphClusterer.configdata,
                                     K,
                                     self.phase1data.cnodes_dict,
                                     self.phase1data.next_cluster_label,
                                     self.phase1data.num_clusters
                                     )
        self.MKNN_Phase2()

    def MKNN_Phase1(self):

        #Initialize Phase 1
        self.phase1data.initialize_phase()
        self.helper.print_list(self.phase1data.graphdata.node_dict[10].MKNN_list)
        print('Degree:', self.phase1data.graphdata.node_dict[10].degree)
        print('CI_list:')
        #self.helper.print_list(self.phase1data.cluster_initiator_list)
        self.helper.convert_list_ids_to_codes(GraphClusterer.graphdata, self.phase1data.cluster_initiator_list)

        #print((self.phase1data.graphdata.CI_list[0]))

        #Execute Phase 1
        self.phase1data.execute_phase()


        #Visualize Phase 1 results
        self.phase1data.visualize_phase()

        #Evaluate phase
        self.phase1data.evaluate_phase()
        self.phase1_allKevaluationdata.add_evaluation_for_K(self.phase1data.phase1_evaluation_data)

    def MKNN_Phase2(self):

        #Initialize phase 2
        self.phase2data.initialize_phase()

        #self.helper.print_list(self.phase2data.c_SM)

        #self.helper.print_list(self.phase2data.c_SM_sort)

        #Execute phase 2
        self.phase2data.execute_phase()

        #Visualize phase
        self.phase2data.visualize_phase()

        #Evaluate Phase
        self.phase2data.evaluate_phase()
        self.phase2_allKevaluationdata.add_evaluation_for_K(self.phase2data.phase2_evaluation_data)

        #self.helper.print_list(self.phase2data.phase2_evaluation_data.gold_standard_CL_list)
        self.helper.print_list(self.phase2data.phase2_evaluation_data.contingency_matrix)
        print("sensitivity:")
        print(self.phase2data.phase2_evaluation_data.sensitivity)
        print("PPV")
        print(self.phase2data.phase2_evaluation_data.PPV)
        print("accuracy")
        print(self.phase2data.phase2_evaluation_data.accuracy)
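# --- Hypothetical driver (not in the original source) ---
# Shows how the wrapper is meant to be invoked: MKNN_init() sets up the
# configuration and input matrices, then MKNN_worker() runs phases 1 and 2
# once for each K in [K_min, K_max] taken from configdata. The K given to
# __init__ is effectively unused.
if __name__ == "__main__":
    clusterer = GraphClusterer(K=0)
    clusterer.MKNN_worker_wrapper()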
Example #5
class GraphController:
    """
    A class responsible for setting up the model and view and running
    the application.
    """
    def __init__(self, args):
        self.animate_alarm = None
        self.save_csv = args.csv
        self.terminal = args.terminal
        self.json = args.json
        self.mode = GraphMode()
        # is_admin is a module-level flag set elsewhere in this codebase
        self.data = GraphData(is_admin=is_admin)
        self.view = GraphView(self)
        # use the first mode as the default
        mode = self.get_modes()[0]
        self.mode.set_mode(mode)
        # update the view
        self.view.on_mode_change(mode)
        self.view.update_displayed_information()

    def get_modes(self):
        """Allow our view access to the list of modes."""
        return self.mode.get_modes()

    def set_mode(self, m):
        """Allow our view to set the mode."""
        rval = self.mode.set_mode(m)
        self.view.update_displayed_information()
        return rval

    def main(self):
        self.loop = MainLoop(self.view, DEFAULT_PALETTE)
        self.animate_graph()
        if not (self.terminal or self.json):
            self.loop.run()

    def animate_graph(self, loop=None, user_data=None):
        """update the graph and schedule the next update"""
        # The bar graph's width determines how long a history of data to keep
        self.data.update_data()
        if self.terminal:
            self.data.output_to_terminal()
        if self.json:
            self.data.output_json()
        if self.save_csv:
            self.data.output_to_csv(DEFAULT_CSV_FILE)
        self.view.update_displayed_information()
        self.animate_alarm = self.loop.set_alarm_in(UPDATE_INTERVAL,
                                                    self.animate_graph)

    def start_stress(self):
        mode = self.mode
        if mode.get_current_mode() == 'Stress Operation':
            try:
                kill_child_processes(mode.get_stress_process())
            except Exception:
                logging.debug('Could not kill process')
            # stress_program is a module-level path to the stress binary
            stress_cmd = [stress_program]
            if int(self.view.stress_menu.sqrt_workers) > 0:
                stress_cmd.append('-c')
                stress_cmd.append(self.view.stress_menu.sqrt_workers)

            if int(self.view.stress_menu.sync_workers) > 0:
                stress_cmd.append('-i')
                stress_cmd.append(self.view.stress_menu.sync_workers)

            if int(self.view.stress_menu.memory_workers) > 0:
                stress_cmd.append('--vm')
                stress_cmd.append(self.view.stress_menu.memory_workers)
                stress_cmd.append('--vm-bytes')
                stress_cmd.append(self.view.stress_menu.malloc_byte)
                stress_cmd.append('--vm-stride')
                stress_cmd.append(self.view.stress_menu.byte_touch_cnt)

            if self.view.stress_menu.no_malloc:
                stress_cmd.append('--vm-keep')

            if int(self.view.stress_menu.write_workers) > 0:
                stress_cmd.append('--hdd')
                stress_cmd.append(self.view.stress_menu.write_workers)
                stress_cmd.append('--hdd-bytes')
                stress_cmd.append(self.view.stress_menu.write_bytes)

            if self.view.stress_menu.time_out != 'none':
                stress_cmd.append('-t')
                stress_cmd.append(self.view.stress_menu.time_out)

            with open(os.devnull, 'w') as DEVNULL:
                try:
                    stress_proc = subprocess.Popen(stress_cmd,
                                                   stdout=DEVNULL,
                                                   stderr=DEVNULL,
                                                   shell=False)
                    mode.set_stress_process(psutil.Process(stress_proc.pid))
                except Exception:
                    logging.debug("Unable to start stress")

            self.data.max_perf_lost = 0
            self.data.samples_taken = 0

        elif mode.get_current_mode() == 'FIRESTARTER':
            logging.debug('Started FIRESTARTER mode')
            try:
                kill_child_processes(mode.get_stress_process())
            except Exception:
                logging.debug('Could not kill process')

            stress_cmd = [os.path.join(os.getcwd(), fire_starter)]
            with open(os.devnull, 'w') as DEVNULL:
                try:
                    stress_proc = subprocess.Popen(stress_cmd,
                                                   stdout=DEVNULL,
                                                   stderr=DEVNULL,
                                                   shell=False)
                    mode.set_stress_process(psutil.Process(stress_proc.pid))
                    logging.debug('Started process ' +
                                  str(mode.get_stress_process()))
                except Exception:
                    logging.debug("Unable to start stress")

        else:
            logging.debug('Regular operation mode')
            try:
                kill_child_processes(mode.get_stress_process())
            except Exception:
                try:
                    logging.debug('Could not kill process ' +
                                  str(mode.get_stress_process()))
                except Exception:
                    logging.debug('Could not kill process FIRESTARTER')
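# --- Usage sketch (not from the original source) ---
# Wires the controller into an entry point. The flag names mirror the
# attributes read in __init__ (args.csv, args.terminal, args.json); the
# real CLI's option names are an assumption.
def _main_sketch():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--csv', action='store_true')
    parser.add_argument('--terminal', action='store_true')
    parser.add_argument('--json', action='store_true')
    GraphController(parser.parse_args()).main()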
Example #6
def NormalizeData(rootDir, codeDir, uniqueDayArray):
    """
    Written by Kevin L. Turner 
    
    Purpose: filter and resample data based on dataType
        
    Inputs: raw data array
    
    Outputs: processed data array
    
    Last Revised: April 2nd, 2019
    """

    import numpy as np
    import pandas as pd
    import sys
    import os

    sys.path.append(codeDir)
    from GraphData import GraphData

    # Load the baseline csv file
    baseFileStr = ("baselineInfo.csv")

    # Determine the number of unique file dates used in baseline calculations
    allDays = pd.read_csv(rootDir + baseFileStr)
    allDays['fileIDs'] = allDays['fileIDs'].str[2:8]
    uniqueDays = list(set(allDays.iloc[:, 0]))
    uniqueDays = np.sort(uniqueDays)  # sort to ascending order

    # Create the list of all processed csv data files
    allProcDataFiles = []
    for files in os.listdir(rootDir):
        if files.endswith('ProcData.csv'):
            allProcDataFiles.append(files)

    allProcDataFiles = [snip[0:15] for snip in allProcDataFiles]
    allProcDataFiles = np.sort(allProcDataFiles)  # sort to ascending order

    for a in range(len(allProcDataFiles)):
        procDataFile = allProcDataFiles[a]
        print("\n\nNormalizing data from file number", a, "->", procDataFile)
        procData = pd.read_csv(rootDir + procDataFile + '_ProcData.csv')
        procData = procData.drop(columns='Unnamed: 0')
        day = procDataFile[0:6]
        baseData = uniqueDayArray.loc[[day]]
        dataTypes = list(baseData.columns.values)
        normData = pd.DataFrame()
        for b in range(len(dataTypes)):
            dataType = dataTypes[b]
            if dataType == 'forceSensor' or dataType == 'whiskerAngle':
                normArray = procData.loc[:, dataType]
                normArray = pd.Series.to_frame(normArray)
            else:
                baseVal = baseData.loc[:, dataType]
                dataArray = procData.loc[:, dataType]
                normArray = (dataArray - float(baseVal)) / float(baseVal)
                normArray = pd.Series.to_frame(normArray)
            normData = pd.concat([normData, normArray], axis=1)
        GraphData(normData, procDataFile, rootDir, 'Norm')
        normData.to_csv(rootDir + procDataFile + '_NormData.csv')

    return
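# --- Illustrative call (hypothetical paths, not in the original source) ---
# rootDir must contain baselineInfo.csv and the *_ProcData.csv files;
# uniqueDayArray is a DataFrame of per-day baseline values indexed by the
# six-character day code sliced from each file name.
#
#   NormalizeData('/data/animal01/', '/code/analysis/', uniqueDayArray)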
Example #7
def ConvMAT2CSV(rootDir, codeDir):
    """
    Written by Christina Echagarruga and Kevin L. Turner to work with macOS/Unix-based systems
    
    Purpose: Extract data from .mat files and format into DataFrames
        Export as csv file
        
    Inputs: PythonData.mat files, animalNotes_baselines.mat file
    
    Outputs: .csv files
    
    Last Revised: April 2nd, 2019
    """

    from scipy.io import loadmat
    import numpy as np
    import pandas as pd
    import sys
    import os

    sys.path.append(codeDir)
    from PreProcData import ResampFiltData
    from GraphData import GraphData

    # Load the baseline file
    baseFileStr = ("baselineInfo.mat")
    baseData = loadmat(rootDir + baseFileStr)

    # Build list of keys and values for the baseline data
    baseVals = baseData['animalNotes_baselines'][0, 0]
    baseKeys = baseData['animalNotes_baselines'][0, 0].dtype.descr
    baseResultsArray = pd.DataFrame()

    # Assemble the baseline file keys and values into variables
    for a in range(len(baseKeys)):
        baseKey = baseKeys[a][0]
        baseVal = baseVals[baseKey][:]
        df = pd.DataFrame(baseVal)
        baseResultsArray = pd.concat([baseResultsArray, df],
                                     axis=1,
                                     ignore_index=True)

    for b in range(len(baseKeys)):
        baseResultsArray = baseResultsArray.rename({b: baseKeys[b][0]},
                                                   axis='columns')

    baseResultsArray.to_csv(rootDir + "baselineInfo.csv",
                            encoding='utf-8',
                            index=False)

    # Creating List of mat files to read
    allMatFiles = []
    for files in os.listdir(rootDir):
        if files.endswith("PythonData.mat"):
            allMatFiles.append(files)

    # Create the matlab data csv file with the relevant information
    for c in range(len(allMatFiles)):
        fileStr = str(rootDir + allMatFiles[c])
        print("\n\nPulling data from file number", c, "->", fileStr[51:])
        matData = loadmat(fileStr)

        # Build list of keys and values for each entry in the structure
        matVals = matData['PythonData'][0, 0]
        matKeys = matData['PythonData'][0, 0].dtype.descr

        resultsArray = np.empty((0, 9000))
        dataTypeArray = []
        # Map each .mat key to the data type(s) derived from it; keys not
        # listed here are skipped.
        keyToDataTypes = {
            'rawNeural_LH': [
                'deltaBandPower_LH', 'thetaBandPower_LH', 'gammaBandPower_LH'
            ],
            'rawNeural_RH': [
                'deltaBandPower_RH', 'thetaBandPower_RH', 'gammaBandPower_RH'
            ],
            'EMG': ['EMG'],
            'forceSensor': ['forceSensor'],
            'whiskerAngle': ['whiskerAngle'],
        }

        # Assemble the keys and values into variables
        for keyEntry in matKeys:
            matKey = keyEntry[0]
            if matKey not in keyToDataTypes:
                continue
            # squeeze converts MATLAB (1, n) arrays into 1-D numpy arrays
            matVal = np.squeeze(matVals[matKey][0][:])
            for dataType in keyToDataTypes[matKey]:
                result = list(ResampFiltData(dataType, matVal))
                resultsArray = np.append(resultsArray, [result], axis=0)
                dataTypeArray.append(dataType)

        resultsArray = [*zip(*resultsArray)]
        allData = pd.DataFrame.from_records(resultsArray,
                                            columns=dataTypeArray)
        GraphData(allData, fileStr[51:66], rootDir, 'Proc')
        allData.to_csv(rootDir + fileStr[51:66] + '_ProcData.csv')

    return
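# --- Illustrative call (hypothetical paths, not in the original source) ---
# rootDir must contain baselineInfo.mat and the *PythonData.mat recordings;
# codeDir is appended to sys.path so PreProcData and GraphData can be
# imported.
#
#   ConvMAT2CSV('/data/animal01/', '/code/analysis/')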