def get_data(self, node):
        """
        Return telemetry data for the specified node.

        The queries attached to the node are executed through
        ``self._get_data``. If that call raises, the traceback is logged at
        debug level and the empty DataFrame created up front is returned.
        For VM nodes with non-empty data the column names are passed through
        ``tm_utils.clean_vm_telemetry_colnames``.

        :param node: InfoGraph node
        :return: pandas.DataFrame
        """
        queries = InfoGraphNode.get_queries(node)
        ret_val = pandas.DataFrame()
        try:
            ret_val = self._get_data(queries)
        except Exception:
            # Best-effort fetch: keep the empty frame and record why.
            # format_exc() returns the traceback as a string (print_exc()
            # returns None), and all three placeholders receive a value so
            # the handler itself can no longer raise IndexError.
            separator = '-' * 60
            LOG.debug("Exception in user code: \n{} {} {}".format(
                separator, traceback.format_exc(), separator))
        if InfoGraphNode.node_is_vm(node) and not ret_val.empty:
            ret_val.columns = tm_utils.clean_vm_telemetry_colnames(
                ret_val.columns)

        return ret_val
Beispiel #2
0
    def run(self, workload, optimal_node_type='machine'):
        """
        Ranks nodes of the requested type by their utilization scores.

        One result row is built for every graph node whose type equals
        ``optimal_node_type``. Rows of nodes that carry a ``telemetry_data``
        property are sorted ascending by the configured fields; rows of
        nodes without it are kept separately (unless ``telemetry_filter`` is
        set in the workload configuration, in which case they are dropped),
        have their zero values replaced by None and are appended after the
        sorted rows. For workloads whose name does not start with
        ``optimal_``, docker containers additionally get a utilization-only
        row when ``optimal_node_type`` is ``'machine'``.

        Recognised workload configuration keys: ``project`` (adds a
        ``<project>_device_id`` column), ``telemetry_filter`` and
        ``sort_order`` (list of ``cpu``/``memory``/``network``/``disk``;
        defaults to sorting by compute utilization).

        :param workload: Contains workload related info and results.
        :param optimal_node_type: node type to rank, 'machine' by default.

        :return: heuristic results (pandas.DataFrame), also stored on the
                 workload under ``self.__filter_name__``
        :raises KeyError: if the workload has no graph
        """
        workload_config = workload.get_configuration()
        graph = workload.get_latest_graph()
        if not graph:
            raise KeyError('No graph to be processed.')

        resources = ('compute', 'memory', 'network', 'disk')
        scores = LandscapeScore.utilization_scores(graph)
        scores_sat = LandscapeScore.saturation_scores(graph)
        heuristic_results = pd.DataFrame(columns=[
            'node_name',
            'type',
            'ipaddress',
            'compute utilization',
            'compute saturation',
            'memory utilization',
            'memory saturation',
            'network utilization',
            'network saturation',
            'disk utilization',
            'disk saturation',
        ])
        # Rows of nodes that have no telemetry data ("nt").
        heuristic_results_nt = heuristic_results.copy()
        device_id_col_name = None
        project = None
        if workload_config.get('project'):
            project = workload_config['project']
            device_id_col_name = workload_config['project'] + '_device_id'
            heuristic_results[device_id_col_name] = None

        telemetry_filter = workload_config.get('telemetry_filter')
        for node in graph.nodes(data=True):
            node_name = InfoGraphNode.get_name(node)
            node_type = InfoGraphNode.get_type(node)
            list_node_name = node_name
            if node_type == optimal_node_type:
                if InfoGraphNode.node_is_vm(node):
                    # VMs are listed under their VM name when available.
                    vm_name = InfoGraphNode.get_properties(node).get('vm_name')
                    if vm_name:
                        list_node_name = vm_name
                data = {
                    'node_name': list_node_name,
                    'type': node_type,
                    'ipaddress':
                    InfoGraphNode.get_attributes(node).get('ipaddress'),
                }
                for resource in resources:
                    data[resource + ' utilization'] = \
                        scores[node_name][resource]
                    data[resource + ' saturation'] = \
                        scores_sat[node_name][resource]
                if device_id_col_name:
                    dev_id = InfoGraphNode.get_properties(node).get(
                        device_id_col_name)
                    # A node may lack the device id property; only rewrite
                    # the separator when there is a value to rewrite.
                    if project == 'mf2c' and dev_id is not None:
                        dev_id = dev_id.replace('_', '-')
                    data[device_id_col_name] = dev_id
                if InfoGraphNode.get_properties(node).get(
                        "telemetry_data") is not None:
                    heuristic_results = heuristic_results.append(
                        data, ignore_index=True)
                elif not telemetry_filter:
                    # Append to the no-telemetry frame itself. Appending to
                    # heuristic_results here would copy every telemetry row
                    # into the no-telemetry frame and duplicate them in the
                    # final result.
                    heuristic_results_nt = heuristic_results_nt.append(
                        data, ignore_index=True)

            if not workload.get_workload_name().startswith('optimal_'):
                if (node_type == "docker_container"
                        and optimal_node_type == 'machine'):
                    # Containers are keyed by docker id and only carry
                    # utilization figures.
                    docker_id = InfoGraphNode.get_docker_id(node)
                    container_row = {
                        'node_name': docker_id,
                        'type': node_type,
                        'ipaddress': None,
                    }
                    for resource in resources:
                        container_row[resource + ' utilization'] = \
                            scores[docker_id][resource]
                        container_row[resource + ' saturation'] = None
                    heuristic_results = heuristic_results.append(
                        container_row, ignore_index=True)

        sort_fields = ['compute utilization']
        sort_order = workload_config.get('sort_order')
        if sort_order:
            # Translate the configured resource names into result columns;
            # unknown names are ignored.
            sort_columns = {
                'cpu': 'compute utilization',
                'memory': 'memory utilization',
                'network': 'network utilization',
                'disk': 'disk utilization',
            }
            sort_fields = [sort_columns[val] for val in sort_order
                           if val in sort_columns]
        heuristic_results_nt = heuristic_results_nt.replace([0], [None])
        heuristic_results = heuristic_results.sort_values(by=sort_fields,
                                                          ascending=True)
        heuristic_results = heuristic_results.append(heuristic_results_nt,
                                                     ignore_index=True)
        workload.append_metadata(self.__filter_name__, heuristic_results)
        LOG.info('AVG: {}'.format(heuristic_results))
        return heuristic_results
    def run(self, workload):
        """
        Merge the service subgraphs of a workload into a single graph.

        Telemetry data is first collected from every node of every subgraph
        into a dictionary keyed by node id (or by the ``vm_name`` attribute
        for VM nodes that have one), concatenating the frames of nodes that
        share a key, and the nodes' own telemetry is reset to an empty
        DataFrame. The non-empty subgraphs are then merged into the first
        non-empty one via ``graphs.merge_graph`` and the collected telemetry
        is set on the merged graph's nodes. The merged graph is saved on the
        workload under ``self.__filter_name__``.

        :param workload: workload whose latest graph is an iterable of
                         service subgraphs
        :return: the merged graph, or None when there are no subgraphs or
                 none of them contains any node
        """
        # Extract data from Info Core
        service_subgraphs = workload.get_latest_graph()
        if not service_subgraphs:
            return None

        # first add telemetry data of all nodes to a dictionary
        telemetry = {}
        print("Data merger started " + str(time.time()))
        for subgraph in service_subgraphs:
            for node in subgraph.nodes(data=True):
                node_id = node[0]
                node_tm = InfoGraphNode.get_telemetry_data(node)
                if not node_tm.empty:
                    if InfoGraphNode.node_is_vm(node):
                        node_tm.columns = \
                            tm_utils.clean_vm_telemetry_colnames(
                                node_tm.columns)
                        vm_name = InfoGraphNode.get_attributes(node).get(
                            "vm_name")
                        if vm_name:
                            node_id = vm_name
                    collected = telemetry.get(node_id)
                    if isinstance(collected, pd.DataFrame):
                        telemetry[node_id] = pd.concat([collected, node_tm])
                    else:
                        telemetry[node_id] = node_tm
                # drop the per-node copy; the data now lives in `telemetry`
                InfoGraphNode.set_telemetry_data(node, pd.DataFrame())
        print("Data merger finished " + str(time.time()))
        print(list(telemetry.keys()))
        print(len(telemetry))

        # merge subgraphs into the first non-empty one
        graph = None
        for subgraph in service_subgraphs:
            if len(subgraph.nodes()) == 0:
                continue
            if graph is None:
                graph = subgraph
            else:
                graphs.merge_graph(graph, subgraph)

        if graph is None:
            # Every subgraph was empty, so there is nothing to annotate or
            # save; without this guard graph.nodes below would raise
            # AttributeError on None.
            return None

        # set telemetry data on merged graph
        for node in graph.nodes(data=True):
            node_id = node[0]
            if InfoGraphNode.node_is_vm(node):
                vm_name = InfoGraphNode.get_attributes(node).get("vm_name")
                if vm_name:
                    node_id = vm_name
            node_tm = telemetry.get(node_id)
            if isinstance(node_tm, pd.DataFrame):
                if not node_tm.empty:
                    InfoGraphNode.set_telemetry_data(node, node_tm)
                    # delete telemetry data so that only one copy exists in
                    # the graph
                    del telemetry[node_id]
            else:
                # NOTE(review): nodes without telemetry get None here, while
                # the collection loop above resets nodes to an empty
                # DataFrame - confirm which value consumers expect.
                InfoGraphNode.set_telemetry_data(node, None)
        print("Set telemetry data of merged graph")
        workload.save_results(self.__filter_name__, graph)
        return graph