Esempio n. 1
0
class FeatureBuilder:
    """Derive new feature columns from a CSV table via pluggable modules.

    The CSV is loaded into ``self.df``. ``build_new_features`` then runs every
    eligible module found under ``FEATURE_MODULE_DIR`` against that table and
    accumulates the produced columns in ``self.new_df``.
    """

    def __init__(self, csv_file_name):
        """Load ``csv_file_name`` and start with an empty result frame."""
        self.df = None
        self.new_df = pd.DataFrame()
        self.read_data(csv_file_name)
        self.logger = Logger(self)

    def read_data(self, csv_file_name):
        """Read ``csv_file_name`` with ``pd.read_csv`` into ``self.df``."""
        self.df = pd.read_csv(csv_file_name)

    def hasattributes(self, obj, attr_list, filename):
        """Return True when ``obj`` has every attribute named in ``attr_list``.

        The first missing attribute is reported as a warning that mentions
        ``filename``, and the check stops there with False.
        """
        for attr in attr_list:
            if not hasattr(obj, attr):
                self.logger.log('warn', 'module [',filename, '] doesn\'t have [', attr, '], skip.')
                return False
        return True

    def _is_module_selected(self, module_name):
        """Apply the black/white list policy from ``config`` to one module name."""
        mode = config.black_white_list_use_mode
        if mode == 'black':
            return module_name not in config.black_list
        if mode == 'white':
            return module_name in config.white_list
        if mode == 'both':
            return (module_name in config.white_list
                    and module_name not in config.black_list)
        # 'disabled' and any unrecognised mode let every module through.
        return True

    def build_new_features(self):
        """Run every selected feature module and append its columns to ``self.new_df``.

        A module must expose ``input_columns``, ``output_columns`` and ``run``.
        When ``config.run_level`` equals 1 and the module also offers
        ``run_aggr`` together with ``output_columns_aggr``, those are used
        instead. Modules whose output cannot be labelled with the declared
        columns (``ValueError``) are skipped with a warning.
        """
        for filename in glob.glob(FEATURE_MODULE_DIR+"/*.py"):
            module_name = os.path.splitext(os.path.basename(filename))[0]

            # Skip package plumbing such as __init__.py.
            if module_name.startswith('__'):
                continue
            if not self._is_module_selected(module_name):
                continue

            self.logger.log('info', 'applying module [', module_name, ']')
            # __import__ with a dotted name returns the top-level package, so
            # the submodule itself is fetched from it by attribute.
            package = __import__('modules.'+module_name)
            mod = getattr(package, module_name)

            if not self.hasattributes(mod, ['input_columns', 'output_columns', 'run'], filename):
                continue
            input_columns = mod.input_columns
            output_columns = mod.output_columns
            run = mod.run

            # Compare by value: ``is 1`` only worked by accident of int caching.
            if config.run_level == 1 and hasattr(mod, 'run_aggr'):
                if hasattr(mod, 'output_columns_aggr'):
                    run = mod.run_aggr
                    output_columns = mod.output_columns_aggr
                else:
                    self.logger.log('warn', 'module [', module_name, '] has run_aggr() but no output_columns_aggr, downgrade to run().')

            try:
                # Each row's input cells are passed positionally to run(); the
                # returned tuples are then expanded into one column per item.
                tuple_df = self.df.apply(lambda row: run(*row[input_columns]), axis=1)
                tmp_df = tuple_df.apply(pd.Series)
                tmp_df.columns = output_columns
            except ValueError as err:
                print(err)
                self.logger.log('warn', 'module [', module_name, '] has incorrect columns define, skip.')
                continue

            self.new_df = pd.concat([self.new_df, tmp_df], axis=1)
Esempio n. 2
0
class StixParseWorker:
    """Flatten selected STIX package and indicator fields into column lists.

    Every call to ``consumeStix`` appends one entry per package to the
    ``stix_*_list`` columns and one entry per non-composite indicator to the
    ``indicator_*_list`` columns. The getters expose those columns grouped as
    a list of lists or as a dict of lists.

    The accumulators are created per instance. They used to be mutable class
    attributes, which made every worker share (and keep growing) the same
    lists.
    """

    def __init__(self):
        self.logger = Logger(self)
        self._reset_state()

    def _reset_state(self):
        """Create fresh, empty accumulators owned by this instance."""
        self.stix_fields_list_of_list = []
        self.indicator_fields_list_of_list = []
        self.kill_chain_list_of_list = []

        self.stix_fields_dict_of_list = {}
        # NOTE(review): nothing in this class ever fills this dict, so
        # getIndicatorFieldsDict() always returns it empty.
        self.indicator_fields_dict_of_list = {}
        self.kill_chain_dict_of_list = {}

        self.stix_title_list = []
        self.stix_package_intent_list = []
        self.stix_description_list = []
        self.stix_marking_color_list = []
        self.stix_produced_time_list = []
        self.stix_indicator_id_list_list = []
        # NOTE(review): kill-chain ids are not collected yet, so this column
        # stays empty while the other package columns grow.
        self.stix_kill_chain_id_list_list = []

        self.indicator_id_list = []
        self.indicator_description_list = []
        self.indicator_type_list = []

    def __mergeKillChainPhase(self, kill_chain):
        """Placeholder; kill-chain handling is not implemented."""
        pass

    def __processKillChainFieldsList(self, kill_chain):
        """Placeholder; kill-chain handling is not implemented."""
        pass

    def __processStixFieldsList(self, stix_package):
        """Append one row of header fields for ``stix_package`` to the columns.

        Also records every non-composite indicator of the package and stores
        their ids, joined with ``|``, in ``stix_indicator_id_list_list``.
        """
        assert isinstance(stix_package, STIXPackage)
        header = stix_package.stix_header
        self.stix_title_list.append(header.title)
        self.stix_package_intent_list.append(str(header.package_intents[0]))
        self.stix_description_list.append(str(header.description))

        self.logger.log('handling type:',type(header.handling))
        self.logger.log('handling type:',type(header.handling.marking))
        self.logger.log('handling type:',type(header.handling.marking[0]))
        # Only TLP marking structures carry a colour; their colours are
        # concatenated without a separator.
        colors = ''.join(
            ms.color
            for ms in header.handling.marking[0].marking_structures
            if isinstance(ms, TLPMarkingStructure))
        self.stix_marking_color_list.append(colors)
        self.stix_produced_time_list.append(
            header.information_source.time.produced_time.to_dict())

        self.logger.log('indicators type:', type(stix_package.indicators))
        # A package may carry no indicators at all; treat that as an empty
        # collection instead of failing on indicators[0] after the header
        # columns were already extended.
        indicators = stix_package.indicators or []
        if indicators:
            self.logger.log('indicator[0] type:',type(indicators[0]))

        indicator_id_list = []
        for indicator in indicators:
            self.logger.log('indicator type:',type(indicator))
            if not indicator.composite_indicator_expression:
                indicator_id_list.append(indicator.id_)
                self.__processIndicatorFieldsList(indicator)
        self.stix_indicator_id_list_list.append('|'.join(indicator_id_list))

    def __processIndicatorFieldsList(self, indicator):
        """Append id, description and first type of ``indicator`` to the columns."""
        self.indicator_id_list.append(indicator.id_)
        self.indicator_description_list.append(str(indicator.description))
        self.indicator_type_list.append(str(indicator.indicator_types[0]))

    def consumeStix(self, stix_package):
        """Extract the fields of ``stix_package`` and refresh the grouped views."""
        self.__processStixFieldsList(stix_package)

        # The grouped views hold references to the column lists, so they only
        # need to be wired once; re-appending on every call duplicated them.
        if not self.stix_fields_list_of_list:
            self.stix_fields_list_of_list.extend([
                self.stix_title_list,
                self.stix_package_intent_list,
                self.stix_description_list,
                self.stix_marking_color_list,
                self.stix_produced_time_list,
                self.stix_indicator_id_list_list,
                self.stix_kill_chain_id_list_list,
            ])

        self.stix_fields_dict_of_list['title'] = self.stix_title_list
        self.stix_fields_dict_of_list['package_intent'] = self.stix_package_intent_list
        self.stix_fields_dict_of_list['description'] = self.stix_description_list
        self.stix_fields_dict_of_list['marking_color'] = self.stix_marking_color_list
        self.stix_fields_dict_of_list['produced_time'] = self.stix_produced_time_list
        self.stix_fields_dict_of_list['indicators_ids'] = self.stix_indicator_id_list_list
        self.stix_fields_dict_of_list['kill_chains_ids'] = self.stix_kill_chain_id_list_list

        if not self.indicator_fields_list_of_list:
            self.indicator_fields_list_of_list.extend([
                self.indicator_id_list,
                self.indicator_description_list,
                self.indicator_type_list,
            ])

    def getStixFieldsList(self):
        """Return the package columns as a list of lists."""
        return self.stix_fields_list_of_list

    def getIndicatorFieldsList(self):
        """Return the indicator columns as a list of lists."""
        return self.indicator_fields_list_of_list

    def getStixFieldsDict(self):
        """Return the package columns keyed by field name."""
        return self.stix_fields_dict_of_list

    def getIndicatorFieldsDict(self):
        """Return the indicator dict-of-lists view."""
        return self.indicator_fields_dict_of_list
Esempio n. 3
0
class GraphicWithDataWorker():
    """Record the field structure of an object tree as a networkx DiGraph.

    ``iterField`` walks an object through its ``_fields`` dictionary, adds one
    graph node per visited field and counts how often each parent/child pair
    was seen. ``draw`` renders the collected graph with matplotlib.
    """

    def __init__(self):
        self.logger = Logger(self)
        self.is_clustering_node_by_name = False
        self.is_full_structure = False
        self.is_display_list_node_index = False
        self.is_save_avg_degree_connectivity = False
        self.clear_graph()
        self.hook_on_node = None
        self.all_avg_degree_con = {}

    def set_is_clustering_node_by_name(self):
        """Use the plain label as node id, merging equally named nodes."""
        self.is_clustering_node_by_name = True

    def set_is_full_structure(self):
        """Also walk into fields whose value is None."""
        self.is_full_structure = True

    def set_is_display_list_node_index(self):
        """Label list items with their real index instead of ``[i]``."""
        self.is_display_list_node_index = True

    def set_hook_on_node(self, func_on_node):
        """Install a predicate; nodes for which it returns falsy are skipped."""
        self.hook_on_node = func_on_node

    def set_is_save_avg_degree_connectivity(self):
        self.is_save_avg_degree_connectivity = True

    def clear_graph(self):
        """Start over with an empty graph and fresh bookkeeping."""
        self.G = nx.DiGraph()
        self.edge_weight_dict = {}
        self.is_first_node = True
        self.first_node_label = 'root_start_123123123123123'
        self.node_index = 0
        self.nodelabel_to_displaylabel_dict = {}

    def __register_display_label(self, displaylabel):
        """Allocate a unique ``<index>|<displaylabel>`` node id and remember it."""
        nodelabel = str(self.node_index) + '|' + displaylabel
        self.node_index += 1
        self.nodelabel_to_displaylabel_dict[nodelabel] = displaylabel
        return nodelabel

    def __getTreeID(self, node, label):
        """Return the graph node id for ``label``.

        In clustering mode the id is simply ``str(label)``. Otherwise a new
        unique id is allocated whose display text is the part of the label
        after the last ``|``. ``node`` is currently not used.
        """
        if self.is_clustering_node_by_name:
            return str(label)
        return self.__register_display_label(str(label).split('|')[-1])

    def __getListObjTreeID(self, node, listname, i):
        """Return the graph node id for item ``i`` of the list field ``listname``.

        The item suffix is ``[<i>]`` when list indices are displayed and the
        literal ``[i]`` otherwise. ``node`` is currently not used.
        """
        if self.is_display_list_node_index:
            suffix = '[' + str(i) + ']'
        else:
            suffix = '[i]'
        if self.is_clustering_node_by_name:
            return listname + suffix
        return self.__register_display_label(listname.split('|')[-1] + suffix)

    def __dumpObjFields(self, obj):
        """Return the ``_fields`` mapping stored in the instance dict of ``obj``."""
        return obj.__dict__['_fields']

    def dontDraw(self, label):
        """Return True for node ids that must not appear in the graph.

        NOTE(review): outside clustering mode node ids carry an ``<index>|``
        prefix, so this exact-match test never hits them - confirm that is
        intended.
        """
        return label in [self.first_node_label, 'id', 'valueOf_']

    def __add_edge_weight(self, fr, to):
        """Count one more occurrence of the edge ``fr`` -> ``to``."""
        targets = self.edge_weight_dict.setdefault(fr, {})
        targets[to] = targets.get(to, 0) + 1

    def iterField(self,
                  cur_node,
                  cur_label,
                  father_id,
                  prefix='',
                  func_on_field=None):
        """Add ``cur_node`` below ``father_id`` and recurse into its fields.

        Params: cur_node: object currently visited
                cur_label: label used to build this node's graph id
                father_id: graph id of the parent node
                prefix: grows by ``--`` per level; not used otherwise
                func_on_field: accepted but not used
        """
        # A hook returning a falsy value prunes this node and its subtree.
        if self.hook_on_node is not None and not self.hook_on_node(cur_node):
            return

        if not self.G.has_node(father_id) and not self.dontDraw(father_id):
            if father_id == self.first_node_label:
                self.logger.log('WTF')
            self.G.add_node(father_id)

        my_id = self.__getTreeID(cur_node, cur_label)

        # The weight is counted even for edges that are not drawn.
        self.__add_edge_weight(father_id, my_id)
        if self.G.has_edge(father_id, my_id):
            self.logger.log('shouldnt have output', father_id, my_id)
        elif not self.dontDraw(father_id) and not self.dontDraw(my_id):
            self.G.add_edge(father_id, my_id)

        if not hasattr(cur_node, '_fields'):
            return

        fdict = self.__dumpObjFields(cur_node)
        for f in fdict:
            value = fdict[f]
            is_listing = (
                (isinstance(cur_node, MutableSequence)
                 and isinstance(value, MutableSequence))
                or isinstance(value, TypedList)
                or isinstance(value, list))
            if is_listing:
                # List-like fields get one child node per item.
                for ind, item in enumerate(value):
                    i_label = self.__getListObjTreeID(item, str(f), ind)
                    self.iterField(item, i_label, my_id, prefix + '--')
            elif self.is_full_structure or value is not None:
                self.iterField(value, str(f), my_id, prefix + '--')

    def __get_display_labels(self, G):
        """Map every node of ``G`` to the display text registered for it."""
        return {node: self.nodelabel_to_displaylabel_dict[node] for node in G}

    def __set_edge_weights(self, G):
        """Copy the counted occurrences into each edge's ``weight`` attribute."""
        for u, v, d in G.edges(data=True):
            d['weight'] = self.edge_weight_dict[u][v]

    def __get_edge_weights(self, G):
        """Return the ``weight`` attributes in ``G.edges()`` order."""
        return [G[u][v]['weight'] for u, v in G.edges()]

    def avg_degree_conn_to_console(self):
        self.logger.log('rst', 'average_degree_connectivity',
                        nx.average_degree_connectivity(self.G))

    def avg_degree_conn_save_to_dict(self, stixname):
        """Store the graph's average degree connectivity under ``stixname``."""
        self.all_avg_degree_con[stixname] = nx.average_degree_connectivity(
            self.G)

    def get_edge_weight_dict(self):
        return self.edge_weight_dict

    def get_graph(self):
        return self.G

    def get_all_avg_degree_con(self):
        return self.all_avg_degree_con

    def draw(self,
             stix_name=0,
             is_width_as_weight=False,
             is_draw_min_spin_tree=False,
             pic_num_minspintree=100000):
        """Plot the collected graph, optionally with its minimum spanning tree.

        Params: stix_name: text (or int, shown as ``#<n>``) used in the
                    figure titles
                is_width_as_weight: draw edges with their counted weight as
                    line width
                is_draw_min_spin_tree: also plot the minimum spanning tree of
                    the undirected graph in a second figure
                pic_num_minspintree: accepted but not used
        """
        self.DiG = self.G

        if isinstance(stix_name, int):
            # str() is required: concatenating the int itself raised TypeError,
            # which made the default argument unusable.
            stix_name = '#' + str(stix_name)
        plt.figure("Structure Tree for STIX PACKAGE [ " + stix_name + ' ]')
        mng = plt.get_current_fig_manager()
        mng.resize(*mng.window.maxsize())
        pos = graphviz_layout(self.DiG, prog='dot', args='-Grankdir=LR')

        draw_kwargs = dict(node_size=40,
                           pos=pos,
                           edge_color='y',
                           with_labels=False)
        if is_width_as_weight:
            self.__set_edge_weights(self.DiG)
            draw_kwargs['width'] = self.__get_edge_weights(self.DiG)
        nx.draw(self.DiG, **draw_kwargs)

        if not self.is_clustering_node_by_name:
            labels = self.__get_display_labels(self.DiG)
            nx.draw_networkx_labels(self.DiG,
                                    pos=pos,
                                    labels=labels,
                                    font_color='b')
        else:
            nx.draw_networkx_labels(self.DiG, pos=pos, font_color='b')

        if is_draw_min_spin_tree:
            self.UnDiG = self.G.to_undirected()
            self.UnDiG = nx.minimum_spanning_tree(self.UnDiG)

            plt.figure("Minimun Spinning Tree for STIX PACKAGE [ " +
                       stix_name + ' ]')
            mng = plt.get_current_fig_manager()
            mng.resize(*mng.window.maxsize())
            pos = graphviz_layout(self.UnDiG, prog='dot', args='-Grankdir=LR')
            nx.draw(self.UnDiG, node_size=40, pos=pos, edge_color='y')
            nx.draw_networkx_labels(self.UnDiG, pos=pos, font_color='b')

    def draw_show(self):
        plt.show()

    def doYourWork(self, stix_package):
        """Walk ``stix_package`` (a STIXPackage or an Indicator) into the graph.

        Raises TypeError for any other input; previously this surfaced as an
        unbound-local error.
        """
        if isinstance(stix_package, STIXPackage):
            start_label = 'stix_package'
        elif isinstance(stix_package, Indicator):
            start_label = 'Indicator'
        else:
            raise TypeError('doYourWork expects a STIXPackage or an Indicator')

        self.iterField(stix_package, start_label,
                       self.__getTreeID(STIXPackage, self.first_node_label),
                       '-')
Esempio n. 4
0
class GraphicWorker:
    """Record the field structure of a STIX package as a networkx DiGraph.

    Node ids are the plain field labels, so equally named fields share one
    node. ``doYourWork`` builds the graph, ``draw`` plots it and
    ``outputDegree`` exports the node degrees.
    """

    def initG(self):
        """Create an empty graph, the leaf bookkeeping list and the logger."""
        self.G = nx.DiGraph()
        # Needed by iterField_diff_color, which failed with AttributeError
        # while this list was never created.
        self.leafNode = []
        self.logger = Logger(self)

    def __getTreeID(self, node, label):
        """Return the id used for a node in the graph.

        Params: node: the node (currently not used)
                label: the label of the node
        """
        return str(label)

    def __dumpObjFields(self, obj):
        """Return the ``_fields`` mapping stored in the instance dict of ``obj``."""
        return obj.__dict__['_fields']

    def dontDraw(self, label):
        """Return True for labels to leave out of the graph (currently none)."""
        return False

    def _iter_children(self, cur_node, cur_label, my_id, prefix):
        """Recurse through ``iterField`` into list items and ``_fields`` entries."""
        # Items of a list-like node reuse the label of the list itself.
        if isinstance(cur_node, (list, MutableSequence)):
            for item in cur_node:
                self.iterField(item, cur_label, my_id, prefix + '--')

        if hasattr(cur_node, '_fields'):
            fdict = self.__dumpObjFields(cur_node)
            for f in fdict:
                self.iterField(fdict[f], str(f), my_id, prefix + '--')

    def iterField(self, cur_node, cur_label, father_id, prefix=''):
        """Walk through each node/attribute and record the father-child edge.

        Params: cur_node: current node
                cur_label: the label of the current node
                father_id: graph id of the parent node
                prefix: grows by ``--`` per level; only useful for pretty
                    printing the layers
        """
        if not self.G.has_node(father_id) and not self.dontDraw(father_id):
            self.G.add_node(father_id)

        my_id = self.__getTreeID(cur_node, cur_label)

        if not self.G.has_edge(father_id, my_id) and not self.dontDraw(my_id):
            self.G.add_edge(father_id, my_id)

        self._iter_children(cur_node, cur_label, my_id, prefix)

    def iterField_diff_color(self, cur_node, cur_label, father_id, prefix=''):
        """Like ``iterField`` for this node, additionally updating ``self.leafNode``.

        NOTE(review): children are visited through ``iterField``, so the leaf
        bookkeeping only applies to the node this method is called on -
        confirm whether recursing into this method was intended.
        """
        if not self.G.has_node(father_id) and not self.dontDraw(father_id):
            self.G.add_node(father_id)
            self.leafNode.append(father_id)

        my_id = self.__getTreeID(cur_node, cur_label)

        if not self.G.has_edge(father_id, my_id) and not self.dontDraw(my_id):
            self.G.add_edge(father_id, my_id)
            # A node that gained a child is no longer a leaf.
            if father_id in self.leafNode:
                self.leafNode.remove(father_id)

        self._iter_children(cur_node, cur_label, my_id, prefix)

    def outputDegree(self, filename):
        """Write ``NODE NAME,DEGREE`` lines for every graph node to ``filename``."""
        # dict() accepts both the plain dict returned by NetworkX 1.x and the
        # DegreeView of (node, degree) pairs returned by NetworkX 2.x.
        degrees = dict(self.G.degree())
        self.logger.log('info', 'output degree list to:', filename)
        # Text mode: the rows are str, which a binary handle rejects on
        # Python 3.
        with open(filename, 'w') as f:
            f.write('NODE NAME' + ',' + 'DEGREE\n')
            for node, degree in degrees.items():
                f.write(str(node) + ',' + str(degree) + '\n')
        self.logger.log('info', 'output degree list done')

    def outputWeight(self, filename):
        """Alias of ``outputDegree``."""
        self.outputDegree(filename)

    def draw(self):
        """Plot the minimum spanning tree (figure 1) and the graph (figure 2)."""
        self.DiG = self.G
        self.UnDiG = self.G.to_undirected()
        self.UnDiG = nx.minimum_spanning_tree(self.UnDiG)

        plt.figure(1)
        mng = plt.get_current_fig_manager()
        mng.resize(*mng.window.maxsize())
        pos = graphviz_layout(self.UnDiG, prog='dot', args='-Grankdir=LR')
        nx.draw(self.UnDiG, node_size=40, pos=pos, edge_color='y')
        nx.draw_networkx_labels(self.UnDiG, pos=pos, font_color='b')

        plt.figure(2)
        mng = plt.get_current_fig_manager()
        mng.resize(*mng.window.maxsize())
        pos = graphviz_layout(self.DiG, prog='dot', args='-Grankdir=LR')
        nx.draw(self.DiG, node_size=40, pos=pos, edge_color='y')
        nx.draw_networkx_labels(self.DiG, pos=pos, font_color='b')
        plt.show()

    def doYourWork(self, stix_package):
        """Reset the graph and walk ``stix_package`` (a STIXPackage) into it."""
        assert isinstance(stix_package, STIXPackage)

        self.initG()

        self.iterField(stix_package, 'stix_package',
                       self.__getTreeID(STIXPackage, 'root_start'), '-')
Esempio n. 5
0
class DataParsingFactory:
    """Run a sequence of named jobs over STIX packages.

    ``goFindSomeoneDoThisJob`` walks the jobs it is given, hands each one
    to a worker object and reads its options from keyword "requirements".
    State that outlives a single job is kept on the instance:

    * ``fieldTree``  - set by the analyse / load jobs, needed by the save
      and print jobs (deliberately NOT created in ``__init__``: its
      presence is what those jobs test for).
    * ``tmp_dict`` / ``tmp_G`` - edge weights and graph of the latest
      graph job.
    * ``jobs`` / ``time_per_job`` - bookkeeping, see print_time_per_job().
    """

    def __init__(self):
        self.logger = Logger(self)
        self.tmp_G = None
        self.tmp_dict = None
        # One tuple of jobs per goFindSomeoneDoThisJob() call.
        self.jobs = []
        # One elapsed time (seconds) per completed job, over all calls.
        self.time_per_job = []
        # Defaults so the helpers below can be used before the first run.
        self.requirements = {}
        self.is_dir = False

    def stix_packages_fn_iterater(self, fn_or_dir, stopafter, onlyuse=None):
        """Yield the STIX file names to process for *fn_or_dir*.

        A directory is enumerated with stixFileNameInDirectory(); anything
        else is treated as a single XML file and handed to
        xmlFileName2EnumStixFileName().  ``self.is_dir`` records which of
        the two applied (it is set when iteration starts).

        Params: fn_or_dir: directory or XML file name
                stopafter: forwarded unchanged to the enumerating helper
                onlyuse: forwarded to the directory helper only
        """
        self.is_dir = os.path.isdir(fn_or_dir)
        if self.is_dir:
            names = stixFileNameInDirectory(fn_or_dir,
                                            stopafter=stopafter,
                                            onlyuse=onlyuse)
        else:
            names = xmlFileName2EnumStixFileName(fn_or_dir,
                                                 stopafter=stopafter)
        for name in names:
            yield name

    def node_iterator(self, stix_package, iter_start):
        """Yield the nodes of *stix_package* a worker should visit.

        With ``iter_start == 'indicator'`` every entry of
        ``stix_package.indicators`` is yielded (nothing at all when that
        attribute is None); any other value yields the package itself.
        """
        if iter_start != 'indicator':
            yield stix_package
            return
        if stix_package.indicators is None:
            return
        for indicator in stix_package.indicators:
            yield indicator

    def __getParam(self, argname, nonevalue=None, errmsg=None):
        """Return requirement *argname*, or *nonevalue* when it is absent.

        When *errmsg* is given the requirement is mandatory: a missing one
        is logged and the process is terminated with exit status -1.
        """
        if argname in self.requirements:
            return self.requirements[argname]
        if errmsg is not None:
            self.logger.log('err', errmsg)
            raise SystemExit(-1)
        return nonevalue

    def goFindSomeoneDoThisJob(self, *jobs, **requirements):
        """Run every job in *jobs*, in order, configured by *requirements*.

        Params: jobs: JobType members to execute
                requirements: per-job options, e.g. 'xmlfilename',
                    'stopafter', 'justdo', 'fieldtreepklfn', 'csvfilename'
                    and the graph options read in _run_graph_job()

        Returns: -1 as soon as a job lacks a mandatory requirement (the
                 remaining jobs are not run), otherwise None.

        The elapsed time of each completed job is appended to
        ``self.time_per_job``.
        """
        self.requirements = requirements
        self.jobs.append(jobs)
        for job in jobs:
            self.logger.log('info', 'Start to work on', job)
            time_start = time.time()
            time_end = -1

            if job is JobType.ParseStixFromXmlAndPrintValuesToConsole:
                if not self._has_xml_file_name(job, requirements):
                    return -1
                worker = StixParseWorker()
                self._feed_packages_to_worker(worker, requirements)
                # The list result is not used; the call is kept in case the
                # getter has side effects - TODO confirm and drop.
                worker.getStixFieldsList()
                pprintDict(worker.getStixFieldsDict())

            if job is JobType.AnalyzeStixFromXmlAndBuildFieldTree:
                if not self._has_xml_file_name(job, requirements):
                    return -1
                worker = FieldsDocumentaryWorker()
                self._feed_packages_to_worker(worker, requirements)
                self.fieldTree = worker.getTree()

            if job is JobType.SaveFieldTree:
                if not self._has_field_tree(job):
                    return -1
                fieldtreepklfn = requirements.get(
                    'fieldtreepklfn', DEFAULT_TREE_PICKLE_FILE_NAME)
                worker = FieldsDocumentaryWorker()
                worker.saveTreeToFile(self.fieldTree, fieldtreepklfn)

            if job is JobType.LoadFieldTree:
                fieldtreepklfn = requirements.get(
                    'fieldtreepklfn', DEFAULT_TREE_PICKLE_FILE_NAME)
                worker = FieldsDocumentaryWorker()
                self.fieldTree = worker.loadTreeFrFile(fieldtreepklfn)

            if job is JobType.PrintFieldTreeToConsole:
                if not self._has_field_tree(job):
                    return -1
                worker = FieldsDocumentaryWorker()
                worker.printTree2Console(self.fieldTree)

            if job is JobType.PrintFieldTreeToCsvFile:
                if not self._has_field_tree(job):
                    return -1
                if 'csvfilename' not in requirements:
                    self.logger.log(
                        'err', '{', job, '}',
                        'we need a CSV file name to save your tree')
                    return -1
                worker = FieldsDocumentaryWorker()
                worker.printTree2Csv(self.fieldTree,
                                     requirements['csvfilename'])

            # notice: there are two job types handled by this branch
            if job in [
                    JobType.AnalyzeStixFromXmlAndDrawAGraph,
                    JobType.FeedDataAndDrawWeightedGraph
            ]:
                if not self._has_xml_file_name(job, requirements):
                    return -1
                time_end = self._run_graph_job(job, requirements)

            if time_end == -1:
                time_end = time.time()
            self.time_per_job.append(time_end - time_start)

            self.logger.log('info', 'Job', job, 'has done!')

        self.logger.log('info', 'All the jobs have done! Enjoy your data!')

    def _has_xml_file_name(self, job, requirements):
        """Return True if 'xmlfilename' was supplied; log an error if not."""
        if 'xmlfilename' in requirements:
            return True
        self.logger.log(
            'err', '{', job, '}',
            'at least give me a xml file name to parse, please :)')
        return False

    def _has_field_tree(self, job):
        """Return True if a field tree was built or loaded; else log."""
        if hasattr(self, 'fieldTree'):
            return True
        self.logger.log(
            'err', '{', job, '}',
            'we don\'t even a field-tree at this point to save, we need to build it first.'
        )
        return False

    def _feed_packages_to_worker(self, worker, requirements):
        """Call ``worker.consumeStix`` for each selected STIX package.

        Honours 'stopafter' (forwarded to the file enumeration) and
        'justdo' (when > -1, only the package with that index is used).
        """
        stop_after = requirements.get('stopafter', -1)
        only_round = requirements.get('justdo', -1)
        packages = self.stix_packages_fn_iterater(
            requirements['xmlfilename'], stopafter=stop_after)
        for ind, stix_fn in enumerate(packages):
            # Compare by value: identity (``is``) on ints only happens to
            # work for small cached numbers.
            if only_round > -1 and ind != only_round:
                continue
            self.logger.log(
                'info', 'I\'m working on stix_package #' + str(ind))
            worker.consumeStix(stixFileName2StixPackageObj(stix_fn))

    def _run_graph_job(self, job, requirements):
        """Build (and optionally draw / export) the graph for *job*.

        Serves both JobType.AnalyzeStixFromXmlAndDrawAGraph (GraphicWorker)
        and JobType.FeedDataAndDrawWeightedGraph (GraphicWithDataWorker).

        Returns: the time stamp taken just before the blocking
                 ``draw_show()`` call when 'isdrawgraph' is set, else -1
                 (the caller then takes the end time itself).
        """
        get = requirements.get
        xmlfilename = requirements['xmlfilename']
        stopAfterFinishRound = get('stopafter', -1)
        justDoThisRound = get('justdo', -1)
        weightCsvFileName = get('csvfilename', -1)
        isdrawgraph = get('isdrawgraph', False)
        isforeachpackage = get('isforeachpackage', False)
        isdrawminspintree = get('isdrawminspintree', False)
        iswidthasweight = get('iswidthasweight', False)
        isclusteringnodebyname = get('isclusteringnodebyname', False)
        islistnodeidx = get('islistnodeidx', False)
        isfullstructure = get('isfullstructure', False)
        isuseexistedtablenames = get('isuseexistedtablenames', False)
        issavetablesforeachpackage = get('issavetablesforeachpackage', False)
        rowsCsvFileName = get('rowscsvfilename', -1)
        pickleFileName = get('picklefilename', -1)
        # NOTE(review): the default is -1, which is truthy, so the average
        # degree connectivity is reported unless the caller passes a falsy
        # value - confirm that this is intended.
        isavgdegreecon = get('isavgdegreecon', -1)
        hookOnNode = get('hookonnode')
        hookOnStart = get('hookonstart')
        hookOnEnd = get('hookonend')
        # Read from its own key (it used to be read from 'hookonafterpackage').
        hookOnBeforePackage = get('hookonbeforepackage')
        hookOnAfterPackage = get('hookonafterpackage')

        if hookOnStart is not None:
            hookOnStart()

        if job is JobType.AnalyzeStixFromXmlAndDrawAGraph:
            worker = GraphicWorker()
        else:
            worker = GraphicWithDataWorker()
        if isclusteringnodebyname:
            worker.set_is_clustering_node_by_name()
        if isfullstructure:
            worker.set_is_full_structure()
        if islistnodeidx:
            worker.set_is_display_list_node_index()

        ioworker = None
        rowlist = collist = None
        if (weightCsvFileName != -1 or rowsCsvFileName != -1
                or pickleFileName != -1):
            ioworker = IOWorker()
            if issavetablesforeachpackage and isuseexistedtablenames:
                # Reuse the row/column names of the previous graph job.
                rowlist, collist = ioworker.get_child_parent_lists(
                    self.tmp_dict)
        # Row-wise output (ioworker.outputWeightRow) is currently disabled,
        # so 'issaverowforeachpackage' has no effect.

        if hookOnNode is not None:
            worker.set_hook_on_node(hookOnNode)

        for ind, stix_fn in enumerate(
                self.stix_packages_fn_iterater(
                    xmlfilename, stopafter=stopAfterFinishRound)):
            # Runs for every enumerated package, skipped ones included.
            if hookOnBeforePackage is not None:
                hookOnBeforePackage()

            if justDoThisRound != -1:
                if justDoThisRound == -2:
                    break
                if isinstance(justDoThisRound, str):
                    # 'justdo' given as a file name: process that package
                    # only, then stop at the next iteration.
                    if os.path.split(stix_fn)[1] != justDoThisRound:
                        continue
                    justDoThisRound = -2
                elif ind != justDoThisRound:
                    continue
            self.logger.log(
                'info', 'I\'m working on stix_package #' + str(ind))
            stix_package = stixFileName2StixPackageObj(stix_fn)
            # Bound before the node loop so the after-package hook can use
            # it whether or not 'isforeachpackage' is set.
            thisStixName = stix_fn.split('/')[-1]

            for node_ind, node in enumerate(
                    self.node_iterator(stix_package, 'indicator')):
                if isforeachpackage:
                    worker.clear_graph()

                worker.doYourWork(node)

                if isforeachpackage:
                    self.tmp_dict = worker.get_edge_weight_dict()
                    self.tmp_G = worker.get_graph()
                    if isavgdegreecon:
                        if pickleFileName == -1:
                            worker.avg_degree_conn_to_console()
                        else:
                            worker.avg_degree_conn_save_to_dict(thisStixName)
                    if weightCsvFileName != -1 and issavetablesforeachpackage:
                        self._save_package_weight_table(
                            ioworker, weightCsvFileName, thisStixName,
                            isuseexistedtablenames, rowlist, collist)
                    if isdrawgraph:
                        stix_name = thisStixName if self.is_dir else ind
                        # str(): stix_name is the int index for a
                        # single-file source.
                        worker.draw(
                            str(stix_name) + str(node_ind),
                            is_width_as_weight=iswidthasweight,
                            is_draw_min_spin_tree=isdrawminspintree)
                # NOTE(review): despite its name this hook fires once per
                # node, not once per package - confirm.
                if hookOnAfterPackage is not None:
                    hookOnAfterPackage(stixname=thisStixName,
                                       weights=self.tmp_dict)

        if (job is JobType.FeedDataAndDrawWeightedGraph
                and not isforeachpackage):
            self.tmp_dict = worker.get_edge_weight_dict()
            self.tmp_G = worker.get_graph()
            if isavgdegreecon:
                if pickleFileName == -1:
                    worker.avg_degree_conn_to_console()
                else:
                    worker.avg_degree_conn_save_to_dict('all_stix')
            if isdrawgraph:
                worker.draw("All Stix Packages",
                            is_width_as_weight=iswidthasweight,
                            is_draw_min_spin_tree=isdrawminspintree)

            if weightCsvFileName != -1 and issavetablesforeachpackage:
                if len(weightCsvFileName.split('%s')) > 1:
                    self.logger.log(
                        'err',
                        'remember to remove %s in the target file name')
                    raise SystemExit(-1)
                # Reject targets that name a directory.  The previous test
                # (``not dirname is not ''``) rejected every bare file name
                # instead, contradicting its own error message.
                if (os.path.basename(weightCsvFileName) == ''
                        or os.path.isdir(weightCsvFileName)):
                    self.logger.log(
                        'err',
                        'target file name is a directory, please change to a file name'
                    )
                    raise SystemExit(-1)
                if isuseexistedtablenames:
                    ioworker.outputWeightTable(
                        self.tmp_dict, weightCsvFileName, rowlist, collist)
                else:
                    ioworker.outputWeightTable(
                        self.tmp_dict, weightCsvFileName)

        if pickleFileName != -1:
            ioworker.pickle_dump(worker.get_all_avg_degree_con(),
                                 pickleFileName)

        time_end = -1
        if isdrawgraph:
            # Stop the clock before the call that shows the figures.
            time_end = time.time()
            worker.draw_show()
        if hookOnEnd is not None:
            hookOnEnd()
        return time_end

    def _save_package_weight_table(self, ioworker, csv_pattern, stix_name,
                                   use_given_names, rowlist, collist):
        """Write ``self.tmp_dict`` to the per-package CSV file.

        *csv_pattern* must contain '%s' exactly once; it is replaced by
        *stix_name*.  Missing parent directories are created.  Terminates
        the process (status -1) when the pattern is malformed.
        """
        twoparts = csv_pattern.split('%s')
        if len(twoparts) != 2:
            self.logger.log(
                'err',
                'please include %s (only once) inside the target csv filename, to decide where to write the corresponding stix file name'
            )
            raise SystemExit(-1)
        if os.path.dirname(twoparts[1]) != '':
            self.logger.log(
                'warn',
                '%s is in a directory name not a filename, will create many directories.'
            )
        thisCsvFileName = twoparts[0] + stix_name + twoparts[1]

        target_dir = os.path.dirname(thisCsvFileName)
        # An empty dirname means "current directory": nothing to create
        # (os.makedirs('') would raise).
        if target_dir:
            try:
                os.makedirs(target_dir)
            except OSError as e:
                if e.errno != errno.EEXIST:
                    self.logger.log('err', 'directory create fail')
                    raise

        if use_given_names:
            ioworker.outputWeightTable(self.tmp_dict, thisCsvFileName,
                                       rowlist, collist)
        else:
            ioworker.outputWeightTable(self.tmp_dict, thisCsvFileName)

    def print_time_per_job(self):
        """Log the elapsed time of every completed job, in run order.

        Iterates the recorded timings themselves: ``self.jobs`` holds one
        tuple per goFindSomeoneDoThisJob() call, not one entry per job, so
        it cannot be used to index ``self.time_per_job``.
        """
        for i, elapsed in enumerate(self.time_per_job):
            self.logger.log('info', 'Time of Job #' + str(i), elapsed,
                            'seconds')
Esempio n. 6
0
class FieldsDocumentaryWorker():
    """Record which fields occur below which parent in STIX objects.

    The result is ``fieldTree``: a dict mapping a parent id to the list of
    distinct child ids seen directly below it.  An id is
    ``str(type(node)) + '@@' + str(label)`` (see __getTreeID).
    """

    def __init__(self):
        self.logger = Logger(self)
        # Created per instance: a class-level dict would be shared by, and
        # keep growing across, every worker ever created.
        self.fieldTree = {}

    def consumeStix(self, stix_package):
        """Main method, takes a stix object in and parses it.

        Params: stix_package: STIXPackage object (asserted)
        """
        assert isinstance(stix_package, STIXPackage)

        # Enable this to use the experimental printing walk instead:
        # self.iterFieldAndPrint(stix_package, '-', stix_package)

        # NOTE(review): the root id is built from the STIXPackage class
        # itself, not from an instance - presumably a sentinel; confirm.
        root_id = self.__getTreeID(STIXPackage, 'root_start')
        self.iterField(stix_package, 'stix_package', root_id, '-')

    def iterFieldAndPrint(self, obj, prefix, father, label=''):
        """Experimental variant of iterField() that prints while walking.

        Nothing is recorded in ``fieldTree``; every visited object is
        logged and printed in a tree-like way, with *prefix* growing by
        '--' per nesting level.

        Params: obj: object to visit
                prefix: indentation marker for this level
                father: the object *obj* was reached from
                label: field name under which *obj* was found
        """
        if isinstance(obj, list) or isinstance(obj, MutableSequence):
            for item in obj:
                self.iterFieldAndPrint(item, prefix + '--', obj)
        self.logger.log('rst', prefix, obj.__class__, '@@', label, '<<',
                        father.__class__)
        # don't delete this, enable this to check if there is any List like
        # Entity List.  (Single-argument print() works on Python 2 and 3.)
        print('%s %s << %s' % ('+' + prefix[1:], obj, father))
        if not hasattr(obj, '_fields'):
            return
        fdict = self.__dumpObjFields(obj)
        for f in fdict:
            self.iterFieldAndPrint(fdict[f], prefix + '--', obj, f)

    def __getTreeID(self, node, label):
        """Combine the node's type and its label into a tree id.

        Params: node: the node
                label: the label of the node
        """
        return str(type(node)) + '@@' + str(label)

    def iterField(self, cur_node, cur_label, father_id, prefix=''):
        """Walk each node/attribute and record parent-child relations.

        The relation is stored in ``self.fieldTree``: the id of *cur_node*
        is appended (once) to the row of *father_id*, then the walk
        recurses into sequence items and into the node's ``_fields``.

        Params: cur_node: current node
                cur_label: the label of current node
                father_id: the node name + the label of parent node
                prefix: only useful for pretty-printing the layers
        """
        # combine node name and label and get an id used in the tree
        my_id = self.__getTreeID(cur_node, cur_label)

        # init the father's row on first sight, then insert this node once
        children = self.fieldTree.setdefault(father_id, [])
        if my_id not in children:
            children.append(my_id)

        # if cur_node is a list or EntityList or TypedList or else, enum them
        if isinstance(cur_node, list) or isinstance(cur_node, MutableSequence):
            for item in cur_node:
                self.iterField(item, cur_label + '[i]', my_id, prefix + '--')

        # if cur_node has listable fields, enum them
        if hasattr(cur_node, '_fields'):
            fdict = self.__dumpObjFields(cur_node)
            for f in fdict:
                self.iterField(fdict[f], str(f), my_id, prefix + '--')

    def __dumpObjFields(self, obj):
        """Return the ``_fields`` entry of the object's instance dict."""
        return obj.__dict__['_fields']

    @staticmethod
    def _format_row(father, children):
        """Return 'father;child1;child2...' (a bare 'father;' if childless)."""
        return str(father) + ';' + ';'.join(str(child) for child in children)

    def printTree2Csv(self, fieldTree, csvfilename):
        """Write the tree structure to a ';'-separated file, one father per line."""
        self.logger.log('info', 'Writing tree to CSV file \'', csvfilename, '\'...')
        with open(csvfilename, 'wb') as f:
            for father in fieldTree:
                line = self._format_row(father, fieldTree[father]) + '\n'
                # The file is binary: native Python 2 strings are written
                # unchanged, text strings are encoded first.
                if not isinstance(line, bytes):
                    line = line.encode('utf-8')
                f.write(line)
        self.logger.log('info', 'Writing finished.')

    def printTree2Console(self, fieldTree):
        """Log the tree structure, one father per line."""
        for father in fieldTree:
            self.logger.log('rst', self._format_row(father, fieldTree[father]))

    def getTree(self):
        """Return the field tree built so far (the live dict, not a copy)."""
        return self.fieldTree

    def saveTreeToFile(self, fieldTree, filename):
        """Dump the field tree with pickle and store it to file.

        Returns: -1 (after logging) when *filename* is empty or not a str,
                 otherwise None.
        """
        if not filename or not isinstance(filename, str):
            self.logger.log('err', 'I need a file name where the object is going be saved')
            return -1
        self.logger.log('info', 'Dumping tree object to pickle file \'', filename, '\'...')
        saveObjToFile(fieldTree, filename)
        self.logger.log('info', 'Dumping finished')

    def loadTreeFrFile(self, filename=''):
        """Load field tree from pickle file.

        NOTE(review): presumably this unpickles *filename*; unpickling can
        execute arbitrary code, so only load files you trust.

        Returns: the loaded object, or -1 (after logging) when *filename*
                 is empty or not a str.
        """
        if not filename or not isinstance(filename, str):
            self.logger.log('err', 'I need a file name to load from')
            return -1
        self.logger.log('info', 'Loading tree object from pickle file \'', filename, '\'...')
        return loadObjFrFile(filename)
Esempio n. 7
0
class IOWorker:
    """Write edge-weight dictionaries out as CSV tables."""

    def __init__(self):
        self.logger = Logger(self)

    @staticmethod
    def _as_bytes(text):
        """Return *text* ready for a file opened in binary mode.

        Native Python 2 strings are already bytes and pass through
        unchanged; text strings are encoded as UTF-8.
        """
        return text if isinstance(text, bytes) else text.encode('utf-8')

    def outputWeightTable(self, weights, filename, rowlist=None, collist=None):
        """Write *weights* to *filename* as a child-by-parent CSV table.

        weights: ex.: {'parent1': {'child1': 5, 'child2': 4},
                       'parent2': {'child2': 3, 'child3': 1}}
        rowlist: ex.: ['child1', 'child2'...]
        collist: ex.: ['parent1', 'parent2'...]
        target table:
                  parent1     parent2
        child1      5            0
        child2      4            3
        child3      0            1

        Missing parent/child combinations are written as 0.  When
        *rowlist* or *collist* is omitted, both are derived from *weights*
        with get_child_parent_lists().
        """
        assert isinstance(weights, dict)
        assert isinstance(rowlist, list) or rowlist is None
        assert isinstance(collist, list) or collist is None

        self.logger.log('info', 'writing weight dict to:', filename)
        if rowlist is not None and collist is not None:
            self.logger.log('info', 'using provided row and col index.')
        else:
            self.logger.log('info', 'building row and col index by myself.')
            rowlist, collist = self.get_child_parent_lists(weights)

        # One row per child, one cell per parent.
        target_table = [
            [weights.get(parent_name, {}).get(child_name, 0)
             for parent_name in collist]
            for child_name in rowlist
        ]

        self.logger.log('info', target_table)

        with open(filename, 'wb') as f:
            # Header: an empty corner cell followed by the parent names.
            header = ''.join(',' + parent_name for parent_name in collist)
            f.write(self._as_bytes(header + '\n'))
            for child_name, row in zip(rowlist, target_table):
                cells = ''.join(',' + str(cell) for cell in row)
                f.write(self._as_bytes(child_name + cells + '\n'))
        self.logger.log('info', 'writing weight dict is done.')

        return

    # NOTE: a disabled writeWeightTitle() helper used to live here; it wrote
    # a 'Stix_Name,<parent>-><child>,...' title row for row-wise output.

    def get_child_parent_lists(self, weight_dict):
        """Return ``(child_list, parent_list)`` for *weight_dict*.

        parent_list holds the keys of *weight_dict*; child_list holds every
        distinct child found below any parent, in first-seen order.
        """
        parent_list = []
        child_list = []
        seen_children = set()
        for p in weight_dict:
            parent_list.append(p)
            for c in weight_dict[p]:
                # A set lookup replaces the previous list.index() probe.
                if c not in seen_children:
                    seen_children.add(c)
                    child_list.append(c)
        return child_list, parent_list