Example #1
0
    def __init__(self):
        """Load settings, build the text helpers and recreate the index.

        If an Elasticsearch index with the configured name already exists it
        is deleted, then an empty index of that name is created.
        """
        # Read the settings.
        settings = Conf()
        self.conf = settings
        self.xml_path = settings.getConfig("path", "xml_path")
        self.index_name = settings.getConfig("search", "index_name")
        self.doc_type = settings.getConfig("search", "doc_type")

        # Tokenizer, lemmatizer, stemmer and the English stop-word set.
        self.tokenizer = RegexpTokenizer(r"\w+")
        self.lem = WordNetLemmatizer()
        self.stemmer = PorterStemmer()
        self.stopwords = set(stopwords.words("english"))

        client = Elasticsearch()
        self.es = client
        self.fields = settings.getImportant()

        # An ES index and doc_type are comparable to a MySQL database and
        # table.  Drop the index when it already exists, then create it anew.
        target = self.index_name
        if client.indices.exists(index=target):
            client.indices.delete(index=target)
        client.indices.create(index=target)
        print("created index:" + target)
Example #2
0
 def __init__(self):
     """Collect the command-line options, create the queues, read the config.

     The configuration is read from ``config.cfg`` in the directory that
     contains this source file.
     """
     self.options = CommandLineOptions().get_options()
     self.server_connection = None

     # maxsize=0 makes the queues unbounded.
     self.send_packet_queue = Queue.Queue(maxsize=0)
     self.recv_packet_queue = Queue.Queue(maxsize=0)

     self.config = Conf()
     module_dir = os.path.dirname(os.path.realpath(__file__))
     config_path = os.path.join(module_dir, "config.cfg")
     self.config.read([config_path])
Example #3
0
 def run(self):
     """Process Snort events into the database output, then print the config.

     The configuration is read from ``config.cfg`` (relative to the current
     working directory) with ``check_neededEntries`` disabled.
     """
     conf = Conf()
     conf.read(['config.cfg'], check_neededEntries=False)
     output = OutputDB(conf)
     sep = SnortEventsParser(output)
     sep.process()
     # Function-call form: same output on Python 2, and valid on Python 3.
     print(conf)
Example #4
0
 def __init__(self):
     """Create the text helpers, empty model slots and load the XML path."""
     # Tokenizer, lemmatizer and the English stop-word set.
     self.tokenizer = RegexpTokenizer(r'\w+')
     self.lem = WordNetLemmatizer()
     self.stopwords = set(stopwords.words('english'))

     # Nothing is built yet: dictionary, corpus and BM25 model start empty.
     self.dict = self.corpus = self.bm25model = None
     self.docs_list = []

     settings = Conf()
     self.conf = settings
     self.xml_path = settings.getConfig('path', 'xml_path')
Example #5
0
class IRSearch(object):
    """Build Elasticsearch query bodies from templates and run them.

    Query templates come from ``Conf.getSeachModel()`` and are indexed by
    search type (``match``, ``term``, ``bool`` ...).
    """

    def __init__(self):
        """Read the search settings and create the two search clients."""
        self.conf = Conf()
        self.xml_path = self.conf.getConfig('path', 'xml_path')
        self.index_name = self.conf.getConfig('search', 'index_name')
        self.doc_type = self.conf.getConfig('search', 'doc_type')
        self.es = Elasticsearch()
        self.search_body = {}
        self.search_type_support = ['match_all', 'term', 'terms',
                                    'match', 'multi_match', 'bool', 'range', 'prefix', 'wildcard']
        self.search_type_model = self.conf.getSeachModel()
        # Second client, used only by querySingle().
        self.conn = ES('127.0.0.1:9200')
        self.search_result = None

        self.conn.default_indices = [self.index_name]

    def makeQuery(self, searchtype, searchfield, keyword, is_sort=False, is_aggs=False, is_multi_match=False, use_bool=""):
        """Return the query body for one search, or None if unsupported.

        searchtype     -- one of ``self.search_type_support``.
        searchfield    -- field name (a list of fields for a multi-match).
        keyword        -- value to search for.
        is_sort        -- accepted for compatibility; currently unused.
        is_aggs        -- accepted for compatibility; currently unused.
        is_multi_match -- build a ``{"query": ..., "fields": ...}`` clause.
        use_bool       -- name of the bool occurrence (e.g. ``"must"``) under
                          which a single term clause is placed.

        The body is also stored in ``self.search_body``.
        """
        import copy

        if searchtype not in self.search_type_support:
            print('Ops, your search type is not supported')
            print('Supported search types:\n')
            print(self.search_type_support)
            return None

        # Work on a private copy: filling in the shared template directly
        # would leak the fields of this query into every later query of the
        # same type.
        body = copy.deepcopy(self.search_type_model[searchtype])
        if is_multi_match:
            body["query"][searchtype] = {
                "query": keyword,
                "fields": searchfield
            }
        elif use_bool:
            body["query"][searchtype][use_bool] = [{
                "term": {
                    searchfield: keyword
                }
            }]
        else:
            body["query"][searchtype][searchfield] = keyword

        self.search_body = body
        print(self.search_body)
        return self.search_body

    def Query(self, searchtype, searchfield, keyword, is_sort=False, is_aggs=False, is_multi_match=False, use_bool=""):
        """Build the query with makeQuery() and return the raw search result.

        NOTE(review): for an unsupported search type makeQuery() returns
        None and that None is passed on as the request body -- confirm this
        is intended.
        """
        query_body = self.makeQuery(
            searchtype, searchfield, keyword, is_sort, is_aggs, is_multi_match, use_bool)
        return self.es.search(index=self.index_name,
                              doc_type=self.doc_type, body=query_body)

    def querySingle(self, searchfield, keyword):
        """Run a single term query through ``self.conn``.

        The result is stored in ``self.search_result`` and also returned.
        """
        q = TermQuery(searchfield, keyword)
        self.search_result = self.conn.search(query=q)
        return self.search_result
Example #6
0
 def __init__(self):
     """Read the search settings and create the two search clients."""
     settings = Conf()
     self.conf = settings
     self.xml_path = settings.getConfig('path', 'xml_path')
     self.index_name = settings.getConfig('search', 'index_name')
     self.doc_type = settings.getConfig('search', 'doc_type')

     self.es = Elasticsearch()
     self.search_body = {}
     # Search types for which a query template is expected.
     self.search_type_support = [
         'match_all', 'term', 'terms', 'match', 'multi_match',
         'bool', 'range', 'prefix', 'wildcard',
     ]
     self.search_type_model = settings.getSeachModel()

     # Second client, restricted to the configured index.
     client = ES('127.0.0.1:9200')
     self.conn = client
     self.search_result = None
     client.default_indices = [self.index_name]
Example #7
0
class Test(object):
    """Ad-hoc checks against the configured Elasticsearch index."""

    def __init__(self):
        """Read the search settings and create the Elasticsearch client."""
        self.conf = Conf()
        self.xml_path = self.conf.getConfig("path", "xml_path")
        self.index_name = self.conf.getConfig("search", "index_name")
        self.doc_type = self.conf.getConfig("search", "doc_type")
        self.es = Elasticsearch(timeout=30, max_retries=10, retry_on_timeout=True)
        self.search_body = {}
        self.search_type_support = [
            "match_all",
            "term",
            "terms",
            "match",
            "multi_match",
            "bool",
            "range",
            "prefix",
            "wildcard",
        ]
        self.search_type_model = self.conf.getSeachModel()

    def getCount(self):
        """Print the document count of the configured index / doc type."""
        print(self.es.count(index=self.index_name, doc_type=self.doc_type))

    def searchSingle(self):
        """Run a fixed ``id_info`` match query and record the hits.

        Every hit's ``_source`` is printed and its ``id_info`` is appended,
        one per line, to the file ``carcinoma`` in the working directory.
        """
        res = self.es.search(
            index=self.index_name,
            doc_type=self.doc_type,
            body={
                "query": {"match": {"id_info": "NCT02065063"}},
                "size": 10000,
            },
        )

        hits = res["hits"]["hits"]
        if not hits:
            # Nothing to record; do not create the output file.
            return

        # Open the output once instead of once per hit.
        with open("carcinoma", 'a') as f:
            for r in hits:
                print(r["_source"])
                f.write("{}\n".format(r["_source"]["id_info"]))

    def getPickles(self, pickle_path):
        """Return the object stored in the pickle file at *pickle_path*.

        WARNING: unpickling can execute arbitrary code; only load files
        from a trusted source.
        """
        with open(pickle_path, 'rb') as pf:
            return pickle.load(pf)
Example #8
0
class OutputServer(OutputPlugins):
    """Output plugin that sends events and plugin state over a connection."""

    def __init__(self, conn):
        """Attach to *conn* and read the ``output-server`` settings.

        Events are only forwarded when the configuration has an
        ``output-server`` section whose ``send_events`` option is true.
        """
        logger.info("Added Server output (%s:%s)"%(conn.get_server_ip(), conn.get_server_port()))
        self.conn=conn
        self.activated=True
        self.send_events=False
        self.conf=Conf()
        self.options=CommandLineOptions().get_options()

        # A config file given in the options overrides the default one.
        if self.options.config_file:
            conffile=self.options.config_file
        else:
            conffile=self.conf.DEFAULT_CONFIG_FILE

        self.conf.read([conffile],False)

        if self.conf.has_section("output-server"):
            if self.conf.getboolean("output-server", "send_events"):
                self.send_events=True

    def event(self, e):
        """Send event *e* (as a string) if forwarding is enabled.

        Delivery is best effort: a failure is logged and otherwise ignored.
        """
        if not (self.activated and self.send_events):
            return

        try:
            if self.conn.get_is_alive():
                self.conn.send(str(e))
        except Exception as err:
            # Was a bare "except:", which also hid KeyboardInterrupt and
            # SystemExit and left no trace of the failure.
            logger.warning("Could not send event to server: %s" % str(err))

    def plugin_state(self, msg):
        """Send the plugin state message *msg*; failures are logged only."""
        if not self.activated:
            return

        try:
            self.conn.send(msg)
        except Exception as err:
            logger.warning("Could not send plugin state to server: %s" % str(err))

    def shutdown(self):
        """Close the connection and deactivate this output."""
        try:
            self.conn.close()
        finally:
            # Deactivate even when close() fails, so nothing more is sent
            # through a half-closed connection.
            self.activated=False
Example #9
0
 def __init__(self):
     """Read the search settings and create the Elasticsearch client."""
     settings = Conf()
     self.conf = settings
     self.xml_path = settings.getConfig("path", "xml_path")
     self.index_name = settings.getConfig("search", "index_name")
     self.doc_type = settings.getConfig("search", "doc_type")

     # 30 second timeout, retried up to 10 times when a request times out.
     client_options = dict(timeout=30, max_retries=10, retry_on_timeout=True)
     self.es = Elasticsearch(**client_options)

     self.search_body = {}
     # Search types for which a query template is expected.
     self.search_type_support = [
         "match_all", "term", "terms", "match", "multi_match",
         "bool", "range", "prefix", "wildcard",
     ]
     self.search_type_model = settings.getSeachModel()
Example #10
0
    def __init__(self, conn):
        """Attach to *conn* and read the ``output-server`` settings.

        ``send_events`` becomes true only when the configuration has an
        ``output-server`` section whose ``send_events`` option is true.
        """
        logger.info("Added Server output (%s:%s)" % (conn.ip, conn.port))
        self.conn = conn
        self.activated = True
        self.send_events = False
        self.conf = Conf()
        self.options = CommandLineOptions().get_options()

        # A config file given in the options overrides the default one.
        conffile = self.options.config_file or self.conf.DEFAULT_CONFIG_FILE
        self.conf.read([conffile], 'latin1')

        section = "output-server"
        if self.conf.has_section(section) and \
                self.conf.getboolean(section, "send_events"):
            self.send_events = True
Example #11
0
class OutputServer(OutputPlugins):
    """Output plugin that sends events and plugin state over a connection."""

    def __init__(self, conn):
        """Attach to *conn* and read the ``output-server`` settings.

        Events are only forwarded when the configuration has an
        ``output-server`` section whose ``send_events`` option is true.
        """
        logger.info("Added Server output (%s:%s)" %
                    (conn.get_server_ip(), conn.get_server_port()))
        self.conn = conn
        self.activated = True
        self.send_events = False
        self.conf = Conf()
        self.options = CommandLineOptions().get_options()

        # A config file given in the options overrides the default one.
        if self.options.config_file:
            conffile = self.options.config_file
        else:
            conffile = self.conf.DEFAULT_CONFIG_FILE

        self.conf.read([conffile], 'latin1')

        if self.conf.has_section("output-server"):
            if self.conf.getboolean("output-server", "send_events"):
                self.send_events = True

    def event(self, e):
        """Send event *e* (as a string) if forwarding is enabled.

        Delivery is best effort: a failure is logged and otherwise ignored.
        """
        if not (self.activated and self.send_events):
            return

        try:
            if self.conn.get_is_alive():
                self.conn.send(str(e))
        except Exception as err:
            # Was a bare "except:", which also hid KeyboardInterrupt and
            # SystemExit and left no trace of the failure.
            logger.warning("Could not send event to server: %s" % str(err))

    def plugin_state(self, msg):
        """Send the plugin state message *msg*; failures are logged only."""
        if not self.activated:
            return

        try:
            self.conn.send(msg)
        except Exception as err:
            logger.warning("Could not send plugin state to server: %s" % str(err))

    def shutdown(self):
        """Close the connection and deactivate this output."""
        try:
            self.conn.close()
        finally:
            # Deactivate even when close() fails, so nothing more is sent
            # through a half-closed connection.
            self.activated = False
Example #12
0
    def __init__(self):
        """Read the query settings, load the pickled dictionary, build helpers."""
        settings = Conf()
        self.conf = settings
        self.query_xml_path = settings.getConfig("path", "query_xml_path")
        self.index_name = settings.getConfig("search", "index_name")
        self.doc_type = settings.getConfig("search", "doc_type")

        pickle_path = settings.getConfig("path", "dict_pickle_path")
        self.meshDict = self.getPickles(pickle_path)

        # Raise the ES timeout to 30 seconds (the default is 10) and retry up
        # to 10 times, so that large amounts of data do not cause timeouts.
        client_options = dict(timeout=30, max_retries=10, retry_on_timeout=True)
        self.es = Elasticsearch(**client_options)

        self.fields = settings.getImportant()
        self.extracted = []

        # Tokenizer, lemmatizer, stemmer and the English stop-word set.
        self.tokenizer = RegexpTokenizer(r"\w+")
        self.lem = WordNetLemmatizer()
        self.stemmer = PorterStemmer()
        self.stopwords = set(stopwords.words("english"))
Example #13
0
    def __init__(self, plugin, watch_rule):
        """Set up the monitor for one plugin / watch-rule pair.

        Reads the configuration, base64-decodes the watch-rule fields named
        in ``EVENT_BASE64``, expands the plugin's query, regexp and result
        templates, selects the time zone and finally calls ``open()``.
        """
        self.plugin = plugin
        self.options = CommandLineOptions().get_options()

        # A config file given in the options overrides the default one.
        self._conf = Conf()
        conffile = self.options.config_file or self._conf.DEFAULT_CONFIG_FILE
        self._conf.read([conffile], 'latin1')

        self.watch_rule = watch_rule
        groups = self.watch_rule.dict()
        for name, encoded in groups.iteritems():
            if name not in self.watch_rule.EVENT_BASE64:
                continue
            groups[name] = b64decode(encoded)

        self.queries = self.get_replaced_values('query', groups)
        self.regexps = self.get_replaced_values('regexp', groups)
        self.results = self.get_replaced_values('result', groups)

        # Creation time of this object; no first reading has been taken yet.
        self.initial_time = int(time.time())
        self.first_value = None

        # A plugin-specific tzone takes precedence over the configured default.
        if "tzone" not in self.plugin.hitems("DEFAULT"):
            self.timezone = self._conf.get("plugin-defaults", "tzone")
        else:
            self.timezone = self.plugin.get("DEFAULT", "tzone")
            plugin_name = self.plugin.get("config", "name")
            plugin_id = self.plugin.get("DEFAULT", "plugin_id")
            logger.debug("Plugin %s (%s) with specific tzone = %s" %
                         (plugin_name, plugin_id, self.timezone))

        self.__agenttimezone = None
        self.__EventTimeZone = None
        self.__systemTimeZone = None
        self.__set_system_tzone()
        self.__setTZData()

        self.open()
Example #14
0
    def __init__(self, plugin, watch_rule):
        """Set up the monitor for one plugin / watch-rule pair.

        Reads the configuration, expands the plugin's query, regexp and
        result templates with the watch-rule data and calls ``open()``.
        """
        self.plugin = plugin
        self.options = CommandLineOptions().get_options()

        # A config file given in the options overrides the default one.
        self._conf = Conf()
        conffile = self.options.config_file or self._conf.DEFAULT_CONFIG_FILE
        self._conf.read([conffile], False)

        self.watch_rule = watch_rule

        # Each template kind gets its own snapshot of the watch-rule data.
        rule_fields = self.watch_rule.dict
        self.queries = self.get_replaced_values('query', rule_fields())
        self.regexps = self.get_replaced_values('regexp', rule_fields())
        self.results = self.get_replaced_values('result', rule_fields())

        # Creation time of this object; no first reading has been taken yet.
        self.initial_time = int(time.time())
        self.first_value = None

        self.open()
Example #15
0
    def __init__(self, conn):
        """Attach to *conn* and read the ``output-server`` settings.

        ``send_events`` becomes true only when the configuration has an
        ``output-server`` section whose ``send_events`` option is true.
        """
        server = (conn.get_server_ip(), conn.get_server_port())
        logger.info("Added Server output (%s:%s)" % server)
        self.conn = conn
        self.activated = True
        self.send_events = False
        self.conf = Conf()
        self.options = CommandLineOptions().get_options()

        # A config file given in the options overrides the default one.
        conffile = self.options.config_file or self.conf.DEFAULT_CONFIG_FILE
        self.conf.read([conffile], False)

        section = "output-server"
        if self.conf.has_section(section) and \
                self.conf.getboolean(section, "send_events"):
            self.send_events = True
Example #16
0
    def __init__(self, plugin, watch_rule):
        """Set up the monitor for one plugin / watch-rule pair.

        Reads the configuration, base64-decodes the watch-rule fields named
        in ``EVENT_BASE64``, expands the plugin's query, regexp and result
        templates, selects the time zone and finally calls ``open()``.
        """
        self.plugin = plugin
        self.options = CommandLineOptions().get_options()

        # A config file given in the options overrides the default one.
        self._conf = Conf()
        conffile = self.options.config_file or self._conf.DEFAULT_CONFIG_FILE
        self._conf.read([conffile], 'latin1')

        self.watch_rule = watch_rule
        groups = self.watch_rule.dict()
        for name, encoded in groups.iteritems():
            if name not in self.watch_rule.EVENT_BASE64:
                continue
            groups[name] = b64decode(encoded)

        self.queries = self.get_replaced_values('query', groups)
        self.regexps = self.get_replaced_values('regexp', groups)
        self.results = self.get_replaced_values('result', groups)

        # Creation time of this object; no first reading has been taken yet.
        self.initial_time = int(time.time())
        self.first_value = None

        # A plugin-specific tzone takes precedence over the configured default.
        if "tzone" not in self.plugin.hitems("DEFAULT"):
            self.timezone = self._conf.get("plugin-defaults", "tzone")
        else:
            self.timezone = self.plugin.get("DEFAULT", "tzone")
            plugin_name = self.plugin.get("config", "name")
            plugin_id = self.plugin.get("DEFAULT", "plugin_id")
            logger.debug("Plugin %s (%s) with specific tzone = %s" %
                         (plugin_name, plugin_id, self.timezone))

        self.__agenttimezone = None
        self.__EventTimeZone = None
        self.__systemTimeZone = None
        self.__set_system_tzone()
        self.__setTZData()

        self.open()
Example #17
0
    def __init__(self, plugin, watch_rule):
        """Set up the monitor for one plugin / watch-rule pair.

        Reads the configuration, expands the plugin's query, regexp and
        result templates with the watch-rule data and calls ``open()``.
        """
        self.plugin = plugin
        self.options = CommandLineOptions().get_options()

        # A config file given in the options overrides the default one.
        self._conf = Conf()
        conffile = self.options.config_file or self._conf.DEFAULT_CONFIG_FILE
        self._conf.read([conffile], False)

        self.watch_rule = watch_rule

        # Each template kind gets its own snapshot of the watch-rule data.
        rule_fields = self.watch_rule.dict
        self.queries = self.get_replaced_values('query', rule_fields())
        self.regexps = self.get_replaced_values('regexp', rule_fields())
        self.results = self.get_replaced_values('result', rule_fields())

        # Creation time of this object; no first reading has been taken yet.
        self.initial_time = int(time.time())
        self.first_value = None

        self.open()
Example #18
0
class Monitor:
    """Monitor bound to one plugin and one server watch-rule.

    Construction reads the configuration, prepares the plugin's query,
    regexp and result templates from the watch-rule data, sets up the time
    zone information and calls ``open()``.
    """

    def __init__(self, plugin, watch_rule):
        """Set up the monitor for *plugin* and *watch_rule*."""
        self.plugin = plugin
        self.options = CommandLineOptions().get_options()

        # read configuration: a config file given in the options overrides
        # the default one
        self._conf = Conf()
        if self.options.config_file:
            conffile = self.options.config_file
        else:
            conffile = self._conf.DEFAULT_CONFIG_FILE
        self._conf.read([conffile], 'latin1')
        self.watch_rule = watch_rule

        # values of the fields named in EVENT_BASE64 are base64-decoded
        # before being substituted into the plugin templates
        groups = self.watch_rule.dict()
        for item, value in groups.iteritems():
            if item in self.watch_rule.EVENT_BASE64:
                groups[item] = b64decode(value)
        self.queries = self.get_replaced_values('query', groups)
        self.regexps = self.get_replaced_values('regexp', groups)
        self.results = self.get_replaced_values('result', groups)
        self.initial_time = int(time.time())  # initial time at object call
        self.first_value = None

        # a plugin-specific tzone takes precedence over the configured default
        if "tzone" in self.plugin.hitems("DEFAULT"):
            self.timezone = self.plugin.get("DEFAULT", "tzone")
            logger.debug("Plugin %s (%s) with specific tzone = %s" % \
                         (self.plugin.get("config", "name"),
                          self.plugin.get("DEFAULT", "plugin_id"),
                          self.timezone))
        else:
            self.timezone = self._conf.get("plugin-defaults", "tzone")

        self.__agenttimezone = None
        self.__EventTimeZone = None
        self.__systemTimeZone = None
        self.__set_system_tzone()
        self.__setTZData()

        self.open()

    def get_replaced_values(self, key, groups):
        """Return ``{rule_name: value}`` for the *key* entry of every rule.

        Plugin variables are replaced with watch_rule data, except for the
        'result' key, whose template is returned untouched.

        For example, given the following watch_rule:

            watch-rule plugin_id="2006" plugin_sid="1" condition="eq"
                       value="1" from="192.168.6.64" to="192.168.6.63"
                       port_from="5643" port_to="22"

        and the following plugin query:
            query = {$from}:{$port_from} {$to}:{$port_to}

        the variables are replaced with the watch-rule data:
            query = 192.168.6.64:5643 192.168.6.63:22
        """
        values = {}
        for rule_name, rule in self.plugin.rules().iteritems():
            if key != 'result':
                values[rule_name] = self.plugin.get_replace_value(
                    rule[key], groups)
            else:
                values[rule_name] = rule[key]

        return values

    def _plugin_defaults(self, event, log):
        """Fill the unset fields of *event* with defaults and return it.

        Defaults come from the ``plugin-defaults`` config section when it
        exists.  Source and destination IPs that do not look like IPv4
        addresses are replaced by ``0.0.0.0`` (the original text is kept in
        ``userdata8`` / ``userdata9``), *log* is stored in the event and the
        watch-rule helper fields are blanked.
        """
        # raw string, so that "\d" and "\." reach the regex engine untouched
        ipv4_reg = r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$"

        # get default values from config
        if self._conf.has_section("plugin-defaults"):

            # 1) date
            default_date_format = self._conf.get("plugin-defaults",
                                                 "date_format")
            if event["date"] is None and default_date_format:
                # one clock reading, so date and fdate can never disagree
                now = time.strftime('%Y-%m-%d %H:%M:%S',
                                    time.localtime(time.time()))
                event["date"] = now
                event["fdate"] = now

            # 2) sensor
            default_sensor = self._conf.get("plugin-defaults", "sensor")
            if event["sensor"] is None and default_sensor:
                event["sensor"] = default_sensor

            # 3) interface
            default_iface = self._conf.get("plugin-defaults", "interface")
            if event["interface"] is None and default_iface:
                event["interface"] = default_iface

            # 4) source ip
            if event["src_ip"] is None:
                event["src_ip"] = event["from"]

            # 5) dest ip
            if event["dst_ip"] is None:
                event["dst_ip"] = event["to"]

            # 6) protocol
            if event["protocol"] is None:
                event["protocol"] = "TCP"

            # 7) ports, then fall back to the sensor for missing addresses
            if event["src_port"] is None:
                event["src_port"] = event["port_from"]
            if event["dst_port"] is None:
                event["dst_port"] = event["port_to"]
            if event["src_port"] is None:
                event["src_port"] = 0
            if event["dst_port"] is None:
                event["dst_port"] = 0
            if event["src_ip"] is None:
                event["src_ip"] = event["sensor"]
            if event["dst_ip"] is None:
                event["dst_ip"] = event["sensor"]

            # 8) Time zone
            if 'tzone' in event.EVENT_ATTRS:
                Utils.normalizeToUTCDate(event, self.__EventTimeZone)

        # Check if src_ip is a valid IPv4 address; if not, put 0.0.0.0 in the
        # field and keep the real data in userdata8
        if event['src_ip'] is not None:
            if not re.match(ipv4_reg, event['src_ip']):
                data = event['src_ip']
                event['src_ip'] = '0.0.0.0'
                print(
                    "Event's field src_ip (%s) is not a valid IP.v4/IP.v6 address, set it to default ip 0.0.0.0 and real data on userdata8"
                    % (data))
                event['userdata8'] = data
        elif 'src_ip' in event.EVENT_ATTRS:
            event['src_ip'] = '0.0.0.0'

        # Same check for dst_ip; the real data is kept in userdata9
        if event['dst_ip'] is not None:
            if not re.match(ipv4_reg, event['dst_ip']):
                data = event['dst_ip']
                print(
                    "Event's field dst_ip (%s) is not a valid IP.v4 address, set it to default ip 0.0.0.0 and real data on userdata9"
                    % (data))
                event['dst_ip'] = '0.0.0.0'
                event['userdata9'] = data
        elif 'dst_ip' in event.EVENT_ATTRS:
            event['dst_ip'] = '0.0.0.0'
        event["log"] = log

        # the type of this event should always be 'monitor'
        if event["type"] is None:
            event["type"] = 'monitor'

        # Clean up mess
        event["port_from"] = ""
        event["port_to"] = ""
        event["to"] = ""
        event["from"] = ""
        event["absolute"] = ""
        event["interval"] = ""

        return event

    def __set_system_tzone(self):
        """Set ``self.systemtzone`` from the first line of /etc/timezone.

        Falls back to 'GMT' when the file cannot be read or does not name a
        timezone listed in ``all_timezones``.
        """
        try:
            # read local timezone information; the "with" block closes the
            # file even when reading fails
            with open('/etc/timezone', 'r') as tz_file:
                used_tzone = tz_file.readline().rstrip()
            if used_tzone not in all_timezones:
                logger.info(
                    "Warning, we can't read valid timezone data.Using GMT")
                used_tzone = 'GMT'
        except Exception:
            used_tzone = 'GMT'
            logger.info("Warning, we can't read valid timezone data.Using GMT")
        # assigned on both paths: the failure path used to leave the
        # attribute undefined
        self.systemtzone = used_tzone
Example #19
0
class Monitor:
    """Base monitor: checks a server watch-rule against plugin data.

    Child classes provide ``get_data()``, ``open()`` and ``close()`` for a
    concrete kind of monitor (socket, database, etc.).
    """

    def __init__(self, plugin, watch_rule):
        """Set up the monitor for *plugin* and *watch_rule*."""
        self.plugin = plugin
        self.options = CommandLineOptions().get_options()

        # read configuration: a config file given in the options overrides
        # the default one
        self._conf = Conf()
        if self.options.config_file:
            conffile = self.options.config_file
        else:
            conffile = self._conf.DEFAULT_CONFIG_FILE
        self._conf.read([conffile], False)
        self.watch_rule = watch_rule

        self.queries = \
            self.get_replaced_values('query', self.watch_rule.dict())
        self.regexps = \
            self.get_replaced_values('regexp', self.watch_rule.dict())
        self.results = \
            self.get_replaced_values('result', self.watch_rule.dict())

        self.initial_time = int(time.time())  # initial time at object call
        self.first_value = None

        self.open()

    def get_replaced_values(self, key, groups):
        """Return ``{rule_name: value}`` for the *key* entry of every rule.

        Plugin variables are replaced with watch_rule data.

        For example, given the following watch_rule:

            watch-rule plugin_id="2006" plugin_sid="1" condition="eq"
                       value="1" from="192.168.6.64" to="192.168.6.63"
                       port_from="5643" port_to="22"

        and the following plugin query:
            query = {$from}:{$port_from} {$to}:{$port_to}

        the variables are replaced with the watch-rule data:
            query = 192.168.6.64:5643 192.168.6.63:22
        """
        values = {}
        for rule_name, rule in self.plugin.rules().iteritems():
            values[rule_name] = \
                self.plugin.get_replace_value(rule[key], groups)

        return values

    def _plugin_defaults(self, event, log):
        """Fill the unset fields of *event* with defaults and return it.

        The defaults (and the *log* text) are only applied when the
        configuration has a ``plugin-defaults`` section.  The event type
        defaults to 'monitor' and the watch-rule helper fields are blanked.
        """
        # get default values from config
        if self._conf.has_section("plugin-defaults"):

            # 1) date
            default_date_format = self._conf.get("plugin-defaults",
                                                 "date_format")
            if event["date"] is None and default_date_format:
                # one clock reading, so date and fdate can never disagree
                now = time.strftime('%Y-%m-%d %H:%M:%S',
                                    time.localtime(time.time()))
                event["date"] = now
                event["fdate"] = now

            # 2) sensor
            default_sensor = self._conf.get("plugin-defaults", "sensor")
            if event["sensor"] is None and default_sensor:
                event["sensor"] = default_sensor

            # 3) interface
            default_iface = self._conf.get("plugin-defaults", "interface")
            if event["interface"] is None and default_iface:
                event["interface"] = default_iface

            # 4) source ip
            if event["src_ip"] is None:
                event["src_ip"] = event["from"]

            # 5) dest ip
            if event["dst_ip"] is None:
                event["dst_ip"] = event["to"]

            # 6) protocol
            if event["protocol"] is None:
                event["protocol"] = "TCP"

            # 7) ports, then fall back to the sensor for missing addresses
            if event["src_port"] is None:
                event["src_port"] = event["port_from"]
            if event["dst_port"] is None:
                event["dst_port"] = event["port_to"]
            if event["src_port"] is None:
                event["src_port"] = 0
            if event["dst_port"] is None:
                event["dst_port"] = 0
            if event["src_ip"] is None:
                event["src_ip"] = event["sensor"]
            if event["dst_ip"] is None:
                event["dst_ip"] = event["sensor"]

            # NOTE(review): the original tab-indented line resolved to this
            # level, so the log is only stored when the section exists --
            # confirm that is intended.
            event["log"] = log

        # the type of this event should always be 'monitor'
        if event["type"] is None:
            event["type"] = 'monitor'

        # Clean up mess
        event["port_from"] = ""
        event["port_to"] = ""
        event["to"] = ""
        event["from"] = ""
        event["absolute"] = ""
        event["interval"] = ""

        return event

    def match_rule(self):
        """Return the name of the plugin rule to apply, or None.

        A rule applies when one of its sids (e.g. ``sid=1,2-4,5``) equals the
        watch-rule's plugin_sid or is ``any``.
        """
        plugin_sid = self.watch_rule['plugin_sid']
        for rule_name, rule in self.plugin.rules().iteritems():
            for sid in Config.split_sids(str(rule['sid'])):  # sid=1,2-4,5
                if str(plugin_sid) == str(sid) or str(sid).lower() == 'any':
                    return rule_name
        return None

    def eval_condition(self, cond, arg1, arg2, value):
        """Return whether ``arg2 <cond> arg1 + value`` holds.

        cond is one of eq, ne, gt, ge, le, lt; any other condition yields
        False.  An operand that cannot be converted to an integer is logged
        and also yields False.
        """
        operands = []
        for label, raw in (("arg1", arg1), ("arg2", arg2), ("value", value)):
            if type(raw) is not int:
                try:
                    raw = int(raw)
                except (TypeError, ValueError):
                    # TypeError covers None, which used to escape uncaught
                    logger.warning(
                        "value returned by monitor (%s=%s) is not an integer" % \
                        (label, str(raw)))
                    return False
            operands.append(raw)
        arg1, arg2, value = operands

        logger.debug("Monitor expresion evaluation: " + \
            "%s(arg2) <%s> %s(arg1) + %s(value)?" % \
            (str(arg2), str(cond), str(arg1), str(value)))

        threshold = arg1 + value
        outcomes = {
            "eq": arg2 == threshold,
            "ne": arg2 != threshold,
            "gt": arg2 > threshold,
            "ge": arg2 >= threshold,
            "le": arg2 <= threshold,
            "lt": arg2 < threshold,
        }
        return outcomes.get(cond, False)

    # given the watch rule, ask to Monitor and obtain a result
    # *must* be overriden in child classes:
    # different implementations for each type of monitor
    # (socket, database, etc.)
    def get_data(self, rule_name):
        pass

    # *must* be overriden in child classes:
    def open(self):
        pass

    # *must* be overriden in child classes:
    def close(self):
        pass

    # TODO: merge with ParserLog.feed()
    #
    def get_value(self, monitor_response, rule_name):
        """Extract the integer value of *rule_name* from *monitor_response*.

        Returns None when the rule's regexp does not match, False when the
        substituted result has no integer part, and the integer otherwise.
        Only the first element of a list response is examined.
        """
        pattern = re.compile(self.regexps[rule_name],
                             re.IGNORECASE | re.MULTILINE)

        # TODO: monitor_response could possibly be a list
        if isinstance(monitor_response, list):
            match = pattern.search(monitor_response[0])
        else:
            match = pattern.search(monitor_response)

        if match is None:
            return None

        captured = {}
        # group by index (); unmatched groups become empty strings
        for index, group in enumerate(match.groups(), 1):
            if group is None:
                group = ''
            captured[str(index)] = str(group)
        # group by name (?P<name-of-group>)
        captured.update(match.groupdict())

        # first, try getting substitution from the regular expresion syntax
        result = self.results[rule_name]
        value = self.plugin.get_replace_value(result, captured)
        try:
            # keep only the integer part of values such as "42.7"
            return int(value.split(".")[0])
        except Exception:
            return False

    def evaluate(self, rule_name):
        """Get a new value from the monitor and compare it with the first one.

        Returns True when the watch-rule condition applies (an event is then
        sent) and also when no comparison is possible: no initial value, no
        data from the monitor, or no regexp match.  Returns False when the
        condition does not apply.
        """
        if self.first_value is None:
            logger.debug("Can not extract value (arg1) from monitor response or no initial value to compare with")
            return True

        monitor_response = self.get_data(rule_name)
        if not monitor_response:
            logger.warning("No data received from monitor")
            return True

        value = self.get_value(monitor_response, rule_name)
        if value is None:
            return True

        if self.eval_condition(cond=self.watch_rule["condition"],
                               arg1=self.first_value,
                               arg2=value,
                               value=int(self.watch_rule["value"])):
            self.watch_rule["type"] = "monitor"
            try:
                cond = self.watch_rule["condition"]
                arg1 = self.first_value
                arg2 = value
                rule_value = int(self.watch_rule["value"])
                comm = self.queries
                log = "Monitor Command: %s , Monitor expresion evaluation: %s(arg2) <%s> %s(arg1) + %s(value)? , Command Response: %s" % (str(comm), str(arg2), str(cond), str(arg1), str(rule_value), monitor_response.replace("\n", "\r"))
            except Exception:
                # e.g. a list response has no replace(); keep sending the event
                log = "Monitor Exception"
            self.watch_rule = self._plugin_defaults(self.watch_rule, log)
            Output.event(self.watch_rule)
            Stats.new_event(self.watch_rule)
            return True

        logger.debug("No data matching the watch-rule received from monitor")
        return False

    # *may* be overriden in child classes
    def process(self):
        """Take the first reading if needed and evaluate the watch-rule.

        Returns the result of ``evaluate()`` while the watch-rule interval
        is still running, and True in every other case.
        """
        # get the name of rule to apply
        rule_name = self.match_rule()
        if rule_name is not None:
            logger.info("Matched rule: [%s]" % (rule_name))

            # get data from plugin (first time)
            if self.first_value is None:

                # <absolute> is "no" by default
                # the absence of <interval> implies that <absolute> is "yes"
                if self.watch_rule['absolute'] in ('yes', 'true') or \
                   not self.watch_rule['interval']:
                    self.first_value = 0
                else:
                    monitor_response = self.get_data(rule_name)
                    if not monitor_response:
                        # nothing to iterate over (possibly None)
                        self.first_value = 0
                    else:
                        for resp in monitor_response:
                            if resp:
                                self.first_value = self.get_value(resp, rule_name)
                                if self.first_value is False:
                                    self.first_value = 0

        # get current time
        current_time = int(time.time())

        # Three posibilities:
        #
        # 1) no interval specified, no need to wait
        if 'interval' not in self.watch_rule.dict() or \
           not self.watch_rule['interval']:
            self.evaluate(rule_name)
            return True

        # 2) we are in time, check the result of the watch-rule
        if self.initial_time + int(self.watch_rule["interval"]) > current_time:
            return self.evaluate(rule_name)

        # 3) we are out of time
        self.evaluate(rule_name)
        return True
Example #20
0
class Monitor:
    """Base class for monitors that answer a server watch-rule.

    A monitor asks its plugin for data (``get_data``), extracts a numeric
    value from the response with the plugin's regular expression
    (``get_value``) and compares it against the value captured on the first
    call (``eval_condition``).  When the condition holds an event is emitted.

    Child classes provide ``open``, ``close`` and ``get_data``.
    """

    def __init__(self, plugin, watch_rule):
        """Read the agent configuration and prepare the per-rule templates.

        plugin     -- plugin configuration object (rules, replace helpers)
        watch_rule -- watch-rule received from the server
        """
        self.plugin = plugin
        self.options = CommandLineOptions().get_options()

        # read configuration
        self._conf = Conf()
        if self.options.config_file:
            conffile = self.options.config_file
        else:
            conffile = self._conf.DEFAULT_CONFIG_FILE
        self._conf.read([conffile], False)
        self.watch_rule = watch_rule

        # query / regexp / result templates with the watch-rule data filled in
        self.queries = \
            self.get_replaced_values('query', self.watch_rule.dict())
        self.regexps = \
            self.get_replaced_values('regexp', self.watch_rule.dict())
        self.results = \
            self.get_replaced_values('result', self.watch_rule.dict())

        self.initial_time = int(time.time())  # initial time at object call
        self.first_value = None

        self.open()

    def get_replaced_values(self, key, groups):
        """Return {rule_name: rule[key] with plugin variables replaced}.

        Plugin variables are replaced with watch_rule data.  For example,
        given the following watch_rule:

            watch-rule plugin_id="2006" plugin_sid="1" condition="eq"
                       value="1" from="192.168.6.64" to="192.168.6.63"
                       port_from="5643" port_to="22"

        and the following plugin query:
            query = {$from}:{$port_from} {$to}:{$port_to}

        the variables are replaced with the watch-rule data:
            query = 192.168.6.64:5643 192.168.6.63:22
        """
        values = {}
        # items() instead of iteritems(): works on Python 2 and 3 alike
        for rule_name, rule in self.plugin.rules().items():
            values[rule_name] = \
                self.plugin.get_replace_value(rule[key], groups)

        return values

    def _plugin_defaults(self, event, log):
        """Fill unset event fields with defaults and return the event.

        Most defaults are only applied when the configuration has a
        "plugin-defaults" section; the event type and the final clean-up of
        the watch-rule specific keys are applied unconditionally.
        """

        # get default values from config
        if self._conf.has_section("plugin-defaults"):

            # 1) date
            default_date_format = self._conf.get("plugin-defaults",
                                                 "date_format")
            if event["date"] is None and default_date_format:
                # one timestamp for both fields so they can never differ
                now = time.strftime('%Y-%m-%d %H:%M:%S',
                                    time.localtime(time.time()))
                event["date"] = now
                event["fdate"] = now

            # 2) sensor
            default_sensor = self._conf.get("plugin-defaults", "sensor")
            if event["sensor"] is None and default_sensor:
                event["sensor"] = default_sensor

            # 3) interface
            default_iface = self._conf.get("plugin-defaults", "interface")
            if event["interface"] is None and default_iface:
                event["interface"] = default_iface

            # 4) source ip
            if event["src_ip"] is None:
                event["src_ip"] = event["from"]

            # 5) dest ip
            if event["dst_ip"] is None:
                event["dst_ip"] = event["to"]

            # 6) protocol
            if event["protocol"] is None:
                event["protocol"] = "TCP"

            # 7) ports: watch-rule ports first, then 0
            if event["src_port"] is None:
                event["src_port"] = event["port_from"]
            if event["dst_port"] is None:
                event["dst_port"] = event["port_to"]
            if event["src_port"] is None:
                event["src_port"] = 0
            if event["dst_port"] is None:
                event["dst_port"] = 0

            # addresses still unset fall back to the sensor
            if event["src_ip"] is None:
                event["src_ip"] = event["sensor"]
            if event["dst_ip"] is None:
                event["dst_ip"] = event["sensor"]
            event["log"] = log

        # the type of this event should always be 'monitor'
        if event["type"] is None:
            event["type"] = 'monitor'

        # Clean up mess
        event["port_from"] = ""
        event["port_to"] = ""
        event["to"] = ""
        event["from"] = ""
        event["absolute"] = ""
        event["interval"] = ""

        return event

    # given the server's watch_rule, find what rule to apply
    def match_rule(self):
        """Return the name of the first plugin rule whose sid list contains
        the watch-rule's plugin_sid (or 'any'), or None when none matches."""
        plugin_sid = self.watch_rule['plugin_sid']
        for rule_name, rule in self.plugin.rules().items():
            for sid in Config.split_sids(str(rule['sid'])):  # sid=1,2-4,5
                if str(plugin_sid) == str(sid) or str(sid).lower() == 'any':
                    return rule_name
        return None

    # eval watch rule condition
    def eval_condition(self, cond, arg1, arg2, value):
        """Return whether ``arg2 <cond> arg1 + value`` holds.

        cond  -- one of "eq", "ne", "gt", "ge", "le", "lt"
        arg1  -- baseline value (first value read from the monitor)
        arg2  -- current value read from the monitor
        value -- offset configured in the watch-rule

        Returns False when an operand cannot be converted to an integer or
        when the condition name is unknown.
        """
        operands = []
        for label, raw in (("arg1", arg1), ("arg2", arg2), ("value", value)):
            try:
                operands.append(int(raw))
            except (TypeError, ValueError):
                # TypeError covers None and other non-numeric objects
                logger.warning(
                    "value returned by monitor (%s=%s) is not an integer" %
                    (label, str(raw)))
                return False
        arg1, arg2, value = operands

        logger.debug(
            "Monitor expresion evaluation: %s(arg2) <%s> %s(arg1) + %s(value)?"
            % (str(arg2), str(cond), str(arg1), str(value)))

        target = arg1 + value
        if cond == "eq":
            return arg2 == target
        elif cond == "ne":
            return arg2 != target
        elif cond == "gt":
            return arg2 > target
        elif cond == "ge":
            return arg2 >= target
        elif cond == "le":
            return arg2 <= target
        elif cond == "lt":
            return arg2 < target
        else:
            return False

    # given the watch rule, ask to Monitor and obtain a result
    # *must* be overriden in child classes:
    # different implementations for each type of monitor
    # (socket, database, etc.)
    def get_data(self, rule_name):
        """Return the monitor response for rule_name (None in this base)."""
        pass

    # *must* be overriden in child classes:
    def open(self):
        """Acquire the monitor's resources (no-op in this base class)."""
        pass

    # *must* be overriden in child classes:
    def close(self):
        """Release the monitor's resources (no-op in this base class)."""
        pass

    # TODO: merge with ParserLog.feed()
    #
    def get_value(self, monitor_response, rule_name):
        """Extract the integer value of a monitor response.

        The rule's regexp is searched in the response (only the first
        element when the response is a list); the matched groups, by index
        and by name, are substituted into the rule's result template and the
        part before the first "." is converted to an integer.

        Returns None when the response is an empty list or does not match
        the regexp, and False when the substituted result is not an integer.
        """
        regexp = self.regexps[rule_name]
        pattern = re.compile(regexp, re.IGNORECASE | re.MULTILINE)

        # TODO: monitor_response could possibly be a list
        if isinstance(monitor_response, list):
            if not monitor_response:
                # nothing to search in: same outcome as "no match"
                return None
            monitor_response = monitor_response[0]

        match = pattern.search(monitor_response)
        if match is None:
            return None

        substitutions = {}
        # group by index ()
        for index, group in enumerate(match.groups(), 1):
            substitutions[str(index)] = '' if group is None else str(group)
        # group by name (?P<name-of-group>)
        substitutions.update(match.groupdict())

        # first, try getting substitution from the regular expresion syntax
        result = self.results[rule_name]
        value = self.plugin.get_replace_value(result, substitutions)
        try:
            return int(value.split(".")[0])
        except (AttributeError, TypeError, ValueError):
            return False

    # get a new value from monitor and compare with the first one
    # returns True if the condition apply, False in the other case
    def evaluate(self, rule_name):
        """Read a new value from the monitor and compare it with the first.

        Returns True when the condition applies (an event is emitted) and
        also when no comparison is possible (no baseline, no data or no
        regexp match); returns False when the condition does not apply.
        """
        if self.first_value is None:
            logger.debug(
                "Can not extract value (arg1) from monitor response or no initial value to compare with"
            )
            return True

        monitor_response = self.get_data(rule_name)
        if not monitor_response:
            logger.warning("No data received from monitor")
            return True

        value = self.get_value(monitor_response, rule_name)
        if value is None:
            return True

        cond = self.watch_rule["condition"]
        offset = int(self.watch_rule["value"])
        if self.eval_condition(cond=cond,
                               arg1=self.first_value,
                               arg2=value,
                               value=offset):
            self.watch_rule["type"] = "monitor"
            try:
                log = "Monitor Command: %s , Monitor expresion evaluation: %s(arg2) <%s> %s(arg1) + %s(value)? , Command Response: %s" % (
                    str(self.queries), str(value), str(cond),
                    str(self.first_value), str(offset),
                    monitor_response.replace("\n", "\r"))
            except Exception:
                # e.g. a response type without replace(); keep the event
                log = "Monitor Exception"
            self.watch_rule = self._plugin_defaults(self.watch_rule, log)
            Output.event(self.watch_rule)
            Stats.new_event(self.watch_rule)
            return True

        logger.debug("No data matching the watch-rule received from monitor")
        return False

    # *may* be overriden in child classes
    def process(self):
        """Run one processing step for the current watch-rule.

        On the first call for a matched rule the baseline ``first_value`` is
        captured (0 for absolute rules, rules without interval, or when the
        monitor returns no data).  The rule is then evaluated.

        Returns the result of ``evaluate`` while the rule's interval window
        is still open, and True in every other case.
        """

        # get the name of rule to apply
        rule_name = self.match_rule()
        if rule_name is not None:
            logger.info("Matched rule: [%s]" % (rule_name))

            # get data from plugin (first time)
            if self.first_value is None:

                # <absolute> is "no" by default
                # the absence of <interval> implies that <absolute> is "yes"
                if self.watch_rule['absolute'] in ('yes', 'true') or \
                   not self.watch_rule['interval']:
                    self.first_value = 0
                else:
                    monitor_response = self.get_data(rule_name)
                    if not monitor_response:
                        # get_data may return None: do not iterate over it
                        self.first_value = 0
                    else:
                        # NOTE(review): a plain string response is walked
                        # character by character here -- confirm the type
                        # get_data is expected to return.
                        for resp in monitor_response:
                            if resp:
                                self.first_value = \
                                    self.get_value(resp, rule_name)
                                # a failed extraction (False) counts as 0
                                if self.first_value == False:
                                    self.first_value = 0

        # get current time
        current_time = int(time.time())

        # With an interval configured there are two possibilities:
        #   1) we are in time: report the result of the watch-rule
        #   2) we are out of time: evaluate once more and finish
        # Without an interval there is no need to wait at all.
        has_interval = 'interval' in self.watch_rule.dict() and \
            self.watch_rule['interval']
        if has_interval and (self.initial_time +
                             int(self.watch_rule["interval"]) > current_time):
            return self.evaluate(rule_name)

        self.evaluate(rule_name)
        return True
Example #21
0
class W2V(object):
    """Build a word2vec model from the clinical-study XML files found under
    the configured ``xml_path``."""

    def __init__(self):
        # text normalisation helpers
        self.tokenizer = RegexpTokenizer(r'\w+')
        self.lem = WordNetLemmatizer()
        self.stopwords = set(stopwords.words('english'))

        # placeholders, not filled in by this class
        self.dict = None
        self.corpus = None
        self.bm25model = None

        # one token list per successfully parsed document
        self.docs_list = []

        self.conf = Conf()
        self.xml_path = self.conf.getConfig('path', 'xml_path')

    def cleanData(self, doc):
        """Lower-case and tokenize ``doc``, lemmatize every token and return
        the lemmas that are not stopwords, in their original order."""
        lemmas = []
        for token in self.tokenizer.tokenize(doc.lower()):
            lemmas.append(self.lem.lemmatize(token))

        kept = []
        for lemma in lemmas:
            if lemma not in self.stopwords:
                kept.append(lemma)
        return kept

    def xml2json(self, xmlpath):
        """Parse the XML file at ``xmlpath`` and return it as a mapping."""
        with open(xmlpath, "r") as handle:
            return xmltodict.parse(handle.read())

    def extractUseful(self, dict_str):
        """Join the title, summary, description and eligibility criteria of
        a parsed study with commas.

        The official title is preferred over the brief title; the other
        sections are included only when present.
        """
        study = dict_str["clinical_study"]

        if "official_title" in study:
            pieces = [study["official_title"]]
        else:
            pieces = [study["brief_title"]]

        for section in ("brief_summary", "detailed_description"):
            if section in study:
                pieces.append(study[section]["textblock"])

        if "eligibility" in study:
            pieces.append(study["eligibility"]["criteria"]["textblock"])

        return ','.join(pieces)

    def buildModel(self):
        """Train word2vec on the collected documents and save the model."""
        trained = word2vec.Word2Vec(sentences=self.docs_list,
                                    min_count=5,
                                    workers=4)
        trained.save("models/w2v.model")

    def run(self):
        """Walk ``xml_path``, tokenize every file into ``docs_list`` and then
        build the model.  Files that fail are logged and skipped."""
        processed = 0

        for dirpath, _, filenames in os.walk(self.xml_path, topdown=True):
            for filename in filenames:
                try:
                    file_path = os.path.join(dirpath, filename)
                    parsed = self.xml2json(file_path)
                    text = self.extractUseful(parsed)
                    self.docs_list.append(self.cleanData(text))
                except KeyboardInterrupt:
                    # the program was interrupted with ctrl+C
                    print('Interrupted')
                    try:
                        sys.exit(0)
                    except SystemExit:
                        os._exit(0)
                except Exception as e:
                    print(e)
                    # remember the failing file for later inspection
                    with open('error_w2v_xml.txt', 'a') as f:
                        f.write(str(file_path) + '\n')
                    print('Error in ', str(filename))

                # every visited file counts, failed ones included
                processed += 1
                if processed % 2000 == 0:
                    print("Already finished {}".format(processed))

        print("Start build model")
        self.buildModel()
Example #22
0
class Query(object):
    """Run the topic queries against the Elasticsearch index and append the
    ranked results to a trec_eval run file."""

    def __init__(self):
        self.conf = Conf()
        self.query_xml_path = self.conf.getConfig("path", "query_xml_path")
        self.index_name = self.conf.getConfig("search", "index_name")
        self.doc_type = self.conf.getConfig("search", "doc_type")
        self.meshDict = self.getPickles(
            self.conf.getConfig("path", "dict_pickle_path"))
        # Elasticsearch timeout raised to 30 seconds, with up to 10 retries
        # on timeout, so that large result sets do not time out.
        self.es = Elasticsearch(timeout=30,
                                max_retries=10,
                                retry_on_timeout=True)
        self.fields = self.conf.getImportant()
        self.extracted = []

        self.tokenizer = RegexpTokenizer(r"\w+")
        self.lem = WordNetLemmatizer()
        self.stemmer = PorterStemmer()
        self.stopwords = set(stopwords.words("english"))

    def getPickles(self, pickle_path):
        """Return the object stored in the pickle file at ``pickle_path``.

        NOTE(security): unpickling executes arbitrary code from the file;
        only point ``dict_pickle_path`` at files you produced yourself.
        """
        with open(pickle_path, "rb") as pf:
            return pickle.load(pf)

    def xml2json(self, xmlpath):
        """Parse the XML file at ``xmlpath`` and return it as a mapping."""
        with open(xmlpath, "r") as xmlf:
            return xmltodict.parse(xmlf.read())

    def extract_query(self):
        """Read the topics XML, append one query dict per topic to
        ``self.extracted`` and dump the whole list to ``query.json``.

        The demographic text is split into an age in days (leading number
        before the first "-", times 365) and a gender (last word).
        """
        topics = self.xml2json(self.query_xml_path)["topics"]["topic"]
        # xmltodict returns a single mapping instead of a list when the
        # file holds exactly one <topic>; normalise so the loop sees topics.
        if isinstance(topics, dict):
            topics = [topics]

        for q in topics:
            demographic = q["demographic"]
            self.extracted.append({
                "id": q["@number"],
                "disease": q["disease"],
                "gene": q["gene"],
                "age": int(demographic.split("-")[0]) * 365,
                "gender": demographic.split(" ")[-1],
                "other": q["other"],
            })

        with open("query.json", "w") as f:
            f.write(json.dumps(self.extracted, indent=4))

    def cleanData(self, doc):
        """Lower-case, tokenize and stem ``doc``; drop pure-digit tokens,
        single-character tokens and stopwords.  Returns a list of tokens."""
        stems = (self.stemmer.stem(token)
                 for token in self.tokenizer.tokenize(doc.lower()))
        return [
            stem for stem in stems
            if not stem.isdigit() and len(stem) > 1
            and stem not in self.stopwords
        ]

    def _mesh_expanded_terms(self, disease):
        """Return the disease keywords expanded with their MeSH entries as
        one cleaned, de-duplicated, space-separated string.

        Not used by the active query body; kept for query experiments.
        """
        keywords = disease.lower().split(" ")
        terms = list(keywords)
        for keyword in keywords:
            if keyword in self.meshDict and keyword not in (
                    "cancer", "adenocarcinoma", "carcinoma"):
                terms += self.meshDict[keyword]

        if "mesh_numbers" in terms:
            terms.remove("mesh_numbers")
        return " ".join(set(self.cleanData(" ".join(terms))))

    def query(self, single_query):
        """Search the index for one topic and append the ranked hits to
        ``trec_eval/eval/r40.txt`` in trec_eval format.

        Hits whose eligibility age range or gender does not fit the topic
        are skipped.  A malformed hit is reported and skipped; it no longer
        ends the ranking for the whole topic.
        """
        gene_terms = single_query["gene"].replace("(", " ").replace(
            ")", " ").replace(",", "")
        print(gene_terms)

        # The shape of the query body has a large impact on MAP and P@10.
        # Figures noted for this disease+gene cross_fields query:
        #   p5: 0.3586  p10: 0.3138  p15: 0.2704
        #   with age filter:    p5: 0.3586 p10: 0.3172 p15: 0.2805
        #   with gender filter: p5: 0.3655 p10: 0.3241 p15: 0.2920
        # Variants tried before: gene-only cross_fields, and a bool/should
        # query with per-field boosts plus a must_not on the other gender.
        query_body = {
            "query": {
                "multi_match": {
                    "query": (single_query["disease"] + ' ' +
                              gene_terms).lower(),
                    "type": "cross_fields",
                    "fields": [
                        "brief_title",
                        "brief_summary",
                        "detailed_description",
                        "official_title",
                        "keyword",
                        "condition",
                        "eligibility.criteria.textblock",
                    ],
                }
            },
            "size": 1000,
        }

        query_result = self.es.search(index=self.index_name,
                                      doc_type=self.doc_type,
                                      body=query_body)["hits"]["hits"]

        wanted_gender = single_query["gender"].lower()
        rank = 1
        with open("trec_eval/eval/r40.txt", "a") as f:
            for qr in query_result:
                try:
                    if "eligibility" in qr["_source"]:
                        qr_eli = qr["_source"]["eligibility"]

                        # skip studies whose age range excludes the patient
                        if not (float(qr_eli["minimum_age"]) <=
                                single_query["age"] <=
                                float(qr_eli["maximum_age"])):
                            continue

                        # skip studies restricted to the other gender
                        hit_gender = qr_eli["gender"].lower().strip()
                        if hit_gender not in (wanted_gender, 'all'):
                            print(qr_eli["gender"].lower())
                            print(wanted_gender)
                            continue

                    # one line per hit, in the format trec_eval expects
                    f.write("{} Q0 {} {} {} certRI\n".format(
                        single_query["id"],
                        qr["_source"]["id_info"],
                        rank,
                        round(qr["_score"], 4),
                    ))
                except ValueError:
                    # an age that is not a number
                    print(qr["_source"]["eligibility"])
                    continue
                except KeyError as ke:
                    print(ke)
                    print(qr.get("_source"))
                    continue

                rank += 1
                if rank > 1000:
                    break

        print("Relative docs:{}".format(rank - 1))

    def run(self):
        """Extract all topics and query the index once per topic."""
        self.extract_query()
        for single_query in self.extracted:
            print(single_query)
            self.query(single_query)
Example #23
0
class DataPreprocessing(object):
    """Load the clinical-study XML files into a fresh Elasticsearch index."""

    def __init__(self):
        """Read the settings and (re)create the target index.

        An existing index with the configured name is deleted first.
        """
        # read the settings
        self.conf = Conf()
        self.xml_path = self.conf.getConfig("path", "xml_path")
        self.index_name = self.conf.getConfig("search", "index_name")
        self.doc_type = self.conf.getConfig("search", "doc_type")

        self.tokenizer = RegexpTokenizer(r"\w+")
        self.lem = WordNetLemmatizer()
        self.stemmer = PorterStemmer()
        self.stopwords = set(stopwords.words("english"))

        self.es = Elasticsearch()
        self.fields = self.conf.getImportant()

        # An ES index and doc_type play the role of a MySQL db and table.
        # If the index to create already exists, drop the old one first.
        if self.es.indices.exists(index=self.index_name):
            self.es.indices.delete(index=self.index_name)

        # create the index
        self.es.indices.create(index=self.index_name)
        print("created index:" + self.index_name)

    def xml2json(self, xmlpath):
        """Parse the XML file at ``xmlpath`` and return it as a mapping."""
        with open(xmlpath, "r") as xmlf:
            return xmltodict.parse(xmlf.read())

    def cleanData(self, doc):
        """Lower-case, tokenize and stem ``doc``; drop pure-digit tokens,
        single-character tokens and stopwords.  Returns the remaining
        tokens joined with single spaces."""
        stems = (self.stemmer.stem(token)
                 for token in self.tokenizer.tokenize(doc.lower()))
        return " ".join(
            stem for stem in stems
            if not stem.isdigit() and len(stem) > 1
            and stem not in self.stopwords
        )

    def clean(self, json_data):
        """Replace every non-empty free-text field of ``json_data`` with its
        cleaned form (see ``cleanData``) and return the same mapping."""
        for key in ("brief_title", "official_title", "brief_summary",
                    "detailed_description"):
            if json_data[key]:
                json_data[key] = self.cleanData(json_data[key])

        criteria = json_data["eligibility"]["criteria"]
        if criteria["textblock"]:
            criteria["textblock"] = self.cleanData(criteria["textblock"])
        return json_data

    def _default_document(self):
        """Return a fresh copy of the values used for missing fields.

        A new mapping is built on every call because the nested
        eligibility mapping can end up inside the indexed document.
        """
        return {
            "id_info": "NCT00000000",
            "brief_title": "",
            "official_title": "",
            "brief_summary": "",
            "detailed_description": "",
            "intervention": {"intervention_type": "", "intervention_name": ""},
            "eligibility": {
                "criteria": {"textblock": ""},
                "gender": "All",
                "minimum_age": "6 Months",
                "maximum_age": "100 Years",
                "healthy_volunteers": "No",
            },
            "keyword": [],
            "intervention_browse": [],
            "condition": [],
        }

    def oswalk(self):
        """Index every file found under ``xml_path``.

        For each file the configured fields are copied (or defaulted), the
        eligibility block is completed and normalised, the text fields are
        cleaned and the document is indexed.  A file that fails is reported,
        recorded in ``errorxml.txt`` and skipped.
        """
        count = 0

        # visit every file in every directory below xml_path
        for dirpath, _, filenames in os.walk(self.xml_path, topdown=True):
            for filename in filenames:
                filepath = os.path.join(dirpath, filename)
                # Bound before the try block so the error handler below can
                # always print it, and never shows the previous file's data.
                cleaned_json_data = {}
                try:
                    study = self.xml2json(filepath)["clinical_study"]
                    defaults = self._default_document()

                    # copy the fields configured as important out of the
                    # parsed study into the document to be indexed
                    for field in self.fields:
                        if field not in study:
                            cleaned_json_data[field] = defaults[field]
                        elif len(self.fields[field]) > 1 and not isinstance(
                                study[field], str):
                            cleaned_json_data[field] = study[field][
                                self.fields[field]]
                        else:
                            cleaned_json_data[field] = study[field]

                    # complete and normalise the eligibility (age) block
                    if "eligibility" in cleaned_json_data:
                        eligibility = cleaned_json_data["eligibility"]
                        if "criteria" not in eligibility:
                            eligibility["criteria"] = {"textblock": ""}

                        for k in defaults["eligibility"]:
                            if k not in eligibility:
                                eligibility[k] = defaults["eligibility"][k]

                        cleaned_json_data["eligibility"] = NormalAge(
                            eligibility)

                    cleaned_json_data = self.clean(cleaned_json_data)

                    # insert the document
                    self.es.index(
                        index=self.index_name,
                        body=cleaned_json_data,
                        doc_type=self.doc_type,
                    )

                    count += 1
                    if count % 1000 == 0:
                        print("Already finished:" + str(count))
                except KeyboardInterrupt:
                    # the program was interrupted with ctrl+C
                    print("Interrupted")
                    try:
                        sys.exit(0)
                    except SystemExit:
                        os._exit(0)
                except Exception as e:
                    print(cleaned_json_data)
                    print(e)
                    with open("errorxml.txt", "a") as f:
                        f.write(str(filepath) + "\n")
                    print("Error in ", str(filename))
Example #24
0
                self.__process()

            # back to begining
            self.start_time = time.time()

    def __len__(self):
        """Return the number of entries in the private event list."""
        return len(self.__event_list)


# Manual check, executed only when the module is run as a script.
if __name__ == "__main__":

    # most verbose logging for the manual run
    Logger.set_verbose("debug")

    from Config import Conf

    # load the agent configuration from its installed location
    conf = Conf()
    #conf.read(['../etc/agent/config.cfg'])
    conf.read(['/etc/ossim/agent/config.cfg'])

    from Event import Event

    # first sample event: loopback addresses, plugin 6001 / sid 1
    event1 = Event()
    event1["src_ip"] = "127.0.0.1"
    event1["dst_ip"] = "127.0.0.1"
    event1["sensor"] = "127.0.0.1"
    event1["plugin_id"] = "6001"
    event1["plugin_sid"] = "1"
    event1["src_port"] = "22"
    event1["dst_port"] = "80"

    # second sample event (left with its defaults here)
    event2 = Event()
Example #25
0
        if not self.logger.handlers:
            fh_stream = logging.StreamHandler()
            fh_stream.setLevel(log_l[self.log_level])
            formatter = logging.Formatter(
                "%(asctime)s %(name)s %(levelname)s %(message)s")
            fh_stream.setFormatter(formatter)

            fh_file = logging.FileHandler(self.log_file)
            fh_file.setLevel(log_l[self.log_level])
            fh_file.setFormatter(formatter)

            self.logger.addHandler(fh_stream)
            self.logger.addHandler(fh_file)


# Module-level log settings, computed once when the module is imported.
# The log file is named after the import date: <log_path>/<YYYY-MM-DD><ext>
log_path = Conf.get_logs()
current_time = datetime.datetime.now().strftime("%Y-%m-%d")
log_extension = Reader_config().get_conf_log_extension()
file_name = os.path.join(log_path, current_time + log_extension)

# level name passed to every logger created by my_log()
loglevel = Reader_config().get_conf_log_level()


def my_log(log_name=__file__):
    """Create a ``Logs`` object for *log_name* and return its ``logger``.

    The log file path and log level come from the module-level ``file_name``
    and ``loglevel`` settings.

    Args:
        log_name: name passed to ``Logs``; defaults to this module's
            ``__file__`` (evaluated once, when the function is defined).

    Returns:
        The ``logger`` attribute of the newly built ``Logs`` object.
    """
    log_wrapper = Logs(
        log_file=file_name,
        log_name=log_name,
        log_level=loglevel,
    )
    return log_wrapper.logger


if __name__ == '__main__':
    # Manual check: emit a single debug record through the default logger.
    my_log().debug("dsadasd")  # debug("dsadasd") gives the log level and the message text, which feed the log template
Example #26
0
class Monitor:
    """Bind a plugin's rules to the data of one watch-rule.

    On construction the agent configuration is read, the plugin's
    ``query`` / ``regexp`` / ``result`` rule entries are collected (with
    watch-rule data substituted where applicable), time-zone information is
    initialised and ``self.open()`` is called.
    """

    # Loose dotted-quad check: four groups of 1-3 digits. It does not verify
    # that each group is <= 255.
    _IPV4_REGEX = r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$"

    def __init__(self, plugin, watch_rule):
        """Read configuration, expand the plugin rules and open the monitor.

        Args:
            plugin: plugin object; this class calls its ``rules()``,
                ``get_replace_value()``, ``hitems()`` and ``get()`` methods.
            watch_rule: watch-rule object; this class calls its ``dict()``
                method and reads its ``EVENT_BASE64`` attribute.
        """
        self.plugin = plugin
        self.options = CommandLineOptions().get_options()

        # read configuration: a config file given on the command line wins
        # over the built-in default location
        self._conf = Conf()
        conffile = self.options.config_file or self._conf.DEFAULT_CONFIG_FILE
        self._conf.read([conffile], 'latin1')
        self.watch_rule = watch_rule

        # Values whose key is listed in EVENT_BASE64 are base64-decoded in
        # place. Iterate over a snapshot so the dict can be updated safely.
        groups = self.watch_rule.dict()
        for item, value in list(groups.items()):
            if item in self.watch_rule.EVENT_BASE64:
                groups[item] = b64decode(value)

        self.queries = self.get_replaced_values('query', groups)
        self.regexps = self.get_replaced_values('regexp', groups)
        self.results = self.get_replaced_values('result', groups)
        self.initial_time = int(time.time())  # initial time at object call
        self.first_value = None

        # A plugin-specific time zone overrides the agent-wide default.
        if "tzone" in self.plugin.hitems("DEFAULT"):
            self.timezone = self.plugin.get("DEFAULT", "tzone")
            logger.debug("Plugin %s (%s) with specific tzone = %s" %
                         (self.plugin.get("config", "name"),
                          self.plugin.get("DEFAULT", "plugin_id"),
                          self.timezone))
        else:
            self.timezone = self._conf.get("plugin-defaults", "tzone")

        self.__agenttimezone = None
        self.__EventTimeZone = None
        self.__systemTimeZone = None
        self.__set_system_tzone()
        self.__setTZData()

        self.open()

    def get_replaced_values(self, key, groups):
        """Collect the *key* entry of every plugin rule.

        For every key except ``'result'`` the plugin variables in the rule
        entry are replaced with watch-rule data. For example, given the
        following watch_rule::

            watch-rule plugin_id="2006" plugin_sid="1" condition="eq"
                       value="1" from="192.168.6.64" to="192.168.6.63"
                       port_from="5643" port_to="22"

        and the following plugin query::

            query = {$from}:{$port_from} {$to}:{$port_to}

        the variables are replaced with the watch-rule data::

            query = 192.168.6.64:5643 192.168.6.63:22

        ``'result'`` entries are returned untouched.

        Args:
            key: rule entry to collect (``'query'``, ``'regexp'``,
                ``'result'``).
            groups: mapping of watch-rule data passed to
                ``plugin.get_replace_value``.

        Returns:
            dict mapping each rule name to its (possibly substituted) entry.
        """
        substitute = key != 'result'
        values = {}
        for rule_name, rule in self.plugin.rules().items():
            entry = rule[key]
            if substitute:
                entry = self.plugin.get_replace_value(entry, groups)
            values[rule_name] = entry
        return values

    def __reset_invalid_ip(self, event, field, userdata_field):
        """Force ``event[field]`` to 0.0.0.0 when it is missing or malformed.

        A value that is present but does not look like a dotted-quad IPv4
        address is preserved in ``event[userdata_field]``.
        """
        value = event[field]
        if value is not None:
            if not re.match(self._IPV4_REGEX, value):
                event[field] = '0.0.0.0'
                print("Event's field %s (%s) is not a valid IP.v4 address, "
                      "set it to default ip 0.0.0.0 and real data on %s"
                      % (field, value, userdata_field))
                event[userdata_field] = value
        elif field in event.EVENT_ATTRS:
            event[field] = '0.0.0.0'

    def _plugin_defaults(self, event, log):
        """Fill in missing event fields and normalise the event.

        When the configuration has a ``plugin-defaults`` section, unset
        fields (date, sensor, interface, IPs, protocol, ports) receive
        default values and the date is normalised to UTC. Independently of
        that section, the source/destination IPs are validated, the raw log
        is attached, the type defaults to ``'monitor'`` and the helper fields
        copied from the watch-rule are blanked.

        Args:
            event: event object, indexed like a mapping and exposing
                ``EVENT_ATTRS``.
            log: value stored in ``event["log"]``.

        Returns:
            The same *event* object, modified in place.
        """
        # get default values from config
        if self._conf.has_section("plugin-defaults"):

            # 1) date -- take one timestamp so "date" and "fdate" always agree
            default_date_format = self._conf.get("plugin-defaults",
                                                 "date_format")
            if event["date"] is None and default_date_format:
                now = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())
                event["date"] = now
                event["fdate"] = now

            # 2) sensor
            default_sensor = self._conf.get("plugin-defaults", "sensor")
            if event["sensor"] is None and default_sensor:
                event["sensor"] = default_sensor

            # 3) interface
            default_iface = self._conf.get("plugin-defaults", "interface")
            if event["interface"] is None and default_iface:
                event["interface"] = default_iface

            # 4) source ip
            if event["src_ip"] is None:
                event["src_ip"] = event["from"]

            # 5) dest ip
            if event["dst_ip"] is None:
                event["dst_ip"] = event["to"]

            # 6) protocol
            if event["protocol"] is None:
                event["protocol"] = "TCP"

            # 7) ports: watch-rule ports first, then 0; IPs still unset fall
            #    back to the sensor address
            if event["src_port"] is None:
                event["src_port"] = event["port_from"]
            if event["dst_port"] is None:
                event["dst_port"] = event["port_to"]
            if event["src_port"] is None:
                event["src_port"] = 0
            if event["dst_port"] is None:
                event["dst_port"] = 0
            if event["src_ip"] is None:
                event["src_ip"] = event["sensor"]
            if event["dst_ip"] is None:
                event["dst_ip"] = event["sensor"]

            # 8) Time zone
            if 'tzone' in event.EVENT_ATTRS:
                Utils.normalizeToUTCDate(event, self.__EventTimeZone)

        # Check the IPs: an invalid value is replaced by 0.0.0.0 and the
        # original data is kept in userdata8 (source) / userdata9 (dest).
        self.__reset_invalid_ip(event, 'src_ip', 'userdata8')
        self.__reset_invalid_ip(event, 'dst_ip', 'userdata9')
        event["log"] = log

        # the type of this event should always be 'monitor'
        if event["type"] is None:
            event["type"] = 'monitor'

        # Clean up mess: blank the watch-rule helper fields
        for helper_field in ("port_from", "port_to", "to", "from",
                             "absolute", "interval"):
            event[helper_field] = ""

        return event

    def __set_system_tzone(self):
        """Set ``self.systemtzone`` from ``/etc/timezone``, defaulting to GMT.

        GMT is used when the file cannot be read or when its first line is
        not one of ``all_timezones``.
        """
        used_tzone = 'GMT'
        try:
            # read local timezone information; "with" closes the file even
            # if reading fails
            with open('/etc/timezone', 'r') as f:
                candidate = f.readline().rstrip()
            if candidate in all_timezones:
                used_tzone = candidate
            else:
                logger.info("Warning, we can't read valid timezone data.Using GMT")
        except Exception:
            # Deliberately best-effort: any failure falls back to GMT.
            logger.info("Warning, we can't read valid timezone data.Using GMT")
            # back to beginning
            # NOTE(review): resetting start_time looks unrelated to time-zone
            # detection -- kept to preserve behaviour, please confirm.
            self.start_time = time.time()
        # Always publish a value, including on the failure path.
        self.systemtzone = used_tzone

    def __len__(self):
        """Return the length of the private event list.

        NOTE(review): nothing visible in this class assigns
        ``self.__event_list`` -- confirm where it is initialised.
        """
        return len(self.__event_list)



if __name__ == "__main__":
    # Ad-hoc manual run: switch logging to debug, load the agent
    # configuration and build sample events.

    Logger.set_verbose("debug")

    from Config import Conf

    conf = Conf()
    # Alternative, relative location of the same configuration file:
    #conf.read(['../etc/agent/config.cfg'])
    conf.read(['/etc/ossim/agent/config.cfg'])

    from Event import Event

    # Fields of the first sample event, assigned in this exact order.
    sample_fields = (
        ("src_ip", "127.0.0.1"),
        ("dst_ip", "127.0.0.1"),
        ("sensor", "127.0.0.1"),
        ("plugin_id", "6001"),
        ("plugin_sid", "1"),
        ("src_port", "22"),
        ("dst_port", "80"),
    )
    event1 = Event()
    for field_name, field_value in sample_fields:
        event1[field_name] = field_value

    event2 = Event()
Example #28
0
class Agent():
    """Agent that connects to a server and queues length-prefixed packets.

    Packets read from the server socket are pushed onto
    ``recv_packet_queue``; ``send_packet_queue`` is handed to the ``Sender``
    and ``Heartbeat`` workers.
    """

    def __init__(self):
        """Parse command-line options, create the queues and read config.cfg.

        The configuration file is looked up next to this source file.
        """
        self.options = CommandLineOptions().get_options()
        self.server_connection = None
        self.send_packet_queue = Queue.Queue(maxsize=0)
        self.recv_packet_queue = Queue.Queue(maxsize=0)
        self.config = Conf()
        config_dir = os.path.dirname(os.path.realpath(__file__))
        self.config.read([os.path.join(config_dir, "config.cfg")])

    def working(self):
        """Print a fixed liveness message."""
        print("am working ")

    def get_bson_packet(self, buffer_data, len):
        """Split one complete packet off the front of *buffer_data*.

        The first four bytes hold the total packet length (header included)
        as a little-endian unsigned 32-bit integer.

        Args:
            buffer_data: bytes received so far.
            len: number of valid bytes in *buffer_data*. The name shadows
                the builtin but is kept for interface compatibility, so the
                builtin must not be called inside this method.

        Returns:
            ``(packet, remainder)`` when a complete packet is available,
            otherwise ``(None, buffer_data)``.
        """
        if len < 4:
            return None, buffer_data
        message_length, = unpack('<L', buffer_data[0:4])
        if len < message_length:
            return None, buffer_data

        return buffer_data[0:message_length], buffer_data[message_length:len]

    def __start_agent(self):
        """Connect to the server, start the workers and pump incoming packets.

        Runs until the server closes the connection. Exits the process with
        status 0 when the initial connection fails.
        """
        server_ip = self.config.get("server", "ip")
        server_port = int(self.config.get("server", "port"))
        sensor_id = self.config.get("agent", "id")

        priority = 1
        system_id_file = ""

        Worker(self.recv_packet_queue, self.send_packet_queue).start()

        self.server_connection = ServerConn(server_ip, server_port, priority,
                                            sensor_id, system_id_file)
        server_socket = self.server_connection.connect()
        if server_socket is None:
            print("connect failed !")
            # Same effect as exit(0) without depending on the site module.
            raise SystemExit(0)

        Sender(self.send_packet_queue, self.server_connection).start()
        Heartbeat(self.send_packet_queue).start()

        server_socket.setblocking(False)
        read_buff = b""
        print("====")
        while True:
            conn_socket = self.server_connection.get_connectsocket()
            readable, _, exceptional = select.select(
                [conn_socket], [], [conn_socket])
            # handle read event
            try:
                if server_socket in readable:
                    chunk = server_socket.recv(1024 * 1024)
                    if not chunk:
                        # A readable socket returning no data means the peer
                        # closed the connection; without this check select()
                        # returns immediately forever and the loop spins.
                        print("connection closed by server")
                        break
                    # construct packet(s) from everything buffered so far
                    read_buff += chunk
                    read_buff_len = len(read_buff)
                    print(read_buff_len)
                    while read_buff_len > 4:
                        packet, read_buff = self.get_bson_packet(
                            read_buff, read_buff_len)
                        if not packet:
                            # incomplete packet: wait for more data
                            break
                        print("push a packet")
                        self.recv_packet_queue.put(packet)
                        read_buff_len = len(read_buff)
            except Exception as e:
                # Best-effort loop: report the error and keep serving.
                print(e)

            for _ in exceptional:
                print("socket --has exceptional")

    def start(self):
        """Public entry point: run the agent's main loop."""
        self.__start_agent()