def __init__(self):
    """Load settings, build the text helpers and (re)create the ES index.

    An index that already exists under the configured name is deleted
    first, so the instance always starts from a freshly created index.
    """
    conf = Conf()
    self.conf = conf
    self.xml_path = conf.getConfig("path", "xml_path")
    self.index_name = conf.getConfig("search", "index_name")
    self.doc_type = conf.getConfig("search", "doc_type")

    # Settings are loaded; now build the text-processing helpers.
    self.tokenizer = RegexpTokenizer(r"\w+")
    self.lem = WordNetLemmatizer()
    self.stemmer = PorterStemmer()
    self.stopwords = set(stopwords.words("english"))

    self.es = Elasticsearch()
    self.fields = conf.getImportant()
    # NOTE: an explicit mapping (conf.getMapping() + indices.put_mapping)
    # is intentionally not applied here.

    # An ES index and doc_type are analogous to a MySQL database and table.
    index_name = self.index_name
    indices = self.es.indices

    # If the index to be created already exists, delete the old one.
    if indices.exists(index=index_name):
        indices.delete(index=index_name)

    # Create the index.
    indices.create(index=index_name)
    print("created index:" + index_name)
def __init__(self):
    """Set up command-line options, packet queues and the configuration.

    The configuration is read from ``config.cfg`` located next to this
    source file.
    """
    self.options = CommandLineOptions().get_options()
    self.server_connection = None

    # One queue per direction; both are created with maxsize=0.
    self.send_packet_queue = Queue.Queue(maxsize=0)
    self.recv_packet_queue = Queue.Queue(maxsize=0)

    self.config = Conf()
    module_dir = os.path.dirname(os.path.realpath(__file__))
    config_path = os.path.join(module_dir, "config.cfg")
    self.config.read([config_path])
def run(self):
    """Read ``config.cfg``, run the Snort events parser and print the config.

    The configuration is read with ``check_neededEntries=False`` and handed
    to an ``OutputDB``; a ``SnortEventsParser`` built on that output is then
    asked to ``process()``.
    """
    conf = Conf()
    conf.read(['config.cfg'], check_neededEntries=False)

    output = OutputDB(conf)
    parser = SnortEventsParser(output)
    parser.process()

    # Parenthesised form: prints the same text on Python 2 and is also
    # valid syntax on Python 3 (the bare ``print conf`` statement is not).
    print(conf)
def __init__(self):
    """Create the text-processing helpers and empty model/corpus state."""
    # Text-processing helpers.
    self.tokenizer = RegexpTokenizer(r'\w+')
    self.lem = WordNetLemmatizer()
    self.stopwords = set(stopwords.words('english'))

    # Filled in later; nothing is loaded or trained at construction time.
    self.dict = None
    self.corpus = None
    self.bm25model = None
    self.docs_list = []

    # Location of the XML documents, taken from the configuration.
    conf = Conf()
    self.conf = conf
    self.xml_path = conf.getConfig('path', 'xml_path')
class IRSearch(object):
    """Build and run Elasticsearch queries from per-type body templates.

    Templates come from ``Conf.getSeachModel()`` and are keyed by search
    type; ``makeQuery`` fills one in and ``Query`` sends it to the index.
    """

    def __init__(self):
        """Read the search settings and open both ES client handles."""
        self.conf = Conf()
        self.xml_path = self.conf.getConfig('path', 'xml_path')
        self.index_name = self.conf.getConfig('search', 'index_name')
        self.doc_type = self.conf.getConfig('search', 'doc_type')
        self.es = Elasticsearch()
        self.search_body = {}
        self.search_type_support = ['match_all', 'term', 'terms', 'match',
                                    'multi_match', 'bool', 'range', 'prefix',
                                    'wildcard']
        self.search_type_model = self.conf.getSeachModel()
        self.conn = ES('127.0.0.1:9200')
        self.search_result = None
        self.conn.default_indices = [self.index_name]

    def makeQuery(self, searchtype, searchfield, keyword, is_sort=False,
                  is_aggs=False, is_multi_match=False, use_bool=""):
        """Build the request body for one query.

        Args:
            searchtype: one of ``self.search_type_support``.
            searchfield: field name (or list of fields for multi-match).
            keyword: value to search for.
            is_sort: accepted for interface compatibility; currently unused.
            is_aggs: accepted for interface compatibility; currently unused.
            is_multi_match: build a ``{"query": ..., "fields": ...}`` clause.
            use_bool: bool occurrence key; when non-empty the keyword is
                wrapped in a ``term`` clause stored under that key.

        Returns:
            The request body (also stored in ``self.search_body``), or
            ``None`` when ``searchtype`` is not supported.
        """
        # Function-scope import keeps this block self-contained.
        import copy

        if searchtype not in self.search_type_support:
            print('Ops, your search type is not supported')
            print('Supported search types:\n')
            print(self.search_type_support)
            return None

        # Work on a private copy: filling in the template itself would leak
        # this query's fields into every later query of the same type.
        self.search_body = copy.deepcopy(self.search_type_model[searchtype])

        if is_multi_match:
            self.search_body["query"][searchtype] = {
                "query": keyword,
                "fields": searchfield
            }
        elif use_bool:
            self.search_body["query"][searchtype][use_bool] = [{
                "term": {
                    searchfield: keyword
                }
            }]
        else:
            self.search_body["query"][searchtype][searchfield] = keyword

        print(self.search_body)
        return self.search_body

    def Query(self, searchtype, searchfield, keyword, is_sort=False,
              is_aggs=False, is_multi_match=False, use_bool=""):
        """Build a query with ``makeQuery`` and run it against the index.

        Takes the same arguments as ``makeQuery`` and returns whatever
        ``self.es.search`` returns.
        """
        # NOTE(review): for an unsupported searchtype makeQuery returns None
        # and that None is passed on as the request body - confirm this is
        # the intended behaviour.
        query_body = self.makeQuery(
            searchtype, searchfield, keyword, is_sort, is_aggs,
            is_multi_match, use_bool)
        result = self.es.search(index=self.index_name,
                                doc_type=self.doc_type,
                                body=query_body)
        return result

    def querySingle(self, searchfield, keyword):
        """Run a single ``TermQuery`` and keep it in ``self.search_result``."""
        q = TermQuery(searchfield, keyword)
        self.search_result = self.conn.search(query=q)
def __init__(self):
    """Read the search settings and open both Elasticsearch client handles."""
    conf = Conf()
    self.conf = conf
    self.xml_path = conf.getConfig('path', 'xml_path')
    self.index_name = conf.getConfig('search', 'index_name')
    self.doc_type = conf.getConfig('search', 'doc_type')

    self.es = Elasticsearch()
    self.search_body = {}
    # Search types this class accepts.
    self.search_type_support = [
        'match_all',
        'term',
        'terms',
        'match',
        'multi_match',
        'bool',
        'range',
        'prefix',
        'wildcard',
    ]
    self.search_type_model = conf.getSeachModel()

    # Second client handle, pointed at the configured index by default.
    connection = ES('127.0.0.1:9200')
    self.conn = connection
    self.search_result = None
    connection.default_indices = [self.index_name]
class Test(object):
    """Ad-hoc checks against the configured Elasticsearch index."""

    def __init__(self):
        """Read the search settings and create a retrying ES client."""
        self.conf = Conf()
        self.xml_path = self.conf.getConfig("path", "xml_path")
        self.index_name = self.conf.getConfig("search", "index_name")
        self.doc_type = self.conf.getConfig("search", "doc_type")
        self.es = Elasticsearch(timeout=30, max_retries=10,
                                retry_on_timeout=True)
        self.search_body = {}
        self.search_type_support = [
            "match_all",
            "term",
            "terms",
            "match",
            "multi_match",
            "bool",
            "range",
            "prefix",
            "wildcard",
        ]
        self.search_type_model = self.conf.getSeachModel()

    def getCount(self):
        """Print the result of ``es.count`` for the configured index/type."""
        print(self.es.count(index=self.index_name, doc_type=self.doc_type))

    def searchSingle(self):
        """Run a fixed ``match`` query and record the ids of the hits.

        Every hit's ``_source`` is printed and its ``id_info`` is appended,
        one per line, to the file ``carcinoma`` in the working directory.
        """
        # Other bodies that have been used here:
        #   {"query": {"match": {"detailed_description": "carcinoma"}}}
        #   {"query": {"match": {"id_info": "NCT00001431"}}}
        res = self.es.search(
            index=self.index_name,
            doc_type=self.doc_type,
            body={
                "query": {"match": {"id_info": "NCT02065063"}},
                "size": 10000,
            },
        )
        hits = res["hits"]["hits"]
        if not hits:
            # Nothing to record; do not create or touch the output file.
            return

        # Open the output file once instead of re-opening it for every hit.
        with open("carcinoma", 'a') as f:
            for r in hits:
                print(r["_source"])
                f.write("{}\n".format(r["_source"]["id_info"]))

    def getPickles(self, pickle_path):
        """Return the object unpickled from ``pickle_path``."""
        # NOTE(review): pickle.load can execute arbitrary code; only use
        # this on pickle files that come from a trusted source.
        with open(pickle_path, 'rb') as pf:
            data = pickle.load(pf)
        return data
class OutputServer(OutputPlugins):
    """Output plugin that sends events and plugin state over ``conn``."""

    def __init__(self, conn):
        """Store the connection and read the ``output-server`` settings.

        Events are only forwarded when the configuration has an
        ``output-server`` section whose ``send_events`` option is true.
        """
        logger.info("Added Server output (%s:%s)" % (conn.get_server_ip(),
                                                     conn.get_server_port()))
        self.conn = conn
        self.activated = True
        self.send_events = False
        self.conf = Conf()
        self.options = CommandLineOptions().get_options()

        # A config file given on the command line overrides the default.
        if self.options.config_file:
            conffile = self.options.config_file
        else:
            conffile = self.conf.DEFAULT_CONFIG_FILE
        self.conf.read([conffile], False)

        if self.conf.has_section("output-server"):
            if self.conf.getboolean("output-server", "send_events"):
                self.send_events = True

    def event(self, e):
        """Send ``str(e)`` to the server if forwarding is enabled."""
        if self.activated and self.send_events:
            try:
                if self.conn.get_is_alive():
                    self.conn.send(str(e))
            except Exception:
                # Best effort: a failed send must not stop the caller, but
                # KeyboardInterrupt/SystemExit are no longer swallowed.
                return

    def plugin_state(self, msg):
        """Send a plugin state message while the output is active."""
        if self.activated:
            try:
                self.conn.send(msg)
            except Exception:
                # Best effort, same policy as event().
                return

    def shutdown(self):
        """Close the connection and deactivate this output."""
        self.conn.close()
        self.activated = False
def __init__(self):
    """Read the search settings and create a retrying Elasticsearch client."""
    conf = Conf()
    self.conf = conf
    self.xml_path = conf.getConfig("path", "xml_path")
    self.index_name = conf.getConfig("search", "index_name")
    self.doc_type = conf.getConfig("search", "doc_type")

    # Client is created with a 30 s timeout and retries on timeout.
    es_options = dict(timeout=30, max_retries=10, retry_on_timeout=True)
    self.es = Elasticsearch(**es_options)

    self.search_body = {}
    # Search types this class accepts.
    self.search_type_support = (
        "match_all term terms match multi_match bool range prefix wildcard"
    ).split()
    self.search_type_model = conf.getSeachModel()
def __init__(self, conn):
    """Store the connection and read the ``output-server`` settings.

    ``send_events`` stays False unless the configuration has an
    ``output-server`` section whose ``send_events`` option is true.
    """
    logger.info("Added Server output (%s:%s)" % (conn.ip, conn.port))

    self.conn = conn
    self.activated = True
    self.send_events = False

    self.conf = Conf()
    self.options = CommandLineOptions().get_options()

    # A config file given on the command line overrides the default.
    conffile = self.options.config_file or self.conf.DEFAULT_CONFIG_FILE
    self.conf.read([conffile], 'latin1')

    section = "output-server"
    if self.conf.has_section(section) and \
            self.conf.getboolean(section, "send_events"):
        self.send_events = True
class OutputServer(OutputPlugins):
    """Output plugin that sends events and plugin state over ``conn``."""

    def __init__(self, conn):
        """Store the connection and read the ``output-server`` settings.

        Events are only forwarded when the configuration has an
        ``output-server`` section whose ``send_events`` option is true.
        """
        logger.info("Added Server output (%s:%s)" % (conn.get_server_ip(),
                                                     conn.get_server_port()))
        self.conn = conn
        self.activated = True
        self.send_events = False
        self.conf = Conf()
        self.options = CommandLineOptions().get_options()

        # A config file given on the command line overrides the default.
        if self.options.config_file:
            conffile = self.options.config_file
        else:
            conffile = self.conf.DEFAULT_CONFIG_FILE
        self.conf.read([conffile], 'latin1')

        if self.conf.has_section("output-server"):
            if self.conf.getboolean("output-server", "send_events"):
                self.send_events = True

    def event(self, e):
        """Send ``str(e)`` to the server if forwarding is enabled."""
        if self.activated and self.send_events:
            try:
                if self.conn.get_is_alive():
                    self.conn.send(str(e))
            except Exception:
                # Best effort: a failed send must not stop the caller, but
                # KeyboardInterrupt/SystemExit are no longer swallowed.
                return

    def plugin_state(self, msg):
        """Send a plugin state message while the output is active."""
        if self.activated:
            try:
                self.conn.send(msg)
            except Exception:
                # Best effort, same policy as event().
                return

    def shutdown(self):
        """Close the connection and deactivate this output."""
        self.conn.close()
        self.activated = False
def __init__(self):
    """Read settings, load the pickled dictionary and build the helpers."""
    conf = Conf()
    self.conf = conf
    self.query_xml_path = conf.getConfig("path", "query_xml_path")
    self.index_name = conf.getConfig("search", "index_name")
    self.doc_type = conf.getConfig("search", "doc_type")

    dict_pickle_path = conf.getConfig("path", "dict_pickle_path")
    self.meshDict = self.getPickles(dict_pickle_path)

    # Set the ES timeout to 30 seconds (the default is 10) and allow up to
    # 10 retries, to avoid timeouts caused by large amounts of data.
    self.es = Elasticsearch(timeout=30, max_retries=10,
                            retry_on_timeout=True)

    self.fields = conf.getImportant()
    self.extracted = []

    # Text-processing helpers.
    self.tokenizer = RegexpTokenizer(r"\w+")
    self.lem = WordNetLemmatizer()
    self.stemmer = PorterStemmer()
    self.stopwords = set(stopwords.words("english"))
def __init__(self, plugin, watch_rule):
    """Prepare a monitor for ``watch_rule`` using the rules of ``plugin``.

    Reads the agent configuration, decodes the base64 watch-rule fields,
    pre-computes the per-rule queries/regexps/results, resolves the time
    zone to use and finally calls ``open()``.
    """
    self.plugin = plugin
    self.options = CommandLineOptions().get_options()

    # Read configuration; a file given on the command line wins.
    self._conf = Conf()
    conffile = self.options.config_file or self._conf.DEFAULT_CONFIG_FILE
    self._conf.read([conffile], 'latin1')

    self.watch_rule = watch_rule
    groups = self.watch_rule.dict()
    # Fields listed in EVENT_BASE64 are decoded in place.
    for item, value in groups.iteritems():
        if item in self.watch_rule.EVENT_BASE64:
            groups[item] = b64decode(value)

    self.queries = self.get_replaced_values('query', groups)
    self.regexps = self.get_replaced_values('regexp', groups)
    self.results = self.get_replaced_values('result', groups)

    self.initial_time = int(time.time())  # initial time at object call
    self.first_value = None

    # A tzone in the plugin's DEFAULT section overrides the agent default.
    if "tzone" not in self.plugin.hitems("DEFAULT"):
        self.timezone = self._conf.get("plugin-defaults", "tzone")
    else:
        self.timezone = self.plugin.get("DEFAULT", "tzone")
        logger.debug("Plugin %s (%s) with specific tzone = %s" % \
                     (self.plugin.get("config", "name"),
                      self.plugin.get("DEFAULT", "plugin_id"),
                      self.timezone))

    self.__agenttimezone = None
    self.__EventTimeZone = None
    self.__systemTimeZone = None
    self.__set_system_tzone()
    self.__setTZData()
    self.open()
def __init__(self, plugin, watch_rule):
    """Prepare a monitor for ``watch_rule`` using the rules of ``plugin``.

    Reads the agent configuration, pre-computes the per-rule
    queries/regexps/results and finally calls ``open()``.
    """
    self.plugin = plugin
    self.options = CommandLineOptions().get_options()

    # Read configuration; a file given on the command line wins.
    self._conf = Conf()
    conffile = self.options.config_file or self._conf.DEFAULT_CONFIG_FILE
    self._conf.read([conffile], False)

    self.watch_rule = watch_rule

    # Each lookup gets its own watch_rule.dict() result.
    self.queries = self.get_replaced_values('query', self.watch_rule.dict())
    self.regexps = self.get_replaced_values('regexp', self.watch_rule.dict())
    self.results = self.get_replaced_values('result', self.watch_rule.dict())

    self.initial_time = int(time.time())  # initial time at object call
    self.first_value = None
    self.open()
def __init__(self, conn):
    """Store the connection and read the ``output-server`` settings.

    ``send_events`` stays False unless the configuration has an
    ``output-server`` section whose ``send_events`` option is true.
    """
    server_ip = conn.get_server_ip()
    server_port = conn.get_server_port()
    logger.info("Added Server output (%s:%s)" % (server_ip, server_port))

    self.conn = conn
    self.activated = True
    self.send_events = False

    self.conf = Conf()
    self.options = CommandLineOptions().get_options()

    # A config file given on the command line overrides the default.
    conffile = self.options.config_file or self.conf.DEFAULT_CONFIG_FILE
    self.conf.read([conffile], False)

    section = "output-server"
    if self.conf.has_section(section) and \
            self.conf.getboolean(section, "send_events"):
        self.send_events = True
def __init__(self, plugin, watch_rule):
    """Prepare a monitor for ``watch_rule`` using the rules of ``plugin``.

    Reads the agent configuration, decodes the base64 watch-rule fields,
    pre-computes the per-rule queries/regexps/results, resolves the time
    zone to use and finally calls ``open()``.
    """
    self.plugin = plugin
    self.options = CommandLineOptions().get_options()

    # Read configuration; a file given on the command line wins.
    self._conf = Conf()
    conffile = self.options.config_file or self._conf.DEFAULT_CONFIG_FILE
    self._conf.read([conffile], 'latin1')

    self.watch_rule = watch_rule
    groups = self.watch_rule.dict()
    # Fields listed in EVENT_BASE64 are decoded in place.
    for item, value in groups.iteritems():
        if item in self.watch_rule.EVENT_BASE64:
            groups[item] = b64decode(value)

    self.queries = self.get_replaced_values('query', groups)
    self.regexps = self.get_replaced_values('regexp', groups)
    self.results = self.get_replaced_values('result', groups)

    self.initial_time = int(time.time())  # initial time at object call
    self.first_value = None

    # A tzone in the plugin's DEFAULT section overrides the agent default.
    if "tzone" not in self.plugin.hitems("DEFAULT"):
        self.timezone = self._conf.get("plugin-defaults", "tzone")
    else:
        self.timezone = self.plugin.get("DEFAULT", "tzone")
        logger.debug("Plugin %s (%s) with specific tzone = %s" % \
                     (self.plugin.get("config", "name"),
                      self.plugin.get("DEFAULT", "plugin_id"),
                      self.timezone))

    self.__agenttimezone = None
    self.__EventTimeZone = None
    self.__systemTimeZone = None
    self.__set_system_tzone()
    self.__setTZData()
    self.open()
def __init__(self, plugin, watch_rule):
    """Prepare a monitor for ``watch_rule`` using the rules of ``plugin``.

    Reads the agent configuration, pre-computes the per-rule
    queries/regexps/results and finally calls ``open()``.
    """
    self.plugin = plugin
    self.options = CommandLineOptions().get_options()

    # Read configuration; a file given on the command line wins.
    self._conf = Conf()
    conffile = self.options.config_file or self._conf.DEFAULT_CONFIG_FILE
    self._conf.read([conffile], False)

    self.watch_rule = watch_rule

    # Each lookup gets its own watch_rule.dict() result.
    self.queries = self.get_replaced_values('query', self.watch_rule.dict())
    self.regexps = self.get_replaced_values('regexp', self.watch_rule.dict())
    self.results = self.get_replaced_values('result', self.watch_rule.dict())

    self.initial_time = int(time.time())  # initial time at object call
    self.first_value = None
    self.open()
class Monitor:
    """Monitor bound to one plugin and one server watch-rule.

    The constructor pre-computes, for every plugin rule, the query and
    regexp with the watch-rule data substituted in, and resolves the time
    zone used to normalise event dates.
    """

    def __init__(self, plugin, watch_rule):
        """Read the configuration, expand the plugin rules and open.

        Args:
            plugin: plugin object providing ``rules()``, ``get()``,
                ``hitems()`` and ``get_replace_value()``.
            watch_rule: watch-rule object providing ``dict()`` and
                ``EVENT_BASE64``.
        """
        self.plugin = plugin
        self.options = CommandLineOptions().get_options()

        # read configuration
        self._conf = Conf()
        if self.options.config_file:
            conffile = self.options.config_file
        else:
            conffile = self._conf.DEFAULT_CONFIG_FILE
        self._conf.read([conffile], 'latin1')

        self.watch_rule = watch_rule
        groups = self.watch_rule.dict()
        # Fields listed in EVENT_BASE64 are decoded in place.
        for item, value in groups.iteritems():
            if item in self.watch_rule.EVENT_BASE64:
                groups[item] = b64decode(value)

        self.queries = self.get_replaced_values('query', groups)
        self.regexps = self.get_replaced_values('regexp', groups)
        self.results = self.get_replaced_values('result', groups)

        self.initial_time = int(time.time())  # initial time at object call
        self.first_value = None

        # A tzone in the plugin's DEFAULT section overrides the agent one.
        if "tzone" in self.plugin.hitems("DEFAULT"):
            self.timezone = self.plugin.get("DEFAULT", "tzone")
            logger.debug("Plugin %s (%s) with specific tzone = %s" % \
                         (self.plugin.get("config", "name"),
                          self.plugin.get("DEFAULT", "plugin_id"),
                          self.timezone))
        else:
            self.timezone = self._conf.get("plugin-defaults", "tzone")

        self.__agenttimezone = None
        self.__EventTimeZone = None
        self.__systemTimeZone = None
        self.__set_system_tzone()
        self.__setTZData()
        self.open()

    def get_replaced_values(self, key, groups):
        """Return ``{rule_name: value}`` for ``key`` over all plugin rules.

        Plugin variables are replaced with watch_rule data, except for the
        ``'result'`` key, whose values are returned unmodified.

        For example, given the following watch_rule:

            watch-rule plugin_id="2006" plugin_sid="1" condition="eq"
                       value="1" from="192.168.6.64" to="192.168.6.63"
                       port_from="5643" port_to="22"

        and the following plugin query:
            query = {$from}:{$port_from} {$to}:{$port_to}

        the variables are replaced with the watch-rule data:
            query = 192.168.6.64:5643 192.168.6.63:22
        """
        values = {}
        for rule_name, rule in self.plugin.rules().iteritems():
            if key != 'result':
                values[rule_name] = self.plugin.get_replace_value(
                    rule[key], groups)
            else:
                values[rule_name] = rule[key]
        return values

    def _plugin_defaults(self, event, log):
        """Fill the unset fields of ``event`` and return it.

        Defaults come from the ``plugin-defaults`` config section and from
        the watch-rule fields (``from``, ``to``, ``port_from``,
        ``port_to``). Source and destination addresses that do not look
        like dotted IPv4 are replaced by ``0.0.0.0`` and the original text
        is kept in ``userdata8`` / ``userdata9``.

        Args:
            event: the event (watch rule) to complete; modified in place.
            log: text stored in ``event["log"]``.
        """
        # get default values from config
        ipv4_reg = r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$"
        if self._conf.has_section("plugin-defaults"):

            # 1) date
            default_date_format = self._conf.get("plugin-defaults",
                                                 "date_format")
            if event["date"] is None and default_date_format:
                # One clock reading so date and fdate can never differ.
                now = time.strftime('%Y-%m-%d %H:%M:%S',
                                    time.localtime(time.time()))
                event["date"] = now
                event["fdate"] = now

            # 2) sensor
            default_sensor = self._conf.get("plugin-defaults", "sensor")
            if event["sensor"] is None and default_sensor:
                event["sensor"] = default_sensor

            # 3) interface
            default_iface = self._conf.get("plugin-defaults", "interface")
            if event["interface"] is None and default_iface:
                event["interface"] = default_iface

            # 4) source ip
            if event["src_ip"] is None:
                event["src_ip"] = event["from"]

            # 5) dest ip
            if event["dst_ip"] is None:
                event["dst_ip"] = event["to"]

            # 6) protocol
            if event["protocol"] is None:
                event["protocol"] = "TCP"

            # 7) ports
            if event["src_port"] is None:
                event["src_port"] = event["port_from"]
            if event["dst_port"] is None:
                event["dst_port"] = event["port_to"]
            if event["src_port"] is None:
                event["src_port"] = 0
            if event["dst_port"] is None:
                event["dst_port"] = 0
            if event["src_ip"] is None:
                event["src_ip"] = event["sensor"]
            if event["dst_ip"] is None:
                event["dst_ip"] = event["sensor"]

            # 8) Time zone
            if 'tzone' in event.EVENT_ATTRS:
                Utils.normalizeToUTCDate(event, self.__EventTimeZone)

        # Check if valid ip, if not we put 0.0.0.0 in the src_ip field
        if event['src_ip'] is not None:
            if not re.match(ipv4_reg, event['src_ip']):
                data = event['src_ip']
                event['src_ip'] = '0.0.0.0'
                print(
                    "Event's field src_ip (%s) is not a valid IP.v4/IP.v6 address, set it to default ip 0.0.0.0 and real data on userdata8"
                    % (data))
                event['userdata8'] = data
        elif 'src_ip' in event.EVENT_ATTRS:
            event['src_ip'] = '0.0.0.0'

        # Check if valid ip, if not we put 0.0.0.0 in the dst_ip field
        if event['dst_ip'] is not None:
            if not re.match(ipv4_reg, event['dst_ip']):
                data = event['dst_ip']
                print(
                    "Event's field dst_ip (%s) is not a valid IP.v4 address, set it to default ip 0.0.0.0 and real data on userdata9"
                    % (data))
                event['dst_ip'] = '0.0.0.0'
                event['userdata9'] = data
        elif 'dst_ip' in event.EVENT_ATTRS:
            event['dst_ip'] = '0.0.0.0'

        event["log"] = log

        # the type of this event should always be 'monitor'
        if event["type"] is None:
            event["type"] = 'monitor'

        # Clean up the watch-rule-only fields
        event["port_from"] = ""
        event["port_to"] = ""
        event["to"] = ""
        event["from"] = ""
        event["absolute"] = ""
        event["interval"] = ""
        return event

    def __set_system_tzone(self):
        """Set ``self.systemtzone`` from the first line of /etc/timezone.

        Falls back to ``'GMT'`` when the file cannot be read or names a
        zone that is not in ``all_timezones``.
        """
        used_tzone = 'GMT'
        try:
            # read local timezone information; the file is always closed.
            with open('/etc/timezone', 'r') as f:
                used_tzone = f.readline().rstrip()
            if used_tzone not in all_timezones:
                logger.info(
                    "Warning, we can't read valid timezone data.Using GMT")
                used_tzone = 'GMT'
        except Exception:
            used_tzone = 'GMT'
            logger.info("Warning, we can't read valid timezone data.Using GMT")
        # Assigned on every path, including the error path, so later
        # readers of self.systemtzone never hit a missing attribute.
        self.systemtzone = used_tzone
class Monitor:
    """Base class for watch-rule monitors.

    A monitor asks its data source for a value (``get_data``), extracts an
    integer from the response with the plugin rule's regexp and compares it
    against the watch-rule condition. Child classes supply ``get_data``,
    ``open`` and ``close``.
    """

    def __init__(self, plugin, watch_rule):
        """Read the configuration, expand the plugin rules and open.

        Args:
            plugin: plugin object providing ``rules()`` and
                ``get_replace_value()``.
            watch_rule: watch-rule object providing ``dict()`` and item
                access.
        """
        self.plugin = plugin
        self.options = CommandLineOptions().get_options()

        # read configuration
        self._conf = Conf()
        if self.options.config_file:
            conffile = self.options.config_file
        else:
            conffile = self._conf.DEFAULT_CONFIG_FILE
        self._conf.read([conffile], False)

        self.watch_rule = watch_rule

        self.queries = \
            self.get_replaced_values('query', self.watch_rule.dict())
        self.regexps = \
            self.get_replaced_values('regexp', self.watch_rule.dict())
        self.results = \
            self.get_replaced_values('result', self.watch_rule.dict())

        self.initial_time = int(time.time())  # initial time at object call
        self.first_value = None

        self.open()

    def get_replaced_values(self, key, groups):
        """Return ``{rule_name: value}`` for ``key`` over all plugin rules.

        Plugin variables are replaced with watch_rule data.

        For example, given the following watch_rule:

            watch-rule plugin_id="2006" plugin_sid="1" condition="eq"
                       value="1" from="192.168.6.64" to="192.168.6.63"
                       port_from="5643" port_to="22"

        and the following plugin query:
            query = {$from}:{$port_from} {$to}:{$port_to}

        the variables are replaced with the watch-rule data:
            query = 192.168.6.64:5643 192.168.6.63:22
        """
        values = {}
        for rule_name, rule in self.plugin.rules().iteritems():
            values[rule_name] = \
                self.plugin.get_replace_value(rule[key], groups)
        return values

    def _plugin_defaults(self, event, log):
        """Fill the unset fields of ``event`` and return it.

        Defaults come from the ``plugin-defaults`` config section and from
        the watch-rule fields (``from``, ``to``, ``port_from``,
        ``port_to``).

        Args:
            event: the event (watch rule) to complete; modified in place.
            log: text stored in ``event["log"]``.
        """
        # get default values from config
        if self._conf.has_section("plugin-defaults"):

            # 1) date
            default_date_format = self._conf.get("plugin-defaults",
                                                 "date_format")
            if event["date"] is None and default_date_format:
                # One clock reading so date and fdate can never differ.
                now = time.strftime('%Y-%m-%d %H:%M:%S',
                                    time.localtime(time.time()))
                event["date"] = now
                event["fdate"] = now

            # 2) sensor
            default_sensor = self._conf.get("plugin-defaults", "sensor")
            if event["sensor"] is None and default_sensor:
                event["sensor"] = default_sensor

            # 3) interface
            default_iface = self._conf.get("plugin-defaults", "interface")
            if event["interface"] is None and default_iface:
                event["interface"] = default_iface

            # 4) source ip
            if event["src_ip"] is None:
                event["src_ip"] = event["from"]

            # 5) dest ip
            if event["dst_ip"] is None:
                event["dst_ip"] = event["to"]

            # 6) protocol
            if event["protocol"] is None:
                event["protocol"] = "TCP"

            # 7) ports
            if event["src_port"] is None:
                event["src_port"] = event["port_from"]
            if event["dst_port"] is None:
                event["dst_port"] = event["port_to"]
            if event["src_port"] is None:
                event["src_port"] = 0
            if event["dst_port"] is None:
                event["dst_port"] = 0
            if event["src_ip"] is None:
                event["src_ip"] = event["sensor"]
            if event["dst_ip"] is None:
                event["dst_ip"] = event["sensor"]

        event["log"] = log

        # the type of this event should always be 'monitor'
        if event["type"] is None:
            event["type"] = 'monitor'

        # Clean up the watch-rule-only fields
        event["port_from"] = ""
        event["port_to"] = ""
        event["to"] = ""
        event["from"] = ""
        event["absolute"] = ""
        event["interval"] = ""
        return event

    def match_rule(self):
        """Return the name of the plugin rule matching the watch rule.

        A rule matches when one of its sids equals the watch rule's
        ``plugin_sid`` or is ``any`` (case-insensitive). Returns ``None``
        when no rule matches.
        """
        plugin_sid = self.watch_rule['plugin_sid']
        for rule_name, rule in self.plugin.rules().iteritems():
            for sid in Config.split_sids(str(rule['sid'])):  # sid=1,2-4,5
                if str(plugin_sid) == str(sid) or str(sid).lower() == 'any':
                    return rule_name
        return None

    def eval_condition(self, cond, arg1, arg2, value):
        """Evaluate ``arg2 <cond> arg1 + value``.

        Args:
            cond: one of ``eq``, ``ne``, ``gt``, ``ge``, ``le``, ``lt``.
            arg1: initial value.
            arg2: current value.
            value: offset added to ``arg1``.

        Returns:
            The result of the comparison; ``False`` when an operand cannot
            be converted to an integer or ``cond`` is unknown.
        """
        operands = {}
        for label, raw in (("arg1", arg1), ("arg2", arg2), ("value", value)):
            try:
                operands[label] = int(raw)
            except (TypeError, ValueError):
                # TypeError covers None and other non-numeric objects,
                # which used to escape as an unhandled exception.
                logger.warning(
                    "value returned by monitor (%s=%s) is not an integer" % \
                    (label, str(raw)))
                return False
        arg1 = operands["arg1"]
        arg2 = operands["arg2"]
        value = operands["value"]

        logger.debug("Monitor expresion evaluation: " + \
                     "%s(arg2) <%s> %s(arg1) + %s(value)?" % \
                     (str(arg2), str(cond), str(arg1), str(value)))

        target = arg1 + value
        if cond == "eq":
            return arg2 == target
        elif cond == "ne":
            return arg2 != target
        elif cond == "gt":
            return arg2 > target
        elif cond == "ge":
            return arg2 >= target
        elif cond == "le":
            return arg2 <= target
        elif cond == "lt":
            return arg2 < target
        return False

    # given the watch rule, ask to Monitor and obtain a result
    # *must* be overriden in child classes:
    # different implementations for each type of monitor
    # (socket, database, etc.)
    def get_data(self, rule_name):
        """Return the raw monitor response for ``rule_name`` (abstract)."""
        pass

    # *must* be overriden in child classes:
    def open(self):
        """Open the monitor's data source (abstract)."""
        pass

    # *must* be overriden in child classes:
    def close(self):
        """Close the monitor's data source (abstract)."""
        pass

    # TODO: merge with ParserLog.feed()
    def get_value(self, monitor_response, rule_name):
        """Extract the integer value of ``rule_name`` from a response.

        The rule's regexp is searched in the response (only the first item
        when the response is a list); the captured groups are substituted
        into the rule's ``result`` template.

        Returns:
            The integer part of the substituted result, ``None`` when the
            regexp does not match, or ``False`` when the result is not a
            number.
        """
        regexp = self.regexps[rule_name]
        pattern = re.compile(regexp, re.IGNORECASE | re.MULTILINE)

        # TODO: monitor_response could possibly be a list
        if isinstance(monitor_response, list):
            match = pattern.search(monitor_response[0])
        else:
            match = pattern.search(monitor_response)
        if match is None:
            return None

        captured = {}
        # group by index ()
        for index, group in enumerate(match.groups(), 1):
            if group is None:
                group = ''
            captured[str(index)] = str(group)
        # group by name (?P<name-of-group>)
        captured.update(match.groupdict())

        # first, try getting substitution from the regular expresion syntax
        result = self.results[rule_name]
        value = self.plugin.get_replace_value(result, captured)
        try:
            return int(value.split(".")[0])
        except Exception:
            # Not a number: callers treat False as "no usable value".
            return False

    def evaluate(self, rule_name):
        """Get a new value from the monitor and check the watch rule.

        When the condition applies, an event is built from the watch rule
        and sent through ``Output.event`` and ``Stats.new_event``.

        Returns:
            ``False`` only when a value was obtained and the condition did
            not apply; ``True`` otherwise (condition applied, or there was
            nothing to compare).
        """
        if self.first_value is None:
            logger.debug("Can not extract value (arg1) from monitor response or no initial value to compare with")
            return True

        monitor_response = self.get_data(rule_name)
        if not monitor_response:
            logger.warning("No data received from monitor")
            return True

        value = self.get_value(monitor_response, rule_name)
        if value is None:
            return True

        cond = self.watch_rule["condition"]
        threshold = int(self.watch_rule["value"])
        if not self.eval_condition(cond=cond, arg1=self.first_value,
                                   arg2=value, value=threshold):
            logger.debug("No data matching the watch-rule received from monitor")
            return False

        self.watch_rule["type"] = "monitor"
        try:
            log = "Monitor Command: %s , Monitor expresion evaluation: %s(arg2) <%s> %s(arg1) + %s(value)? , Command Response: %s" % (
                str(self.queries), str(value), str(cond),
                str(self.first_value), str(threshold),
                monitor_response.replace("\n", "\r"))
        except Exception:
            # e.g. the response is a list and has no replace()
            log = "Monitor Exception"

        self.watch_rule = self._plugin_defaults(self.watch_rule, log)
        Output.event(self.watch_rule)
        Stats.new_event(self.watch_rule)
        return True

    # *may* be overriden in child classes
    def process(self):
        """Run one monitoring step for the watch rule.

        Returns:
            ``None`` when no plugin rule matches; the result of
            ``evaluate`` while the watch-rule interval has not expired;
            ``True`` otherwise.
        """
        # get the name of rule to apply
        rule_name = self.match_rule()
        if rule_name is None:
            return None
        logger.info("Matched rule: [%s]" % (rule_name))

        # get data from plugin (first time)
        if self.first_value is None:
            # <absolute> is "no" by default
            # the absence of <interval> implies that <absolute> is "yes"
            if self.watch_rule['absolute'] in ('yes', 'true') or \
                    not self.watch_rule['interval']:
                self.first_value = 0
            else:
                monitor_response = self.get_data(rule_name)
                if not monitor_response:
                    # No data (possibly None): nothing to iterate over.
                    self.first_value = 0
                else:
                    for resp in monitor_response:
                        if resp:
                            self.first_value = self.get_value(resp,
                                                              rule_name)
                if self.first_value == False:
                    self.first_value = 0

        # get current time
        current_time = int(time.time())

        # Three posibilities:
        #
        # 1) no interval specified, no need to wait
        if 'interval' not in self.watch_rule.dict():
            self.evaluate(rule_name)
            return True
        elif not self.watch_rule['interval']:
            self.evaluate(rule_name)
            return True

        # 2) we are in time, check the result of the watch-rule
        elif (self.initial_time + \
                int(self.watch_rule["interval"]) > current_time):
            return self.evaluate(rule_name)

        # 3) we are out of time
        else:
            self.evaluate(rule_name)
            return True
class Monitor:
    """Base class for watch-rule monitors.

    A monitor asks its data source for a value (``get_data``), extracts an
    integer from the response with the plugin rule's regexp and compares it
    against the watch-rule condition. Child classes supply ``get_data``,
    ``open`` and ``close``.
    """

    def __init__(self, plugin, watch_rule):
        """Read the configuration, expand the plugin rules and open.

        Args:
            plugin: plugin object providing ``rules()`` and
                ``get_replace_value()``.
            watch_rule: watch-rule object providing ``dict()`` and item
                access.
        """
        self.plugin = plugin
        self.options = CommandLineOptions().get_options()

        # read configuration
        self._conf = Conf()
        if self.options.config_file:
            conffile = self.options.config_file
        else:
            conffile = self._conf.DEFAULT_CONFIG_FILE
        self._conf.read([conffile], False)

        self.watch_rule = watch_rule

        self.queries = \
            self.get_replaced_values('query', self.watch_rule.dict())
        self.regexps = \
            self.get_replaced_values('regexp', self.watch_rule.dict())
        self.results = \
            self.get_replaced_values('result', self.watch_rule.dict())

        self.initial_time = int(time.time())  # initial time at object call
        self.first_value = None

        self.open()

    def get_replaced_values(self, key, groups):
        """Return ``{rule_name: value}`` for ``key`` over all plugin rules.

        Plugin variables are replaced with watch_rule data.

        For example, given the following watch_rule:

            watch-rule plugin_id="2006" plugin_sid="1" condition="eq"
                       value="1" from="192.168.6.64" to="192.168.6.63"
                       port_from="5643" port_to="22"

        and the following plugin query:
            query = {$from}:{$port_from} {$to}:{$port_to}

        the variables are replaced with the watch-rule data:
            query = 192.168.6.64:5643 192.168.6.63:22
        """
        values = {}
        for rule_name, rule in self.plugin.rules().iteritems():
            values[rule_name] = \
                self.plugin.get_replace_value(rule[key], groups)
        return values

    def _plugin_defaults(self, event, log):
        """Fill the unset fields of ``event`` and return it.

        Defaults come from the ``plugin-defaults`` config section and from
        the watch-rule fields (``from``, ``to``, ``port_from``,
        ``port_to``).

        Args:
            event: the event (watch rule) to complete; modified in place.
            log: text stored in ``event["log"]``.
        """
        # get default values from config
        if self._conf.has_section("plugin-defaults"):

            # 1) date
            default_date_format = self._conf.get("plugin-defaults",
                                                 "date_format")
            if event["date"] is None and default_date_format:
                # One clock reading so date and fdate can never differ.
                now = time.strftime('%Y-%m-%d %H:%M:%S',
                                    time.localtime(time.time()))
                event["date"] = now
                event["fdate"] = now

            # 2) sensor
            default_sensor = self._conf.get("plugin-defaults", "sensor")
            if event["sensor"] is None and default_sensor:
                event["sensor"] = default_sensor

            # 3) interface
            default_iface = self._conf.get("plugin-defaults", "interface")
            if event["interface"] is None and default_iface:
                event["interface"] = default_iface

            # 4) source ip
            if event["src_ip"] is None:
                event["src_ip"] = event["from"]

            # 5) dest ip
            if event["dst_ip"] is None:
                event["dst_ip"] = event["to"]

            # 6) protocol
            if event["protocol"] is None:
                event["protocol"] = "TCP"

            # 7) ports
            if event["src_port"] is None:
                event["src_port"] = event["port_from"]
            if event["dst_port"] is None:
                event["dst_port"] = event["port_to"]
            if event["src_port"] is None:
                event["src_port"] = 0
            if event["dst_port"] is None:
                event["dst_port"] = 0
            if event["src_ip"] is None:
                event["src_ip"] = event["sensor"]
            if event["dst_ip"] is None:
                event["dst_ip"] = event["sensor"]

        event["log"] = log

        # the type of this event should always be 'monitor'
        if event["type"] is None:
            event["type"] = 'monitor'

        # Clean up the watch-rule-only fields
        event["port_from"] = ""
        event["port_to"] = ""
        event["to"] = ""
        event["from"] = ""
        event["absolute"] = ""
        event["interval"] = ""
        return event

    def match_rule(self):
        """Return the name of the plugin rule matching the watch rule.

        A rule matches when one of its sids equals the watch rule's
        ``plugin_sid`` or is ``any`` (case-insensitive). Returns ``None``
        when no rule matches.
        """
        plugin_sid = self.watch_rule['plugin_sid']
        for rule_name, rule in self.plugin.rules().iteritems():
            for sid in Config.split_sids(str(rule['sid'])):  # sid=1,2-4,5
                if str(plugin_sid) == str(sid) or str(sid).lower() == 'any':
                    return rule_name
        return None

    def eval_condition(self, cond, arg1, arg2, value):
        """Evaluate ``arg2 <cond> arg1 + value``.

        Args:
            cond: one of ``eq``, ``ne``, ``gt``, ``ge``, ``le``, ``lt``.
            arg1: initial value.
            arg2: current value.
            value: offset added to ``arg1``.

        Returns:
            The result of the comparison; ``False`` when an operand cannot
            be converted to an integer or ``cond`` is unknown.
        """
        operands = {}
        for label, raw in (("arg1", arg1), ("arg2", arg2), ("value", value)):
            try:
                operands[label] = int(raw)
            except (TypeError, ValueError):
                # TypeError covers None and other non-numeric objects,
                # which used to escape as an unhandled exception.
                logger.warning(
                    "value returned by monitor (%s=%s) is not an integer" % \
                    (label, str(raw)))
                return False
        arg1 = operands["arg1"]
        arg2 = operands["arg2"]
        value = operands["value"]

        logger.debug("Monitor expresion evaluation: " + \
                     "%s(arg2) <%s> %s(arg1) + %s(value)?" % \
                     (str(arg2), str(cond), str(arg1), str(value)))

        target = arg1 + value
        if cond == "eq":
            return arg2 == target
        elif cond == "ne":
            return arg2 != target
        elif cond == "gt":
            return arg2 > target
        elif cond == "ge":
            return arg2 >= target
        elif cond == "le":
            return arg2 <= target
        elif cond == "lt":
            return arg2 < target
        return False

    # given the watch rule, ask to Monitor and obtain a result
    # *must* be overriden in child classes:
    # different implementations for each type of monitor
    # (socket, database, etc.)
    def get_data(self, rule_name):
        """Return the raw monitor response for ``rule_name`` (abstract)."""
        pass

    # *must* be overriden in child classes:
    def open(self):
        """Open the monitor's data source (abstract)."""
        pass

    # *must* be overriden in child classes:
    def close(self):
        """Close the monitor's data source (abstract)."""
        pass

    # TODO: merge with ParserLog.feed()
    def get_value(self, monitor_response, rule_name):
        """Extract the integer value of ``rule_name`` from a response.

        The rule's regexp is searched in the response (only the first item
        when the response is a list); the captured groups are substituted
        into the rule's ``result`` template.

        Returns:
            The integer part of the substituted result, ``None`` when the
            regexp does not match, or ``False`` when the result is not a
            number.
        """
        regexp = self.regexps[rule_name]
        pattern = re.compile(regexp, re.IGNORECASE | re.MULTILINE)

        # TODO: monitor_response could possibly be a list
        if isinstance(monitor_response, list):
            match = pattern.search(monitor_response[0])
        else:
            match = pattern.search(monitor_response)
        if match is None:
            return None

        captured = {}
        # group by index ()
        for index, group in enumerate(match.groups(), 1):
            if group is None:
                group = ''
            captured[str(index)] = str(group)
        # group by name (?P<name-of-group>)
        captured.update(match.groupdict())

        # first, try getting substitution from the regular expresion syntax
        result = self.results[rule_name]
        value = self.plugin.get_replace_value(result, captured)
        try:
            return int(value.split(".")[0])
        except Exception:
            # Not a number: callers treat False as "no usable value".
            return False

    def evaluate(self, rule_name):
        """Get a new value from the monitor and check the watch rule.

        When the condition applies, an event is built from the watch rule
        and sent through ``Output.event`` and ``Stats.new_event``.

        Returns:
            ``False`` only when a value was obtained and the condition did
            not apply; ``True`` otherwise (condition applied, or there was
            nothing to compare).
        """
        if self.first_value is None:
            logger.debug(
                "Can not extract value (arg1) from monitor response or no initial value to compare with"
            )
            return True

        monitor_response = self.get_data(rule_name)
        if not monitor_response:
            logger.warning("No data received from monitor")
            return True

        value = self.get_value(monitor_response, rule_name)
        if value is None:
            return True

        cond = self.watch_rule["condition"]
        threshold = int(self.watch_rule["value"])
        if not self.eval_condition(cond=cond, arg1=self.first_value,
                                   arg2=value, value=threshold):
            logger.debug("No data matching the watch-rule received from monitor")
            return False

        self.watch_rule["type"] = "monitor"
        try:
            log = "Monitor Command: %s , Monitor expresion evaluation: %s(arg2) <%s> %s(arg1) + %s(value)? , Command Response: %s" % (
                str(self.queries), str(value), str(cond),
                str(self.first_value), str(threshold),
                monitor_response.replace("\n", "\r"))
        except Exception:
            # e.g. the response is a list and has no replace()
            log = "Monitor Exception"

        self.watch_rule = self._plugin_defaults(self.watch_rule, log)
        Output.event(self.watch_rule)
        Stats.new_event(self.watch_rule)
        return True

    # *may* be overriden in child classes
    def process(self):
        """Run one monitoring step for the watch rule.

        Returns:
            ``None`` when no plugin rule matches; the result of
            ``evaluate`` while the watch-rule interval has not expired;
            ``True`` otherwise.
        """
        # get the name of rule to apply
        rule_name = self.match_rule()
        if rule_name is None:
            return None
        logger.info("Matched rule: [%s]" % (rule_name))

        # get data from plugin (first time)
        if self.first_value is None:
            # <absolute> is "no" by default
            # the absence of <interval> implies that <absolute> is "yes"
            if self.watch_rule['absolute'] in ('yes', 'true') or \
                    not self.watch_rule['interval']:
                self.first_value = 0
            else:
                monitor_response = self.get_data(rule_name)
                if not monitor_response:
                    # No data (possibly None): nothing to iterate over.
                    self.first_value = 0
                else:
                    for resp in monitor_response:
                        if resp:
                            self.first_value = self.get_value(resp,
                                                              rule_name)
                if self.first_value == False:
                    self.first_value = 0

        # get current time
        current_time = int(time.time())

        # Three posibilities:
        #
        # 1) no interval specified, no need to wait
        if 'interval' not in self.watch_rule.dict():
            self.evaluate(rule_name)
            return True
        elif not self.watch_rule['interval']:
            self.evaluate(rule_name)
            return True

        # 2) we are in time, check the result of the watch-rule
        elif (self.initial_time + \
                int(self.watch_rule["interval"]) > current_time):
            return self.evaluate(rule_name)

        # 3) we are out of time
        else:
            self.evaluate(rule_name)
            return True
class W2V(object):
    """Train a word2vec model on the text of clinical-study XML files.

    Every file found under ``xml_path`` (read from the ``path`` section of
    the project configuration) is parsed with ``xmltodict``; its title,
    summary, description and eligibility text are tokenized, lemmatized and
    stripped of English stopwords. The resulting token lists are collected
    in ``docs_list`` and passed to ``word2vec.Word2Vec``.
    """

    def __init__(self):
        self.tokenizer = RegexpTokenizer(r'\w+')
        self.lem = WordNetLemmatizer()
        self.stopwords = set(stopwords.words('english'))
        self.dict = None
        self.corpus = None
        self.bm25model = None
        # One token list per successfully processed document.
        self.docs_list = []
        self.conf = Conf()
        self.xml_path = self.conf.getConfig('path', 'xml_path')

    def cleanData(self, doc):
        """Return the lemmatized, stopword-free tokens of *doc*.

        The text is lower-cased before tokenization; stopwords are removed
        after lemmatization.
        """
        raw_tokens = self.tokenizer.tokenize(doc.lower())
        lemmas = (self.lem.lemmatize(token) for token in raw_tokens)
        return [token for token in lemmas if token not in self.stopwords]

    def xml2json(self, xmlpath):
        """Parse the XML file at *xmlpath* and return xmltodict's mapping."""
        with open(xmlpath, "r") as xmlf:
            xml_str = xmlf.read()
        return xmltodict.parse(xml_str)

    @staticmethod
    def _nested(mapping, *keys):
        """Follow *keys* through nested dicts; return None if a level is missing."""
        node = mapping
        for key in keys:
            if not isinstance(node, dict):
                return None
            node = node.get(key)
        return node

    def extractUseful(self, dict_str):
        """Join the useful text fields of one parsed study with commas.

        The fields are, in order: the official title (falling back to the
        brief title), the brief summary, the detailed description and the
        eligibility criteria.

        Fields that are absent, empty (``None``) or not plain text are
        skipped instead of raising, so a study with one empty element is
        still used. For inputs where every present field is a string the
        result is unchanged.
        """
        study = dict_str["clinical_study"]

        title = study.get("official_title")
        if title is None:
            title = study.get("brief_title")

        candidates = [
            title,
            self._nested(study, "brief_summary", "textblock"),
            self._nested(study, "detailed_description", "textblock"),
            self._nested(study, "eligibility", "criteria", "textblock"),
        ]
        return ','.join(text for text in candidates if isinstance(text, str))

    def buildModel(self, model_path="models/w2v.model"):
        """Train word2vec on ``docs_list`` and save the model to *model_path*.

        The target directory is created when missing, so a long corpus pass
        is not lost to a failing save.
        """
        model = word2vec.Word2Vec(sentences=self.docs_list,
                                  min_count=5,
                                  workers=4)
        model_dir = os.path.dirname(model_path)
        if model_dir:
            os.makedirs(model_dir, exist_ok=True)
        model.save(model_path)

    def run(self):
        """Process every file under ``xml_path`` and then build the model.

        Files that fail to process are reported on stdout and their path is
        appended to ``error_w2v_xml.txt``; processing continues with the
        next file.
        """
        count = 0
        for root, _, files in os.walk(self.xml_path, topdown=True):
            for filename in files:
                file_path = os.path.join(root, filename)
                try:
                    json_data = self.xml2json(file_path)
                    useful_str = self.extractUseful(json_data)
                    useful_tokens = self.cleanData(useful_str)
                    self.docs_list.append(useful_tokens)
                except KeyboardInterrupt:
                    # Ctrl+C: terminate immediately.
                    print('Interrupted')
                    try:
                        sys.exit(0)
                    except SystemExit:
                        os._exit(0)
                except Exception as e:
                    print(e)
                    with open('error_w2v_xml.txt', 'a') as f:
                        f.write(str(file_path) + '\n')
                    print('Error in ', str(filename))
                count += 1
                if count % 2000 == 0:
                    print("Already finished {}".format(count))
        print("Start build model")
        self.buildModel()
class Query(object):
    """Run topic queries against Elasticsearch and write a ranked run file.

    Topics are read from the XML file configured as ``query_xml_path``.
    Each topic is sent to Elasticsearch as a ``multi_match`` query, hits
    are filtered on age and gender, and the survivors are appended to a
    run file as lines of the form
    ``<topic id> Q0 <doc id> <rank> <score> certRI``.
    """

    # Document fields searched by the multi_match query.
    SEARCH_FIELDS = (
        "brief_title",
        "brief_summary",
        "detailed_description",
        "official_title",
        "keyword",
        "condition",
        "eligibility.criteria.textblock",
    )

    # Disease words whose MeSH entries are not used for term expansion.
    _GENERIC_DISEASE_WORDS = ("cancer", "adenocarcinoma", "carcinoma")

    def __init__(self):
        self.conf = Conf()
        self.query_xml_path = self.conf.getConfig("path", "query_xml_path")
        self.index_name = self.conf.getConfig("search", "index_name")
        self.doc_type = self.conf.getConfig("search", "doc_type")
        self.meshDict = self.getPickles(
            self.conf.getConfig("path", "dict_pickle_path"))
        # 30 s timeout (the original comment states the default is 10 s) and
        # up to 10 retries, to avoid timeouts caused by large result sets.
        self.es = Elasticsearch(timeout=30,
                                max_retries=10,
                                retry_on_timeout=True)
        self.fields = self.conf.getImportant()
        self.extracted = []
        self.tokenizer = RegexpTokenizer(r"\w+")
        self.lem = WordNetLemmatizer()
        self.stemmer = PorterStemmer()
        self.stopwords = set(stopwords.words("english"))

    def getPickles(self, pickle_path):
        """Load and return the pickled object stored at *pickle_path*.

        NOTE(review): ``pickle.load`` executes arbitrary code from the file;
        only point the configuration at pickle files you trust.
        """
        with open(pickle_path, "rb") as pf:
            data = pickle.load(pf)
        return data

    def xml2json(self, xmlpath):
        """Parse the XML file at *xmlpath* and return xmltodict's mapping."""
        with open(xmlpath, "r") as xmlf:
            xml_str = xmlf.read()
        return xmltodict.parse(xml_str)

    def extract_query(self):
        """Read the topic file into ``self.extracted`` and dump ``query.json``.

        The age is taken from the text before the first ``-`` of the
        ``demographic`` field and multiplied by 365; the gender is the last
        space-separated word of that field.
        """
        topics = self.xml2json(self.query_xml_path)["topics"]["topic"]
        for q in topics:
            demographic = q["demographic"]
            self.extracted.append({
                "id": q["@number"],
                "disease": q["disease"],
                "gene": q["gene"],
                "age": int(demographic.split("-")[0]) * 365,
                "gender": demographic.split(" ")[-1],
                "other": q["other"],
            })
        with open("query.json", "w") as f:
            f.write(json.dumps(self.extracted, indent=4))

    def cleanData(self, doc):
        """Return the stemmed tokens of *doc*.

        Pure-digit tokens, one-character tokens and English stopwords are
        dropped; the stopword check runs on the stemmed form.
        """
        raw_tokens = self.tokenizer.tokenize(doc.lower())
        stems = (self.stemmer.stem(token) for token in raw_tokens)
        return [
            token for token in stems
            if not token.isdigit() and len(token) > 1
            and token not in self.stopwords
        ]

    def _expand_disease_terms(self, disease):
        """Return the cleaned, de-duplicated disease words plus MeSH entries.

        This is the expansion the original ``query`` computed but never fed
        into the search; it is kept for future experiments and is not called
        by ``query``.
        NOTE(review): assumes ``meshDict`` values are lists of strings.
        """
        keywords = disease.lower().split(" ")
        expanded = list(keywords)
        for keyword in keywords:
            if (keyword in self.meshDict
                    and keyword not in self._GENERIC_DISEASE_WORDS):
                expanded += self.meshDict[keyword]
        expanded = [term for term in expanded if term != "mesh_numbers"]
        return list(set(self.cleanData(" ".join(expanded))))

    @staticmethod
    def _is_eligible(source, single_query):
        """Return True when the hit *source* admits the topic's patient.

        Hits without an ``eligibility`` section are always accepted.
        Raises ValueError/KeyError when the eligibility data is malformed.
        NOTE(review): ages are compared as plain numbers; the index is
        presumably normalized to days like the topic age - confirm.
        """
        if "eligibility" not in source:
            return True
        eligibility = source["eligibility"]
        patient_age = single_query["age"]
        if (float(eligibility["minimum_age"]) > patient_age
                or patient_age > float(eligibility["maximum_age"])):
            return False
        trial_gender = eligibility["gender"].lower().strip()
        patient_gender = single_query["gender"].lower()
        if trial_gender not in (patient_gender, 'all'):
            print(eligibility["gender"].lower())
            print(patient_gender)
            return False
        return True

    def query(self, single_query, result_path="trec_eval/eval/r40.txt"):
        """Search for one topic and append its ranked hits to *result_path*.

        At most 1000 lines are written per topic. A hit whose data is
        malformed (ValueError/KeyError) is reported on stdout and skipped;
        it no longer stops the remaining hits from being ranked.
        """
        gene_terms = (single_query["gene"].replace("(", " ")
                      .replace(")", " ").replace(",", ""))
        print(gene_terms)

        # The query body deserves more careful design: different query
        # formulations strongly affect MAP and P@10. Earlier experiments
        # (gene-only multi_match; boosted bool/should with a must_not
        # gender term) were tried and set aside.
        # Evaluation notes kept from the original author (which
        # configuration each line refers to is not stated):
        #   p5: 0.3586  p10: 0.3138  p15: 0.2704
        #   with age:    p5: 0.3586  p10: 0.3172  p15: 0.2805
        #   with gender: p5: 0.3655  p10: 0.3241  p15: 0.2920
        query_body = {
            "query": {
                "multi_match": {
                    "query": (single_query["disease"] + ' '
                              + gene_terms).lower(),
                    "type": "cross_fields",
                    "fields": list(self.SEARCH_FIELDS),
                }
            },
            "size": 1000,
        }

        query_result = self.es.search(index=self.index_name,
                                      doc_type=self.doc_type,
                                      body=query_body)["hits"]["hits"]

        rank = 1
        with open(result_path, "a") as f:
            for qr in query_result:
                try:
                    # Drop hits whose age or gender constraints do not fit.
                    if not self._is_eligible(qr["_source"], single_query):
                        continue
                    line = "{} Q0 {} {} {} certRI\n".format(
                        single_query["id"],
                        qr["_source"]["id_info"],
                        rank,
                        round(qr["_score"], 4),
                    )
                except ValueError:
                    print(qr["_source"]["eligibility"])
                    continue
                except KeyError as ke:
                    print(ke)
                    print(qr["_source"])
                    continue
                # Write the line in the required run-file format.
                f.write(line)
                rank += 1
                if rank > 1000:
                    break

        print("Relative docs:{}".format(rank - 1))

    def run(self):
        """Extract all topics and run the search for each of them."""
        self.extract_query()
        for single_query in self.extracted:
            print(single_query)
            self.query(single_query)
class DataPreprocessing(object):
    """Parse clinical-study XML files and index them into Elasticsearch.

    Constructing an instance deletes the configured index if it already
    exists and creates it again, so every run starts from an empty index.
    """

    # Top-level text fields that ``clean`` normalizes, in processing order.
    _TEXT_FIELDS = (
        "brief_title",
        "official_title",
        "brief_summary",
        "detailed_description",
    )

    def __init__(self):
        # Read settings.
        self.conf = Conf()
        self.xml_path = self.conf.getConfig("path", "xml_path")
        self.index_name = self.conf.getConfig("search", "index_name")
        self.doc_type = self.conf.getConfig("search", "doc_type")
        self.tokenizer = RegexpTokenizer(r"\w+")
        self.lem = WordNetLemmatizer()
        self.stemmer = PorterStemmer()
        self.stopwords = set(stopwords.words("english"))
        self.es = Elasticsearch()
        self.fields = self.conf.getImportant()
        # An ES index and doc_type play the role of a MySQL database and
        # table. If the index already exists, drop it before re-creating it.
        if self.es.indices.exists(index=self.index_name):
            self.es.indices.delete(index=self.index_name)
        self.es.indices.create(index=self.index_name)
        print("created index:" + self.index_name)

    def xml2json(self, xmlpath):
        """Parse the XML file at *xmlpath* into a dict via xmltodict."""
        with open(xmlpath, "r") as xmlf:
            xml_str = xmlf.read()
        return xmltodict.parse(xml_str)

    def cleanData(self, doc):
        """Return *doc* as a space-joined string of stemmed tokens.

        Pure-digit tokens, one-character tokens and English stopwords are
        dropped; the stopword check runs on the stemmed form.
        """
        raw_tokens = self.tokenizer.tokenize(doc.lower())
        stems = (self.stemmer.stem(token) for token in raw_tokens)
        return " ".join(
            token for token in stems
            if not token.isdigit() and len(token) > 1
            and token not in self.stopwords
        )

    def clean(self, json_data):
        """Normalize the free-text fields of *json_data* in place.

        Empty/falsy fields are left untouched. Returns the same dict.
        """
        for field in self._TEXT_FIELDS:
            if json_data[field]:
                json_data[field] = self.cleanData(json_data[field])
        criteria = json_data["eligibility"]["criteria"]
        if criteria["textblock"]:
            criteria["textblock"] = self.cleanData(criteria["textblock"])
        return json_data

    @staticmethod
    def _default_document():
        """Return a fresh copy of the values used for missing fields."""
        return {
            "id_info": "NCT00000000",
            "brief_title": "",
            "official_title": "",
            "brief_summary": "",
            "detailed_description": "",
            "intervention": {"intervention_type": "", "intervention_name": ""},
            "eligibility": {
                "criteria": {"textblock": ""},
                "gender": "All",
                "minimum_age": "6 Months",
                "maximum_age": "100 Years",
                "healthy_volunteers": "No",
            },
            "keyword": [],
            "intervention_browse": [],
            "condition": [],
        }

    def _extract_fields(self, study, target):
        """Fill *target* with the configured fields taken from *study*.

        *target* is mutated (rather than a new dict returned) so that the
        caller can still print the partially built document if a step
        raises. Missing fields fall back to ``_default_document`` values;
        eligibility sub-keys are completed from the defaults and then passed
        through ``NormalAge``.
        """
        defaults = self._default_document()
        # Copy the fields configured in ``self.fields`` into the document
        # that will be stored in ES.
        for field in self.fields:
            if field not in study:
                target[field] = defaults[field]
                continue
            sub_key = self.fields[field]
            if len(sub_key) > 1 and not isinstance(study[field], str):
                target[field] = study[field][sub_key]
            else:
                target[field] = study[field]

        # Complete the eligibility section and normalize its ages.
        if "eligibility" in target:
            eligibility = target["eligibility"]
            if "criteria" not in eligibility:
                eligibility["criteria"] = {"textblock": ""}
            for key, value in defaults["eligibility"].items():
                if key not in eligibility:
                    eligibility[key] = value
            target["eligibility"] = NormalAge(eligibility)

    def oswalk(self, error_log_path="errorxml.txt"):
        """Index every file found under ``xml_path``.

        Files that fail are reported on stdout and their path is appended
        to *error_log_path*; the walk then continues with the next file.
        """
        count = 0
        # Visit every file in every directory below xml_path.
        for dirpath, _, filenames in os.walk(self.xml_path, topdown=True):
            for filename in filenames:
                filepath = os.path.join(dirpath, filename)
                # Bound before the try block so the error handler never
                # sees an unbound name or the previous file's document.
                cleaned_json_data = {}
                try:
                    json_data = self.xml2json(filepath)
                    self._extract_fields(json_data["clinical_study"],
                                         cleaned_json_data)
                    cleaned_json_data = self.clean(cleaned_json_data)
                    # Insert the document.
                    self.es.index(
                        index=self.index_name,
                        body=cleaned_json_data,
                        doc_type=self.doc_type,
                    )
                    count += 1
                    if count % 1000 == 0:
                        print("Already finished:" + str(count))
                except KeyboardInterrupt:
                    # Ctrl+C: terminate immediately.
                    print("Interrupted")
                    try:
                        sys.exit(0)
                    except SystemExit:
                        os._exit(0)
                except Exception as e:
                    print(cleaned_json_data)
                    print(e)
                    with open(error_log_path, "a") as f:
                        f.write(str(filepath) + "\n")
                    print("Error in ", str(filename))
self.__process() # back to begining self.start_time = time.time() def __len__(self): return len(self.__event_list) if __name__ == "__main__": Logger.set_verbose("debug") from Config import Conf conf = Conf() #conf.read(['../etc/agent/config.cfg']) conf.read(['/etc/ossim/agent/config.cfg']) from Event import Event event1 = Event() event1["src_ip"] = "127.0.0.1" event1["dst_ip"] = "127.0.0.1" event1["sensor"] = "127.0.0.1" event1["plugin_id"] = "6001" event1["plugin_sid"] = "1" event1["src_port"] = "22" event1["dst_port"] = "80" event2 = Event()
if not self.logger.handlers: fh_stream = logging.StreamHandler() fh_stream.setLevel(log_l[self.log_level]) formatter = logging.Formatter( "%(asctime)s %(name)s %(levelname)s %(message)s") fh_stream.setFormatter(formatter) fh_file = logging.FileHandler(self.log_file) fh_file.setLevel(log_l[self.log_level]) fh_file.setFormatter(formatter) self.logger.addHandler(fh_stream) self.logger.addHandler(fh_file) log_path = Conf.get_logs() current_time = datetime.datetime.now().strftime("%Y-%m-%d") log_extension = Reader_config().get_conf_log_extension() file_name = os.path.join(log_path, current_time + log_extension) loglevel = Reader_config().get_conf_log_level() def my_log(log_name=__file__): return Logs(log_file=file_name, log_name=log_name, log_level=loglevel).logger if __name__ == '__main__': my_log().debug("dsadasd") # debug("dsadasd") 是日子级别 和描述,用于日志模版
class Monitor:
    """Base monitor: binds a plugin to one watch-rule and builds its events.

    The constructor reads the agent configuration, substitutes watch-rule
    values into the plugin's query/regexp templates, resolves the time zone
    to use and finally calls ``self.open()``.
    NOTE(review): ``open`` and ``__setTZData`` are called here but defined
    elsewhere in the class.
    """

    def __init__(self, plugin, watch_rule):
        self.plugin = plugin
        self.options = CommandLineOptions().get_options()

        # read configuration
        self._conf = Conf()
        if self.options.config_file:
            conffile = self.options.config_file
        else:
            conffile = self._conf.DEFAULT_CONFIG_FILE
        self._conf.read([conffile], 'latin1')

        self.watch_rule = watch_rule
        groups = self.watch_rule.dict()
        # Fields listed in EVENT_BASE64 arrive base64-encoded; decode them
        # before they are substituted into the plugin templates.
        for item, value in groups.iteritems():
            if item in self.watch_rule.EVENT_BASE64:
                groups[item] = b64decode(value)

        self.queries = self.get_replaced_values('query', groups)
        self.regexps = self.get_replaced_values('regexp', groups)
        self.results = self.get_replaced_values('result', groups)

        self.initial_time = int(time.time())  # initial time at object call
        self.first_value = None

        # A plugin-specific tzone wins over the agent-wide default.
        if "tzone" in self.plugin.hitems("DEFAULT"):
            self.timezone = self.plugin.get("DEFAULT", "tzone")
            logger.debug("Plugin %s (%s) with specific tzone = %s" % (
                self.plugin.get("config", "name"),
                self.plugin.get("DEFAULT", "plugin_id"),
                self.timezone))
        else:
            self.timezone = self._conf.get("plugin-defaults", "tzone")

        self.__agenttimezone = None
        self.__EventTimeZone = None
        self.__systemTimeZone = None
        self.__set_system_tzone()
        self.__setTZData()
        self.open()

    def get_replaced_values(self, key, groups):
        """Return ``{rule_name: value}`` for *key* over all plugin rules.

        Plugin variables are replaced with watch-rule data, except for the
        'result' key, whose value is returned untouched.

        For example, given the following watch_rule:

            watch-rule plugin_id="2006" plugin_sid="1" condition="eq"
                       value="1" from="192.168.6.64" to="192.168.6.63"
                       port_from="5643" port_to="22"

        and the following plugin query:
            query = {$from}:{$port_from} {$to}:{$port_to}

        the variables are replaced with the watch-rule data:
            query = 192.168.6.64:5643 192.168.6.63:22
        """
        values = {}
        for rule_name, rule in self.plugin.rules().iteritems():
            if key != 'result':
                values[rule_name] = self.plugin.get_replace_value(rule[key],
                                                                  groups)
            else:
                values[rule_name] = rule[key]
        return values

    @staticmethod
    def _sanitize_ip(event, field, userdata_field, ipv4_reg):
        """Force event[field] to hold dotted-quad text.

        A value that does not match *ipv4_reg* is moved to
        event[userdata_field] and replaced by '0.0.0.0'. A missing value
        becomes '0.0.0.0' when *field* is a known event attribute.
        """
        value = event[field]
        if value is not None:
            if not re.match(ipv4_reg, value):
                print("Event's field %s (%s) is not a valid IP.v4 address, "
                      "set it to default ip 0.0.0.0 and real data on %s"
                      % (field, value, userdata_field))
                event[field] = '0.0.0.0'
                event[userdata_field] = value
        elif field in event.EVENT_ATTRS:
            event[field] = '0.0.0.0'

    def _plugin_defaults(self, event, log):
        """Fill unset fields of *event* with defaults and return it.

        Defaults come from the "plugin-defaults" configuration section and
        from the watch-rule helper fields (from/to/port_from/port_to), which
        are blanked afterwards. *log* is stored as the event's "log" field.
        """
        # Shape check only: four dot-separated groups of 1-3 digits.
        ipv4_reg = r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$"

        # get default values from config
        if self._conf.has_section("plugin-defaults"):

            # 1) date
            default_date_format = self._conf.get("plugin-defaults",
                                                 "date_format")
            if event["date"] is None and default_date_format:
                # Sample the clock once so date and fdate always agree.
                now = time.strftime('%Y-%m-%d %H:%M:%S',
                                    time.localtime(time.time()))
                event["date"] = now
                event["fdate"] = now

            # 2) sensor
            default_sensor = self._conf.get("plugin-defaults", "sensor")
            if event["sensor"] is None and default_sensor:
                event["sensor"] = default_sensor

            # 3) interface
            default_iface = self._conf.get("plugin-defaults", "interface")
            if event["interface"] is None and default_iface:
                event["interface"] = default_iface

            # 4) source ip
            if event["src_ip"] is None:
                event["src_ip"] = event["from"]

            # 5) dest ip
            if event["dst_ip"] is None:
                event["dst_ip"] = event["to"]

            # 6) protocol
            if event["protocol"] is None:
                event["protocol"] = "TCP"

            # 7) ports, then fall back to the sensor address for the IPs
            if event["src_port"] is None:
                event["src_port"] = event["port_from"]
            if event["dst_port"] is None:
                event["dst_port"] = event["port_to"]
            if event["src_port"] is None:
                event["src_port"] = 0
            if event["dst_port"] is None:
                event["dst_port"] = 0
            if event["src_ip"] is None:
                event["src_ip"] = event["sensor"]
            if event["dst_ip"] is None:
                event["dst_ip"] = event["sensor"]

            # 8) Time zone
            if 'tzone' in event.EVENT_ATTRS:
                Utils.normalizeToUTCDate(event, self.__EventTimeZone)

        # Check if valid ip, if not we put 0.0.0.0 and keep the real data
        self._sanitize_ip(event, 'src_ip', 'userdata8', ipv4_reg)
        self._sanitize_ip(event, 'dst_ip', 'userdata9', ipv4_reg)

        event["log"] = log

        # the type of this event should always be 'monitor'
        if event["type"] is None:
            event["type"] = 'monitor'

        # Clean up the watch-rule helper fields
        event["port_from"] = ""
        event["port_to"] = ""
        event["to"] = ""
        event["from"] = ""
        event["absolute"] = ""
        event["interval"] = ""

        return event

    def __set_system_tzone(self):
        """Set ``self.systemtzone`` from /etc/timezone, defaulting to GMT.

        GMT is used when the file cannot be read or names a zone that is
        not in ``all_timezones``; the attribute is set in every case.
        """
        used_tzone = 'GMT'
        try:
            # read local timezone information.
            with open('/etc/timezone', 'r') as f:
                candidate = f.readline().rstrip()
            if candidate in all_timezones:
                used_tzone = candidate
            else:
                logger.info("Warning, we can't read valid timezone data.Using GMT")
        except Exception:
            # Best effort: any failure falls back to GMT.
            logger.info("Warning, we can't read valid timezone data.Using GMT")
        self.systemtzone = used_tzone
# back to begining self.start_time = time.time() def __len__(self): return len(self.__event_list) if __name__ == "__main__": Logger.set_verbose("debug") from Config import Conf conf = Conf() #conf.read(['../etc/agent/config.cfg']) conf.read(['/etc/ossim/agent/config.cfg']) from Event import Event event1 = Event() event1["src_ip"] = "127.0.0.1" event1["dst_ip"] = "127.0.0.1" event1["sensor"] = "127.0.0.1" event1["plugin_id"] = "6001" event1["plugin_sid"] = "1" event1["src_port"] = "22" event1["dst_port"] = "80" event2 = Event()
class Agent():
    """Agent that connects to the configured server and queues its packets.

    Received bytes are split into length-prefixed packets and pushed onto
    ``recv_packet_queue``; ``send_packet_queue`` is handed to the Worker,
    Sender and Heartbeat objects started in ``__start_agent``.
    """

    def __init__(self):
        self.options = CommandLineOptions().get_options()
        self.server_connection = None
        self.send_packet_queue = Queue.Queue(maxsize=0)
        self.recv_packet_queue = Queue.Queue(maxsize=0)
        self.config = Conf()
        # config.cfg is looked up next to this source file.
        self.config.read([os.path.join(
            os.path.dirname(os.path.realpath(__file__)), "config.cfg")])

    def working(self):
        print("am working ")

    def get_bson_packet(self, buffer_data, len):
        """Split one length-prefixed packet off the front of *buffer_data*.

        The first four bytes are a little-endian unsigned length that counts
        the whole packet, prefix included.

        ``len`` is the number of valid bytes in *buffer_data*; the name
        shadows the builtin but is kept for existing callers.

        Returns ``(packet, rest)``, or ``(None, buffer_data)`` while the
        packet is still incomplete.
        """
        if len < 4:
            return None, buffer_data
        message_length, = unpack('<L', buffer_data[0:4])
        if len < message_length:
            return None, buffer_data
        return buffer_data[0:message_length], buffer_data[message_length:len]

    def __start_agent(self):
        """Connect to the server and pump received packets into the queue.

        Returns when the server closes the connection.
        """
        server_ip = self.config.get("server", "ip")
        server_port = int(self.config.get("server", "port"))
        sensor_id = self.config.get("agent", "id")
        priority = 1
        system_id_file = ""

        Worker(self.recv_packet_queue, self.send_packet_queue).start()

        self.server_connection = ServerConn(server_ip, server_port, priority,
                                            sensor_id, system_id_file)
        server_socket = self.server_connection.connect()
        if server_socket is None:
            print("connect failed !")
            # NOTE(review): exits with status 0 although the connect failed;
            # kept as-is because supervisors may depend on it.
            exit(0)

        Sender(self.send_packet_queue, self.server_connection).start()
        Heartbeat(self.send_packet_queue).start()

        server_socket.setblocking(False)
        read_buff = b""
        print("====")
        while True:
            watched = self.server_connection.get_connectsocket()
            readable, writable, exceptional = select.select(
                [watched], [], [watched])
            # handle read event
            try:
                if server_socket in readable:
                    chunk = server_socket.recv(1024 * 1024)
                    if not chunk:
                        # recv() returning no data means the peer closed the
                        # connection; without this check select() reports
                        # the socket readable forever and the loop spins.
                        print("connection closed by server")
                        break
                    # construct packets from everything buffered so far
                    read_buff += chunk
                    read_buff_len = len(read_buff)
                    print(read_buff_len)
                    while read_buff_len > 4:
                        packet, read_buff = self.get_bson_packet(
                            read_buff, read_buff_len)
                        if not packet:
                            break
                        print("push a packet")
                        self.recv_packet_queue.put(packet)
                        read_buff_len = len(read_buff)
            except Exception as e:
                print(e)

            for s in exceptional:
                print("socket --has exceptional")

    def start(self):
        """Run the agent until the server connection ends."""
        self.__start_agent()