def search_with_triple(self, triple_doc, **kwargs):
    """Search graph nodes that match a (subject, predicate, object) triple.

    :param triple_doc: triple info dict with 'subject', 'object' and
        'predicate' keys; 'predicate' is a list of predicate URIs.
    :param kwargs: extension info (related-node property description);
        'query_property' selects the property used to query the related
        node, defaulting to 'name'.
    :return: dict merging the answers of every matched predicate.
    """
    self.debug('>>> start search_with_triple <<<')
    ret = {}
    if triple_doc:
        triple_subject = str2unicode(triple_doc.get('subject', ""))  # subject
        triple_object = str2unicode(triple_doc.get('object', ""))  # object
        triple_predicates = triple_doc.get('predicate', [])  # predicates (relation URIs)
        # property used to query the related node, defaults to 'name'
        query_property = kwargs.get("query_property", 'name')
        self.debug(
            'triple_subject=%s, triple_predicates=%s, triple_object=%s, query_property=%s',
            triple_subject, json.dumps(triple_predicates, ensure_ascii=False),
            triple_object, query_property)
        # look up the predicate-property store to decide each predicate's
        # kind (data relation vs object relation)
        predicate_docs = self.property_collection.find(
            {'uri': {
                '$in': triple_predicates
            }})
        for predicate_item in predicate_docs:
            # walk each predicate and query neo4j accordingly
            predicate_type = predicate_item.get('type', '')
            predicate_value = str(predicate_item.get('uri', ''))
            self.debug('predicate_type=%s, predicate_value=%s',
                       predicate_type, predicate_value)
            tmp_ret = {}
            if predicate_type == 'data':  # predicate is a data relation
                tmp_ret = self.query_node_property(triple_subject,
                                                   predicate_value,
                                                   triple_object)
            elif predicate_type == 'object':  # predicate is an object relation
                tmp_ret = self.query_node_relation(triple_subject,
                                                   predicate_value,
                                                   triple_object,
                                                   query_property)
            else:
                self.warn(
                    '@@@@@@@@@@@@@@@@@@@@@@@@ unexpected value, predicate_type is None'
                )
            # merge this predicate's answer into the accumulated result
            ret = dict(ret, **tmp_ret)
    else:
        self.warn(
            '@@@@@@@@@@@@@@@@@@@@ unexpected value, triple_doc is None')
    self.debug('>>> end search_with_triple <<<')
    return ret
def nt_password_hash(passwd, pad_to_21_bytes=True):
    """Compute the NTLM NtPasswordHash of a password.

    The password is converted to its UNICODE form and irreversibly hashed
    with MD4; no terminating zero is included. When pad_to_21_bytes is
    true, five NUL octets are appended to the 16-byte digest.
    """
    # MD4 must run over the UNICODE form of the password
    unicode_pw = utils.str2unicode(passwd)
    hasher = md4.new()
    hasher.update(unicode_pw)
    digest = hasher.digest()
    if not pad_to_21_bytes:
        return digest
    # extend the 16-byte digest with zeros to a 21-byte string
    return digest + '\000\000\000\000\000'
def translate_to_basic(self, environment, connection, error_code):
    """Try to satisfy an NTLM challenge using the client's Basic credentials.

    If the client supplied Basic credentials, derive the NTLM user and the
    LM/NT hashed passwords from them and store them in *environment*,
    returning 1. Otherwise rewrite the remote server's header to request
    Basic auth instead, returning 0.
    """
    connection.logger.log('*** Translating NTLM to Basic...\n')
    user, password = self.get_credentials_from_basic(connection, error_code)
    if user:
        connection.logger.log("*** Found Basic credentials in client's header.\n")
        if environment['UNICODE']:
            environment['USER'] = utils.str2unicode(string.upper(user))
        else:
            environment['USER'] = string.upper(user)
        #environment['PASSWORD'] = password
        connection.logger.log("*** Basic User/Password: %s/%s.\n" % (user, password))
        connection.logger.log("*** Calculating hashed passwords (LM and NT)...")
        environment['LM_HASHED_PW'] = ntlm_procs.create_LM_hashed_password(password)
        environment['NT_HASHED_PW'] = ntlm_procs.create_NT_hashed_password(password)
        connection.logger.log("Done.\n")
        return 1
    else:
        connection.logger.log("*** There are no basic credentials in client's header.\n")
        connection.logger.log("*** Replacing NTLM value with Basic in rserver's header...")
        self.replace_ntlm_with_basic(connection, error_code)
        connection.logger.log("Done.\n")
        connection.logger.log("*** New server's header:\n=====\n" + connection.rserver_head_obj.__repr__())
        return 0
def nt_password_hash(passwd, pad_to_21_bytes=True): """ NtPasswordHash( IN 0-to-256-unicode-char Password, OUT 16-octet PasswordHash ) { /* * Use the MD4 algorithm [5] to irreversibly hash Password * into PasswordHash. Only the password is hashed without * including any terminating 0. */ """ # we have to have UNICODE password pw = utils.str2unicode(passwd) # do MD4 hash md4_context = md4.new() md4_context.update(pw) res = md4_context.digest() if pad_to_21_bytes: # addig zeros to get 21 bytes string res = res + '\000\000\000\000\000' return res
def _match_predicate(self):
    """Match the current query against regex templates to find a predicate.

    Retrieves candidate templates via segmented search on 'key_index',
    then tries each template's regex against the query. The first match
    wins: its captured 'title' group becomes the subject and the
    template's predicate_value becomes the predicate.

    :return: (subject, predicate) tuple; both '' when nothing matches.
    """
    self.debug('>>> start _match_predicate <<<')
    templates_docs = self.template_core.search_with_seg(
        self.query,
        query_fields=['key_index'],
    )
    templates_list = list(templates_docs)
    predicate_ret = ''
    subject_ret = ''
    self.debug("got templates_docs=%s",
               json.dumps(templates_list, ensure_ascii=False))
    if templates_list:
        for tmp_item in templates_list:
            pattern_str = tmp_item.get('pattern', '')
            predicate_value = tmp_item.get('predicate_value', '')
            if pattern_str and predicate_value:
                # NOTE(review): assumes every pattern defines a named group
                # (?P<title>...) -- group('title') raises otherwise; confirm
                # against the template store
                pattern = re.compile(ur'%s' % pattern_str)
                is_match = pattern.match(str2unicode(self.query))
                if is_match:
                    self.debug('got match pattern=%s, predicate_value=%s',
                               pattern_str, predicate_value)
                    subject_ret = is_match.group('title')
                    predicate_ret = predicate_value
                    # first matching template wins
                    return subject_ret, predicate_ret
            else:
                self.warn(
                    '@@@@@@@@@@@@@@@@@@@@@@@ unexpected pattern_str=%s, predicate_value=%s',
                    pattern_str, predicate_value)
    else:
        self.debug("retrieved None templates_docs")
    self.warn("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ don't match any templates ")
    self.debug(">>> end _match_predicate <<<")
    return subject_ret, predicate_ret
def search_neighbors_info(self, name, relationship, **kwargs):
    """Query neighbor nodes of *name* linked by *relationship*.

    When kwargs carries a non-empty 'node_property', the property-specific
    cypher template is used; otherwise the plain data template. Returns
    the extracted answer docs, or {} when name is empty.
    """
    answer_docs = {}
    node_property = kwargs.get('node_property', '')
    if node_property:
        self.debug('search node name=%s, relationship=%s, property=%s',
                   name, relationship, node_property)
    else:
        self.debug('search node name=%s, relationship=%s', name, relationship)
    if not name:
        self.warn('@@@@@@@@@@@@@@ name is None')
        return answer_docs
    # pick the cypher template matching the requested detail level
    if node_property:
        cypher = BIO_CYPER_TEMPLATE['neighbors_property'] % \
            (str2unicode(name), relationship, node_property)
    else:
        cypher = BIO_CYPER_TEMPLATE['neighbors_data'] % \
            (str2unicode(name), relationship)
    self.debug('condition=%s', cypher)
    records = self.graph.run(cypher).data()
    answer_docs = self._extract_answer(records)
    self.debug('got name=%s, answer=%s', name, json.dumps(answer_docs))
    return answer_docs
def search_node_info(self, name, **kwargs):
    """Query a node's property value, falling back to "equal" nodes.

    :param name: node name to look up
    :param kwargs: 'node_property' -- the property to read from the node
    :return: extracted answer docs; {} when name/property is missing or
        nothing is found.
    """
    docs = {}
    self.debug('>>> start search_node_info <<<')
    node_property = kwargs.get('node_property', '')
    self.debug('search node with name=%s, property=%s', name, node_property)
    if name and node_property:
        condition = BIO_CYPER_TEMPLATE['node_property'] % (str2unicode(name),
                                                           node_property)
        self.debug('condition=%s', condition)
        data = self.graph.run(condition).data()
        docs = self._extract_answer(data)
        if docs:
            self.debug('got node=%s, answer=%s', name, json.dumps(docs))
        else:
            # direct lookup failed -- retry via the equal-node template
            self.debug('search equal_node name=%s, property=%s', name,
                       node_property)
            condition = BIO_CYPER_TEMPLATE['equal_node_property'] % (
                str2unicode(name), node_property)
            self.debug('condition=%s', condition)
            data = self.graph.run(condition).data()
            docs = self._extract_answer(data)
            self.debug('got equal_node=%s, answer=%s', name, json.dumps(docs))
    else:
        self.warn('@@@@@@@@@@@@@@ unexpected name=%s, property=%s', name,
                  node_property)
    self.debug('>>> end search_node_info <<<')
    return docs
def create_NT_hashed_password(passwd):
    """Create the NT hashed password: MD4 over the UNICODE form of
    *passwd*, padded with five NUL octets to a 21-byte string."""
    # hash the UNICODE form of the password with MD4
    hash_ctx = md4.new()
    hash_ctx.update(utils.str2unicode(passwd))
    # pad the 16-byte digest with zeros to get a 21-byte string
    return hash_ctx.digest() + '\000\000\000\000\000'
def _match_predicate(self): """ 基于模板匹配谓语 :return:返回匹配到的谓语 """ self.debug('>>> start _match_predicate <<<') template_docs = self.template_core.search_with_seg( self.query, query_fields=['key_index']) match_template_docs = [] if template_docs: for tmp_item in template_docs: pattern_str = tmp_item.get('pattern', '') # 模板的正则表达式 predicates = tmp_item.get('predicates', []) # 模板对应的谓语 priority = tmp_item.get( 'priority', DEFAULT_TEMPLATE_PRIORITY) # 模板优先级,(1为精确匹配) missing_tuple = tmp_item.get('missing_tuple', '') # 缺失的三元祖 pattern = re.compile(ur'%s' % pattern_str) is_match = pattern.match(str2unicode(self.query)) if is_match: # 模板匹配,将其添加到match_template_docs中 doc = { 'pattern': pattern_str, 'predicates': predicates, 'priority': priority, 'missing_tuple': missing_tuple } self.debug("got match pattern=%s", pattern_str) doc['title'] = is_match.group('title') if priority != 1: # 匹配到的模板不是精确模板,加入到match_template_docs match_template_docs.append(doc) else: # 匹配到的模板是精确模板,仅返回精确匹配模板 self.debug('got precise pattern=%s') match_template_docs = [ doc, ] break else: # 模板未匹配 self.debug("don't match pattern=%s", pattern_str) else: self.warn( '@@@@@@@@@@@@@@@@@@@@@@ unexpected value, templates_docs=None') self.debug(">>> end _match_predicate <<<") return match_template_docs
def translate_to_basic(self, environment, connection, error_code):
    """Use the client's Basic credentials to drive NTLM authentication.

    Returns 1 when Basic credentials were found and the NTLM user plus
    LM/NT hashed passwords were stored in *environment*; otherwise
    rewrites the remote server's header to Basic and returns 0.
    """
    log = connection.logger.log
    log('*** Translating NTLM to Basic...\n')
    basic_user, basic_pw = self.get_credentials_from_basic(
        connection, error_code)
    if not basic_user:
        log("*** There are no basic credentials in client's header.\n")
        log("*** Replacing NTLM value with Basic in rserver's header...")
        self.replace_ntlm_with_basic(connection, error_code)
        log("Done.\n")
        log("*** New server's header:\n=====\n" +
            connection.rserver_head_obj.__repr__())
        return 0
    log("*** Found Basic credentials in client's header.\n")
    upper_user = string.upper(basic_user)
    if environment['UNICODE']:
        environment['USER'] = utils.str2unicode(upper_user)
    else:
        environment['USER'] = upper_user
    #environment['PASSWORD'] = basic_pw
    log("*** Basic User/Password: %s/%s.\n" % (basic_user, basic_pw))
    log("*** Calculating hashed passwords (LM and NT)...")
    environment['LM_HASHED_PW'] = ntlm_procs.create_LM_hashed_password(
        basic_pw)
    environment['NT_HASHED_PW'] = ntlm_procs.create_NT_hashed_password(
        basic_pw)
    log("Done.\n")
    return 1
def collect_features(browser):
    '''
    Read DOM attributes from the current page loaded by the browser, derive page features

    Args:
        browser (cef): Browser object with the page already loaded and ready

    Returns:
        tuple (header, attributes, dom, body html): the header and attributes are
        list of attributes which together forms a feature table; dom is the raw
        attributes extracted by JS code, in form of list of lists; the body html
        is the rendered html code of the <body> part of the page

    Data dictionary of collect_features() output:
        id          [int]   seq num of node in JS dom tree
        parent      [int]   id of parent node
        tagname     [str]   HTML tag name
        depth       [int]   node count to its deepest descendent in dom tree (etree-based)
        childcount  [int]   num of children
        sourceline  [int]   line num of source code (etree-based, i.e. start from <body> tag)
        sourcepct   [float] percentage position of source line in HTML (etree-based, within <body>)
        pospct      [float] percentage position of node in the DOM (depth-first search of JS DOM)
        xpct        [float] percentage position of element's left edge to window width
        x           [int]   pixel coordinate of left edge of element's bounding box to the page
        y           [int]   pixel coordinate of top edge of element's bounding box to the page
        width       [int]   pixel width of element's bounding box
        height      [int]   pixel height of element's bounding box
        fgcolor     [str]   foreground color, in form of rgb(255,255,255) or rgba(255,255,255,1.0)
        bgcolor     [str]   background color, in form of rgb(255,255,255) or rgba(255,255,255,1.0)
        textxws     [int]   character length of text excluding whitespaces
        textlen     [int]   character length of text
        htmllen     [int]   character length of HTML code
        visible     [bool]  visibility of this element
        fontsize    [float] font size
        xpath       [str]   xpath of element
        textclip    [str]   starting and ending snippet of text
    '''
    # synchronous get, and make all string into unicode
    dom = [[str2unicode(x) for x in row] for row in browser.getDOMdata(True)]
    winparam = browser.windowParams
    winwidth = winparam['innerWidth']
    logger.debug("%d web elements found" % len(dom))
    bodyhtml = next((x[-1] for x in dom if x[0] == '/html/body'), '')
    assert (bodyhtml)  # we assumed there must be a body
    domtree = html2dom(bodyhtml)  # need to pretty format source before use
    objectify.deannotate(domtree, cleanup_namespaces=True)
    linecount = len(bodyhtml.split("\n"))
    # populate DOM tree geometry data
    xpathHash = {attrs[0]: i for i, attrs in enumerate(dom)}
    depthHash = {}  # actually "height", distance from node to deepest leaf, based on lxml etree

    def findElementDepth(e):
        "e: lxml etree element node, find its depth in dom tree"
        if e not in depthHash:
            if len(e):  # e has children
                depthHash[e] = 1 + max(findElementDepth(x) for x in e.iterchildren())
            else:  # e has no children, by definition depth=0
                depthHash[e] = 0
        return depthHash[e]

    # collect element attributes:
    attributes = []
    for i, attrs in enumerate(dom):
        if i and (i % 1000 == 0):
            logger.debug('...on element #%d' % i)
        xpath, display, visible, x, y, width, height, fgcolor, bgcolor, \
            fontsize, textonly, htmlcode = attrs
        if not xpath or re.search(r'[^a-z0-9\[\]\/]', xpath) \
                or re.search(r'(?<!\w)(script|head)(?!\w)', xpath):
            continue  # skip these to avoid pollution by JS or HTML header
        etreenode = domtree.xpath(xpath)
        if len(etreenode) != 1:
            if not etreenode:
                logger.error('JS reported XPath cannot be found in lxml: %s' % xpath)
                continue
            else:
                logger.error('XPath not unique for %s. %d elements found.'
                             % (xpath, len(etreenode)))
        parent = xpathHash.get(xpath.rsplit('/', 1)[0])
        tagname = xpath.rsplit('/', 1)[-1].split('[', 1)[0]
        depth = findElementDepth(etreenode[0])
        if etreenode:
            # BUG FIX: was len(etreenode), which counts xpath matches
            # (always 1 here), not the node's children as documented above
            childcount = len(etreenode[0])
        else:
            # BUG FIX: was len(<generator>) (TypeError) with a ".startwith"
            # typo; count direct children by xpath prefix instead
            childcount = sum(1 for n in xpathHash
                             if n.startswith(xpath) and '/' not in n[len(xpath):])
        sourceline = etreenode[0].sourceline
        fgcolor = fgcolor.replace(' ', '')
        bgcolor = bgcolor.replace(' ', '')
        # text from JS retains word boundary by replacing tag with space
        # while etree.tostring() just remove tags
        textonly = condense_space(textonly)
        htmlcode = condense_space(htmlcode)
        if not htmlcode:  # JS cannot give out the HTML, use etree version instead
            htmlcode = condense_space(etree.tostring(
                etreenode[0], encoding='utf8', method='html').decode('utf8'))
        # derived data
        textlen, htmllen = len(textonly), len(htmlcode)
        textxws = sum(1 for c in textonly if c and not c.isspace())  # text length excluding whitespaces
        if not htmllen:
            logger.error('empty HTML for tag %s on line %s at (%s,%s)+(%s,%s)'
                         % (tagname, sourceline, x, y, width, height))
        textclip = abbreviate(textonly)
        # float() guards against Python 2 integer division truncating the
        # documented float percentages to 0; identical result on Python 3
        sourcepct = float(sourceline) / linecount
        xpct = float(x) / winwidth
        pospct = float(i + 1) / len(dom)  # remember this
        attributes.append([i, parent, tagname, depth, childcount, sourceline,
                           sourcepct, pospct, xpct, x, y, width, height,
                           fgcolor, bgcolor, textxws, textlen, htmllen,
                           min(visible, display), fontsize, xpath, textclip])
    header = ("id parent tagname depth childcount sourceline sourcepct pospct xpct x y width height "
              "fgcolor bgcolor textxws textlen htmllen visible fontsize xpath textclip").split()
    return header, attributes, dom, bodyhtml
def build_env_dict(self, connection):
    """Build the environment dict driving NTLM authentication.

    Reads the [NTLM_AUTH] (and [GENERAL]) sections of connection.config
    and returns a dict with NTLM flags, LM/NT response selection, unicode
    mode, domain/host/user and, when configured, pre-hashed passwords.
    """
    connection.logger.log('*** Building environment for NTLM.\n')
    env = {}
    if connection.config['NTLM_AUTH']['NTLM_FLAGS']:
        env['FLAGS'] = connection.config['NTLM_AUTH']['NTLM_FLAGS']
        connection.logger.log('*** Using custom NTLM flags: %s\n' % env['FLAGS'])
    else:
        # I have seen flag field '\005\202' as well (with NT response).
        #0x8206 or 0x8207 or 0x8205
        env['FLAGS'] = "06820000"
        #flags = utils.hex2str(ed['NTLM_FLAGS'])
        connection.logger.log('*** Using default NTLM flags: %s\n' % env['FLAGS'])
    env['LM'] = connection.config['NTLM_AUTH']['LM_PART']
    env['NT'] = connection.config['NTLM_AUTH']['NT_PART']
    # we must have at least LM part
    if not (env['LM'] or env['NT']):
        env['LM'] = 1
    if env['LM'] == 1 and env['NT'] == 0:
        connection.logger.log('*** NTLM version with LM response only.\n')
    elif env['LM'] == 1 and env['NT'] == 1:
        connection.logger.log('*** NTLM version with LM and NT responses.\n')
    elif env['LM'] == 0 and env['NT'] == 1:
        connection.logger.log('*** NTLM version with NT response only.\n')
    #env['UNICODE'] = connection.config['NTLM_AUTH']['UNICODE']
    # NT responses imply UNICODE strings on the wire
    if env['NT']:
        env['UNICODE'] = 1
    else:
        env['UNICODE'] = 0
    # have to put these ones into [NTLM] section
    env['DOMAIN'] = string.upper(connection.config['NTLM_AUTH']['NT_DOMAIN'])
    # Check if there is explicit NT_Hostname in config, if there is one then take it,
    # if there is no one then take gethostname() result.
    if connection.config['NTLM_AUTH']['NT_HOSTNAME']:
        env['HOST'] = string.upper(connection.config['NTLM_AUTH']['NT_HOSTNAME'])
    else:
        env['HOST'] = string.upper(connection.config['GENERAL']['HOST'])
    env['USER'] = string.upper(connection.config['NTLM_AUTH']['USER'])
    connection.logger.log('*** NTLM Domain/Host/User: %s/%s/%s\n' % (env['DOMAIN'], env['HOST'], env['USER']))
    # have to use UNICODE stings
    if env['UNICODE']:
        env['DOMAIN'] = utils.str2unicode(env['DOMAIN'])
        env['HOST'] = utils.str2unicode(env['HOST'])
        env['USER'] = utils.str2unicode(env['USER'])
        connection.logger.log('*** Using UNICODE stings.\n')
    if connection.config['NTLM_AUTH']['LM_HASHED_PW'] and connection.config['NTLM_AUTH']['NT_HASHED_PW']:
        env['LM_HASHED_PW'] = connection.config['NTLM_AUTH']['LM_HASHED_PW']
        env['NT_HASHED_PW'] = connection.config['NTLM_AUTH']['NT_HASHED_PW']
        connection.logger.log('*** NTLM hashed passwords found.\n')
    # Test params
    if connection.config['NTLM_AUTH'].has_key('NTLM_MODE'):
        env['NTLM_MODE'] = int(connection.config['NTLM_AUTH']['NTLM_MODE'])
    else:
        env['NTLM_MODE'] = 0
    # End of test params
    # NOTE(review): a sibling version of this method casts NTLM_TO_BASIC
    # with int(); here the raw config value is kept -- confirm callers
    env['NTLM_TO_BASIC'] = connection.config['NTLM_AUTH']['NTLM_TO_BASIC']
    connection.logger.log('*** Environment has been built successfully.\n')
    return env
def build_env_dict(self, connection):
    """Build the environment dict driving NTLM authentication.

    Reads the [NTLM_AUTH] (and [GENERAL]) sections of connection.config
    and returns a dict with NTLM flags, LM/NT response selection, unicode
    mode, domain/host/user and, when configured, pre-hashed passwords.
    """
    connection.logger.log('*** Building environment for NTLM.\n')
    env = {}
    if connection.config['NTLM_AUTH']['NTLM_FLAGS']:
        env['FLAGS'] = connection.config['NTLM_AUTH']['NTLM_FLAGS']
        connection.logger.log('*** Using custom NTLM flags: %s\n' % env['FLAGS'])
    else:
        # I have seen flag field '\005\202' as well (with NT response).
        #0x8206 or 0x8207 or 0x8205
        env['FLAGS'] = "06820000"
        #flags = utils.hex2str(ed['NTLM_FLAGS'])
        connection.logger.log('*** Using default NTLM flags: %s\n' % env['FLAGS'])
    env['LM'] = connection.config['NTLM_AUTH']['LM_PART']
    env['NT'] = connection.config['NTLM_AUTH']['NT_PART']
    # we must have at least LM part
    if not (env['LM'] or env['NT']):
        env['LM'] = 1
    if env['LM'] == 1 and env['NT'] == 0:
        connection.logger.log('*** NTLM version with LM response only.\n')
    elif env['LM'] == 1 and env['NT'] == 1:
        connection.logger.log('*** NTLM version with LM and NT responses.\n')
    elif env['LM'] == 0 and env['NT'] == 1:
        connection.logger.log('*** NTLM version with NT response only.\n')
    #env['UNICODE'] = connection.config['NTLM_AUTH']['UNICODE']
    # NT responses imply UNICODE strings on the wire
    if env['NT']:
        env['UNICODE'] = 1
    else:
        env['UNICODE'] = 0
    # have to put these ones into [NTLM] section
    env['DOMAIN'] = string.upper(connection.config['NTLM_AUTH']['NT_DOMAIN'])
    # Check if there is explicit NT_Hostname in config, if there is one then take it,
    # if there is no one then take gethostname() result.
    if connection.config['NTLM_AUTH']['NT_HOSTNAME']:
        env['HOST'] = string.upper(connection.config['NTLM_AUTH']['NT_HOSTNAME'])
    else:
        env['HOST'] = string.upper(connection.config['GENERAL']['HOST'])
    env['USER'] = string.upper(connection.config['NTLM_AUTH']['USER'])
    connection.logger.log('*** NTLM Domain/Host/User: %s/%s/%s\n' % (env['DOMAIN'], env['HOST'], env['USER']))
    # have to use UNICODE stings
    if env['UNICODE']:
        env['DOMAIN'] = utils.str2unicode(env['DOMAIN'])
        env['HOST'] = utils.str2unicode(env['HOST'])
        env['USER'] = utils.str2unicode(env['USER'])
        connection.logger.log('*** Using UNICODE stings.\n')
    if connection.config['NTLM_AUTH']['LM_HASHED_PW'] and connection.config['NTLM_AUTH']['NT_HASHED_PW']:
        env['LM_HASHED_PW'] = connection.config['NTLM_AUTH']['LM_HASHED_PW']
        env['NT_HASHED_PW'] = connection.config['NTLM_AUTH']['NT_HASHED_PW']
        connection.logger.log('*** NTLM hashed passwords found.\n')
    # Test params
    if connection.config['NTLM_AUTH'].has_key('NTLM_MODE'):
        env['NTLM_MODE'] = int(connection.config['NTLM_AUTH']['NTLM_MODE'])
    else:
        env['NTLM_MODE'] = 0
    # End of test params
    env['NTLM_TO_BASIC'] = int(connection.config['NTLM_AUTH']['NTLM_TO_BASIC'])
    connection.logger.log('*** Environment has been built successfully.\n')
    return env
def nt_password_hash(passwd, pad_to_21_bytes=False):
    """NtPasswordHash: MD4 over the UNICODE form of the password.

    Generalized to match the sibling implementations that can extend the
    digest to 21 bytes; the default (False) preserves this function's
    original behavior of returning the bare 16-byte digest.

    :param passwd: clear-text password
    :param pad_to_21_bytes: when true, append five NUL octets to the
        16-byte MD4 digest
    :return: 16-byte MD4 digest, or 21 bytes when padded
    """
    # the password must be in UNICODE form before hashing
    pw = utils.str2unicode(passwd)
    md4_context = md4.new()
    md4_context.update(pw)
    res = md4_context.digest()
    if pad_to_21_bytes:
        res = res + '\000\000\000\000\000'
    return res