def extractActive(self, state, d):
    """
    Report files downloaded over active-mode FTP.

    state is the flow state of the control-channel stream being decoded
    d     is that stream's contents as a single string

    The stream is scanned line by line.  USER/PASS matches update the
    credentials used in the report.  A line containing PORT that is
    immediately followed by a line containing RETR identifies a download:
    the port is computed from the last two numbers of the PORT argument,
    the matching data flow state is looked up, its file is renamed to the
    requested filename, and a summary is recorded via self.add_flow().

    Malformed PORT/RETR lines are skipped rather than aborting the scan.
    Returns None.
    """
    username, password = "******", "Unknown"
    # nothing to do unless the stream contains at least one PORT line
    if self.activeRE.search(d) is None:
        return
    lines = d.splitlines()
    total = len(lines)
    idx = 0
    # an explicit index is used because a PORT line also consumes the
    # line that follows it
    while idx < total:
        line = lines[idx]
        idx += 1
        m = self.userRE.search(line)
        if m:
            username = m.group(1)
            continue
        m = self.passRE.search(line)
        if m:
            password = m.group(1)
            continue
        if line.find("PORT") < 0:
            continue
        if idx >= total:
            # PORT was the last line: no command follows it
            return
        nextl = lines[idx]
        idx += 1
        if nextl.find("RETR") < 0:
            continue
        # this means the current PORT will be a data channel for a download
        try:
            filename = nextl.split(" ")[1]
            ip_port = line.split(" ")[1].split(",")
            port = int(ip_port[4]) * 256 + int(ip_port[5])
        except (IndexError, ValueError):
            # not a well-formed "PORT h1,h2,h3,h4,p1,p2" / "RETR name" pair
            continue
        # the data flow looks like the reverse of the control flow, but
        # with the client's advertised port as dport and 20 as sport
        rflow = freverse(state.flow)
        rflow.dport = port
        rflow.sport = 20
        # find the state that carries the data
        rstate = self.statemgr.find_flow_state(rflow)
        if rstate is None:
            continue
        # rename the data file
        fn = renameFile(rstate, filename)
        filetype, m5 = self.id.identify(rstate)
        output = "%s requested %s from %s (%s, %s) at %s\n\tfile: %s, filetype: %s, md5 sum: %s\n" % (
            rstate.flow.dst, filename, rstate.flow.src, username, password,
            self.tf(rstate.ts), fn, filetype, m5)
        self.add_flow(rstate.ts, rstate.flow.src, rstate.flow.dst, output)
def _renameFlow(self, state, t):
    """
    Rename the file of the response half of an HTTP exchange after the
    requested resource, and record a summary line via self.add_flow().

    state is a honeysnap.flow.flow_state object
    t     is 'response' or 'request' and says which half `state` is

    Nothing happens unless the reverse flow state exists and both halves
    have been decoded.  Any other value of t is ignored.
    """
    rs = self.statemgr.find_flow_state(freverse(state.flow))
    if rs is None or rs.decoded is None or state.decoded is None:
        return
    if t == 'request':
        # the request supplies uri/headers, the reverse flow is the one
        # whose file gets renamed
        request_msg = state.decoded
        data_state = rs
    elif t == 'response':
        request_msg = rs.decoded
        data_state = state
    else:
        # unknown half: previously this fell through to an unbound name
        return

    user_agent = "UNKNOWN"
    url = 'UNKNOWN'
    try:
        url = urllib.splitquery(request_msg.uri)[0]
        realname = url.rsplit("/", 1)[-1]
    except AttributeError:
        # message without a uri attribute
        realname = 'index.html'
    # look the two headers up independently so that a missing one does
    # not hide the other (a missing host is probably something like a
    # CONNECT)
    try:
        user_agent = request_msg.headers['user-agent']
    except KeyError:
        pass
    try:
        url = request_msg.headers['host'] + url
    except KeyError:
        pass
    if not realname or realname == '/':
        realname = 'index.html'

    fn = renameFile(data_state, realname)
    filetype, m5 = self.id.identify(data_state)
    outstring = "%s -> %s, %s (%s) at %s\n" % (
        data_state.flow.src, data_state.flow.dst, url, user_agent,
        self.tf(data_state.ts))
    outstring = outstring + "\tfile: %s, filetype: %s, md5 sum: %s\n" % (
        fn, filetype, m5)
    self.add_flow(data_state.ts, data_state.flow.src, data_state.flow.dst, outstring)
def _renameFlow(self, state, t):
    """
    Rename the file of the response half of an HTTP exchange after the
    requested resource, and record a summary line via self.add_flow().

    state is a honeysnap.flow.flow_state object
    t     is 'response' or 'request' and says which half `state` is

    Nothing happens unless the reverse flow state exists and both halves
    have been decoded.  Any other value of t is ignored.
    """
    rs = self.statemgr.find_flow_state(freverse(state.flow))
    if rs is None or rs.decoded is None or state.decoded is None:
        return
    if t == 'request':
        # the request supplies uri/headers, the reverse flow is the one
        # whose file gets renamed
        request_msg = state.decoded
        data_state = rs
    elif t == 'response':
        request_msg = rs.decoded
        data_state = state
    else:
        # unknown half: previously this fell through to an unbound name
        return

    user_agent = "UNKNOWN"
    url = 'UNKNOWN'
    try:
        url = urllib.splitquery(request_msg.uri)[0]
        realname = url.rsplit("/", 1)[-1]
    except AttributeError:
        # message without a uri attribute
        realname = 'index.html'
    # look the two headers up independently so that a missing one does
    # not hide the other (a missing host is probably something like a
    # CONNECT)
    try:
        user_agent = request_msg.headers['user-agent']
    except KeyError:
        pass
    try:
        url = request_msg.headers['host'] + url
    except KeyError:
        pass
    if not realname or realname == '/':
        realname = 'index.html'

    fn = renameFile(data_state, realname)
    filetype, m5 = self.id.identify(data_state)
    outstring = "%s -> %s, %s (%s) at %s\n" % (
        data_state.flow.src, data_state.flow.dst, url, user_agent,
        self.tf(data_state.ts))
    outstring = outstring + "\tfile: %s, filetype: %s, md5 sum: %s\n" % (
        fn, filetype, m5)
    self.add_flow(data_state.ts, data_state.flow.src, data_state.flow.dst, outstring)
def extractHeaders(self, state, d):
    """
    Split a flow into headers, body and leftover data and write them out.

    state is the flow state; state.fname is the base output filename
    d     is the raw flow contents as a string (only used when
          state.decoded is None)

    Files written (each only when the corresponding part is non-empty):
      state.fname + ".hdr"   reverse flow, request line and headers
      state.fname            the body (overwrites the original file)
      state.fname + ".data"  remaining data

    Header parsing stolen from dpkt.http
    """
    def write_file(path, chunks):
        # always release the handle, even if a write fails
        fp = open(path, "wb")
        try:
            for chunk in chunks:
                fp.write(chunk)
        finally:
            fp.close()

    headers = None
    data = None
    body = None
    request = ""
    if state.decoded is not None:
        # this request was successfully decoded
        # so the decoded object will contain all the headers
        # and the detached data
        headers = state.decoded.headers
        body = state.decoded.body
        try:
            request = state.decoded.request
        except dpkt.Error:
            request = ""
        try:
            data = state.decoded.data
        except dpkt.Error:
            data = None
    else:
        # dpkt.http failed to decode
        f = cStringIO.StringIO(d)
        headers = {}
        # grab whatever headers we can
        while 1:
            line = f.readline()
            if not line:
                # NOTE(review): hitting EOF here returns without writing
                # anything, including headers already collected - confirm
                # this is intended
                return
            # NOTE(review): overwritten on every iteration, so this ends
            # up holding the last line read, not the first - confirm
            request = line
            line = line.strip()
            if not line:
                break
            l = line.split(None, 1)
            if not l[0].endswith(':'):
                break
            k = l[0][:-1].lower()
            headers[k] = len(l) != 1 and l[1] or ''
        # this state is somehow broken, or dpkt would have decoded it
        # we'll just put the rest of the data into a file
        data = "".join(f.readlines())
        body = None

    # write headers, body, data to files
    if headers is not None and len(headers) > 0:
        rf = freverse(state.flow)
        chunks = ["reverse flow: %s\n" % rf.__repr__()]
        if request:
            chunks.append(request)
        for k, v in headers.items():
            # a repeated header may be stored as a list of values;
            # emit one line per value
            if isinstance(v, list):
                values = v
            else:
                values = [v]
            for value in values:
                chunks.append(k + " : " + value + "\n")
        write_file(state.fname + ".hdr", chunks)
    if body is not None and len(body) > 0:
        if isinstance(body, list):
            body = "".join(body)
        write_file(state.fname, [body])
    if data is not None and len(data) > 0:
        if isinstance(data, list):
            data = "".join(data)
        write_file(state.fname + ".data", [data])
def decode(self, state, statemgr):
    """
    Decode one half of an HTTP exchange and log/rename it.

    Takes an instance of flow.flow_state, and an instance of
    flow.flow_state_manager

    The flow file is read, classified by self.determineType() as
    'request' or 'response', and parsed with dpkt's http classes; if
    strict parsing fails a lax header-only parse is attempted.  On
    success state.decoded is set, headers/body are written out via
    self.extractHeaders(), and the reverse flow is consulted to rename
    the data file and/or add a log entry.  Returns None.
    """
    self.statemgr = statemgr
    state.open(flags="rb", statemgr=self.statemgr)
    try:
        d = state.fp.readlines()
    finally:
        state.close()
    if len(d) == 0:
        return
    d = self.check_data(d)
    t, req = self.determineType(d)
    if (t, req) == (None, None):
        # binary data
        return
    d = "".join(d)
    r = None
    f = state.flow

    if t == 'response':
        try:
            r = http.Response(d)
            r.request = req
            if not hasattr(r, "data"):
                r.data = None
            state.decoded = r
        except (dpkt.Error, ValueError):
            try:
                # bad data, try lax parsing
                r = self._laxParse(state, req)
                r.status = "-"
                state.decoded = r
            except dpkt.Error:
                print("response failed to decode: %s " % state.fname)

    if t == 'request':
        try:
            r = http.Request(d)
            state.decoded = r
            r.request = req
            # default avoids an AttributeError if there is no data attribute
            if not getattr(r, "data", None):
                r.data = None
        except (dpkt.Error, ValueError):
            # ValueError is handled the same way as for responses
            try:
                # bad data, so let's try some laxer parsing
                r = self._laxParse(state, req)
                state.decoded = r
                # frig up some stuff for the logging
                h = req.split()
                r.method = h[0].strip()
                r.uri = h[1].strip()
            except dpkt.Error:
                print("request failed to decode: %s " % state.fname)

    if r:
        state.decoded = r
    else:
        return

    if t is not None:
        self.extractHeaders(state, d)
        rs = self.statemgr.find_flow_state(freverse(state.flow))
        if not rs:
            # haven't seen other half - just fake something so that at
            # least the request gets logged.
            if t == 'request':
                dummy_response = http.Response()
                dummy_response.__dict__['status'] = '-'
                self._add_log_entry(r, dummy_response, f.src, f.dst, state.ts)
            return
        if rs.decoded:
            self._renameFlow(state, t)
        else:
            # decode the other half first, then log the pair
            self.decode(rs, self.statemgr)
            if rs.decoded:
                if t == 'request':
                    self._add_log_entry(r, rs.decoded, f.src, f.dst, state.ts)
                elif t == 'response':
                    self._add_log_entry(rs.decoded, r, f.dst, f.src, rs.ts)

def _laxParse(self, state, req):
    """
    Fallback parser used by decode() when strict dpkt parsing fails.

    Re-opens the flow file, discards its first line, parses the headers
    with http.parse_headers() and keeps the remaining lines as the body.
    Returns an http.Message with headers, body, data (None) and request
    (req) set.  The file is closed even if parsing raises.
    """
    state.open(flags="rb", statemgr=self.statemgr)
    try:
        state.fp.readline()
        headers = http.parse_headers(state.fp)
        body = state.fp.readlines()
    finally:
        state.close()
    msg = http.Message()
    msg.headers = headers
    msg.body = body
    msg.data = None
    msg.request = req
    return msg
def extractHeaders(self, state, d):
    """
    Split a flow into headers, body and leftover data and write them out.

    state is the flow state; state.fname is the base output filename
    d     is the raw flow contents as a string (only used when
          state.decoded is None)

    Files written (each only when the corresponding part is non-empty):
      state.fname + ".hdr"   reverse flow, request line and headers
      state.fname            the body (overwrites the original file)
      state.fname + ".data"  remaining data

    Header parsing stolen from dpkt.http
    """
    def write_file(path, chunks):
        # always release the handle, even if a write fails
        fp = open(path, "wb")
        try:
            for chunk in chunks:
                fp.write(chunk)
        finally:
            fp.close()

    headers = None
    data = None
    body = None
    request = ""
    if state.decoded is not None:
        # this request was successfully decoded
        # so the decoded object will contain all the headers
        # and the detached data
        headers = state.decoded.headers
        body = state.decoded.body
        try:
            request = state.decoded.request
        except dpkt.Error:
            request = ""
        try:
            data = state.decoded.data
        except dpkt.Error:
            data = None
    else:
        # dpkt.http failed to decode
        f = cStringIO.StringIO(d)
        headers = {}
        # grab whatever headers we can
        while 1:
            line = f.readline()
            if not line:
                # NOTE(review): hitting EOF here returns without writing
                # anything, including headers already collected - confirm
                # this is intended
                return
            # NOTE(review): overwritten on every iteration, so this ends
            # up holding the last line read, not the first - confirm
            request = line
            line = line.strip()
            if not line:
                break
            l = line.split(None, 1)
            if not l[0].endswith(':'):
                break
            k = l[0][:-1].lower()
            headers[k] = len(l) != 1 and l[1] or ''
        # this state is somehow broken, or dpkt would have decoded it
        # we'll just put the rest of the data into a file
        data = "".join(f.readlines())
        body = None

    # write headers, body, data to files
    if headers is not None and len(headers) > 0:
        rf = freverse(state.flow)
        chunks = ["reverse flow: %s\n" % rf.__repr__()]
        if request:
            chunks.append(request)
        for k, v in headers.items():
            # a repeated header may be stored as a list of values;
            # emit one line per value
            if isinstance(v, list):
                values = v
            else:
                values = [v]
            for value in values:
                chunks.append(k + " : " + value + "\n")
        write_file(state.fname + ".hdr", chunks)
    if body is not None and len(body) > 0:
        if isinstance(body, list):
            body = "".join(body)
        write_file(state.fname, [body])
    if data is not None and len(data) > 0:
        if isinstance(data, list):
            data = "".join(data)
        write_file(state.fname + ".data", [data])
def decode(self, state, statemgr):
    """
    Decode one half of an HTTP exchange and log/rename it.

    Takes an instance of flow.flow_state, and an instance of
    flow.flow_state_manager

    The flow file is read, classified by self.determineType() as
    'request' or 'response', and parsed with dpkt's http classes; if
    strict parsing fails a lax header-only parse is attempted.  On
    success state.decoded is set, headers/body are written out via
    self.extractHeaders(), and the reverse flow is consulted to rename
    the data file and/or add a log entry.  Returns None.
    """
    self.statemgr = statemgr
    state.open(flags="rb", statemgr=self.statemgr)
    try:
        d = state.fp.readlines()
    finally:
        state.close()
    if len(d) == 0:
        return
    d = self.check_data(d)
    t, req = self.determineType(d)
    if (t, req) == (None, None):
        # binary data
        return
    d = "".join(d)
    r = None
    f = state.flow

    if t == 'response':
        try:
            r = http.Response(d)
            r.request = req
            if not hasattr(r, "data"):
                r.data = None
            state.decoded = r
        except (dpkt.Error, ValueError):
            try:
                # bad data, try lax parsing
                r = self._laxParse(state, req)
                r.status = "-"
                state.decoded = r
            except dpkt.Error:
                print("response failed to decode: %s " % state.fname)

    if t == 'request':
        try:
            r = http.Request(d)
            state.decoded = r
            r.request = req
            # default avoids an AttributeError if there is no data attribute
            if not getattr(r, "data", None):
                r.data = None
        except (dpkt.Error, ValueError):
            # ValueError is handled the same way as for responses
            try:
                # bad data, so let's try some laxer parsing
                r = self._laxParse(state, req)
                state.decoded = r
                # frig up some stuff for the logging
                h = req.split()
                r.method = h[0].strip()
                r.uri = h[1].strip()
            except dpkt.Error:
                print("request failed to decode: %s " % state.fname)

    if r:
        state.decoded = r
    else:
        return

    if t is not None:
        self.extractHeaders(state, d)
        rs = self.statemgr.find_flow_state(freverse(state.flow))
        if not rs:
            # haven't seen other half - just fake something so that at
            # least the request gets logged.
            if t == 'request':
                dummy_response = http.Response()
                dummy_response.__dict__['status'] = '-'
                self._add_log_entry(r, dummy_response, f.src, f.dst, state.ts)
            return
        if rs.decoded:
            self._renameFlow(state, t)
        else:
            # decode the other half first, then log the pair
            self.decode(rs, self.statemgr)
            if rs.decoded:
                if t == 'request':
                    self._add_log_entry(r, rs.decoded, f.src, f.dst, state.ts)
                elif t == 'response':
                    self._add_log_entry(rs.decoded, r, f.dst, f.src, rs.ts)

def _laxParse(self, state, req):
    """
    Fallback parser used by decode() when strict dpkt parsing fails.

    Re-opens the flow file, discards its first line, parses the headers
    with http.parse_headers() and keeps the remaining lines as the body.
    Returns an http.Message with headers, body, data (None) and request
    (req) set.  The file is closed even if parsing raises.
    """
    state.open(flags="rb", statemgr=self.statemgr)
    try:
        state.fp.readline()
        headers = http.parse_headers(state.fp)
        body = state.fp.readlines()
    finally:
        state.close()
    msg = http.Message()
    msg.headers = headers
    msg.body = body
    msg.data = None
    msg.request = req
    return msg
def extractPassive(self, state, d):
    """
    Report files downloaded over passive-mode FTP.

    state is the flow state of the client side of the control channel
    d     is that stream's contents as a single string

    Lines matching self._227re in the reverse (server) stream are paired,
    in order, with the client lines whose first word contains one of
    `cmds`.  For every pair whose command contains RETR, the address and
    port are taken from the 227 line, the capture named by
    self.options["tmpf"] is re-read with a new tcpflow extractor filtered
    on that source host/port, the resulting data file is renamed to the
    requested filename, and a summary is recorded via self.add_flow().

    Returns None; does nothing when no reverse flow state exists.
    """
    username, password = "******", "Unknown"
    # first we have to find the reverse flow/state
    # from it we will extract the ip and port info
    rflow = freverse(state.flow)
    rstate = self.statemgr.find_flow_state(rflow)
    if rstate is None:
        # no reverse state, bail
        return
    rstate.open(flags="rb", statemgr=self.statemgr)
    try:
        dchannel = rstate.fp.readlines()
    finally:
        rstate.close()

    # all the 227 lines from the server, i.e. the ones that open a data port
    portlines = [l for l in dchannel if self._227re.search(l) is not None]

    # all the client lines that use a data port; credentials seen along
    # the way are kept (they used to be reset by every later line)
    cmdlines = []
    for line in d.splitlines():
        m = self.userRE.search(line)
        if m:
            username = m.group(1)
            continue
        m = self.passRE.search(line)
        if m:
            password = m.group(1)
            continue
        words = line.split()
        if not words:
            # blank line, no command word to inspect
            continue
        if [c for c in cmds if c in words[0]]:
            cmdlines.append(line)

    # zip the 2 lists together
    # should give [(227 response, Client CMD),...]
    for portline, cmdline in zip(portlines, cmdlines):
        if cmdline.find("RETR") < 0:
            # not a RETR command
            continue
        m = self.portIPRE.search(portline)
        if m is None:
            continue
        # the last 2 items in the RE result are the port info
        info = m.group().split(",")
        try:
            port = 256 * int(info[-2]) + int(info[-1])
            filename = cmdline.split(" ")[1]
        except (IndexError, ValueError):
            # malformed 227 reply or RETR without a filename
            continue
        ip = ".".join(info[0:4])
        rflow.sport = port
        # passive ftp transactions happen on high ports
        # so the stream extractor has not extracted the data
        # create a new stream extractor to pull the data
        capture = pcap.pcap(self.options["tmpf"])
        de = tcpflow.tcpFlow(capture)
        pcap_filter = "src host %s and src port %d" % (rflow.src, rflow.sport)
        de.setFilter(pcap_filter)
        # NOTE(review): the "%s" is passed through unformatted here;
        # presumably setOutdir fills it in - confirm
        de.setOutdir(self.options["output_data_directory"] + "/%s/ftp")
        # run the flow extractor
        de.start()
        # now find the correct state
        flows = [fl for fl in de.states.getFlows() if fl.isSrcSport(ip, port)]
        if len(flows) == 0:
            continue
        if len(flows) > 1:
            print("hmmm, got more than 1 flow")
        rflow = flows[0]
        rstate = de.states.find_flow_state(rflow)
        if rstate is None:
            continue
        # rename the data file
        fn = renameFile(rstate, filename)
        filetype, m5 = self.id.identify(rstate)
        output = "%s requested %s from %s (%s, %s) at %s\n\tfile: %s, filetype: %s, md5 sum: %s\n" % (
            rstate.flow.dst, filename, rstate.flow.src, username, password,
            self.tf(rstate.ts), fn, filetype, m5)
        self.add_flow(rstate.ts, rstate.flow.src, rstate.flow.dst, output)