-
Notifications
You must be signed in to change notification settings - Fork 1
/
protoanal.py
424 lines (380 loc) · 15.7 KB
/
protoanal.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
# protoanal.py - unknown network protocol analysis toolkit
#
# Copyright:
# Tim Varkalis (tim.analyst@gmail.com) - monkeynut.eu
# Security Consultant - Portcullis Computer Security Limited.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#from scapy.all import *
import binascii
import itertools as it
from subprocess import Popen

import networkx as nx
import numpy as np
import pygraphviz as gv
import pylab as pl
import scipy.stats as ss
from scapy.layers.inet import UDP, Raw
from scapy.plist import PacketList
from scapy.utils import rdpcap
class Conversation(object):
    '''
    A set of captured UDP packets exchanged between a local and a remote port.

    Attributes set by __init__:
        * lport   - the local UDP port
        * rport   - the remote UDP port
        * pktlist - scapy PacketList holding the (filtered) packets
        * count   - number of packets in pktlist
    '''

    def __init__( self, packets , localport = 50002, remoteport = 4172, isUDP = True ):
        '''
        Initialise a conversation from a pcap file or from packets otherwise
        obtained from scapy (e.g. with scapy's rdpcap).

        Args:
            * packets    - one of: a string naming a pcap file (relative or
                           absolute), a PacketList, another Conversation (its
                           packet list is reused), or a plain list/tuple of
                           packets
            * localport  - the UDP port you communicated from
            * remoteport - the UDP port you are communicating with
            * isUDP      - when True, keep only UDP packets whose source or
                           destination port is localport or remoteport.
                           For future use when TCP & SCTP are also implemented.

        Raises:
            TypeError if packets is none of the supported kinds.
        '''
        if isinstance( packets, str ):
            pktlist = rdpcap( packets )
        elif isinstance( packets, Conversation ):
            pktlist = packets.pktlist
        elif isinstance( packets, PacketList ):
            pktlist = packets
        elif isinstance( packets, (list, tuple) ):
            pktlist = PacketList( list( packets ) )
        else:
            raise TypeError( 'packets must be a pcap filename, PacketList, '
                             'Conversation or list of packets' )

        self.lport = localport
        self.rport = remoteport

        if isUDP:
            ports = ( self.lport, self.rport )
            # Single pass: must carry UDP and touch one of the two ports.
            pktlist = pktlist.filter(
                lambda x: x.haslayer(UDP) and
                          ( x[UDP].dport in ports or x[UDP].sport in ports ) )

        # Wrap the result in a fresh PacketList.
        self.pktlist = PacketList( pktlist )
        self.count = len( self.pktlist )

    def __getitem__( self, y ):
        '''
        Index or slice the underlying packet list.
        '''
        return self.pktlist[y]

    def _derive( self, packets, asConvo ):
        '''
        Return packets as-is, or wrapped in a Conversation that keeps this
        conversation's ports (so the port filter does not fall back to the
        constructor defaults).
        '''
        if asConvo:
            return Conversation( packets, self.lport, self.rport )
        return packets

    def fromLocal( self , asConvo = True ):
        '''
        Select the packets whose UDP source port is the local port.

        If asConvo is True the result is a Conversation, otherwise the
        filtered PacketList is returned directly (cheaper, and useful for
        customising the list before creating a Conversation).
        '''
        m = self.pktlist.filter( lambda x: x[UDP].sport == self.lport )
        return self._derive( m, asConvo )

    def fromRemote( self, asConvo = True ):
        '''
        Works as fromLocal, but selects packets whose UDP source port is the
        remote port.
        '''
        m = self.pktlist.filter( lambda x: x[UDP].sport == self.rport )
        return self._derive( m, asConvo )

    def fromEither( self, asConvo = True ):
        '''
        Works as fromLocal, but selects packets whose UDP source port is
        either the local or the remote port.
        '''
        ports = ( self.lport, self.rport )
        m = self.pktlist.filter( lambda x: x[UDP].sport in ports )
        return self._derive( m, asConvo )

    def subrange( self, end, start = 0, asConvo = True ):
        '''
        Select the packets at positions start (inclusive) to end (exclusive).
        Returns a Conversation, or the raw slice when asConvo is False.
        '''
        m = self.pktlist[start:end]
        return self._derive( m, asConvo )

    def size( self ):
        '''
        Returns the size (in bytes) of each packet's Raw payload as a numpy
        array, in packet order.
        '''
        return np.array( [ len( c[Raw].load ) for c in self.pktlist ] )

    def sizes( self ):
        '''
        Returns the set of distinct payload sizes (in bytes).
        '''
        return set( self.size() )

    def sizeIs( self, size , asConvo = True):
        '''
        Select the packets whose payload is exactly size bytes long.
        Returns a Conversation, or a PacketList when asConvo is False.
        '''
        m = self.pktlist.filter( lambda x: len( x[Raw].load ) == size )
        return self._derive( m, asConvo )

    def sizeBelow( self, size , asConvo = True):
        '''
        Select the packets whose payload is smaller than or equal to size bytes.
        Returns a Conversation, or a PacketList when asConvo is False.
        '''
        m = self.pktlist.filter( lambda x: len( x[Raw].load ) <= size )
        return self._derive( m, asConvo )

    def sizeAbove( self, size , asConvo = True):
        '''
        Select the packets whose payload is larger than or equal to size bytes.
        Returns a Conversation, or a PacketList when asConvo is False.
        '''
        m = self.pktlist.filter( lambda x: len( x[Raw].load ) >= size )
        return self._derive( m, asConvo )

    def sizeHistogram(self, plot=True):
        '''
        Simplifies the process of displaying histograms of payload sizes,
        which is a useful way to classify packets.

        If plot is True the histogram is displayed and nothing is returned,
        otherwise the result of pl.histogram (counts and bins) is returned.
        For finer grained control use pylab directly.
        '''
        if not plot:
            return pl.histogram( self.size() )
        pl.hist( self.size() )
        pl.show()

    def statePlot( self , bucketwidth, offsets, filename = 'state.dot', display=True ):
        '''
        Draw the transitions between packet "states" as a directed graph.

        bucketwidth is the number of bytes in each section of the payload and
        offsets lists which sections are of interest. The state of a packet is
        the tuple of hex values found at those sections. Nodes are states and
        each directed arc, labelled with the packet index, is the transition
        from one packet's state to the next packet's state. Arcs are blue when
        the packet came from the remote port, green when it came from the
        local port and black otherwise.

        The graph is written to filename in dot format and, when display is
        True, opened with the external 'xdot' program.

        Beware: with many distinct states the graph will likely be
        indecipherable. Choose the values wisely.
        '''
        offsets = [ int(o) for o in offsets ]
        statesamples = Samples( self, bucketwidth * ( max( offsets ) + 1 ), bucketwidth )

        valueslist = []
        for o in offsets:
            values = statesamples.asHex( o )
            print( set( values ) )      # show the distinct values seen at this offset
            valueslist.append( values )

        # One state tuple per packet, in packet order.
        states = list( zip( *valueslist ) )

        # Only states that take part in a transition are drawn; add_edge
        # creates them on demand, so the full cartesian product of possible
        # states never has to be built and pruned.
        G = nx.MultiDiGraph()
        for i in range( len( states ) - 1 ):
            # colour according to local or remote origin
            sport = self.pktlist[i][UDP].sport
            if sport == self.rport:
                edgecolour = 'blue'
            elif sport == self.lport:
                edgecolour = 'green'
            else:
                edgecolour = 'black'
            G.add_edge( states[i], states[i+1], label=i , color = edgecolour )

        # to_agraph lives at the top level in old networkx releases and in
        # networkx.nx_agraph in newer ones.
        to_agraph = getattr( nx, 'to_agraph', None )
        if to_agraph is None:
            to_agraph = nx.nx_agraph.to_agraph
        GA = to_agraph( G )
        GA.edge_attr['penwidth'] = 3.0
        GA.layout()
        GA.write( filename )
        if display:
            Popen( ['xdot', filename ] )
class Samples(object):
    '''
    The set of payloads extracted from a conversation, cut into fixed-size
    buckets so that each bucket position can be analysed across all packets.

    Most methods take an 'offset' argument selecting the bucket(s):
        * 'all' (default)  - every bucket; the result is a list, one entry per bucket
        * a list or tuple  - the result is a dict keyed by bucket offset
        * a single integer - the result is for that bucket only
    Any other kind of offset yields None.
    '''

    def __init__( self, convo , maxlen = 8 , bucketsize = 1):
        '''
        Args:
            * convo is a conversation object (anything with a pktlist of
              packets carrying a Raw layer)
            * maxlen is the maximum number of bytes to include.
              NOTE: all samples in the conversation should have at least this length.
            * bucketsize is the number of bytes to consider each bucket as
        TODO: For now, bucket size and length are number of bytes. It should be number of bits
        '''
        self.payloads = [ c[Raw].load for c in convo.pktlist ]
        self.bucketsize = bucketsize
        self.maxlen = maxlen
        # bytelists[j][i] is bucket j of payload i.
        self.bytelists = [
            [ p[ bucketsize*j : bucketsize*(j+1) ] for p in self.payloads ]
            for j in range( maxlen // bucketsize ) ]

    def _mord( self, bytestr ):
        '''multibyte version of ord: the big-endian integer value of bytestr.'''
        return int( binascii.hexlify( bytestr ), 16 )

    def _hex( self, bytestr ):
        '''ascii hex representation of bytestr as a native string.'''
        return str( binascii.hexlify( bytestr ).decode( 'ascii' ) )

    def _dec( self, bucket ):
        '''integer value of every entry of one bucket, in packet order.'''
        return [ self._mord(c) for c in bucket ]

    def _select( self, offset, func ):
        '''
        Apply func to the bucket(s) chosen by offset and shape the result as
        described in the class docstring (list / dict / single value / None).
        '''
        if isinstance( offset, (list, tuple) ):
            return { o: func( self.bytelists[o] ) for o in offset }
        if isinstance( offset, (int, np.integer) ):
            return func( self.bytelists[offset] )
        if offset == 'all':
            return [ func(b) for b in self.bytelists ]
        return None

    def sizes( self ):
        '''
        Returns a numpy array containing the length of each payload.
        '''
        return np.array( [ len( c ) for c in self.payloads ] )

    def sizeMin( self ):
        '''
        Returns the minimum length (in bytes) of the set of samples.
        '''
        return self.sizes().min()

    def sizeMax( self ):
        '''
        Returns the maximum length (in bytes) of the set of samples.
        '''
        return self.sizes().max()

    def valueList( self, offset = 'all' ):
        '''
        Returns the raw bucket values, one per payload in packet order, for
        the selected bucket(s). With 'all' the internal list of buckets is
        returned directly.
        '''
        if not isinstance( offset, (list, tuple, int, np.integer) ) and offset == 'all':
            return self.bytelists
        return self._select( offset, lambda b: b )

    def valueSet( self, offset = 'all' ):
        '''
        As with valueList, except returns a set which eliminates duplicates and does not preserve order.
        '''
        return self._select( offset, set )

    def valueCount( self, offset = 'all' ):
        '''
        As with valueList, except returns the number of distinct values seen
        in each selected bucket.
        '''
        return self._select( offset, lambda b: len( set(b) ) )

    def valueMax( self, offset = 'all' ):
        '''
        As with valueList, except returns the maximum value in the indicated bucket.
        '''
        return self._select( offset, lambda b: np.array( self._dec(b) ).max() )

    def testNonZero( self, offset = 'all' ):
        '''
        Tests each selected bucket across all packets.
        Returns True if every value in the bucket is non-zero, False otherwise.
        '''
        return self._select( offset, lambda b: np.array( self._dec(b) ).all() )

    def testBelow( self, value, offset = 'all' ):
        '''
        As with testNonZero, except returns True if every value in the bucket
        is strictly below the given integer value.
        '''
        return self._select( offset, lambda b: np.all( np.array( self._dec(b) ) < value ) )

    def testLinearity( self, offset = 'all' ):
        '''
        As with testNonZero, except returns the r-value of a linear regression
        of the bucket values against the packet index.
        '''
        timeaxis = list( range( len( self.bytelists[0] ) ) )
        # The packet index is the independent variable. The r-value is the
        # same whichever way round the two series are passed.
        return self._select( offset, lambda b: ss.linregress( timeaxis, self._dec(b) )[2] )

    def _entropy( self, bucket ):
        '''shannon entropy (natural log) of the value frequencies in one bucket.'''
        tally = {}
        for v in bucket:
            tally[v] = tally.get( v, 0 ) + 1
        # ss.entropy normalises the counts into a probability distribution.
        return ss.entropy( list( tally.values() ) )

    def testEntropy( self, offset = 'all' ):
        '''
        As with testNonZero, except returns the shannon entropy of the values
        seen across all packets for the given offset, computed from how often
        each distinct value occurs.
        '''
        return self._select( offset, self._entropy )

    def testNormal( self, offset = 'all'):
        '''
        As with testNonZero, except returns the p-value from scipy's
        normaltest (null hypothesis: the values come from a normal distribution).
        '''
        return self._select( offset, lambda b: ss.normaltest( self._dec(b) )[1] )

    def testChi2( self, offset = 'all' ):
        '''
        As with testNonZero, except returns the p-value from scipy's chisquare
        called on the bucket values (TODO: detail of its default behavior)
        '''
        return self._select( offset, lambda b: ss.chisquare( self._dec(b) )[1] )

    def valuePlot( self, offsets = 'all' ):
        '''
        Plot the numerical value of each selected bucket against packet index;
        each bucket is plotted as its own series. TODO: add key
        '''
        if isinstance( offsets, (int, np.integer) ):
            offsets = [ offsets ]
        elif not isinstance( offsets, (list, tuple) ) and offsets == 'all':
            offsets = list( range( len( self.bytelists ) ) )
        if isinstance( offsets, (list, tuple) ):
            for o in offsets:
                pl.plot( self.asDec(o), 'o' )
            pl.show()

    def asDec( self, offset = 'all'):
        '''
        Returns each of the values as an integer. Offsets can be specified as for testX and valueX methods.
        '''
        return self._select( offset, self._dec )

    def asDecSet( self, offset = 'all'):
        '''
        Returns the set of values across the samples for the given offsets as integers.
        '''
        return self._select( offset, lambda b: set( self._dec(b) ) )

    def asHex( self, offset = 'all'):
        '''
        As with asDec, except returns ascii hex values.
        '''
        return self._select( offset, lambda b: [ self._hex(c) for c in b ] )

    def asHexSet( self, offset = 'all'):
        '''
        As with asDecSet, except returns ascii hex values.
        '''
        return self._select( offset, lambda b: set( self._hex(c) for c in b ) )