# tr_log_reader.py
import re
import sys
import os
import Queue
import cPickle
import copy
from logger_factory import logger
from config import DOWNLOAD_LOCATION
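# Extracts the dotted-quad address from a peer string such as
# "1.2.3.4:port" or "[1.2.3.4]:port" (brackets optional).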
_peeraddr_re = re.compile(r'^\[?([0-9.]+)\]?:')
class TrLog:
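    """In-memory representation of a parsed torrent transfer log:
    the downloaded files, the received chunks, and the peers they
    came from."""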
CACHE_VERSION = 0.4
def lastchunktime(self):
return self.chunks[-1]["t"]
def lastchunklength(self):
return self.chunks[-1]["end"] - self.chunks[-1]["begin"]
def averagechunklength(self):
return sum(map(lambda x: x["end"] - x["begin"], self.chunks)) / len(self.chunks)
def total_file_size(self):
return sum([f["length"] for f in self.files])
@staticmethod
def sort_chunks_sequentially(chunks):
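        """Simulate a sequential download: reorder the chunks by byte
        offset while keeping the original sequence of timestamps."""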
times = map(lambda x: x["t"], chunks)
        sorted_chunks = sorted(chunks, key=lambda x: x["begin"])
        result = []
        for i, chunk in enumerate(sorted_chunks):
            chunk["t"] = times[i]
            result.append(chunk)
        return result
def save_cache(self, filename):
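        """Serialize the parsed log as a fixed sequence of pickles;
        from_cache reads them back in the same order."""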
        f = open(filename, 'wb')
cPickle.dump(self.files, f)
cPickle.dump(self.chunks, f)
cPickle.dump(self.peers, f)
cPickle.dump(self.peeraddr_to_id, f)
cPickle.dump(self.totalsize, f)
cPickle.dump(self.CACHE_VERSION, f)
cPickle.dump(self.chunks_reduced_passivity, f)
cPickle.dump(self.file_location, f)
f.close()
@staticmethod
def from_cache(filename):
log = TrLog()
        f = open(filename, 'rb')
log.files = cPickle.load(f)
log.chunks = cPickle.load(f)
log.peers = cPickle.load(f)
log.peeraddr_to_id = cPickle.load(f)
log.totalsize = cPickle.load(f)
try:
actual_cache_version = cPickle.load(f)
if actual_cache_version != TrLog.CACHE_VERSION:
raise Exception("Session cache of unsupported version (expected %s, found %s). Try to delete the cache file (%s) manually and then retry what you just attempted." % (
TrLog.CACHE_VERSION, actual_cache_version, filename))
log.chunks_reduced_passivity = cPickle.load(f)
log.file_location = cPickle.load(f)
except EOFError:
raise Exception("Failed to read session cache. The cache file (%s) is probably depcrecated. Try to delete cache file manually and then retry what you just attempted." % filename)
f.close()
return log
def flatten(self):
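        """Merge runs of byte-adjacent chunks that share a peer, a
        timestamp and a file into single, larger chunks."""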
result = []
peer_cursor = {}
for chunk in self.chunks:
            previous_chunk = peer_cursor.get(chunk['peeraddr'])
            if (previous_chunk is not None
                and previous_chunk['t'] == chunk['t']
                and previous_chunk['filenum'] == chunk['filenum']
                and previous_chunk['end'] == chunk['begin']):
                previous_chunk['end'] = chunk['end']
            else:
                result.append(chunk)
                peer_cursor[chunk['peeraddr']] = chunk
self.chunks = result
def _ignore_non_downloaded_files(self):
for filenum in reversed(range(len(self.files))):
f = self.files[filenum]
if not (self._file_exists(f) and self._has_chunks_in_log(filenum)):
self._remove_file(filenum)
def _file_exists(self, f):
        return os.path.exists(os.path.join(self.file_location, f["name"]))
    def _has_chunks_in_log(self, filenum):
        for chunk in self.chunks:
            if chunk["filenum"] == filenum:
                return True
        return False
def select_files(self, selected_filenums):
for filenum in reversed(range(len(self.files))):
if filenum not in selected_filenums:
self._remove_file(filenum)
def _remove_file(self, filenum):
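        """Delete a file and its chunks, shifting the byte offsets and
        file numbers of all subsequent files and chunks down."""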
        f = self.files[filenum]
        file_length = f["length"]
chunks_to_delete = []
for index in range(len(self.chunks)):
chunk = self.chunks[index]
if chunk["filenum"] == filenum:
chunks_to_delete.append(index)
elif chunk["filenum"] > filenum:
chunk["begin"] -= file_length
chunk["end"] -= file_length
chunk["filenum"] -= 1
for index in reversed(chunks_to_delete):
del self.chunks[index]
for index in range(filenum+1, len(self.files)):
f = self.files[index]
f["offset"] -= file_length
del self.files[filenum]
def _reduce_max_passivity(self, chunks, max_passivity):
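        """Return a copy of chunks in which every gap between consecutive
        timestamps longer than max_passivity is compressed to exactly
        max_passivity seconds."""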
previous_t = 0
reduced_time = 0
result = copy.deepcopy(chunks)
for i in range(len(result)):
if (result[i]["t"] - reduced_time - previous_t) > max_passivity:
reduced_time += result[i]["t"] - reduced_time - previous_t - max_passivity
result[i]["t"] -= reduced_time
previous_t = result[i]["t"]
return result
class TrLogReader:
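    """Parses torrent info, file info and chunk-transfer lines from a
    torrent client debug log, either in one pass or in realtime mode,
    where chunks are delivered through chunks_queue instead."""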
NO_MORE_CHUNKS = {}
def __init__(self, logfilename, torrent_name="",
realtime=False, pretend_sequential=False):
self.logfilename = logfilename
self.torrent_name = torrent_name
self.realtime = realtime
self.pretend_sequential = pretend_sequential
if self.realtime:
self.chunks_queue = Queue.Queue()
self.id = None
self.files = []
self.peers = []
self.peeraddr_to_id = {}
self._chunk_count = 0
self.file_location = DOWNLOAD_LOCATION
def get_log(self,
use_cache=True,
ignore_non_downloaded_files=True,
max_passivity=1.0,
reduced_passivity=False):
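        """Parse the log file (or load a previously cached parse) and
        return the resulting TrLog."""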
if use_cache and os.path.exists(self._cache_filename()):
log = TrLog.from_cache(self._cache_filename())
else:
self.logfile = open(self.logfilename, "r")
self._process_torrent_info()
self._process_chunks()
self.logfile.close()
log = TrLog()
log.file_location = self.file_location
log.files = self.files
log.chunks = self.chunks
log.peers = self.peers
log.peeraddr_to_id = self.peeraddr_to_id
log.totalsize = self.totalsize
if ignore_non_downloaded_files:
log._ignore_non_downloaded_files()
if max_passivity:
log.chunks_reduced_passivity = log._reduce_max_passivity(self.chunks, max_passivity)
if use_cache:
self._cache_log(log)
if reduced_passivity:
log.chunks = log.chunks_reduced_passivity
return log
def _cache_log(self, log):
log.save_cache(self._cache_filename())
def _cache_filename(self):
return self.logfilename + '.cache'
def _process_torrent_info(self):
self._process_until_selected_torrent()
self._process_files_info()
self._warn_if_any_unsupported_values()
def _process_until_selected_torrent(self):
logger.debug("selecting torrent")
        initialized_re = re.compile(r'initialized torrent (\d+): name=(.*) totalSize=(\d+) fileCount=(\d+) pieceSize=(\d+) pieceCount=(\d+)( fileLocation=(.+))?')
for line in self.logfile:
line = line.rstrip("\r\n")
logger.debug("processing: %s" % line)
m = initialized_re.search(line)
if m:
                (id, name, totalsize, filecount, piecesize, piececount, _, file_location) = m.groups()
if self.torrent_name == "" or re.search(self.torrent_name, name):
self.id = int(id)
self.name = name
self.totalsize = int(totalsize)
self.numfiles = int(filecount)
self.piecesize = int(piecesize)
if file_location:
self.file_location = file_location
break
        if self.id is None:
logger.debug("no torrent found")
raise Exception("no torrent found")
logger.debug("selected torrent '%s' (TID=%d totalsize=%d piecesize=%d)" % \
(self.name, self.id, self.totalsize, self.piecesize))
def _process_files_info(self):
if not self.realtime:
self.logfile.seek(0)
        file_info_re = re.compile(r'^TID=%d file=(\d+) offset=(\d+) length=(\d+) firstPiece=(\d+) lastPiece=(\d+) name=(.*)$' % self.id)
logger.debug("starting to search for file info in log")
for line in self.logfile:
line = line.rstrip("\r\n")
logger.debug("processing: %s" % line)
m = file_info_re.search(line)
if m:
self._process_file_info_line(m)
if len(self.files) == self.numfiles:
return
raise Exception("failed to find file info about all %d files (only found %d)" % (
self.numfiles, len(self.files)))
def _warn_if_any_unsupported_values(self):
        files_with_long_value = [
            f for f in self.files
            if any(isinstance(value, long) for value in f.values())]
        if files_with_long_value:
            print >>sys.stderr, "WARNING: these files require long (unsupported) rather than int type:"
            for f in files_with_long_value:
                print >>sys.stderr, f["name"]
def _process_chunks(self):
self.numdata = 0
self.time_offset = None
        self._chunk_re = re.compile(r'^\[(\d+)\] TID=%d peer=([^ ]+) got (\d+) bytes for block (\d+) at offset (\d+) in file (\d+) at offset (\d+) \.\.\. remaining (\d+) of (\d+)$' % self.id)
self.filenummax = 0
if not self.realtime:
self.chunks = []
logger.debug("starting to search for chunk info in log")
for line in self.logfile:
line = line.rstrip("\r\n")
logger.debug("processing: %s" % line)
self._process_chunk_line(line)
if self.realtime:
self.chunks_queue.put_nowait(self.NO_MORE_CHUNKS)
elif self.pretend_sequential:
self.chunks = self.sort_chunks_sequentially(self.chunks)
def _process_chunk_line(self, line):
chunk = self._parse_chunk_line(line)
if chunk:
chunks = self._split_chunk_at_file_boundaries(chunk)
for chunk in chunks:
self._add_chunk(chunk)
def _split_chunk_at_file_boundaries(self, chunk):
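        """Split a chunk that straddles file boundaries into one chunk
        per overlapped file, clipped to each file's byte range."""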
        result = []
        for filenum, f in enumerate(self.files):
            if self._chunk_matches_file(chunk, f):
                new_chunk = copy.copy(chunk)
                new_chunk["id"] = self._next_chunk_id()
                new_chunk["filenum"] = filenum
                new_chunk["begin"] = max(chunk["begin"], f["offset"])
                new_chunk["end"] = min(chunk["end"], f["offset"] + f["length"])
                result.append(new_chunk)
        return result
def _chunk_matches_file(self, chunk, f):
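        """True if the chunk's half-open byte range [begin, end) overlaps
        the file's byte range."""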
return (f["offset"] <= chunk["begin"] < (f["offset"] + f["length"]) or
f["offset"] < chunk["end"] < (f["offset"] + f["length"]))
def _next_chunk_id(self):
result = self._chunk_count
self._chunk_count += 1
return result
def _parse_chunk_line(self, line):
m = self._chunk_re.search(line)
if not m:
return None
(t,peeraddr,nbytes,blockindex,blockoffset,filenum,fileoffset,remain,blocksize) = m.groups()
filenum = int(filenum)
self.filenummax = max(self.filenummax, filenum)
t = int(t)
        if self.time_offset is None:
self.time_offset = t
t = float(t - self.time_offset) / 1000
nbytes = int(nbytes)
blockindex = int(blockindex)
blockoffset = int(blockoffset)
remain = int(remain)
blocksize = int(blocksize)
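        # Absolute byte range of this chunk within the torrent: the bytes
        # already completed inside the block, plus the block's start
        # (block indices appear to be counted in piece-size units here).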
b1 = (blockoffset+blocksize-remain-nbytes) + (blockindex*self.piecesize)
b2 = b1 + nbytes
chunk = {"t": t,
"begin": b1,
"end": b2,
"peeraddr": self._parse_peeraddr(peeraddr),
"filenum": filenum}
return chunk
def _parse_peeraddr(self, string):
m = _peeraddr_re.search(string)
if m:
return m.group(1)
else:
return string
def _add_peer_unless_already_added(self, peeraddr):
if peeraddr not in self.peeraddr_to_id:
self.peeraddr_to_id[peeraddr] = len(self.peers)
self.peers.append(peeraddr)
def _process_file_info_line(self, m):
(file_id,offset,length,firstpiece,lastpiece,name) = m.groups()
file_id = int(file_id)
info = {"offset": int(offset),
"length": int(length),
"firstpiece": int(firstpiece),
"lastpiece": int(lastpiece),
"name": name}
self.files.insert(file_id, info)
def _add_chunk(self, chunk):
self._add_peer_unless_already_added(chunk["peeraddr"])
if self.realtime:
self.chunks_queue.put_nowait(chunk)
else:
self.chunks.append(chunk)
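

# A minimal usage sketch (the log filename below is hypothetical; not part
# of the original module). It parses a debug log in the format handled
# above and prints a summary. ignore_non_downloaded_files is disabled so
# that no files need to exist under DOWNLOAD_LOCATION.
if __name__ == "__main__":
    reader = TrLogReader("transmission-debug.log")
    log = reader.get_log(use_cache=False, ignore_non_downloaded_files=False)
    print "parsed %d files, %d chunks from %d peers" % (
        len(log.files), len(log.chunks), len(log.peers))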