/
lslog.py
221 lines (167 loc) · 5.82 KB
/
lslog.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Module metadata.
__version__ = "0.2.0"
__author__ = "m.yama"
# Names exported by `from <module> import *`: the two parser entry points.
__all__ = [
"lslR",
"lsl"
]
import re, sys
from datetime import datetime
from posixpath import basename, normpath, join as pathjoin
from util.utils import to_datetime
from util.core import opener
def split_ls(
    line,
    parent=None,
    base_date=None,
    mod_sp = re.compile(" +")
):
    """Split one long-format ``ls -l`` entry line into a flat record.

    Args:
        line: A single entry line (no trailing line ending).
        parent: Directory the entry was listed under. When falsy or ``"."``
            no path columns are appended to the record.
        base_date: Reference date, either a ``datetime`` or a string handed
            to ``to_datetime``; ``datetime.now()`` is used when ``None``.
        mod_sp: Compiled pattern used to cut the line into fields.

    Returns:
        A list ``[permission, owner, group, size, date, time, ext, basename,
        link]``. ``size`` is an ``int`` when it converts cleanly, otherwise
        the raw field. When ``parent`` is usable the list continues with the
        full path followed by one ``"/"``-prefixed item per path component.
    """
    fields = mod_sp.split(line)
    if line.startswith("l"):
        # Link lines carry two extra trailing fields; the one between name
        # and target (presumably the "->" arrow) is discarded.
        perm, _, owner, group, size, *stamp, name, _, link = fields
    else:
        perm, _, owner, group, size, *stamp, name = fields
        link = ""

    # Everything between size and name is treated as the timestamp text.
    when = to_datetime(" ".join(stamp))

    if isinstance(base_date, str):
        reference = to_datetime(base_date)
    elif base_date is None:
        reference = datetime.now()
    else:
        reference = base_date

    # A timestamp later than the reference date is moved to the year before
    # the reference year.
    if when > reference:
        when = when.replace(year=reference.year - 1)
    date_text = when.strftime("%Y/%m/%d")
    time_text = when.strftime("%H:%M:%S")

    try:
        size = int(size)
    except ValueError:
        # Non-numeric size fields are kept verbatim.
        pass

    # Extension is whatever follows the last dot, or "" when there is none.
    _, dot, tail = name.rpartition(".")
    extension = tail if dot else ""

    record = [perm, owner, group, size, date_text, time_text, extension, name, link]
    if not parent or parent == ".":
        return record

    parent = normpath(parent)
    fullpath = pathjoin(parent, name)
    # Directories contribute their own path components, other entries only
    # those of the containing directory.
    anchor = fullpath if perm.startswith("d") else parent
    components = ["/" + part for part in anchor.strip("/").split("/")]
    return record + [fullpath] + components
def parse(
    path_or_buffer,
    recursive=False,
    parent=None,
    base_date=None,
    offset=None,
):
    """Yield one parsed record per entry line of an ``ls -l`` / ``ls -lR`` log.

    Args:
        path_or_buffer: Passed to ``opener``; the result is used as a context
            manager and iterated line by line.
        recursive: When true, lines ending in ``":"`` are treated as
            directory headers that set the parent for the entries after them.
        parent: Parent directory for the entries. In recursive mode it is
            replaced by each directory header encountered.
        base_date: Reference date forwarded to ``split_ls`` in both modes.
        offset: Slice start applied to every right-stripped line, e.g. to
            skip a fixed-width prefix such as a timestamp.

    Yields:
        The list returned by ``split_ls`` for each line whose first character
        is one of ``-dlcb``. In recursive mode entries seen before any parent
        is known are skipped.
    """
    # NOTE(review): FIFO ("p") and socket ("s") entries are not matched here.
    prefix = "-dlcb"
    # A long-format entry opens with a type character and nine mode
    # characters. Used only to tell an entry whose name ends in ":" apart
    # from a directory header.
    entry_head = re.compile(r"[-dlcb][-rwxsStT]{9}")

    with opener(path_or_buffer) as fp:
        for raw in fp:
            line = raw.rstrip()[offset:]
            if not line:
                continue
            if recursive:
                if line.endswith(":") and not entry_head.match(line):
                    # Directory header: remember it and move on. Headers
                    # such as "docs:" or "lib:" start with a type character
                    # and must never reach split_ls.
                    parent = line[:-1]
                    continue
                if not parent:
                    continue
            if line[0] in prefix:
                yield split_ls(line, parent=parent, base_date=base_date)
def lslR(path_or_buffer, parent=None, base_date=None):
    """Parse a recursive (``ls -lR``) log.

    Thin wrapper that calls ``parse`` with ``recursive=True`` and returns
    its generator unchanged.
    """
    records = parse(
        path_or_buffer,
        recursive=True,
        parent=parent,
        base_date=base_date,
    )
    return records
def lsl(path_or_buffer, parent=None, base_date=None):
    """Parse a non-recursive (``ls -l``) log.

    Thin wrapper that calls ``parse`` with ``recursive=False`` and returns
    its generator unchanged.
    """
    records = parse(
        path_or_buffer,
        recursive=False,
        parent=parent,
        base_date=base_date,
    )
    return records
def to_csv(
    iteratable,
    outfile,
    sep=',',
    header=["permission", "owner", "group", "size", "date", "time", "ext", "basename", "link", "fullpath", "DIR*"],
    quotechar='"',
    quoting=0,
    **kw
):
    """Write rows as delimited text.

    Args:
        iteratable: Iterable of row sequences.
        outfile: Either an object with a ``write`` method, used as is, or a
            path that is opened for writing in text mode.
        sep: Field delimiter.
        header: Row written first; skipped when falsy. The default list is
            only read, never modified.
        quotechar: Quote character handed to ``csv.writer``.
        quoting: Quoting mode handed to ``csv.writer``.
        **kw: Extra ``csv.writer`` options. ``lineterminator`` is always
            forced to ``"\\n"``.
    """
    import csv

    # Only close what this function opened; caller-owned streams (such as
    # sys.stdout) are flushed and left open.
    owns_fp = not hasattr(outfile, "write")
    fp = open(outfile, "w") if owns_fp else outfile
    try:
        kw["lineterminator"] = "\n"
        writer = csv.writer(fp, delimiter=sep, quotechar=quotechar, quoting=quoting, **kw)
        if header:
            writer.writerow(header)
        for row in iteratable:
            writer.writerow(row)
        fp.flush()
    finally:
        if owns_fp:
            fp.close()
def to_excel(
    iteratable,
    outfile,
    header=["permission", "owner", "group", "size", "date", "time", "ext", "basename", "link", "fullpath", "DIR*"]
):
    """Write rows to an xlsx workbook with an optional formatted header.

    Args:
        iteratable: Iterable of row sequences; data rows start at sheet
            row 1, leaving row 0 for the header.
        outfile: Target passed to ``xlsxwriter.Workbook``.
        header: Row written at the top with a bold, centred, bordered
            format; skipped when falsy. The default list is only read.

    When no data row was written, ``"No Data"`` is printed to stderr and the
    autofilter is not applied. The workbook is still closed by the ``with``
    block.
    """
    from xlsxwriter import Workbook

    with Workbook(outfile) as book:
        header_fmt = book.add_format(
            dict(border=True, align="center", bold=True))
        sheet = book.add_worksheet()
        if header:
            sheet.write_row(0, 0, header, header_fmt)

        # Count the data rows directly: the header row alone already
        # populates the sheet dimensions, and a last-column index of 0 is
        # falsy, so the dimensions cannot tell whether any data arrived.
        written = 0
        for written, row in enumerate(iteratable, 1):
            sheet.write_row(written, 0, row)

        if not written:
            sys.stderr.write("No Data\n")
            return

        if header:
            # dim_rowmax / dim_colmax are presumably the last used row and
            # column tracked by the worksheet -- confirm against xlsxwriter.
            sheet.autofilter(0, 0, sheet.dim_rowmax, sheet.dim_colmax)
def unicode_escape(x):
    """Interpret backslash escapes in ``x`` (e.g. ``"\\\\t"`` becomes a tab).

    Args:
        x: Text that may contain Python-style escape sequences.

    Returns:
        The text with escape sequences decoded. Characters outside ASCII
        come back unchanged.
    """
    # The unicode_escape codec reads raw bytes as Latin-1, so UTF-8 bytes
    # would garble non-ASCII text. Encoding to Latin-1 and turning
    # everything else into \uXXXX escapes lets the decoder restore every
    # character.
    return x.encode("latin-1", "backslashreplace").decode("unicode_escape")
def main():
    """Command-line entry point: parse ``ls -l`` logs and export the rows.

    Every ``filename`` argument is expanded with ``glob``; all matches are
    parsed and the records are written to ``--outfile``. The output format
    follows the file extension: ``tsv*`` gives tab-separated text, ``xls*``
    gives an Excel workbook, anything else (including stdout or a path with
    no extension) gives CSV with the ``--sep`` separator.

    Raises:
        FileNotFoundError: When no input pattern matches a file.
    """
    from argparse import ArgumentParser, RawDescriptionHelpFormatter
    from glob import glob
    from posixpath import splitext

    usage="""
parse from `ls -lR` log string
Example1: {0} *.log -o fileslist.csv
Example2: {0} *.log -o fileslist.xlsx
""".format(basename(sys.argv[0]).replace(".py", ""))

    # The first positional parameter of ArgumentParser is `prog`, so the
    # text is passed as the description, keeping its line breaks.
    ps = ArgumentParser(description=usage, formatter_class=RawDescriptionHelpFormatter)
    padd = ps.add_argument

    padd('-r', '--recursive', action="store_true", default=False,
         help='input file is recursive?(ls -lR) (default False)')
    padd('-o', '--outfile', type=str, default=sys.stdout,
         help='output filepath (default `stdout`)')
    padd('-b', '--basedate', type=str, default=None,
         help='run ls -lR Date')
    padd('-c', '--currentdirectory', type=str, default=None,
         help='run ls -lR Current Directory Path String')
    padd('-s', '--sep', type=unicode_escape, default=",",
         help='csv output separator (default `,`)')
    padd('--offset', type=int, default=None,
         help='offset bytes Ex. skip bytes timestamp.')
    padd("filename",
         metavar="<filename>",
         nargs="+", default=[],
         help="target `ls -l` / `ls -lR` log files")
    args = ps.parse_args()

    outfile = args.outfile
    recursive = args.recursive
    sep = args.sep
    BASE_DATE = args.basedate
    PARENT = args.currentdirectory
    offset = args.offset

    files = [g for fn in args.filename for g in glob(fn)]
    if not files:
        raise FileNotFoundError("Not found files {}".format(args.filename))

    # Lazy stream of records across all input files.
    rows = (row for f in files for row in parse(f, recursive=recursive, parent=PARENT, base_date=BASE_DATE, offset=offset))

    # Only a real path has an extension. The stdout default and paths
    # without a dot fall through to CSV.
    ext = ""
    if isinstance(outfile, str):
        ext = splitext(outfile)[1].lower().lstrip(".")

    if ext.startswith("tsv"):
        to_csv(rows, outfile, sep="\t")
    elif ext.startswith("xls"):
        to_excel(rows, outfile)
    else:
        to_csv(rows, outfile, sep=sep)
def test(path):
    """Run the embedded smoke checks against the log at ``path``.

    Each nested ``test_*`` function asserts that parsing ``path`` yields
    seven records; every check is timed and its name and duration printed.
    """
    def test_parse_lslR():
        records = list(parse(path, True))
        assert len(records) == 7

    def test_parse_lsl():
        records = list(parse(path))
        assert len(records) == 7

    # Snapshot the local namespace before any loop variable exists, then
    # pick out the callables named test_*.
    snapshot = list(locals().items())
    for label, candidate in snapshot:
        if not label.startswith("test_"):
            continue
        if not callable(candidate):
            continue
        started = datetime.now()
        candidate()
        finished = datetime.now()
        print("{} : time {}".format(label, finished - started))
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    # Disabled ad-hoc smoke check against a local sample log:
    # test("C:/temp/lsdir.log")
    main()