/
filespace.py
129 lines (105 loc) · 4.19 KB
/
filespace.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
import os
import math
import hashlib
import binascii
from functools import reduce
from util import read, write, exists, delete
import objects
INDEX_RELPATH = os.path.join('.bvc', 'INDEX')
# (mtime, size)
def wstat(relpath):
st = os.stat(relpath)
return (str(st.st_mtime), str(st.st_size))
def hashbuf(buf):
hasher = hashlib.sha1()
bb = bytes(buf, 'utf-8')
BLOCK_SIZE = 65536
for i in range(0, math.ceil(len(bb) / BLOCK_SIZE)):
data = binascii.b2a_hqx(bb[i * BLOCK_SIZE: (i + 1) * BLOCK_SIZE])
hasher.update(data)
return hasher.hexdigest()
# blob_hash, rel_path, mtime, size
def read_index():
bc = read(INDEX_RELPATH)
if not bc:
return []
return list(map(lambda t: t.split('\t'), bc.split('\n')))
# updates the index, the files to be updated come from a tree
def update_index(tc):
nri = list()
for i,t in enumerate(tc):
st = wstat(t[1])
nri.append((t[0], t[1], st[0], st[1]))
write_index(nri)
# blob_hash, rel_path, mtime, size
def write_index(ri):
write(INDEX_RELPATH, '\n'.join(map(lambda t: '\t'.join(map(str, t)), ri)))
# adds to stage or the index, if add_to is False, then remove from index instead
def stage(relpath, add_to=True):
ri = read_index()
# find the first instance of relpath
ridx = next(( i for i,j in enumerate(ri) if j[1] == relpath ), None)
# index structure
# blob_hash, rel_path, mtime, size
if add_to:
bs = wstat(relpath)
# file is already in the index, it might need to be updated with a new blob, and stats
if ridx is not None:
# the file has been updated, update its stats
if ri[ridx][2] != bs[0] or ri[ridx][2] != bs[0]:
bc = read(relpath)
bh = hashbuf(bc)
# if the blob does not already exists, then create, otherwise, reuse
if not exists(bh):
objects.write(bh, bc)
ri[ridx] = (bh, relpath, bs[0], bs[1])
else:
bc = read(relpath)
bh = hashbuf(bc)
# if the blob does not already exists, then create, otherwise, reuse
if not exists(bh):
objects.write(bh, bc)
ri.append((bh, relpath, bs[0], bs[1]))
# bvc stage rm relpath
# remove the file from working directory and remove from index
else:
if ridx is not None:
delete(relpath)
del ri[ridx]
# sort the index
sri = sorted(ri, key=lambda x: x[1])
write_index(sri)
"""
Finds the 4 statuses ( added to stage, removed from stage, staged changes, unstaged changes )
Expects tree contents ( list -> [ (blob-hash, relpath) ] )
"""
def diff_tree_with_index(tree):
ct = tree
ri = read_index()
# retrieves all relpaths
cif = set(map(lambda x: x[1], ct))
rif = set(map(lambda x: x[1], ri))
# (blob_hash, relpath)
cii = set(map(lambda x: tuple(x[0:2]), ct))
rii = set(map(lambda x: tuple(x[0:2]), ri))
# find the added to stage (files that are in stage but not in commit tree)
added = rif - cif
# find the removed from stage (files that are in commit tree but not in stage)
removed = cif - rif
# files thats been modified and already in index
staged = set(map(lambda t: t[1], rii - cii)) - added - removed
# find the modified tracked files
modified = set()
for re in ri:
if re[1] in staged:
continue
ws = wstat(re[1])
if re[2] != ws[0] or re[3] != ws[1]:
modified.add(re[1])
# walks through the directory, ignores ./.bvc directory, merges os.walk into relpath, and then strips ./ from the front of each file
ws = reduce(lambda p,c: c.union(p) ,[ set(map(lambda f: ("%s/%s" %(t[0], f))[2:], t[2])) for t in os.walk('.') if './.bvc' not in t[0][0:6] ], set())
# relpaths in index
rf = set(map(lambda x: x[1], ri))
# untracks are sorted in descending: level of depth in directory (closest to base comes first), a-z
untracked = sorted(ws - rf, key=lambda x: (x.count('/'), x))
return (added, removed, staged, modified, untracked)