forked from antoine/ibrouteur
-
Notifications
You must be signed in to change notification settings - Fork 1
/
indexmanager.py
185 lines (163 loc) · 5.96 KB
/
indexmanager.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
import os
import sys
import config
import web
import indexbuilder
from index import db
try:
from datetime import date
except:
from mx.DateTime import Date as date
#import p23_lib.distutils.filelist as filelist
def findall (dir = os.curdir):
    """Find all files under 'dir' and return the list of full filenames.

    Every returned name is prefixed with 'dir', except when 'dir' is
    os.curdir, in which case the names are left bare.

    The tree is walked iteratively with an explicit stack of directories
    still to visit.  Only regular files are returned; directories are
    descended into and every other kind of entry is ignored.  os.stat()
    follows symbolic links, so a link to a file is reported and a link to
    a directory is descended into.

    Entries that cannot be stat'ed (a dangling symbolic link, or a file
    removed while the scan is running) are skipped instead of aborting
    the whole scan with OSError.
    """
    #copied from the python2.3 libs for easier adaptation to python 2.1
    from stat import ST_MODE, S_ISREG, S_ISDIR
    found = []
    pending = [dir]
    while pending:
        current = pending.pop()
        for name in os.listdir(current):
            if current != os.curdir:    # avoid the dreaded "./" syndrome
                fullname = os.path.join(current, name)
            else:
                fullname = name
            # Avoid excess stat calls -- just one will do, thank you!
            try:
                mode = os.stat(fullname)[ST_MODE]
            except OSError:
                # dangling symlink, or entry removed since listdir()
                continue
            if S_ISREG(mode):
                found.append(fullname)
            elif S_ISDIR(mode):
                # no S_ISLNK test here: a mode obtained through os.stat()
                # describes the link target, so it never reports a link
                pending.append(fullname)
    return found
def sync():
    """Bring the image database in line with the image files on disk.

    Steps:
      1. list the files on disk (get_files) and the filenames stored in
         the images table;
      2. diff the two lists: entries present on both sides are dropped,
         leaving the files only on disk and the filenames only in the DB;
      3. index every file that is only on disk (build_all).  build_all
         receives the DB-only list and may remove entries from it;
      4. delete every filename still in the DB-only list from the images
         table and from the images_<index>s relation tables;
      5. delete the values of each multi-valued index that are no longer
         referenced by any image.

    Progress is reported on stdout.
    """
    disk_entries = get_files()
    db_entries = [f['filename'] for f in db.query("select filename from images")]
    print("%s disk entries\n%s db entries\n" % (len(disk_entries), len(db_entries)))

    # Diff through a set so that each membership test is constant time;
    # scanning the disk list for every DB row is quadratic.
    unmatched = set(disk_entries)
    extra_db_entries = []
    for f in db_entries:
        if f in unmatched:
            unmatched.remove(f)
        else:
            extra_db_entries.append(f)
    # keep the on-disk order for the files that still need indexing
    disk_entries = [f for f in disk_entries if f in unmatched]

    print("%s EXTRA disk entries" % (len(disk_entries)))
    build_all(disk_entries, extra_db_entries)

    print("\n%s EXTRA db entries" % (len(extra_db_entries)))
    for f in extra_db_entries:
        print("removing %s from DB" % f)
        #remove the info from the db
        # NOTE(review): the filename is interpolated straight into the SQL
        # text; a name containing a double quote breaks the statement.
        # Switch to the db layer's parameter binding once its API is confirmed.
        file_id = db.query('select id from images where filename = "%s"' % f)[0].id
        db.query("delete from images where id = %s" % file_id)
        for imv in config.have_many_values:
            #removing the infos from the various multiple indexes
            db.query("delete from images_%ss where image_id = %s" % (imv, file_id))

    for imv in config.have_many_values:
        print("cleaning the possibles orphan values for the index %s" % imv)
        db.query("delete from %ss where id not in (select %s_id from images_%ss)" % (imv, imv, imv))
def get_files(root_dir = config.base_dir):
    """Return the image files found under root_dir.

    A file is kept when its last three characters, upper-cased, are listed
    in config.supported_extensions and the four characters just before
    them are not ".ni.".
    """
    selected = []
    for path in findall(root_dir):
        if path[-3:].upper() not in config.supported_extensions:
            continue
        if path[-7:-3] == ".ni.":
            continue
        selected.append(path)
    return selected
def build_all(files=None, extra_db_entries=None, batch=None):
    """Build and store the indexes for the given list of files.

    files            -- filenames to index; when None the list is read
                        from disk with get_files() at call time.
    extra_db_entries -- list of filenames known to the DB but missing on
                        disk; it is handed to store_infos, which may
                        remove entries from it.  Defaults to a new empty
                        list.
    batch            -- value stored under "batch" for every file;
                        defaults to today's date, computed at call time.

    The defaults are resolved inside the function: evaluating them in the
    signature would scan the disk and freeze the date once, at import.
    """
    if files is None:
        files = get_files()
    if extra_db_entries is None:
        extra_db_entries = []
    if batch is None:
        batch = date.today()
    # build a fresh list: extending config.additional_indexes in place
    # would make it grow on every call
    idx_types = list(config.additional_indexes) + list(config.built_indexes)
    print("found %s images to index" % (len(files)))
    for f in files:
        infos = extract_infos(f, idx_types)
        infos["batch"] = batch
        store_infos(infos, extra_db_entries)
def extract_infos(f, idx_types):
    """Collect the index values of the file f.

    For each name in idx_types, the function get_<name> is looked up on
    the indexbuilder module and called with f.  The result is a dict
    holding f under "filename" plus one entry per index type.  If a
    lookup or a call fails, the file and the exception type are printed
    and the exception is re-raised.
    """
    collected = {"filename" : f}
    for kind in idx_types:
        try:
            getter = getattr(indexbuilder, "get_%s" % kind)
            collected[kind] = getter(f)
        except:
            # say which file broke the indexing, then let the error propagate
            message = "[ERR] file %s is like a piece of dirty old cheese because \n%s "
            print(message % (f, sys.exc_info()[0]))
            raise
    return collected
def store_infos(infos, extra_db_entries):
    """Store the extracted infos of one image in the database.

    infos            -- dict of index values for the file; it must hold
                        'filename' and 'sha'.  Keys listed in
                        config.have_many_values hold a sequence of values
                        and go to their own <index>s / images_<index>s
                        tables; all other keys are columns of the images
                        table.
    extra_db_entries -- list of filenames present in the DB but missing
                        on disk.  When the file turns out to be one of
                        them under a new name, that name is removed from
                        the list.

    When exactly one image row has the same sha and its filename is in
    extra_db_entries, the file is treated as renamed: the row is updated
    and keeps its original batch.  In every other case a new image row is
    inserted.
    """
    print(" %s" % (infos))
    #web.debug(" %s" % (infos))

    # split the single-valued columns from the multi-valued indexes
    simple_infos = infos.copy()
    multiple_infos = {}
    for imv in config.have_many_values:
        if imv in simple_infos:
            multiple_infos[imv] = simple_infos.pop(imv)

    #checking for file renaming with sha
    # NOTE(review): the SQL in this function is built by string
    # interpolation; index values containing a double quote break the
    # value lookup below.  Move to the db layer's parameter binding once
    # its API is confirmed.
    possiblePrevFiles = db.query("select id, filename, batch from images where sha ='"+infos['sha']+"'")
    updatingFile = False
    file_id = None
    if len(possiblePrevFiles) == 1:
        #file found in db
        print("INFO duplicate found : "+infos['filename'])
        prevFile = possiblePrevFiles[0]
        try:
            # only the removal is guarded, so a ValueError raised by the
            # update itself is not mistaken for "filename not in the list"
            extra_db_entries.remove(prevFile.filename)
        except ValueError:
            #raised by .remove when the previous file is still on disk
            print("WARNING duplicate sha accross fileset, creating new entry")
        else:
            file_id = prevFile.id
            simple_infos['batch'] = prevFile.batch
            db.update('images', 'id = %s' % file_id, None, **simple_infos)
            updatingFile = True
    elif len(possiblePrevFiles) > 1:
        #more than one file with this sha...
        print("INFO sha present multiple time for file : "+infos["filename"])

    if not updatingFile:
        # every path that did not update an existing row creates one, as
        # the messages above announce
        file_id = db.insert('images', True, **simple_infos)

    for index, values in multiple_infos.items():
        #store the value in its table
        for value in values:
            try:
                value_id = db.insert(index+'s', True, **{"value" : value})
            except Exception:
                #TODO should be IntegrityError for mysql but not sure how best integrate that without breaking the DB abstraction...
                #but if the error wasn't an IntegrityError then the next line should fail
                value_id = db.query('select id from %ss where value = "%s"' % (index, value))[0].id
            #store the relationship between the value and the file
            try:
                db.insert("images_"+index+'s', False, **{index+"_id": value_id, "image_id" : file_id})
            except Exception:
                #when updating a file the relationship may already exist
                if not updatingFile:
                    raise
def debuginsert(tablename, seqname=None, **values):
    """Print the INSERT statement for tablename instead of running it.

    With column values, the statement text (one web.aparam() placeholder
    per column) is printed together with the values.  Without any, the
    DEFAULT VALUES form is printed.  seqname is accepted but not used.
    """
    if not values:
        print("INSERT INTO %s DEFAULT VALUES" % tablename)
        return
    columns = ", ".join(values.keys())
    placeholders = ', '.join([web.aparam() for _ in values])
    statement = "INSERT INTO %s (%s) VALUES (%s)" % (tablename, columns, placeholders)
    print(statement, values.values())
def __to_dict_list(dict, key, value):
    """Append value to the list stored under key in dict.

    The list is created when key is not present yet.  The mapping is
    modified in place and nothing is returned.
    """
    # setdefault replaces the has_key() test, which Python 3 dictionaries
    # no longer provide
    dict.setdefault(key, []).append(value)
def test():
    """Render 'browse.html' twice: with name set to 'Bob', then to None."""
    # NOTE(review): name is never passed explicitly; presumably web.render
    # reads it from this function's local variables, so it has to stay a
    # local called `name` -- confirm against the web module in use.
    for name in ('Bob', None):
        web.render('browse.html')