/
landmark.py
182 lines (158 loc) · 5.77 KB
/
landmark.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
# export DYLD_LIBRARY_PATH=/Applications/MATLAB_R2011b.app/bin/maci64/
import fingerprint
import db
import sqlalchemy
import conf
import log
import subprocess
import queue
import sqlalchemy.dialects.mysql
import os
import codecs
import tempfile
# Fail fast at import time if the deployment has no [landmark] section;
# everything below depends on it.
if not conf.has_section("landmark"):
    raise Exception("No landmark configuration section present")
# Filesystem path to the audfprint executable used for ingest and lookup.
FPRINT_PATH = conf.get("landmark", "audfprint_path")
class LandmarkModel(db.Base):
    """ORM row linking a file to its audfprint (landmark) track id.

    One row per fingerprinted file: ``file_id`` references the ``file``
    table, ``trid`` holds the identifier audfprint uses for that track.
    """
    __tablename__ = "landmark"
    id = sqlalchemy.Column(sqlalchemy.Integer, primary_key=True)
    # The file this fingerprint belongs to (FK into the file table).
    file_id = sqlalchemy.Column(sqlalchemy.Integer, sqlalchemy.ForeignKey('file.id'), nullable=False)
    # Track identifier as known to the audfprint hashtable.
    trid = sqlalchemy.Column(sqlalchemy.dialects.mysql.VARCHAR(255, charset='utf8'))
    def __init__(self, file, trid):
        self.file_id = file.id
        self.trid = trid
    def __repr__(self):
        # Fixed: the old format string labelled trid as "id" and left
        # file_id unlabelled, which was misleading in logs.
        return "Landmark<file_id=%s, trid=%s>" % (self.file_id, self.trid)
class Landmark(fingerprint.Fingerprinter):
    # Fingerprinter backed by the external audfprint ("landmark") binary.
    # The hashtable database lives in ./landmarkdb.mat in the working
    # directory; items are keyed by file path relative to conf.path.
    def fingerprint(self, file):
        """
        The audfprint app uses filenames to reference items in the hashtable.
        So, we just return (shortname, fname) as the (key, data).
        shortname is the file name less the prefix in the config
        file. Data is just the filename to pass to audfprint
        """
        read_path = conf.path
        shortname = file.replace(read_path, "")
        # Strip a leading "/" so the stored key is a relative path.
        if shortname.startswith("/"):
            shortname = shortname[1:]
        return shortname, file
    def num_lookups(self):
        """
        The number of files to look up at a time
        """
        # Batch size: lookup() is handed up to this many files per call,
        # amortising the cost of one audfprint process invocation.
        return 100
    def lookup(self, files):
        """ Look up a file and return the unique fp identifier """
        # Write the batch of candidate file paths to a temp "matchlist"
        # file, one per line, for audfprint to consume.
        fp,tmpname = tempfile.mkstemp()
        os.close(fp)
        # Re-open by name so we can write utf-8 text (py2 codecs).
        fp = codecs.open(tmpname, "w", "utf8")
        for f in files:
            fname = f["file"]
            # We do a quick check that this file actually exists
            # and is size > 0, so that matlab doesn't hate it.
            if fname.endswith(".mp3"):
                args = ["mp3info", "-r", "m", "-p", "%Q %u %b %r %v * %C %e %E %L %O %o %p", fname]
                out, err, ret = self.run_process(args)
                if ret != 0:
                    log.warning("Testing a file with mp3info gave a BAD result")
                    log.warning(fname)
            else:
                # If it's not an mp3 (wav) then 'success'
                ret = 0
            # Only include files that exist, are non-empty, and passed
            # the mp3info sanity check above.
            if os.path.exists(fname) and os.path.getsize(fname) > 0 and ret == 0:
                fp.write("%s\n" % fname)
        fp.close()
        # Run audfprint in match mode against the landmark database.
        args = [FPRINT_PATH, "-dbase", "landmarkdb", "-matchlist", tmpname]
        log.debug(args)
        data, err, retval = self.run_process(args)
        res = data.split("\n")
        os.unlink(tmpname)
        log.debug(data)
        log.debug(err)
        # Any stderr output is treated as a fatal failure for the batch.
        if err != "":
            print "Stderr has content (FAILED) returning."
            return None
        ret = []
        for f in files:
            infile = f["file"]
            # Match lines appear to start with "<query path> 1 ..." —
            # encode to utf-8 bytes to compare against subprocess output
            # (py2 str). NOTE(review): the meaning of the literal "1"
            # field is not visible here — presumably a match count/flag.
            matches = [x for x in res if x.startswith("%s 1" % infile.encode("utf-8"))]
            if len(matches) == 0:
                # No match: null result, zeroed timing fields.
                f["result"] = None
                f["fptime"] = 0
                f["lookuptime"] = 0
            else:
                # Line layout (inferred from this parsing): the matched
                # path occupies parts[2:-2] (it may contain spaces), the
                # last two fields are numeric timings.
                parts = matches[0].split(" ")
                name = parts[2:-2]
                read_path = conf.path
                name = " ".join(name)
                # Reduce the matched path to the same relative-key form
                # produced by fingerprint() above.
                shortname = name.replace(read_path, "")
                if shortname.startswith("/"):
                    shortname = shortname[1:]
                f["result"] = shortname
                f["fptime"] = int(parts[-2])
                # Stored as hundredths (float seconds * 100, truncated).
                f["lookuptime"] = int(float(parts[-1]) * 100)
            ret.append(f)
        return ret
    def ingest_many(self, data):
        """ Bulk import a list of data. May loop through data
        and do ingest single, or may do a bulk import
        """
        if not len(data):
            return
        # If the database file doesn't exist, we need to add the flag to
        # create it the first time we run
        args = [FPRINT_PATH, "-dbase", "landmarkdb"]
        if not os.path.exists("landmarkdb.mat"):
            args.extend(["-cleardbase", "1"])
        args.append("-addlist")
        # Write the list of files to add into a temp file, one per line.
        fp,fname = tempfile.mkstemp()
        os.close(fp)
        fp = codecs.open(fname, "w", "utf8")
        for line in data:
            # Skip paths containing '!' or '&' — presumably these break
            # the matlab side; TODO confirm why these are excluded.
            if "!" not in line and "&" not in line:
                fp.write("%s\n" % line)
        fp.close()
        log.debug("importing from %s" % fname)
        args.append(fname)
        data, err, retval = self.run_process(args)
        os.unlink(fname)
        log.debug(data)
    def run_process(self, args):
        """ Run some args with subprocess and get *all* stdout """
        # Route stdout/stderr through temp files instead of pipes so
        # arbitrarily large output cannot deadlock the child process.
        outfp,outname = tempfile.mkstemp()
        errfp,errname = tempfile.mkstemp()
        p = subprocess.Popen(args, stdout=outfp, stderr=errfp)
        p.wait()
        os.close(outfp)
        os.close(errfp)
        with open(outname) as fp:
            o = fp.read()
        with open(errname) as fp:
            e = fp.read()
        os.unlink(outname)
        os.unlink(errname)
        # Returns (stdout, stderr, exit code).
        return o, e, p.returncode
    def delete_all(self):
        """ Delete all entries from the local database table
        and also any external stores
        """
        # Delete from the local database
        db.session.query(LandmarkModel).delete()
        db.session.commit()
        # Delete hash file
        try:
            os.unlink("landmarkdb.mat")
        except OSError:
            # Already absent — nothing to do.
            pass
        # Drop any pending ingest work for this fingerprinter.
        q = queue.FpQueue("ingest_landmark")
        q.clear_queue()
# Register this fingerprinter under the "landmark" key so the framework
# can resolve both the ORM model and the implementation class by name.
fingerprint.fingerprint_index["landmark"] = {
    "dbmodel": LandmarkModel,
    "instance": Landmark
}
# Create the landmark table (and any other pending tables) at import time.
db.create_tables()
def stats():
q = queue.FpQueue("ingest_landmark")
print "Ingest queue size: %s" % q.size()
if __name__ == "__main__":
    # When run as a script, just print queue statistics.
    stats()