# echoprint.py
import json
import os
import subprocess
import time
import fingerprint
import db
import sqlalchemy
import conf
import queue
import echoprint_support.fp
import echoprint_support.solr
# Import-time configuration: the module refuses to load unless the
# "echoprint" section exists in the project configuration.
if not conf.has_section("echoprint"):
    raise Exception("No echoprint configuration section present")
# Connection settings read from the "echoprint" section; the tyrant port is
# parsed as an integer, the other values are read with conf.get.
s = conf.get("echoprint", "solr_server")
th = conf.get("echoprint", "tyrant_host")
tp = conf.getint("echoprint", "tyrant_port")
# Overwrite echoprint_support.fp's module-level connection settings with a
# Solr connection pool built from `s` and a [host, port] pair.
echoprint_support.fp._fp_solr = echoprint_support.solr.SolrConnectionPool(s)
echoprint_support.fp._tyrant_address = [th, tp]
# Path of the executable that Echoprint._codegen launches as a subprocess.
codegen_path = conf.get("echoprint", "codegen_path")
class EchoprintModel(db.Base):
    """Row of the "echoprint" table: associates a file id with a track id."""

    __tablename__ = "echoprint"

    # Integer primary key.
    id = sqlalchemy.Column(sqlalchemy.Integer, primary_key=True)
    # Required reference to the `id` column of the `file` table.
    file_id = sqlalchemy.Column(
        sqlalchemy.Integer,
        sqlalchemy.ForeignKey('file.id'),
        nullable=False
    )
    # Track id string, at most 20 characters.
    trid = sqlalchemy.Column(sqlalchemy.String(20))

    def __init__(self, file, trid):
        """Store ``file.id`` and ``trid`` on the new row.

        ``file`` is any object exposing an ``id`` attribute.
        """
        self.file_id, self.trid = file.id, trid

    def __repr__(self):
        """Return a short debugging representation of the row."""
        values = (self.id, self.file_id, self.trid)
        return "<Echoprint(id=%s, file=%s, trid=%s)>" % values
class Echoprint(fingerprint.Fingerprinter):
    """Fingerprinter that shells out to the codegen binary and uses
    echoprint_support.fp for ingestion, lookup and erasure.
    """

    def fingerprint(self, file):
        """Generate a fingerprint record for ``file``.

        Runs codegen on the file and allocates a new track id. Only the
        first element of the codegen output is used.

        Returns:
            A ``(trid, record)`` tuple. ``record`` always contains
            ``"track_id"``. When codegen produced a ``"code"`` entry it also
            contains the decoded code under ``"fp"``, the codegen version
            under ``"codever"``, every metadata key, and ``"length"`` (a copy
            of ``"duration"``). Otherwise the raw codegen output is stored
            under ``"error"``.

        Raises:
            KeyError: if a ``"code"`` entry is present but the metadata lacks
                ``"version"`` or ``"duration"``.
        """
        data = self._codegen(file)
        trid = echoprint_support.fp.new_track_id()
        data = data[0]
        ret = {}
        ret["track_id"] = trid
        if "code" in data:
            ret["fp"] = echoprint_support.fp.decode_code_string(data["code"])
            ret["codever"] = data["metadata"]["version"]
            # Metadata is merged last, so its keys win over the ones set above.
            ret.update(data["metadata"])
            ret["length"] = ret["duration"]
        else:
            ret["error"] = data
        return (trid, ret)

    def _codegen(self, file, start=-1, duration=-1):
        """Run the codegen binary on ``file`` and return its parsed JSON output.

        Args:
            file: path of the audio file; made absolute before being passed on.
            start: start offset; omitted from the command line when not
                positive and no duration is given.
            duration: duration to analyse; omitted when not positive.

        Returns:
            The decoded JSON document, or ``[{}]`` when the output is not
            valid JSON (the raw output is printed in that case).
        """
        proclist = [codegen_path, os.path.abspath(file)]
        # start and duration are consecutive positional arguments, so a
        # duration may only be emitted after a start offset. Without this a
        # call with only `duration` set would pass it in the start slot.
        if start > 0 or duration > 0:
            proclist.append("%d" % max(start, 0))
        if duration > 0:
            proclist.append("%d" % duration)
        p = subprocess.Popen(proclist, stdout=subprocess.PIPE)
        code = p.communicate()[0]
        try:
            return json.loads(code)
        except ValueError:
            print("Error loading")
            print(code)
            return [{}]

    def ingest_many(self, data):
        """Ingest a list of fingerprint records and commit them."""
        # echoprint ingest will take a list then commit
        echoprint_support.fp.ingest(data, do_commit=True)

    def lookup(self, files):
        """Look up a single file and record the match and timings.

        Args:
            files: a sequence holding exactly one dict with a ``"file"`` key
                (the path to query).

        Returns:
            A one-element list containing the same dict, updated in place with
            ``"result"`` (the ``TRID`` of the best match), ``"fptime"`` and
            ``"lookuptime"`` (both in milliseconds).

        Raises:
            Exception: if more than one file is passed.
        """
        if len(files) > 1:
            raise Exception("Can only look up one file at a time")
        res = files[0]
        fname = res["file"]
        stime = time.time()
        data = self._codegen(fname)
        mtime = time.time()
        codegen = data[0]
        if "code" in codegen:
            code = codegen["code"]
        else:
            # No code was produced: show what codegen returned and fall back
            # to querying with an empty code string.
            print(codegen)
            code = ""
        match = echoprint_support.fp.best_match_for_query(code)
        etime = time.time()
        # time.time() is in seconds; report milliseconds.
        fptime = (mtime - stime) * 1000
        looktime = (etime - mtime) * 1000
        res["result"] = match.TRID
        res["fptime"] = fptime
        res["lookuptime"] = looktime
        return [res]

    def delete_all(self):
        """Erase all echoprint data: remote stores, local rows and the ingest queue."""
        # Erase solr and tokyo tyrant
        echoprint_support.fp.erase_database(True)
        # Erase the local database
        db.session.query(EchoprintModel).delete()
        db.session.commit()
        # Drop anything still waiting in the "ingest_echoprint" queue.
        q = queue.FpQueue("ingest_echoprint")
        q.clear_queue()
# Register this module under the "echoprint" key of the shared fingerprint
# index. Note that "instance" holds the Echoprint class itself, not an
# instantiated object.
fingerprint.fingerprint_index["echoprint"] = {
    "dbmodel": EchoprintModel,
    "instance": Echoprint
}
# Runs at import time, after EchoprintModel has been declared.
# NOTE(review): presumably creates any missing tables — confirm against db.create_tables.
db.create_tables()
def stats():
    """Print record counts for the local table, the tyrant store, Solr and
    the ingest queue, one line per figure.
    """
    fp = echoprint_support.fp

    local_rows = db.session.query(EchoprintModel)
    print("Number of records: %d" % local_rows.count())

    print("Number of TT records: %d" % len(fp.get_tyrant()))

    with echoprint_support.solr.pooled_connection(fp._fp_solr) as host:
        response = host.query("*:*", fields="track_id", rows=10000)
        # Only the reported numFound is used; the returned rows themselves
        # are not inspected.
        numsolr = response.results.numFound
    print("Number of Solr records: %s" % numsolr)

    # Tyrant keys are collapsed on the part before the first "-".
    uniqtt = set(key.split("-")[0] for key in fp.get_tyrant().iterkeys())
    print("Number of unique TT records: %s " % len(uniqtt))

    ingest_queue = queue.FpQueue("ingest_echoprint")
    print("Ingest queue size: %s" % ingest_queue.size())
# When executed as a script (rather than imported), print the statistics report.
if __name__ == "__main__":
    stats()