def create_run(testset, fp, mungename):
    """ Make a run. A run is a testset with a specific fingerprinter.
    Files are put through a list of munges before being looked up
    in the fingerprinter. """
    if fp not in fingerprint.fingerprint_index:
        raise Exception("Unknown fingerprint name %s" % fp)
    munges = munge.munge_classes.keys()
    for m in mungename.split(","):
        m = m.strip()
        if m not in munges:
            raise Exception("Unknown munge %s" % m)
    testset = int(testset)
    ts = db.session.query(Testset).get(testset)
    if ts is None:
        raise Exception("Testset %d not in database" % testset)
    run = Run(testset, mungename, fp)
    db.session.add(run)
    db.session.commit()

    # Once the run has been created, make a queue that contains
    # all the testfiles to be evaluated
    thequeue = queue.FpQueue("run_%s" % run.id)
    for tf in ts.testfiles:
        data = {"testfile_id": tf.id}
        thequeue.put(data)
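# Hedged usage sketch, not part of the original module. The testset id,
# engine name and munge name below are hypothetical, and assume they are
# registered in fingerprint.fingerprint_index / munge.munge_classes:
def _example_create_run():
    # Evaluate testset 1 with echoprint, passing each file through a
    # (hypothetical) "shorten30" munge before lookup. On success this
    # adds a Run row and fills the "run_<id>" queue with one message
    # per testfile in the testset.
    create_run(1, "echoprint", "shorten30")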
def add_queue(run):
    """ Queue any testfiles for this run that do not yet have
    a Result row. """
    print "Adding missing files for run %s" % run
    results = db.session.query(evaluation.Result.testfile_id)\
            .filter(evaluation.Result.run_id == run).subquery()
    testfiles = db.session.query(evaluation.Testfile)\
            .filter(~evaluation.Testfile.id.in_(results))
    thequeue = queue.FpQueue("run_%s" % run)
    for tf in testfiles:
        data = {"testfile_id": tf.id}
        thequeue.put(data)
def delete_all(self):
    # Erase solr and tokyo tyrant
    echoprint_support.fp.erase_database(True)
    # Erase the local database
    db.session.query(EchoprintModel).delete()
    db.session.commit()
    # Remove any pending ingest messages
    q = queue.FpQueue("ingest_echoprint")
    q.clear_queue()
def reset_run(run):
    """Reset a run.

    Clear the start/end date and all results, then clear and
    repopulate the queue.
    """
    r = db.session.query(Run).get(run)
    if r is None:
        raise Exception("Run %s not in database" % run)
    db.session.query(Result).filter(Result.run_id == run).delete()
    r.started = None
    r.finished = None
    db.session.add(r)
    db.session.commit()

    thequeue = queue.FpQueue("run_%s" % run)
    thequeue.clear_queue()
    for tf in r.testset.testfiles:
        data = {"testfile_id": tf.id}
        thequeue.put(data)
def add_queue(engine):
    """ Queue all non-negative files that have not yet been
    ingested into this engine. """
    engine_map = fingerprint.fingerprint_index.get(engine)
    if engine_map is None:
        raise Exception("%s is not a valid fingerprint engine" % engine)
    engine_table = engine_map.get("dbmodel")
    cur = db.session.query(db.FPFile).filter(db.FPFile.negative == False)\
            .outerjoin(engine_table).filter(engine_table.file_id == None)
    log.info("got %d things to add to the %s queue" % (cur.count(), engine))
    thequeue = queue.FpQueue("ingest_%s" % engine)
    thequeue.clear_queue()
    for f in cur:
        d = {"id": f.id}
        thequeue.put(d)
    log.info("...done")
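# Hedged usage sketch (assumes "echoprint" is a registered engine, and that
# add_queue() and the importer main() below live in the same module, which
# is not certain):
def _example_reingest(engine="echoprint"):
    add_queue(engine)  # queue every file the engine is missing
    main(engine)       # drain the queue and ingest the fingerprints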
def delete(engine):
    """ Call the delete-specific method for a fingerprint engine to
    remove all traces of it from this eval test, and from the engine's
    specific storage system. """
    engine_map = fingerprint.fingerprint_index.get(engine)
    engine_class = engine_map.get("instance") if engine_map else None
    if not engine_class:
        raise Exception("%s is not a valid fingerprint engine" % engine)
    instance = engine_class()
    instance.delete_all()
    thequeue = queue.FpQueue("ingest_%s" % engine)
    thequeue.clear_queue()
def main(engine):
    thequeue = queue.FpQueue("ingest_%s" % engine)
    engine_map = fingerprint.fingerprint_index.get(engine)
    engine_class = engine_map.get("instance") if engine_map else None
    if not engine_class:
        raise Exception("%s is not a valid fingerprint engine" % engine)
    engine_table = engine_map.get("dbmodel")
    instance = engine_class()
    log.info("Importing files for engine %s" % engine)

    fp_list = []
    ack_handles = []
    count = 0
    print "%s to import" % thequeue.size()
    while True:
        data, handle = thequeue.get()
        if data is None:
            break
        cur = db.session.query(db.FPFile).filter(db.FPFile.id == data["id"])
        f = cur.one()
        (trackid, fpdata) = instance.fingerprint(f.path)
        # A failed fingerprint is a dict with an "error" key; check the
        # type before looking for the key
        error = isinstance(fpdata, dict) and "error" in fpdata
        if not error:
            ack_handles.append(handle)
            e = engine_table(f, trackid)
            db.session.add(e)
            fp_list.append(fpdata)
            count += 1
        else:
            # Don't ack the handle, so the file stays on the queue
            log.debug("Error parsing file %s. Error was: %s" % (f, fpdata["error"]))
        # Ingest every 100 songs
        if len(fp_list) > 99:
            log.info("Ingesting 100 files at once")
            queuesize = thequeue.size()
            log.info("%d done, %d remaining" % (count, queuesize))
            db.session.commit()
            instance.ingest_many(fp_list)
            fp_list = []
            for h in ack_handles:
                thequeue.ack(h)
            ack_handles = []

    # After there's no more data, import the remaining files
    log.info("Ingesting remaining %d files" % len(fp_list))
    instance.ingest_many(fp_list)
    db.session.commit()
    for h in ack_handles:
        thequeue.ack(h)
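# The loop above defers ack() until ingest_many() and the DB commit have
# succeeded, so a crash mid-batch leaves those files on the queue to be
# retried. A minimal consumer showing the same pattern; the FpQueue API
# (get/ack/size) is inferred from its use in this module:
def _example_drain(queue_name, process):
    q = queue.FpQueue(queue_name)
    while True:
        data, handle = q.get()
        if data is None:
            # data is None once the queue is drained, as in main() above
            break
        process(data)
        # Ack only after the work succeeded, so failures are retried
        q.ack(handle)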
def delete_all(self):
    """ Delete all entries from the local database table
    and also any external stores """
    # Delete from the local database
    db.session.query(LandmarkModel).delete()
    db.session.commit()
    # Delete the hash file
    try:
        os.unlink("landmarkdb.mat")
    except OSError:
        pass
    q = queue.FpQueue("ingest_landmark")
    q.clear_queue()
def stats():
    cur = db.session.query(EchoprintModel)
    print "Number of records: %d" % cur.count()
    numtyrant = len(echoprint_support.fp.get_tyrant())
    print "Number of TT records: %d" % numtyrant

    with echoprint_support.solr.pooled_connection(
            echoprint_support.fp._fp_solr) as host:
        cur = host.query("*:*", fields="track_id", rows=10000)
        numsolr = cur.results.numFound
        # Counting unique Solr track ids is disabled; it would look
        # something like this:
        # uniqsolr = set()
        # while cur.results is not None:
        #     for r in cur.results:
        #         uniqsolr.add(r["track_id"][:-1])
        #     cur = cur.next_batch()
    print "Number of Solr records: %s" % numsolr

    alltyrant = echoprint_support.fp.get_tyrant().iterkeys()
    uniqtt = set()
    for x in alltyrant:
        uniqtt.add(x.split("-")[0])
    print "Number of unique TT records: %s" % len(uniqtt)

    q = queue.FpQueue("ingest_echoprint")
    print "Ingest queue size: %s" % q.size()
def stats():
    q = queue.FpQueue("ingest_landmark")
    print "Ingest queue size: %s" % q.size()
def execute_run(run_id):
    """ Execute a run. This does most of the magic. """
    run = db.session.query(Run).filter(Run.id == run_id)
    if run.count() == 0:
        raise Exception("No run with this id")
    else:
        run = run.one()
    if run.started is None:
        now = datetime.datetime.now()
        now = now.replace(microsecond=0)
        run.started = now
        db.session.add(run)
        db.session.commit()
    engine = run.engine
    munges = run.munge
    fpclass = fingerprint.fingerprint_index[engine]
    fp = fpclass["instance"]()

    thequeue = queue.FpQueue("run_%s" % run.id)
    ack_handles = []
    log.info("Reading queue for run %s. Got %s files" % (run.id, thequeue.size()))
    num_lookups = fp.num_lookups()
    to_lookup = []
    count = 0

    def do_fp(to_lookup):
        try:
            res = fp.lookup(to_lookup)
            if res is None:
                return False
            for r in res:
                fptime = r["fptime"]
                lookuptime = r["lookuptime"]
                fpresult = r["result"]
                t = r["track"]
                newpath = r["file"]
                remove_file(newpath)
                result = Result(run, t.id, fpresult, int(fptime), int(lookuptime))
                db.session.add(result)
            return True
        except Exception as e:
            log.warning("Error performing fingerprint")
            log.warning(e)
            raise

    done_fp = False
    while True:
        data, handle = thequeue.get()
        if data is None:
            break
        ack_handles.append(handle)
        # Find the FpFile that this Testfile points to
        t = db.session.query(Testfile).filter(
                Testfile.id == data["testfile_id"]).one()
        fpfile = t.file
        metadata = fp.pre_lookup(fpfile.path)
        newpath = munge_file(fpfile.path, munges)
        if newpath is None or not os.path.exists(newpath):
            log.warning("File %s doesn't exist, not fingerprinting it" % newpath)
            continue
        to_lookup.append({"track": t, "file": newpath, "data": metadata})

        done_fp = False
        if len(to_lookup) >= num_lookups:
            done_fp = do_fp(to_lookup)
            log.debug("lookup result is: %s (bool)" % done_fp)
            to_lookup = []
        count += 1
        # Log and commit periodically: once per batch if the engine does
        # batched lookups, otherwise every 10 files
        if (num_lookups > 1 and len(to_lookup) == 0) or \
                (num_lookups <= 1 and count % 10 == 0):
            log.info("%s more files to evaluate" % thequeue.size())
            db.session.commit()
        if done_fp:
            for h in ack_handles:
                thequeue.ack(h)
            ack_handles = []
            done_fp = False

    if len(to_lookup) > 0:
        # Last fingerprint
        done_fp = do_fp(to_lookup)
        log.debug("last - done_fp is %s" % done_fp)
    else:
        done_fp = True

    # Mark the run as done
    now = datetime.datetime.now().replace(microsecond=0)
    run.finished = now
    # Finish any acks that are required
    if done_fp:
        for h in ack_handles:
            thequeue.ack(h)
    db.session.add(run)
    db.session.commit()
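# Hedged end-to-end sketch, not part of the original module: a full
# evaluation is create_run() followed by execute_run(). create_run() does
# not return the new id, so this looks up the newest Run row (an
# assumption; adapt if runs are created concurrently, and note these
# helpers may live in separate modules in the real codebase):
def _example_full_evaluation(testset_id, engine, munges):
    create_run(testset_id, engine, munges)
    run = db.session.query(Run).order_by(Run.id.desc()).first()
    execute_run(run.id)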