Exemple #1
0
def create_run(testset, fp, mungename):
    """
    Make a run. A run is a testset with a specific fingerprinter.
    Files are put through a list of munges before looked up in
    the fingerprinter.

    Arguments:
        testset: id (int or numeric string) of an existing Testset row
        fp: name of a registered fingerprint engine
        mungename: comma-separated list of munge names
    Raises Exception if the engine, any munge, or the testset is unknown.
    """
    # Membership test directly on the dict -- .keys() would build a
    # throwaway list just to search it.
    if fp not in fingerprint.fingerprint_index:
        raise Exception("Unknown fingerprint name %s" % (fp))
    munges = munge.munge_classes.keys()
    for m in mungename.split(","):
        m = m.strip()
        if m not in munges:
            raise Exception("Unknown munge %s" % (m))
    testset = int(testset)
    ts = db.session.query(Testset).get(testset)
    if ts is None:
        raise Exception("Testset %d not in database" % testset)
    run = Run(testset, mungename, fp)
    db.session.add(run)
    db.session.commit()

    # Once the run has been created, make a queue that contains
    # all the testfiles to be evaluated
    thequeue = queue.FpQueue("run_%s" % run.id)
    for tf in ts.testfiles:
        data = {"testfile_id": tf.id}
        thequeue.put(data)
def add_queue(run):
    print "Adding missing files for run %s" % run
    results = db.session.query(evaluation.Result.testfile_id).filter(evaluation.Result.run_id==run).subquery()
    testfiles = db.session.query(evaluation.Testfile).filter(~evaluation.Testfile.id.in_(results))
    thequeue = queue.FpQueue("run_%s" % run)
    for tf in testfiles:
        data = {"testfile_id": tf.id}
        thequeue.put(data)
Exemple #3
0
 def delete_all(self):
     """Remove every echoprint record from all backing stores.

     Wipes the remote solr / tokyo tyrant index, the local database
     table, and anything still pending on the ingest queue.
     """
     # Erase solr and tokyo tyrant
     echoprint_support.fp.erase_database(True)
     # Erase the local database
     db.session.query(EchoprintModel).delete()
     db.session.commit()
     # Drop any files still waiting to be ingested
     q = queue.FpQueue("ingest_echoprint")
     q.clear_queue()
Exemple #4
0
def reset_run(run):
    """Reset a run.
    Clear the start/end date and all results, then clear and repopulate
    the queue.

    Arguments:
        run: id of an existing Run row
    """
    # Delete any results already recorded against this run
    db.session.query(Result).filter(Result.run_id == run).delete()
    r = db.session.query(Run).get(run)
    r.started = None
    r.finished = None
    db.session.add(r)
    db.session.commit()
    # Empty the run's queue, then refill it with every testfile in the
    # run's testset. One FpQueue object suffices for both operations.
    thequeue = queue.FpQueue("run_%s" % run)
    thequeue.clear_queue()
    for tf in r.testset.testfiles:
        data = {"testfile_id": tf.id}
        thequeue.put(data)
Exemple #5
0
def add_queue(engine):
    """Queue every non-negative FPFile that `engine` has not ingested yet.

    Arguments:
        engine: name of a registered fingerprint engine
    Raises Exception if `engine` is not a registered engine (previously
    this crashed with an AttributeError on the None dbmodel).
    """
    engine_map = fingerprint.fingerprint_index.get(engine)
    engine_table = engine_map.get("dbmodel") if engine_map else None
    if engine_table is None:
        # Fail early with a clear message, consistent with delete()
        raise Exception("%s is not a valid fingerprint engine" % (engine))
    # Files with no row in the engine's table (outer join leaves file_id
    # NULL). NOTE: SQLAlchemy column expressions require == False / == None
    # here; `is` / `not` would not build the SQL predicate.
    cur = db.session.query(db.FPFile).filter(db.FPFile.negative == False)\
            .outerjoin(engine_table).filter(engine_table.file_id == None)
    log.info("got %d things to add to the %s queue" % (cur.count(), engine))
    thequeue = queue.FpQueue("ingest_%s" % engine)
    thequeue.clear_queue()
    for f in cur:
        d = {"id": f.id}
        thequeue.put(d)
    log.info("...done")
Exemple #6
0
def delete(engine):
    """ Call the delete-specific method for a fingerprint engine to
        remove all traces of it from this eval test, and from
        the engine's specific storage system """
    mapping = fingerprint.fingerprint_index.get(engine)
    fp_class = mapping.get("instance") if mapping else None
    if not fp_class:
        raise Exception("%s is not a valid fingerprint engine" % (engine))
    # Let the engine remove its own external storage, then drop the queue
    fp_class().delete_all()
    queue.FpQueue("ingest_%s" % engine).clear_queue()
Exemple #7
0
def main(engine):
    thequeue = queue.FpQueue("ingest_%s" % engine)
    engine_map = fingerprint.fingerprint_index.get(engine)
    engine_class = engine_map.get("instance") if engine_map else None
    if not engine_class:
        raise Exception("%s is not a valid fingerprint engine" % (engine))
    engine_table = engine_map.get("dbmodel") if engine_map else None

    instance = engine_class()

    log.info("Importing files for engine %s" % (engine))
    fp_list = []
    ack_handles = []
    count = 0
    print "%s to import" % thequeue.size()
    while True:
        data, handle = thequeue.get()
        if data is None:
            break
        cur = db.session.query(db.FPFile).filter(db.FPFile.id == data["id"])
        f = cur.one()
        (trackid, fpdata) = instance.fingerprint(f.path)
        error = "error" in fpdata and type(fpdata) == type({})
        if not error:
            ack_handles.append(handle)
            e = engine_table(f, trackid)
            db.session.add(e)
            fp_list.append(fpdata)
            count += 1
        else:
            log.debug("Error parsing file %s. Error was: %s" %
                      (f, fpdata["error"]))

        # Ingest every 100 songs
        if len(fp_list) > 99:
            log.info("Ingesting 100 files at once")
            queuesize = thequeue.size()
            log.info("%d done, %d remaining" % (count, queuesize))
            db.session.commit()
            instance.ingest_many(fp_list)
            fp_list = []
            for h in ack_handles:
                thequeue.ack(h)
            ack_handles = []

    # After there's no more data, import the remaining files
    log.info("Ingesting remaining %d files" % len(fp_list))
    instance.ingest_many(fp_list)
    db.session.commit()
    for h in ack_handles:
        thequeue.ack(h)
Exemple #8
0
    def delete_all(self):
        """ Delete all entries from the local database table
            and also any external stores
        """
        # Wipe the local landmark table first
        db.session.query(LandmarkModel).delete()
        db.session.commit()
        # The hash file may never have been written; that's fine
        try:
            os.unlink("landmarkdb.mat")
        except OSError:
            pass

        # Finally drop anything still waiting on the ingest queue
        ingest_queue = queue.FpQueue("ingest_landmark")
        ingest_queue.clear_queue()
Exemple #9
0
def stats():
    cur = db.session.query(EchoprintModel)
    print "Number of records: %d" % cur.count()
    numtyrant = len(echoprint_support.fp.get_tyrant())
    print "Number of TT records: %d" % numtyrant
    uniqsolr = set()
    with echoprint_support.solr.pooled_connection(
            echoprint_support.fp._fp_solr) as host:
        cur = host.query("*:*", fields="track_id", rows=10000)
        numsolr = cur.results.numFound
        #while cur.results is not None:
        #    for r in cur.results:
        #        uniqsolr.add(r["track_id"][:-1])
        #    cur = cur.next_batch()
    print "Number of Solr records: %s" % numsolr
    alltyrant = echoprint_support.fp.get_tyrant().iterkeys()
    uniqtt = set()
    for x in alltyrant:
        uniqtt.add(x.split("-")[0])
    print "Number of unique TT records: %s " % len(uniqtt)
    q = queue.FpQueue("ingest_echoprint")
    print "Ingest queue size: %s" % q.size()
Exemple #10
0
def stats():
    q = queue.FpQueue("ingest_landmark")
    print "Ingest queue size: %s" % q.size()
Exemple #11
0
def execute_run(run_id):
    """
    Execute a run.

    Pops every testfile off the run's queue, munges a copy of each file,
    fingerprints it (batched, for engines that support multi-file lookup)
    and records one Result row per file. Sets the run's started/finished
    timestamps.

    Arguments:
        run_id: id of an existing Run row
    Raises Exception if no Run with that id exists.
    """
    run = db.session.query(Run).filter(Run.id == run_id)
    if run.count() == 0:
        raise Exception("No run with this id")
    else:
        run = run.one()

    # Stamp the start time only the first time this run is executed
    if run.started is None:
        now = datetime.datetime.now()
        now = now.replace(microsecond=0)
        run.started = now
    db.session.add(run)
    db.session.commit()

    engine = run.engine
    munges = run.munge
    fpclass = fingerprint.fingerprint_index[engine]
    fp = fpclass["instance"]()
    thequeue = queue.FpQueue("run_%s" % run.id)
    ack_handles = []  # queue handles to ack once their batch is committed
    log.info("Reading queue for run %s. Got %s files" %
             (run.id, thequeue.size()))
    # How many files this engine can look up in a single call
    num_lookups = fp.num_lookups()
    to_lookup = []
    count = 0

    def do_fp(to_lookup):
        # Perform one (possibly batched) lookup. Returns True on success,
        # False when the engine returned nothing. On success, deletes each
        # munged temp file and adds a Result row (NOT committed here --
        # the caller commits and acks).
        try:
            res = fp.lookup(to_lookup)
            if res is None:
                return False
            for r in res:
                fptime = r["fptime"]
                lookuptime = r["lookuptime"]
                fpresult = r["result"]
                t = r["track"]
                newpath = r["file"]

                remove_file(newpath)
                result = Result(run, t.id, fpresult, int(fptime),
                                int(lookuptime))
                db.session.add(result)
            return True
        except Exception as e:
            log.warning("Error performing fingerprint")
            log.warning(e)
            raise

    while True:
        data, handle = thequeue.get()
        if data is None:
            break
        ack_handles.append(handle)
        # Find the FpFile that this Testfile points to
        t = db.session.query(Testfile).filter(
            Testfile.id == data["testfile_id"]).one()
        fpfile = t.file
        metadata = fp.pre_lookup(fpfile.path)

        # Apply the run's munges to a copy of the file
        newpath = munge_file(fpfile.path, munges)

        if newpath is None or not os.path.exists(newpath):
            log.warning("File %s doesn't exist, not fingerprinting it" %
                        newpath)
            continue

        to_lookup.append({"track": t, "file": newpath, "data": metadata})

        done_fp = False
        if len(to_lookup) >= num_lookups:
            done_fp = do_fp(to_lookup)
            log.debug("lookup result is: %s (bool)" % done_fp)
            to_lookup = []

        count += 1
        # Commit (and ack) periodically: after each batch for batched
        # engines, or every 10 files for single-lookup engines. Handles
        # are only acked when the lookup actually succeeded.
        if (num_lookups > 1 and len(to_lookup) == 0) or (num_lookups <= 1
                                                         and count % 10 == 0):
            log.info("%s more files to evaluate" % thequeue.size())
            db.session.commit()
            if done_fp:
                for h in ack_handles:
                    thequeue.ack(h)
            ack_handles = []

    done_fp = False
    if len(to_lookup) > 0:
        # Last fingerprint
        done_fp = do_fp(to_lookup)
        print "last - done_fp is %s" % done_fp
    else:
        done_fp = True

    # Mark the run as done
    now = datetime.datetime.now().replace(microsecond=0)
    run.finished = now
    # Finish any acks that are required
    if done_fp:
        for h in ack_handles:
            thequeue.ack(h)
    db.session.add(run)
    db.session.commit()