def add_missing():
    """Add randomly chosen, not-yet-used fingerprint files to the testset.

    Picks up to ``num_pos`` non-negative and up to ``num_neg`` negative
    ``db.FPFile`` rows that have no ``evaluation.Testfile`` row yet and whose
    audio is at least 60 seconds long, wraps each one in a ``Testfile`` for
    the testset ``testset_id`` and commits.

    ``num_pos``, ``num_neg`` and ``testset_id`` are not defined in this
    function; they are read from the enclosing scope.
    NOTE(review): the original comment talks about topping the testset up to
    10,000 files -- that shortfall is not computed here, confirm the caller
    sets ``num_pos``/``num_neg`` accordingly.

    If there are not enough long-enough candidates, the files that were found
    are still added and the shortfall is printed (previously this crashed with
    an ``IndexError`` from popping an empty list).
    """
    # Outer join + "Testfile.id is NULL" keeps only files without a testfile.
    files = db.session.query(db.FPFile).filter(db.FPFile.negative == False)\
            .outerjoin(evaluation.Testfile).filter(evaluation.Testfile.id==None).all()
    print("need %s candidate pos files. got %s to choose from" % (num_pos, len(files)))
    random.shuffle(files)
    positives = _take_long_enough(files, num_pos)
    if len(positives) < num_pos:
        print("only found %s of %s pos files that are long enough" % (len(positives), num_pos))

    neg = db.session.query(db.FPFile).filter(db.FPFile.negative == True)\
            .outerjoin(evaluation.Testfile).filter(evaluation.Testfile.id==None).all()
    print("need %s candidate neg files. got %s to choose from" % (num_neg, len(neg)))
    random.shuffle(neg)
    # Negatives are counted on their own so a shortage of positives is not
    # silently filled up with extra negatives.
    negatives = _take_long_enough(neg, num_neg)
    if len(negatives) < num_neg:
        print("only found %s of %s neg files that are long enough" % (len(negatives), num_neg))

    todo = positives + negatives
    print("adding %s files" % len(todo))
    testset = db.session.query(evaluation.Testset).get(testset_id)
    for fpfile in todo:
        db.session.add(evaluation.Testfile(testset, fpfile))
    db.session.commit()


def _take_long_enough(candidates, wanted, min_duration=60.0):
    """Return up to `wanted` candidates whose audio lasts >= `min_duration` seconds.

    Candidates are examined in the given order and the scan stops as soon as
    enough have been found, so files are only opened while more are needed.
    Running out of candidates is not an error; a shorter list is returned.
    """
    chosen = []
    for candidate in candidates:
        if len(chosen) >= wanted:
            break
        with audioread.audio_open(candidate.path.encode("utf-8")) as f:
            duration = f.duration
        if duration >= min_duration:
            chosen.append(candidate)
    return chosen
Example #2
0
def short(testset_id=4, min_duration=60.0):
    """Actually delete bad testfiles (and all their results).

    A testfile is "bad" when the duration reported by audioread for its
    underlying file is below `min_duration` seconds. For each such testfile
    the matching ``evaluation.Result`` rows are deleted first, then the
    ``evaluation.Testfile`` row itself. Everything is committed once at the
    end, and the new testfile count plus the number of deleted negative and
    positive files are printed.

    Args:
        testset_id: id of the testset whose testfiles are checked
            (defaults to 4, the value that used to be hard-coded).
        min_duration: minimum acceptable duration in seconds (default 60.0).
    """
    countneg = 0
    countpos = 0

    testfiles = db.session.query(evaluation.Testfile).filter(evaluation.Testfile.testset_id==testset_id)
    print("Number testfiles: %s" % testfiles.count())
    for i, tf in enumerate(testfiles):
        if i % 100 == 0:
            # Progress indicator.
            print(i)
        path = tf.file.path.encode("utf-8")
        with audioread.audio_open(path) as f:
            duration = f.duration
        if duration >= min_duration:
            continue

        if tf.file.negative:
            countneg += 1
        else:
            countpos += 1
        print("Removing short duration file: %s (%s)" % (path, duration))
        # Results are removed before the testfile they refer to.
        cur = db.session.query(evaluation.Result).filter(evaluation.Result.testfile_id==tf.id)
        print("%d results to remove" % cur.count())
        cur.delete()
        db.session.query(evaluation.Testfile).filter(evaluation.Testfile.id==tf.id).delete()
    db.session.commit()

    # Build a fresh query so the count reflects the committed deletions.
    remaining = db.session.query(evaluation.Testfile).filter(evaluation.Testfile.testset_id==testset_id)
    print("New number testfiles: %s" % remaining.count())
    print("deleted negative: %s" % countneg)
    print("deleted positive: %s" % countpos)
Example #3
0
 def getExecCommand(self, fromfile, tofile):
     """Build the sox command line(s) that combine `fromfile` with `self.mixfile`.

     Returns a single argument list when the source reports a 44100 Hz
     sample rate or cannot be decoded, otherwise a tuple of two argument
     lists: the first resamples to 44100 and writes sox-format data to "-",
     the second mixes "-" with `self.mixfile` into `tofile`.
     (Presumably the caller pipes the first command into the second.)
     """
     from chromaprint_support import audioread

     try:
         with audioread.audio_open(fromfile) as audio:
             direct_mix = audio.samplerate == 44100
     except audioread.DecodeError:
         # Sample rate unknown: fall back to the plain one-step mix.
         direct_mix = True

     if direct_mix:
         return ["sox", "-m", fromfile, self.mixfile, tofile, "trim", "0", "35"]

     resample_cmd = ["sox", fromfile, "-t", "sox", "-", "trim", "0", "35", "rate", "44100"]
     mix_cmd = ["sox", "-m", "-t", "sox", "-", self.mixfile, tofile]
     return (resample_cmd, mix_cmd)
Example #4
0
 def getExecCommand(self, fromfile, tofile):
     """Return the sox argument list(s) used to mix `self.mixfile` into `fromfile`.

     * 44100 Hz source, or a source audioread cannot decode: one list that
       mixes both inputs into `tofile` with a "trim 0 35" effect.
     * any other sample rate: a 2-tuple ``(resample, mix)`` where `resample`
       trims and converts the source to 44100 on "-" and `mix` combines "-"
       with `self.mixfile` into `tofile`.
     """
     from chromaprint_support import audioread

     def mix_only():
         # One-step command shared by the 44100 Hz and the decode-failure paths.
         return [
             "sox", "-m", fromfile, self.mixfile, tofile, "trim", "0", "35"
         ]

     try:
         with audioread.audio_open(fromfile) as source:
             is_44k = source.samplerate == 44100
     except audioread.DecodeError:
         return mix_only()

     if is_44k:
         return mix_only()

     resample = [
         "sox", fromfile, "-t", "sox", "-", "trim", "0", "35",
         "rate", "44100"
     ]
     mix = ["sox", "-m", "-t", "sox", "-", self.mixfile, tofile]
     return (resample, mix)
Example #5
0
def rate(testset_id=4, expected_rate=44100, delete=False):
    """Report (and optionally delete) results for files with an unexpected samplerate.

    Opens every testfile of the testset with audioread and, for each file
    whose samplerate differs from `expected_rate`, prints the path, the rate
    and how many ``evaluation.Result`` rows refer to it.

    By default nothing is deleted: the delete call was commented out in the
    original, so the default is a dry run that only counts. Pass
    ``delete=True`` to actually remove those results so they can be re-done.

    Args:
        testset_id: id of the testset to inspect (default 4, the value that
            used to be hard-coded).
        expected_rate: samplerate that is considered fine (default 44100).
        delete: when true, delete the results of the offending testfiles.
    """
    c = 0
    testfiles = db.session.query(evaluation.Testfile).filter(evaluation.Testfile.testset_id==testset_id)
    print("Number testfiles: %s" % testfiles.count())
    for i, tf in enumerate(testfiles):
        if i % 100 == 0:
            # Progress indicator.
            print(i)
        path = tf.file.path.encode("utf-8")
        with audioread.audio_open(path) as f:
            rate = f.samplerate
        if rate == expected_rate:
            continue

        c += 1
        print("Unexpected samplerate: %s (%s)" % (path, rate))
        cur = db.session.query(evaluation.Result).filter(evaluation.Result.testfile_id==tf.id)
        print("%d results to remove" % cur.count())
        if delete:
            cur.delete()
    db.session.commit()

    testfiles = db.session.query(evaluation.Testfile).filter(evaluation.Testfile.testset_id==testset_id)
    print("to change %s" % c)
    # Testfile rows are never removed here, so this count is expected to match
    # the one printed at the start.
    print("New number testfiles: %s" % testfiles.count())
Example #6
0
 def pre_lookup(self, file):
     """Open `file` with audioread and return ``{"duration": <its duration>}``."""
     with audioread.audio_open(file) as audio:
         info = {"duration": audio.duration}
     return info
Example #7
0
 def pre_lookup(self, file):
     """Return a dict whose "duration" entry is the duration audioread reports for `file`."""
     with audioread.audio_open(file) as handle:
         length = handle.duration
     # The file is closed again before the result is handed back.
     return dict(duration=length)