import os
import sys
import glob
import numpy as np
# project-specific helpers (ANALYZE, ORACLE, print_write) are assumed to
# be imported/defined elsewhere in this file


def test_saved_model_folder(dirname, feats, output, filt=False):
    """
    Test a saved model by loading it and applying it to features.
    'output' is an open output file, used with print_write().
    RETURN
      average dist
      nPatterns
      nIters
      totaltime
    """
    print_write('*** MODEL SAVED IN: '+dirname+' ***',output)
    if filt:
        print_write('model uses FILTERING',output)
    # load model
    model = ANALYZE.unpickle(os.path.join(dirname,'model.p'))
    print_write('model loaded',output)
    # find nIters (#tracks), nPatterns, totaltime
    nIters, nPatterns, totalTime = ANALYZE.traceback_stats(dirname)
    if filt:
        nPatterns = model._nPatternUsed
    print_write('nIters (=nTracks): '+str(nIters),output)
    print_write('nPatterns: '+str(nPatterns),output)
    print_write('total time ran: '+str(totalTime),output)
    # predict
    best_code_per_p, dists = model.predicts(feats)
    avg_dist = np.average(dists)
    print_write('prediction done, avg. dist: ' + str(avg_dist), output)
    # return
    return avg_dist, nPatterns, nIters, totalTime
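
# NOTE: print_write() is used throughout but never defined in this snippet.
# A minimal sketch of what it is assumed to do (print to stdout and append
# the same line to an open log file):
#
#   def print_write(msg, fileout):
#       print msg
#       fileout.write(msg + '\n')
#       fileout.flush()
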
def analyze_one_batch_of_models(savedmodel, matfilesdir, output, filt=False):
    """
    Main job, from a saved model, find other saved model in same dir,
    test them all, return plotting info (nSamples and dist)
    """
    # hack: strip a trailing path separator
    if savedmodel[-1] == os.path.sep:
        savedmodel = savedmodel[:-1]
    # GET SAVED MODELS
    parentdir, tmp = os.path.split(savedmodel)
    # traceback
    tb = ANALYZE.traceback(savedmodel)
    # find everything in parent folder, then keep just folders
    all_in_folder = glob.glob(os.path.join(parentdir, '*'))
    all_in_folder = filter(os.path.isdir, all_in_folder)
    # keep those that have the same origin
    leaves = filter(lambda x: ANALYZE.traceback(x)[0] == tb[0], all_in_folder)
    # everything to test, matfile at the end
    all_to_test = set()
    for f in tb:
        if os.path.isdir(f):
            all_to_test.add(f)
    for f in leaves:
        all_to_test.add(f)
    all_to_test = list(all_to_test)
    # GET PARAMS / LOAD DATA
    params = ANALYZE.unpickle(os.path.join(savedmodel, 'params.p'))
    oracle = ORACLE.OracleMatfiles(params, matfilesdir, oneFullIter=True)
    data = [x for x in oracle]
    # keep tracks whose features were actually loaded
    data = filter(lambda x: x is not None, data)
    data = np.concatenate(data)
    # drop all-zero (empty) patterns
    data = data[np.where(np.sum(data, axis=1) > 0)]
    if data.shape[0] == 0:
        print_write('No patterns loaded, quit.', output)
        sys.exit(0)
    # PREDICT ON EVERY MODEL
    dists = []
    patterns = []
    for f in all_to_test:
        dist, nPatterns, nIters, totalTime = test_saved_model_folder(
            f, data, output, filt=filt)
        dists.append(dist)
        patterns.append(nPatterns)
    # delete data before plotting
    del data
    # prepare plotting data
    dists = np.array(dists)
    patterns = np.array(patterns)
    order = np.argsort(patterns)
    # return 2 arrays (nPatterns and dists), ordered by increasing nPatterns
    return patterns[order], dists[order]
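
# Example use of analyze_one_batch_of_models (a sketch: the model folder,
# matfiles directory and log file below are hypothetical, and pylab is
# assumed to be available for plotting):
#
#   output = open('analyze.log', 'w')
#   patterns, dists = analyze_one_batch_of_models('exps/exp_000042',
#                                                 'matfiles/', output)
#   # plot avg. distortion against number of patterns seen
#   import pylab as P
#   P.plot(patterns, dists, 'o-')
#   P.xlabel('nPatterns')
#   P.ylabel('avg. distortion')
#   P.show()
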
def analyze_one_exp_dir(expdir, validdir, testdir, autobar=False, most_recent=False):
    """
    Analyze one experiment dir.
    This directory contains many subdirectory, for all the saved models.
    Check every saved model with validdata (numpy array), and test the
    best one on the test data.
    Returns numbers: patternsize, codebook size, distortion error, best saved model
    """
    # get all subdirs
    alldirs = glob.glob(os.path.join(expdir, "*"))
    if len(alldirs) > 0:
        alldirs = filter(lambda x: os.path.isdir(x), alldirs)
        alldirs = filter(lambda x: os.path.split(x)[-1].startswith("exp_"),
                         alldirs)
        # trim badly saved models
        alldirs = filter(lambda x: check_saved_model_full(x, False), alldirs)
    if len(alldirs) == 0:
        print "no saved model found in:", expdir
        return None, None, None, None

    # get params
    savedmodel = np.sort(alldirs)[-1]
    params = ANALYZE.unpickle(os.path.join(savedmodel, "params.p"))

    # if test only one model, the most recent
    if most_recent:
        alldirs = [savedmodel]

    # load valid data
    if not autobar:
        oracle = ORACLE.OracleMatfiles(params, validdir, oneFullIter=True)
        # if autobar:
        #    oracle.use_autobar_in_iterator(savedmodel)
        validdata = [x for x in oracle]
        validdata = filter(lambda x: x is not None, validdata)
        validdata = np.concatenate(validdata)
        assert validdata.shape[1] > 0, "no valid data??"

    # load test data
    if not autobar:
        if validdir != testdir:
            oracle = ORACLE.OracleMatfiles(params, testdir, oneFullIter=True)
            testdata = [x for x in oracle]
            testdata = filter(lambda x: x is not None, testdata)
            testdata = np.concatenate(testdata)
            assert testdata.shape[1] > 0, "no test data??"
        else:
            testdata = validdata

    # test all subdirs with valid data, keep the best
    best_model = ""
    best_dist = np.inf
    for sm in alldirs:
        model = ANALYZE.unpickle(os.path.join(sm, "model.p"))
        # IF AUTOBAR, LOAD VALID DATA NOW (segmented by this model)
        if autobar:
            oracle = ORACLE.OracleMatfiles(params, validdir, oneFullIter=True)
            oracle.use_autobar_in_iterator(model)
            validdata = [x for x in oracle]
            validdata = filter(lambda x: x is not None, validdata)
            validdata = np.concatenate(validdata)
            assert validdata.shape[1] > 0, "no valid data??"
        ####
        codewords, dists = model.predicts(validdata)
        avg_dist = np.average(dists)
        if avg_dist < best_dist:
            best_model = sm
            best_dist = avg_dist
    assert best_model != "", "no data found???"

    if testdir == validdir:
        # we're done
        # load model, verbose
        model = ANALYZE.unpickle(os.path.join(best_model, "model.p"))
        print "best model:", best_model, " ( dist =", best_dist, ")"
        # return pattern size, codebook size, distortion error, best saved model
        return validdata.shape[1] / 12, model._codebook.shape[0], best_dist, best_model

    # test with test data
    model = ANALYZE.unpickle(os.path.join(best_model, "model.p"))
    # IF AUTOBAR, LOAD TEST DATA NOW (segmented by the best model)
    if autobar:
        oracle = ORACLE.OracleMatfiles(params, testdir, oneFullIter=True)
        oracle.use_autobar_in_iterator(model)
        testdata = [x for x in oracle]
        testdata = filter(lambda x: x is not None, testdata)
        testdata = np.concatenate(testdata)
        assert testdata.shape[1] > 0, "no test data??"
    ####
    codewords, dists = model.predicts(testdata)
    avg_dist = np.average(dists)
    print "best model:", best_model, " ( dist =", avg_dist, ")"

    # return pattern size, codebook size, distortion error, best saved model
    return testdata.shape[1] / 12, model._codebook.shape[0], avg_dist, best_model
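
# Example use of analyze_one_exp_dir (a sketch: the experiment and data
# directories below are hypothetical):
#
#   psize, csize, dist, best = analyze_one_exp_dir('exps/expA',
#                                                  'data/valid',
#                                                  'data/test')
#   if best is not None:
#       print 'pattern size:', psize
#       print 'codebook size:', csize
#       print 'distortion:', dist, '(model:', best, ')'
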
    # everything to test, matfile at the end
    all_to_test = set()
    for f in tb:
        if os.path.isdir(f):
            all_to_test.add(f)
    for f in leaves:
        all_to_test.add(f)
    all_to_test = list(all_to_test)
    print_write('all models to try:', output)
    for f in all_to_test:
        print_write(str(f), output)
    print_write('number of models to test: ' + str(len(all_to_test)), output)

    #******************************************************************
    # get params
    params = ANALYZE.unpickle(os.path.join(savedmodel, 'params.p'))
    print_write('PARAMS:', output)
    for k in params.keys():
        print_write(str(k) + ' : ' + str(params[k]), output)
    # load data into memory
    oracle = ORACLE.OracleMatfiles(params, matfilesdir, oneFullIter=True)
    # get all features
    data = [x for x in oracle]
    print_write('retrieved ' + str(len(data)) + ' tracks.', output)
    # keep non-None features
    data = filter(lambda x: x is not None, data)
    print_write(str(len(data)) + ' tracks not None remaining.', output)
    # transform into numpy array
    data = np.concatenate(data)
    print_write(str(data.shape[0]) + ' patterns loaded.', output)
    # remove empty patterns