Example #1
def script_sda_detector(resolution):

    import sys

    import pymatlab
    from data_handling import load_savedgzdata  # project helper for gzipped pickles

    session = pymatlab.session_factory()

    nruns = 20
    partiDetMethod = 'log_detector'

    # note: resumes at run 18 rather than starting from run 1
    for nrun in range(18, nruns + 1):

        basefilename = '../final_results/baseline_resized/{0:05d}/models/res_baseline_resized_{0:05d}_111111/{1:05d}_{2:03d}_'.format(
            resolution, nrun, resolution)

        trainfilename = basefilename + 'train_ids.pkl.gz'
        valfilename = basefilename + 'val_ids.pkl.gz'
        trainfinalfilename = basefilename + 'trainfinal_ids.pkl.gz'
        valfinalfilename = basefilename + 'valfinal_ids.pkl.gz'
        testfilename = basefilename + 'test_ids.pkl.gz'

        # shift ids from Python's 0-based indexing to MATLAB's 1-based:
        # [0,max_ids] -> [1,max_ids+1]
        train_ids = load_savedgzdata(trainfilename) + 1
        val_ids = load_savedgzdata(valfilename) + 1
        trainfinal_ids = load_savedgzdata(trainfinalfilename) + 1
        valfinal_ids = load_savedgzdata(valfinalfilename) + 1
        test_ids = load_savedgzdata(testfilename) + 1

        print >> sys.stderr, train_ids
        print >> sys.stderr, val_ids
        print >> sys.stderr, trainfinal_ids
        print >> sys.stderr, valfinal_ids
        print >> sys.stderr, test_ids

        session.putvalue('partiDetMethod', partiDetMethod)
        session.putvalue('resolution', str(resolution) + '_' + str(nrun))
        session.putvalue('train_ids', train_ids)
        session.putvalue('val_ids', val_ids)
        session.putvalue('trainfinal_ids', trainfinal_ids)
        session.putvalue('valfinal_ids', valfinal_ids)
        session.putvalue('test_ids', test_ids)

        mscript = """
        data = struct();
        data.partiDetMethod = partiDetMethod;
        data.resolution     = resolution;
        data.train_ids      = train_ids;
        data.val_ids        = val_ids;
        data.trainfinal_ids = trainfinal_ids;
        data.valfinal_ids   = valfinal_ids;
        data.test_ids       = test_ids;
        res = script_sda_detector( data )
        """

        session.putvalue('MSCRIPT', mscript)
        session.run('eval(MSCRIPT)')
        res = session.getvalue('res')
        print res
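The example relies on pymatlab's workspace bridge: putvalue copies a Python
value into the MATLAB workspace, run executes MATLAB code there, and getvalue
copies a result back. A minimal round-trip sketch (illustrative variable
names, assuming a local MATLAB installation is available):

import numpy
import pymatlab

session = pymatlab.session_factory()      # start a local MATLAB engine
session.putvalue('x', numpy.arange(5.0))  # copy an array into the MATLAB workspace
session.run('y = x * 2;')                 # run MATLAB code against that workspace
y = session.getvalue('y')                 # copy the result back as a NumPy array
print(y)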
Example #2
#!/usr/bin/python

import os, string, sys

lib_path = os.path.abspath('./TL/')
sys.path.append(lib_path)

from data_handling import load_savedgzdata

basepath_15000 = sys.argv[1]
resolution_15000 = '15000'
basepath_50000 = sys.argv[2]

for nrun in range(1, 21):
    # both filenames carry the 15000 tag: the 50000 directory presumably holds
    # transfer runs evaluated at the 15000 target resolution
    pathids0 = '{0:s}/{1:05d}_{2:05d}_test_ids.pkl.gz'.format(
        basepath_15000, nrun, string.atoi(resolution_15000))
    pathids1 = '{0:s}/{1:05d}_{2:05d}_test_ids.pkl.gz'.format(
        basepath_50000, nrun, string.atoi(resolution_15000))

    print >> sys.stderr, 'Loading ' + pathids0 + '...'
    ids0 = load_savedgzdata(pathids0)
    print >> sys.stderr, 'Loading ' + pathids1 + '...'
    ids1 = load_savedgzdata(pathids1)
    print >> sys.stderr, ids0
    print >> sys.stderr, ids1

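The script only prints both id arrays so they can be compared by eye. If the
point is to confirm that both experiments were evaluated on identical test
splits, an explicit check is a small addition (a sketch; assert_same_split is
a hypothetical helper, assuming the ids load as NumPy integer arrays):

import numpy

def assert_same_split(ids0, ids1, nrun):
    # fail loudly if two runs were evaluated on different test splits
    if not numpy.array_equal(numpy.sort(ids0), numpy.sort(ids1)):
        raise ValueError('test splits differ for run {0:d}'.format(nrun))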
Example #3
import sys

import h5py
import numpy
from os import listdir
from os.path import isfile, join, splitext

from data_handling import load_savedgzdata

# loadConvertMNIST is a project-local helper assumed to be in scope


def load_data(datasetpath, options):
    ''' Loads a dataset and returns (dataset, ndim, nclasses).

    :type datasetpath: string
    :param datasetpath: path to the directory holding the .mat patch files
    :type options: dict
    :param options: experiment options ('database', 'resolution', 'patchsize', ...)
    '''

    # if options['oneclass']:
    #     nclasses = 1
    # else:
    #     nclasses = 2

    nclasses = 2
    
    #############
    # LOAD DATA #
    #############

    if options['database'] == 'mnist':
        train_set, valid_set, test_set = load_savedgzdata('mnist.pkl.gz')

        train_set_x = numpy.array(train_set[0])
        train_set_y = numpy.array(train_set[1])
        valid_set_x = numpy.array(valid_set[0])
        valid_set_y = numpy.array(valid_set[1])
        test_set_x  = numpy.array(test_set[0])
        test_set_y  = numpy.array(test_set[1])

        (nelem_train,ndim) = train_set_x.shape
        (nelem_valid,ndim) = valid_set_x.shape
        (nelem_test,ndim)  = test_set_x.shape
        
        dataset = numpy.zeros((options['patchsize'] * options['patchsize'] + 2,
                               nelem_train + nelem_valid + nelem_test))
        ids      = 0
        imgwidth = numpy.sqrt(ndim)
        #print >> sys.stderr, "train....", ids
        (dataset, ids) = loadConvertMNIST(options['patchsize'], ids, dataset,
                                          imgwidth, train_set_x, train_set_y)
        #print >> sys.stderr, "val....", ids
        (dataset, ids) = loadConvertMNIST(options['patchsize'], ids, dataset,
                                          imgwidth, valid_set_x, valid_set_y)
        #print >> sys.stderr, "test....", ids
        (dataset, ids) = loadConvertMNIST(options['patchsize'], ids, dataset,
                                          imgwidth, test_set_x, test_set_y)
        
        nclasses = len(list(set(train_set_y)))
        return (dataset, options['patchsize']*options['patchsize'], nclasses)

    elif options['database'] == 'shapes':
        dataset = load_savedgzdata('shapes.pkl.gz')

        return (dataset, 20 * 20, 4)
        
    # --------------------------------------------------------------
    # data_dir, data_file = os.path.split(dataset)
    if 'all' not in options['database']:
        onlyfiles = [ f for f in listdir(datasetpath) \
                      if ( isfile(join(datasetpath,f)) and splitext(f)[1] == '.mat' and \
                           options['database'] in f and options['resolution'] in f ) ]
    else:
        onlyfiles = [ f for f in listdir(datasetpath) if ( isfile(join(datasetpath,f)) and splitext(f)[1] == '.mat' ) ]

    # onlyfiles = [ f for f in listdir(datasetpath) if ( isfile(join(datasetpath,f)) and splitext(f)[1] == '.mat' ) ]
    onlyfiles.sort()

    first = True
    ids   = 0
    for fname in onlyfiles:  # avoid shadowing the builtin 'file'
        print >> sys.stderr, ( "---> " + datasetpath + fname )
        f = h5py.File(datasetpath + fname,'r')
        
        # print f.items();
        mpatches = f.get('mpatches')
        # print mpatches.items();
        back = numpy.array( mpatches.get('negative') )
        nano = numpy.array( mpatches.get('positive') )
        # note: in replicate mode the 'positive' set is built by duplicating
        # the negative patches
        if options['replicate']:
            nano = numpy.c_[ back, back ]
        
        # print >> sys.stderr, back.shape
        # print >> sys.stderr, nano.shape
        
        (back_ndim,back_npoints) = back.shape
        (nano_ndim,nano_npoints) = nano.shape
        back[0,:] = back[0,:] + ids
        nano[0,:] = nano[0,:] + ids

        ids = max(back[0,:])
        # raw_input('> press any key <')
        
        back = numpy.r_[ back, numpy.ones((1,back_npoints)) ]
        nano = numpy.r_[ nano, -1 * numpy.ones((1,nano_npoints)) ]

        if first:
            dataset = numpy.c_[ back, nano]
            first   = False
        else:
            dataset = numpy.c_[ dataset, back, nano]
        
        # print type( dataset )
        # raw_input('....')

    # dataset was constructed according to the following structure
    # [[ .... ids ....],
    #  [ .... data ...],
    #  [ ..... cls ...]]

    datasetfilename = 'nanoparticles.npz'
    #save_gzdata(datasetfilename,dataset)
    numpy.savez_compressed(datasetfilename,dataset)

    return (dataset, nano_ndim-1, nclasses)
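The layout comment above says row 0 of the dataset holds patch ids, the last
row holds the class label (+1 for background, -1 for nanoparticle, per the
numpy.r_ lines), and the rows in between hold the patch data. A minimal
unpacking sketch under that assumed layout:

import numpy

def split_dataset(dataset):
    # unpack the [[ids], [data ...], [cls]] structure built by load_data
    ids = dataset[0, :]       # patch/image ids
    X   = dataset[1:-1, :].T  # one patch per column -> one patch per row
    y   = dataset[-1, :]      # class labels: +1 background, -1 nanoparticle
    return ids, X, y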
Example #4
import string
import sys

import numpy
from theano.tensor.shared_randomstreams import RandomStreams

# load_data, gen_folds, do_experiment, save_results and load_savedgzdata
# are project-local helpers assumed to be in scope


def TL(source,
       target=None,
       path='../gen_patches/dataset_noisy/',
       retrain=False,
       retrain_ft_layers=[1, 1, 1, 1, 1, 1],
       outputfolder='backup',
       outputfolderres='backup_res',
       batchsize=1000,
       sourcemodelspath='./'):
    """Run the SdA experiments: load data, then for each run generate folds,
    optionally reload a pretrained source model (retrain=True) for transfer
    fine-tuning, run the experiment and save the results."""

    options = {
        'sourcemodelspath': sourcemodelspath,
        'outputfolder': outputfolder,
        'outputfolderres': outputfolderres,
        'verbose': 0,
        'viewdata': False,
        'trainsize': 0.6,
        'patchsize': 20,
        'measure': 'acc',
        'weight': 200,
        'datanormalize': True,
        # ---------- one-class learning
        'replicate': False,
        'oneclass': False,
        # ---------- source problem params
        'database_source': 'db2',
        'resolution_source': source,
        'nclasses_source': 2,  # TODO: do this automatically
        # ---------- target problem params
        'database_target': 'db2',
        'resolution_target': target,
        # ---------- TL hyperparams
        'retrain': retrain,
        'retrain_ft_layers': retrain_ft_layers,
        # ---------- hyperparams
        'nruns': 20,
        'folds': 3,
        'hlayers': [len(retrain_ft_layers) / 2],  # X hidden + 1 log layer
        'nneurons': [1000],  # range(500, 1001, 250),
        'pretraining_epochs': [1000],  # [200]
        'training_epochs': [3000],  # [100]
        'pretrain_lr': [0.01, 0.001],  #[ 0.01, 0.001],
        'finetune_lr': [0.1, 0.01],  #[ 0.1, 0.01],
        'threshold': [0.8],  #[ 0.5 , 0.6, 0.8], #numpy.arange(.5,1.01,.1),
        'batchsize': [batchsize],  # [100] or [1000] depending on dataset size
        # ---------- end of hyperparams
        'corruptlevels': [0.1],  #numpy.arange(0.1, 0.4, 0.1)
    }

    print >> sys.stderr, (options), "\n"

    # -------------------------------------------------------------------------------
    datasetpath = path

    # -------------------------------------------------------------------------------
    # load dataset
    if options['retrain'] == 1:
        options['database'] = options['database_target']
        options['resolution'] = options['resolution_target']
    else:
        options['database'] = options['database_source']
        options['resolution'] = options['resolution_source']

    (dataset, ndim, nclasses) = load_data(datasetpath, options)
    options['ndim'] = ndim
    options['nclasses'] = nclasses

    # --------------------------------------------------------------------------------------------
    for nrun in range(1, options['nruns'] + 1):
        print >> sys.stderr, ("### {0:03d} of {1:03d}".format(
            nrun, options['nruns']))
        options['numpy_rng'] = numpy.random.RandomState(nrun)
        options['theano_rng'] = RandomStreams(seed=nrun)

        # --------------
        # generate folds
        folds = gen_folds(dataset, options, nrun)

        if options['retrain'] == 1:
            filename = "{0:s}/{1:05d}_{2:03d}_model.pkl.gz".format(
                options['sourcemodelspath'], nrun,
                string.atoi(options['resolution_source']))
            print >> sys.stderr, ":: Loading model {0:s}...\n".format(filename)
            sda_reuse_model = load_savedgzdata(filename)

        else:
            sda_reuse_model = None

        # ----------------------------------------------------------------------------
        results = do_experiment(folds, options, nrun, sda_reuse_model)
        # ----------------------------------------------------------------------------

        # --------------------------------------------------
        filename = '{0:s}/res_{1:05d}_{2:03d}.pkl.gz'.format(
            options['outputfolderres'], nrun,
            string.atoi(options['resolution']))
        save_results(filename, results)
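For reference, the two typical invocations (a sketch; the folder names are
illustrative, while the resolution strings and the 6-element fine-tune mask
match the directory names used elsewhere in these examples):

# baseline: train source models at a single resolution, no transfer
TL(source='50000', retrain=False,
   outputfolder='models_50000', outputfolderres='res_50000')

# transfer: reload the 50000 models and fine-tune every layer on the
# 15000 target resolution
TL(source='50000', target='15000', retrain=True,
   retrain_ft_layers=[1, 1, 1, 1, 1, 1],
   sourcemodelspath='models_50000',
   outputfolder='models_tl_15000', outputfolderres='res_tl_15000')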
Example #5
import os
import re
import string
import sys

import numpy

# load_savedgzdata, save_gzdata and getPrecisionRecall are project-local
# helpers assumed to be in scope


def main(resolution, method, pathRes):
    # load results from LoG

    imgpathsae = '../../imgs_nanoparticles/{0:03d}/db2/resultado_sae/'.format(
        string.atoi(resolution))

    if method == 'baseline':
        basepath = './{0:s}/{1:05d}/models/res_baseline_resized_{1:05d}_111111/'.format(
            pathRes, string.atoi(resolution))
    elif method == 'tl':
        basepath = './{0:s}/{1:05d}/models/res_tl_resized_50000_{1:05d}_111111/'.format(
            pathRes, string.atoi(resolution))
    else:
        raise ValueError('unknown method: ' + method)

    # annotations
    annbasepath = '../../imgs_nanoparticles/{0:03d}/db2/annotation/user/'.format(
        string.atoi(resolution))
    annfiles = [
        f for f in os.listdir(annbasepath) if re.match(r'[\w\W]*csv', f)
    ]
    annfiles = sorted(annfiles)

    # imgs base paths
    imgsbasepath = '../../imgs_nanoparticles/{0:03d}/db2/'.format(
        string.atoi(resolution))
    imgspath = os.listdir(imgsbasepath)
    imgspath = sorted(imgspath)

    # ------------------------------------------------------------------------------------------------
    # TEST DATA

    PrecisionAll = []
    RecallAll = []

    PrecisionLoGAll = []
    RecallLoGAll = []

    nDetectionsAll = []

    for nrun in range(1, 21):
        print >> sys.stderr, "\n**************************\n"
        print >> sys.stderr, "NRUN {0:05d}/20 ".format(nrun)

        filename = '{0:s}/{1:05d}_{2:03d}_model.pkl.gz'.format(
            basepath, nrun, string.atoi(resolution))
        print >> sys.stderr, "Loading " + filename
        model = load_savedgzdata(filename)

        # get ids
        pathids = '{0:s}/{1:05d}_{2:05d}_test_ids.pkl.gz'.format(
            basepath, nrun, string.atoi(resolution))
        print >> sys.stderr, 'Loading ' + pathids + '...'
        ids = load_savedgzdata(pathids)
        print >> sys.stderr, ids

        reg = 'detectedNanoParticlesDetectionResult_log_detector_test_{0:03d}_'.format(
            nrun)
        files = [f for f in os.listdir(imgpathsae) if re.match(reg, f)]
        # order data
        files = sorted(files)

        nfiles = len(files)

        (Precision, Recall, PrecisionLoG, RecallLoG,
         nDetections) = getPrecisionRecall(nfiles,
                                           files,
                                           ids,
                                           imgpathsae,
                                           imgsbasepath,
                                           imgspath,
                                           annbasepath,
                                           annfiles,
                                           model, (0, 0, nrun, 0),
                                           printImg=True)

        print >> sys.stderr, "Precision LoG: {0:05f} | Recall LoG: {1:05f}".format(
            PrecisionLoG, RecallLoG)
        print >> sys.stderr, "Precision SdA: {0:05f} | Recall SdA: {1:05f}".format(
            Precision, Recall)

        PrecisionAll.append(Precision)
        RecallAll.append(Recall)

        PrecisionLoGAll.append(PrecisionLoG)
        RecallLoGAll.append(RecallLoG)

        nDetectionsAll.append(nDetections)

    # ---------------------------------------------------------
    PrecisionAll = numpy.array(PrecisionAll)
    RecallAll = numpy.array(RecallAll)

    PrecisionLoGAll = numpy.array(PrecisionLoGAll)
    RecallLoGAll = numpy.array(RecallLoGAll)

    nDetectionsAll = numpy.array(nDetectionsAll)

    print "--------------------------------------------\n"
    print "Precision LoG: {0:03f} ({1:03f}) | Recall LoG: {2:03f} ({3:03f})".format(
        numpy.mean(PrecisionLoGAll), numpy.std(PrecisionLoGAll),
        numpy.mean(RecallLoGAll), numpy.std(RecallLoGAll))
    print "Precision SdA: {0:03f} ({1:03f}) | Recall SdA: {2:03f} ({3:03f})".format(
        numpy.mean(PrecisionAll), numpy.std(PrecisionAll),
        numpy.mean(RecallAll), numpy.std(RecallAll))
    print "number detections: {0:03f} ({1:03f})".format(
        numpy.mean(nDetectionsAll), numpy.std(nDetectionsAll))

    PrecisionRecall = numpy.c_[PrecisionAll, RecallAll]
    filename = 'results/sae_{0:s}_{1:s}_test_all.pkl.gz'.format(
        method, resolution)
    save_gzdata(filename, PrecisionRecall)

    PrecisionRecallLoG = numpy.c_[PrecisionLoGAll, RecallLoGAll]
    filename = 'results/log_{0:s}_{1:s}_test_all.pkl.gz'.format(
        method, resolution)
    save_gzdata(filename, PrecisionRecallLoG)

    PrecisionRecall = numpy.r_[numpy.mean(PrecisionAll), numpy.mean(RecallAll)]
    filename = 'results/sae_{0:s}_{1:s}_test.pkl.gz'.format(method, resolution)
    save_gzdata(filename, PrecisionRecall)

    PrecisionRecallLoG = numpy.r_[numpy.mean(PrecisionLoGAll),
                                  numpy.mean(RecallLoGAll)]
    filename = 'results/log_{0:s}_{1:s}_test.pkl.gz'.format(method, resolution)
    save_gzdata(filename, PrecisionRecallLoG)

    filename = 'results/ndetections_{0:s}_{1:s}_test.pkl.gz'.format(
        method, resolution)
    save_gzdata(filename, nDetectionsAll)
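The per-run arrays saved above can be reloaded later to recompute the summary
statistics without rerunning the detector (a sketch; the filename is just one
instance of the pattern used by main, and load_savedgzdata is the project
helper imported in Example #2):

import numpy
from data_handling import load_savedgzdata

# per-run (precision, recall) pairs saved by main(), shape (nruns, 2)
pr = load_savedgzdata('results/sae_baseline_15000_test_all.pkl.gz')
print('Precision: {0:.3f} ({1:.3f})'.format(numpy.mean(pr[:, 0]), numpy.std(pr[:, 0])))
print('Recall:    {0:.3f} ({1:.3f})'.format(numpy.mean(pr[:, 1]), numpy.std(pr[:, 1])))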