Ejemplo n.º 1
0
def initialiseModels(argv, update, initMode='training'):
    # argv[1] = dataPath
    # argv[2] = modelPath
    # argv[3] = driverName
    # update = 'update' or 'new'

    from SAM.SAM_Core import SAMDriver as Driver
    dataPath = argv[0]
    modelPath = argv[1]
    driverName = argv[2]

    print argv
    stringCommand = 'from SAM.SAM_Drivers import ' + driverName + ' as Driver'
    print stringCommand
    exec stringCommand

    mySAMpy = Driver()
    mode = update
    trainName = dataPath.split('/')[-1]

    # participantList is extracted from number of subdirectories of dataPath
    participantList = [
        f for f in listdir(dataPath) if isdir(join(dataPath, f))
    ]

    off = 17
    print '-------------------'
    print 'Training Settings:'
    print
    print 'Data Path: '.ljust(off), dataPath
    print 'Model Path: '.ljust(off), modelPath
    print 'Participants: '.ljust(off), participantList
    print 'Model Root Name: '.ljust(off), trainName
    print 'Training Mode:'.ljust(off), mode
    print 'Driver:'.ljust(off), driverName
    print '-------------------'
    print 'Loading Parameters...'
    print
    temporalFlag = False
    modeConfig = ''
    found = ''
    try:
        parser = SafeConfigParser()
        found = parser.read(dataPath + "/config.ini")

        if parser.has_option(trainName, 'update_mode'):
            modeConfig = parser.get(trainName, 'update_mode')
        else:
            modeConfig = 'update'
        print modeConfig
    except IOError:
        pass

    defaultParamsList = [
        'experiment_number', 'model_type', 'model_num_inducing',
        'model_num_iterations', 'model_init_iterations', 'verbose', 'Quser',
        'kernelString', 'ratioData', 'update_mode', 'model_mode', 'windowSize'
    ]

    mySAMpy.experiment_number = None
    mySAMpy.model_type = None
    mySAMpy.kernelString = None
    mySAMpy.fname = None
    mySAMpy.ratioData = None

    if initMode == 'training' and (mode == 'new' or modeConfig == 'new'
                                   or 'exp' not in modelPath):
        print 'Loading training parameters from: \n ', '\t' + dataPath + "/config.ini"
        try:
            default = False
            parser = SafeConfigParser()
            parser.optionxform = str
            found = parser.read(dataPath + "/config.ini")

            # load parameters from config file
            if parser.has_option(trainName, 'experiment_number'):
                mySAMpy.experiment_number = int(
                    parser.get(trainName, 'experiment_number'))
            elif '.pickle' in modelPath:
                mySAMpy.experiment_number = int(
                    modelPath.split('__')[-2].replace('exp', '')) + 1
            else:
                fail = True
                print 'No experiment_number found'

            if parser.has_option(trainName, 'model_type'):
                mySAMpy.model_type = parser.get(trainName, 'model_type')
            else:
                default = True
                mySAMpy.model_type = 'mrd'

            if parser.has_option(trainName, 'model_num_inducing'):
                mySAMpy.model_num_inducing = int(
                    parser.get(trainName, 'model_num_inducing'))
            else:
                default = True
                mySAMpy.model_num_inducing = 30

            if parser.has_option(trainName, 'model_num_iterations'):
                mySAMpy.model_num_iterations = int(
                    parser.get(trainName, 'model_num_iterations'))
            else:
                default = True
                mySAMpy.model_num_iterations = 700

            if parser.has_option(trainName, 'model_init_iterations'):
                mySAMpy.model_init_iterations = int(
                    parser.get(trainName, 'model_init_iterations'))
            else:
                default = True
                mySAMpy.model_init_iterations = 2000

            if parser.has_option(trainName, 'verbose'):
                mySAMpy.verbose = parser.get(trainName, 'verbose') == 'True'
            else:
                default = True
                mySAMpy.verbose = False

            if parser.has_option(trainName, 'model_mode'):
                mySAMpy.model_mode = parser.get(trainName, 'model_mode')
                if mySAMpy.model_mode == 'temporal' and parser.has_option(
                        trainName, 'windowSize'):
                    mySAMpy.windowSize = int(
                        parser.get(trainName, 'windowSize'))
                else:
                    temporalFlag = True
            else:
                default = True
                mySAMpy.model_mode = 'single'

            if parser.has_option(trainName, 'Quser'):
                mySAMpy.Quser = int(parser.get(trainName, 'Quser'))
            else:
                default = True
                mySAMpy.Quser = 2

            if parser.has_option(trainName, 'kernelString'):
                mySAMpy.kernelString = parser.get(trainName, 'kernelString')
            else:
                default = True
                mySAMpy.kernelString = "GPy.kern.RBF(Q, ARD=False) + GPy.kern.Bias(Q) + GPy.kern.White(Q)"

            if parser.has_option(trainName, 'ratioData'):
                mySAMpy.ratioData = int(parser.get(trainName, 'ratioData'))
            else:
                default = True
                mySAMpy.ratioData = 50

            if default:
                print 'Default settings applied'

            mySAMpy.paramsDict = dict()
            mySAMpy.loadParameters(parser, trainName)

        except IOError:
            print 'IO Exception reading ', found
            pass
    else:
        print 'Loading parameters from: \n ', '\t' + modelPath
        try:
            parser = SafeConfigParser()
            parser.optionxform = str
            found = parser.read(dataPath + "/config.ini")

            # load parameters from config file
            if parser.has_option(trainName, 'experiment_number'):
                mySAMpy.experiment_number = int(
                    parser.get(trainName, 'experiment_number'))
            else:
                mySAMpy.experiment_number = int(
                    modelPath.split('__')[-2].replace('exp', ''))

            modelPickle = pickle.load(open(modelPath + '.pickle', 'rb'))
            mySAMpy.paramsDict = dict()
            for j in parser.options(trainName):
                if j not in defaultParamsList:
                    print j
                    mySAMpy.paramsDict[j] = modelPickle[j]

            mySAMpy.ratioData = modelPickle['ratioData']
            mySAMpy.model_type = modelPickle['model_type']
            mySAMpy.model_mode = modelPickle['model_mode']
            if mySAMpy.model_mode == 'temporal':
                mySAMpy.windowSize = modelPickle['windowSize']
                mySAMpy.model_type = 'mrd'
            mySAMpy.model_num_inducing = modelPickle['model_num_inducing']
            mySAMpy.model_num_iterations = modelPickle['model_num_iterations']
            mySAMpy.model_init_iterations = modelPickle[
                'model_init_iterations']
            mySAMpy.verbose = modelPickle['verbose']
            mySAMpy.Quser = modelPickle['Quser']
            mySAMpy.kernelString = modelPickle['kernelString']
            try:
                mySAMpy.listOfModels = modelPickle['listOfModels']
                mySAMpy.classifiers = modelPickle['classifiers']
                mySAMpy.classif_thresh = modelPickle['classif_thresh']
            except:
                pass

        except IOError:
            print 'IO Exception reading ', found
            pass

    if 'exp' in modelPath:
        fnameProto = '/'.join(modelPath.split('/')[:-1]) + '/' + dataPath.split('/')[-1] + '__' + driverName + \
                             '__' + mySAMpy.model_type + '__exp' + str(mySAMpy.experiment_number)
    else:
        fnameProto = modelPath + dataPath.split('/')[-1] + '__' + driverName + '__' + mySAMpy.model_type + \
                             '__exp' + str(mySAMpy.experiment_number)

    print 'Full model name: \n', '\t' + fnameProto
    print '-------------------'
    print

    mySAMpy.save_model = False
    mySAMpy.economy_save = True
    mySAMpy.visualise_output = False
    # test_mode = True

    mySAMpy.readData(dataPath, participantList)
    # at this point, all the data that will be eventually used for training is contained in mySAMpy.Y
    # and mySAMpy.L contains all labels if any (depending on mrd model or bgplvm model)
    # mySAMpy.L is a list of labels while mySAMpy.Y is a numpy array of data
    # mySAMpy.Y should have 2 dimensions, length of dimension 0 = number of instances
    # length of dimension 1 = length of feature vector

    if mySAMpy.model_mode != 'temporal':
        # get list of labels
        mySAMpy.textLabels = list(set(mySAMpy.L))

        # convert L from list of strings to array of indices
        mySAMpy.L = np.asarray(
            [mySAMpy.textLabels.index(i) for i in mySAMpy.L])[:, None]
        mySAMpy.textLabels = mySAMpy.textLabels
    else:
        mySAMpy.X, mySAMpy.Y = transformTimeSeriesToSeq(
            mySAMpy.Y1, mySAMpy.windowSize)
        mySAMpy.L, mySAMpy.tmp = transformTimeSeriesToSeq(
            mySAMpy.U1, mySAMpy.windowSize)

    mm = [mySAMpy]
    # mm.append(mySAMpy)
    # mm[0] contains root model
    # this is the only model in the case of a single model
    # or contains all info for the rest of the models in case of multiple models
    #

    if mySAMpy.model_mode == 'single' or mySAMpy.model_mode == 'temporal':
        mm[0].participantList = ['all']
    else:
        mm[0].participantList = ['root'] + mySAMpy.textLabels

    for k in range(len(mm[0].participantList)):
        if mm[0].participantList[k] == 'all':
            minData = len(mm[k].L)
            mm[0].fname = fnameProto
            mm[0].model_type = mySAMpy.model_type
            Ntr = int(mySAMpy.ratioData * minData / 100)
        else:
            if k > 0:
                mm.append(Driver())
                # extract subset of data corresponding to this model
                inds = [
                    i for i in range(len(mm[0].Y['L']))
                    if mm[0].Y['L'][i] == k - 1
                ]
                mm[k].Y = mm[0].Y['Y'][inds]
                mm[k].L = mm[0].Y['L'][inds]
                mm[k].Quser = mm[0].Quser
                mm[k].verbose = mm[0].verbose
                print 'Object class: ', mm[0].participantList[k]
                minData = len(inds)
                mm[k].fname = fnameProto + '__L' + str(k - 1)
                mm[0].listOfModels.append(mm[k].fname)
                mm[k].model_type = 'bgplvm'
                Ntr = int(mySAMpy.ratioData * minData / 100)
            else:
                mm[0].listOfModels = []
                mm[0].fname = fnameProto
                mm[0].SAMObject.kernelString = ''
                minData = len(mm[0].L)
                Ntr = int(mySAMpy.ratioData * minData / 100)
            mm[k].modelLabel = mm[0].participantList[k]

        if mm[0].model_mode != 'temporal':
            [Yall, Lall, YtestAll,
             LtestAll] = mm[k].prepareData(mm[k].model_type,
                                           Ntr,
                                           randSeed=mm[0].experiment_number)
            mm[k].Yall = Yall
            mm[k].Lall = Lall
            mm[k].YtestAll = YtestAll
            mm[k].LtestAll = LtestAll
        elif mm[0].model_mode == 'temporal':
            [Xall, Yall, Lall, XtestAll, YtestAll,
             LtestAll] = mm[k].prepareData(mm[k].model_type,
                                           Ntr,
                                           randSeed=mm[0].experiment_number)
            mm[k].Xall = Xall
            mm[k].Yall = Yall
            mm[k].Lall = Lall
            mm[k].XtestAll = XtestAll
            mm[k].YtestAll = YtestAll
            mm[k].LtestAll = LtestAll

        print 'minData = ' + str(minData)
        print 'ratioData = ' + str(mySAMpy.ratioData)

    if initMode == 'training':
        for k in range(len(mm[0].participantList)):
            # for k = 0 check if multiple model or not
            if mm[0].participantList[k] != 'root':

                print "Training with ", mm[0].model_num_inducing, 'inducing points for ', \
                    mm[0].model_init_iterations, '|', mm[0].model_num_iterations

                mm[k].training(mm[0].model_num_inducing,
                               mm[0].model_num_iterations,
                               mm[0].model_init_iterations,
                               mm[k].fname,
                               mm[0].save_model,
                               mm[0].economy_save,
                               keepIfPresent=False,
                               kernelStr=mm[0].kernelString)

                if mm[0].visualise_output:
                    ax = mm[k].SAMObject.visualise()
                    visualiseInfo = dict()
                    visualiseInfo['ax'] = ax
                else:
                    visualiseInfo = None
    else:
        for k in range(len(mm[0].participantList)):
            # for k = 0 check if multiple model or not
            if mm[0].participantList[k] != 'root':
                print "Training with ", mm[0].model_num_inducing, 'inducing points for ', \
                    mm[0].model_init_iterations, '|', mm[0].model_num_iterations

                mm[k].training(mm[0].model_num_inducing,
                               mm[0].model_num_iterations,
                               mm[0].model_init_iterations,
                               mm[k].fname,
                               mm[0].save_model,
                               mm[0].economy_save,
                               keepIfPresent=True,
                               kernelStr=mm[0].kernelString)

    return mm