Beispiel #1
0
def CreateRep(ModelPath,DataPrefix):
    """Run ``createvecfile`` on ``<DataPrefix>.vec`` with a depth argument of 3.

    The call receives ``ModelPath`` unchanged, the input file name
    ``DataPrefix + '.vec'``, the literal ``3`` and the output file name
    ``DataPrefix + '_DL3.vec'``.  Nothing is returned.

    NOTE(review): ``createvecfile`` is defined elsewhere; presumably it writes
    the depth-3 representation of the input vectors to the output file --
    confirm against its definition.
    """
    depth = 3
    source_vec = DataPrefix + '.vec'
    target_vec = DataPrefix + '_DL3.vec'
    createvecfile(ModelPath, source_vec, depth, target_vec)
Beispiel #2
0
def evalMain(FoldsNumber, ClassifierTrainingSize, DataPrefix, ModelPath, Seed):
    """Learn the model, then compare classifiers on raw and learned inputs.

    Steps, in order:

    1. Seed ``numpy.random`` and call ``OpenTableSDAEexp`` to learn the model.
    2. Call ``createvecfile`` to build depth-3 and depth-1 representations of
       the test vectors.
    3. For each of the 5 tasks: sample training examples with
       ``SampleStratified``, write the sampled vectors/labels to
       ``<DataPrefix>-train_10k_task<T>.vec/.lab``, build their depth-3 and
       depth-1 representations, split them into folds with
       ``SampleStratifiedFolds`` and, for every fold, train and evaluate a
       classifier on the raw vectors ("baseline"), the depth-1 representation
       ("shallow") and the depth-3 representation ("deep").
    4. Print mean and standard deviation of the per-fold results of every
       task to ``sys.stderr``.

    The three result dictionaries (task -> list of per-fold results) are
    pickled, one after another, to ``ModelPath +
    'kfold_results_dictionnaries.pkl'`` after every fold and once more at
    the end, so an interrupted run keeps its partial results.

    Args:
        FoldsNumber: forwarded to ``SampleStratifiedFolds``.
        ClassifierTrainingSize: forwarded to ``SampleStratified``.  The
            "10k" in the generated file names is a fixed label and does not
            follow this value.
        DataPrefix: prefix of the data files.  ``-train.vec``, ``-train.lab``,
            ``-test.vec`` and ``-test.lab`` are read; the per-task subsample
            files and ``_current_idx_train.idx`` are written.
        ModelPath: model directory.  NOTE(review): it is concatenated
            directly with ``'DARPA.conf'`` and the pickle file name but with
            ``'/depth3'`` etc. elsewhere, so it presumably has to end with a
            path separator -- confirm with the callers.
        Seed: seed passed to ``numpy.random.seed``.

    Returns:
        None.
    """
    numpy.random.seed(Seed)
    # learn the model
    OpenTableSDAEexp(ModelPath + 'DARPA.conf', ModelPath)

    test_lab = DataPrefix + '-test.lab'
    raw_test_vec = DataPrefix + '-test.vec'
    shallow_test_vec = ModelPath + '/DLrep_depth1_test.vec'
    deep_test_vec = ModelPath + '/DLrep_depth3_test.vec'

    # create the representations of the test set
    createvecfile(ModelPath + '/depth3', raw_test_vec, 3, deep_test_vec)
    createvecfile(ModelPath + '/depth1', raw_test_vec, 1, shallow_test_vec)

    resbaseline = {}
    resshallow = {}
    resdeep = {}

    results_file = ModelPath + 'kfold_results_dictionnaries.pkl'
    idx_file = DataPrefix + '_current_idx_train.idx'

    # The full training set is not modified by this function, so it is read
    # once here instead of once per task.
    orig_vec = _read_lines(DataPrefix + '-train.vec')
    orig_lab = _read_lines(DataPrefix + '-train.lab')

    for task in range(5):
        ListLabel = ReadLabelFile(DataPrefix + '-train.lab', task)
        Train_idx = SampleStratified(numpy.random, ListLabel,
                                     ClassifierTrainingSize)

        # Pick the sampled lines in a single pass over Train_idx, and before
        # any output file is opened, so a bad index cannot leave a truncated
        # file behind.
        sampled = [(orig_vec[idx], orig_lab[idx]) for idx in Train_idx]

        train_vec = DataPrefix + '-train_10k_task%s.vec' % task
        train_lab = DataPrefix + '-train_10k_task%s.lab' % task
        _write_lines(train_vec, [vec for vec, _ in sampled])
        _write_lines(train_lab, [lab for _, lab in sampled])

        deep_train_vec = ModelPath + '/DLrep_depth3_train_10k_task%s.vec' % task
        shallow_train_vec = ModelPath + '/DLrep_depth1_train_10k_task%s.vec' % task
        createvecfile(ModelPath + '/depth3', train_vec, 3, deep_train_vec)
        createvecfile(ModelPath + '/depth1', train_vec, 1, shallow_train_vec)

        # Folds are drawn from the labels of the subsample, not the full set.
        ListLabel = ReadLabelFile(train_lab, task)
        Folds_idx = SampleStratifiedFolds(ListLabel, FoldsNumber)

        resbaseline[task] = []
        resshallow[task] = []
        resdeep[task] = []

        for idxfold, k in enumerate(Folds_idx):
            # Creating the index file (shared by the three variants below)
            CreateIdxFile(k, idx_file)

            # baseline: raw input vectors
            resbaseline[task].append(_train_and_score(
                task, train_lab, train_vec, idx_file, test_lab, raw_test_vec,
                DataPrefix + '_baseline_task_%s_fold_%s' % (task, idxfold)))
            # Shallow: depth-1 representation
            resshallow[task].append(_train_and_score(
                task, train_lab, shallow_train_vec, idx_file, test_lab,
                shallow_test_vec,
                DataPrefix + '_shallow_task_%s_fold_%s' % (task, idxfold)))
            # Deep: depth-3 representation
            resdeep[task].append(_train_and_score(
                task, train_lab, deep_train_vec, idx_file, test_lab,
                deep_test_vec,
                DataPrefix + '_deep_task_%s_fold_%s' % (task, idxfold)))

            # Checkpoint after every fold.
            _dump_results(results_file, resbaseline, resshallow, resdeep)

    for i in range(5):
        print >> sys.stderr, 'baseline', numpy.mean(
            resbaseline[i]), " +/- ", numpy.std(resbaseline[i])
        print >> sys.stderr, 'shallow', numpy.mean(
            resshallow[i]), " +/- ", numpy.std(resshallow[i])
        print >> sys.stderr, 'deep', numpy.mean(
            resdeep[i]), " +/- ", numpy.std(resdeep[i])

    _dump_results(results_file, resbaseline, resshallow, resdeep)


def _read_lines(path):
    """Return the lines of the text file at *path*, closing the file."""
    with open(path, 'r') as handle:
        return handle.readlines()


def _write_lines(path, lines):
    """Write the strings in *lines* to *path* as-is, closing the file."""
    with open(path, 'w') as handle:
        handle.writelines(lines)


def _train_and_score(task, train_lab, train_vec, idx_file, test_lab, test_vec,
                     out_prefix):
    """Train on one fold of (train_lab, train_vec) and evaluate on the test files.

    Returns whatever ``Classifier`` returns for the selected classifier.
    """
    TrainingData, ValidationData = loadTrainDataset(
        task, train_lab, train_vec, idx_file)
    best_classifier = TrainAndOptimizeClassifer(
        TrainingData, ValidationData, True)
    TestData = loadTestDataset(task, test_lab, test_vec)
    return Classifier(best_classifier, TestData, out_prefix)


def _dump_results(path, resbaseline, resshallow, resdeep):
    """Pickle the three result dictionaries, in this order, into *path*."""
    # Protocol -1 is a binary protocol, so the file must be opened in binary
    # mode; text mode can corrupt the stream on platforms that translate
    # line endings.
    with open(path, 'wb') as handle:
        for results in (resbaseline, resshallow, resdeep):
            cPickle.dump(results, handle, -1)
Beispiel #3
0
def evalMain(FoldsNumber, ClassifierTrainingSize, DataPrefix, ModelPath, Seed):
    """Run the k-fold comparison of baseline, shallow and deep classifiers.

    The model is first learned with ``OpenTableSDAEexp`` and depth-1 /
    depth-3 representations of the test vectors are produced with
    ``createvecfile``.  Then, for each of the 5 tasks, training examples are
    sampled with ``SampleStratified``, written to
    ``<DataPrefix>-train_10k_task<T>.vec/.lab``, converted to depth-1 and
    depth-3 representations and split into folds by
    ``SampleStratifiedFolds``.  For every fold one classifier per input
    variant (raw vectors, depth-1, depth-3) is trained and evaluated on the
    test files; the value returned by ``Classifier`` is appended to that
    variant's per-task result list.

    After every fold, and again at the very end, the three result
    dictionaries are pickled back to back (baseline, shallow, deep) into
    ``ModelPath + 'kfold_results_dictionnaries.pkl'``.  Finally the mean and
    standard deviation of every task's results are printed to ``sys.stderr``.

    Args:
        FoldsNumber: passed through to ``SampleStratifiedFolds``.
        ClassifierTrainingSize: passed through to ``SampleStratified``.
        DataPrefix: common prefix of the input files (``-train.vec``,
            ``-train.lab``, ``-test.vec``, ``-test.lab``) and of the files
            generated next to them.
        ModelPath: model directory.  NOTE(review): joined without a
            separator to ``'DARPA.conf'`` and to the pickle name, but with a
            leading ``'/'`` everywhere else -- presumably expected to end
            with a path separator; confirm with callers.
        Seed: value given to ``numpy.random.seed``.

    Returns:
        None.
    """
    numpy.random.seed(Seed)
    # learn the model
    OpenTableSDAEexp(ModelPath + 'DARPA.conf', ModelPath)

    test_lab = DataPrefix + '-test.lab'
    raw_test_vec = DataPrefix + '-test.vec'
    shallow_test_vec = ModelPath + '/DLrep_depth1_test.vec'
    deep_test_vec = ModelPath + '/DLrep_depth3_test.vec'

    # createrepresentations
    createvecfile(ModelPath + '/depth3', raw_test_vec, 3, deep_test_vec)
    createvecfile(ModelPath + '/depth1', raw_test_vec, 1, shallow_test_vec)

    resbaseline = {}
    resshallow = {}
    resdeep = {}

    results_path = ModelPath + 'kfold_results_dictionnaries.pkl'
    idx_path = DataPrefix + '_current_idx_train.idx'
    tasks = range(5)

    def save_results():
        # The dictionaries are dumped with a binary pickle protocol (-1), so
        # the file has to be opened in binary mode.
        with open(results_path, 'wb') as out:
            cPickle.dump(resbaseline, out, -1)
            cPickle.dump(resshallow, out, -1)
            cPickle.dump(resdeep, out, -1)

    # The full training files are never written by this function: load them
    # once up front (and close them) rather than re-reading them per task.
    with open(DataPrefix + '-train.vec', 'r') as src:
        orig_vec = src.readlines()
    with open(DataPrefix + '-train.lab', 'r') as src:
        orig_lab = src.readlines()

    for task in tasks:
        ListLabel = ReadLabelFile(DataPrefix + '-train.lab', task)
        Train_idx = SampleStratified(numpy.random, ListLabel, ClassifierTrainingSize)

        # One pass over the sampled indices; join once instead of growing
        # two strings line by line.
        picked = [(orig_vec[idx], orig_lab[idx]) for idx in Train_idx]
        textvec = ''.join([vec for vec, _ in picked])
        textlab = ''.join([lab for _, lab in picked])

        train_vec = DataPrefix + '-train_10k_task%s.vec' % task
        train_lab = DataPrefix + '-train_10k_task%s.lab' % task
        with open(train_vec, 'w') as dst:
            dst.write(textvec)
        with open(train_lab, 'w') as dst:
            dst.write(textlab)

        shallow_train_vec = ModelPath + '/DLrep_depth1_train_10k_task%s.vec' % task
        deep_train_vec = ModelPath + '/DLrep_depth3_train_10k_task%s.vec' % task
        createvecfile(ModelPath + '/depth3', train_vec, 3, deep_train_vec)
        createvecfile(ModelPath + '/depth1', train_vec, 1, shallow_train_vec)

        # Folds are built from the labels of the sampled subset.
        ListLabel = ReadLabelFile(train_lab, task)
        Folds_idx = SampleStratifiedFolds(ListLabel, FoldsNumber)

        resbaseline[task] = []
        resshallow[task] = []
        resdeep[task] = []

        # (name used in the output prefix, result dict, train vectors,
        # test vectors) -- evaluated in this order for every fold.
        variants = (
            ('baseline', resbaseline, train_vec, raw_test_vec),
            ('shallow', resshallow, shallow_train_vec, shallow_test_vec),
            ('deep', resdeep, deep_train_vec, deep_test_vec),
        )

        for idxfold, k in enumerate(Folds_idx):
            # Creating the index file
            CreateIdxFile(k, idx_path)

            for name, results, variant_train_vec, variant_test_vec in variants:
                TrainingData, ValidationData = loadTrainDataset(
                    task, train_lab, variant_train_vec, idx_path)
                best_classifier = TrainAndOptimizeClassifer(
                    TrainingData, ValidationData, True)
                TestData = loadTestDataset(task, test_lab, variant_test_vec)
                results[task].append(Classifier(
                    best_classifier, TestData,
                    DataPrefix + '_%s_task_%s_fold_%s' % (name, task, idxfold)))

            # Checkpoint the partial results after each fold.
            save_results()

    for i in tasks:
        for name, results in (('baseline', resbaseline),
                              ('shallow', resshallow),
                              ('deep', resdeep)):
            print >> sys.stderr, name, numpy.mean(results[i]), " +/- ", numpy.std(results[i])

    save_results()