def CreateRep(ModelPath, DataPrefix):
    """Build the ``_DL3`` vector file for the data set named by DataPrefix.

    Delegates to ``createvecfile`` with ``ModelPath`` as its first argument,
    ``<DataPrefix>.vec`` as its second, the literal ``3`` as its third
    (presumably the representation depth, given the ``_DL3`` suffix -- confirm
    against ``createvecfile``) and ``<DataPrefix>_DL3.vec`` as its fourth.

    Nothing is returned.
    """
    source_vec = DataPrefix + '.vec'
    target_vec = DataPrefix + '_DL3.vec'
    createvecfile(ModelPath, source_vec, 3, target_vec)
def evalMain(FoldsNumber, ClassifierTrainingSize, DataPrefix, ModelPath, Seed):
    """Train the model, then score baseline, shallow and deep features.

    Steps, in order:

    1. Seed ``numpy.random`` with ``Seed`` and call ``OpenTableSDAEexp`` on
       ``ModelPath + 'DARPA.conf'``.
    2. Call ``createvecfile`` to produce the depth-3 and depth-1 versions of
       ``<DataPrefix>-test.vec`` under ``ModelPath``.
    3. For each of the 5 tasks: draw a stratified training subset, write it to
       ``<DataPrefix>-train_10k_task<task>.vec/.lab``, produce its depth-3 and
       depth-1 versions, split it into folds and, for every fold, train and
       test a classifier on the raw ("baseline"), depth-1 ("shallow") and
       depth-3 ("deep") vectors.
    4. Print mean and standard deviation of the per-fold results for every
       task to stderr and pickle the three result dictionaries.

    Parameters:
        FoldsNumber: forwarded to ``SampleStratifiedFolds``.
        ClassifierTrainingSize: forwarded to ``SampleStratified``.
        DataPrefix: common prefix of every data file read or written.
        ModelPath: model directory.  NOTE(review): it is concatenated both
            as ``ModelPath + 'DARPA.conf'`` and as ``ModelPath + '/depth3'``,
            so it presumably has to end with a path separator -- confirm.
        Seed: seed handed to ``numpy.random.seed``.

    Returns nothing; the results are written to
    ``ModelPath + 'kfold_results_dictionnaries.pkl'`` as three consecutive
    pickles (baseline, shallow, deep), each a dict mapping task number to the
    list of per-fold values returned by ``Classifier``.
    """
    numpy.random.seed(Seed)

    # Learn the model.
    OpenTableSDAEexp(ModelPath + 'DARPA.conf', ModelPath)

    # Create the test-set representations (depth 3 first, then depth 1).
    createvecfile(ModelPath + '/depth3', DataPrefix + '-test.vec', 3,
                  ModelPath + '/DLrep_depth3_test.vec')
    createvecfile(ModelPath + '/depth1', DataPrefix + '-test.vec', 1,
                  ModelPath + '/DLrep_depth1_test.vec')

    resbaseline = {}
    resshallow = {}
    resdeep = {}

    results_path = ModelPath + 'kfold_results_dictionnaries.pkl'
    idx_path = DataPrefix + '_current_idx_train.idx'
    test_lab_path = DataPrefix + '-test.lab'

    def save_results():
        # Protocol -1 selects a binary pickle protocol, so the file has to be
        # opened in binary mode; text mode corrupts the stream on platforms
        # that translate newlines.
        with open(results_path, 'wb') as out:
            cPickle.dump(resbaseline, out, -1)
            cPickle.dump(resshallow, out, -1)
            cPickle.dump(resdeep, out, -1)

    # The full training files are the same for every task: read them once and
    # make sure the handles are closed.
    with open(DataPrefix + '-train.vec', 'r') as src:
        orig_vec = src.readlines()
    with open(DataPrefix + '-train.lab', 'r') as src:
        orig_lab = src.readlines()

    for task in range(5):
        ListLabel = ReadLabelFile(DataPrefix + '-train.lab', task)
        Train_idx = SampleStratified(numpy.random, ListLabel,
                                     ClassifierTrainingSize)

        # Collect the sampled lines in a single pass over Train_idx, then join
        # once instead of growing two strings line by line.
        vec_lines = []
        lab_lines = []
        for idx in Train_idx:
            vec_lines.append(orig_vec[idx])
            lab_lines.append(orig_lab[idx])

        sub_vec_path = DataPrefix + '-train_10k_task%s.vec' % task
        sub_lab_path = DataPrefix + '-train_10k_task%s.lab' % task
        with open(sub_vec_path, 'w') as dst:
            dst.write(''.join(vec_lines))
        with open(sub_lab_path, 'w') as dst:
            dst.write(''.join(lab_lines))

        deep_train_path = ModelPath + '/DLrep_depth3_train_10k_task%s.vec' % task
        shallow_train_path = ModelPath + '/DLrep_depth1_train_10k_task%s.vec' % task
        createvecfile(ModelPath + '/depth3', sub_vec_path, 3, deep_train_path)
        createvecfile(ModelPath + '/depth1', sub_vec_path, 1, shallow_train_path)

        ListLabel = ReadLabelFile(sub_lab_path, task)
        Folds_idx = SampleStratifiedFolds(ListLabel, FoldsNumber)

        resbaseline[task] = []
        resshallow[task] = []
        resdeep[task] = []

        # (name, training vectors, test vectors, result dict); the order
        # baseline -> shallow -> deep is kept so that any use of the global
        # random state by the helpers happens in the same sequence as before.
        variants = (
            ('baseline', sub_vec_path,
             DataPrefix + '-test.vec', resbaseline),
            ('shallow', shallow_train_path,
             ModelPath + '/DLrep_depth1_test.vec', resshallow),
            ('deep', deep_train_path,
             ModelPath + '/DLrep_depth3_test.vec', resdeep),
        )

        for idxfold, k in enumerate(Folds_idx):
            # Creating the index file
            CreateIdxFile(k, idx_path)

            for name, train_vec_path, test_vec_path, results in variants:
                TrainingData, ValidationData = loadTrainDataset(
                    task, sub_lab_path, train_vec_path, idx_path)
                best_classifier = TrainAndOptimizeClassifer(
                    TrainingData, ValidationData, True)
                TestData = loadTestDataset(task, test_lab_path, test_vec_path)
                results[task].append(
                    Classifier(best_classifier, TestData,
                               DataPrefix + '_%s_task_%s_fold_%s'
                               % (name, task, idxfold)))

            # Checkpoint after every fold so finished work survives a crash.
            save_results()

    for i in range(5):
        print >> sys.stderr, 'baseline', numpy.mean(resbaseline[i]), " +/- ", numpy.std(resbaseline[i])
        print >> sys.stderr, 'shallow', numpy.mean(resshallow[i]), " +/- ", numpy.std(resshallow[i])
        print >> sys.stderr, 'deep', numpy.mean(resdeep[i]), " +/- ", numpy.std(resdeep[i])

    # Final write; also covers the case where no fold was produced at all.
    save_results()
def evalMain(FoldsNumber, ClassifierTrainingSize, DataPrefix, ModelPath, Seed):
    """Train the model, then score baseline, shallow and deep features.

    NOTE(review): a function with this name and the same logic is also
    defined earlier in this file; this later definition is the one that stays
    bound to ``evalMain`` once the module is loaded.

    Steps, in order:

    1. Seed ``numpy.random`` with ``Seed`` and call ``OpenTableSDAEexp`` on
       ``ModelPath + 'DARPA.conf'``.
    2. Call ``createvecfile`` to produce the depth-3 and depth-1 versions of
       ``<DataPrefix>-test.vec`` under ``ModelPath``.
    3. For each of the 5 tasks: draw a stratified training subset, write it to
       ``<DataPrefix>-train_10k_task<task>.vec/.lab``, produce its depth-3 and
       depth-1 versions, split it into folds and, for every fold, train and
       test a classifier on the raw ("baseline"), depth-1 ("shallow") and
       depth-3 ("deep") vectors.
    4. Print mean and standard deviation of the per-fold results for every
       task to stderr and pickle the three result dictionaries.

    Parameters:
        FoldsNumber: forwarded to ``SampleStratifiedFolds``.
        ClassifierTrainingSize: forwarded to ``SampleStratified``.
        DataPrefix: common prefix of every data file read or written.
        ModelPath: model directory.  NOTE(review): it is concatenated both
            as ``ModelPath + 'DARPA.conf'`` and as ``ModelPath + '/depth3'``,
            so it presumably has to end with a path separator -- confirm.
        Seed: seed handed to ``numpy.random.seed``.

    Returns nothing; the results are written to
    ``ModelPath + 'kfold_results_dictionnaries.pkl'`` as three consecutive
    pickles (baseline, shallow, deep), each a dict mapping task number to the
    list of per-fold values returned by ``Classifier``.
    """
    numpy.random.seed(Seed)

    # Learn the model.
    OpenTableSDAEexp(ModelPath + 'DARPA.conf', ModelPath)

    # Create the test-set representations (depth 3 first, then depth 1).
    createvecfile(ModelPath + '/depth3', DataPrefix + '-test.vec', 3,
                  ModelPath + '/DLrep_depth3_test.vec')
    createvecfile(ModelPath + '/depth1', DataPrefix + '-test.vec', 1,
                  ModelPath + '/DLrep_depth1_test.vec')

    resbaseline = {}
    resshallow = {}
    resdeep = {}

    results_path = ModelPath + 'kfold_results_dictionnaries.pkl'
    idx_path = DataPrefix + '_current_idx_train.idx'
    test_lab_path = DataPrefix + '-test.lab'

    def save_results():
        # Protocol -1 selects a binary pickle protocol, so the file has to be
        # opened in binary mode; text mode corrupts the stream on platforms
        # that translate newlines.
        with open(results_path, 'wb') as out:
            cPickle.dump(resbaseline, out, -1)
            cPickle.dump(resshallow, out, -1)
            cPickle.dump(resdeep, out, -1)

    # The full training files are the same for every task: read them once and
    # make sure the handles are closed.
    with open(DataPrefix + '-train.vec', 'r') as src:
        orig_vec = src.readlines()
    with open(DataPrefix + '-train.lab', 'r') as src:
        orig_lab = src.readlines()

    for task in range(5):
        ListLabel = ReadLabelFile(DataPrefix + '-train.lab', task)
        Train_idx = SampleStratified(numpy.random, ListLabel,
                                     ClassifierTrainingSize)

        # Collect the sampled lines in a single pass over Train_idx, then join
        # once instead of growing two strings line by line.
        vec_lines = []
        lab_lines = []
        for idx in Train_idx:
            vec_lines.append(orig_vec[idx])
            lab_lines.append(orig_lab[idx])

        sub_vec_path = DataPrefix + '-train_10k_task%s.vec' % task
        sub_lab_path = DataPrefix + '-train_10k_task%s.lab' % task
        with open(sub_vec_path, 'w') as dst:
            dst.write(''.join(vec_lines))
        with open(sub_lab_path, 'w') as dst:
            dst.write(''.join(lab_lines))

        deep_train_path = ModelPath + '/DLrep_depth3_train_10k_task%s.vec' % task
        shallow_train_path = ModelPath + '/DLrep_depth1_train_10k_task%s.vec' % task
        createvecfile(ModelPath + '/depth3', sub_vec_path, 3, deep_train_path)
        createvecfile(ModelPath + '/depth1', sub_vec_path, 1, shallow_train_path)

        ListLabel = ReadLabelFile(sub_lab_path, task)
        Folds_idx = SampleStratifiedFolds(ListLabel, FoldsNumber)

        resbaseline[task] = []
        resshallow[task] = []
        resdeep[task] = []

        # (name, training vectors, test vectors, result dict); the order
        # baseline -> shallow -> deep is kept so that any use of the global
        # random state by the helpers happens in the same sequence as before.
        variants = (
            ('baseline', sub_vec_path,
             DataPrefix + '-test.vec', resbaseline),
            ('shallow', shallow_train_path,
             ModelPath + '/DLrep_depth1_test.vec', resshallow),
            ('deep', deep_train_path,
             ModelPath + '/DLrep_depth3_test.vec', resdeep),
        )

        for idxfold, k in enumerate(Folds_idx):
            # Creating the index file
            CreateIdxFile(k, idx_path)

            for name, train_vec_path, test_vec_path, results in variants:
                TrainingData, ValidationData = loadTrainDataset(
                    task, sub_lab_path, train_vec_path, idx_path)
                best_classifier = TrainAndOptimizeClassifer(
                    TrainingData, ValidationData, True)
                TestData = loadTestDataset(task, test_lab_path, test_vec_path)
                results[task].append(
                    Classifier(best_classifier, TestData,
                               DataPrefix + '_%s_task_%s_fold_%s'
                               % (name, task, idxfold)))

            # Checkpoint after every fold so finished work survives a crash.
            save_results()

    for i in range(5):
        print >> sys.stderr, 'baseline', numpy.mean(resbaseline[i]), " +/- ", numpy.std(resbaseline[i])
        print >> sys.stderr, 'shallow', numpy.mean(resshallow[i]), " +/- ", numpy.std(resshallow[i])
        print >> sys.stderr, 'deep', numpy.mean(resdeep[i]), " +/- ", numpy.std(resdeep[i])

    # Final write; also covers the case where no fold was produced at all.
    save_results()