def train(self):
    """Train the model according to this model's configuration section.

    All settings are read with ``self.readField(self.config, self.name, key)``.
    The method creates ``<output_directory>/<name>``, runs ``max_epoch``
    epochs of ``train_size // batchsize`` batches each, and optionally:

    * saves the validation representations into ``<output>/vis`` every
      ``show_freq`` epochs (a non-positive ``show_freq`` disables this),
    * evaluates the validation representations with an ``Evaluator`` every
      ``eval_freq`` epochs (``eval_freq == 0`` disables this),
    * calls ``self.doCheckpoint`` when ``checkpoint`` is ``"True"``,
    * extracts validation/training representations when ``extract_reps``
      is ``"True"``.

    Finally the configuration is saved via ``self.saveConfig``.
    """
    outputPrefix = self.readField(self.config, self.name, "output_directory")
    outputDir = os.path.join(outputPrefix, self.name)
    if not os.path.exists(outputDir):
        os.makedirs(outputDir)

    showFreq = int(self.readField(self.config, self.name, "show_freq"))
    # The directory used to be created for showFreq > 0 but written to for
    # showFreq != 0, so a negative value crashed with a NameError on visDir.
    # One flag now drives both places; non-positive means "disabled".
    showEnabled = showFreq > 0
    visDir = None
    if showEnabled:
        visDir = os.path.join(outputDir, 'vis')
        if not os.path.exists(visDir):
            os.mkdir(visDir)

    #do normalization for images if they are not normalized before
    normalize = self.str2bool(
        self.readField(self.config, self.name, "normalize"))
    trainDataSize = int(self.readField(self.config, self.name, "train_size"))
    # Explicit floor division: range() below needs an integer batch count.
    numBatch = trainDataSize // self.batchsize
    trainDataPath = self.readField(self.config, self.name, "train_data")
    if self.readField(self.config, self.name, "extract_reps") == "True":
        trainRepsPath = self.readField(self.config, self.name, "train_reps")
    else:
        trainRepsPath = None
    trainDataLoader = DataHandler(trainDataPath, trainRepsPath, self.vDim,
                                  self.hDim, self.batchsize, numBatch,
                                  normalize)

    evalFreq = int(self.readField(self.config, self.name, 'eval_freq'))
    evaluator = None  # named 'evaluator' so the builtin eval() is not shadowed
    validation_data = None
    if evalFreq != 0:
        qsize = int(self.readField(self.config, self.name, "query_size"))
        labelPath = self.readField(self.config, self.name, "label")
        queryPath = self.readField(self.config, self.name, "query")
        label = np.load(labelPath)
        evaluator = Evaluator(queryPath, label,
                              os.path.join(outputDir, 'perf'), self.name,
                              query_size=qsize, verbose=self.verbose)
    # The visualisation dump needs the validation data as well; previously it
    # was only loaded when evaluation was enabled, which raised a NameError
    # for show_freq > 0 combined with eval_freq == 0.
    if evalFreq != 0 or showEnabled:
        evalPath = self.readField(self.config, self.name, "validation_data")
        validation_data = gp.garray(np.load(evalPath))
        if normalize:
            validation_data = trainDataLoader.doNormalization(validation_data)

    maxEpoch = int(self.readField(self.config, self.name, "max_epoch"))

    nCommon, nMetric, title = self.getDisplayFields()
    if self.verbose:
        # Single-argument parenthesised form prints the same under Python 2.
        print(title)

    for epoch in range(maxEpoch):
        perf = np.zeros(nMetric)
        trainDataLoader.reset()
        for _ in range(numBatch):
            batch = trainDataLoader.getOneBatch()
            curr = self.trainOneBatch(batch, epoch, computeStat=True)
            perf = self.aggregatePerf(perf, curr)

        if showEnabled and (1 + epoch) % showFreq == 0:
            validation_code = self.getReps(validation_data)
            np.save(os.path.join(visDir, '%dvis' % (1 + epoch)),
                    validation_code)
            validation_code = None
        if evalFreq != 0 and (1 + epoch) % evalFreq == 0:
            validation_code = self.getReps(validation_data)
            evaluator.evalSingleModal(validation_code, epoch, self.name + 'V')
            # Drop the reference so the representations can be freed.
            validation_code = None
        if self.verbose:
            self.printEpochInfo(epoch, perf, nCommon)

    if self.readField(self.config, self.name, "checkpoint") == "True":
        self.doCheckpoint(outputDir)

    if self.readField(self.config, self.name, "extract_reps") == "True":
        if evalFreq != 0:
            validation_reps_path = self.readField(self.config, self.name,
                                                  "validation_reps")
            self.extractValidationReps(validation_data, validation_reps_path)
        self.extractTrainReps(trainDataLoader, numBatch)

    self.saveConfig(outputDir)
def train(self):
    """Run the configured training loop for this model.

    Every option is looked up with
    ``self.readField(self.config, self.name, key)``. The steps are:

    1. Create the output directory ``<output_directory>/<name>`` (and its
       ``vis`` sub-directory when ``show_freq`` is positive).
    2. Build a ``DataHandler`` over ``train_data``.
    3. When ``eval_freq`` is non-zero build an ``Evaluator``; when either
       evaluation or visualisation is enabled, load ``validation_data``
       (normalised through the data handler if ``normalize`` is set).
    4. For ``max_epoch`` epochs, train on ``train_size // batchsize``
       batches, aggregate the per-batch statistics, and periodically save
       and/or evaluate the validation representations.
    5. Optionally checkpoint (``checkpoint == "True"``) and extract
       representations (``extract_reps == "True"``), then save the config.
    """
    outputPrefix = self.readField(self.config, self.name, "output_directory")
    outputDir = os.path.join(outputPrefix, self.name)
    if not os.path.exists(outputDir):
        os.makedirs(outputDir)

    showFreq = int(self.readField(self.config, self.name, "show_freq"))
    # Creation of 'vis' was guarded by showFreq > 0 while the save inside the
    # epoch loop was guarded by showFreq != 0; a negative value therefore
    # reached np.save with visDir undefined. Both now share this flag.
    showEnabled = showFreq > 0
    visDir = None
    if showEnabled:
        visDir = os.path.join(outputDir, 'vis')
        if not os.path.exists(visDir):
            os.mkdir(visDir)

    #do normalization for images if they are not normalized before
    normalize = self.str2bool(
        self.readField(self.config, self.name, "normalize"))
    trainDataSize = int(self.readField(self.config, self.name, "train_size"))
    # Floor division keeps numBatch an int regardless of division semantics.
    numBatch = trainDataSize // self.batchsize
    trainDataPath = self.readField(self.config, self.name, "train_data")
    if self.readField(self.config, self.name, "extract_reps") == "True":
        trainRepsPath = self.readField(self.config, self.name, "train_reps")
    else:
        trainRepsPath = None
    trainDataLoader = DataHandler(trainDataPath, trainRepsPath, self.vDim,
                                  self.hDim, self.batchsize, numBatch,
                                  normalize)

    evalFreq = int(self.readField(self.config, self.name, 'eval_freq'))
    evalEnabled = evalFreq != 0
    evaluator = None  # avoids shadowing the builtin eval()
    validation_data = None
    if evalEnabled:
        qsize = int(self.readField(self.config, self.name, "query_size"))
        labelPath = self.readField(self.config, self.name, "label")
        queryPath = self.readField(self.config, self.name, "query")
        label = np.load(labelPath)
        evaluator = Evaluator(queryPath, label,
                              os.path.join(outputDir, 'perf'), self.name,
                              query_size=qsize, verbose=self.verbose)
    # Validation data is consumed by both the evaluation and the
    # visualisation branch, so load it when either one is enabled. Loading
    # it only for evaluation left it undefined for show-only runs.
    if evalEnabled or showEnabled:
        evalPath = self.readField(self.config, self.name, "validation_data")
        validation_data = gp.garray(np.load(evalPath))
        if normalize:
            validation_data = trainDataLoader.doNormalization(validation_data)

    maxEpoch = int(self.readField(self.config, self.name, "max_epoch"))

    nCommon, nMetric, title = self.getDisplayFields()
    if self.verbose:
        # Parenthesised single argument: same output under Python 2.
        print(title)

    for epoch in range(maxEpoch):
        perf = np.zeros(nMetric)
        trainDataLoader.reset()
        for _ in range(numBatch):
            batch = trainDataLoader.getOneBatch()
            curr = self.trainOneBatch(batch, epoch, computeStat=True)
            perf = self.aggregatePerf(perf, curr)

        epochNumber = 1 + epoch  # 1-based epoch index used for scheduling
        if showEnabled and epochNumber % showFreq == 0:
            validation_code = self.getReps(validation_data)
            np.save(os.path.join(visDir, '%dvis' % epochNumber),
                    validation_code)
            validation_code = None
        if evalEnabled and epochNumber % evalFreq == 0:
            validation_code = self.getReps(validation_data)
            evaluator.evalSingleModal(validation_code, epoch, self.name + 'V')
            # Release the representations once they have been evaluated.
            validation_code = None
        if self.verbose:
            self.printEpochInfo(epoch, perf, nCommon)

    if self.readField(self.config, self.name, "checkpoint") == "True":
        self.doCheckpoint(outputDir)

    if self.readField(self.config, self.name, "extract_reps") == "True":
        if evalEnabled:
            validation_reps_path = self.readField(self.config, self.name,
                                                  "validation_reps")
            self.extractValidationReps(validation_data, validation_reps_path)
        self.extractTrainReps(trainDataLoader, numBatch)

    self.saveConfig(outputDir)
def train(self):
    """Train the model on one or more column-wise concatenated data sources.

    Every option is looked up with
    ``self.readField(self.config, self.name, key)``.

    ``train_data`` is a comma-separated list of paths; one ``DataHandler`` is
    built per path. The optional ``train_dims`` option is a comma-separated
    list giving the input dimension for each path (default: a single entry,
    ``self.vDim``). For every training step one batch is taken from each
    handler and the batches are concatenated along axis 1 before being passed
    to ``self.trainOneBatch``.

    Optional behaviour, driven by the configuration:

    * ``show_freq`` > 0: save validation representations to ``<output>/vis``
      every ``show_freq`` epochs.
    * ``eval_freq`` != 0: evaluate validation representations with an
      ``Evaluator`` every ``eval_freq`` epochs.
    * ``keep_dataloader``: keep the list of handlers on
      ``self.trainDataLoader``.
    * ``checkpoint`` == "True": call ``self.doCheckpoint``.
    * ``extract_reps`` == "True": extract validation/training representations.

    Raises:
        ValueError: if fewer dimensions than training data paths are
            configured.
    """
    outputPrefix = self.readField(self.config, self.name, "output_directory")
    outputDir = os.path.join(outputPrefix, self.name)
    if not os.path.exists(outputDir):
        os.makedirs(outputDir)

    showFreq = int(self.readField(self.config, self.name, "show_freq"))
    # The 'vis' directory was created for showFreq > 0 but written to for
    # showFreq != 0, so a negative value crashed on an undefined visDir.
    # Both places now use the same flag; non-positive means "disabled".
    showEnabled = showFreq > 0
    visDir = None
    if showEnabled:
        visDir = os.path.join(outputDir, 'vis')
        if not os.path.exists(visDir):
            os.mkdir(visDir)

    #do normalization for images if they are not normalized before
    normalize = self.str2bool(
        self.readField(self.config, self.name, "normalize"))
    trainDataSize = int(self.readField(self.config, self.name, "train_size"))
    # Floor division keeps numBatch an int for the range() call below.
    numBatch = trainDataSize // self.batchsize
    if self.readField(self.config, self.name, "extract_reps") == "True":
        trainRepsPath = self.readField(self.config, self.name, "train_reps")
    else:
        trainRepsPath = None
    # Parenthesised single-argument prints behave identically on Python 2.
    print(trainDataSize)

    #Ehsan
    rawPaths = self.readField(self.config, self.name, 'train_data')
    # Strip so that "a.npy, b.npy" does not yield a path with a leading space.
    trainDataPath = [path.strip() for path in rawPaths.split(',')]
    print(trainDataPath)
    dims = [self.vDim]
    if self.config.has_option(self.name, 'train_dims'):
        dimsstr = self.readField(self.config, self.name, 'train_dims')
        dims = [int(d) for d in dimsstr.split(',')]
    if len(dims) < len(trainDataPath):
        # Previously surfaced as an opaque IndexError while building loaders.
        raise ValueError(
            "train_dims provides %d dimension(s) for %d train_data path(s)"
            % (len(dims), len(trainDataPath)))
    # zip() stops at the shorter list, so surplus dims are ignored as before.
    trainDataLoader = [
        DataHandler(path, trainRepsPath, dim, self.hDim, self.batchsize,
                    numBatch, normalize)
        for path, dim in zip(trainDataPath, dims)
    ]

    evalFreq = int(self.readField(self.config, self.name, 'eval_freq'))
    evalEnabled = evalFreq != 0
    evaluator = None  # avoids shadowing the builtin eval()
    validation_data = None
    if evalEnabled:
        qsize = int(self.readField(self.config, self.name, "query_size"))
        labelPath = self.readField(self.config, self.name, "label")
        queryPath = self.readField(self.config, self.name, "query")
        label = np.load(labelPath)
        evaluator = Evaluator(queryPath, label,
                              os.path.join(outputDir, 'perf'), self.name,
                              query_size=qsize, verbose=self.verbose)
    # Validation data feeds both the evaluation and the visualisation branch;
    # loading it only for evaluation left it undefined for show-only runs.
    if evalEnabled or showEnabled:
        evalPath = self.readField(self.config, self.name, "validation_data")
        validation_data = gp.garray(np.load(evalPath))
        if normalize:
            # trainDataLoader is a list here; calling doNormalization on the
            # list itself raised AttributeError.
            if len(trainDataLoader) == 1:
                validation_data = trainDataLoader[0].doNormalization(
                    validation_data)
            else:
                # NOTE(review): assumes the validation matrix is laid out like
                # a training batch, i.e. the sources concatenated along axis 1
                # in train_data order with widths given by train_dims --
                # confirm against how the validation file is produced.
                normalized = []
                offset = 0
                for loader, dim in zip(trainDataLoader, dims):
                    block = validation_data[:, offset:offset + dim]
                    normalized.append(loader.doNormalization(block))
                    offset += dim
                validation_data = gp.concatenate(tuple(normalized), axis=1)

    maxEpoch = int(self.readField(self.config, self.name, "max_epoch"))

    nCommon, nMetric, title = self.getDisplayFields()
    if self.verbose:
        print(title)

    for epoch in range(maxEpoch):
        perf = np.zeros(nMetric)
        for loader in trainDataLoader:
            loader.reset()
        for _ in range(numBatch):
            # One batch per source, joined column-wise into a single input.
            batches = [loader.getOneBatch() for loader in trainDataLoader]
            batch = gp.concatenate(tuple(batches), axis=1)
            curr = self.trainOneBatch(batch, epoch, computeStat=True)
            perf = self.aggregatePerf(perf, curr)

        if showEnabled and (1 + epoch) % showFreq == 0:
            validation_code = self.getReps(validation_data)
            np.save(os.path.join(visDir, '%dvis' % (1 + epoch)),
                    validation_code)
            validation_code = None
        if evalEnabled and (1 + epoch) % evalFreq == 0:
            validation_code = self.getReps(validation_data)
            evaluator.evalSingleModal(validation_code, epoch, self.name + 'V')
            # Release the representations once they have been evaluated.
            validation_code = None
        if self.verbose:
            self.printEpochInfo(epoch, perf, nCommon)

    #Ehsan
    # Best-effort lookup: a failure to read the option is reported and
    # training carries on. Catch Exception rather than everything so that
    # KeyboardInterrupt/SystemExit are no longer swallowed.
    try:
        keepDL = self.str2bool(
            self.readField(self.config, self.name, "keep_dataloader"))
    except Exception:
        print('exception occured')
    else:
        if keepDL:
            # Same text as the Python 2 statement
            # "print 'saving tdl for ', self.name" (note the double space).
            print('saving tdl for  %s' % (self.name,))
            self.trainDataLoader = trainDataLoader

    if self.readField(self.config, self.name, "checkpoint") == "True":
        self.doCheckpoint(outputDir)

    if self.readField(self.config, self.name, "extract_reps") == "True":
        if evalEnabled:
            validation_reps_path = self.readField(self.config, self.name,
                                                  "validation_reps")
            self.extractValidationReps(validation_data, validation_reps_path)
        # NOTE(review): this passes the list of loaders, unchanged from the
        # original call; confirm extractTrainReps accepts a list.
        self.extractTrainReps(trainDataLoader, numBatch)

    self.saveConfig(outputDir)