Example #1
0
    def commit(self, store, model):
        """Append the current state of the store to the CSV.

        This one is meant to be called by the trainer. One line is written for
        every (map, output layer, output function) found in store["scores"],
        then the console report and the saving rules are handled.

        :param dict store: The trainer's store. The keys read here are
            "hyperParameters", "runInfos" (must contain "epoch"), "scores"
            and "setSizes". store["hyperParameters"] is left untouched.
        :param model: The object whose save(filename) method is called for
            each rule of self.whenToSave that asks for a save.
        """
        def _fillLine(csvFile, score, minScore, maxScore, mapName, setLen,
                      outputName, outputFunction, **csvValues):
            """Fill a new line of csvFile with the given values and commit it."""
            line = csvFile.newLine()

            for key, value in csvValues.items():
                line[key] = value
            # Scores are indexable: element 0 goes to the value column and
            # element 1 to the matching "_commit" column.
            line["score"] = score[0]
            line["min_score"] = minScore[0]
            line["min_score_commit"] = minScore[1]

            line["max_score"] = maxScore[0]
            line["max_score_commit"] = maxScore[1]

            line["set"] = "%s" % (mapName)
            line["set_size"] = "%s" % (setLen)
            line["output_layer"] = outputName
            line["output_function"] = outputFunction
            line.commit()

        self.length += 1
        if self.csvLegend is None:
            # The legend and the output file are created by the first commit.
            self.csvLegend = list(store["hyperParameters"].keys())
            self.csvLegend.extend(store["runInfos"].keys())
            self.csvLegend.extend([
                "score", "min_score", "min_score_commit", "max_score",
                "max_score_commit", "set", "set_size", "output_layer",
                "output_function"
            ])

            self.csvFile = CSVFile(legend=self.csvLegend, separator="\t")
            self.csvFile.streamToFile(self.filename, writeRate=self.writeRate)

        # Merge into a copy: updating store["hyperParameters"] in place would
        # leak the run infos into the caller's hyper-parameters.
        muchData = dict(store["hyperParameters"])
        muchData.update(store["runInfos"])

        self.scores.update(store["scores"], store["runInfos"]["epoch"])
        # The loop variable is not called 'os', so the os module is not shadowed.
        for mapName, outputs in store["scores"].items():
            for outputName, functions in outputs.items():
                for functionName in functions:
                    _fillLine(
                        self.csvFile,
                        self.scores.getScore(mapName, outputName,
                                             functionName),
                        self.scores.getMinScore(mapName, outputName,
                                                functionName),
                        self.scores.getMaxScore(mapName, outputName,
                                                functionName),
                        mapName,
                        store["setSizes"][mapName],
                        outputName,
                        functionName,
                        **muchData)

        if self.printRate > 0 and (self.length % self.printRate) == 0:
            self.printCurrentState()

        for rule in self.whenToSave:
            if rule._shouldISave(self):
                model.save(rule.getFilename(self))
Example #2
0
def _importSNPs_AgnosticSNP(setName, species, genomeSource, snpsFile):
    """Import a set of agnostic SNPs from a tab separated file.

    This function will also create an index on start->chromosomeNumber->setName:
    the index is dropped before the import and rebuilt once every SNP has
    been saved. Warning: pyGeno will interpret all positions as 0 based.

    :param setName: Name stored on every imported SNP and on the SNPMaster entry.
    :param species: Species stored on every imported SNP and on the SNPMaster entry.
    :param genomeSource: Not used by this function.
    :param snpsFile: The file handed to CSVFile.parse; it must provide a
        'chromosomeNumber' column.
    :returns: True once the import is done.
    """
    printf('importing SNP set %s for species %s...' % (setName, species))

    snpData = CSVFile()
    snpData.parse(snpsFile, separator="\t")

    AgnosticSNP.dropIndex(('start', 'chromosomeNumber', 'setName'))
    conf.db.beginTransaction()

    pBar = ProgressBar(len(snpData))
    pLabel = ''
    currChrNumber = None
    for snpEntry in snpData:
        # The progress bar label only changes when a new chromosome starts.
        tmpChr = snpEntry['chromosomeNumber']
        if tmpChr != currChrNumber:
            currChrNumber = tmpChr
            pLabel = 'Chr %s...' % currChrNumber

        snp = AgnosticSNP()
        snp.species = species
        snp.setName = setName
        for f in snp.getFields():
            try:
                setattr(snp, f, snpEntry[f])
            except KeyError:
                # species and setName come from the arguments, so a file
                # without those columns is not worth a warning.
                if f not in ('species', 'setName'):
                    # The field name is interpolated here, like every other
                    # printf call of this function does.
                    printf("Warning filetype as no key %s" % f)
        snp.quality = float(snp.quality)
        snp.start = int(snp.start)
        snp.end = int(snp.end)
        snp.save()
        pBar.update(label=pLabel)

    pBar.close()

    snpMaster = SNPMaster()
    snpMaster.set(setName=setName, SNPType='AgnosticSNP', species=species)
    snpMaster.save()

    printf('saving...')
    conf.db.endTransaction()
    printf('creating indexes...')
    AgnosticSNP.ensureGlobalIndex(('start', 'chromosomeNumber', 'setName'))
    printf('importation of SNP set %s for species %s done.' %
           (setName, species))

    return True
Example #3
0
	def commit(self, store, model):
		"""Append the current state of the store to the CSV.

		This one is meant to be called by the trainer. One line is written for
		every (map, output layer, output function) found in store["scores"].
		The store and its epoch are then remembered on the recorder before the
		console report and the saving rules are handled.

		:param dict store: The trainer's store. The keys read here are
			"hyperParameters", "runInfos" (must contain "epoch"), "scores"
			and "setSizes". store["hyperParameters"] is left untouched.
		:param model: The object whose save(filename) method is called for
			each rule of self.whenToSave that asks for a save.
		"""
		def _fillLine(csvFile, score, minScore, maxScore, mapName, setLen, outputName, outputFunction, **csvValues):
			"""Fill a new line of csvFile with the given values and commit it."""
			line = csvFile.newLine()

			for key, value in csvValues.items():
				line[key] = value
			# Scores are indexable: element 0 goes to the value column and
			# element 1 to the matching "_commit" column.
			line["score"] = score[0]
			line["min_score"] = minScore[0]
			line["min_score_commit"] = minScore[1]

			line["max_score"] = maxScore[0]
			line["max_score_commit"] = maxScore[1]

			line["set"] = "%s" % (mapName)
			line["set_size"] = "%s" % (setLen)
			line["output_layer"] = outputName
			line["output_function"] = outputFunction
			line.commit()

		self.length += 1
		if self.csvLegend is None:
			# The legend and the output file are created by the first commit.
			self.csvLegend = list(store["hyperParameters"].keys())
			self.csvLegend.extend(store["runInfos"].keys())
			self.csvLegend.extend(["score", "min_score", "min_score_commit", "max_score", "max_score_commit", "set", "set_size", "output_layer", "output_function"])

			self.csvFile = CSVFile(legend=self.csvLegend, separator="\t")
			self.csvFile.streamToFile(self.filename, writeRate=self.writeRate)

		# Merge into a copy: updating store["hyperParameters"] in place would
		# leak the run infos into the caller's hyper-parameters.
		muchData = dict(store["hyperParameters"])
		muchData.update(store["runInfos"])

		self.scores.update(store["scores"], store["runInfos"]["epoch"])
		# The loop variable is not called 'os', so the os module is not shadowed.
		for mapName, outputs in store["scores"].items():
			for outputName, functions in outputs.items():
				for functionName in functions:
					_fillLine(
						self.csvFile,
						self.scores.getScore(mapName, outputName, functionName),
						self.scores.getMinScore(mapName, outputName, functionName),
						self.scores.getMaxScore(mapName, outputName, functionName),
						mapName,
						store["setSizes"][mapName],
						outputName,
						functionName,
						**muchData
					)

		self.trainerStore = store
		self.epoch = store["runInfos"]["epoch"]

		if self.printRate > 0 and (self.length % self.printRate) == 0:
			self.printCurrentState()

		for rule in self.whenToSave:
			if rule._shouldISave(self):
				model.save(rule.getFilename(self))
Example #4
0
    def commit(self, store, model):
        """Append the current state of the store to the CSV.

        This one is meant to be called by the trainer. One line is written for
        every (set, output) found in store["scores"]. Whenever an output gets
        its first score, or a score strictly lower than its best one, the
        model is saved under self.getBestModelFilename(outputName, theSet).

        :param dict store: The trainer's store. The keys read here are
            "hyperParameters", "runInfos", "scores" and "setSizes".
            store["hyperParameters"] is left untouched.
        :param model: The object whose save(filename) method is called when
            a new best score is recorded.
        """
        def _fillLine(csvFile, score, bestScore, setName, setLen, outputName,
                      **csvValues):
            """Fill a new line of csvFile with the given values and commit it."""
            line = csvFile.newLine()
            for key, value in csvValues.items():
                line[key] = value
            line["score"] = score
            # bestScore is the (score, commit number) pair kept in self.bestScores
            line["best_score"] = bestScore[0]
            line["best_score_commit"] = bestScore[1]
            line["set"] = "%s(%s)" % (setName, setLen)
            line["output"] = outputName
            line.commit()

        self.length += 1
        if self.csvLegend is None:
            # The legend and the output file are created by the first commit.
            self.csvLegend = list(store["hyperParameters"].keys())
            self.csvLegend.extend(store["runInfos"].keys())
            self.csvLegend.extend(
                ["score", "best_score", "best_score_commit", "set", "output"])

            self.csvFile = CSVFile(legend=self.csvLegend)
            self.csvFile.streamToFile(self.filename, writeRate=self.writeRate)

        # Built once per commit, and into a copy: updating
        # store["hyperParameters"] in place would leak the run infos into the
        # caller's hyper-parameters.
        muchData = dict(store["hyperParameters"])
        muchData.update(store["runInfos"])

        for theSet, scores in store["scores"].items():
            self.currentScores[theSet] = {}
            if theSet not in self.bestScores:
                self.bestScores[theSet] = {}
            bestOfSet = self.bestScores[theSet]

            for outputName, score in scores.items():
                self.currentScores[theSet][outputName] = score
                if outputName not in bestOfSet or score < bestOfSet[outputName][0]:
                    bestOfSet[outputName] = (score, self.length)
                    model.save(self.getBestModelFilename(outputName, theSet))

                _fillLine(self.csvFile, score, bestOfSet[outputName], theSet,
                          store["setSizes"][theSet], outputName, **muchData)

        if self.printRate > 0 and (self.length % self.printRate) == 0:
            self.printCurrentState()
Example #5
0
class CSV(Writer_ABC):
    """Training writer that streams what its loggers report to a delimited file fit for ggplot2.

    The file is created by the first commit, whose keys become the legend.

    :param string filename: The filename of the file to be generated. Every ".tsv" it contains is removed
        and the extension ".ggplot.tsv" is appended
    :param loggers: Iterable of objects whose log(trainer) yields (key, value) pairs
    :param string separator: The separator handed to the CSVFile (a tab by default)
    :param int writeRate: The write rate handed to CSVFile.streamToFile
    """
    def __init__(self, filename, loggers, separator="\t", writeRate=1):
        baseName = filename.replace(".tsv", "")
        self.filename = baseName + ".ggplot.tsv"

        self.loggers = loggers
        self.separator = separator
        self.writeRate = writeRate

        # Created by the first commit, once the legend is known
        self.csvFile = None
        self.nbCommits = 0

    def commit(self, trainer):
        """Collect the (key, value) pairs of every logger and commit them as one line"""
        firstCommit = self.csvFile is None
        # Before the file exists the values are buffered in a plain dict
        if firstCommit:
            entries = {}
        else:
            entries = self.csvFile.newLine()

        for logger in self.loggers:
            for key, value in logger.log(trainer):
                entries[key] = value

        if firstCommit:
            # The buffered keys define the legend of the new file
            self.csvFile = CSVFile(legend=entries.keys(),
                                   separator=self.separator)
            self.csvFile.streamToFile(self.filename, writeRate=self.writeRate)
            buffered = entries
            entries = self.csvFile.newLine()
            for key, value in buffered.iteritems():
                entries[key] = value

        self.nbCommits += 1
        entries.commit()

    def __len__(self):
        """returns the number of commits performed"""
        return self.nbCommits
Example #6
0
    def commit(self, trainer):
        """Collect the (key, value) pairs of every logger and commit them as one line.

        The first commit creates the file: the values are buffered in a plain
        dict, whose keys become the legend, and then copied into the first line.
        """
        firstCommit = self.csvFile is None
        if firstCommit:
            entries = {}
        else:
            entries = self.csvFile.newLine()

        for logger in self.loggers:
            for key, value in logger.log(trainer):
                entries[key] = value

        if firstCommit:
            # The buffered keys define the legend of the new file
            self.csvFile = CSVFile(legend=entries.keys(),
                                   separator=self.separator)
            self.csvFile.streamToFile(self.filename, writeRate=self.writeRate)
            buffered = entries
            entries = self.csvFile.newLine()
            for key, value in buffered.iteritems():
                entries[key] = value

        self.nbCommits += 1
        entries.commit()
Example #7
0
	def commit(self, store, model):
		"""Append the current state of the store to the CSV.

		This one is meant to be called by the trainer. One line is written for
		every (set, output) found in store["scores"]. Whenever an output gets
		its first score, or a score strictly lower than its best one, the
		model is saved.

		:param dict store: The trainer's store. The keys read here are
			"hyperParameters", "runInfos", "scores" and "setSizes".
			store["hyperParameters"] is left untouched.
		:param model: The object whose save(filename) method is called when
			a new best score is recorded.
		"""
		def _fillLine(csvFile, score, bestScore, setName, setLen, outputName, **csvValues):
			"""Fill a new line of csvFile with the given values and commit it."""
			line = csvFile.newLine()
			for key, value in csvValues.items():
				line[key] = value
			line["score"] = score
			# bestScore is the (score, commit number) pair kept in self.bestScores
			line["best_score"] = bestScore[0]
			line["best_score_commit"] = bestScore[1]
			line["set"] = "%s(%s)" % (setName, setLen)
			line["output"] = outputName
			line.commit()

		self.length += 1
		if self.csvLegend is None:
			# The legend and the output file are created by the first commit.
			self.csvLegend = list(store["hyperParameters"].keys())
			self.csvLegend.extend(store["runInfos"].keys())
			self.csvLegend.extend(["score", "best_score", "best_score_commit", "set", "output"])

			self.csvFile = CSVFile(legend=self.csvLegend)
			self.csvFile.streamToFile(self.filename, writeRate=1)

		# Built once per commit, and into a copy: updating
		# store["hyperParameters"] in place would leak the run infos into the
		# caller's hyper-parameters.
		muchData = dict(store["hyperParameters"])
		muchData.update(store["runInfos"])

		for theSet, scores in store["scores"].items():
			self.currentScores[theSet] = {}
			if theSet not in self.bestScores:
				self.bestScores[theSet] = {}
			bestOfSet = self.bestScores[theSet]

			for outputName, score in scores.items():
				self.currentScores[theSet][outputName] = score
				if outputName not in bestOfSet or score < bestOfSet[outputName][0]:
					bestOfSet[outputName] = (score, self.length)
					# NOTE(review): the name does not contain outputName, so the
					# outputs of a same set are saved under the same name.
					model.save("best-%s-%s" % (theSet, self.filename))

				_fillLine(
					self.csvFile,
					score,
					bestOfSet[outputName],
					theSet,
					store["setSizes"][theSet],
					outputName,
					**muchData
				)

		if self.verbose:
			self.printCurrentState()
Example #8
0
class GGPlot2(Recorder_ABC):
	"""This training recorder will create a nice CSV file fit for using with ggplot2 and will update
	it as the training goes. It will also save the best model for each set of the trainer, and print
	regular reports if you tell it to be verbose.

	:param string filename: The filename of the csv to be generated. Every ".csv" it contains is removed
		and the extension ".ggplot2.csv" is appended
	:param bool verbose: If true, the current state is printed after every commit
	"""

	def __init__(self, filename, verbose=True):
		self.filename = filename.replace(".csv", "") + ".ggplot2.csv"
		self.verbose = verbose

		# bestScores[set][output] is a (score, commit number) pair,
		# currentScores[set][output] is the last committed score
		self.bestScores = {}
		self.currentScores = {}

		# Both are created by the first commit
		self.csvLegend = None
		self.csvFile = None

		# Number of commits performed
		self.length = 0

	def commit(self, store, model):
		"""Append the current state of the store to the CSV.

		This one is meant to be called by the trainer. One line is written for
		every (set, output) found in store["scores"]. Whenever an output gets
		its first score, or a score strictly lower than its best one, the
		model is saved.

		:param dict store: The trainer's store. The keys read here are
			"hyperParameters", "runInfos", "scores" and "setSizes".
			store["hyperParameters"] is left untouched.
		:param model: The object whose save(filename) method is called when
			a new best score is recorded.
		"""
		def _fillLine(csvFile, score, bestScore, setName, setLen, outputName, **csvValues):
			"""Fill a new line of csvFile with the given values and commit it."""
			line = csvFile.newLine()
			for key, value in csvValues.items():
				line[key] = value
			line["score"] = score
			line["best_score"] = bestScore[0]
			line["best_score_commit"] = bestScore[1]
			line["set"] = "%s(%s)" % (setName, setLen)
			line["output"] = outputName
			line.commit()

		self.length += 1
		if self.csvLegend is None:
			self.csvLegend = list(store["hyperParameters"].keys())
			self.csvLegend.extend(store["runInfos"].keys())
			self.csvLegend.extend(["score", "best_score", "best_score_commit", "set", "output"])

			self.csvFile = CSVFile(legend=self.csvLegend)
			self.csvFile.streamToFile(self.filename, writeRate=1)

		# Built once per commit, and into a copy: updating
		# store["hyperParameters"] in place would leak the run infos into the
		# caller's hyper-parameters.
		muchData = dict(store["hyperParameters"])
		muchData.update(store["runInfos"])

		for theSet, scores in store["scores"].items():
			self.currentScores[theSet] = {}
			if theSet not in self.bestScores:
				self.bestScores[theSet] = {}
			bestOfSet = self.bestScores[theSet]

			for outputName, score in scores.items():
				self.currentScores[theSet][outputName] = score
				if outputName not in bestOfSet or score < bestOfSet[outputName][0]:
					bestOfSet[outputName] = (score, self.length)
					# NOTE(review): the name does not contain outputName, so the
					# outputs of a same set are saved under the same name.
					model.save("best-%s-%s" % (theSet, self.filename))

				_fillLine(
					self.csvFile,
					score,
					bestOfSet[outputName],
					theSet,
					store["setSizes"][theSet],
					outputName,
					**muchData
				)

		if self.verbose:
			self.printCurrentState()

	def printCurrentState(self):
		"""prints the current state stored in the recorder"""
		if self.length > 0:
			print("\n==>rec: ggplot2, commit %s, pid: %s:" % (self.length, os.getpid()))
			for setName, scores in self.bestScores.items():
				print("  |-%s set" % setName)
				for outputName in scores:
					current = self.currentScores[setName][outputName]
					if current == self.bestScores[setName][outputName][0]:
						highlight = "+best+"
					else:
						score, epoch = self.bestScores[setName][outputName]
						highlight = "(best: %s @ commit: %s)" % (score, epoch)

					print("    |->%s: %s %s" % (outputName, current, highlight))
		else:
			print("==>rec: ggplot2, nothing to show yet")

		sys.stdout.flush()

	def __len__(self):
		"""returns the number of commits performed"""
		return self.length
Example #9
0
class GGPlot2(Recorder_ABC):
    """This training recorder will create a nice CSV file fit for using with ggplot2 and will update
    it as the training goes. It will also save the best model for each set of the trainer, and print
    regular reports on the console.

    :param string filename: The filename of the csv to be generated. Every ".csv" it contains is removed
        and the extension ".ggplot2.csv" is appended
    :param int printRate: The rate at which the status is printed on the console. If set to <= to 0, will never print.
    :param int writeRate: The rate at which the status is written on disk
    """
    def __init__(self, filename, printRate=1, writeRate=1):

        self.filename = filename.replace(".csv", "") + ".ggplot2.csv"

        # bestScores[set][output] is a (score, commit number) pair,
        # currentScores[set][output] is the last committed score
        self.bestScores = {}
        self.currentScores = {}

        # Both are created by the first commit
        self.csvLegend = None
        self.csvFile = None

        # Number of commits performed
        self.length = 0

        self.printRate = printRate
        self.writeRate = writeRate

    def getBestModelFilename(self, outputName, theSet):
        """returns the name under which the best model of an output for a set is saved"""
        return "best-%s-%s-%s" % (outputName, theSet, self.filename)

    def commit(self, store, model):
        """Append the current state of the store to the CSV.

        This one is meant to be called by the trainer. One line is written for
        every (set, output) found in store["scores"]. Whenever an output gets
        its first score, or a score strictly lower than its best one, the
        model is saved under getBestModelFilename(outputName, theSet).

        :param dict store: The trainer's store. The keys read here are
            "hyperParameters", "runInfos", "scores" and "setSizes".
            store["hyperParameters"] is left untouched.
        :param model: The object whose save(filename) method is called when
            a new best score is recorded.
        """
        def _fillLine(csvFile, score, bestScore, setName, setLen, outputName,
                      **csvValues):
            """Fill a new line of csvFile with the given values and commit it."""
            line = csvFile.newLine()
            for key, value in csvValues.items():
                line[key] = value
            line["score"] = score
            line["best_score"] = bestScore[0]
            line["best_score_commit"] = bestScore[1]
            line["set"] = "%s(%s)" % (setName, setLen)
            line["output"] = outputName
            line.commit()

        self.length += 1
        if self.csvLegend is None:
            self.csvLegend = list(store["hyperParameters"].keys())
            self.csvLegend.extend(store["runInfos"].keys())
            self.csvLegend.extend(
                ["score", "best_score", "best_score_commit", "set", "output"])

            self.csvFile = CSVFile(legend=self.csvLegend)
            self.csvFile.streamToFile(self.filename, writeRate=self.writeRate)

        # Built once per commit, and into a copy: updating
        # store["hyperParameters"] in place would leak the run infos into the
        # caller's hyper-parameters.
        muchData = dict(store["hyperParameters"])
        muchData.update(store["runInfos"])

        for theSet, scores in store["scores"].items():
            self.currentScores[theSet] = {}
            if theSet not in self.bestScores:
                self.bestScores[theSet] = {}
            bestOfSet = self.bestScores[theSet]

            for outputName, score in scores.items():
                self.currentScores[theSet][outputName] = score
                if outputName not in bestOfSet or score < bestOfSet[outputName][0]:
                    bestOfSet[outputName] = (score, self.length)
                    model.save(self.getBestModelFilename(outputName, theSet))

                _fillLine(self.csvFile, score, bestOfSet[outputName], theSet,
                          store["setSizes"][theSet], outputName, **muchData)

        if self.printRate > 0 and (self.length % self.printRate) == 0:
            self.printCurrentState()

    def printCurrentState(self):
        """prints the current state stored in the recorder"""
        if self.length > 0:
            print("\n==>rec: ggplot2, commit %s, pid: %s:" % (self.length,
                                                              os.getpid()))
            for setName, scores in self.bestScores.items():
                print("  |-%s set" % setName)
                for outputName in scores:
                    current = self.currentScores[setName][outputName]
                    if current == self.bestScores[setName][outputName][0]:
                        highlight = "+best+"
                    else:
                        score, epoch = self.bestScores[setName][outputName]
                        highlight = "(best: %s @ commit: %s)" % (score, epoch)

                    print("    |->%s: %s %s" % (outputName, current,
                                                highlight))
        else:
            print("==>rec: ggplot2, nothing to show yet")

        sys.stdout.flush()

    def getBestModel(self, outputName, theSet):
        """returns the unpickled content of the best model file of an output for a set"""
        import cPickle
        # NOTE(review): unpickling can run arbitrary code, only load files
        # that come from a trusted source.
        # The with statement closes the file even if the load fails.
        with open(self.getBestModelFilename(outputName, theSet) +
                  ".mariana.pkl") as f:
            return cPickle.load(f)

    def __len__(self):
        """returns the number of commits performed"""
        return self.length
Example #10
0
class GGPlot2(Recorder_ABC):
	"""This training recorder will create a nice TSV (tab delimited) file fit for using with ggplot2 and will update
	it as the training goes. It will also save the model each time one of its saving rules asks for it, and print
	regular reports on the console.

	:param string filename: The filename of the tsv to be generated. the extension '.ggplot2.tsv' will be added automatically
	:param list whenToSave: List of saving rules. Defaults to no rule at all.
	:param int printRate: The rate at which the status is printed on the console. If set to <= to 0, will never print.
	:param int writeRate: The rate at which the status is written on disk
	"""

	def __init__(self, filename, whenToSave=None, printRate=1, writeRate=1):
		self.filename = filename.replace(".tsv", "") + ".ggplot2.tsv"
		self.scores = Scores()

		# Both are created by the first commit
		self.csvLegend = None
		self.csvFile = None

		# Number of commits performed
		self.length = 0

		self.printRate = printRate
		self.writeRate = writeRate
		# A fresh list per recorder: a list used as default value would be
		# shared by every recorder created without saving rules.
		self.whenToSave = [] if whenToSave is None else whenToSave

	def commit(self, store, model):
		"""Append the current state of the store to the CSV.

		This one is meant to be called by the trainer. One line is written for
		every (map, output layer, output function) found in store["scores"],
		then the console report and the saving rules are handled.

		:param dict store: The trainer's store. The keys read here are
			"hyperParameters", "runInfos" (must contain "epoch"), "scores"
			and "setSizes". store["hyperParameters"] is left untouched.
		:param model: The object whose save(filename) method is called for
			each rule of self.whenToSave that asks for a save.
		"""
		def _fillLine(csvFile, score, minScore, maxScore, mapName, setLen, outputName, outputFunction, **csvValues):
			"""Fill a new line of csvFile with the given values and commit it."""
			line = csvFile.newLine()

			for key, value in csvValues.items():
				line[key] = value
			# Scores are indexable: element 0 goes to the value column and
			# element 1 to the matching "_commit" column.
			line["score"] = score[0]
			line["min_score"] = minScore[0]
			line["min_score_commit"] = minScore[1]

			line["max_score"] = maxScore[0]
			line["max_score_commit"] = maxScore[1]

			line["set"] = "%s" % (mapName)
			line["set_size"] = "%s" % (setLen)
			line["output_layer"] = outputName
			line["output_function"] = outputFunction
			line.commit()

		self.length += 1
		if self.csvLegend is None:
			self.csvLegend = list(store["hyperParameters"].keys())
			self.csvLegend.extend(store["runInfos"].keys())
			self.csvLegend.extend(["score", "min_score", "min_score_commit", "max_score", "max_score_commit", "set", "set_size", "output_layer", "output_function"])

			self.csvFile = CSVFile(legend=self.csvLegend, separator="\t")
			self.csvFile.streamToFile(self.filename, writeRate=self.writeRate)

		# Merge into a copy: updating store["hyperParameters"] in place would
		# leak the run infos into the caller's hyper-parameters.
		muchData = dict(store["hyperParameters"])
		muchData.update(store["runInfos"])

		self.scores.update(store["scores"], store["runInfos"]["epoch"])
		# The loop variable is not called 'os', so the os module is not shadowed.
		for mapName, outputs in store["scores"].items():
			for outputName, functions in outputs.items():
				for functionName in functions:
					_fillLine(
						self.csvFile,
						self.scores.getScore(mapName, outputName, functionName),
						self.scores.getMinScore(mapName, outputName, functionName),
						self.scores.getMaxScore(mapName, outputName, functionName),
						mapName,
						store["setSizes"][mapName],
						outputName,
						functionName,
						**muchData
					)

		if self.printRate > 0 and (self.length % self.printRate) == 0:
			self.printCurrentState()

		for rule in self.whenToSave:
			if rule._shouldISave(self):
				model.save(rule.getFilename(self))

	def printCurrentState(self):
		"""prints the current state stored in the recorder"""
		if self.length > 0:
			print("\n==>rec: ggplot2, commit %s, pid: %s:" % (self.length, os.getpid()))
			for mapName, outs in self.scores.currentScores.items():
				print("  |-%s set" % mapName)
				for outputName, functions in outs.items():
					print("    |-%s" % outputName)
					for functionName in functions:
						s = self.scores.getScore(mapName, outputName, functionName)
						mi = self.scores.getMinScore(mapName, outputName, functionName)
						ma = self.scores.getMaxScore(mapName, outputName, functionName)

						# A marker when the current score is the extremum,
						# otherwise the extremum and its second element
						highlight = []
						if s[0] == mi[0]:
							highlight.append("+min+")
						else:
							highlight.append("%s@%s" % (mi[0], mi[1]))

						if s[0] == ma[0]:
							highlight.append("+max+")
						else:
							highlight.append("%s@%s" % (ma[0], ma[1]))

						print("      |-%s: %s [%s]" % (functionName, s[0], "; ".join(highlight)))
		else:
			print("==>rec: ggplot2, nothing to show yet")

		sys.stdout.flush()

	def __repr__(self):
		return "<recorder: %s, filename: %s>" % (self.__class__.__name__, self.filename)

	def __len__(self):
		"""returns the number of commits performed"""
		return self.length
Example #11
0
class GGPlot2(Recorder_ABC):
    """This training recorder will create a nice CSV file fit for using with ggplot2. It will also print regular
    reports if you tell it to be verbose and save the best models.

    :param string filename: The filename of the csv to be generated. Every ".csv" it contains is removed
        and the extension ".ggplot2.csv" is appended
    :param bool verbose: If true, the current state is printed after every commit
    """
    def __init__(self, filename, verbose=True):

        self.filename = filename.replace(".csv", "") + ".ggplot2.csv"
        self.verbose = verbose

        # bestScores[set][output] is a (score, commit number) pair,
        # currentScores[set][output] is the last committed score
        self.bestScores = {}
        self.currentScores = {}

        # Both are created by the first commit
        self.csvLegend = None
        self.csvFile = None

        # Number of commits performed
        self.length = 0

    def commit(self, store, model):
        """Append the current state of the store to the CSV.

        One line is written for every (set, output) found in store["scores"].
        Whenever an output gets its first score, or a score strictly lower
        than its best one, the model is saved.

        :param dict store: The store to record. The keys read here are
            "hyperParameters", "runInfos", "scores" and "setSizes".
            store["hyperParameters"] is left untouched.
        :param model: The object whose save(filename) method is called when
            a new best score is recorded.
        """
        def _fillLine(csvFile, score, bestScore, setName, setLen, outputName,
                      **csvValues):
            """Fill a new line of csvFile with the given values and commit it."""
            line = csvFile.newLine()
            for key, value in csvValues.items():
                line[key] = value
            line["score"] = score
            line["best_score"] = bestScore[0]
            line["best_score_commit"] = bestScore[1]
            line["set"] = "%s(%s)" % (setName, setLen)
            line["output"] = outputName
            line.commit()

        self.length += 1
        if self.csvLegend is None:
            self.csvLegend = list(store["hyperParameters"].keys())
            self.csvLegend.extend(store["runInfos"].keys())
            self.csvLegend.extend(
                ["score", "best_score", "best_score_commit", "set", "output"])

            self.csvFile = CSVFile(legend=self.csvLegend)
            self.csvFile.streamToFile(self.filename, writeRate=1)

        # Built once per commit, and into a copy: updating
        # store["hyperParameters"] in place would leak the run infos into the
        # caller's hyper-parameters.
        muchData = dict(store["hyperParameters"])
        muchData.update(store["runInfos"])

        for theSet, scores in store["scores"].items():
            self.currentScores[theSet] = {}
            if theSet not in self.bestScores:
                self.bestScores[theSet] = {}
            bestOfSet = self.bestScores[theSet]

            for outputName, score in scores.items():
                self.currentScores[theSet][outputName] = score
                if outputName not in bestOfSet or score < bestOfSet[outputName][0]:
                    bestOfSet[outputName] = (score, self.length)
                    # NOTE(review): the name does not contain outputName, so
                    # the outputs of a same set are saved under the same name.
                    model.save("best-%s-%s" % (theSet, self.filename))

                _fillLine(self.csvFile, score, bestOfSet[outputName], theSet,
                          store["setSizes"][theSet], outputName, **muchData)

        if self.verbose:
            self.printCurrentState()

    def printCurrentState(self):
        """prints the current state stored in the recorder"""
        if self.length > 0:
            print("\n==>rec: ggplot2, commit %s, pid: %s:" % (self.length,
                                                              os.getpid()))
            for setName, scores in self.bestScores.items():
                print("  |-%s set" % setName)
                for outputName in scores:
                    current = self.currentScores[setName][outputName]
                    if current == self.bestScores[setName][outputName][0]:
                        highlight = "+best+"
                    else:
                        score, epoch = self.bestScores[setName][outputName]
                        highlight = "(best: %s @ commit: %s)" % (score, epoch)

                    print("    |->%s: %s %s" % (outputName, current,
                                                highlight))
        else:
            print("==>rec: ggplot2, nothing to show yet")

        sys.stdout.flush()

    def __len__(self):
        """returns the number of commits performed"""
        return self.length
Example #12
0
class GGPlot(Recorder_ABC):
    """This training recorder will create a nice TSV (tab delimited) file fit for using with ggplot2 and will update
    it as the training goes. It will also save the model each time one of its saving rules asks for it, and print
    regular reports on the console.

    :param string filename: The filename of the tsv to be generated. the extension '.ggplot.tsv' will be added automatically
    :param list whenToSave: List of saving rules. Defaults to no rule at all.
    :param int printRate: The rate at which the status is printed on the console. If set to <= to 0, will never print.
    :param int writeRate: The rate at which the status is written on disk
    """
    def __init__(self, filename, whenToSave=None, printRate=1, writeRate=1):

        self.filename = filename.replace(".tsv", "") + ".ggplot.tsv"
        self.scores = Scores()

        # Both are created by the first commit
        self.csvLegend = None
        self.csvFile = None

        # Number of commits performed, and epoch of the last committed store
        self.length = 0
        self.epoch = 0

        # The last committed store
        self.trainerStore = None

        self.printRate = printRate
        self.writeRate = writeRate
        # A fresh list per recorder: a list used as default value would be
        # shared by every recorder created without saving rules.
        self.whenToSave = [] if whenToSave is None else whenToSave

    def commit(self, store, model):
        """Append the current state of the store to the CSV.

        This one is meant to be called by the trainer. One line is written for
        every (map, output layer, output function) found in store["scores"].
        The store and its epoch are then remembered on the recorder before the
        console report and the saving rules are handled.

        :param dict store: The trainer's store. The keys read here are
            "hyperParameters", "runInfos" (must contain "epoch"), "scores"
            and "setSizes". store["hyperParameters"] is left untouched.
        :param model: The object whose save(filename) method is called for
            each rule of self.whenToSave that asks for a save.
        """
        def _fillLine(csvFile, score, minScore, maxScore, mapName, setLen,
                      outputName, outputFunction, **csvValues):
            """Fill a new line of csvFile with the given values and commit it."""
            line = csvFile.newLine()

            for key, value in csvValues.items():
                line[key] = value
            # Scores are indexable: element 0 goes to the value column and
            # element 1 to the matching "_commit" column.
            line["score"] = score[0]
            line["min_score"] = minScore[0]
            line["min_score_commit"] = minScore[1]

            line["max_score"] = maxScore[0]
            line["max_score_commit"] = maxScore[1]

            line["set"] = "%s" % (mapName)
            line["set_size"] = "%s" % (setLen)
            line["output_layer"] = outputName
            line["output_function"] = outputFunction
            line.commit()

        self.length += 1
        if self.csvLegend is None:
            self.csvLegend = list(store["hyperParameters"].keys())
            self.csvLegend.extend(store["runInfos"].keys())
            self.csvLegend.extend([
                "score", "min_score", "min_score_commit", "max_score",
                "max_score_commit", "set", "set_size", "output_layer",
                "output_function"
            ])

            self.csvFile = CSVFile(legend=self.csvLegend, separator="\t")
            self.csvFile.streamToFile(self.filename, writeRate=self.writeRate)

        # Merge into a copy: updating store["hyperParameters"] in place would
        # leak the run infos into the caller's hyper-parameters.
        muchData = dict(store["hyperParameters"])
        muchData.update(store["runInfos"])

        self.scores.update(store["scores"], store["runInfos"]["epoch"])
        # The loop variable is not called 'os', so the os module is not shadowed.
        for mapName, outputs in store["scores"].items():
            for outputName, functions in outputs.items():
                for functionName in functions:
                    _fillLine(
                        self.csvFile,
                        self.scores.getScore(mapName, outputName,
                                             functionName),
                        self.scores.getMinScore(mapName, outputName,
                                                functionName),
                        self.scores.getMaxScore(mapName, outputName,
                                                functionName),
                        mapName,
                        store["setSizes"][mapName],
                        outputName,
                        functionName,
                        **muchData)

        self.trainerStore = store
        self.epoch = store["runInfos"]["epoch"]

        if self.printRate > 0 and (self.length % self.printRate) == 0:
            self.printCurrentState()

        for rule in self.whenToSave:
            if rule._shouldISave(self):
                model.save(rule.getFilename(self))

    def printCurrentState(self):
        """prints the current state stored in the recorder"""
        if self.length > 0:
            print("\n==>rec: ggplot2, epoch %s, commit %s, pid: %s:" % (
                self.epoch, self.length, os.getpid()))
            for mapName, outs in self.scores.currentScores.items():
                print("  |-%s set" % mapName)
                for outputName, functions in outs.items():
                    print("    |-%s" % outputName)
                    for functionName in functions:
                        s = self.scores.getScore(mapName, outputName,
                                                 functionName)
                        mi = self.scores.getMinScore(mapName, outputName,
                                                     functionName)
                        ma = self.scores.getMaxScore(mapName, outputName,
                                                     functionName)

                        # A marker when the current score is the extremum,
                        # otherwise the extremum and its second element
                        highlight = []
                        if s[0] == mi[0]:
                            highlight.append("+min+")
                        else:
                            highlight.append("%s@%s" % (mi[0], mi[1]))

                        if s[0] == ma[0]:
                            highlight.append("+max+")
                        else:
                            highlight.append("%s@%s" % (ma[0], ma[1]))

                        print("      |-%s: %s [%s]" % (functionName, s[0],
                                                       "; ".join(highlight)))
        else:
            print("==>rec: ggplot2, nothing to show yet")

        sys.stdout.flush()

    def __repr__(self):
        return "<recorder: %s, filename: %s>" % (self.__class__.__name__,
                                                 self.filename)

    def __len__(self):
        """returns the number of commits performed"""
        return self.length