def main(self, argv):
    """Command-line entry point: train a classifier and save its arch model.

    Parses ``argv`` with optparse.  When exactly four positional arguments
    are given, the parsed options and the output path are stored on
    ``self``, ``self.setup`` is called with the data, target and arch-model
    input paths, and ``self.classifier`` is run through
    ``postEpochCall(-1)`` and ``train()``.  The resulting model is written
    with ``saveArchModel`` and ``self.runStats()`` is called last.

    Args:
        argv: Full argument vector with the program name at index 0.
            ``None`` means "use ``sys.argv``".

    Raises:
        SystemExit: With status 2 when the number of positional arguments
            is not four (the help text is printed first).
    """
    if argv is None:
        argv = sys.argv

    # NOTE(review): the usage text is kept as one line of text; if it was
    # meant to contain line breaks, restore them here - confirm.
    usageStr = (
        "usage: %prog [options] archModelInFile inData inTarg archModelOutFile "
        "archModelInFile - MonteArchModel as created with MakeMonteArchModel.py with the details of the architecture. "
        "inData - a gzipped FeatureDict file "
        "inTarg - a text file with 'targcolumn' indicates which is the target to train to. Fmt: (idx, originalId, [target1, target2]) "
        "archModelOutFile - Where to save the trained (and intermediate) models "
    )

    parser = OptionParser(usage=usageStr)
    parser.add_option(
        '--targcolumn',
        dest='targcolumn',
        type='int',
        default=2,
        help="Set which column of the target file is the actual target. (default=%default)")
    parser.add_option(
        '--nosaveint',
        dest='saveFile',
        action='store_false',
        default=True,
        help="If set, don't save the latest archmodel at the end of every epoch.")
    (options, args) = parser.parse_args(argv[1:])

    # Anything other than the four documented positionals is a usage error.
    if len(args) != 4:
        parser.print_help()
        sys.exit(2)

    self.options = options
    self.saveFile = self.options.saveFile
    self.targcolumn = self.options.targcolumn

    archModelInFile = args[0]
    inDataFile = args[1]
    inTargDataFile = args[2]
    self.archModelOutFile = args[3]

    # presumably setup() is what creates self.classifier - verify.
    self.setup(inDataFile, inTargDataFile, archModelInFile)

    # Pre-training stats on the data (epoch index -1), then the real run.
    self.classifier.postEpochCall(-1)
    self.classifier.train()

    saveArchModel(self.classifier.archModel, self.archModelOutFile)
    self.runStats()
def setUp(self):
    """Create the temp files, feature data and arch model used by the tests.

    Three temporary files are created (arch model, gzipped feature dicts,
    output).  Two lists of feature dicts are generated from the LOW and
    HIGH distribution parameters, concatenated into ``self.FEAT_DATA`` and
    written to the feature file.  A ``MonteArchModel`` with no hidden units
    is then configured and saved to the arch-model file.

    The descriptors returned by ``mkstemp`` are kept on ``self``.
    NOTE(review): presumably they are closed in tearDown - confirm.
    """
    super(TestFDictPairPredictor, self).setUp()

    (self.ARCH_FD, self.ARCH_FILENAME) = tempfile.mkstemp()
    (self.FEAT_FD, self.FEAT_FILENAME) = tempfile.mkstemp()
    (self.OUT_FD, self.OUT_FILENAME) = tempfile.mkstemp()

    # Parameters handed to the feature-dict generator for the two groups.
    self.LOW_DIST_MEANS = array([10, -10, 5, -5])
    self.LOW_DIST_VARS = array([5, 5, 5, 5])
    self.HIGH_DIST_MEANS = array([3, 2, 0, 5])
    self.HIGH_DIST_VARS = array([3, 2, 5, 5])
    self.NUM_PAIRS = 1000

    self.lowFeatDictList = self.__generateFeatDictList(
        self.NUM_PAIRS, self.LOW_DIST_MEANS, self.LOW_DIST_VARS)
    self.highFeatDictList = self.__generateFeatDictList(
        self.NUM_PAIRS, self.HIGH_DIST_MEANS, self.HIGH_DIST_VARS)

    # Low-group entries first, then high-group entries.
    self.FEAT_DATA = []
    self.FEAT_DATA.extend(self.lowFeatDictList)
    self.FEAT_DATA.extend(self.highFeatDictList)

    # Write out the fdict data; the context manager closes the gzip handle
    # even if the writer raises part-way through.
    with gzip.open(self.FEAT_FILENAME, 'w') as ofs:
        writer = FeatureDictWriter(ofs)
        for iLine, fd in enumerate(self.FEAT_DATA):
            # Each entry is keyed by its position in FEAT_DATA.
            writer.update(fd, str(iLine))

    self.EXPECTED_NUM_OUTPUTLINES = 2 * self.NUM_PAIRS

    # Set up an arch model: no hidden units, one input per feature.
    archModel = MonteArchModel()
    archModel.paramVar = 0.01
    archModel.numhidden = 0
    archModel.numfeats = len(self.LOW_DIST_MEANS)
    archModel.l2decay = 0.001
    archModel.gradientChunkSize = 500
    archModel.onlineChunkSize = 4000
    archModel.cgIterations = 2
    archModel.batch = False
    archModel.numEpochs = 50
    archModel.trainertype = 'gdescadapt'
    archModel.qLearningRate = 0.05
    archModel.exponentAvgM = 0.95
    archModel.learningrate = 0.1
    archModel.setupParams()

    self.ARCH_MDL = archModel
    saveArchModel(self.ARCH_MDL, self.ARCH_FILENAME)
def main(self, argv):
    """Run training from the command line and save the trained arch model.

    ``argv`` is parsed with optparse.  With exactly four positional
    arguments the options and output path are recorded on ``self``,
    ``self.setup`` receives the data, probability-array and arch-model
    input paths, and ``self.classifier`` is run through
    ``postEpochCall(-1)`` followed by ``train()``.  The model is then saved
    with ``saveArchModel`` and ``self.runStats()`` is called.  Any other
    argument count prints the help text and exits with status 2.

    Args:
        argv: Argument vector including the program name at index 0;
            ``None`` selects ``sys.argv``.
    """
    if argv is None:
        argv = sys.argv

    # NOTE(review): the usage text is kept as one line of text; if it was
    # meant to contain line breaks, restore them here - confirm.
    usage_text = (
        "usage: %prog [options] archModelInFile inData inProbArr archModelOutFile "
        "archModelInFile - pickled MonteArchModel file with the machine setup "
        "inData - zipped FeatureDict file "
        "inProbArr - space delim file with (lIdx, lDbId, rIdx, rDbId) per line "
        "archModelOutFile - filename to place final (and intermediate) trained model results "
    )

    cli = OptionParser(usage=usage_text)
    cli.add_option(
        '--nosaveint',
        dest='saveFile',
        action='store_false',
        default=True,
        help="If set, don't save the latest archmodel at the end of every epoch.")
    opts, positional = cli.parse_args(argv[1:])

    # Usage error: show help and bail out before touching any state.
    if len(positional) != 4:
        cli.print_help()
        sys.exit(2)

    self.options = opts
    self.saveFile = self.options.saveFile

    model_in_path = positional[0]
    data_path = positional[1]
    prob_arr_path = positional[2]
    self.archModelOutFile = positional[3]

    self.setup(data_path, prob_arr_path, model_in_path)

    # Run some pre training stats (epoch index -1) before the real run.
    self.classifier.postEpochCall(-1)
    self.classifier.train()

    saveArchModel(self.classifier.archModel, self.archModelOutFile)
    self.runStats()
def main(self, argv):
    """Command-line driver: set up, train, save and report.

    Exactly four positional arguments are expected after the options:
    arch-model input file, feature data file, probability-array file and
    arch-model output file.  On success the classifier prepared by
    ``self.setup`` gets one ``postEpochCall(-1)``, is trained, and its arch
    model is saved with ``saveArchModel`` before ``self.runStats()`` runs.
    With any other number of positionals the help text is printed and the
    process exits with status 2.

    Args:
        argv: Argument vector whose first element is the program name.
            Falls back to ``sys.argv`` when ``None``.
    """
    argv = sys.argv if argv is None else argv

    # NOTE(review): the usage text is kept as one line of text; if it was
    # meant to contain line breaks, restore them here - confirm.
    usage = (
        "usage: %prog [options] archModelInFile inData inProbArr archModelOutFile "
        "archModelInFile - pickled MonteArchModel file with the machine setup "
        "inData - zipped FeatureDict file "
        "inProbArr - space delim file with (lIdx, lDbId, rIdx, rDbId) per line "
        "archModelOutFile - filename to place final (and intermediate) trained model results "
    )
    optParser = OptionParser(usage=usage)
    optParser.add_option(
        '--nosaveint', dest='saveFile', action='store_false', default=True,
        help="If set, don't save the latest archmodel at the end of every epoch.")
    parsed = optParser.parse_args(argv[1:])
    options = parsed[0]
    args = parsed[1]

    if len(args) != 4:
        # Wrong number of positionals: explain and stop.
        optParser.print_help()
        sys.exit(2)

    self.options = options
    self.saveFile = self.options.saveFile

    archModelInFile, inDataFile, inProbArrFile = args[0], args[1], args[2]
    self.archModelOutFile = args[3]

    self.setup(inDataFile, inProbArrFile, archModelInFile)

    trainer = self.classifier
    trainer.postEpochCall(-1)  # pre-training stats
    trainer.train()

    saveArchModel(self.classifier.archModel, self.archModelOutFile)
    self.runStats()
def postEpochCallback(self, classifier):
    """Save the in-progress arch model after an epoch, if saving is enabled.

    When ``self.saveFile`` is truthy, the cost trajectory held by
    ``self.classifier`` is copied onto its arch model, which is then written
    to ``self.archModelOutFile`` with ``saveArchModel``.  Otherwise the call
    does nothing.

    Args:
        classifier: Passed in by the caller but not consulted here; the
            method works from ``self.classifier`` instead.
    """
    if not self.saveFile:
        return

    trainer = self.classifier
    trainer.archModel.costTrajectory = trainer.costTrajectory
    saveArchModel(trainer.archModel, self.archModelOutFile)
def setUp(self):
    """Create the temp files, arch model and two-class data for the tests.

    Four temporary files are created (arch model input, gzipped feature
    dicts, index/target rows, complete output).  A ``MonteArchModel`` is
    configured and saved to the first.  Positive and negative samples are
    generated, stacked positive-first into ``self.DATA_ARR``, converted to
    per-row feature dicts and written to the feature file.  One index row
    per sample is written to the index file as CSV.

    The descriptors returned by ``mkstemp`` are kept on ``self``.
    NOTE(review): presumably they are closed in tearDown - confirm.
    """
    super(TestFDictClassPredictor, self).setUp()

    # a file for the archmodel, the feature dicts and the index rows
    (self.AMODELIN_FD, self.AMODELIN_FILENAME) = tempfile.mkstemp()
    (self.FDICT_FD, self.FDICT_FILENAME) = tempfile.mkstemp()
    (self.IDX_FD, self.IDX_FILENAME) = tempfile.mkstemp()
    # A file for the complete output
    (self.OUT_FD, self.OUT_FILENAME) = tempfile.mkstemp()

    # Set up an ArchModel
    self.NUMFEATS = 3
    self.NUMDATA_POS = 1000
    self.NUMDATA_NEG = self.NUMDATA_POS
    self.ARCHMODEL = MonteArchModel()
    self.ARCHMODEL.numfeats = self.NUMFEATS
    self.ARCHMODEL.numEpochs = 15
    self.ARCHMODEL.batch = False
    self.ARCHMODEL.gradientChunkSize = 1000
    self.ARCHMODEL.l2decay = 0
    self.ARCHMODEL.onlineChunkSize = 5000
    self.ARCHMODEL.numhidden = 10
    self.ARCHMODEL.trainertype = 'gdescadapt'
    self.ARCHMODEL.qLearningRate = 0.05
    self.ARCHMODEL.exponentAvgM = 0.95
    self.ARCHMODEL.learningrate = 0.1
    self.ARCHMODEL.setupParams()
    saveArchModel(self.ARCHMODEL, self.AMODELIN_FILENAME)

    # Set up a data and targ arr.  The two classes currently have the same
    # number of samples (NUMDATA_NEG is set from NUMDATA_POS).
    # NOTE(review): despite the names, *_MEANS multiply the randn draws, so
    # they scale each feature rather than shift it (assuming randn is the
    # numpy standard-normal sampler) - confirm this is intended.
    self.POS_MEANS = [-20, -50, -10]
    self.NEG_MEANS = [1, -2, -3]
    self.POS_DATA = self.POS_MEANS * randn(self.NUMDATA_POS, self.NUMFEATS)
    self.NEG_DATA = self.NEG_MEANS * randn(self.NUMDATA_NEG, self.NUMFEATS)
    self.DATA_ARR = concatenate((self.POS_DATA, self.NEG_DATA), 0)

    # (And because all the data is in similar forms)
    self.DATA_ARR /= 10.0

    # One dict per row, mapping column index -> value.
    self.FEAT_DATA = [dict(enumerate(row)) for row in self.DATA_ARR]

    # Write out the fdict stuff; the context manager closes the gzip handle
    # even if the writer raises part-way through.
    with gzip.open(self.FDICT_FILENAME, 'w') as ofs:
        writer = FeatureDictWriter(ofs)
        for iRow, d in enumerate(self.FEAT_DATA):
            writer.update(d, str(iRow))

    # construct the idxArr data.  Negative rows follow the positive rows in
    # DATA_ARR, so their indices start at len(POS_DATA); the previous start
    # of len(NEG_DATA) was only right while both classes were the same size.
    numPos = len(self.POS_DATA)
    numNeg = len(self.NEG_DATA)
    posIdx = range(numPos)
    negIdx = range(numPos, numPos + numNeg)

    self.IDX_ARR = [[aPos, aPos, 1.0, 0.0] for aPos in posIdx]
    self.IDX_ARR.extend([aNeg, aNeg, 0.0, 1.0] for aNeg in negIdx)

    with open(self.IDX_FILENAME, 'w') as ofs:
        writer = csv.writer(ofs, quoting=csv.QUOTE_NONE)
        for line in self.IDX_ARR:
            writer.writerow(line)
def setUp(self):
    """Prepare temp files, an arch model and synthetic data for the tests.

    Creates four temporary files (arch model input, gzipped feature dicts,
    index/target rows, complete output) and saves a configured
    ``MonteArchModel`` to the first.  Positive and negative samples are
    generated and stacked positive-first into ``self.DATA_ARR``, written as
    feature dicts to the feature file, and described by one CSV index row
    per sample in the index file.

    The descriptors returned by ``mkstemp`` are kept on ``self``.
    NOTE(review): presumably they are closed in tearDown - confirm.
    """
    super(TestFDictClassPredictor, self).setUp()

    # a file for the archmodel, the feature dicts and the index rows
    (self.AMODELIN_FD, self.AMODELIN_FILENAME) = tempfile.mkstemp()
    (self.FDICT_FD, self.FDICT_FILENAME) = tempfile.mkstemp()
    (self.IDX_FD, self.IDX_FILENAME) = tempfile.mkstemp()
    # A file for the complete output
    (self.OUT_FD, self.OUT_FILENAME) = tempfile.mkstemp()

    # Set up an ArchModel
    self.NUMFEATS = 3
    self.NUMDATA_POS = 1000
    self.NUMDATA_NEG = self.NUMDATA_POS
    self.ARCHMODEL = MonteArchModel()
    self.ARCHMODEL.numfeats = self.NUMFEATS
    self.ARCHMODEL.numEpochs = 15
    self.ARCHMODEL.batch = False
    self.ARCHMODEL.gradientChunkSize = 1000
    self.ARCHMODEL.l2decay = 0
    self.ARCHMODEL.onlineChunkSize = 5000
    self.ARCHMODEL.numhidden = 10
    self.ARCHMODEL.trainertype = 'gdescadapt'
    self.ARCHMODEL.qLearningRate = 0.05
    self.ARCHMODEL.exponentAvgM = 0.95
    self.ARCHMODEL.learningrate = 0.1
    self.ARCHMODEL.setupParams()
    saveArchModel(self.ARCHMODEL, self.AMODELIN_FILENAME)

    # Set up a data and targ arr.  Both classes currently have the same
    # number of samples (NUMDATA_NEG is set from NUMDATA_POS).
    # NOTE(review): *_MEANS are multiplied into the randn draws, i.e. they
    # act as per-feature scale factors rather than offsets (assuming randn
    # is the numpy standard-normal sampler) - confirm this is intended.
    self.POS_MEANS = [-20, -50, -10]
    self.NEG_MEANS = [1, -2, -3]
    self.POS_DATA = self.POS_MEANS * randn(self.NUMDATA_POS, self.NUMFEATS)
    self.NEG_DATA = self.NEG_MEANS * randn(self.NUMDATA_NEG, self.NUMFEATS)
    self.DATA_ARR = concatenate((self.POS_DATA, self.NEG_DATA), 0)

    # (And because all the data is in similar forms)
    self.DATA_ARR /= 10.0

    # Convert every row into a {column index: value} dict.
    numRows, numCols = self.DATA_ARR.shape[0], self.DATA_ARR.shape[1]
    self.FEAT_DATA = [
        dict((iCol, self.DATA_ARR[iRow, iCol]) for iCol in range(numCols))
        for iRow in range(numRows)
    ]

    # Write out the fdict stuff; `with` guarantees the gzip handle is
    # closed even when the writer fails mid-way.
    with gzip.open(self.FDICT_FILENAME, 'w') as ofs:
        writer = FeatureDictWriter(ofs)
        for iRow, d in enumerate(self.FEAT_DATA):
            writer.update(d, str(iRow))

    # construct the idxArr data.  Positive rows occupy [0, len(POS_DATA))
    # of DATA_ARR and negative rows come right after, so the negative range
    # must start at len(POS_DATA).  It used to start at len(NEG_DATA),
    # which only matched because both classes have the same size.
    firstNeg = len(self.POS_DATA)
    posIdx = range(firstNeg)
    negIdx = range(firstNeg, firstNeg + len(self.NEG_DATA))

    self.IDX_ARR = []
    for aPos in posIdx:
        self.IDX_ARR.append([aPos, aPos, 1.0, 0.0])
    for aNeg in negIdx:
        self.IDX_ARR.append([aNeg, aNeg, 0.0, 1.0])

    with open(self.IDX_FILENAME, 'w') as ofs:
        csv.writer(ofs, quoting=csv.QUOTE_NONE).writerows(self.IDX_ARR)
def postEpochCallback(self, classifier):
    """End-of-epoch hook that checkpoints the arch model when enabled.

    Does nothing unless ``self.saveFile`` is truthy.  When it is, the
    current ``costTrajectory`` of ``self.classifier`` is attached to its
    arch model and the model is written to ``self.archModelOutFile`` with
    ``saveArchModel``.

    Args:
        classifier: Accepted for the callback signature; this method reads
            ``self.classifier`` and does not use the argument.
    """
    saving_enabled = bool(self.saveFile)
    if saving_enabled:
        trajectory = self.classifier.costTrajectory
        self.classifier.archModel.costTrajectory = trajectory
        saveArchModel(self.classifier.archModel, self.archModelOutFile)