コード例 #1
0
    def main(self, argv):
        """Command-line entry point: parse the arguments, train, save, report.

        Exactly four positional arguments are required after the program
        name (archModelInFile, inData, inTarg, archModelOutFile). With any
        other count the usage text is printed and the process exits with
        status 2.

        argv -- full argument vector including the program name; when None,
            sys.argv is used instead.
        """
        argv = sys.argv if argv is None else argv

        usageText = """usage: %prog [options] archModelInFile inData inTarg archModelOutFile 
            
            archModelInFile - MonteArchModel as created with MakeMonteArchModel.py with the 
                details of the architecture.  
            inData - a gzipped FeatureDict file
            inTarg - a text file with 'targcolumn' indicates which is the target to train to.
                Fmt: (idx, originalId, [target1, target2])
            archModelOutFile - Where to save the trained (and intermediate) models
            """

        optParser = OptionParser(usage=usageText)
        optParser.add_option(
            '--targcolumn', dest='targcolumn', type='int', default=2,
            help="Set which column of the target file is the actual target. (default=%default)")
        optParser.add_option(
            '--nosaveint', dest='saveFile', action='store_false', default=True,
            help="If set, don't save the latest archmodel at the end of every epoch.")
        parsedOpts, positional = optParser.parse_args(argv[1:])

        # Anything other than exactly four positionals is a usage error.
        if len(positional) != 4:
            optParser.print_help()
            sys.exit(2)

        self.options = parsedOpts
        self.saveFile = parsedOpts.saveFile
        self.targcolumn = parsedOpts.targcolumn

        archModelInFile, inDataFile, inTargDataFile, outFile = positional
        self.archModelOutFile = outFile

        self.setup(inDataFile, inTargDataFile, archModelInFile)

        # Invoke the post-epoch hook once with epoch -1, i.e. before any
        # training has happened (pre-training stats).
        self.classifier.postEpochCall(-1)
        self.classifier.train()

        # Persist the final model, then run the stats step.
        saveArchModel(self.classifier.archModel, self.archModelOutFile)
        self.runStats()
コード例 #2
0
 def setUp(self):
     """Create the temp files, synthetic feature dicts and arch model for the tests.

     Side effects (all stored on self):
       * three temp files from tempfile.mkstemp (arch model, features, output);
       * two lists of feature dicts produced by __generateFeatDictList from
         the LOW_* and HIGH_* mean/variance arrays, concatenated into
         FEAT_DATA (low first, then high);
       * FEAT_DATA written through FeatureDictWriter into the gzip file
         FEAT_FILENAME, each entry passed with its stringified position;
       * a MonteArchModel configured below and saved to ARCH_FILENAME.
     """
     super(TestFDictPairPredictor, self).setUp()

     # NOTE(review): mkstemp returns open OS-level descriptors; they are only
     # stored here - presumably tearDown closes them and removes the files.
     (self.ARCH_FD, self.ARCH_FILENAME) = tempfile.mkstemp()
     (self.FEAT_FD, self.FEAT_FILENAME) = tempfile.mkstemp()
     (self.OUT_FD, self.OUT_FILENAME) = tempfile.mkstemp()

     # Parameters handed to __generateFeatDictList for the two groups.
     self.LOW_DIST_MEANS = array([10, -10, 5, -5])
     self.LOW_DIST_VARS = array([5, 5, 5, 5])

     self.HIGH_DIST_MEANS = array([3, 2, 0, 5])
     self.HIGH_DIST_VARS = array([3, 2, 5, 5])

     self.NUM_PAIRS = 1000
     self.lowFeatDictList = self.__generateFeatDictList(
         self.NUM_PAIRS, self.LOW_DIST_MEANS, self.LOW_DIST_VARS)
     self.highFeatDictList = self.__generateFeatDictList(
         self.NUM_PAIRS, self.HIGH_DIST_MEANS, self.HIGH_DIST_VARS)

     # Low-group entries come first, high-group entries second.
     self.FEAT_DATA = []
     self.FEAT_DATA.extend(self.lowFeatDictList)
     self.FEAT_DATA.extend(self.highFeatDictList)

     # Write out the fdict data. The context manager guarantees the gzip
     # stream is closed (and flushed) even if a writer.update call raises.
     with gzip.open(self.FEAT_FILENAME, 'w') as ofs:
         writer = FeatureDictWriter(ofs)
         for iLine, fd in enumerate(self.FEAT_DATA):
             writer.update(fd, str(iLine))

     self.EXPECTED_NUM_OUTPUTLINES = 2 * self.NUM_PAIRS

     # Set up an arch model: no hidden units, one input per feature column.
     archModel = MonteArchModel()
     archModel.paramVar = 0.01
     archModel.numhidden = 0
     archModel.numfeats = len(self.LOW_DIST_MEANS)
     archModel.l2decay = 0.001
     archModel.gradientChunkSize = 500
     archModel.onlineChunkSize = 4000
     archModel.cgIterations = 2
     archModel.batch = False
     archModel.numEpochs = 50
     archModel.trainertype = 'gdescadapt'
     archModel.qLearningRate = 0.05
     archModel.exponentAvgM = 0.95
     archModel.learningrate = 0.1
     # setupParams is called only after every attribute above is assigned.
     archModel.setupParams()
     self.ARCH_MDL = archModel

     saveArchModel(self.ARCH_MDL, self.ARCH_FILENAME)
コード例 #3
0
    def main(self, argv):
        """Run the trainer from the command line.

        Requires four positional arguments after the program name:
        archModelInFile, inData, inProbArr and archModelOutFile. If the
        count differs, the usage text is printed and the process exits with
        status 2.

        argv -- argument vector including the program name; sys.argv is
            used when this is None.
        """
        if argv is None:
            argv = sys.argv

        usageText = """usage: %prog [options] archModelInFile inData inProbArr archModelOutFile
            
            archModelInFile - pickled MonteArchModel file with the machine setup
            inData - zipped FeatureDict file
            inProbArr - space delim file with (lIdx, lDbId, rIdx, rDbId) per line
            archModelOutFile - filename to place final (and intermediate) trained model results
            """

        cliParser = OptionParser(usage=usageText)
        cliParser.add_option(
            '--nosaveint', dest='saveFile', action='store_false', default=True,
            help="If set, don't save the latest archmodel at the end of every epoch.")
        opts, positionals = cliParser.parse_args(argv[1:])

        # Wrong number of positionals: show help and bail out.
        if len(positionals) != 4:
            cliParser.print_help()
            sys.exit(2)

        self.options = opts
        self.saveFile = opts.saveFile

        archModelInFile, inDataFile, inProbArrFile, outFile = positionals
        self.archModelOutFile = outFile

        self.setup(inDataFile, inProbArrFile, archModelInFile)

        # Pre-training stats: call the post-epoch hook with epoch -1.
        self.classifier.postEpochCall(-1)

        self.classifier.train()
        saveArchModel(self.classifier.archModel, self.archModelOutFile)
        self.runStats()
コード例 #4
0
    def main(self, argv):
        """Parse command-line arguments, then set up, train and save the model.

        Four positional arguments must follow the program name
        (archModelInFile, inData, inProbArr, archModelOutFile); otherwise
        the help text is printed and sys.exit(2) is called.

        argv -- argument list whose first element is the program name;
            falls back to sys.argv when None.
        """
        argList = sys.argv if argv is None else argv

        usageMsg = """usage: %prog [options] archModelInFile inData inProbArr archModelOutFile
            
            archModelInFile - pickled MonteArchModel file with the machine setup
            inData - zipped FeatureDict file
            inProbArr - space delim file with (lIdx, lDbId, rIdx, rDbId) per line
            archModelOutFile - filename to place final (and intermediate) trained model results
            """

        parser = OptionParser(usage=usageMsg)
        parser.add_option(
            '--nosaveint',
            dest='saveFile',
            action='store_false',
            default=True,
            help="If set, don't save the latest archmodel at the end of every epoch.",
        )
        options, posArgs = parser.parse_args(argList[1:])

        # Reject any call that does not supply exactly four positionals.
        if len(posArgs) != 4:
            parser.print_help()
            sys.exit(2)

        self.options = options
        self.saveFile = options.saveFile

        archModelInFile = posArgs[0]
        inDataFile = posArgs[1]
        inProbArrFile = posArgs[2]
        self.archModelOutFile = posArgs[3]

        self.setup(inDataFile, inProbArrFile, archModelInFile)

        # Run some pre-training stats (epoch index -1 = before training).
        self.classifier.postEpochCall(-1)

        self.classifier.train()
        saveArchModel(self.classifier.archModel, self.archModelOutFile)
        self.runStats()
コード例 #5
0
 def postEpochCallback(self, classifier):
     """End-of-epoch hook: checkpoint the arch model when saving is enabled.

     Does nothing unless self.saveFile is truthy. Otherwise the cost
     trajectory of self.classifier is copied onto its archModel, and that
     model is saved to self.archModelOutFile.

     classifier -- part of the callback signature but not read here;
         self.classifier is used instead.
     """
     if not self.saveFile:
         return

     trainer = self.classifier
     trainer.archModel.costTrajectory = trainer.costTrajectory
     saveArchModel(trainer.archModel, self.archModelOutFile)
コード例 #6
0
 def setUp(self):
     """Create temp files, a saved arch model, feature data and an index file.

     Side effects (all stored on self):
       * four temp files from tempfile.mkstemp (arch model in, feature
         dicts, index rows, complete output);
       * a MonteArchModel configured below and saved to AMODELIN_FILENAME;
       * DATA_ARR: the positive rows followed by the negative rows, each
         drawn with randn and scaled per column, then divided by 10;
       * FEAT_DATA: one {column index: value} dict per DATA_ARR row, written
         through FeatureDictWriter into the gzip file FDICT_FILENAME;
       * IDX_ARR: one [rowIdx, rowIdx, 1.0, 0.0] row per positive and one
         [rowIdx, rowIdx, 0.0, 1.0] row per negative, written as CSV to
         IDX_FILENAME.
     """
     super(TestFDictClassPredictor, self).setUp()

     # NOTE(review): mkstemp returns open OS-level descriptors; they are only
     # stored here - presumably tearDown closes them and removes the files.
     # A file for the archmodel, the feature dicts and the index rows.
     (self.AMODELIN_FD, self.AMODELIN_FILENAME) = tempfile.mkstemp()
     (self.FDICT_FD, self.FDICT_FILENAME) = tempfile.mkstemp()
     (self.IDX_FD, self.IDX_FILENAME) = tempfile.mkstemp()
     # A file for the complete output
     (self.OUT_FD, self.OUT_FILENAME) = tempfile.mkstemp()

     # Set up an ArchModel
     self.NUMFEATS = 3
     self.NUMDATA_POS = 1000
     self.NUMDATA_NEG = self.NUMDATA_POS
     self.ARCHMODEL = MonteArchModel()
     self.ARCHMODEL.numfeats = self.NUMFEATS
     self.ARCHMODEL.numEpochs = 15
     self.ARCHMODEL.batch = False
     self.ARCHMODEL.gradientChunkSize = 1000
     self.ARCHMODEL.l2decay = 0
     self.ARCHMODEL.onlineChunkSize = 5000
     self.ARCHMODEL.numhidden = 10
     self.ARCHMODEL.trainertype = 'gdescadapt'
     self.ARCHMODEL.qLearningRate = 0.05
     self.ARCHMODEL.exponentAvgM = 0.95
     self.ARCHMODEL.learningrate = 0.1
     # setupParams is called only after every attribute above is assigned.
     self.ARCHMODEL.setupParams()
     saveArchModel(self.ARCHMODEL, self.AMODELIN_FILENAME)

     # Set up the data array. The original note here said the classes
     # "should be imbalanced", but NUMDATA_NEG equals NUMDATA_POS.
     # NOTE(review): the *_MEANS values multiply the randn samples (a
     # per-column scale); they are not added as offsets - confirm intent.
     self.POS_MEANS = [-20, -50, -10]
     self.NEG_MEANS = [1, -2, -3]
     self.POS_DATA = self.POS_MEANS * randn(self.NUMDATA_POS, self.NUMFEATS)
     self.NEG_DATA = self.NEG_MEANS * randn(self.NUMDATA_NEG, self.NUMFEATS)
     self.DATA_ARR = concatenate((self.POS_DATA, self.NEG_DATA), 0)
     # (And because all the data is in similar forms)
     self.DATA_ARR /= 10.0

     # One dict per row, keyed by column index.
     self.FEAT_DATA = [
         {iCol: value for iCol, value in enumerate(row)}
         for row in self.DATA_ARR
     ]

     # Write out the fdict stuff; `with` closes the gzip stream even if a
     # writer.update call raises.
     with gzip.open(self.FDICT_FILENAME, 'w') as ofs:
         writer = FeatureDictWriter(ofs)
         for iRow, d in enumerate(self.FEAT_DATA):
             writer.update(d, str(iRow))

     # Construct the idxArr data. Negative rows follow the positive rows in
     # DATA_ARR, so their indices start at len(POS_DATA). (Starting at
     # len(NEG_DATA) was only correct while both classes had equal size.)
     numPos = len(self.POS_DATA)
     numNeg = len(self.NEG_DATA)
     posIdx = range(numPos)
     negIdx = range(numPos, numPos + numNeg)

     self.IDX_ARR = [[aPos, aPos, 1.0, 0.0] for aPos in posIdx]
     self.IDX_ARR.extend([aNeg, aNeg, 0.0, 1.0] for aNeg in negIdx)

     # `with` guarantees the index file is flushed and closed on any exit.
     with open(self.IDX_FILENAME, 'w') as ofs:
         writer = csv.writer(ofs, quoting=csv.QUOTE_NONE)
         for line in self.IDX_ARR:
             writer.writerow(line)
コード例 #7
0
    def setUp(self):
        """Build the fixture: temp files, saved arch model, features, index rows.

        Everything is stored on self:
          * four temp files from tempfile.mkstemp (arch model in, feature
            dicts, index rows, complete output);
          * ARCHMODEL, a MonteArchModel configured below and saved to
            AMODELIN_FILENAME;
          * DATA_ARR, the positive rows stacked above the negative rows
            (randn samples scaled per column, then divided by 10);
          * FEAT_DATA, one {column index: value} dict per DATA_ARR row,
            written through FeatureDictWriter to the gzip file
            FDICT_FILENAME;
          * IDX_ARR, rows [rowIdx, rowIdx, 1.0, 0.0] for positives and
            [rowIdx, rowIdx, 0.0, 1.0] for negatives, written as CSV to
            IDX_FILENAME.
        """
        super(TestFDictClassPredictor, self).setUp()

        # NOTE(review): the descriptors returned by mkstemp stay open; this
        # method only records them - presumably tearDown cleans them up.
        # A file for the archmodel, the feature dicts and the index rows.
        (self.AMODELIN_FD, self.AMODELIN_FILENAME) = tempfile.mkstemp()
        (self.FDICT_FD, self.FDICT_FILENAME) = tempfile.mkstemp()
        (self.IDX_FD, self.IDX_FILENAME) = tempfile.mkstemp()
        # A file for the complete output
        (self.OUT_FD, self.OUT_FILENAME) = tempfile.mkstemp()

        # Set up an ArchModel
        self.NUMFEATS = 3
        self.NUMDATA_POS = 1000
        self.NUMDATA_NEG = self.NUMDATA_POS
        self.ARCHMODEL = MonteArchModel()
        self.ARCHMODEL.numfeats = self.NUMFEATS
        self.ARCHMODEL.numEpochs = 15
        self.ARCHMODEL.batch = False
        self.ARCHMODEL.gradientChunkSize = 1000
        self.ARCHMODEL.l2decay = 0
        self.ARCHMODEL.onlineChunkSize = 5000
        self.ARCHMODEL.numhidden = 10
        self.ARCHMODEL.trainertype = 'gdescadapt'
        self.ARCHMODEL.qLearningRate = 0.05
        self.ARCHMODEL.exponentAvgM = 0.95
        self.ARCHMODEL.learningrate = 0.1
        # Parameters are initialised only once all settings are in place.
        self.ARCHMODEL.setupParams()
        saveArchModel(self.ARCHMODEL, self.AMODELIN_FILENAME)

        # Set up the data array. The earlier note said the classes "should
        # be imbalanced", yet NUMDATA_NEG is set equal to NUMDATA_POS.
        # NOTE(review): *_MEANS are used as per-column multipliers of the
        # randn samples, not as additive means - confirm this is intended.
        self.POS_MEANS = [-20, -50, -10]
        self.NEG_MEANS = [1, -2, -3]
        self.POS_DATA = self.POS_MEANS * randn(self.NUMDATA_POS, self.NUMFEATS)
        self.NEG_DATA = self.NEG_MEANS * randn(self.NUMDATA_NEG, self.NUMFEATS)
        self.DATA_ARR = concatenate((self.POS_DATA, self.NEG_DATA), 0)
        # (And because all the data is in similar forms)
        self.DATA_ARR /= 10.0

        # One dict per row, keyed by column index.
        self.FEAT_DATA = [
            {iCol: value for iCol, value in enumerate(row)}
            for row in self.DATA_ARR
        ]

        # Write out the fdict stuff; the context manager closes the gzip
        # stream even when writer.update raises part-way through.
        with gzip.open(self.FDICT_FILENAME, 'w') as ofs:
            writer = FeatureDictWriter(ofs)
            for iRow, d in enumerate(self.FEAT_DATA):
                writer.update(d, str(iRow))

        # Construct the idxArr data. The negatives sit after the positives
        # in DATA_ARR, so their row indices begin at len(POS_DATA); the old
        # start of len(NEG_DATA) only worked because the sizes were equal.
        numPos = len(self.POS_DATA)
        numNeg = len(self.NEG_DATA)
        posIdx = range(numPos)
        negIdx = range(numPos, numPos + numNeg)

        self.IDX_ARR = [[aPos, aPos, 1.0, 0.0] for aPos in posIdx]
        self.IDX_ARR.extend([aNeg, aNeg, 0.0, 1.0] for aNeg in negIdx)

        # Closing via `with` ensures the rows are flushed on every exit path.
        with open(self.IDX_FILENAME, 'w') as ofs:
            writer = csv.writer(ofs, quoting=csv.QUOTE_NONE)
            for line in self.IDX_ARR:
                writer.writerow(line)
コード例 #8
0
 def postEpochCallback(self, classifier):
     """Save the current arch model after an epoch, if saving is switched on.

     When self.saveFile is falsy this returns immediately. Otherwise it
     attaches self.classifier.costTrajectory to self.classifier.archModel
     and saves that model to self.archModelOutFile.

     classifier -- received from the caller but unused; the method works
         on self.classifier.
     """
     if not self.saveFile:
         return None

     # Read the trajectory first, then attach it to the model being saved.
     trajectory = self.classifier.costTrajectory
     self.classifier.archModel.costTrajectory = trajectory
     saveArchModel(self.classifier.archModel, self.archModelOutFile)