def _loadCsvRows(csvPath):
    """Read every row of the CSV file at csvPath into a list of dicts."""
    # The context manager closes the handle as soon as the rows have been
    # materialised; previously the file object was never explicitly closed.
    # NOTE(review): on Python 3 the csv docs recommend open(..., newline='');
    # left as a plain open() to stay Python 2 compatible - confirm target.
    with open(csvPath) as csvFile:
        return list(csv.DictReader(csvFile))


def trainAndClassify(params):
    """Run the train -> test -> classify -> write-output pipeline.

    The steps are, in order: load the reviews CSV, build the training
    context / features / classifier inputs, build the same for the testing
    set, hand both input sets to the classifier callback and pass the
    result to writeOutput.

    params is read for the following attributes: classifierType,
    csvReviewsPath, csvCommentsPathTrain, csvCommentsPathTest,
    ctxCacheTrainFileName, ctxCacheTestFileName, supportThresh,
    featuresBitMask, CARMinSup, CARMinConf, CARCacheFileName, bDebug,
    outDebugFileName and outDebugLabel.

    Returns None.
    """
    logger = logging.getLogger("trainAndClassify")
    logger.info("~~~~~~~~~~~~~BEGIN")

    classifierPolicy = getClassifierPolicy(params.classifierType)

    # Load raw CSV reviews (shared by the training and testing contexts)
    rawCsvReviews = _loadCsvRows(params.csvReviewsPath)

    # TRAIN: Load raw CSV comments
    rawCsvCommentsTrain = _loadCsvRows(params.csvCommentsPathTrain)

    # TRAIN: Create context
    logger.info("~~~~~~~~~~~~~CREATING TRAINING CONTEXT")
    ctxTrain = MinerContext.loadContext(params.ctxCacheTrainFileName,
                                        rawCsvCommentsTrain,
                                        rawCsvReviews,
                                        params.supportThresh)

    # TRAIN: Create features sets
    logger.info("~~~~~~~~~~~~~PREPARING TRAINING FEATURES")
    featuresMapsTrain = []
    classifierPolicy[eClassifierCB.PrepareFeatures](ctxTrain,
                                                    featuresMapsTrain,
                                                    params.featuresBitMask)

    # TRAIN: Add CAR if desired
    if params.featuresBitMask & MinerFeaturesUtils.eFeaturesMaskBits.CAR:
        MinerFeaturesUtils.addFeaturesCAR(ctxTrain,
                                          featuresMapsTrain,
                                          params.CARMinSup,
                                          params.CARMinConf,
                                          params.CARCacheFileName)

    # TRAIN: Convert features set to classifier specific input
    logger.info("~~~~~~~~~~~~~CONVERTING TRAINING INPUTS")
    classifierInputsTrain = []
    classifierPolicy[eClassifierCB.ClassifierInputs](ctxTrain,
                                                     featuresMapsTrain,
                                                     classifierInputsTrain,
                                                     True)

    # TEST: Load raw CSV comments
    rawCsvCommentsTest = _loadCsvRows(params.csvCommentsPathTest)

    # TEST: Create context
    logger.info("~~~~~~~~~~~~~CREATING TESTING CONTEXT")
    ctxTest = MinerContext.loadContext(params.ctxCacheTestFileName,
                                       rawCsvCommentsTest,
                                       rawCsvReviews,
                                       params.supportThresh)

    # HACK - replace filtered words with those of training context
    ctxTest.mFilteredWords = ctxTrain.mFilteredWords

    # TEST: Create features sets
    logger.info("~~~~~~~~~~~~~PREPARING TESTING FEATURES")
    featuresMapsTest = []
    classifierPolicy[eClassifierCB.PrepareFeatures](ctxTest,
                                                    featuresMapsTest,
                                                    params.featuresBitMask)

    # TEST: Add CAR if desired
    if params.featuresBitMask & MinerFeaturesUtils.eFeaturesMaskBits.CAR:
        MinerFeaturesUtils.addFeaturesCAR(ctxTest,
                                          featuresMapsTest,
                                          params.CARMinSup,
                                          params.CARMinConf,
                                          params.CARCacheFileName)

    # TEST: Convert features set to classifier specific input
    logger.info("~~~~~~~~~~~~~CONVERTING TESTING INPUTS")
    classifierInputsTest = []
    classifierPolicy[eClassifierCB.ClassifierInputs](ctxTest,
                                                     featuresMapsTest,
                                                     classifierInputsTest,
                                                     True)

    logger.info("~~~~~~~~~~~~~CLASSIFYING")
    classifier = classifierPolicy[eClassifierCB.Classify](
        classifierInputsTrain,
        classifierInputsTest,
        params.bDebug,
        params.outDebugFileName,
        params.outDebugLabel)

    logger.info("~~~~~~~~~~~~~WRITING OUTPUTS")
    writeOutput(ctxTest, featuresMapsTest, params.classifierType, classifier)

    logger.info("~~~~~~~~~~~~~END")
def SvmPrepareFeatures(ctx, outFeaturesMaps):
    """Apply the fixed SVM feature pipeline to (ctx, outFeaturesMaps).

    Logs "prepare features" on the "Svm" logger, then calls each
    MinerFeaturesUtils helper listed below with the same two arguments,
    in the listed order. Returns None; the helpers are presumably expected
    to fill outFeaturesMaps in place (not visible from here - confirm).

    NOTE(review): trainAndClassify in this file invokes the PrepareFeatures
    callback with a third featuresBitMask argument, which this signature
    does not accept - confirm how the SVM policy is wired.
    """
    logging.getLogger("Svm").info("prepare features")

    # Order matters: initFeatures must run first, the rest follow the
    # original call sequence.
    featureSteps = (
        MinerFeaturesUtils.initFeatures,
        MinerFeaturesUtils.addFeaturesCommentLength,
        MinerFeaturesUtils.addFeaturesHelpfulnessRatio,
        MinerFeaturesUtils.addFeaturesPhrases,
        MinerFeaturesUtils.addFeaturesWordExists,
        MinerFeaturesUtils.addFeaturesAuthorFreqInReview,
        MinerFeaturesUtils.addFeaturesReviewAuthorMentioned,
        MinerFeaturesUtils.addFeaturesCommentAuthorMentioned,
        MinerFeaturesUtils.addFeaturesCAR,
    )
    for applyStep in featureSteps:
        applyStep(ctx, outFeaturesMaps)
def _loadCsvRows(csvPath):
    """Return all rows of the CSV file at csvPath as a list of dicts."""
    # Use a context manager so the file handle is closed deterministically;
    # the earlier csv.DictReader(open(...)) form never closed it.
    # NOTE(review): on Python 3 the csv docs recommend open(..., newline='');
    # left as a plain open() to stay Python 2 compatible - confirm target.
    with open(csvPath) as csvFile:
        return list(csv.DictReader(csvFile))


def trainAndClassify(params):
    """Train on one comment set, classify another, and write the results.

    Sequence: reviews CSV is loaded once; a training context, feature maps
    and classifier inputs are built from the training comments; the same
    is done for the testing comments (re-using the training context's
    filtered words); the Classify callback of the selected policy receives
    both input lists and its result is handed to writeOutput.

    Attributes read from params: classifierType, csvReviewsPath,
    csvCommentsPathTrain, csvCommentsPathTest, ctxCacheTrainFileName,
    ctxCacheTestFileName, supportThresh, featuresBitMask, CARMinSup,
    CARMinConf, CARCacheFileName, bDebug, outDebugFileName, outDebugLabel.

    Returns None.
    """
    log = logging.getLogger("trainAndClassify")
    log.info("~~~~~~~~~~~~~BEGIN")

    classifierPolicy = getClassifierPolicy(params.classifierType)
    prepareFeatures = classifierPolicy[eClassifierCB.PrepareFeatures]
    buildClassifierInputs = classifierPolicy[eClassifierCB.ClassifierInputs]

    # Load raw CSV reviews
    rawCsvReviews = _loadCsvRows(params.csvReviewsPath)

    # TRAIN: Load raw CSV comments
    rawCsvCommentsTrain = _loadCsvRows(params.csvCommentsPathTrain)

    # TRAIN: Create context
    log.info("~~~~~~~~~~~~~CREATING TRAINING CONTEXT")
    ctxTrain = MinerContext.loadContext(
        params.ctxCacheTrainFileName,
        rawCsvCommentsTrain,
        rawCsvReviews,
        params.supportThresh,
    )

    # TRAIN: Create features sets
    log.info("~~~~~~~~~~~~~PREPARING TRAINING FEATURES")
    featuresMapsTrain = []
    prepareFeatures(ctxTrain, featuresMapsTrain, params.featuresBitMask)

    # TRAIN: Add CAR if desired
    if params.featuresBitMask & MinerFeaturesUtils.eFeaturesMaskBits.CAR:
        MinerFeaturesUtils.addFeaturesCAR(
            ctxTrain,
            featuresMapsTrain,
            params.CARMinSup,
            params.CARMinConf,
            params.CARCacheFileName,
        )

    # TRAIN: Convert features set to classifier specific input
    log.info("~~~~~~~~~~~~~CONVERTING TRAINING INPUTS")
    classifierInputsTrain = []
    buildClassifierInputs(ctxTrain, featuresMapsTrain,
                          classifierInputsTrain, True)

    # TEST: Load raw CSV comments
    rawCsvCommentsTest = _loadCsvRows(params.csvCommentsPathTest)

    # TEST: Create context
    log.info("~~~~~~~~~~~~~CREATING TESTING CONTEXT")
    ctxTest = MinerContext.loadContext(
        params.ctxCacheTestFileName,
        rawCsvCommentsTest,
        rawCsvReviews,
        params.supportThresh,
    )

    # HACK - replace filtered words with those of training context
    ctxTest.mFilteredWords = ctxTrain.mFilteredWords

    # TEST: Create features sets
    log.info("~~~~~~~~~~~~~PREPARING TESTING FEATURES")
    featuresMapsTest = []
    prepareFeatures(ctxTest, featuresMapsTest, params.featuresBitMask)

    # TEST: Add CAR if desired
    if params.featuresBitMask & MinerFeaturesUtils.eFeaturesMaskBits.CAR:
        MinerFeaturesUtils.addFeaturesCAR(
            ctxTest,
            featuresMapsTest,
            params.CARMinSup,
            params.CARMinConf,
            params.CARCacheFileName,
        )

    # TEST: Convert features set to classifier specific input
    log.info("~~~~~~~~~~~~~CONVERTING TESTING INPUTS")
    classifierInputsTest = []
    buildClassifierInputs(ctxTest, featuresMapsTest,
                          classifierInputsTest, True)

    log.info("~~~~~~~~~~~~~CLASSIFYING")
    classifier = classifierPolicy[eClassifierCB.Classify](
        classifierInputsTrain,
        classifierInputsTest,
        params.bDebug,
        params.outDebugFileName,
        params.outDebugLabel,
    )

    log.info("~~~~~~~~~~~~~WRITING OUTPUTS")
    writeOutput(ctxTest, featuresMapsTest, params.classifierType, classifier)

    log.info("~~~~~~~~~~~~~END")
def SvmPrepareFeatures(ctx, outFeaturesMaps):
    """Run every SVM feature helper, in a fixed order, on the given context.

    After logging "prepare features" on the "Svm" logger, each
    MinerFeaturesUtils function in the sequence below is called as
    helper(ctx, outFeaturesMaps). Nothing is returned; results are
    presumably accumulated in outFeaturesMaps by the helpers (confirm
    against MinerFeaturesUtils).

    NOTE(review): unlike NaiveBayesPrepareFeatures, this takes no
    featuresBitMask, yet trainAndClassify passes one to the PrepareFeatures
    callback - verify the SVM policy wiring.
    """
    svmLogger = logging.getLogger("Svm")
    svmLogger.info("prepare features")

    # Sequence is significant: initialisation first, then the feature
    # adders in their original order.
    pipeline = [
        MinerFeaturesUtils.initFeatures,
        MinerFeaturesUtils.addFeaturesCommentLength,
        MinerFeaturesUtils.addFeaturesHelpfulnessRatio,
        MinerFeaturesUtils.addFeaturesPhrases,
        MinerFeaturesUtils.addFeaturesWordExists,
        MinerFeaturesUtils.addFeaturesAuthorFreqInReview,
        MinerFeaturesUtils.addFeaturesReviewAuthorMentioned,
        MinerFeaturesUtils.addFeaturesCommentAuthorMentioned,
        MinerFeaturesUtils.addFeaturesCAR,
    ]
    for helper in pipeline:
        helper(ctx, outFeaturesMaps)
def NaiveBayesPrepareFeatures(ctx, outFeaturesMaps, featuresBitMask):
    """Add the Naive Bayes features selected by featuresBitMask.

    Logs "prepare features" on the "NaiveBayes" logger, always calls
    MinerFeaturesUtils.initFeatures, and then calls each feature adder
    whose MinerFeaturesUtils.eFeaturesMaskBits flag is set in
    featuresBitMask (tested with bitwise AND). Every helper receives
    (ctx, outFeaturesMaps). Returns None.
    """
    logging.getLogger("NaiveBayes").info("prepare features")
    MinerFeaturesUtils.initFeatures(ctx, outFeaturesMaps)

    maskBits = MinerFeaturesUtils.eFeaturesMaskBits
    # (flag, adder) pairs, kept in the order the features are applied.
    selectableFeatures = (
        (maskBits.wordExists,
         MinerFeaturesUtils.addFeaturesWordExists),
        (maskBits.commentLength,
         MinerFeaturesUtils.addFeaturesCommentLength),
        (maskBits.helpfullnessRatio,
         MinerFeaturesUtils.addFeaturesHelpfulnessRatio),
        (maskBits.authorFreqInReview,
         MinerFeaturesUtils.addFeaturesAuthorFreqInReview),
        (maskBits.reviewAuthorMentioned,
         MinerFeaturesUtils.addFeaturesReviewAuthorMentioned),
        (maskBits.commentAuthorMentioned,
         MinerFeaturesUtils.addFeaturesCommentAuthorMentioned),
        (maskBits.dist,
         MinerFeaturesUtils.addFeaturesDist),
        (maskBits.phrases,
         MinerFeaturesUtils.addFeaturesPhrases),
    )
    for flag, addFeature in selectableFeatures:
        if featuresBitMask & flag:
            addFeature(ctx, outFeaturesMaps)
def NaiveBayesPrepareFeatures(ctx, outFeaturesMaps, featuresBitMask):
    """Initialise the feature maps and add each mask-enabled feature.

    "prepare features" is logged on the "NaiveBayes" logger and
    MinerFeaturesUtils.initFeatures is called unconditionally. Each
    remaining helper runs only when its corresponding
    MinerFeaturesUtils.eFeaturesMaskBits value ANDed with featuresBitMask
    is truthy. All helpers are called as helper(ctx, outFeaturesMaps).
    Returns None.
    """
    nbLogger = logging.getLogger("NaiveBayes")
    nbLogger.info("prepare features")

    MinerFeaturesUtils.initFeatures(ctx, outFeaturesMaps)

    bits = MinerFeaturesUtils.eFeaturesMaskBits
    # Ordered dispatch table: mask bit -> helper that adds that feature.
    dispatch = [
        (bits.wordExists, MinerFeaturesUtils.addFeaturesWordExists),
        (bits.commentLength, MinerFeaturesUtils.addFeaturesCommentLength),
        (bits.helpfullnessRatio,
         MinerFeaturesUtils.addFeaturesHelpfulnessRatio),
        (bits.authorFreqInReview,
         MinerFeaturesUtils.addFeaturesAuthorFreqInReview),
        (bits.reviewAuthorMentioned,
         MinerFeaturesUtils.addFeaturesReviewAuthorMentioned),
        (bits.commentAuthorMentioned,
         MinerFeaturesUtils.addFeaturesCommentAuthorMentioned),
        (bits.dist, MinerFeaturesUtils.addFeaturesDist),
        (bits.phrases, MinerFeaturesUtils.addFeaturesPhrases),
    ]
    for bit, helper in dispatch:
        if not (featuresBitMask & bit):
            continue
        helper(ctx, outFeaturesMaps)