def SvmPrepareFeatures(ctx, outFeaturesMaps):
    """Run every feature-building step used for the SVM classifier.

    Logs "prepare features" on the "Svm" logger, then calls
    MinerFeaturesUtils.initFeatures followed by each addFeatures* step
    listed below, in order. Every step receives (ctx, outFeaturesMaps).

    Args:
        ctx: context object forwarded unchanged to each step.
        outFeaturesMaps: features container forwarded unchanged to each
            step (presumably filled in place by the steps -- confirm
            against MinerFeaturesUtils).

    Returns:
        None.
    """
    svmLog = logging.getLogger("Svm")
    svmLog.info("prepare features")

    # Order matters: initFeatures first, then the individual feature adders.
    stepNames = (
        "initFeatures",
        "addFeaturesCommentLength",
        "addFeaturesHelpfulnessRatio",
        "addFeaturesPhrases",
        "addFeaturesWordExists",
        "addFeaturesAuthorFreqInReview",
        "addFeaturesReviewAuthorMentioned",
        "addFeaturesCommentAuthorMentioned",
        "addFeaturesCAR",
    )
    for stepName in stepNames:
        # Resolve each step right before calling it, exactly as a direct
        # attribute call would.
        featureStep = getattr(MinerFeaturesUtils, stepName)
        featureStep(ctx, outFeaturesMaps)
def SvmPrepareFeatures(ctx, outFeaturesMaps):
    """Run every feature-building step used for the SVM classifier.

    Logs "prepare features" on the "Svm" logger, then calls
    MinerFeaturesUtils.initFeatures followed by each addFeatures* step
    listed below, in order. Every step receives (ctx, outFeaturesMaps).

    Args:
        ctx: context object forwarded unchanged to each step.
        outFeaturesMaps: features container forwarded unchanged to each
            step (presumably filled in place by the steps -- confirm
            against MinerFeaturesUtils).

    Returns:
        None.
    """
    svmLog = logging.getLogger("Svm")
    svmLog.info("prepare features")

    # Order matters: initFeatures first, then the individual feature adders.
    stepNames = (
        "initFeatures",
        "addFeaturesCommentLength",
        "addFeaturesHelpfulnessRatio",
        "addFeaturesPhrases",
        "addFeaturesWordExists",
        "addFeaturesAuthorFreqInReview",
        "addFeaturesReviewAuthorMentioned",
        "addFeaturesCommentAuthorMentioned",
        "addFeaturesCAR",
    )
    for stepName in stepNames:
        # Resolve each step right before calling it, exactly as a direct
        # attribute call would.
        featureStep = getattr(MinerFeaturesUtils, stepName)
        featureStep(ctx, outFeaturesMaps)
def _readCsvRows(csvPath):
    """Return all rows of the CSV file at csvPath as a list of dicts.

    The file is opened in a ``with`` block so its handle is closed as soon
    as the rows have been read, instead of being left to the garbage
    collector.
    """
    with open(csvPath) as csvFile:
        return list(csv.DictReader(csvFile))


def trainAndClassify(params):
    """Train a classifier on the training comments and classify the test set.

    Steps, in order:
      1. Look up the classifier policy for params.classifierType.
      2. Load the reviews CSV and the training comments CSV.
      3. Build the training context, features and classifier inputs.
      4. Load the test comments CSV and build the test context, reusing the
         training context's filtered words.
      5. Build the test features and classifier inputs.
      6. Run the policy's Classify callback and write the outputs.

    Args:
        params: object providing the attributes read below: classifierType,
            csvReviewsPath, csvCommentsPathTrain, csvCommentsPathTest,
            ctxCacheTrainFileName, ctxCacheTestFileName, supportThresh,
            featuresBitMask, CARMinSup, CARMinConf, CARCacheFileName,
            bDebug, outDebugFileName and outDebugLabel.

    Returns:
        None. Results are emitted through writeOutput.
    """
    log = logging.getLogger("trainAndClassify")
    log.info("~~~~~~~~~~~~~BEGIN")

    classifierPolicy = getClassifierPolicy(params.classifierType)

    # Load raw CSV reviews (shared by the training and testing contexts)
    rawCsvReviews = _readCsvRows(params.csvReviewsPath)

    # TRAIN: Load raw CSV comments
    rawCsvCommentsTrain = _readCsvRows(params.csvCommentsPathTrain)

    # TRAIN: Create context
    log.info("~~~~~~~~~~~~~CREATING TRAINING CONTEXT")
    ctxTrain = MinerContext.loadContext(params.ctxCacheTrainFileName,
                                        rawCsvCommentsTrain,
                                        rawCsvReviews,
                                        params.supportThresh)

    # TRAIN: Create features sets
    log.info("~~~~~~~~~~~~~PREPARING TRAINING FEATURES")
    featuresMapsTrain = []
    classifierPolicy[eClassifierCB.PrepareFeatures](ctxTrain,
                                                    featuresMapsTrain,
                                                    params.featuresBitMask)

    # TRAIN: Add CAR if desired
    if params.featuresBitMask & MinerFeaturesUtils.eFeaturesMaskBits.CAR:
        MinerFeaturesUtils.addFeaturesCAR(ctxTrain,
                                          featuresMapsTrain,
                                          params.CARMinSup,
                                          params.CARMinConf,
                                          params.CARCacheFileName)

    # TRAIN: Convert features set to classifier specific input
    log.info("~~~~~~~~~~~~~CONVERTING TRAINING INPUTS")
    classifierInputsTrain = []
    classifierPolicy[eClassifierCB.ClassifierInputs](ctxTrain,
                                                     featuresMapsTrain,
                                                     classifierInputsTrain,
                                                     True)

    # TEST: Load raw CSV comments
    rawCsvCommentsTest = _readCsvRows(params.csvCommentsPathTest)

    # TEST: Create context
    log.info("~~~~~~~~~~~~~CREATING TESTING CONTEXT")
    ctxTest = MinerContext.loadContext(params.ctxCacheTestFileName,
                                       rawCsvCommentsTest,
                                       rawCsvReviews,
                                       params.supportThresh)

    # HACK - replace filtered words with those of training context
    ctxTest.mFilteredWords = ctxTrain.mFilteredWords

    # TEST: Create features sets
    log.info("~~~~~~~~~~~~~PREPARING TESTING FEATURES")
    featuresMapsTest = []
    classifierPolicy[eClassifierCB.PrepareFeatures](ctxTest,
                                                    featuresMapsTest,
                                                    params.featuresBitMask)

    # TEST: Add CAR if desired
    if params.featuresBitMask & MinerFeaturesUtils.eFeaturesMaskBits.CAR:
        MinerFeaturesUtils.addFeaturesCAR(ctxTest,
                                          featuresMapsTest,
                                          params.CARMinSup,
                                          params.CARMinConf,
                                          params.CARCacheFileName)

    # TEST: Convert features set to classifier specific input
    log.info("~~~~~~~~~~~~~CONVERTING TESTING INPUTS")
    classifierInputsTest = []
    classifierPolicy[eClassifierCB.ClassifierInputs](ctxTest,
                                                     featuresMapsTest,
                                                     classifierInputsTest,
                                                     True)

    log.info("~~~~~~~~~~~~~CLASSIFYING")
    classifier = classifierPolicy[eClassifierCB.Classify](
        classifierInputsTrain,
        classifierInputsTest,
        params.bDebug,
        params.outDebugFileName,
        params.outDebugLabel)

    log.info("~~~~~~~~~~~~~WRITING OUTPUTS")
    writeOutput(ctxTest, featuresMapsTest, params.classifierType, classifier)

    log.info("~~~~~~~~~~~~~END")
def _readCsvRows(csvPath):
    """Return all rows of the CSV file at csvPath as a list of dicts.

    The file is opened in a ``with`` block so its handle is closed as soon
    as the rows have been read, instead of being left to the garbage
    collector.
    """
    with open(csvPath) as csvFile:
        return list(csv.DictReader(csvFile))


def trainAndClassify(params):
    """Train a classifier on the training comments and classify the test set.

    Steps, in order:
      1. Look up the classifier policy for params.classifierType.
      2. Load the reviews CSV and the training comments CSV.
      3. Build the training context, features and classifier inputs.
      4. Load the test comments CSV and build the test context, reusing the
         training context's filtered words.
      5. Build the test features and classifier inputs.
      6. Run the policy's Classify callback and write the outputs.

    Args:
        params: object providing the attributes read below: classifierType,
            csvReviewsPath, csvCommentsPathTrain, csvCommentsPathTest,
            ctxCacheTrainFileName, ctxCacheTestFileName, supportThresh,
            featuresBitMask, CARMinSup, CARMinConf, CARCacheFileName,
            bDebug, outDebugFileName and outDebugLabel.

    Returns:
        None. Results are emitted through writeOutput.
    """
    log = logging.getLogger("trainAndClassify")
    log.info("~~~~~~~~~~~~~BEGIN")

    classifierPolicy = getClassifierPolicy(params.classifierType)

    # Load raw CSV reviews (shared by the training and testing contexts)
    rawCsvReviews = _readCsvRows(params.csvReviewsPath)

    # TRAIN: Load raw CSV comments
    rawCsvCommentsTrain = _readCsvRows(params.csvCommentsPathTrain)

    # TRAIN: Create context
    log.info("~~~~~~~~~~~~~CREATING TRAINING CONTEXT")
    ctxTrain = MinerContext.loadContext(params.ctxCacheTrainFileName,
                                        rawCsvCommentsTrain,
                                        rawCsvReviews,
                                        params.supportThresh)

    # TRAIN: Create features sets
    log.info("~~~~~~~~~~~~~PREPARING TRAINING FEATURES")
    featuresMapsTrain = []
    classifierPolicy[eClassifierCB.PrepareFeatures](ctxTrain,
                                                    featuresMapsTrain,
                                                    params.featuresBitMask)

    # TRAIN: Add CAR if desired
    if params.featuresBitMask & MinerFeaturesUtils.eFeaturesMaskBits.CAR:
        MinerFeaturesUtils.addFeaturesCAR(ctxTrain,
                                          featuresMapsTrain,
                                          params.CARMinSup,
                                          params.CARMinConf,
                                          params.CARCacheFileName)

    # TRAIN: Convert features set to classifier specific input
    log.info("~~~~~~~~~~~~~CONVERTING TRAINING INPUTS")
    classifierInputsTrain = []
    classifierPolicy[eClassifierCB.ClassifierInputs](ctxTrain,
                                                     featuresMapsTrain,
                                                     classifierInputsTrain,
                                                     True)

    # TEST: Load raw CSV comments
    rawCsvCommentsTest = _readCsvRows(params.csvCommentsPathTest)

    # TEST: Create context
    log.info("~~~~~~~~~~~~~CREATING TESTING CONTEXT")
    ctxTest = MinerContext.loadContext(params.ctxCacheTestFileName,
                                       rawCsvCommentsTest,
                                       rawCsvReviews,
                                       params.supportThresh)

    # HACK - replace filtered words with those of training context
    ctxTest.mFilteredWords = ctxTrain.mFilteredWords

    # TEST: Create features sets
    log.info("~~~~~~~~~~~~~PREPARING TESTING FEATURES")
    featuresMapsTest = []
    classifierPolicy[eClassifierCB.PrepareFeatures](ctxTest,
                                                    featuresMapsTest,
                                                    params.featuresBitMask)

    # TEST: Add CAR if desired
    if params.featuresBitMask & MinerFeaturesUtils.eFeaturesMaskBits.CAR:
        MinerFeaturesUtils.addFeaturesCAR(ctxTest,
                                          featuresMapsTest,
                                          params.CARMinSup,
                                          params.CARMinConf,
                                          params.CARCacheFileName)

    # TEST: Convert features set to classifier specific input
    log.info("~~~~~~~~~~~~~CONVERTING TESTING INPUTS")
    classifierInputsTest = []
    classifierPolicy[eClassifierCB.ClassifierInputs](ctxTest,
                                                     featuresMapsTest,
                                                     classifierInputsTest,
                                                     True)

    log.info("~~~~~~~~~~~~~CLASSIFYING")
    classifier = classifierPolicy[eClassifierCB.Classify](
        classifierInputsTrain,
        classifierInputsTest,
        params.bDebug,
        params.outDebugFileName,
        params.outDebugLabel)

    log.info("~~~~~~~~~~~~~WRITING OUTPUTS")
    writeOutput(ctxTest, featuresMapsTest, params.classifierType, classifier)

    log.info("~~~~~~~~~~~~~END")