Ejemplo n.º 1
0
def trainAndClassify(params):
    """Train a classifier on the training comments and classify the test set.

    The function builds a training context and a testing context from CSV
    files, turns each into classifier-specific inputs through the callbacks
    of the selected classifier policy, runs the policy's classify callback
    and finally hands the result to ``writeOutput``.

    Args:
        params: Settings object. The attributes read here are
            ``classifierType``, ``csvReviewsPath``, ``csvCommentsPathTrain``,
            ``csvCommentsPathTest``, ``ctxCacheTrainFileName``,
            ``ctxCacheTestFileName``, ``supportThresh``, ``featuresBitMask``,
            ``CARMinSup``, ``CARMinConf``, ``CARCacheFileName``, ``bDebug``,
            ``outDebugFileName`` and ``outDebugLabel``.

    Returns:
        None. Results are emitted through ``writeOutput``.
    """
    log = logging.getLogger("trainAndClassify")
    log.info("~~~~~~~~~~~~~BEGIN")
    classifierPolicy = getClassifierPolicy(params.classifierType)

    # Load raw CSV reviews (shared by the training and testing contexts).
    # Rows are materialized inside the ``with`` block so that the file handle
    # is closed deterministically instead of being leaked.
    with open(params.csvReviewsPath) as reviewsFile:
        rawCsvReviews = list(csv.DictReader(reviewsFile))

    # TRAIN: Load raw CSV comments
    with open(params.csvCommentsPathTrain) as commentsTrainFile:
        rawCsvCommentsTrain = list(csv.DictReader(commentsTrainFile))

    # TRAIN: Create context
    log.info("~~~~~~~~~~~~~CREATING TRAINING CONTEXT")
    ctxTrain = MinerContext.loadContext(params.ctxCacheTrainFileName,
                                        rawCsvCommentsTrain, rawCsvReviews,
                                        params.supportThresh)

    # TRAIN: Create features sets (the callback fills the list in place)
    log.info("~~~~~~~~~~~~~PREPARING TRAINING FEATURES")
    featuresMapsTrain = []
    classifierPolicy[eClassifierCB.PrepareFeatures](ctxTrain,
                                                    featuresMapsTrain,
                                                    params.featuresBitMask)

    # TRAIN: Add CAR if desired
    if params.featuresBitMask & MinerFeaturesUtils.eFeaturesMaskBits.CAR:
        MinerFeaturesUtils.addFeaturesCAR(ctxTrain, featuresMapsTrain,
                                          params.CARMinSup, params.CARMinConf,
                                          params.CARCacheFileName)

    # TRAIN: Convert features set to classifier specific input
    log.info("~~~~~~~~~~~~~CONVERTING TRAINING INPUTS")
    classifierInputsTrain = []
    classifierPolicy[eClassifierCB.ClassifierInputs](ctxTrain,
                                                     featuresMapsTrain,
                                                     classifierInputsTrain,
                                                     True)

    # TEST: Load raw CSV comments
    with open(params.csvCommentsPathTest) as commentsTestFile:
        rawCsvCommentsTest = list(csv.DictReader(commentsTestFile))

    # TEST: Create context
    log.info("~~~~~~~~~~~~~CREATING TESTING CONTEXT")
    ctxTest = MinerContext.loadContext(params.ctxCacheTestFileName,
                                       rawCsvCommentsTest, rawCsvReviews,
                                       params.supportThresh)
    # HACK - replace filtered words with those of training context
    ctxTest.mFilteredWords = ctxTrain.mFilteredWords

    # TEST: Create features sets
    log.info("~~~~~~~~~~~~~PREPARING TESTING FEATURES")
    featuresMapsTest = []
    classifierPolicy[eClassifierCB.PrepareFeatures](ctxTest, featuresMapsTest,
                                                    params.featuresBitMask)

    # TEST: Add CAR if desired
    if params.featuresBitMask & MinerFeaturesUtils.eFeaturesMaskBits.CAR:
        MinerFeaturesUtils.addFeaturesCAR(ctxTest, featuresMapsTest,
                                          params.CARMinSup, params.CARMinConf,
                                          params.CARCacheFileName)

    # TEST: Convert features set to classifier specific input
    log.info("~~~~~~~~~~~~~CONVERTING TESTING INPUTS")
    classifierInputsTest = []
    classifierPolicy[eClassifierCB.ClassifierInputs](ctxTest, featuresMapsTest,
                                                     classifierInputsTest,
                                                     True)

    log.info("~~~~~~~~~~~~~CLASSIFYING")
    classifier = classifierPolicy[eClassifierCB.Classify](
        classifierInputsTrain, classifierInputsTest, params.bDebug,
        params.outDebugFileName, params.outDebugLabel)

    log.info("~~~~~~~~~~~~~WRITING OUTPUTS")
    writeOutput(ctxTest, featuresMapsTest, params.classifierType, classifier)
    log.info("~~~~~~~~~~~~~END")
Ejemplo n.º 2
0
def SvmPrepareFeatures( ctx, outFeaturesMaps ):
    """Populate ``outFeaturesMaps`` with the fixed SVM feature set.

    After ``MinerFeaturesUtils.initFeatures`` every feature adder listed
    below is invoked, in order, with ``(ctx, outFeaturesMaps)``.

    NOTE(review): ``trainAndClassify`` in this file invokes the
    PrepareFeatures callback with a third ``featuresBitMask`` argument and
    calls ``addFeaturesCAR`` with five arguments; confirm that this
    two-argument variant matches the policy it is registered in.
    """
    logging.getLogger("Svm").info( "prepare features" )
    MinerFeaturesUtils.initFeatures( ctx, outFeaturesMaps )

    # Adders are resolved by name one at a time so that attribute lookup
    # stays as lazy as it would be with a sequence of explicit calls.
    adderNames = (
        "addFeaturesCommentLength",
        "addFeaturesHelpfulnessRatio",
        "addFeaturesPhrases",
        "addFeaturesWordExists",
        "addFeaturesAuthorFreqInReview",
        "addFeaturesReviewAuthorMentioned",
        "addFeaturesCommentAuthorMentioned",
        "addFeaturesCAR",
    )
    for adderName in adderNames:
        getattr( MinerFeaturesUtils, adderName )( ctx, outFeaturesMaps )
Ejemplo n.º 3
0
def trainAndClassify( params ):
    """Run the full train-then-classify pipeline described by ``params``.

    Steps, in order: load the reviews CSV; load the training comments CSV
    and build the training context, feature maps and classifier inputs;
    do the same for the testing comments (reusing the training context's
    filtered words); call the policy's classify callback; write outputs.

    Args:
        params: Settings object. The attributes read here are
            ``classifierType``, ``csvReviewsPath``, ``csvCommentsPathTrain``,
            ``csvCommentsPathTest``, ``ctxCacheTrainFileName``,
            ``ctxCacheTestFileName``, ``supportThresh``, ``featuresBitMask``,
            ``CARMinSup``, ``CARMinConf``, ``CARCacheFileName``, ``bDebug``,
            ``outDebugFileName`` and ``outDebugLabel``.

    Returns:
        None. Results are emitted through ``writeOutput``.
    """
    logger = logging.getLogger( "trainAndClassify" )
    logger.info( "~~~~~~~~~~~~~BEGIN" )
    classifierPolicy = getClassifierPolicy(params.classifierType)
    carRequested = MinerFeaturesUtils.eFeaturesMaskBits.CAR

    # Load raw CSV reviews. Each CSV is fully read inside a ``with`` block so
    # the underlying file is closed as soon as its rows are in memory.
    with open(params.csvReviewsPath) as csvFile:
        rawCsvReviews = list(csv.DictReader(csvFile))

    # TRAIN: Load raw CSV comments
    with open(params.csvCommentsPathTrain) as csvFile:
        rawCsvCommentsTrain = list(csv.DictReader(csvFile))

    # TRAIN: Create context
    logger.info( "~~~~~~~~~~~~~CREATING TRAINING CONTEXT" )
    ctxTrain = MinerContext.loadContext(params.ctxCacheTrainFileName, rawCsvCommentsTrain, rawCsvReviews, params.supportThresh)

    # TRAIN: Create features sets (the callback fills the list in place)
    logger.info( "~~~~~~~~~~~~~PREPARING TRAINING FEATURES" )
    featuresMapsTrain = []
    classifierPolicy[ eClassifierCB.PrepareFeatures ](ctxTrain, featuresMapsTrain, params.featuresBitMask)

    # TRAIN: Add CAR if desired
    if params.featuresBitMask & carRequested:
        MinerFeaturesUtils.addFeaturesCAR( ctxTrain, featuresMapsTrain, params.CARMinSup, params.CARMinConf, params.CARCacheFileName )

    # TRAIN: Convert features set to classifier specific input
    logger.info( "~~~~~~~~~~~~~CONVERTING TRAINING INPUTS" )
    classifierInputsTrain = []
    classifierPolicy[ eClassifierCB.ClassifierInputs ](ctxTrain, featuresMapsTrain, classifierInputsTrain, True)

    # TEST: Load raw CSV comments
    with open(params.csvCommentsPathTest) as csvFile:
        rawCsvCommentsTest = list(csv.DictReader(csvFile))

    # TEST: Create context
    logger.info( "~~~~~~~~~~~~~CREATING TESTING CONTEXT" )
    ctxTest = MinerContext.loadContext(params.ctxCacheTestFileName, rawCsvCommentsTest, rawCsvReviews, params.supportThresh)
    # HACK - replace filtered words with those of training context
    ctxTest.mFilteredWords = ctxTrain.mFilteredWords

    # TEST: Create features sets
    logger.info( "~~~~~~~~~~~~~PREPARING TESTING FEATURES" )
    featuresMapsTest = []
    classifierPolicy[ eClassifierCB.PrepareFeatures ](ctxTest, featuresMapsTest, params.featuresBitMask )

    # TEST: Add CAR if desired
    if params.featuresBitMask & carRequested:
        MinerFeaturesUtils.addFeaturesCAR( ctxTest, featuresMapsTest, params.CARMinSup, params.CARMinConf, params.CARCacheFileName )

    # TEST: Convert features set to classifier specific input
    logger.info( "~~~~~~~~~~~~~CONVERTING TESTING INPUTS" )
    classifierInputsTest = []
    classifierPolicy[ eClassifierCB.ClassifierInputs ](ctxTest, featuresMapsTest, classifierInputsTest, True)

    logger.info( "~~~~~~~~~~~~~CLASSIFYING" )
    classifier = classifierPolicy[ eClassifierCB.Classify ]( classifierInputsTrain, classifierInputsTest, params.bDebug, params.outDebugFileName, params.outDebugLabel )

    logger.info( "~~~~~~~~~~~~~WRITING OUTPUTS" )
    writeOutput(ctxTest, featuresMapsTest, params.classifierType, classifier)
    logger.info( "~~~~~~~~~~~~~END" )
Ejemplo n.º 4
0
def SvmPrepareFeatures(ctx, outFeaturesMaps):
    """Build the SVM feature maps for ``ctx`` into ``outFeaturesMaps``.

    Logs the step, then calls ``MinerFeaturesUtils.initFeatures`` followed
    by a fixed, ordered list of ``addFeatures*`` helpers, each invoked with
    ``(ctx, outFeaturesMaps)``.

    NOTE(review): ``trainAndClassify`` in this file passes a third
    ``featuresBitMask`` argument to the PrepareFeatures callback and five
    arguments to ``addFeaturesCAR``; verify this variant is still
    compatible with its callers.
    """
    logging.getLogger("Svm").info("prepare features")

    # initFeatures runs first; every step is looked up by name right before
    # it is called, in exactly this order.
    steps = [
        "initFeatures",
        "addFeaturesCommentLength",
        "addFeaturesHelpfulnessRatio",
        "addFeaturesPhrases",
        "addFeaturesWordExists",
        "addFeaturesAuthorFreqInReview",
        "addFeaturesReviewAuthorMentioned",
        "addFeaturesCommentAuthorMentioned",
        "addFeaturesCAR",
    ]
    for stepName in steps:
        step = getattr(MinerFeaturesUtils, stepName)
        step(ctx, outFeaturesMaps)
def NaiveBayesPrepareFeatures( ctx, outFeaturesMaps, featuresBitMask ):
    """Populate ``outFeaturesMaps`` with the features selected by the mask.

    ``MinerFeaturesUtils.initFeatures`` always runs. Afterwards each feature
    adder in the table below is called with ``(ctx, outFeaturesMaps)`` only
    when ``featuresBitMask`` has the matching
    ``MinerFeaturesUtils.eFeaturesMaskBits`` bit set. Adders run in table
    order.
    """
    logging.getLogger("NaiveBayes").info( "prepare features" )
    MinerFeaturesUtils.initFeatures( ctx, outFeaturesMaps )

    # (mask bit attribute, feature adder attribute), in execution order.
    # Both are resolved by name per entry so lookups stay lazy.
    maskedAdders = (
        ( "wordExists", "addFeaturesWordExists" ),
        ( "commentLength", "addFeaturesCommentLength" ),
        ( "helpfullnessRatio", "addFeaturesHelpfulnessRatio" ),
        ( "authorFreqInReview", "addFeaturesAuthorFreqInReview" ),
        ( "reviewAuthorMentioned", "addFeaturesReviewAuthorMentioned" ),
        ( "commentAuthorMentioned", "addFeaturesCommentAuthorMentioned" ),
        ( "dist", "addFeaturesDist" ),
        ( "phrases", "addFeaturesPhrases" ),
    )
    for bitName, adderName in maskedAdders:
        if featuresBitMask & getattr( MinerFeaturesUtils.eFeaturesMaskBits, bitName ):
            getattr( MinerFeaturesUtils, adderName )( ctx, outFeaturesMaps )
Ejemplo n.º 6
0
def NaiveBayesPrepareFeatures(ctx, outFeaturesMaps, featuresBitMask):
    """Prepare Naive Bayes feature maps, honoring ``featuresBitMask``.

    Always initializes the feature maps via
    ``MinerFeaturesUtils.initFeatures``. Each optional feature group is then
    added, in the order listed below, when its bit from
    ``MinerFeaturesUtils.eFeaturesMaskBits`` is set in ``featuresBitMask``.
    """
    logging.getLogger("NaiveBayes").info("prepare features")
    MinerFeaturesUtils.initFeatures(ctx, outFeaturesMaps)

    # Ordered pairs: name of the mask bit -> name of the helper that adds
    # the corresponding features.
    featureSteps = [
        ("wordExists", "addFeaturesWordExists"),
        ("commentLength", "addFeaturesCommentLength"),
        ("helpfullnessRatio", "addFeaturesHelpfulnessRatio"),
        ("authorFreqInReview", "addFeaturesAuthorFreqInReview"),
        ("reviewAuthorMentioned", "addFeaturesReviewAuthorMentioned"),
        ("commentAuthorMentioned", "addFeaturesCommentAuthorMentioned"),
        ("dist", "addFeaturesDist"),
        ("phrases", "addFeaturesPhrases"),
    ]
    for maskAttr, helperAttr in featureSteps:
        maskBit = getattr(MinerFeaturesUtils.eFeaturesMaskBits, maskAttr)
        if not (featuresBitMask & maskBit):
            continue
        addFeatures = getattr(MinerFeaturesUtils, helperAttr)
        addFeatures(ctx, outFeaturesMaps)