예제 #1
0
    def score(self):
        """Score the classifier on the held-out split and keep the results.

        Runs ``self.classifier`` over ``self.X_test`` (cast to float32),
        prints the shapes of ``self.features`` and ``self.test`` to stdout,
        and hands the predictions plus ``self.y_test`` to a ``Scorer``.

        Side effects: sets ``self.y_pred``, ``self.yy_pred`` and
        ``self.scoringrpt``.

        Returns:
            Whatever ``Scorer.score`` returns for this run.
        """
        self.logger.info('Scoring...')

        # The classifier is fed a float32 ndarray built from X_test.
        held_out = np.array(self.X_test)
        held_out = held_out.astype(np.float32)

        labels = self.classifier.predict(held_out)
        # Column 1 of predict_proba -- presumably the positive class
        # probability; confirm against the classifier's class ordering.
        class_probs = self.classifier.predict_proba(held_out)
        positive_probs = class_probs[:, 1]

        print('\n***')
        print(self.features.shape)
        print(self.test.shape)
        print('***\n')

        generated_at = str(dt.datetime.now())
        title = 'Cross Verification Data Report \t\t' + generated_at

        report_builder = Scorer()
        truth = self.y_test
        report = report_builder.score(
            truth,
            labels,
            positive_probs,
            classifier=self.classifier,
            title=title,
            configuration=self.configuration,
        )

        # Keep predictions and report around for later inspection.
        self.y_pred = labels
        self.yy_pred = positive_probs
        self.scoringrpt = report

        return report
예제 #2
0
    def predict(self):
        """Predict on ``self.test`` and write the submission artifacts.

        Three timestamped files are written into
        ``configuration['submissionDir']``:

        * ``submission-values-<ts>.csv`` -- the sample submission with
          ``QuoteConversion_Flag`` replaced by the predicted labels;
        * ``submission-probabilities-<ts>.csv`` -- the same template with
          the column replaced by column 1 of ``predict_proba``;
        * ``submission-<ts>.txt`` -- a text log with data-set sizes, the
          output paths, ``self.report()`` and, when available, the target
          prediction report.

        When ``self.hasExpected`` is truthy the predictions are also scored
        against the second column of ``self.expected`` and the resulting
        report is printed.

        Returns:
            tuple: ``(submissionLog, submissionVFile, submissionPFile,
            self.classifier)``.
        """
        self.logger.info('Predicting...')

        submissionSample = self.configuration['submissionSample']
        submissionDir = self.configuration['submissionDir']

        # One timestamp shared by all three files so they can be matched up.
        timestamp = dt.datetime.now().strftime('%Y-%m-%d-%H-%M-%S')

        submissionVFile = submissionDir + '/' + 'submission-values-' + timestamp + '.csv'
        submissionPFile = submissionDir + '/' + 'submission-probabilities-' + timestamp + '.csv'
        submissionLog = submissionDir + '/' + 'submission-' + timestamp + '.txt'

        npTest = np.array(self.test).astype(np.float32)

        y_pred = self.classifier.predict(npTest)
        # Column 1 of predict_proba -- presumably the positive class
        # probability; confirm against the classifier's class ordering.
        yy_pred = self.classifier.predict_proba(npTest)[:, 1]

        predictionrpt = None
        if self.hasExpected:
            self.logger.debug('Target is available... Scoring target')
            # The second column contains the actual values
            y_test = self.expected.iloc[:, 1]

            scorer = Scorer()
            reportName = '\nTarget Data Prediction Report \t\t' + timestamp
            predictionrpt = scorer.score(
                y_test,
                y_pred,
                yy_pred,
                classifier=self.classifier,
                title=reportName,
                configuration=self.configuration,
            )
            print(predictionrpt)

        # Bracket assignment always targets a column; attribute assignment
        # would silently set a plain attribute if the column were missing.
        sample = pd.read_csv(submissionSample)
        sample['QuoteConversion_Flag'] = y_pred
        sample.to_csv(submissionVFile, index=False)

        probabilities = pd.read_csv(submissionSample)
        probabilities['QuoteConversion_Flag'] = yy_pred
        probabilities.to_csv(submissionPFile, index=False)

        mfeatures, nfeatures = self.features.shape
        mtest, ntest = self.test.shape
        mxtrain, nxtrain = self.X_train.shape
        mxtest, nxtest = self.X_test.shape

        self.logger.debug('Saving submission information')
        with open(submissionLog, 'a') as f:
            f.write('Submission Report \t\t\t Generated at: {0}'.format(timestamp))
            f.write('\n\nData Statistics:')
            # Was '\n\Feature data' -- an invalid escape that wrote a literal
            # backslash instead of the tab used by the sibling lines.
            f.write('\n\tFeature data: \trows: {0}, columns: {1}'.format(mfeatures, nfeatures))
            f.write('\n\tTest data: \t\trows: {0}, columns: {1}'.format(mtest, ntest))
            f.write('\n\nCross Validation Statistics:')
            f.write('\n\tTraining data: \trows: {0}, columns: {1}'.format(mxtrain, nxtrain))
            f.write('\n\tTest data: \t\trows: {0}, columns: {1}'.format(mxtest, nxtest))
            f.write('\n\nValues file:\t\t{0}'.format(submissionVFile))
            # The probabilities path is written once (it used to be duplicated).
            f.write('\nProbabilities file:\t{0}'.format(submissionPFile))
            f.write('\n')
            f.write('{0}'.format(self.report()))

            if predictionrpt:
                f.write('\n\n{0}'.format(predictionrpt))

        return (submissionLog, submissionVFile, submissionPFile, self.classifier)