def score(self):
    """Score the fitted classifier on the held-out ``X_test`` / ``y_test`` split.

    Predictions and the second column of ``predict_proba`` (presumably the
    positive-class probability -- confirm against the classifier in use) are
    handed to a ``Scorer`` together with the classifier and configuration.

    Side effects:
        * prints the shapes of ``self.features`` and ``self.test`` to stdout;
        * stores the predictions on ``self.y_pred`` / ``self.yy_pred`` and
          the resulting report on ``self.scoringrpt``.

    Returns:
        Whatever ``Scorer.score`` returns for this split.
    """
    self.logger.info('Scoring...')

    # The classifier is fed a float32 ndarray regardless of how X_test is stored.
    holdout = np.array(self.X_test).astype(np.float32)
    labels = self.classifier.predict(holdout)
    second_column_proba = self.classifier.predict_proba(holdout)[:, 1]

    # Diagnostic dump of the data set dimensions.
    print('\n***')
    print(self.features.shape)
    print(self.test.shape)
    print('***\n')

    title = 'Cross Verification Data Report \t\t' + str(dt.datetime.now())
    report = Scorer().score(
        self.y_test,
        labels,
        second_column_proba,
        classifier=self.classifier,
        title=title,
        configuration=self.configuration,
    )

    # Keep the artefacts around for later inspection by other methods.
    self.y_pred = labels
    self.yy_pred = second_column_proba
    self.scoringrpt = report
    return report
def predict(self):
    """Predict on ``self.test`` and write the submission files and a log.

    Three files are created in ``configuration['submissionDir']``, all sharing
    one timestamp in their names:

    * ``submission-values-<ts>.csv``        -- the sample submission with
      ``QuoteConversion_Flag`` replaced by the predicted values;
    * ``submission-probabilities-<ts>.csv`` -- the same, but filled with the
      second column of ``predict_proba``;
    * ``submission-<ts>.txt``               -- a text log with data set sizes,
      the output file names, ``self.report()`` and, when available, the
      prediction report.

    When ``self.hasExpected`` is true the predictions are also scored against
    the second column of ``self.expected`` and that report is printed.

    Returns:
        tuple: ``(submissionLog, submissionVFile, submissionPFile,
        self.classifier)``.
    """
    self.logger.info('Predicting...')

    submissionSample = self.configuration['submissionSample']
    submissionDir = self.configuration['submissionDir']

    # One timestamp ties the three output files together.
    timestamp = dt.datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
    submissionVFile = submissionDir + '/' + 'submission-values-' + timestamp + '.csv'
    submissionPFile = submissionDir + '/' + 'submission-probabilities-' + timestamp + '.csv'
    submissionLog = submissionDir + '/' + 'submission-' + timestamp + '.txt'

    npTest = np.array(self.test).astype(np.float32)
    y_pred = self.classifier.predict(npTest)
    yy_pred = self.classifier.predict_proba(npTest)[:, 1]

    predictionrpt = None
    if self.hasExpected:
        self.logger.debug('Target is available... Scoring target')
        # The second column contains the actual values
        y_test = self.expected.iloc[:, 1]
        scorer = Scorer()
        reportName = '\nTarget Data Prediction Report \t\t' + timestamp
        predictionrpt = scorer.score(
            y_test,
            y_pred,
            yy_pred,
            classifier=self.classifier,
            title=reportName,
            configuration=self.configuration,
        )
        print(predictionrpt)

    # Parse the sample submission once and reuse it for both outputs.
    # Bracket assignment is used instead of attribute assignment so the
    # column is set even if the sample file does not already contain it.
    sample = pd.read_csv(submissionSample)
    probabilities = sample.copy()

    sample['QuoteConversion_Flag'] = y_pred
    sample.to_csv(submissionVFile, index=False)

    probabilities['QuoteConversion_Flag'] = yy_pred
    probabilities.to_csv(submissionPFile, index=False)

    mfeatures, nfeatures = self.features.shape
    mtest, ntest = self.test.shape
    mxtrain, nxtrain = self.X_train.shape
    mxtest, nxtest = self.X_test.shape

    self.logger.debug('Saving submission information')
    with open(submissionLog, 'a') as f:
        f.write('Submission Report \t\t\t Generated at: {0}'.format(timestamp))

        f.write('\n\nData Statistics:')
        # Was '\n\Feature data' -- an invalid escape that emitted a literal
        # backslash instead of the tab used by the neighbouring lines.
        f.write('\n\tFeature data: \trows: {0}, columns: {1}'.format(mfeatures, nfeatures))
        f.write('\n\tTest data: \t\trows: {0}, columns: {1}'.format(mtest, ntest))

        f.write('\n\nCross Validation Statistics:')
        f.write('\n\tTraining data: \trows: {0}, columns: {1}'.format(mxtrain, nxtrain))
        f.write('\n\tTest data: \t\trows: {0}, columns: {1}'.format(mxtest, nxtest))

        # Each output file is listed exactly once (the probabilities line
        # used to be written twice).
        f.write('\n\nValues file:\t\t{0}'.format(submissionVFile))
        f.write('\nProbabilities file:\t{0}'.format(submissionPFile))
        f.write('\n')

        f.write('{0}'.format(self.report()))
        if predictionrpt:
            f.write('\n\n{0}'.format(predictionrpt))

    return (submissionLog, submissionVFile, submissionPFile, self.classifier)