def predict_prob(self, data, load_classifier=False):
    """Return the stored classifier's ``predict_prob`` output for ``data``.

    :param data: input forwarded unchanged to ``self.classifier.predict_prob``.
    :param load_classifier: when truthy, ``self.classifier`` is first replaced
        by the object that ``DataHandling.load_data`` returns for
        ``classifier_NB.pickle`` under ``Constants.model_path``.
    :return: whatever ``self.classifier.predict_prob(data)`` returns.
    """
    if load_classifier:
        # Swap in the persisted classifier before predicting.
        pickle_path = Constants.model_path + 'classifier_NB.pickle'
        self.classifier = DataHandling.load_data(pickle_path)

    classifier = self.classifier
    return classifier.predict_prob(data)
# Evaluation set-up: read the term vectors and the training CSV, restore the
# persisted model artefacts, rebuild the article features, and then walk the
# evaluation rows one by one.

# fileNameTestData = "data/test.csv"  # held-out test set; deliberately not distributed
topN = 5

# Use a context manager so the term-vector file handle is closed as soon as
# the JSON has been parsed instead of being left open until garbage collection.
with open(fileNameTermVectors) as term_vectors_file:
    termVectors = json.load(term_vectors_file)

train_data = pd.read_csv(fileNameTrainData)  # type: pd.DataFrame
# testData = pd.read_csv(fileNameTestData)

# Test model on unseen data. After each prediction step, you may update your
# model. This is not mandatory though.

# Loading the trained model in case we do not want to train on new data.
model = DataHandling.load_data(Constants.model_path + 'Model_NB.pickle')
os_list = DataHandling.load_data(Constants.model_path + 'os_list.pickle')
publisher_list = DataHandling.load_data(Constants.model_path + 'publisher_list.pickle')

# In case there is a new termVectors file we use this to extract features;
# I assume it is because articles will be updated.
feature_extraction = FeatureExtraction()
article_word_count, word_tfidf, publishers, article_numbers = (
    feature_extraction.prepare_dictionary_article(termVectors)
)

# For testing, the train data is used as the test data. This alias must be
# taken BEFORE train_data is rebound below, so testData keeps pointing at the
# DataFrame read from the CSV.
testData = train_data

article_popularity = DataHandling.load_data(Constants.model_path + 'article_popularity.pickle')
train_data = DataHandling.load_data(Constants.model_path + 'train_data_with_article_distances.pickle')

for rowNum, row in testData.iterrows():
    # Pick the five columns of interest out of the current row.
    inputFeatures = row[["Publisher", "Osfamily", "ItemSrc", "UserID", "UserClicksAd"]]