class InterestAnalyzer:
    """Drive the parse -> feature-building -> classification pipeline.

    The analyzer works on three data sources, keyed 'fb', 'tweets' and
    'linkedin' in the module-level TRAINING_* / TESTING_* mappings.

    Attributes:
        feature_builder: FeatureBuilder used to turn parsed files into
            feature-vector files.
        classifier: set only after retrain_classifier() or load_classifier()
            has been called; save_classifier() and classifier_predict()
            raise AttributeError before that.
    """

    def __init__(self):
        """Create the analyzer with a fresh FeatureBuilder."""
        self.feature_builder = FeatureBuilder()

    def _parse_and_build_features(self, directories, files, feature_files):
        """Parse every data source and write its feature vectors.

        Args:
            directories: mapping source key -> input location handed to the
                parser as 'in'.
            files: mapping source key -> parsed output file (parser 'out',
                and input of the feature builder).
            feature_files: mapping source key -> feature file written by
                the feature builder.
        """
        print('Parsing Facebook...')
        fbparser.parse({'in': directories['fb'], 'out': files['fb']})
        print('Parsing Twitter...')
        tweetparser.parse({'in': directories['tweets'],
                           'out': files['tweets']})
        print('Parsing LinkedIn...')
        linkedinparser.parse({'in': directories['linkedin'],
                              'out': files['linkedin']})

        print('Building features...')
        # Features are built in this fixed order (not the parse order) to
        # keep the call sequence seen by the feature builder unchanged.
        for source in ('linkedin', 'tweets', 'fb'):
            self.feature_builder.create_feature_vectors(
                files[source], feature_files[source], source)

    def rebuild_features(self):
        """Re-parse the training data and rebuild its feature files."""
        self._parse_and_build_features(
            TRAINING_DIRECTORIES, TRAINING_FILES, TRAINING_FEATURE_FILES)

    def retrain_classifier(self):
        """Replace self.classifier with a newly constructed Classifier.

        NOTE(review): presumably Classifier() trains itself on construction;
        confirm against the Classifier implementation.
        """
        print('Training classifier...')
        self.classifier = Classifier()

    def save_classifier(self):
        """Pickle self.classifier to CLASSIFIER_FILE.

        Raises:
            AttributeError: if no classifier has been trained or loaded yet.
        """
        # Context manager guarantees the file is flushed and closed even if
        # pickling fails part-way.
        with open(CLASSIFIER_FILE, 'wb') as classifier_file:
            pickle.dump(self.classifier, classifier_file)

    def load_classifier(self):
        """Load self.classifier from the pickle stored in CLASSIFIER_FILE."""
        # SECURITY: unpickling executes arbitrary code embedded in the file;
        # CLASSIFIER_FILE must only ever point at a trusted, locally
        # produced file.
        with open(CLASSIFIER_FILE, 'rb') as classifier_file:
            self.classifier = pickle.load(classifier_file)

    def classifier_predict(self):
        """Parse the testing data, build features and print predictions.

        Runs the LinkedIn and Twitter classifiers, which write their labels
        to 'results_l.txt' and 'results_t.txt', then feeds those labels to
        the late-fusion classifier (output file 'result.txt'). The value
        returned by each predict call is printed; nothing is returned.

        Raises:
            AttributeError: if no classifier has been trained or loaded yet.
        """
        self._parse_and_build_features(
            TESTING_DIRECTORIES, TESTING_FILES, TESTING_FEATURE_FILES)

        linkedin_testing_features = np.loadtxt(
            TESTING_FEATURE_FILES['linkedin'], delimiter=',')
        tweets_testing_features = np.loadtxt(
            TESTING_FEATURE_FILES['tweets'], delimiter=',')
        # NOTE(review): the Facebook features are loaded but no classifier
        # below consumes them. The load is kept so a missing or malformed
        # file still fails here as before -- confirm whether a Facebook
        # classifier is meant to be added.
        fb_testing_features = np.loadtxt(
            TESTING_FEATURE_FILES['fb'], delimiter=',')

        print('Predicting labels...')
        print('LinkedIn classifier:')
        print(self.classifier.predict_testing_data(
            'linkedin', linkedin_testing_features, TESTING_LABELS_FILE,
            'results_l.txt'))
        print('Twitter classifier:')
        print(self.classifier.predict_testing_data(
            'tweets', tweets_testing_features, TESTING_LABELS_FILE,
            'results_t.txt'))

        # Read back the per-source labels the two calls above wrote to disk.
        tweets_result_labels = np.loadtxt('results_t.txt', delimiter=',')
        linkedin_result_labels = np.loadtxt('results_l.txt', delimiter=',')

        print('Late fusion classifier:')
        print(self.classifier.predict_late_fusion_testing_data(
            [tweets_result_labels, linkedin_result_labels],
            TESTING_LABELS_FILE, 'result.txt'))