def generateStatsToDetermineFixedWindowLength():
    """Record next-day AUCM for every available classifier length, per day.

    Steps one day at a time from ``Settings.startTime`` through
    ``Settings.endTime`` (inclusive). For each length returned by
    ``Utilities.getClassifierLengthsByDay`` for that day and the module-level
    ``maxLength``, a ``FixedWindowClassifier`` using
    ``DocumentType.typeRuuslUnigram`` is loaded and its ``getAUCM`` is
    evaluated on a one-day ``TestDocuments`` set anchored at the following
    day. Each result is handed to ``Utilities.writeAsJsonToFile`` together
    with ``Settings.stats_to_determine_fixed_window_length``.
    """
    oneDay = timedelta(days=1)
    unigram = DocumentType.typeRuuslUnigram
    day = Settings.startTime
    while day <= Settings.endTime:
        for windowLength in Utilities.getClassifierLengthsByDay(day, maxLength):
            model = FixedWindowClassifier(
                currentTime=day,
                numberOfExperts=Settings.numberOfExperts,
                dataType=unigram,
                noOfDays=windowLength,
            )
            model.load()
            record = {
                'day': day.strftime(Settings.twitter_api_time_format),
                'classifier_length': windowLength,
                'metric': 'aucm',
                'number_of_experts': Settings.numberOfExperts,
                'data_type': unigram,
                'test_data_days': 1,
            }
            # The model for `day` is scored on the documents of the day after.
            nextDayDocuments = TestDocuments(
                currentTime=day + oneDay,
                numberOfExperts=Settings.numberOfExperts,
                dataType=unigram,
                noOfDays=1,
            )
            record['value'] = model.getAUCM(nextDayDocuments.iterator())
            Utilities.writeAsJsonToFile(
                record, Settings.stats_to_determine_fixed_window_length)
        day += oneDay
def generateStatsToCompareDifferentDocumentTypes():
    """Record next-day AUCM for each document type at the ideal model length.

    Steps one day at a time from ``Settings.startTime`` through
    ``Settings.endTime`` (inclusive). A day is processed only when the
    module-level ``idealModelLength`` is among the lengths returned by
    ``Utilities.getClassifierLengthsByDay`` for that day and the module-level
    ``maxLength``. For every document type in the list below, a
    ``FixedWindowClassifier`` is loaded and its ``getAUCM`` is evaluated on a
    one-day ``TestDocuments`` set of the same type anchored at the following
    day. Each result is handed to ``Utilities.writeAsJsonToFile`` together
    with ``Settings.stats_to_compare_different_document_types``.
    """
    documentTypes = [
        DocumentType.typeRuuslUnigram,
        DocumentType.typeCharBigram,
        DocumentType.typeCharTrigram,
        DocumentType.typeRuuslBigram,
        DocumentType.typeRuuslTrigram,
        DocumentType.typeRuuslSparseBigram,
        DocumentType.typeRuuslUnigramNouns,
        DocumentType.typeRuuslUnigramWithMeta,
        DocumentType.typeRuuslUnigramNounsWithMeta,
    ]
    oneDay = timedelta(days=1)
    day = Settings.startTime
    while day <= Settings.endTime:
        # Holds idealModelLength when it is offered for this day, else empty.
        idealOnly = set([idealModelLength])
        matchingLengths = idealOnly & set(
            Utilities.getClassifierLengthsByDay(day, maxLength))
        for windowLength in matchingLengths:
            for documentType in documentTypes:
                # Single-argument form prints the same space-separated line
                # under both the print statement and the print function.
                print('%s %s %s' % (day, windowLength, documentType))
                model = FixedWindowClassifier(
                    currentTime=day,
                    numberOfExperts=Settings.numberOfExperts,
                    dataType=documentType,
                    noOfDays=windowLength,
                )
                model.load()
                record = {
                    'day': day.strftime(Settings.twitter_api_time_format),
                    'classifier_length': windowLength,
                    'metric': 'aucm',
                    'number_of_experts': Settings.numberOfExperts,
                    'data_type': documentType,
                    'test_data_days': 1,
                }
                # The model for `day` is scored on the day after.
                nextDayDocuments = TestDocuments(
                    currentTime=day + oneDay,
                    numberOfExperts=Settings.numberOfExperts,
                    dataType=documentType,
                    noOfDays=1,
                )
                record['value'] = model.getAUCM(nextDayDocuments.iterator())
                Utilities.writeAsJsonToFile(
                    record, Settings.stats_to_compare_different_document_types)
        day += oneDay
def generateStatsForDiminishingAUCM():
    """Record AUCM of fixed classifiers on test days further into the future.

    For five consecutive training days starting at 2011-03-26, and for
    classifier lengths 1, 4 and 8, a ``FixedWindowClassifier`` using
    ``DocumentType.typeRuuslUnigram`` is loaded and its ``getAUCM`` is
    evaluated on one-day ``TestDocuments`` sets for test days 1 through 19
    days after the training day. Each result, tagged with
    ``no_of_days_in_future``, is handed to ``Utilities.writeAsJsonToFile``
    together with ``Settings.stats_for_diminishing_aucm``.

    This is best-effort per training day: if any step raises an ``Exception``
    the rest of that training day is skipped, the failure is logged with its
    traceback, and processing continues with the next training day.
    ``KeyboardInterrupt`` and ``SystemExit`` are not intercepted.
    """
    # Imported locally so the function carries its own dependency.
    import logging

    oneDay = timedelta(days=1)
    classifierLengths = [1, 4, 8]
    currentDay = datetime(2011, 3, 26)
    for _ in range(5):
        print(currentDay)
        try:
            testDay = currentDay + oneDay
            for daysInFuture in range(1, 20):
                # Invariant: testDay == currentDay + daysInFuture days.
                for noOfDay in classifierLengths:
                    # NOTE(review): the classifier depends only on
                    # (currentDay, noOfDay) yet is rebuilt for every test
                    # day; kept as-is because it is not visible here whether
                    # getAUCM leaves the classifier reusable.
                    classifier = FixedWindowClassifier(
                        currentTime=currentDay,
                        numberOfExperts=Settings.numberOfExperts,
                        dataType=DocumentType.typeRuuslUnigram,
                        noOfDays=noOfDay,
                    )
                    classifier.load()
                    data = {
                        'day': currentDay.strftime(
                            Settings.twitter_api_time_format),
                        'test_day': testDay.strftime(
                            Settings.twitter_api_time_format),
                        'classifier_length': noOfDay,
                        'metric': 'aucm',
                        'number_of_experts': Settings.numberOfExperts,
                        'data_type': DocumentType.typeRuuslUnigram,
                        'test_data_days': 1,
                        'no_of_days_in_future': daysInFuture,
                    }
                    testDocuments = TestDocuments(
                        currentTime=testDay,
                        numberOfExperts=Settings.numberOfExperts,
                        dataType=DocumentType.typeRuuslUnigram,
                        noOfDays=1,
                    )
                    data['value'] = classifier.getAUCM(testDocuments.iterator())
                    Utilities.writeAsJsonToFile(
                        data, Settings.stats_for_diminishing_aucm)
                testDay += oneDay
        except Exception:
            # Previously a bare `except: pass`: it hid the reason a day was
            # skipped and also swallowed Ctrl-C / interpreter exit requests.
            logging.exception(
                'Skipping remaining diminishing-AUCM stats for %s', currentDay)
        currentDay += oneDay