def test_model(test_time_file, threshold=10):
    """Run hot-word detection on a test CSV and return the written result.

    The CSV is loaded from ``./file/<test_time_file>``. Its time interval is
    obtained from ``time_explore.get_time_interval`` and printed; an interval
    of 0 is replaced by 1. A ``background_model`` built from the test data and
    another one loaded through ``read_model_from_model_file`` are handed to
    ``key_burst`` together with ``threshold``.

    NOTE(review): a second definition of ``test_model`` appears later in this
    file and rebinds the name once the module has been fully executed.

    Args:
        test_time_file: File name (relative to ``./file/``) of the test CSV.
        threshold: Forwarded unchanged as the third argument of ``key_burst``.

    Returns:
        Whatever ``write_hotwords_to_file`` returns for the detected hot words.
    """
    csv_path = './file/' + test_time_file
    frame = pd.read_csv(
        csv_path,
        encoding="utf-8",
        parse_dates=True,
        lineterminator="\n",
    )

    interval = time_explore.get_time_interval(test_time_file)
    print("test file time interval: " + str(interval))
    # A zero interval is never passed to the model; 1 is used instead.
    interval = 1 if interval == 0 else interval

    # Model built from the data under test.
    observed = background_model(new_time_interval=interval)
    observed.read_data_frame(frame)

    # Previously saved model, loaded from the model file.
    baseline = background_model()
    baseline.read_model_from_model_file()

    detector = key_burst(baseline, observed, threshold)
    return write_hotwords_to_file(detector.detect_hot_words())
def generate_hotword_to_tweets(hotword_list, tweet_test_file):
    """Apply ``count`` to every row of a tweet CSV and return ``word_to_set``.

    The CSV is loaded from ``./file/<tweet_test_file>``, fed into a
    ``background_model`` created with ``new_time_interval=1``, and then each
    row is passed to ``count`` along with the set of hot words.

    NOTE(review): ``count`` and ``word_to_set`` are not defined in this
    function; presumably ``count`` fills the module-level ``word_to_set`` as a
    side effect -- confirm against their definitions.

    Args:
        hotword_list: Iterable of hot words; converted to a ``set`` before use.
        tweet_test_file: File name (relative to ``./file/``) of the tweet CSV.

    Returns:
        The ``word_to_set`` object as it stands after all rows were processed.
    """
    tweets = pd.read_csv(
        './file/' + tweet_test_file,
        encoding="utf-8",
        parse_dates=True,
        lineterminator="\n",
    )

    # The model is not used again below; the call is kept because
    # read_data_frame may have side effects (not visible from here).
    tweet_model = background_model(new_time_interval=1)
    tweet_model.read_data_frame(tweets)

    hotword_set = set(hotword_list)

    def _count_row(row):
        return count(row, hotword_set)

    # The per-row results are discarded; only the side effects matter.
    tweets.apply(_count_row, axis=1)
    return word_to_set
def test_model(test_time_file, threshold=10):
    """Detect hot words in a test CSV by comparing it with the saved model.

    Steps, in order:
      1. Load ``./file/<test_time_file>`` into a data frame.
      2. Ask ``time_explore.get_time_interval`` for the file's time interval
         and print it.
      3. Build a ``background_model`` from the frame, using 1 as the interval
         when the reported interval equals 0.
      4. Load a second ``background_model`` via ``read_model_from_model_file``.
      5. Run ``key_burst(...).detect_hot_words()`` and pass the outcome to
         ``write_hotwords_to_file``.

    Args:
        test_time_file: File name (relative to ``./file/``) of the test CSV.
        threshold: Passed through to ``key_burst`` as its third argument.

    Returns:
        The value returned by ``write_hotwords_to_file``.
    """
    read_options = {
        "encoding": "utf-8",
        "parse_dates": True,
        "lineterminator": "\n",
    }
    test_frame = pd.read_csv('./file/' + test_time_file, **read_options)

    reported_interval = time_explore.get_time_interval(test_time_file)
    message = "test file time interval: " + str(reported_interval)
    print(message)

    # Substitute 1 for a zero interval before constructing the test model.
    if reported_interval == 0:
        effective_interval = 1
    else:
        effective_interval = reported_interval

    current_model = background_model(new_time_interval=effective_interval)
    current_model.read_data_frame(test_frame)

    saved_model = background_model()
    saved_model.read_model_from_model_file()

    burst_detector = key_burst(saved_model, current_model, threshold)
    detected = burst_detector.detect_hot_words()
    return write_hotwords_to_file(detected)
def train_save_model(train_time_file):
    """Build a background model from a training CSV and write it to file.

    The CSV is loaded from ``./file/<train_time_file>``, the time interval is
    taken from ``time_explore.get_time_interval``, and a ``background_model``
    created with that interval reads the frame and is then saved with
    ``write_model_to_model_file``. A progress line is printed after each step.

    NOTE(review): a second definition of ``train_save_model`` appears later in
    this file and rebinds the name once the module has been fully executed.
    NOTE(review): unlike ``test_model``, a zero interval is passed through
    unchanged here -- confirm that this is intended.

    Args:
        train_time_file: File name (relative to ``./file/``) of the training
            CSV.

    Returns:
        None.
    """
    source_path = './file/' + train_time_file
    frame = pd.read_csv(
        source_path,
        encoding="utf-8",
        parse_dates=True,
        lineterminator="\n",
    )
    print("get the dataframe from train_file")

    interval = time_explore.get_time_interval(train_time_file)
    print("train file time interval: " + str(interval))

    model = background_model(new_time_interval=interval)
    print("initialize the background model")

    model.read_data_frame(frame)
    print("read in the dataframe")

    model.write_model_to_model_file()
    print("write to file")
def train_save_model(train_time_file):
    """Train a background model on a CSV file and persist it.

    Steps, each followed by a printed progress message:
      1. Load ``./file/<train_time_file>`` into a data frame.
      2. Obtain the time interval from ``time_explore.get_time_interval``.
      3. Create a ``background_model`` with that interval.
      4. Feed the data frame to the model.
      5. Save the model through ``write_model_to_model_file``.

    Args:
        train_time_file: File name (relative to ``./file/``) of the training
            CSV.

    Returns:
        None.
    """
    read_options = {
        "encoding": "utf-8",
        "parse_dates": True,
        "lineterminator": "\n",
    }
    training_frame = pd.read_csv('./file/' + train_time_file, **read_options)
    print("get the dataframe from train_file")

    train_interval = time_explore.get_time_interval(train_time_file)
    interval_message = "train file time interval: " + str(train_interval)
    print(interval_message)

    trained = background_model(new_time_interval=train_interval)
    print("initialize the background model")

    trained.read_data_frame(training_frame)
    print("read in the dataframe")

    trained.write_model_to_model_file()
    print("write to file")