def main():
    merged_data_tuple = get_merged()
    merged_data = merged_data_tuple[0]
    casesdataset = merged_data_tuple[1]
    deathsdataset = merged_data_tuple[2]

    # Parameters for location
    county = "King"
    state = "Washington"

    # For the county
    county_timeseries(casesdataset, "Cases", county, state)
    county_timeseries(deathsdataset, "Deaths", county, state)
    population_graphs(merged_data, place="US", type="Normalized", par="Density")

    # For the state
    state_timeseries(casesdataset, "Cases", state)
    state_timeseries(deathsdataset, "Deaths", state)

    # Plots the population graphs
    population_graphs(merged_data, place=state, type="Normalized", par="Density")

    get_statistics(merged_data, para='Density')

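# A conventional entry-point guard for running the snippet above as a script;
# this guard is not part of the original excerpt and is shown only as usage.
if __name__ == "__main__":
    main()
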
def filter_tags(tracks, tags, artist_threshold, directory=None):
    if directory is not None:
        util.mkdir_p(directory)

    tags_new_all = {}
    for category in tags:
        stats, total = statistics.get_statistics(category, tracks, tags)
        stats_filtered = stats[stats['artists'] >= artist_threshold]
        if directory is not None:
            statistics.write_statistics(category, stats_filtered, directory)
        tags_new_all[category] = set(stats_filtered['tag'])
        print("- {} tags: {} -> {}".format(category, len(stats), len(stats_filtered)))

    # TODO: replace with filter_subset
    tracks_to_delete = []
    for track_id, track in tracks.items():
        total_tags = 0
        for category, tags_new in tags_new_all.items():
            track[category] &= tags_new
            total_tags += len(track[category])
        if total_tags == 0:
            tracks_to_delete.append(track_id)
    for track in tracks_to_delete:
        tracks.pop(track)

def test_comments_counter():
    stats = get_statistics(comments, [])
    assert (stats["UCv3WZQIAXeprUopgMDWLvmQ"]["UC5DqQh9__HKLd_HpDAXxsVw"]
            ["verpkNic3SM"] == 2)
    assert (stats["UCSTJ4D8krCXQLq3_-V9ZYWg"]["UC5DqQh9__HKLd_HpDAXxsVw"]
            ["verpkNic3SM"] == 1)

def _write_overall_stats(tpms, tp_tpms, options):
    stats = t.get_stats(tpms, tp_tpms, statistics.get_statistics())
    _add_parameter_values_to_stats(stats)

    stats_file_name = statistics.get_stats_file(
        ".", options[OUT_FILE_BASENAME])
    statistics.write_stats_data(stats_file_name, stats, index=False)

def _write_stratified_stats(tpms, tp_tpms, non_zero, options):
    clsfr_stats = {}

    for classifier in classifiers.get_classifiers():
        if classifier.produces_grouped_stats():
            column_name = classifier.get_column_name()
            stats = t.get_grouped_stats(
                tpms, tp_tpms, column_name, statistics.get_statistics())
            _add_parameter_values_to_stats(stats)
            clsfr_stats[classifier] = stats

            stats_file_name = statistics.get_stats_file(
                ".", options[OUT_FILE_BASENAME], classifier)
            statistics.write_stats_data(stats_file_name, stats)
        elif classifier.produces_distribution_plots():
            for ascending in [True, False]:
                stats = t.get_distribution_stats(
                    non_zero, tp_tpms, classifier, ascending)
                _add_parameter_values_to_stats(stats)

                stats_file_name = statistics.get_stats_file(
                    ".", options[OUT_FILE_BASENAME], classifier, ascending)
                statistics.write_stats_data(
                    stats_file_name, stats, index=False)

    return clsfr_stats

async def main(api_key: str, video_date: date):
    """
    Run the download and analysis pipeline.

    :param api_key: Google API key
    :param video_date: cut-off date for video publication
    :return:
    """
    async with ClientSession() as session:
        # Fetch the list of bots
        bot_list_fetcher = AntiIraApi(session)
        bot_list = [bot.user for bot in await bot_list_fetcher.get_bot_list()]

        youtube_api = YouTubeApi(api_key, session)
        # Fetch the list of channels to analyse
        channels = AntiIraApi.get_channels_list()
        video_datetime = datetime(video_date.year, video_date.month, video_date.day)

        logging.info("Fetch channels...")
        # Download the video identifiers for every channel
        tasks = [
            youtube_api.list_videos_by_channel(channel, video_datetime)
            for channel in channels
        ]
        videos_groups = await asyncio.gather(*tasks)
        for channel, videos_in_channel in zip(channels, videos_groups):
            logging.info(
                "Channel " + channel + " has " + str(len(videos_in_channel)) + " videos"
            )
        # Materialise the channel videos so they can be traversed more than once
        # below (a bare chain iterator would be exhausted by the set comprehension).
        videos: Iterable[ChannelVideo] = list(chain(*videos_groups))

        # Now add the videos listed in videos.txt to the analysis
        additional_video_ids = AntiIraApi.get_videos_list()
        # (dropping those already found on the channels)
        additional_video_ids = set(additional_video_ids) - {
            video.code for video in videos
        }
        videos = chain(
            videos, await youtube_api.list_videos_by_ids(additional_video_ids)
        )

        logging.info("Fetch comments...")
        # Now collect the comments under every video
        tasks = [
            youtube_api.list_comments_full_list(video.code, video.channel)
            for video in videos
        ]
        comments: Iterable[Comment] = chain(*await asyncio.gather(*tasks))

        # Now build the comment statistics!
        stat = get_statistics(comments, bot_list)
        # And export them
        export_statistics(stat, datetime.now().strftime("stat_%Y-%m-%d_%H%M%S.csv"))
        logging.info("Done")

def test_export_to_csv(mocker):
    stats = get_statistics(comments, [])
    mocked_file = mocker.patch("builtins.open", mocker.mock_open())
    export_statistics(stats, "fake.csv")
    mocked_file.assert_called_with("fake.csv", "w", encoding="utf-8")
    mocked_file().write.assert_any_call(
        "user_id\tchannel\tvideo\tcomments\r\n")
    mocked_file().write.assert_any_call(
        "UCv3WZQIAXeprUopgMDWLvmQ\tUC5DqQh9__HKLd_HpDAXxsVw\t\t2\r\n")
    mocked_file().write.assert_any_call(
        "UCSTJ4D8krCXQLq3_-V9ZYWg\tUC5DqQh9__HKLd_HpDAXxsVw\t\t1\r\n")

def filter_tags(tracks, tags, artist_threshold, directory=None):
    if directory is not None:
        util.mkdir_p(directory)

    tags_new_all = {}
    for category in tags:
        stats, total = statistics.get_statistics(category, tracks, tags)
        stats_filtered = stats[stats['artists'] >= artist_threshold]
        if directory is not None:
            statistics.write_statistics(category, stats_filtered, directory)
        tags_new_all[category] = set(stats_filtered['tag'])
        print("- {} tags: {} -> {}".format(category, len(stats), len(stats_filtered)))

    filter_subset(tracks, tags_new_all)

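# The filter_subset helper called above is not shown in this snippet. A minimal
# sketch of what it could look like, reconstructed from the inline loop in the
# earlier filter_tags variant; the body is illustrative and may differ from the
# project's actual implementation.
def filter_subset(tracks, tags_new_all):
    # Intersect every track's per-category tags with the retained tags,
    # then drop tracks that end up with no tags at all.
    tracks_to_delete = []
    for track_id, track in tracks.items():
        total_tags = 0
        for category, tags_new in tags_new_all.items():
            track[category] &= tags_new
            total_tags += len(track[category])
        if total_tags == 0:
            tracks_to_delete.append(track_id)
    for track_id in tracks_to_delete:
        tracks.pop(track_id)
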
def filter_tags(tracks, tags, tag_threshold, directory=None, tags_file=None):
    if directory is not None:
        util.mkdir_p(directory)

    # TODO: refactor to properly handle and not disconnect category+tag
    tags_merged = {}
    tags_with_prefix = {}
    for category, category_tags in tags.items():
        tags_merged.update(category_tags)
        if tags_file is not None:
            tags_with_prefix.update({
                tag: category + commons.TAG_HYPHEN + tag
                for tag in category_tags
            })

    stats, total = statistics.get_statistics('all', tracks, {'all': tags_merged})
    stats = stats.sort_values(by='tracks', ascending=False)
    stats_filtered = stats[:tag_threshold]
    if directory is not None:
        statistics.write_statistics('all', stats_filtered, directory)
    if tags_file is not None:
        tag_list = stats_filtered['tag'].replace(
            tags_with_prefix).sort_values()
        tag_list.to_csv(tags_file, sep='\t', index=False, header=False)

    tags_top = set(stats_filtered['tag'])
    tracks_to_delete = []
    for track_id, track in tracks.items():
        total_tags = 0
        for category in commons.CATEGORIES:
            track[category] &= tags_top
            total_tags += len(track[category])
        if total_tags == 0:
            tracks_to_delete.append(track_id)
    for track in tracks_to_delete:
        tracks.pop(track)

                             size=num_terminals, replace=False)
poi = terminals[0]

# Spiders
start_time = time.clock()
alg = Spiders(graph=graph, terminals=terminals[1:], poi=poi,
              contracted_graph=suitability_graph,
              dist_paths_suitable_nodes=dist_paths_contracted_graph)
steiner_tree, _ = alg.steiner_tree()
cost, node_cost, num_suitable_nodes, num_steiner_nodes, num_leaves = \
    get_statistics(steiner_tree, terminals, generator)
line = [
    "Spiders", seed, size * size, total_num_suitable_nodes, num_terminals,
    dijkstra_time_contracted_graph, sample + 1, time.clock() - start_time,
    cost - node_cost, num_suitable_nodes, num_steiner_nodes, num_leaves
]
line.extend(terminals)
results.append(line)

# Dreyfus algorithm IMR
start_time = time.clock()
# alg = DreyfusIMR(graph, terminals, dist_paths)
alg = DreyfusIMR(

# Load the saved game
if savegame.load_save(player_name) is None:
    savegame.save(player_name)  # If there is no saved game, create one
save_data = savegame.load_save(player_name)
_game_field = save_data.game_field
curr_ball = save_data.curr_ball
next_ball = save_data.next_ball
is_ball_flying = save_data.is_ball_flying
launched_ball = save_data.launched_ball
misses_remaining = save_data.misses_remaining
curr_score = save_data.curr_score
curr_game_time = save_data.curr_game_time

# Load the statistics
if statistics.get_statistics(player_name) is None:
    statistics.add_statistics(player_name)  # If there are no statistics, create them
player_statistics = statistics.get_statistics(player_name)

# Apply the difficulty setting
difficulty = int(settings.get_settings().get('difficulty'))
if difficulty == 0:
    colors = ('red', 'yellow', 'green')
elif difficulty == 1:
    colors = ('red', 'yellow', 'green', 'blue', 'purple')
elif difficulty == 2:
    colors = ('red', 'orange', 'yellow', 'green', 'blue', 'light_blue', 'purple')
else:
    colors = ('red', 'yellow', 'green', 'blue', 'purple')

medium_font = pygame.font.SysFont('Comic Sans MS', 20)

from deap import algorithms

from toolbox import get_toolbox
from statistics import get_statistics, get_stats_from_best_ind
from storage import save_individual
from constants import NGEN, NPOP, CXPB, MUTPB
from memory_store import all_individuals, add_inds

toolbox = get_toolbox()
stats = get_statistics()
best_individual = get_stats_from_best_ind()

pop = toolbox.population(n=NPOP)
for i in pop:
    i.fitness.values = toolbox.evaluate(i)
    # file
    save_individual(i.fitness.values, i)
    # memory
    # add_inds([i.fitness.values, i])

algorithms.eaSimple(pop, toolbox, cxpb=CXPB, mutpb=MUTPB, ngen=NGEN,
                    stats=stats, halloffame=best_individual, verbose=True)

print("Best ind: {}".format(best_individual[0]))

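# A minimal sketch of what the imported helpers above might wrap, assuming they
# build on DEAP's standard tooling; the project's own statistics module is not
# shown here, so these bodies are illustrative only.
import numpy
from deap import tools

def get_statistics():
    # Per-generation fitness statistics reported in eaSimple's logbook.
    stats = tools.Statistics(key=lambda ind: ind.fitness.values)
    stats.register("avg", numpy.mean)
    stats.register("min", numpy.min)
    stats.register("max", numpy.max)
    return stats

def get_stats_from_best_ind():
    # A hall of fame of size 1 keeps the best individual seen so far,
    # which is why best_individual[0] is printed after the run.
    return tools.HallOfFame(1)
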
def retrieve_statistics(name, language):
    return get_statistics(name, language)

def test_ignore_comments_of_not_specified_users():
    stats = get_statistics(comments, use_only_users=["UCSTJ4D8krCXQLq3_-V9ZYWg"])
    assert len(stats.keys()) == 1
    assert list(stats.keys())[0] == "UCSTJ4D8krCXQLq3_-V9ZYWg"

def test_ignore_comments_of_specified_users():
    stats = get_statistics(comments, ignore_users=["UCv3WZQIAXeprUopgMDWLvmQ"])
    assert "UCv3WZQIAXeprUopgMDWLvmQ" not in stats

def test_ignore_channel_comments_on_itself():
    stats = get_statistics(comments, [])
    assert "UC5DqQh9__HKLd_HpDAXxsVw" not in stats

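# A simplified sketch of the counting behaviour the tests above exercise,
# using a hypothetical Comment shape (user, channel, video); the project's
# real get_statistics and its Comment model may differ from this.
from collections import defaultdict
from dataclasses import dataclass

@dataclass
class Comment:
    user: str     # channel id of the commenter
    channel: str  # channel id of the video's owner
    video: str    # video id

def get_statistics(comments, use_only_users=None, ignore_users=None):
    # Nested mapping: commenter -> owning channel -> video -> comment count.
    stats = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
    for comment in comments:
        if comment.user == comment.channel:
            continue  # a channel commenting on its own video is not counted
        if use_only_users and comment.user not in use_only_users:
            continue
        if ignore_users and comment.user in ignore_users:
            continue
        stats[comment.user][comment.channel][comment.video] += 1
    return stats
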
async def main(
    api_key: str,
    video_date: date,
    bot_groups: List[str],
    ignore_bots: bool,
    export_videos: bool = False,
):
    """
    Run the download and analysis pipeline.

    :param api_key: Google API key
    :param video_date: cut-off date for video publication
    :param bot_groups: bot groups whose statistics should be collected
    :param ignore_bots: if True, exclude the bots' comments from the statistics
        instead of restricting the statistics to them
    :param export_videos: whether to export per-video statistics as well
    :return:
    """
    async with ClientSession() as session:
        # Fetch the list of bots
        bot_list_fetcher = AntiIraApi(session)
        bot_list = [
            bot.user for bot in await bot_list_fetcher.get_bot_list(bot_groups)
        ]

        youtube_api = YouTubeApi(api_key, session)
        # Fetch the list of channels to analyse
        channels = AntiIraApi.get_channels_list()
        video_datetime = datetime(video_date.year, video_date.month, video_date.day)

        logging.info("Fetch channels...")
        # Download the video identifiers for every channel
        tasks = [
            youtube_api.list_videos_by_channel(channel, video_datetime)
            for channel in channels
        ]
        videos_groups = await asyncio.gather(*tasks)
        for channel, videos_in_channel in zip(channels, videos_groups):
            logging.info("Channel " + channel + " has "
                         + str(len(videos_in_channel)) + " videos")
        videos: Iterable[ChannelVideo] = chain(*videos_groups)

        # Now add the videos listed in videos.txt to the analysis
        additional_video_ids = AntiIraApi.get_videos_list()
        # TODO: drop the videos already found on the channels from this list
        videos = chain(
            videos, await youtube_api.list_videos_by_ids(additional_video_ids))

        logging.info("Fetch comments...")
        # Now collect the comments under every video
        tasks = [
            youtube_api.list_comments_full_list(video.code, video.channel,
                                                Settings.comments_limit())
            for video in videos
        ]
        comments: Iterable[Comment] = list(
            chain(*await asyncio.gather(*tasks)))

        # Now build the comment statistics!
        if ignore_bots:
            stat = get_statistics(comments, ignore_users=bot_list)
        else:
            stat = get_statistics(comments, use_only_users=bot_list)
        # And export them
        export_statistics(
            stat,
            datetime.now().strftime("stat_%Y-%m-%d_%H%M%S.csv"),
            export_videos=export_videos,
        )
        # Also save the comment texts themselves
        export_comments_text_statistics(
            get_comments_text_statistics(comments),
            datetime.now().strftime("comments_%Y-%m-%d_%H%M%S.csv"),
        )
        logging.info("Done")

import prepare_database as prep
import statistics as stat
import classifier as c
import pandas as pd

output = "prepared.csv"
prep.prepare_db("health_insurance.csv", output)

df = pd.read_csv(output)
stat.get_statistics(df)

classifier = c.Classifier(df, 0.7, 'Response')
knn3 = classifier.knn(3)
knn5 = classifier.knn(5)
knn11 = classifier.knn(11)
dtc = classifier.dtc()
bayes = classifier.gnb()
neural = classifier.neural()
forest = classifier.random_forrest()

c.generate_diff_chart(dtc, knn3, knn5, knn11, bayes, neural, forest)