def handle(self, cmd: UpdateVersionDownloads):
    """Fetch the per-version downloads for ``cmd.date`` and save them in batches.

    Rows are processed in batches; within a batch each project is loaded
    from the repository at most once (or created with zero downloads when
    the repository returns ``None``), updated with every row that mentions
    it, and the batch's projects are then saved together.

    Raises:
        InvalidAdminPassword: if the admin password checker rejects
            ``cmd.password``.
    """
    if not self._admin_password_checker.check(cmd.password):
        self._logger.info("Invalid password")
        raise InvalidAdminPassword(cmd.password)

    self._logger.info(f"Getting downloads from date {cmd.date}...")
    stats_result = self._stats_viewer.get_version_downloads(cmd.date)
    self._logger.info(
        f"Retrieved {stats_result.total_rows} downloads. Saving to db...")

    start_time = timeit.default_timer()
    batch_size = 250
    # Ceiling division, so a trailing partial batch is included in the total.
    total_batches = int(-(-stats_result.total_rows // batch_size))
    batches = self._batch(stats_result.rows, batch_size)
    # 1-based numbering keeps "Batch i of N" consistent: the last batch is N.
    for batch_number, batch in enumerate(batches, start=1):
        self._logger.info(f"Batch {batch_number} of {total_batches}")
        projects = {}
        for row in batch:
            # Reuse the project already touched in this batch, if any.
            project = projects.get(row.project)
            if project is None:
                project = self._project_repository.get(
                    row.project, downloads_from=cmd.date)
            if project is None:
                project = Project(ProjectName(row.project), Downloads(0))
            project.add_downloads(
                row.date, row.version,
                DayDownloads(row.downloads, row.pip_downloads))
            projects[row.project] = project
        self._project_repository.save_projects(list(projects.values()))

    end_time = timeit.default_timer()
    self._logger.info(
        f"Total downloads updated. Total time + {(end_time - start_time):.4f} seconds"
    )
def transform_project_v2(project: Project) -> Dict:
    """Serialize ``project`` into the v2 dictionary layout.

    The ``downloads`` entry maps each ISO-formatted date to a mapping of
    version -> download count, built from ``project.last_downloads()``.
    ``versions`` is the naturally sorted list of ``project.versions()``.
    """
    per_day = defaultdict(lambda: defaultdict(int))
    for entry in project.last_downloads():
        iso_day = entry.date.isoformat()
        per_day[iso_day][entry.version] = entry.downloads.value

    payload = {
        "id": project.name.name,
        "total_downloads": project.total_downloads.value,
    }
    payload["versions"] = natsorted(list(project.versions()))
    payload["downloads"] = per_day
    return payload
def transform_project_v2(project: Project) -> Dict:
    """Serialize ``project`` into the v2 dictionary layout.

    Only downloads returned by ``project.last_downloads`` for a cutoff of
    90 days before today are included. The ``downloads`` entry maps each
    ISO-formatted date to a mapping of version -> download count.
    """
    cutoff = datetime.now().date() - timedelta(days=90)
    per_day = defaultdict(lambda: defaultdict(int))
    for entry in project.last_downloads(cutoff):
        iso_day = entry.date.isoformat()
        per_day[iso_day][entry.version] = entry.downloads.value

    payload = {
        "id": project.name.name,
        "total_downloads": project.total_downloads.value,
    }
    payload["versions"] = natsorted(list(project.versions()))
    payload["downloads"] = per_day
    return payload
def test_remove_old_data():
    """A 181-day-old entry still counts in the total but is absent from ``last_downloads``."""
    # Read the clock once so all three dates derive from the same day, even
    # when the test happens to run across midnight.
    today = datetime.now().date()
    old_date = today - timedelta(days=181)
    limit_date = today - timedelta(days=180)

    project = Project(ProjectName("random"), Downloads(10))
    project.add_downloads(old_date, "2.3.1", Downloads(10))
    project.add_downloads(limit_date, "2.3.0", Downloads(20))
    project.add_downloads(today, "2.3.2", Downloads(30))

    assert project.total_downloads == Downloads(70)
    assert project.last_downloads() == [
        ProjectVersionDownloads(limit_date, "2.3.0", Downloads(20)),
        ProjectVersionDownloads(today, "2.3.2", Downloads(30)),
    ]
    assert {"2.3.0", "2.3.2"}.issubset(project.versions())
def test_filter_date():
    """``last_downloads(date)`` leaves out the entry dated before the given date."""
    project = Project(ProjectName("random"), Downloads(10))
    recorded = [
        (date(2020, 3, 9), "0.0.6", 20),
        (date(2020, 4, 10), "0.0.2", 10),
        (date(2020, 4, 10), "0.0.4", 10),
        (date(2020, 4, 11), "0.0.4", 10),
    ]
    for day, version, amount in recorded:
        project.add_downloads(day, version, Downloads(amount))

    assert project.total_downloads == Downloads(60)

    expected_since_april_10 = [
        ProjectVersionDownloads(date(2020, 4, 10), "0.0.2", Downloads(10)),
        ProjectVersionDownloads(date(2020, 4, 10), "0.0.4", Downloads(10)),
        ProjectVersionDownloads(date(2020, 4, 11), "0.0.4", Downloads(10)),
    ]
    assert project.last_downloads(date(2020, 4, 10)) == expected_since_april_10
    assert project.versions() == {"0.0.6", "0.0.2", "0.0.4"}
    assert project.min_date == date(2020, 3, 9)
def handle(self, cmd: ImportTotalDownloads):
    """Overwrite project totals from the rows batched out of ``cmd.file_path``.

    For every batch, each referenced project is taken from the batch-local
    cache, then from the repository, and finally created with zero
    downloads when neither has it. Its ``total_downloads`` is set from the
    row and the batch's projects are saved together.
    """
    batches = self._batch(cmd.file_path, 250)
    for batch_number, rows in enumerate(batches, start=1):
        self._logger.info(f"Batch {batch_number}")
        touched = {}
        for row in rows:
            name = row.project
            project = touched.get(name)
            if project is None:
                project = self._project_repository.get(name)
            if project is None:
                project = Project(ProjectName(name), Downloads(0))
            project.total_downloads = Downloads(row.total_downloads)
            touched[name] = project
        self._project_repository.save_projects(list(touched.values()))
def get(self, project_name: str) -> Optional[Project]:
    """Load a project from the ``projects`` collection of ``self._client``.

    The lookup key is ``project_name`` stripped and lower-cased. Returns
    ``None`` when no document matches. The stored ``downloads`` mapping
    (ISO date -> sequence of ``(version, downloads)`` pairs) is replayed
    into the project in ascending date-key order.
    """
    lookup_name = project_name.strip().lower()
    document = self._client.projects.find_one({"name": lookup_name})
    if document is None:
        return None

    project = Project(ProjectName(document["name"]),
                      Downloads(document["total_downloads"]))
    by_day = sorted(document["downloads"].items(), key=lambda item: item[0])
    for iso_day, version_entries in by_day:
        for entry in version_entries:
            day = datetime.date.fromisoformat(iso_day)
            amount = Downloads(entry[1])
            project.add_downloads(day, entry[0], amount)
            # Don't count the downloads twice
            project.total_downloads -= Downloads(entry[1])
    return project
def handle(self, cmd: ImportDownloadsFile):
    """Import projects from the comma-separated content of ``cmd.file``.

    The first row is treated as a header and skipped. Every remaining
    non-empty row supplies the project name in column 0 and its downloads
    in column 1; the resulting projects are saved in a single call.
    """
    reader = csv.reader(cmd.file, delimiter=",")
    # The default keeps a completely empty file from raising StopIteration.
    next(reader, None)
    # csv.reader yields [] for blank lines; skip them rather than fail with
    # IndexError on row[0].
    # NOTE(review): row[1] is passed through as the raw CSV string, exactly
    # as before - confirm Downloads accepts/coerces it.
    projects = [
        Project(ProjectName(row[0]), Downloads(row[1]))
        for row in reader
        if row
    ]
    self._project_repository.save_projects(projects)
def _last_downloads(project: Project, days: int) -> Downloads:
    """Sum the downloads of ``project`` dated within the last ``days`` days.

    An entry counts when its date is on or after today minus ``days``.
    """
    cutoff = datetime.now().date() - timedelta(days=days)
    running_total = 0
    for entry in project.last_downloads():
        if entry.date >= cutoff:
            running_total += entry.downloads.value
    return Downloads(running_total)
def test_retrieve_monthly_downloads():
    """With the clock frozen at 2020-04-12, ``month_downloads`` leaves out the 2020-03-09 entry."""
    project = Project(ProjectName("random"), Downloads(10))
    recorded = [
        (date(2020, 3, 9), 20),
        (date(2020, 4, 10), 10),
        (date(2020, 4, 11), 15),
    ]
    with freezegun.freeze_time("2020-04-12"):
        for day, amount in recorded:
            project.add_downloads(day, "0.0.1", Downloads(amount))
        monthly = project.month_downloads()
        assert monthly == Downloads(25)
def find(self, project_name: ProjectName) -> Optional[Project]:
    """Look up a project by exact name in the ``projects`` table.

    Returns the project built from the first matching row, or ``None``
    when the query yields no rows.
    """
    query = "SELECT name, downloads FROM projects WHERE name = %s"
    with self._conn, self._conn.cursor() as cursor:
        cursor.execute(query, (project_name.name, ))
        rows = cursor.fetchall()
        if rows:
            first = rows[0]
            return Project(ProjectName(first[0]), Downloads(first[1]))
        return None
def handle(self, cmd: UpdateDownloads):
    """Extract the downloads for ``cmd.date`` and store them.

    Raises:
        InvalidAdminPassword: if the admin password checker rejects
            ``cmd.password``.
    """
    password_ok = self._admin_password_checker.check(cmd.password)
    if not password_ok:
        raise InvalidAdminPassword(cmd.password)

    extracted = self._downloads_extractor.get_downloads(cmd.date)

    # Add new projects if they don't exist before
    zero_download_projects = [
        Project(item.name, Downloads(0)) for item in extracted
    ]
    self._project_repository.save_projects(zero_download_projects)
    self._project_repository.save_day_downloads(extracted)
    self._project_repository.update_downloads(extracted)
def find_random_projects(self, nr_items: int = 10) -> List[Project]:
    """Return up to ``nr_items`` projects selected with ``ORDER BY random()``."""
    query = "SELECT name, downloads FROM projects ORDER BY random() LIMIT %s;"
    with self._conn, self._conn.cursor() as cursor:
        cursor.execute(query, (nr_items, ))
        rows = cursor.fetchall()

    found = []
    for row in rows:
        found.append(Project(ProjectName(row[0]), Downloads(row[1])))
    return found
def test_project_replace_downloads():
    """Adding downloads twice for the same date and version replaces the earlier figure."""
    today = datetime.now().date()
    release = "2.3.1"
    project = Project(ProjectName("random"), Downloads(10))

    for first_figure, second_figure in ((25, 25), (5, 5)):
        project.add_downloads(today, release,
                              DayDownloads(first_figure, second_figure))

    expected_entries = [
        ProjectVersionDownloads(today, release, Downloads(5), Downloads(5))
    ]
    assert project.total_downloads == Downloads(15)
    assert project.last_downloads() == expected_entries
    assert project.versions() == {release}
def test_update_min_date_when_no_other_downloads():
    """The 2019 entry counts in the total but not in ``last_downloads`` or ``min_date``."""
    project = Project(ProjectName("random"), Downloads(10))
    recorded = [
        (date(2019, 3, 9), "0.0.6", 20),
        (date(2020, 4, 10), "0.0.2", 10),
        (date(2020, 4, 10), "0.0.4", 10),
    ]
    for day, version, amount in recorded:
        project.add_downloads(day, version, Downloads(amount))

    assert project.total_downloads == Downloads(50)

    expected_entries = [
        ProjectVersionDownloads(date(2020, 4, 10), "0.0.2", Downloads(10)),
        ProjectVersionDownloads(date(2020, 4, 10), "0.0.4", Downloads(10)),
    ]
    assert project.last_downloads() == expected_entries
    assert project.versions() == {"0.0.6", "0.0.2", "0.0.4"}
    assert project.min_date == date(2020, 4, 10)
def test_save_many_projects_with_new_format(mongo_client: MongoClient,
                                            repository: ProjectRepository):
    """Saving a project writes one ``projects`` document and one ``project_downloads`` document per day."""
    project = Project(ProjectName("climoji"), Downloads(100))
    project.add_downloads(datetime.date(2020, 3, 31), "2.0",
                          DayDownloads(40, 10))
    project.add_downloads(datetime.date(2020, 3, 31), "2.0.1",
                          DayDownloads(30, 10))
    project.add_downloads(datetime.date(2020, 4, 1), "2.0",
                          DayDownloads(20, 10))
    repository.save_projects([project])

    # The project document must carry at least these fields and values.
    stored_project = mongo_client.pepy_test.projects.find_one(
        {"name": project.name.name})
    expected_project = {
        "name": "climoji",
        "total_downloads": 190,
        "monthly_downloads": 0
    }
    for field, expected in expected_project.items():
        assert field in stored_project
        assert expected == stored_project[field]

    # Per-day documents are compared in ascending date order.
    found = mongo_client.pepy_test.project_downloads.find(
        {"project": project.name.name})
    stored_days = sorted(found, key=lambda doc: doc["date"])
    expected_days = [
        {
            "project": "climoji",
            "date": "2020-03-31",
            "downloads": [{
                "version": "2.0",
                "downloads": 40
            }, {
                "version": "2.0.1",
                "downloads": 30
            }],
        },
        {
            "project": "climoji",
            "date": "2020-04-01",
            "downloads": [{
                "version": "2.0",
                "downloads": 20
            }]
        },
    ]
    assert len(expected_days) == len(stored_days)
    for expected_day, stored_day in zip(expected_days, stored_days):
        for field, expected in expected_day.items():
            assert field in stored_day
            assert expected == stored_day[field]
def handle(self, cmd: UpdateVersionDownloads):
    """Fetch the per-version downloads for ``cmd.date`` and save them in batches of 1,000 rows.

    Raises:
        InvalidAdminPassword: if the admin password checker rejects
            ``cmd.password``.
    """
    password_ok = self._admin_password_checker.check(cmd.password)
    if not password_ok:
        self._logger.info("Invalid password")
        raise InvalidAdminPassword(cmd.password)

    self._logger.info(f"Getting downloads from date {cmd.date}...")
    stats_result = self._stats_viewer.get_version_downloads(cmd.date)
    self._logger.info(f"Retrieved {stats_result.total_rows} downloads. Saving to db...")

    # Captured before the batch loop; not read again within this block.
    start_time = timeit.default_timer()
    for rows in self._batch(stats_result.rows, 1_000):
        touched = {}
        for row in rows:
            name = row.project
            # Batch-local cache first, then the repository, then a new project.
            project = touched.get(name)
            if project is None:
                project = self._project_repository.get(name)
            if project is None:
                project = Project(ProjectName(name), Downloads(0))
            project.add_downloads(row.date, row.version, Downloads(row.downloads))
            touched[name] = project
        self._project_repository.save_projects(list(touched.values()))
def _get_downloads(self, project: Project, period: BadgePeriod, units: BadgeUnits) -> str:
    """Return the download figure of ``project`` for ``period``, formatted with ``units``.

    Raises:
        Exception: if ``period`` is not total, month or week.
    """
    is_total = period == BadgePeriod.total
    is_month = not is_total and period == BadgePeriod.month
    is_week = not is_total and not is_month and period == BadgePeriod.week

    if not (is_total or is_month or is_week):
        raise Exception(f"{period} not valid")

    if is_total:
        amount = project.total_downloads
    elif is_month:
        amount = project.month_downloads()
    else:
        amount = self._last_downloads(project, 7)
    return self._downloads_formatter.format_with_units(amount, units)
def get(self, project_name: str) -> Optional[Project]:
    """Load a project and its per-day downloads through ``self._client``.

    The lookup uses the name as normalized by ``ProjectName``. Returns
    ``None`` when the ``projects`` collection has no matching document.

    Two storage layouts are supported:

    * embedded - the project document has a ``downloads`` mapping of
      ISO date -> sequence of ``(version, downloads)`` pairs; these entries
      are also recorded in ``project._repository_saved_downloads``;
    * separate - per-day documents in the ``project_downloads`` collection,
      each with a ``date`` and a ``downloads`` list of per-version dicts.
    """
    normalized_name = ProjectName(project_name).name
    project_data = self._client.projects.find_one({"name": normalized_name})
    if project_data is None:
        return None

    project = Project(ProjectName(project_data["name"]),
                      Downloads(project_data["total_downloads"]))

    if "downloads" in project_data:
        downloads = sorted(project_data["downloads"].items(),
                           key=lambda x: x[0])
        for iso_date, version_downloads in downloads:
            day = datetime.date.fromisoformat(iso_date)
            for r in version_downloads:
                version = r[0]
                project.add_downloads(day, version, DayDownloads(r[1], 0))
                project._repository_saved_downloads.add((iso_date, version))
                # Don't count the downloads twice
                project.total_downloads -= Downloads(r[1])
    else:
        raw_downloads = self._client.project_downloads.find(
            {"project": normalized_name})
        downloads = sorted(raw_downloads, key=lambda x: x["date"])
        for day_downloads in downloads:
            day = datetime.date.fromisoformat(day_downloads["date"])
            for version_downloads in day_downloads["downloads"]:
                # The previous membership test looked for the misspelled key
                # "pip_downlods", so the stored value was never read.
                pip_downloads = version_downloads.get("pip_downloads", 0)
                project.add_downloads(
                    day,
                    version_downloads["version"],
                    DayDownloads(version_downloads["downloads"],
                                 pip_downloads),
                )
                # Don't count the downloads twice
                project.total_downloads -= Downloads(
                    version_downloads["downloads"])
    return project
def transform_project(project: Project) -> Dict:
    """Serialize ``project`` into a dictionary with per-day download totals.

    The ``downloads`` entry maps each ISO-formatted date to the sum of the
    downloads of all versions on that date. Keys are inserted while walking
    ``project.last_downloads()`` from its last entry to its first.
    """
    day_downloads = defaultdict(int)
    # reversed() walks the sequence backwards without reversing it in place,
    # so the list handed back by the project is left untouched.
    for d in reversed(project.last_downloads()):
        day_downloads[d.date.isoformat()] += d.downloads.value
    return {
        "id": project.name.name,
        "total_downloads": project.total_downloads.value,
        # Plain dict copy, preserving insertion order.
        "downloads": dict(day_downloads),
    }
def transform_project(project: Project) -> Dict:
    """Serialize ``project`` into a dictionary with per-day download totals.

    Only downloads returned by ``project.last_downloads`` for a cutoff of
    30 days before today are included. The ``downloads`` entry maps each
    ISO-formatted date to the sum of the downloads of all versions on that
    date; keys are inserted while walking the entries from last to first.
    """
    month_ago = datetime.now().date() - timedelta(days=30)
    day_downloads = defaultdict(int)
    # reversed() walks the sequence backwards without reversing it in place,
    # so the list handed back by the project is left untouched.
    for d in reversed(project.last_downloads(month_ago)):
        day_downloads[d.date.isoformat()] += d.downloads.value
    return {
        "id": project.name.name,
        "total_downloads": project.total_downloads.value,
        # Plain dict copy, preserving insertion order.
        "downloads": dict(day_downloads),
    }
def handle(self, cmd: UpdateDownloads):
    """Extract the downloads for ``cmd.date`` and store them, logging each step.

    Raises:
        InvalidAdminPassword: if the admin password checker rejects
            ``cmd.password``.
    """
    password_ok = self._admin_password_checker.check(cmd.password)
    if not password_ok:
        self._logger.info("Invalid password")
        raise InvalidAdminPassword(cmd.password)

    self._logger.info(f"Getting downloads from date {cmd.date}...")
    extracted = self._downloads_extractor.get_downloads(cmd.date)
    self._logger.info(f"Retrieved {len(extracted)} downloads. Saving to db...")

    # Add new projects if they don't exist before
    zero_download_projects = [
        Project(item.name, Downloads(0)) for item in extracted
    ]
    self._project_repository.save_projects(zero_download_projects)
    self._logger.info("New projects saved")

    self._project_repository.save_day_downloads(extracted)
    self._logger.info("Downloads saved")

    self._project_repository.update_downloads(extracted)
    self._logger.info("Total downloads updated")
def _convert_downloads_to_raw(self, project: Project) -> dict:
    """Build one raw per-day document for the downloads of ``project`` not yet saved.

    Entries whose ``(iso_date, version)`` pair is already in
    ``project._repository_saved_downloads`` are left out. The result maps
    each ISO date to a dict with ``project``, ``date`` and ``downloads``
    (a list of per-version dicts).
    """
    grouped = defaultdict(list)
    for entry in project.last_downloads():
        iso_day = entry.date.isoformat()
        if (iso_day, entry.version) in project._repository_saved_downloads:
            continue
        grouped[iso_day].append(
            {
                "version": entry.version,
                "downloads": entry.downloads.value,
                "pip_downloads": entry.pip_downloads.value,
            }
        )

    return {
        iso_day: {"project": project.name.name, "date": iso_day, "downloads": items}
        for iso_day, items in grouped.items()
    }
def test_add_downloads_to_project():
    """Adding downloads to an empty project sets its total, its entries and its versions."""
    today = datetime.now().date()
    release = "2.3.1"
    amount = Downloads(10)

    project = Project(ProjectName("random"), Downloads(0))
    project.add_downloads(today, release, amount)

    expected_entries = [ProjectVersionDownloads(today, release, amount)]
    assert project.total_downloads == amount
    assert project.last_downloads() == expected_entries
    assert project.versions() == {release}
def create(name: ProjectName = None, downloads: Downloads = None) -> Project:
    """Build a ``Project``, filling any omitted argument from its stub factory.

    Args:
        name: project name; ``ProjectNameStub.create()`` is used when ``None``.
        downloads: total downloads; ``DownloadsStub.create()`` is used when
            ``None``.
    """
    # Compare against None explicitly: ``value or default`` would also throw
    # away a caller-supplied value that merely evaluates as falsy.
    if name is None:
        name = ProjectNameStub.create()
    if downloads is None:
        downloads = DownloadsStub.create()
    return Project(name, downloads)
def test_add_project():
    """A newly created project has an empty ``last_downloads`` list."""
    name = ProjectName("random")
    project = Project(name, Downloads(0))
    recorded = project.last_downloads()
    assert recorded == []