def get(self, project_name: str) -> Optional[Project]:
    """Load a project and its per-version download history from MongoDB.

    Two storage layouts are handled:

    * old format: the project document itself carries a ``downloads``
      mapping of ISO date -> list of ``[version, downloads]`` pairs;
    * new format: daily documents live in the ``project_downloads``
      collection, one per date, each with a list of per-version entries.

    :param project_name: raw project name; it is normalized through
        ``ProjectName`` before querying.
    :return: the rebuilt ``Project``, or ``None`` when no project document
        matches the normalized name.
    """
    normalized_name = ProjectName(project_name).name
    project_data = self._client.projects.find_one({"name": normalized_name})
    if project_data is None:
        return None

    project = Project(ProjectName(project_data["name"]), Downloads(project_data["total_downloads"]))

    if "downloads" in project_data:
        # Old format: downloads are embedded in the project document.
        downloads = sorted(project_data["downloads"].items(), key=lambda x: x[0])
        for iso_date, version_downloads in downloads:
            date = datetime.date.fromisoformat(iso_date)
            for r in version_downloads:
                version = r[0]
                project.add_downloads(date, version, DayDownloads(r[1], 0))
                project._repository_saved_downloads.add((iso_date, version))
                # Don't count the downloads twice: the stored total already
                # includes them, so undo the increment done by add_downloads.
                project.total_downloads -= Downloads(r[1])
    else:
        # New format: one document per day in the project_downloads collection.
        raw_downloads = self._client.project_downloads.find({"project": normalized_name})
        downloads = sorted(raw_downloads, key=lambda x: x["date"])
        for day_downloads in downloads:
            date = datetime.date.fromisoformat(day_downloads["date"])
            for version_downloads in day_downloads["downloads"]:
                # Entries may lack pip_downloads; default those to 0.
                pip_downloads = version_downloads.get("pip_downloads", 0)
                project.add_downloads(
                    date,
                    version_downloads["version"],
                    DayDownloads(version_downloads["downloads"], pip_downloads),
                )
                # Don't count the downloads twice
                project.total_downloads -= Downloads(version_downloads["downloads"])
    return project
def generate(self, project_name: str, period: str, left_color: str, right_color: str, left_text: str, units: str) -> Badge:
    """Build a personalized downloads badge for a project.

    :param project_name: name used to look the project up in the repository.
    :param period: key used to index ``BadgePeriod``.
    :param left_color: value wrapped in ``BadgeColor`` for the left side.
    :param right_color: value wrapped in ``BadgeColor`` for the right side.
    :param left_text: label shown on the left side of the badge.
    :param units: key used to index ``BadgeUnits``.
    :return: a ``Badge`` carrying the normalized project name and the
        rendered badge.
    :raises ProjectNotFoundException: when the repository has no such project.
    """
    found = self._project_repository.get(project_name)
    if found is None:
        raise ProjectNotFoundException(project_name)

    # Built in the same order as the nested constructor call would evaluate.
    name = ProjectName(project_name)
    badge_period = BadgePeriod[period]
    style = BadgeStyle(
        left_color=BadgeColor(left_color),
        right_color=BadgeColor(right_color),
        left_text=left_text,
        units=BadgeUnits[units],
    )
    badge_data = PersonalizedBadge(name, badge_period, style)

    right_text = self._get_downloads(found, badge_data.period, badge_data.style.units)
    rendered = badge(
        left_text=badge_data.style.left_text,
        right_text=right_text,
        left_color=badge_data.style.left_color.value,
        right_color=badge_data.style.right_color.value,
    )
    return Badge(badge_data.name.name, rendered)
def handle(self, cmd: ImportDownloadsFile):
    """Import projects from a comma-separated file and save them.

    The first row is treated as a header and skipped. Each remaining row
    supplies the project name in column 0 and its downloads in column 1.

    :param cmd: command whose ``file`` attribute is passed to ``csv.reader``.
    """
    reader = csv.reader(cmd.file, delimiter=",")
    # Skip the header; the default keeps an empty file from raising StopIteration.
    next(reader, None)
    # csv.reader yields [] for blank lines; skip them instead of failing on r[0].
    projects = [Project(ProjectName(r[0]), Downloads(r[1])) for r in reader if r]
    self._project_repository.save_projects(projects)
def test_find_project_ignoring_case(project_repository: DBProjectRepository, project_view: DBProjectView):
    """Looking up "PEpy" returns the projection of the stored "pepy" project."""
    stored = ProjectStub.create(name=ProjectName("pepy"))
    project_repository.save_projects([stored])

    found = project_view.find("PEpy")

    wanted = ProjectProjection("pepy", stored.downloads.value, [])
    assert found == wanted
def test_find_project_ignoring_white_spaces(project_repository: DBProjectRepository, project_view: DBProjectView):
    """Looking up ' pepy ' (padded with spaces) returns the stored 'pepy' project."""
    stored = ProjectStub.create(name=ProjectName('pepy'))
    project_repository.save_projects([stored])

    found = project_view.find(' pepy ')

    wanted = ProjectProjection('pepy', stored.downloads.value, [])
    assert found == wanted
def project_action(project_name):
    """Render ``project.html`` for the given project.

    The raw name is wrapped in ``ProjectName`` and used to fetch both the
    project and its last downloads from ``container.project_provider``.
    """
    name = ProjectName(project_name)
    found_project = container.project_provider.find(name)
    recent_downloads = container.project_provider.last_downloads(name)
    return render_template(
        "project.html",
        project=found_project,
        downloads=recent_downloads,
    )
def handle(self, cmd: UpdateVersionDownloads):
    """Fetch per-version downloads for a date and merge them into stored projects.

    Rows are processed in fixed-size batches. Within a batch each project is
    loaded at most once (or created with zero downloads when unknown),
    updated with every row that belongs to it, and then the whole batch is
    saved.

    :param cmd: command carrying ``password`` and ``date``.
    :raises InvalidAdminPassword: when the password check fails.
    """
    if not self._admin_password_checker.check(cmd.password):
        self._logger.info("Invalid password")
        raise InvalidAdminPassword(cmd.password)

    self._logger.info(f"Getting downloads from date {cmd.date}...")
    stats_result = self._stats_viewer.get_version_downloads(cmd.date)
    self._logger.info(f"Retrieved {stats_result.total_rows} downloads. Saving to db...")

    start_time = timeit.default_timer()
    batch_size = 250
    # Ceiling division so a final partial batch is included in the total.
    total_batches = int(-(-stats_result.total_rows // batch_size))
    # 1-based numbering so the last log line reads "Batch N of N".
    for batch_number, batch in enumerate(self._batch(stats_result.rows, batch_size), start=1):
        self._logger.info(f"Batch {batch_number} of {total_batches}")
        projects = {}
        for row in batch:
            # Reuse the project already touched in this batch, if any.
            project = projects.get(row.project)
            if project is None:
                project = self._project_repository.get(row.project, downloads_from=cmd.date)
            if project is None:
                project = Project(ProjectName(row.project), Downloads(0))
            project.add_downloads(row.date, row.version, DayDownloads(row.downloads, row.pip_downloads))
            projects[row.project] = project
        self._project_repository.save_projects(list(projects.values()))

    end_time = timeit.default_timer()
    self._logger.info(f"Total downloads updated. Total time + {(end_time - start_time):.4f} seconds")
def test_find_project_replacing_dots_with_dashes(project_repository: DBProjectRepository, project_view: DBProjectView):
    """Looking up 'pepy.rocks' returns the stored 'pepy-rocks' project."""
    stored = ProjectStub.create(name=ProjectName('pepy-rocks'))
    project_repository.save_projects([stored])

    found = project_view.find('pepy.rocks')

    wanted = ProjectProjection('pepy-rocks', stored.downloads.value, [])
    assert found == wanted
def step_impl(context: Context, name: str):
    """Create a project from the step table and save it.

    Each table row provides ``date`` (``YYYY-MM-DD``), ``version`` and
    ``downloads``; every row is added to a project that starts at zero
    downloads.
    """
    stub = ProjectStub.create(ProjectName(name), Downloads(0))
    for entry in context.table:
        day = datetime.strptime(entry["date"], "%Y-%m-%d").date()
        version = entry["version"]
        amount = Downloads(int(entry["downloads"]))
        stub.add_downloads(day, version, amount)
    context.container.project_repository.save(stub)
def test_retrieve_project_with_old_format(mongo_client: MongoClient, repository: ProjectRepository):
    """A project document with embedded ``downloads`` is read back sorted by date."""
    document = {
        "name": "climoji",
        "total_downloads": 1100,
        "downloads": {
            "2020-04-01": [["2.0", 30]],
            "2020-04-02": [["2.0", 10]],
            "2020-03-31": [["2.0", 40]],
            "2020-04-03": [["2.0", 30]],
        },
    }
    mongo_client.pepy_test.projects.replace_one({"name": "climoji"}, document, upsert=True)

    result = repository.get("climoji")

    assert ProjectName("climoji") == result.name
    assert datetime.date(2020, 3, 31) == result.min_date
    assert Downloads(1100) == result.total_downloads
    # Expected entries, oldest first, regardless of insertion order above.
    by_day = (
        (datetime.date(2020, 3, 31), 40),
        (datetime.date(2020, 4, 1), 30),
        (datetime.date(2020, 4, 2), 10),
        (datetime.date(2020, 4, 3), 30),
    )
    expected_last_downloads = [
        ProjectVersionDownloads(day, "2.0", Downloads(count)) for day, count in by_day
    ]
    assert expected_last_downloads == result.last_downloads()
def test_retrieve_monthly_downloads():
    """With time frozen at 2020-04-12, month_downloads() is 25 for the data below."""
    project = Project(ProjectName("random"), Downloads(10))
    with freezegun.freeze_time("2020-04-12"):
        # (year, month, day, downloads) for version "0.0.1"
        for year, month, day, count in ((2020, 3, 9, 20), (2020, 4, 10, 10), (2020, 4, 11, 15)):
            project.add_downloads(date(year, month, day), "0.0.1", Downloads(count))
        assert project.month_downloads() == Downloads(25)
def test_retrieve_project_with_new_format(mongo_client: MongoClient, repository: ProjectRepository):
    """Daily documents in ``project_downloads`` are read back sorted by date."""
    mongo_client.pepy_test.projects.replace_one(
        {"name": "climoji"},
        {"name": "climoji", "total_downloads": 1100},
        upsert=True,
    )
    # Inserted deliberately out of chronological order.
    stored_days = (("2020-04-01", 30), ("2020-04-02", 10), ("2020-03-31", 40), ("2020-04-03", 30))
    downloads_data = [
        InsertOne({
            "project": "climoji",
            "date": iso_day,
            "downloads": [{"version": "2.0", "downloads": count}],
        })
        for iso_day, count in stored_days
    ]
    mongo_client.pepy_test.project_downloads.bulk_write(downloads_data)

    result = repository.get("climoji")

    assert ProjectName("climoji") == result.name
    assert datetime.date(2020, 3, 31) == result.min_date
    assert Downloads(1100) == result.total_downloads
    by_day = (
        (datetime.date(2020, 3, 31), 40),
        (datetime.date(2020, 4, 1), 30),
        (datetime.date(2020, 4, 2), 10),
        (datetime.date(2020, 4, 3), 30),
    )
    expected_last_downloads = [
        ProjectVersionDownloads(day, "2.0", Downloads(count)) for day, count in by_day
    ]
    assert expected_last_downloads == result.last_downloads()
def step_impl(context: Context):
    """Save the day downloads listed in the step table.

    Each table row provides ``name``, ``downloads`` and ``date``
    (``YYYY-MM-DD``).
    """
    downloads = []
    for row in context.table:
        date = datetime.strptime(row["date"], "%Y-%m-%d").date()
        # Table cells are text; convert so Downloads receives a number, as the
        # other steps in this file do.
        count = Downloads(int(row["downloads"]))
        downloads.append(ProjectDownloads(ProjectName(row["name"]), count, date))
    context.container.project_repository.save_day_downloads(downloads)
def find(self, project_name: ProjectName) -> Optional[Project]:
    """Look a project up by name in the ``projects`` table.

    :param project_name: name whose ``name`` attribute is bound to the query.
    :return: the matching ``Project``, or ``None`` when no row matches.
    """
    with self._conn, self._conn.cursor() as cursor:
        cursor.execute("SELECT name, downloads FROM projects WHERE name = %s", (project_name.name, ))
        # Only the first row is ever used, so fetch just that one.
        row = cursor.fetchone()
        if row is None:
            return None
        return Project(ProjectName(row[0]), Downloads(row[1]))
def find_random_projects(self, nr_items: int = 10) -> List[Project]:
    """Return up to ``nr_items`` projects picked in random order by the database.

    :param nr_items: value bound to the query's ``LIMIT``.
    :return: one ``Project`` per returned row.
    """
    sql = "SELECT name, downloads FROM projects ORDER BY random() LIMIT %s;"
    with self._conn, self._conn.cursor() as cursor:
        cursor.execute(sql, (nr_items, ))
        rows = cursor.fetchall()
        # Each row is (name, downloads), matching the SELECT column order.
        return [Project(ProjectName(name), Downloads(total)) for name, total in rows]
def step_impl(context: Context, name: str):
    """Save a stub project and the day downloads listed in the step table.

    Each table row provides ``date`` (``YYYY-MM-DD``) and ``downloads``.
    """
    project_name = ProjectName(name)
    project = ProjectStub.create(project_name)
    context.container.project_repository.save_projects([project])
    downloads = []
    for row in context.table:
        date = datetime.strptime(row['date'], '%Y-%m-%d').date()
        # Table cells are text; convert so Downloads receives a number, as the
        # other steps in this file do.
        count = Downloads(int(row['downloads']))
        downloads.append(ProjectDownloads(project_name, count, date))
    context.container.project_repository.save_day_downloads(downloads)
def test_add_downloads_to_project():
    """Adding downloads to an empty project updates total, history and versions."""
    today = datetime.now().date()
    release = "2.3.1"
    added = Downloads(10)
    project = Project(ProjectName("random"), Downloads(0))

    project.add_downloads(today, release, added)

    assert project.total_downloads == added
    expected_history = [ProjectVersionDownloads(today, release, added)]
    assert project.last_downloads() == expected_history
    assert project.versions() == {release}
def test_project_replace_downloads():
    """Re-adding the same date and version replaces the earlier figure."""
    today = datetime.now().date()
    release = "2.3.1"
    project = Project(ProjectName("random"), Downloads(10))

    # The second call targets the same (date, version) pair as the first.
    for figures in (DayDownloads(25, 25), DayDownloads(5, 5)):
        project.add_downloads(today, release, figures)

    assert project.total_downloads == Downloads(15)
    expected_history = [ProjectVersionDownloads(today, release, Downloads(5), Downloads(5))]
    assert project.last_downloads() == expected_history
    assert project.versions() == {release}
def test_update_min_date_when_no_other_downloads():
    """The 2019 entry counts toward totals and versions but not last_downloads/min_date."""
    project = Project(ProjectName("random"), Downloads(10))
    entries = (
        (date(2019, 3, 9), "0.0.6", 20),
        (date(2020, 4, 10), "0.0.2", 10),
        (date(2020, 4, 10), "0.0.4", 10),
    )
    for day, release, count in entries:
        project.add_downloads(day, release, Downloads(count))

    assert project.total_downloads == Downloads(50)
    recent_day = date(2020, 4, 10)
    assert project.last_downloads() == [
        ProjectVersionDownloads(recent_day, "0.0.2", Downloads(10)),
        ProjectVersionDownloads(recent_day, "0.0.4", Downloads(10)),
    ]
    assert project.versions() == {"0.0.6", "0.0.2", "0.0.4"}
    assert project.min_date == date(2020, 4, 10)
def step_impl(context: Context, name: str):
    """Save a project whose total equals the sum of the table's day downloads.

    Each table row provides ``date`` (``YYYY-MM-DD``) and ``downloads``. The
    project is saved first, then the individual day downloads.
    """
    project_name = ProjectName(name)
    downloads = []
    total_downloads = 0
    for row in context.table:
        date = datetime.strptime(row["date"], "%Y-%m-%d").date()
        # Parse once and use the same integer for both the total and the entry,
        # rather than handing the raw table string to Downloads.
        day_count = int(row["downloads"])
        total_downloads += day_count
        downloads.append(ProjectDownloads(project_name, Downloads(day_count), date))
    project = ProjectStub.create(project_name, Downloads(total_downloads))
    context.container.project_repository.save_projects([project])
    context.container.project_repository.save_day_downloads(downloads)
def test_save_many_projects_with_new_format(mongo_client: MongoClient, repository: ProjectRepository):
    """save_projects writes the project document plus one downloads document per day."""
    project = Project(ProjectName("climoji"), Downloads(100))
    additions = (
        (datetime.date(2020, 3, 31), "2.0", DayDownloads(40, 10)),
        (datetime.date(2020, 3, 31), "2.0.1", DayDownloads(30, 10)),
        (datetime.date(2020, 4, 1), "2.0", DayDownloads(20, 10)),
    )
    for day, release, figures in additions:
        project.add_downloads(day, release, figures)

    repository.save_projects([project])

    # Project document: only the listed keys are checked; extra keys are allowed.
    data = mongo_client.pepy_test.projects.find_one({"name": project.name.name})
    expected_data = {"name": "climoji", "total_downloads": 190, "monthly_downloads": 0}
    for key, value in expected_data.items():
        assert key in data
        assert value == data[key]

    # Daily documents, compared in date order.
    stored_cursor = mongo_client.pepy_test.project_downloads.find({"project": project.name.name})
    downloads_data = sorted(stored_cursor, key=lambda x: x["date"])
    expected_downloads_data = [
        {
            "project": "climoji",
            "date": "2020-03-31",
            "downloads": [
                {"version": "2.0", "downloads": 40},
                {"version": "2.0.1", "downloads": 30},
            ],
        },
        {
            "project": "climoji",
            "date": "2020-04-01",
            "downloads": [{"version": "2.0", "downloads": 20}],
        },
    ]
    assert len(expected_downloads_data) == len(downloads_data)
    for wanted, stored in zip(expected_downloads_data, downloads_data):
        for key, value in wanted.items():
            assert key in stored
            assert value == stored[key]
def test_remove_old_data():
    """Downloads from 181 days ago are absent from last_downloads but still in the total.

    The entry from exactly 180 days ago and the one from today are both
    returned by ``last_downloads()``.
    """
    # Read the clock once so the three dates cannot straddle midnight.
    today = datetime.now().date()
    old_date = today - timedelta(days=181)
    limit_date = today - timedelta(days=180)
    now_date = today

    project = Project(ProjectName("random"), Downloads(10))
    project.add_downloads(old_date, "2.3.1", Downloads(10))
    project.add_downloads(limit_date, "2.3.0", Downloads(20))
    project.add_downloads(now_date, "2.3.2", Downloads(30))

    assert project.total_downloads == Downloads(70)
    assert project.last_downloads() == [
        ProjectVersionDownloads(limit_date, "2.3.0", Downloads(20)),
        ProjectVersionDownloads(now_date, "2.3.2", Downloads(30)),
    ]
    assert {"2.3.0", "2.3.2"}.issubset(project.versions())
def test_filter_date():
    """last_downloads(date) returns only entries dated on or after the given day."""
    project = Project(ProjectName("random"), Downloads(10))
    entries = (
        (date(2020, 3, 9), "0.0.6"),
        (date(2020, 4, 10), "0.0.2"),
        (date(2020, 4, 10), "0.0.4"),
        (date(2020, 4, 11), "0.0.4"),
    )
    for day, release in entries:
        project.add_downloads(day, release, Downloads(10))

    assert project.total_downloads == Downloads(60)
    # Everything except the first (March) entry is expected back.
    filtered = project.last_downloads(date(2020, 4, 10))
    assert filtered == [
        ProjectVersionDownloads(date(2020, 4, 10), "0.0.2", Downloads(10)),
        ProjectVersionDownloads(date(2020, 4, 10), "0.0.4", Downloads(10)),
        ProjectVersionDownloads(date(2020, 4, 11), "0.0.4", Downloads(10)),
    ]
    assert project.versions() == {"0.0.6", "0.0.2", "0.0.4"}
    assert project.min_date == date(2020, 3, 9)
def handle(self, cmd: ImportTotalDownloads):
    """Overwrite stored total downloads from a file, in batches of 250 rows.

    For every row the project is taken from the current batch if already
    seen, otherwise loaded from the repository, otherwise created with zero
    downloads; its ``total_downloads`` is then set from the row. Each batch
    is saved once all of its rows are processed.

    :param cmd: command whose ``file_path`` is handed to ``self._batch``.
    """
    batch_number = 0
    for batch in self._batch(cmd.file_path, 250):
        batch_number += 1
        self._logger.info(f"Batch {batch_number}")
        seen = {}
        for row in batch:
            if row.project in seen:
                current = seen[row.project]
            else:
                current = self._project_repository.get(row.project)
            if current is None:
                current = Project(ProjectName(row.project), Downloads(0))
            current.total_downloads = Downloads(row.total_downloads)
            seen[row.project] = current
        self._project_repository.save_projects(list(seen.values()))
def get_downloads(self, date: datetime.date) -> List[ProjectDownloads]:
    """Query per-project download counts for one day.

    The day is formatted as ``YYYYMMDD`` and appended to the table name
    ``the-psf.pypi.downloads``; rows are grouped by ``file.project``.

    :param date: day whose table is queried; also stamped on every result.
    :return: one ``ProjectDownloads`` per row of the query result.
    """
    table_suffix = date.strftime("%Y%m%d")
    sql = """
        SELECT file.project as name, count(*) AS downloads
        FROM `the-psf.pypi.downloads{}`
        GROUP BY file.project
    """.format(table_suffix)
    job = self.client.query(sql, location='US')
    rows = job.result(self.TIMEOUT)
    return [
        ProjectDownloads(ProjectName(row['name']), Downloads(row['downloads']), date)
        for row in rows
    ]
def get(self, project_name: str) -> Optional[Project]:
    """Load a project and its embedded download history from MongoDB.

    The name is stripped and lower-cased before the lookup. The document's
    ``downloads`` mapping (ISO date -> list of ``[version, downloads]``
    pairs) is replayed in date order.

    :param project_name: raw project name.
    :return: the rebuilt ``Project``, or ``None`` when no document matches.
    """
    lookup_name = project_name.strip().lower()
    document = self._client.projects.find_one({"name": lookup_name})
    if document is None:
        return None

    project = Project(ProjectName(document["name"]), Downloads(document["total_downloads"]))
    by_day = list(document["downloads"].items())
    by_day.sort(key=lambda pair: pair[0])
    for iso_day, entries in by_day:
        for entry in entries:
            count = entry[1]
            project.add_downloads(datetime.date.fromisoformat(iso_day), entry[0], Downloads(count))
            # The stored total already includes this figure; undo the
            # increment so it is not counted twice.
            project.total_downloads -= Downloads(count)
    return project
def handle(self, cmd: UpdateVersionDownloads):
    """Fetch per-version downloads for a date and merge them into stored projects.

    Rows are processed in batches of 1,000. Within a batch each project is
    loaded at most once (or created with zero downloads when unknown),
    updated with every row that belongs to it, and then the whole batch is
    saved.

    :param cmd: command carrying ``password`` and ``date``.
    :raises InvalidAdminPassword: when the password check fails.
    """
    if not self._admin_password_checker.check(cmd.password):
        self._logger.info("Invalid password")
        raise InvalidAdminPassword(cmd.password)

    self._logger.info(f"Getting downloads from date {cmd.date}...")
    stats_result = self._stats_viewer.get_version_downloads(cmd.date)
    self._logger.info(f"Retrieved {stats_result.total_rows} downloads. Saving to db...")

    for batch in self._batch(stats_result.rows, 1_000):
        projects = {}
        for row in batch:
            # Reuse the project already touched in this batch, if any.
            project = projects.get(row.project)
            if project is None:
                project = self._project_repository.get(row.project)
            if project is None:
                project = Project(ProjectName(row.project), Downloads(0))
            project.add_downloads(row.date, row.version, Downloads(row.downloads))
            projects[row.project] = project
        self._project_repository.save_projects(list(projects.values()))
def test_add_project():
    """A freshly created project has an empty download history."""
    fresh = Project(ProjectName("random"), Downloads(0))
    history = fresh.last_downloads()
    assert history == []
def test_project_name_strip_spaces():
    """ProjectName drops the surrounding spaces from its input."""
    padded = " some-project "
    parsed = ProjectName(padded)
    assert "some-project" == parsed.name
def badge_action(project_name):
    """Return the project's badge as an SVG response with a one-day cache header."""
    name = ProjectName(project_name)
    generated = container.badge_query.generate_badge(name)
    return Response(
        generated.image,
        mimetype="image/svg+xml",
        headers={"Cache-Control": "max-age=86400"},
    )