Beispiel #1
0
 def handle(self, cmd: UpdateVersionDownloads):
     """Fetch the per-version downloads for ``cmd.date`` and save them in batches.

     Rows are processed 250 at a time. Within a batch every project is
     looked up in the repository at most once; later rows for the same
     project reuse the instance already held in the batch-local cache.
     Projects the repository does not return are created with zero
     downloads. Each batch is saved with one ``save_projects`` call.

     Args:
         cmd: command exposing ``password`` and ``date``.

     Raises:
         InvalidAdminPassword: when the password checker rejects
             ``cmd.password``.
     """
     if not self._admin_password_checker.check(cmd.password):
         self._logger.info("Invalid password")
         raise InvalidAdminPassword(cmd.password)
     self._logger.info(f"Getting downloads from date {cmd.date}...")
     stats_result = self._stats_viewer.get_version_downloads(cmd.date)
     self._logger.info(
         f"Retrieved {stats_result.total_rows} downloads. Saving to db...")
     start_time = timeit.default_timer()
     batch_size = 250
     # Ceiling division: a trailing partial batch is still a batch, so the
     # progress log never reports more batches than the announced total.
     total_batches = (stats_result.total_rows + batch_size - 1) // batch_size
     # Batches are numbered from 1 so the last one reads "Batch N of N".
     for batch_number, batch in enumerate(
             self._batch(stats_result.rows, batch_size), start=1):
         self._logger.info(f"Batch {batch_number} of {total_batches}")
         projects = {}
         for row in batch:
             # Cached values are never None, so None means "not seen yet".
             project = projects.get(row.project)
             if project is None:
                 project = self._project_repository.get(
                     row.project, downloads_from=cmd.date)
             if project is None:
                 project = Project(ProjectName(row.project), Downloads(0))
             project.add_downloads(
                 row.date, row.version,
                 DayDownloads(row.downloads, row.pip_downloads))
             projects[row.project] = project
         self._project_repository.save_projects(list(projects.values()))
     end_time = timeit.default_timer()
     self._logger.info(
         f"Total downloads updated. Total time + {(end_time - start_time):.4f} seconds"
     )
Beispiel #2
0
def transform_project_v2(project: Project) -> Dict:
    """Build the v2 dictionary representation of ``project``.

    The result holds the project name under ``"id"``, the total download
    count, the project's versions ordered with ``natsorted`` and a nested
    mapping ``ISO date -> version -> downloads`` built from
    ``project.last_downloads()``.
    """
    downloads_by_day = defaultdict(lambda: defaultdict(int))
    for entry in project.last_downloads():
        per_version = downloads_by_day[entry.date.isoformat()]
        per_version[entry.version] = entry.downloads.value

    project_id = project.name.name
    total = project.total_downloads.value
    ordered_versions = natsorted(list(project.versions()))
    return {
        "id": project_id,
        "total_downloads": total,
        "versions": ordered_versions,
        "downloads": downloads_by_day,
    }
Beispiel #3
0
def transform_project_v2(project: Project) -> Dict:
    """Build the v2 dictionary representation of ``project``.

    Only the downloads returned by ``project.last_downloads`` for a cutoff
    of 90 days before today are included in the nested
    ``ISO date -> version -> downloads`` mapping. Versions are ordered with
    ``natsorted``.
    """
    downloads_by_day = defaultdict(lambda: defaultdict(int))
    # The window is 90 days wide.
    cutoff = datetime.now().date() - timedelta(days=90)
    for entry in project.last_downloads(cutoff):
        per_version = downloads_by_day[entry.date.isoformat()]
        per_version[entry.version] = entry.downloads.value

    project_id = project.name.name
    total = project.total_downloads.value
    ordered_versions = natsorted(list(project.versions()))
    return {
        "id": project_id,
        "total_downloads": total,
        "versions": ordered_versions,
        "downloads": downloads_by_day,
    }
Beispiel #4
0
def test_remove_old_data():
    """Downloads older than 180 days are absent from ``last_downloads()``.

    The 181-day-old entry still counts towards ``total_downloads`` (70), but
    only the entries at exactly 180 days and at today are listed.
    """
    # Sample the clock once: three separate now() calls could straddle
    # midnight and produce dates that are not exactly 180/181 days apart.
    today = datetime.now().date()
    old_date = today - timedelta(days=181)
    limit_date = today - timedelta(days=180)
    now_date = today
    project = Project(ProjectName("random"), Downloads(10))
    project.add_downloads(old_date, "2.3.1", Downloads(10))
    project.add_downloads(limit_date, "2.3.0", Downloads(20))
    project.add_downloads(now_date, "2.3.2", Downloads(30))
    assert project.total_downloads == Downloads(70)
    assert project.last_downloads() == [
        ProjectVersionDownloads(limit_date, "2.3.0", Downloads(20)),
        ProjectVersionDownloads(now_date, "2.3.2", Downloads(30)),
    ]
    assert {"2.3.0", "2.3.2"}.issubset(project.versions())
Beispiel #5
0
def test_filter_date():
    """``last_downloads(date)`` only lists entries on or after that date.

    Totals, known versions and ``min_date`` still reflect every entry added.
    """
    march_9 = date(2020, 3, 9)
    april_10 = date(2020, 4, 10)
    april_11 = date(2020, 4, 11)
    project = Project(ProjectName("random"), Downloads(10))
    for day, version, amount in (
        (march_9, "0.0.6", 20),
        (april_10, "0.0.2", 10),
        (april_10, "0.0.4", 10),
        (april_11, "0.0.4", 10),
    ):
        project.add_downloads(day, version, Downloads(amount))

    assert project.total_downloads == Downloads(60)
    expected_recent = [
        ProjectVersionDownloads(april_10, "0.0.2", Downloads(10)),
        ProjectVersionDownloads(april_10, "0.0.4", Downloads(10)),
        ProjectVersionDownloads(april_11, "0.0.4", Downloads(10)),
    ]
    assert project.last_downloads(april_10) == expected_recent
    assert project.versions() == {"0.0.6", "0.0.2", "0.0.4"}
    assert project.min_date == march_9
Beispiel #6
0
 def handle(self, cmd: ImportTotalDownloads):
     """Overwrite the total download count of every project listed in a file.

     ``self._batch(cmd.file_path, 250)`` supplies the rows in batches. For
     each row the project is taken from the batch-local cache, otherwise
     from the repository, otherwise created with zero downloads; its
     ``total_downloads`` is then replaced by the row's value. Each batch is
     saved with a single ``save_projects`` call.
     """
     batch_number = 0
     for batch in self._batch(cmd.file_path, 250):
         batch_number += 1
         self._logger.info(f"Batch {batch_number}")
         touched = {}
         for row in batch:
             name = row.project
             project = (touched[name] if name in touched else
                        self._project_repository.get(name))
             if project is None:
                 project = Project(ProjectName(name), Downloads(0))
             project.total_downloads = Downloads(row.total_downloads)
             touched[name] = project
         self._project_repository.save_projects(list(touched.values()))
Beispiel #7
0
 def get(self, project_name: str) -> Optional[Project]:
     """Load a project and its stored downloads by name.

     The name is stripped and lower-cased before the lookup. Returns
     ``None`` when no document matches. Stored downloads are replayed in
     ascending date-key order through ``add_downloads``.
     """
     query = {"name": project_name.strip().lower()}
     record = self._client.projects.find_one(query)
     if record is None:
         return None
     project = Project(ProjectName(record["name"]),
                       Downloads(record["total_downloads"]))
     by_day = sorted(record["downloads"].items(), key=lambda item: item[0])
     for iso_day, entries in by_day:
         for entry in entries:
             day = datetime.date.fromisoformat(iso_day)
             amount = Downloads(entry[1])
             project.add_downloads(day, entry[0], amount)
             # Undo the increment made by add_downloads: the stored total
             # already includes these downloads.
             project.total_downloads -= Downloads(entry[1])
     return project
Beispiel #8
0
 def handle(self, cmd: ImportDownloadsFile):
     """Create one project per CSV row of ``cmd.file`` and save them all.

     The first row is treated as a header and skipped. Column 0 is the
     project name and column 1 the download count. An empty file or a file
     with only a header results in ``save_projects([])``.
     """
     reader = csv.reader(cmd.file, delimiter=",")
     # A default keeps an empty file from raising StopIteration here.
     next(reader, None)
     # csv.reader yields [] for blank lines; skip them instead of failing
     # with IndexError on r[0].
     # NOTE(review): r[1] is the raw CSV string; confirm Downloads accepts
     # or converts it.
     projects = [
         Project(ProjectName(r[0]), Downloads(r[1])) for r in reader if r
     ]
     self._project_repository.save_projects(projects)
Beispiel #9
0
    def _last_downloads(project: Project, days: int) -> Downloads:
        """Sum the project's downloads dated within the last ``days`` days.

        Entries from ``project.last_downloads()`` whose date is on or after
        today minus ``days`` are added up and wrapped in ``Downloads``.
        """
        cutoff = datetime.now().date() - timedelta(days=days)
        recent_total = 0
        for entry in project.last_downloads():
            if entry.date >= cutoff:
                recent_total += entry.downloads.value
        return Downloads(recent_total)
Beispiel #10
0
def test_retrieve_monthly_downloads():
    """``month_downloads()`` sums only the recent entries.

    With the clock frozen at 2020-04-12 the March entry is left out and the
    two April entries add up to 25.
    """
    project = Project(ProjectName("random"), Downloads(10))
    with freezegun.freeze_time("2020-04-12"):
        daily_amounts = (
            (date(2020, 3, 9), 20),
            (date(2020, 4, 10), 10),
            (date(2020, 4, 11), 15),
        )
        for day, amount in daily_amounts:
            project.add_downloads(day, "0.0.1", Downloads(amount))
        monthly = project.month_downloads()
        assert monthly == Downloads(25)
Beispiel #11
0
 def find(self, project_name: ProjectName) -> Optional[Project]:
     """Look up a project row by name.

     Returns a ``Project`` built from the first matching row's ``name`` and
     ``downloads`` columns, or ``None`` when the query returns no rows.
     """
     query = "SELECT name, downloads FROM projects WHERE name = %s"
     with self._conn, self._conn.cursor() as cursor:
         cursor.execute(query, (project_name.name, ))
         rows = cursor.fetchall()
         if not rows:
             return None
         first = rows[0]
         return Project(ProjectName(first[0]), Downloads(first[1]))
Beispiel #12
0
 def handle(self, cmd: UpdateDownloads):
     """Store the downloads extracted for ``cmd.date``.

     Raises:
         InvalidAdminPassword: when the password checker rejects
             ``cmd.password``.
     """
     password_ok = self._admin_password_checker.check(cmd.password)
     if not password_ok:
         raise InvalidAdminPassword(cmd.password)
     extracted = self._downloads_extractor.get_downloads(cmd.date)
     # Save a zero-download Project for every extracted name first, so that
     # projects which did not exist yet are added before their downloads.
     placeholders = [Project(entry.name, Downloads(0)) for entry in extracted]
     self._project_repository.save_projects(placeholders)
     self._project_repository.save_day_downloads(extracted)
     self._project_repository.update_downloads(extracted)
Beispiel #13
0
 def find_random_projects(self, nr_items: int = 10) -> List[Project]:
     """Return up to ``nr_items`` projects picked in random order by the database."""
     query = "SELECT name, downloads FROM projects ORDER BY random() LIMIT %s;"
     with self._conn, self._conn.cursor() as cursor:
         cursor.execute(query, (nr_items, ))
         projects = []
         for row in cursor.fetchall():
             projects.append(Project(ProjectName(row[0]), Downloads(row[1])))
         return projects
Beispiel #14
0
def test_project_replace_downloads():
    """Adding downloads twice for the same day and version replaces the first value.

    The total ends at 10 + 5 and only the second entry is listed.
    """
    today = datetime.now().date()
    release = "2.3.1"
    project = Project(ProjectName("random"), Downloads(10))
    for amount in (25, 5):
        project.add_downloads(today, release, DayDownloads(amount, amount))

    assert project.total_downloads == Downloads(15)
    expected = [
        ProjectVersionDownloads(today, release, Downloads(5), Downloads(5))
    ]
    assert project.last_downloads() == expected
    assert project.versions() == {release}
Beispiel #15
0
def test_update_min_date_when_no_other_downloads():
    """The 2019 entry counts towards totals and versions but is not retained.

    ``last_downloads()`` lists only the 2020-04-10 entries and ``min_date``
    is that date.
    """
    stale_day = date(2019, 3, 9)
    recent_day = date(2020, 4, 10)
    project = Project(ProjectName("random"), Downloads(10))
    for day, version, amount in (
        (stale_day, "0.0.6", 20),
        (recent_day, "0.0.2", 10),
        (recent_day, "0.0.4", 10),
    ):
        project.add_downloads(day, version, Downloads(amount))

    assert project.total_downloads == Downloads(50)
    expected_recent = [
        ProjectVersionDownloads(recent_day, "0.0.2", Downloads(10)),
        ProjectVersionDownloads(recent_day, "0.0.4", Downloads(10)),
    ]
    assert project.last_downloads() == expected_recent
    assert project.versions() == {"0.0.6", "0.0.2", "0.0.4"}
    assert project.min_date == recent_day
Beispiel #16
0
def test_save_many_projects_with_new_format(mongo_client: MongoClient,
                                            repository: ProjectRepository):
    """Saving a project writes one project document and one document per day.

    The project document is read from ``pepy_test.projects`` and the
    per-day documents from ``pepy_test.project_downloads``. Only the
    expected keys are compared, so extra stored fields are ignored.
    """
    project = Project(ProjectName("climoji"), Downloads(100))
    for day, version, amount in (
        (datetime.date(2020, 3, 31), "2.0", 40),
        (datetime.date(2020, 3, 31), "2.0.1", 30),
        (datetime.date(2020, 4, 1), "2.0", 20),
    ):
        project.add_downloads(day, version, DayDownloads(amount, 10))
    repository.save_projects([project])

    stored_project = mongo_client.pepy_test.projects.find_one(
        {"name": project.name.name})
    expected_project = {
        "name": "climoji",
        "total_downloads": 190,
        "monthly_downloads": 0,
    }
    for field, expected in expected_project.items():
        assert field in stored_project
        assert expected == stored_project[field]

    stored_days = sorted(
        mongo_client.pepy_test.project_downloads.find(
            {"project": project.name.name}),
        key=lambda doc: doc["date"],
    )
    expected_days = [
        {
            "project": "climoji",
            "date": "2020-03-31",
            "downloads": [
                {"version": "2.0", "downloads": 40},
                {"version": "2.0.1", "downloads": 30},
            ],
        },
        {
            "project": "climoji",
            "date": "2020-04-01",
            "downloads": [
                {"version": "2.0", "downloads": 20},
            ],
        },
    ]
    assert len(expected_days) == len(stored_days)
    for expected_day, stored_day in zip(expected_days, stored_days):
        for field, expected in expected_day.items():
            assert field in stored_day
            assert expected == stored_day[field]
Beispiel #17
0
 def handle(self, cmd: UpdateVersionDownloads):
     """Fetch the per-version downloads for ``cmd.date`` and save them in batches.

     Rows are processed 1,000 at a time. Within a batch every project is
     looked up in the repository at most once; projects the repository does
     not return are created with zero downloads. Each batch is saved with
     one ``save_projects`` call.

     Raises:
         InvalidAdminPassword: when the password checker rejects
             ``cmd.password``.
     """
     if not self._admin_password_checker.check(cmd.password):
         self._logger.info("Invalid password")
         raise InvalidAdminPassword(cmd.password)
     self._logger.info(f"Getting downloads from date {cmd.date}...")
     stats_result = self._stats_viewer.get_version_downloads(cmd.date)
     self._logger.info(f"Retrieved {stats_result.total_rows} downloads. Saving to db...")
     for batch in self._batch(stats_result.rows, 1_000):
         projects = {}
         for row in batch:
             # Cached values are never None, so None means "not seen yet".
             project = projects.get(row.project)
             if project is None:
                 project = self._project_repository.get(row.project)
             if project is None:
                 project = Project(ProjectName(row.project), Downloads(0))
             project.add_downloads(row.date, row.version, Downloads(row.downloads))
             projects[row.project] = project
         self._project_repository.save_projects(list(projects.values()))
Beispiel #18
0
    def _get_downloads(self, project: Project, period: BadgePeriod,
                       units: BadgeUnits) -> str:
        """Return the formatted download count of ``project`` for ``period``.

        ``total`` uses the project's total, ``month`` uses
        ``month_downloads()`` and ``week`` sums the last 7 days.

        Raises:
            Exception: when ``period`` is none of the periods above.
        """

        def render(amount):
            # One place for the formatting call shared by every period.
            return self._downloads_formatter.format_with_units(amount, units)

        if period == BadgePeriod.total:
            return render(project.total_downloads)
        if period == BadgePeriod.month:
            return render(project.month_downloads())
        if period == BadgePeriod.week:
            return render(self._last_downloads(project, 7))
        raise Exception(f"{period} not valid")
Beispiel #19
0
 def get(self, project_name: str) -> Optional[Project]:
     """Load a project and its stored downloads by name.

     The name is normalised through ``ProjectName`` before the lookup.
     Returns ``None`` when no project document matches.

     Two storage layouts are handled:

     * project documents with an embedded ``"downloads"`` mapping, whose
       entries are also recorded in ``project._repository_saved_downloads``;
     * per-day documents in the ``project_downloads`` collection.

     In both cases the downloads are replayed in ascending date order
     through ``add_downloads``.
     """
     normalized_name = ProjectName(project_name).name
     project_data = self._client.projects.find_one({"name": normalized_name})
     if project_data is None:
         return None
     project = Project(ProjectName(project_data["name"]), Downloads(project_data["total_downloads"]))
     if "downloads" in project_data:
         downloads = sorted(project_data["downloads"].items(), key=lambda x: x[0])
         for iso_date, version_downloads in downloads:
             for r in version_downloads:
                 date = datetime.date.fromisoformat(iso_date)
                 version = r[0]
                 # The embedded layout carries no pip count, hence 0.
                 project.add_downloads(date, version, DayDownloads(r[1], 0))
                 project._repository_saved_downloads.add((iso_date, version))
                 # Don't count the downloads twice
                 project.total_downloads -= Downloads(r[1])
     else:
         raw_downloads = self._client.project_downloads.find({"project": normalized_name})
         downloads = sorted(raw_downloads, key=lambda x: x["date"])
         for day_downloads in downloads:
             for version_downloads in day_downloads["downloads"]:
                 # Entries without a "pip_downloads" key count as 0. The
                 # previous membership test used a misspelled key, so the
                 # stored value was never read.
                 pip_downloads = version_downloads.get("pip_downloads", 0)
                 project.add_downloads(
                     datetime.date.fromisoformat(day_downloads["date"]),
                     version_downloads["version"],
                     DayDownloads(version_downloads["downloads"], pip_downloads),
                 )
                 # Don't count the downloads twice
                 project.total_downloads -= Downloads(version_downloads["downloads"])
     return project
Beispiel #20
0
def transform_project(project: Project) -> Dict:
    """Build the dictionary representation of ``project``.

    The result holds the project name under ``"id"``, the total download
    count, and a plain dict mapping each ISO date to the sum of that day's
    downloads across versions. Days appear in the reverse of the order
    returned by ``project.last_downloads()``.
    """
    day_downloads = defaultdict(int)
    # reversed() iterates backwards without touching the list itself;
    # list.reverse() would reorder the list returned by the project in place.
    for d in reversed(project.last_downloads()):
        day_downloads[d.date.isoformat()] += d.downloads.value

    return {
        "id": project.name.name,
        "total_downloads": project.total_downloads.value,
        # Return a plain dict rather than the defaultdict used for summing.
        "downloads": dict(day_downloads),
    }
Beispiel #21
0
def transform_project(project: Project) -> Dict:
    """Build the dictionary representation of ``project`` for the last 30 days.

    The result holds the project name under ``"id"``, the total download
    count, and a plain dict mapping each ISO date to the sum of that day's
    downloads across versions. Only entries returned by
    ``project.last_downloads`` for a cutoff of 30 days before today are
    used, and days appear in the reverse of the order returned.
    """
    day_downloads = defaultdict(int)
    month_ago = datetime.now().date() - timedelta(days=30)
    # reversed() iterates backwards without touching the list itself;
    # list.reverse() would reorder the list returned by the project in place.
    for d in reversed(project.last_downloads(month_ago)):
        day_downloads[d.date.isoformat()] += d.downloads.value

    return {
        "id": project.name.name,
        "total_downloads": project.total_downloads.value,
        # Return a plain dict rather than the defaultdict used for summing.
        "downloads": dict(day_downloads),
    }
Beispiel #22
0
 def handle(self, cmd: UpdateDownloads):
     """Store the downloads extracted for ``cmd.date``, logging each step.

     Raises:
         InvalidAdminPassword: when the password checker rejects
             ``cmd.password``.
     """
     log = self._logger.info
     password_ok = self._admin_password_checker.check(cmd.password)
     if not password_ok:
         log("Invalid password")
         raise InvalidAdminPassword(cmd.password)
     log(f"Getting downloads from date {cmd.date}...")
     extracted = self._downloads_extractor.get_downloads(cmd.date)
     log(f"Retrieved {len(extracted)} downloads. Saving to db...")
     # Save a zero-download Project for every extracted name first, so that
     # projects which did not exist yet are added before their downloads.
     placeholders = [Project(entry.name, Downloads(0)) for entry in extracted]
     self._project_repository.save_projects(placeholders)
     log("New projects saved")
     self._project_repository.save_day_downloads(extracted)
     log("Downloads saved")
     self._project_repository.update_downloads(extracted)
     log("Total downloads updated")
Beispiel #23
0
 def _convert_downloads_to_raw(self, project: Project) -> dict:
     downloads_per_day = defaultdict(list)
     for download in project.last_downloads():
         if not (download.date.isoformat(), download.version) in project._repository_saved_downloads:
             downloads_per_day[download.date.isoformat()].append(
                 {
                     "version": download.version,
                     "downloads": download.downloads.value,
                     "pip_downloads": download.pip_downloads.value,
                 }
             )
     result = {}
     for date, downloads in downloads_per_day.items():
         result[date] = {"project": project.name.name, "date": date, "downloads": downloads}
     return result
Beispiel #24
0
def test_add_downloads_to_project():
    """A single ``add_downloads`` call updates total, listing and versions."""
    today = datetime.now().date()
    release = "2.3.1"
    amount = Downloads(10)
    project = Project(ProjectName("random"), Downloads(0))
    project.add_downloads(today, release, amount)

    assert project.total_downloads == amount
    expected = [ProjectVersionDownloads(today, release, amount)]
    assert project.last_downloads() == expected
    assert project.versions() == {release}
Beispiel #25
0
 def create(name: ProjectName = None, downloads: Downloads = None) -> Project:
     """Build a ``Project``, filling omitted arguments from the stub factories.

     Args:
         name: project name; ``ProjectNameStub.create()`` is used when None.
         downloads: total downloads; ``DownloadsStub.create()`` is used when
             None.
     """
     # Compare against None instead of relying on truthiness, so an
     # explicitly passed value is never replaced just because it is falsy.
     if name is None:
         name = ProjectNameStub.create()
     if downloads is None:
         downloads = DownloadsStub.create()
     return Project(name, downloads)
Beispiel #26
0
def test_add_project():
    """A freshly created project lists no downloads."""
    fresh_project = Project(ProjectName("random"), Downloads(0))
    listed = fresh_project.last_downloads()
    assert listed == []