Beispiel #1
0
 def get(self, project_name: str) -> Optional[Project]:
     """Load a project together with its per-version download history.

     The project document is looked up in ``self._client.projects`` by its
     normalized name. The history is read from the document's embedded
     ``"downloads"`` mapping when present, otherwise from the
     ``project_downloads`` collection.

     Args:
         project_name: Raw project name; normalized through ``ProjectName``.

     Returns:
         The populated ``Project``, or ``None`` when no document matches.
     """
     normalized_name = ProjectName(project_name).name
     project_data = self._client.projects.find_one({"name": normalized_name})
     if project_data is None:
         return None
     project = Project(ProjectName(project_data["name"]), Downloads(project_data["total_downloads"]))
     if "downloads" in project_data:
         # Embedded layout: {iso_date: [[version, downloads], ...]}, replayed
         # in chronological order (ISO dates sort correctly as strings).
         downloads = sorted(project_data["downloads"].items(), key=lambda x: x[0])
         for iso_date, version_downloads in downloads:
             for r in version_downloads:
                 date = datetime.date.fromisoformat(iso_date)
                 version = r[0]
                 # This layout carries no pip figure, so it is recorded as 0.
                 project.add_downloads(date, version, DayDownloads(r[1], 0))
                 project._repository_saved_downloads.add((iso_date, version))
                 # Don't count the downloads twice
                 project.total_downloads -= Downloads(r[1])
     else:
         # Separate-collection layout: one document per (project, date), each
         # holding a list of per-version entries.
         raw_downloads = self._client.project_downloads.find({"project": normalized_name})
         downloads = sorted(raw_downloads, key=lambda x: x["date"])
         for day_downloads in downloads:
             for version_downloads in day_downloads["downloads"]:
                 # Entries may lack "pip_downloads"; treat a missing key as 0.
                 pip_downloads = version_downloads.get("pip_downloads", 0)
                 project.add_downloads(
                     datetime.date.fromisoformat(day_downloads["date"]),
                     version_downloads["version"],
                     DayDownloads(version_downloads["downloads"], pip_downloads),
                 )
                 # Don't count the downloads twice
                 project.total_downloads -= Downloads(version_downloads["downloads"])
     return project
Beispiel #2
0
def step_impl(context: Context, name: str):
    """Create a stub project from the step table and save it.

    Each table row supplies a ``date`` (``YYYY-MM-DD``), a ``version`` and a
    ``downloads`` count that is added to the project before saving.
    """
    stub = ProjectStub.create(ProjectName(name), Downloads(0))
    for entry in context.table:
        day = datetime.strptime(entry["date"], "%Y-%m-%d").date()
        version = entry["version"]
        amount = Downloads(int(entry["downloads"]))
        stub.add_downloads(day, version, amount)
    context.container.project_repository.save(stub)
Beispiel #3
0
def test_retrieve_monthly_downloads():
    """With the clock frozen at 2020-04-12, month_downloads() must equal 25.

    Three days are recorded (20, 10 and 15 downloads); only the two April
    entries add up to the expected 25.
    """
    project = Project(ProjectName("random"), Downloads(10))
    with freezegun.freeze_time("2020-04-12"):
        recorded = [
            (date(2020, 3, 9), 20),
            (date(2020, 4, 10), 10),
            (date(2020, 4, 11), 15),
        ]
        for day, amount in recorded:
            project.add_downloads(day, "0.0.1", Downloads(amount))
        assert project.month_downloads() == Downloads(25)
Beispiel #4
0
def test_retrieve_project_with_old_format(mongo_client: MongoClient,
                                          repository: ProjectRepository):
    """A project whose downloads are embedded in its document loads correctly.

    The stored days are deliberately out of order; the repository must return
    them chronologically and keep the stored total untouched.
    """
    stored_document = {
        "name": "climoji",
        "total_downloads": 1100,
        "downloads": {
            "2020-04-01": [["2.0", 30]],
            "2020-04-02": [["2.0", 10]],
            "2020-03-31": [["2.0", 40]],
            "2020-04-03": [["2.0", 30]],
        },
    }
    mongo_client.pepy_test.projects.replace_one(
        {"name": "climoji"}, stored_document, upsert=True)

    project = repository.get("climoji")

    assert ProjectName("climoji") == project.name
    assert datetime.date(2020, 3, 31) == project.min_date
    assert Downloads(1100) == project.total_downloads
    chronological = [
        (datetime.date(2020, 3, 31), 40),
        (datetime.date(2020, 4, 1), 30),
        (datetime.date(2020, 4, 2), 10),
        (datetime.date(2020, 4, 3), 30),
    ]
    expected = [
        ProjectVersionDownloads(day, "2.0", Downloads(count))
        for day, count in chronological
    ]
    assert expected == project.last_downloads()
Beispiel #5
0
def test_retrieve_project_with_new_format(mongo_client: MongoClient,
                                          repository: ProjectRepository):
    """A project whose downloads live in ``project_downloads`` loads correctly.

    One document per day is inserted, deliberately out of order; the
    repository must return the days chronologically and keep the stored total.
    """
    mongo_client.pepy_test.projects.replace_one(
        {"name": "climoji"},
        {
            "name": "climoji",
            "total_downloads": 1100,
        },
        upsert=True,
    )
    # (iso date, downloads) in the order the documents are inserted.
    inserted_days = [
        ("2020-04-01", 30),
        ("2020-04-02", 10),
        ("2020-03-31", 40),
        ("2020-04-03", 30),
    ]
    operations = [
        InsertOne({
            "project": "climoji",
            "date": iso_day,
            "downloads": [{
                "version": "2.0",
                "downloads": count
            }]
        })
        for iso_day, count in inserted_days
    ]
    mongo_client.pepy_test.project_downloads.bulk_write(operations)

    project = repository.get("climoji")

    assert ProjectName("climoji") == project.name
    assert datetime.date(2020, 3, 31) == project.min_date
    assert Downloads(1100) == project.total_downloads
    chronological = [
        (datetime.date(2020, 3, 31), 40),
        (datetime.date(2020, 4, 1), 30),
        (datetime.date(2020, 4, 2), 10),
        (datetime.date(2020, 4, 3), 30),
    ]
    expected = [
        ProjectVersionDownloads(day, "2.0", Downloads(count))
        for day, count in chronological
    ]
    assert expected == project.last_downloads()
Beispiel #6
0
def test_add_downloads_to_project():
    """Adding one day of downloads updates total, history and version set."""
    today = datetime.now().date()
    release = "2.3.1"
    amount = Downloads(10)
    project = Project(ProjectName("random"), Downloads(0))

    project.add_downloads(today, release, amount)

    assert project.total_downloads == amount
    expected_history = [ProjectVersionDownloads(today, release, amount)]
    assert project.last_downloads() == expected_history
    assert project.versions() == {release}
Beispiel #7
0
def test_project_replace_downloads():
    """A second add for the same date and version replaces the first.

    Starting from a total of 10, adding 25 and then 5 for the same
    (date, version) must leave a total of 15 and a single history entry.
    """
    today = datetime.now().date()
    release = "2.3.1"
    project = Project(ProjectName("random"), Downloads(10))

    for downloads, pip_downloads in ((25, 25), (5, 5)):
        project.add_downloads(today, release, DayDownloads(downloads, pip_downloads))

    assert project.total_downloads == Downloads(15)
    expected_history = [
        ProjectVersionDownloads(today, release, Downloads(5), Downloads(5))
    ]
    assert project.last_downloads() == expected_history
    assert project.versions() == {release}
Beispiel #8
0
def step_impl(context: Context, name: str):
    """Save a stub project and its per-day downloads taken from the step table.

    Each table row supplies a ``date`` (``YYYY-MM-DD``) and a ``downloads``
    count. The project is created with the sum of all rows as its total and
    saved, then the per-day entries are saved.
    """
    project_name = ProjectName(name)
    downloads = []
    total_downloads = 0
    for row in context.table:
        date = datetime.strptime(row["date"], "%Y-%m-%d").date()
        # Convert once so the running total and the Downloads value object
        # are both built from the same integer, not the raw table string.
        day_total = int(row["downloads"])
        total_downloads += day_total
        downloads.append(
            ProjectDownloads(project_name, Downloads(day_total), date))
    project = ProjectStub.create(project_name, Downloads(total_downloads))
    context.container.project_repository.save_projects([project])
    context.container.project_repository.save_day_downloads(downloads)
Beispiel #9
0
 def handle(self, cmd: ImportTotalDownloads):
     """Overwrite project totals from the file at ``cmd.file_path``.

     Rows are consumed in batches of 250 via ``self._batch``. Within a batch
     each project is fetched from the repository at most once (or created
     with zero downloads when unknown), its total is set from the row, and
     the batch's projects are saved together.
     """
     batches = enumerate(self._batch(cmd.file_path, 250), start=1)
     for batch_number, rows in batches:
         self._logger.info(f"Batch {batch_number}")
         touched = {}
         for row in rows:
             # Reuse the instance already seen in this batch, if any.
             if row.project in touched:
                 current = touched[row.project]
             else:
                 current = self._project_repository.get(row.project)
                 if current is None:
                     current = Project(ProjectName(row.project), Downloads(0))
             current.total_downloads = Downloads(row.total_downloads)
             touched[row.project] = current
         self._project_repository.save_projects(list(touched.values()))
Beispiel #10
0
def test_save_many_projects_with_new_format(mongo_client: MongoClient,
                                            repository: ProjectRepository):
    """save_projects writes the project document and one document per day."""
    project = Project(ProjectName("climoji"), Downloads(100))
    additions = [
        (datetime.date(2020, 3, 31), "2.0", 40),
        (datetime.date(2020, 3, 31), "2.0.1", 30),
        (datetime.date(2020, 4, 1), "2.0", 20),
    ]
    for day, version, count in additions:
        project.add_downloads(day, version, Downloads(count))
    repository.save_projects([project])

    stored_project = mongo_client.pepy_test.projects.find_one(
        {"name": project.name.name})
    expected_project = {
        "name": "climoji",
        "total_downloads": 190,
        'monthly_downloads': 0
    }
    # Only the listed keys are checked; extra stored keys are ignored.
    for field, wanted in expected_project.items():
        assert field in stored_project
        assert wanted == stored_project[field]

    cursor = mongo_client.pepy_test.project_downloads.find(
        {"project": project.name.name})
    stored_days = sorted(cursor, key=lambda x: x['date'])
    expected_days = [
        {
            "project": "climoji",
            "date": "2020-03-31",
            "downloads": [
                {"version": "2.0", "downloads": 40},
                {"version": "2.0.1", "downloads": 30},
            ],
        },
        {
            "project": "climoji",
            "date": "2020-04-01",
            "downloads": [
                {"version": "2.0", "downloads": 20},
            ],
        },
    ]
    assert len(expected_days) == len(stored_days)
    for wanted_day, stored_day in zip(expected_days, stored_days):
        for field, wanted in wanted_day.items():
            assert field in stored_day
            assert wanted == stored_day[field]
Beispiel #11
0
 def handle(self, cmd: ImportDownloadsFile):
     """Import projects from the CSV file object in ``cmd.file``.

     The first line is treated as a header and skipped. Every remaining
     non-empty row becomes a ``Project`` built from column 0 (name) and
     column 1 (downloads), and all projects are saved in one call.
     """
     reader = csv.reader(cmd.file, delimiter=",")
     # The default keeps an empty file from raising StopIteration.
     next(reader, None)
     # csv.reader yields [] for blank lines; skip them instead of failing
     # with IndexError on r[0].
     projects = [
         Project(ProjectName(r[0]), Downloads(r[1])) for r in reader if r
     ]
     self._project_repository.save_projects(projects)
Beispiel #12
0
    def _last_downloads(project: Project, days: int) -> Downloads:
        """Sum the project's downloads dated within the last ``days`` days.

        Entries from ``project.last_downloads()`` count when their date is on
        or after today minus ``days``.
        """
        cutoff = datetime.now().date() - timedelta(days=days)
        recent_values = [
            entry.downloads.value
            for entry in project.last_downloads()
            if entry.date >= cutoff
        ]
        return Downloads(sum(recent_values))
Beispiel #13
0
 def get(self, project_name: str) -> Optional[Project]:
     """Load a project and the downloads embedded in its document.

     The name is stripped and lower-cased before the lookup. Returns
     ``None`` when no document matches.
     """
     lookup_name = project_name.strip().lower()
     document = self._client.projects.find_one({"name": lookup_name})
     if document is None:
         return None

     stored_total = Downloads(document["total_downloads"])
     project = Project(ProjectName(document["name"]), stored_total)

     # Replay the days in chronological order (ISO dates sort as strings).
     per_day = list(document["downloads"].items())
     per_day.sort(key=lambda item: item[0])
     for iso_day, entries in per_day:
         for entry in entries:
             project.add_downloads(datetime.date.fromisoformat(iso_day),
                                   entry[0], Downloads(entry[1]))
             # Don't count the downloads twice
             project.total_downloads -= Downloads(entry[1])
     return project
Beispiel #14
0
 def handle(self, cmd: UpdateVersionDownloads):
     """Fetch per-version downloads for ``cmd.date`` and store them in batches.

     Rows from the stats viewer are processed 250 at a time. Within a batch
     each project is loaded from the repository at most once (or created with
     zero downloads when unknown), the row's downloads are added, and the
     batch's projects are saved together.

     Raises:
         InvalidAdminPassword: when ``cmd.password`` fails the admin check.
     """
     if not self._admin_password_checker.check(cmd.password):
         self._logger.info("Invalid password")
         raise InvalidAdminPassword(cmd.password)
     self._logger.info(f"Getting downloads from date {cmd.date}...")
     stats_result = self._stats_viewer.get_version_downloads(cmd.date)
     self._logger.info(
         f"Retrieved {stats_result.total_rows} downloads. Saving to db...")
     start_time = timeit.default_timer()
     batch_size = 250
     # Ceiling division so a trailing partial batch is counted too
     # (assumes self._batch yields chunks of at most batch_size rows).
     total_batches = (stats_result.total_rows + batch_size - 1) // batch_size
     # Number batches from 1 so the last line reads "Batch N of N".
     for batch_iterator, batch in enumerate(
             self._batch(stats_result.rows, batch_size), start=1):
         self._logger.info(f"Batch {batch_iterator} of {total_batches}")
         projects = {}
         for row in batch:
             # Reuse the instance already touched in this batch so several
             # rows for one project accumulate on the same object.
             if row.project in projects:
                 project = projects.get(row.project)
             else:
                 project = self._project_repository.get(
                     row.project, downloads_from=cmd.date)
             if project is None:
                 project = Project(ProjectName(row.project), Downloads(0))
             project.add_downloads(
                 row.date, row.version,
                 DayDownloads(row.downloads, row.pip_downloads))
             projects[row.project] = project
         self._project_repository.save_projects(list(projects.values()))
     end_time = timeit.default_timer()
     self._logger.info(
         f"Total downloads updated. Total time + {(end_time - start_time):.4f} seconds"
     )
Beispiel #15
0
def step_impl(context: Context):
    """Save one day-downloads entry per row of the step table.

    Each row supplies ``name``, ``downloads`` and a ``date`` (``YYYY-MM-DD``).
    """
    entries = []
    for record in context.table:
        day = datetime.strptime(record["date"], "%Y-%m-%d").date()
        owner = ProjectName(record["name"])
        amount = Downloads(record["downloads"])
        entries.append(ProjectDownloads(owner, amount, day))
    context.container.project_repository.save_day_downloads(entries)
Beispiel #16
0
def test_filter_date():
    """last_downloads(min_date) returns only entries on or after that date.

    Totals, known versions and ``min_date`` still reflect every added entry.
    """
    project = Project(ProjectName("random"), Downloads(10))
    additions = [
        (date(2020, 3, 9), "0.0.6", 20),
        (date(2020, 4, 10), "0.0.2", 10),
        (date(2020, 4, 10), "0.0.4", 10),
        (date(2020, 4, 11), "0.0.4", 10),
    ]
    for day, version, count in additions:
        project.add_downloads(day, version, Downloads(count))

    assert project.total_downloads == Downloads(60)
    expected_from_april_10 = [
        ProjectVersionDownloads(date(2020, 4, 10), "0.0.2", Downloads(10)),
        ProjectVersionDownloads(date(2020, 4, 10), "0.0.4", Downloads(10)),
        ProjectVersionDownloads(date(2020, 4, 11), "0.0.4", Downloads(10)),
    ]
    assert project.last_downloads(date(2020, 4, 10)) == expected_from_april_10
    assert project.versions() == {"0.0.6", "0.0.2", "0.0.4"}
    assert project.min_date == date(2020, 3, 9)
Beispiel #17
0
 def handle(self, cmd: UpdateDownloads):
     """Extract the downloads for ``cmd.date`` and persist them.

     Raises:
         InvalidAdminPassword: when ``cmd.password`` fails the admin check.
     """
     password_ok = self._admin_password_checker.check(cmd.password)
     if not password_ok:
         raise InvalidAdminPassword(cmd.password)

     extracted = self._downloads_extractor.get_downloads(cmd.date)
     # Add new projects if they don't exist before
     placeholders = [Project(item.name, Downloads(0)) for item in extracted]
     repository = self._project_repository
     repository.save_projects(placeholders)
     repository.save_day_downloads(extracted)
     repository.update_downloads(extracted)
Beispiel #18
0
 def find_random_projects(self, nr_items: int = 10) -> List[Project]:
     """Return up to ``nr_items`` projects picked in random order by the database."""
     sql = "SELECT name, downloads FROM projects ORDER BY random() LIMIT %s;"
     with self._conn, self._conn.cursor() as cursor:
         cursor.execute(sql, (nr_items, ))
         picked = []
         for record in cursor.fetchall():
             picked.append(Project(ProjectName(record[0]), Downloads(record[1])))
         return picked
Beispiel #19
0
 def find(self, project_name: ProjectName) -> Optional[Project]:
     """Look up a project by exact name; return ``None`` when no row matches."""
     sql = "SELECT name, downloads FROM projects WHERE name = %s"
     with self._conn, self._conn.cursor() as cursor:
         cursor.execute(sql, (project_name.name, ))
         rows = cursor.fetchall()
         if not rows:
             return None
         first = rows[0]
         return Project(ProjectName(first[0]), Downloads(first[1]))
Beispiel #20
0
def step_impl(context: Context, name: str):
    """Save a stub project, then its per-day downloads from the step table.

    Each table row supplies a ``date`` (``YYYY-MM-DD``) and ``downloads``.
    """
    repository = context.container.project_repository
    project_name = ProjectName(name)
    repository.save_projects([ProjectStub.create(project_name)])

    entries = []
    for record in context.table:
        day = datetime.strptime(record['date'], '%Y-%m-%d').date()
        entries.append(
            ProjectDownloads(project_name, Downloads(record['downloads']), day))
    context.container.project_repository.save_day_downloads(entries)
Beispiel #21
0
 def last_downloads(self,
                    project_name: ProjectName,
                    days: int = 30) -> List[ProjectDownloads]:
     """Return the newest ``days`` rows of per-day downloads for a project.

     Rows come back newest first, as ordered by the query.
     """
     with self._conn, self._conn.cursor() as cursor:
         cursor.execute(
             "SELECT date, downloads FROM downloads_per_day WHERE name = %s ORDER BY date DESC LIMIT %s",
             (project_name.name, days),
         )
         history = []
         for record in cursor.fetchall():
             # Column 0 is the date, column 1 the download count.
             history.append(
                 ProjectDownloads(project_name, Downloads(record[1]), record[0]))
         return history
Beispiel #22
0
 def handle(self, cmd: UpdateVersionDownloads):
     """Fetch per-version downloads for ``cmd.date`` and store them in batches.

     Rows are processed 1000 at a time; within a batch each project is loaded
     from the repository at most once, or created with zero downloads.

     Raises:
         InvalidAdminPassword: when ``cmd.password`` fails the admin check.
     """
     password_ok = self._admin_password_checker.check(cmd.password)
     if not password_ok:
         self._logger.info("Invalid password")
         raise InvalidAdminPassword(cmd.password)

     self._logger.info(f"Getting downloads from date {cmd.date}...")
     stats_result = self._stats_viewer.get_version_downloads(cmd.date)
     self._logger.info(f"Retrieved {stats_result.total_rows} downloads. Saving to db...")
     # NOTE(review): the timer is started but never read in the visible code.
     start_time = timeit.default_timer()
     for rows in self._batch(stats_result.rows, 1_000):
         touched = {}
         for row in rows:
             # Reuse the instance already seen in this batch, if any.
             if row.project in touched:
                 current = touched[row.project]
             else:
                 current = self._project_repository.get(row.project)
                 if current is None:
                     current = Project(ProjectName(row.project), Downloads(0))
             current.add_downloads(row.date, row.version, Downloads(row.downloads))
             touched[row.project] = current
         self._project_repository.save_projects(list(touched.values()))
Beispiel #23
0
def test_update_min_date_when_no_other_downloads():
    """A 2019 entry still counts in the total and versions, but not in history.

    After adding one 2019 entry and two 2020-04-10 entries, the test expects
    ``last_downloads()`` to list only the 2020 ones and ``min_date`` to be
    2020-04-10.
    """
    project = Project(ProjectName("random"), Downloads(10))
    additions = [
        (date(2019, 3, 9), "0.0.6", 20),
        (date(2020, 4, 10), "0.0.2", 10),
        (date(2020, 4, 10), "0.0.4", 10),
    ]
    for day, version, count in additions:
        project.add_downloads(day, version, Downloads(count))

    assert project.total_downloads == Downloads(50)
    expected_history = [
        ProjectVersionDownloads(date(2020, 4, 10), "0.0.2", Downloads(10)),
        ProjectVersionDownloads(date(2020, 4, 10), "0.0.4", Downloads(10)),
    ]
    assert project.last_downloads() == expected_history
    assert project.versions() == {"0.0.6", "0.0.2", "0.0.4"}
    assert project.min_date == date(2020, 4, 10)
Beispiel #24
0
 def handle(self, cmd: UpdateDownloads):
     """Extract the downloads for ``cmd.date`` and persist them, logging each step.

     Raises:
         InvalidAdminPassword: when ``cmd.password`` fails the admin check.
     """
     log = self._logger
     repository = self._project_repository

     password_ok = self._admin_password_checker.check(cmd.password)
     if not password_ok:
         log.info("Invalid password")
         raise InvalidAdminPassword(cmd.password)

     log.info(f"Getting downloads from date {cmd.date}...")
     extracted = self._downloads_extractor.get_downloads(cmd.date)
     log.info(f"Retrieved {len(extracted)} downloads. Saving to db...")

     # Add new projects if they don't exist before
     placeholders = [Project(item.name, Downloads(0)) for item in extracted]
     repository.save_projects(placeholders)
     log.info("New projects saved")

     repository.save_day_downloads(extracted)
     log.info("Downloads saved")

     repository.update_downloads(extracted)
     log.info("Total downloads updated")
Beispiel #25
0
    def get_downloads(self, date: datetime.date) -> List[ProjectDownloads]:
        """Return the download count of every project for the given day.

        The query counts rows grouped by ``file.project`` in the table whose
        name ends with the date formatted as ``YYYYMMDD``.
        """
        table_suffix = date.strftime("%Y%m%d")
        sql = """
            SELECT file.project as name, count(*) AS downloads
            FROM `the-psf.pypi.downloads{}`
            GROUP BY file.project
        """.format(table_suffix)

        job = self.client.query(sql, location='US')
        rows = job.result(self.TIMEOUT)
        return [
            ProjectDownloads(ProjectName(row['name']),
                             Downloads(row['downloads']), date)
            for row in rows
        ]
Beispiel #26
0
def test_remove_old_data():
    """An entry 181 days old is dropped from the history; 180 days is kept.

    The dropped entry still counts toward ``total_downloads``.
    """
    project = Project(ProjectName("random"), Downloads(10))
    old_date = datetime.now().date() - timedelta(days=181)
    limit_date = datetime.now().date() - timedelta(days=180)
    now_date = datetime.now().date()
    additions = [
        (old_date, "2.3.1", 10),
        (limit_date, "2.3.0", 20),
        (now_date, "2.3.2", 30),
    ]
    for day, version, count in additions:
        project.add_downloads(day, version, Downloads(count))

    assert project.total_downloads == Downloads(70)
    expected_history = [
        ProjectVersionDownloads(limit_date, "2.3.0", Downloads(20)),
        ProjectVersionDownloads(now_date, "2.3.2", Downloads(30)),
    ]
    assert project.last_downloads() == expected_history
    assert {"2.3.0", "2.3.2"}.issubset(project.versions())
Beispiel #27
0
def test_do_not_touch_already_saved_data(mongo_client: MongoClient,
                                         repository: ProjectRepository):
    """Saving must not rewrite a (date, version) entry that is already stored.

    After loading the project, adding 1 download for the stored day and
    saving, the stored day document must still show the original 30.
    """
    # Used for performance reasons
    mongo_client.pepy_test.projects.replace_one(
        {"name": "climoji"},
        {
            "name": "climoji",
            "total_downloads": 1100,
        },
        upsert=True,
    )
    stored_day = {
        "project": "climoji",
        "date": "2020-04-01",
        "downloads": [{
            "version": "2.0",
            "downloads": 30
        }]
    }
    mongo_client.pepy_test.project_downloads.bulk_write([InsertOne(stored_day)])

    project = repository.get("climoji")
    project.add_downloads(datetime.date(2020, 4, 1), "2.0", Downloads(1))
    repository.save(project)

    cursor = mongo_client.pepy_test.project_downloads.find(
        {"project": project.name.name})
    found_days = sorted(cursor, key=lambda x: x['date'])
    expected_days = [
        {
            "project": "climoji",
            "date": "2020-04-01",
            "downloads": [
                {"version": "2.0", "downloads": 30},
            ],
        },
    ]
    assert len(expected_days) == len(found_days)
    for wanted_day, found_day in zip(expected_days, found_days):
        for field, wanted in wanted_day.items():
            assert field in found_day
            assert wanted == found_day[field]
Beispiel #28
0
def test_downloads_format_thousands(
        downloads_formatter: DownloadsNumberFormatter):
    """12132 downloads are rendered as "12k"."""
    rendered = downloads_formatter.format(Downloads(12132))
    assert "12k" == rendered
Beispiel #29
0
def test_downloads_format_less_than_thousands(
        downloads_formatter: DownloadsNumberFormatter):
    """121 downloads are rendered unabbreviated as "121"."""
    rendered = downloads_formatter.format(Downloads(121))
    assert "121" == rendered
Beispiel #30
0
def test_downloads_format_0(downloads_formatter: DownloadsNumberFormatter):
    """Zero downloads are rendered as "0"."""
    rendered = downloads_formatter.format(Downloads(0))
    assert "0" == rendered