Example #1
0
def test_get_origin_visit_latest_snapshot(mocker, origin):
    """Check how often the visits listing is queried on snapshot lookup.

    Looking a visit up by the snapshot of the most recent visit is expected
    to succeed without calling ``get_origin_visits``, whereas looking it up
    by the snapshot of the first visit is expected to call it.
    """
    all_visits = get_origin_visits(origin)
    oldest, newest = all_visits[0], all_visits[-1]

    # Patch the listing only after the real visits have been fetched, so
    # the mock can replay them while recording whether it was called.
    patched_listing = mocker.patch(
        "swh.web.common.origin_visits.get_origin_visits",
        return_value=all_visits)

    found = get_origin_visit(origin, snapshot_id=newest["snapshot"])
    assert found == newest
    assert not patched_listing.called

    found = get_origin_visit(origin, snapshot_id=oldest["snapshot"])
    assert found == oldest
    assert patched_listing.called
Example #2
0
def _lookup_revision_id_by(origin_id, branch_name, timestamp):
    """Resolve a branch of an origin visit to a revision identifier.

    The visit is obtained with ``get_origin_visit`` using ``timestamp`` as
    ``visit_ts``; the branch is then searched in the snapshot of that visit.
    A branch of type ``alias`` is followed once to the branch it targets.

    Args:
        origin_id: identifier of the origin, forwarded as ``{'id': origin_id}``
        branch_name: name of the snapshot branch to resolve
        timestamp: value forwarded as ``visit_ts`` to select the visit

    Returns:
        The ``target`` of the resolved branch of type ``revision``.

    Raises:
        NotFoundExc: if the branch is missing, or neither it nor the branch
            it aliases targets a revision.
    """
    def _get_snapshot_branch(snapshot_id, branch_name):
        # Use the snapshot handed in by the caller instead of reaching for
        # the enclosing ``visit`` variable, which is only bound later on.
        snapshot = lookup_snapshot(snapshot_id,
                                   branches_from=branch_name,
                                   branches_count=10)
        # None when the requested name is not part of the returned window
        return snapshot['branches'].get(branch_name)

    visit = get_origin_visit({'id': origin_id}, visit_ts=timestamp)
    branch = _get_snapshot_branch(visit['snapshot'], branch_name)
    rev_id = None
    if branch and branch['target_type'] == 'revision':
        rev_id = branch['target']
    elif branch and branch['target_type'] == 'alias':
        # Follow the alias a single level only
        branch = _get_snapshot_branch(visit['snapshot'], branch['target'])
        if branch and branch['target_type'] == 'revision':
            rev_id = branch['target']

    if not rev_id:
        raise NotFoundExc('Revision for origin %s and branch %s not found.'
                          % (origin_id, branch_name))

    return rev_id
Example #3
0
def test_get_origin_visit_return_first_valid_partial_visit(
        archive_data, new_origin, new_snapshots):
    """Check that a partial visit with a snapshot is returned by default.

    One visit is created per snapshot of ``new_snapshots``, ten days apart.
    Only visits from the fourth one onwards get a status stored: a
    ``partial`` status referencing their snapshot. For the first three, a
    ``full`` status without snapshot is built but never stored.
    """
    archive_data.origin_add([new_origin])

    visit_ids = []
    for idx, snapshot in enumerate(new_snapshots):
        when = now() + timedelta(days=idx * 10)
        origin_visit = OriginVisit(
            origin=new_origin.url,
            date=when,
            type="git",
        )
        added = archive_data.origin_visit_add([origin_visit])[0]
        archive_data.snapshot_add([snapshot])

        with_snapshot = idx > 2
        status = OriginVisitStatus(
            origin=new_origin.url,
            visit=added.visit,
            date=when + timedelta(minutes=5),
            status="partial" if with_snapshot else "full",
            snapshot=snapshot.id if with_snapshot else None,
        )
        if with_snapshot:
            archive_data.origin_visit_status_add([status])

        visit_ids.append(added.visit)

    # the most recent visit is the one expected back
    expected_visit = archive_data.origin_visit_get_by(new_origin.url,
                                                      visit_ids[-1])
    assert get_origin_visit(OriginInfo(url=new_origin.url)) == expected_visit
Example #4
0
def test_get_origin_visit_non_resolvable_snapshots(archive_data, new_origin,
                                                   new_snapshots):
    """Check that visits lacking a stored status are skipped by default.

    One visit is created per snapshot of ``new_snapshots``, ten days apart.
    A ``full`` status referencing the snapshot is stored for the first three
    visits only; for the later ones the status is built but never stored.
    """
    archive_data.origin_add([new_origin])

    visit_ids = []
    for idx, snapshot in enumerate(new_snapshots):
        when = now() + timedelta(days=idx * 10)
        origin_visit = OriginVisit(
            origin=new_origin.url,
            date=when,
            type="git",
        )
        added = archive_data.origin_visit_add([origin_visit])[0]
        archive_data.snapshot_add([snapshot])

        status = OriginVisitStatus(
            origin=new_origin.url,
            visit=added.visit,
            date=when + timedelta(minutes=5),
            status="full",
            snapshot=snapshot.id,
        )
        if idx < 3:
            archive_data.origin_visit_status_add([status])
        visit_ids.append(added.visit)

    # the third visit is the latest one with a stored status
    expected_visit = archive_data.origin_visit_get_by(new_origin.url,
                                                      visit_ids[2])
    assert get_origin_visit(OriginInfo(url=new_origin.url)) == expected_visit
Example #5
0
def _lookup_revision_id_by(origin, branch_name, timestamp):
    def _get_snapshot_branch(snapshot, branch_name):
        snapshot = lookup_snapshot(
            visit["snapshot"],
            branches_from=branch_name,
            branches_count=10,
            branch_name_exclude_prefix=None,
        )
        branch = None
        if branch_name in snapshot["branches"]:
            branch = snapshot["branches"][branch_name]
        return branch

    if isinstance(origin, int):
        origin = {"id": origin}
    elif isinstance(origin, str):
        origin = {"url": origin}
    else:
        raise TypeError('"origin" must be an int or a string.')

    from swh.web.common.origin_visits import get_origin_visit

    visit = get_origin_visit(origin, visit_ts=timestamp)
    branch = _get_snapshot_branch(visit["snapshot"], branch_name)
    rev_id = None
    if branch and branch["target_type"] == "revision":
        rev_id = branch["target"]
    elif branch and branch["target_type"] == "alias":
        branch = _get_snapshot_branch(visit["snapshot"], branch["target"])
        if branch and branch["target_type"] == "revision":
            rev_id = branch["target"]

    if not rev_id:
        raise NotFoundExc("Revision for origin %s and branch %s not found." %
                          (origin.get("url"), branch_name))

    return rev_id
Example #6
0
def get_origin_visit_snapshot(origin_info,
                              visit_ts=None,
                              visit_id=None,
                              snapshot_id=None):
    """Return the content of the snapshot attached to an origin visit.

    The visit is selected by ``get_origin_visit`` from the provided
    criteria, then the content of its snapshot is fetched through
    ``get_snapshot_content``.

    .. note:: Visit selection rules (closest visit to a timestamp, latest
        visit when no criterion is given), caching and any cap on the
        number of returned branches are handled by the helpers called
        here and are not visible in this function.

    Args:
        origin_info (dict): a dict filled with origin information
        visit_ts (int or str): an ISO date string or Unix timestamp
            identifying the visit
        visit_id (int): optional visit id for disambiguation in case
            several visits have the same timestamp
        snapshot_id: optional snapshot identifier, forwarded as is to
            ``get_origin_visit``

    Returns:
        The value returned by ``get_snapshot_content`` for the snapshot of
        the selected visit (presumably the lists of branches and releases
        of that visit -- confirm against ``get_snapshot_content``).

    Raises:
        NotFoundExc: presumably raised by ``get_origin_visit`` if the
            origin or its visit are not found
    """
    selected_visit = get_origin_visit(origin_info, visit_ts, visit_id,
                                      snapshot_id)
    visit_snapshot = selected_visit['snapshot']
    return get_snapshot_content(visit_snapshot)
Example #7
0
def test_get_snapshot_context_with_origin(archive_data, origin):
    """Compare ``get_snapshot_context`` with a hand-built expected context.

    Several browse contexts are exercised for the same origin: without any
    visit criterion, with the timestamp of the first visit and with the id
    of the second visit. For each of them the expected ``SnapshotContext``
    is rebuilt from the visit, its snapshot content and the reversed URLs.
    """
    visits = get_origin_visits(origin)

    timestamp = format_utc_iso_date(visits[0]["date"],
                                    "%Y-%m-%dT%H:%M:%SZ")
    second_visit_id = visits[1]["visit"]

    # (browse context, keyword arguments given to get_snapshot_context)
    scenarios = (
        ("content", {"origin_url": origin["url"], "path": "/some/path"}),
        ("directory", {"origin_url": origin["url"]}),
        ("log", {"origin_url": origin["url"]}),
        ("directory", {"origin_url": origin["url"],
                       "timestamp": timestamp}),
        ("directory", {"origin_url": origin["url"],
                       "visit_id": second_visit_id}),
    )

    for browse_context, kwargs in scenarios:
        visit_info = get_origin_visit(
            {"url": kwargs["origin_url"]},
            visit_ts=kwargs.get("timestamp"),
            visit_id=kwargs.get("visit_id"),
        )
        snapshot = visit_info["snapshot"]

        snapshot_context = get_snapshot_context(
            **kwargs, browse_context=browse_context)

        query_params = dict(kwargs)

        branches, releases, _ = get_snapshot_content(snapshot)
        releases = list(reversed(releases))

        browse_view = f"browse-origin-{browse_context}"
        revision_id = None
        root_directory = None
        for branch in branches:
            if branch["name"] == "HEAD":
                revision_id = branch["revision"]
                root_directory = branch["directory"]
            branch["url"] = reverse(
                browse_view,
                query_params={"branch": branch["name"], **query_params},
            )
        for release in releases:
            release["url"] = reverse(
                browse_view,
                query_params={"release": release["name"], **query_params},
            )

        # The URLs below are built without the "path" parameter; it is put
        # back afterwards for the expected query parameters.
        query_params.pop("path", None)

        branches_url = reverse("browse-origin-branches",
                               query_params=query_params)
        releases_url = reverse("browse-origin-releases",
                               query_params=query_params)
        origin_visits_url = reverse(
            "browse-origin-visits",
            query_params={"origin_url": kwargs["origin_url"]})
        is_empty = not (branches or releases)
        snapshot_swhid = gen_swhid("snapshot", snapshot)
        snapshot_sizes = archive_data.snapshot_count_branches(snapshot)

        visit_info["url"] = reverse("browse-origin-directory",
                                    query_params=query_params)
        visit_info["formatted_date"] = format_utc_iso_date(visit_info["date"])

        if "path" in kwargs:
            query_params["path"] = kwargs["path"]

        expected = SnapshotContext(
            branch="HEAD",
            branch_alias=True,
            branches=branches,
            branches_url=branches_url,
            is_empty=is_empty,
            origin_info={"url": origin["url"]},
            origin_visits_url=origin_visits_url,
            release=None,
            release_alias=False,
            release_id=None,
            query_params=query_params,
            releases=releases,
            releases_url=releases_url,
            revision_id=revision_id,
            revision_info=_get_revision_info(archive_data, revision_id),
            root_directory=root_directory,
            snapshot_id=snapshot,
            snapshot_sizes=snapshot_sizes,
            snapshot_swhid=snapshot_swhid,
            url_args={},
            visit_info=visit_info,
        )

        if revision_id:
            expected["revision_info"]["revision_url"] = gen_revision_url(
                revision_id, snapshot_context)

        assert snapshot_context == expected

        _check_branch_release_revision_parameters(archive_data, expected,
                                                  browse_context, kwargs,
                                                  branches, releases)
Example #8
0
    def test_get_origin_visit(self, mock_origin_visits):
        """Check visit lookup by id and by timestamp on five mocked visits.

        ``mock_origin_visits`` is configured to return the visits built
        here (presumably it is the patched visits listing -- confirm
        against the decorator of this test).
        """
        origin_info = {
            'id': 2,
            'type': 'git',
            'url': 'https://github.com/foo/bar',
        }
        visit_dates = (
            '2015-07-09T21:09:24+00:00',
            '2016-02-23T18:05:23.312045+00:00',
            '2016-03-28T01:35:06.554111+00:00',
            '2016-06-18T01:22:24.808485+00:00',
            '2016-08-14T12:10:00.536702+00:00',
        )
        # visit numbers start at 1 and follow the chronological order
        visits = [
            {'status': 'full',
             'date': visit_date,
             'visit': number,
             'origin': origin_info['id']}
            for number, visit_date in enumerate(visit_dates, start=1)
        ]
        mock_origin_visits.return_value = visits

        # an unknown visit id must be reported with the origin details
        unknown_visit_id = 12
        with self.assertRaises(NotFoundExc) as cm:
            get_origin_visit(origin_info, visit_id=unknown_visit_id)
        message = cm.exception.args[0]
        for fragment in ('Visit with id %s' % unknown_visit_id,
                         'type %s' % origin_info['type'],
                         'url %s' % origin_info['url']):
            self.assertIn(fragment, message)

        self.assertEqual(get_origin_visit(origin_info, visit_id=2),
                         visits[1])

        # (visit_ts given to get_origin_visit, index of the expected visit)
        timestamp_cases = (
            ('2016-02-23T18:05:23.312045+00:00', 1),
            ('2016-02-20', 1),
            ('2016-06-18T01:22', 3),
            ('2016-06-18 01:22', 3),
            (1466208000, 3),
            ('2014-01-01', 0),
            ('2018-01-01', -1),
        )
        for visit_ts, expected_index in timestamp_cases:
            found = get_origin_visit(origin_info, visit_ts=visit_ts)
            self.assertEqual(found, visits[expected_index])
Example #9
0
def test_get_origin_visit(mocker, snapshots):
    """Check visit lookup by id and by timestamp on five mocked visits.

    The visits listing is patched to return visits built here, each one
    referencing one of the first five entries of ``snapshots``.
    """
    mock_origin_visits = mocker.patch(
        "swh.web.common.origin_visits.get_origin_visits")
    origin_info = {
        "url": "https://github.com/foo/bar",
    }
    visit_dates = (
        "2015-07-09T21:09:24+00:00",
        "2016-02-23T18:05:23.312045+00:00",
        "2016-03-28T01:35:06.554111+00:00",
        "2016-06-18T01:22:24.808485+00:00",
        "2016-08-14T12:10:00.536702+00:00",
    )
    # visit numbers start at 1 and follow the chronological order
    visits = [
        {
            "status": "full",
            "date": visit_date,
            "visit": idx + 1,
            "origin": "https://github.com/foo/bar",
            "type": "git",
            "snapshot": hash_to_hex(snapshots[idx].id),
        }
        for idx, visit_date in enumerate(visit_dates)
    ]
    mock_origin_visits.return_value = visits

    # an unknown visit id must be reported with the origin url
    unknown_visit_id = 12
    with pytest.raises(NotFoundExc) as exc_info:
        get_origin_visit(origin_info, visit_id=unknown_visit_id)

    assert exc_info.match("Visit with id %s" % unknown_visit_id)
    assert exc_info.match("url %s" % origin_info["url"])

    assert get_origin_visit(origin_info, visit_id=2) == visits[1]

    # (visit_ts given to get_origin_visit, index of the expected visit)
    timestamp_cases = (
        ("2016-02-23T18:05:23.312045+00:00", 1),
        ("2016-02-20", 1),
        ("2016-06-18T01:22", 3),
        ("2016-06-18 01:22", 3),
        ("2014-01-01", 0),
        ("2018-01-01", -1),
    )
    for visit_ts, expected_index in timestamp_cases:
        found = get_origin_visit(origin_info, visit_ts=visit_ts)
        assert found == visits[expected_index]
Example #10
0
def get_snapshot_context(snapshot_id=None,
                         origin_type=None,
                         origin_url=None,
                         timestamp=None,
                         visit_id=None):
    """
    Utility function to compute relevant information when navigating
    the archive in a snapshot context. The snapshot is either
    retrieved from an origin visit (when ``origin_url`` is provided)
    or referenced directly by its id.

    Args:
        snapshot_id (str): hexadecimal representation of a snapshot
            identifier; when ``origin_url`` is also provided it is
            forwarded to the visit lookup and then replaced by the
            snapshot of the visit found
        origin_type (str): the origin type (git, svn, deposit, ...)
        origin_url (str): the origin url
            (e.g. https://github.com/(user)/(repo)/)
        timestamp (str): a datetime string for retrieving the closest
            visit of the origin
        visit_id (int): optional visit id for disambiguation in case
            of several visits with the same timestamp

    Returns:
        A dict with the following entries:
            * swh_type: 'origin' when an origin url was provided,
              'snapshot' otherwise
            * swh_object_id: persistent identifier of the snapshot
            * snapshot_id: the identifier of the snapshot
            * snapshot_size: result of the snapshot size lookup
            * is_empty: whether the values of snapshot_size sum to zero
            * origin_info: origin information or None
            * origin_type: the origin type as provided by the caller
            * visit_info: visit information or None, enriched with
              the 'fmt_date' and 'url' entries
            * branches: the list of branches of the snapshot
            * releases: the list of releases of the snapshot,
              in reversed order
            * branch: always None
            * release: always None
            * browse_url: the url to browse the origin visits or
              the snapshot
            * branches_url: the url to browse the branches
            * releases_url: the url to browse the releases
            * url_args: dict containing url arguments to use when
              browsing in that context
            * query_params: dict containing the query parameters to
              use when browsing in that context

    Raises:
        NotFoundExc: if no snapshot is found for the visit of an origin.
    """
    origin_info = None
    visit_info = None
    url_args = None
    browse_url = None
    branches_url = None
    releases_url = None
    query_params = {}
    branches, releases = [], []
    swh_type = 'origin' if origin_url else 'snapshot'

    if origin_url:
        origin_info = get_origin_info(origin_url, origin_type)

        visit_info = get_origin_visit(origin_info, timestamp, visit_id,
                                      snapshot_id)
        fmt_date = format_utc_iso_date(visit_info['date'])
        visit_info['fmt_date'] = fmt_date
        snapshot_id = visit_info['snapshot']

        if not snapshot_id:
            raise NotFoundExc('No snapshot associated to the visit of origin '
                              '%s on %s' % (origin_url, fmt_date))

        # the visit found may have another date than the requested one:
        # switch to its exact date for the lookups and urls below
        if timestamp:
            timestamp = visit_info['date']

        branches, releases = get_origin_visit_snapshot(
            origin_info, timestamp, visit_id, snapshot_id)

        url_args = {
            'origin_type': origin_type,
            'origin_url': origin_info['url']
        }
        query_params = {'visit_id': visit_id}

        # the visits listing url is built before the timestamp gets
        # added to the url arguments
        browse_url = reverse('browse-origin-visits', url_args=url_args)

        if timestamp:
            url_args['timestamp'] = format_utc_iso_date(
                timestamp, '%Y-%m-%dT%H:%M:%S')

        def _reverse_in_visit(view_name):
            # url of a view sharing the url arguments and query
            # parameters of the visit
            return reverse(view_name,
                           url_args=url_args,
                           query_params=query_params)

        visit_info['url'] = _reverse_in_visit('browse-origin-directory')
        branches_url = _reverse_in_visit('browse-origin-branches')
        releases_url = _reverse_in_visit('browse-origin-releases')
    elif snapshot_id:
        branches, releases = get_snapshot_content(snapshot_id)
        url_args = {'snapshot_id': snapshot_id}
        browse_url = reverse('browse-snapshot', url_args=url_args)
        branches_url = reverse('browse-snapshot-branches', url_args=url_args)
        releases_url = reverse('browse-snapshot-releases', url_args=url_args)

    releases = list(reversed(releases))

    snapshot_size = service.lookup_snapshot_size(snapshot_id)
    is_empty = sum(snapshot_size.values()) == 0
    swh_snp_id = persistent_identifier('snapshot', snapshot_id)

    return {
        'swh_type': swh_type,
        'swh_object_id': swh_snp_id,
        'snapshot_id': snapshot_id,
        'snapshot_size': snapshot_size,
        'is_empty': is_empty,
        'origin_info': origin_info,
        # keep track if the origin type was provided as url argument
        'origin_type': origin_type,
        'visit_info': visit_info,
        'branches': branches,
        'releases': releases,
        'branch': None,
        'release': None,
        'browse_url': browse_url,
        'branches_url': branches_url,
        'releases_url': releases_url,
        'url_args': url_args,
        'query_params': query_params
    }