Beispiel #1
0
def lookup_origin(origin: OriginInfo) -> OriginInfo:
    """Return information about the origin matching dict origin.

    Besides the URL exactly as provided, close variants of it are also
    looked up in storage: the URL with its trailing slash toggled and,
    when the "://" separator was mangled into ":/", the URL with the
    separator restored. The first origin found among the candidates is
    returned.

    Args:
        origin: origin's dict with 'url' key

    Returns:
        origin information as dict.

    Raises:
        NotFoundExc: if no candidate URL matches an origin in storage.

    """
    url = origin["url"]
    origin_urls = [url]
    if url:
        if url.endswith("/"):
            # handle case when user provided an origin url with a trailing
            # slash while the url in storage does not have it (e.g. GitHub)
            origin_urls.append(url[:-1])
        else:
            # handle case when user provided an origin url without a trailing
            # slash while the url in storage has it (e.g. Debian source
            # package)
            origin_urls.append(f"{url}/")

        # handle case where the "://" character sequence was mangled into ":/"
        try:
            parsed_url = urlparse(url)
        except ValueError:
            # urlparse rejects some malformed URLs; this lookup is best
            # effort so simply skip the extra candidate in that case
            parsed_url = None
        if parsed_url is not None and parsed_url.scheme and not parsed_url.netloc:
            mangled_prefix = f"{parsed_url.scheme}:/"
            fixed_prefix = f"{parsed_url.scheme}://"
            if url.startswith(mangled_prefix) and not url.startswith(fixed_prefix):
                # Only repair the leading separator: a blanket str.replace
                # would also rewrite any "scheme:/" occurring later in the
                # path (e.g. an archived URL embedded in the path).
                origin_urls.append(fixed_prefix + url[len(mangled_prefix):])

    origins = [o for o in storage.origin_get(origin_urls) if o is not None]
    if not origins:
        msg = "Origin with url %s not found!" % origin["url"]
        raise NotFoundExc(msg)
    return converters.from_origin(origins[0].to_dict())
Beispiel #2
0
def lookup_origins_by_sha1s(
        sha1s: List[str]) -> Iterator[Optional[OriginInfo]]:
    """Lookup origins from the sha1 hash values of their URLs.

    The hexadecimal hashes are converted to bytes, handed to storage in a
    single call, and each entry returned by storage is passed through
    ``converters.from_origin`` as it is consumed.

    Args:
        sha1s: list of sha1s hexadecimal representation

    Yields:
        origin information as dict
    """
    raw_hashes = list(map(hashutil.hash_to_bytes, sha1s))
    found = storage.origin_get_by_sha1(raw_hashes)
    yield from (converters.from_origin(entry) for entry in found)
def test_from_origin():
    """Check that converters.from_origin returns an equal dict for an origin."""
    origin_fields = dict(
        id=9,
        type="ftp",
        url="rsync://ftp.gnu.org/gnu/octave",
    )
    # Snapshot the expectation before the call so that it stays independent
    # of whatever the converter does with its input.
    expected = dict(origin_fields)

    converted = converters.from_origin(origin_fields)

    assert converted == expected
Beispiel #4
0
def lookup_origins(page_token: Optional[str],
                   limit: int = 100) -> PagedResult[OriginInfo]:
    """Get list of archived software origins in a paginated way.

    Args:
        page_token: token forwarded to ``storage.origin_list`` to select
            the page to fetch (pass None when no token is available)
        limit: value forwarded to ``storage.origin_list`` as ``limit``

    Returns:
        Page of OriginInfo, carrying the next page token reported by
        storage

    """
    storage_page = storage.origin_list(page_token=page_token, limit=limit)

    origin_infos = []
    for stored_origin in storage_page.results:
        origin_infos.append(converters.from_origin(stored_origin.to_dict()))

    return PagedResult(
        origin_infos,
        next_page_token=storage_page.next_page_token,
    )
Beispiel #5
0
    def test_from_origin(self):
        """Check that converters.from_origin returns an equal dict."""
        # given
        source = {
            'id': 9,
            'type': 'ftp',
            'url': 'rsync://ftp.gnu.org/gnu/octave',
        }
        # copy taken before the call so the expectation cannot be affected
        # by the converter
        wanted = source.copy()

        # when
        got = converters.from_origin(source)

        # then
        self.assertEqual(got, wanted)
Beispiel #6
0
def lookup_origin(origin):
    """Return information about the origin matching dict origin.

    Args:
        origin: origin's dict with keys either 'id' or
        ('type' AND 'url')

    Returns:
        origin information as dict.

    Raises:
        NotFoundExc: if storage returns nothing for the given origin.

    """
    found = storage.origin_get(origin)
    if found:
        return converters.from_origin(found)

    # Nothing found: word the error after whichever lookup key was used.
    looked_up_by_id = 'id' in origin and origin['id']
    if looked_up_by_id:
        msg = 'Origin with id %s not found!' % origin['id']
    else:
        msg = 'Origin with type %s and url %s not found!' % (
            origin['type'], origin['url'])
    raise NotFoundExc(msg)
Beispiel #7
0
def search_origin_metadata(fulltext, limit=50):
    """Search for origins whose metadata match a provided string pattern.

    Each match returned by the indexer storage is modified in place: its
    'from_revision' value is converted to hexadecimal and its 'id' entry
    is removed and used to fetch the origin from storage. What remains of
    the match is attached to the origin under the 'metadata' key.

    Args:
        fulltext: the string pattern to search for in origin metadata
        limit: the maximum number of found origins to return

    Returns:
        list of origin metadata as dict.

    """
    found = idx_storage.origin_intrinsic_metadata_search_fulltext(
        conjunction=[fulltext], limit=limit)

    def _to_result(entry):
        # Same step order as a plain loop: hex conversion first, then the
        # origin lookup keyed by the popped id.
        entry['from_revision'] = hashutil.hash_to_hex(entry['from_revision'])
        origin_id = entry.pop('id')
        info = converters.from_origin(storage.origin_get({'id': origin_id}))
        info['metadata'] = entry
        return info

    return [_to_result(entry) for entry in found]
Beispiel #8
0
 def origin_get(self, origin_urls):
     """Fetch origins by URL from storage and convert each one to a dict."""
     converted = []
     for found in self.storage.origin_get(origin_urls):
         converted.append(converters.from_origin(found.to_dict()))
     return converted
Beispiel #9
0
def search_origin(
    url_pattern: str,
    limit: int = 50,
    with_visit: bool = False,
    visit_types: Optional[List[str]] = None,
    page_token: Optional[str] = None,
) -> Tuple[List[OriginInfo], Optional[str]]:
    """Search for origins whose urls contain a provided string pattern
    or match a provided regular expression.

    The search backend is used when it is available. Otherwise the lookup
    falls back to storage with a regular expression built from the
    whitespace-separated words of the pattern.

    Args:
        url_pattern: the string pattern to search for in origin urls
        limit: the maximum number of found origins to return
        with_visit: Whether origins with no visit are to be filtered out
        visit_types: Only origins having any of the provided visit types
            (e.g. git, svn, pypi) will be returned
        page_token: opaque string used to get the next results of a search

    Returns:
        a tuple made of the list of origin information as dict and the
        token for the next page of results as reported by the backend.

    """
    if page_token:
        assert isinstance(page_token, str)

    if not search:
        # Fallback to swh-storage if swh-search is not configured
        escaped_words = [re.escape(word) for word in url_pattern.split()]
        if len(escaped_words) >= 7:
            # seven words or more: only match them in the given order
            regex = ".*".join(escaped_words)
        else:
            # fewer words: accept them in any order by alternating over
            # every permutation
            regex = "|".join(
                ".*".join(ordering)
                for ordering in itertools.permutations(escaped_words)
            )

        page_result = storage.origin_search(
            regex,
            page_token=page_token,
            with_visit=with_visit,
            limit=limit,
            visit_types=visit_types,
            regexp=True,
        )
        origins = []
        for stored_origin in page_result.results:
            origins.append(converters.from_origin(stored_origin.to_dict()))
        return (origins, page_result.next_page_token)

    page_result = search.origin_search(
        url_pattern=url_pattern,
        page_token=page_token,
        with_visit=with_visit,
        visit_types=visit_types,
        limit=limit,
    )
    origins = []
    for origin_dict in page_result.results:
        origins.append(converters.from_origin(origin_dict))
    return (origins, page_result.next_page_token)
Beispiel #10
0
 def origin_get(cls, origin_info):
     """Fetch an origin from the class-level storage and convert it."""
     return converters.from_origin(cls.storage.origin_get(origin_info))