Esempio n. 1
0
    def test_single_element_filter(self):
        """Check single-valued matchers: satisfied, implicitly excluded and explicitly excluded."""
        uuid_a, uuid_b, uuid_c = uuid4(), uuid4(), uuid4()
        file_a, file_b, file_c = URI("01.png"), URI("02.png"), URI("03.png")
        author_a = "bdhnd"

        first = ImageMetadata(uuid_a, file_a, author_a, "fotwf", None, None)
        second = ImageMetadata(uuid_b, file_b, "shndl", None, None, None)
        third = ImageMetadata(uuid_c, file_c, "okn", "ph", None, None)

        builder = FilterBuilder()

        # All three filenames are registered, so every element must be accepted.
        (builder.filename_constraint(file_a.path.name)
            .filename_constraint(file_b.path.name)
            .filename_constraint(file_c.path.name))
        by_filename = builder.get_filename_filter()
        for element in (first, second, third):
            self.assertTrue(by_filename(element))

        # Only the first author is registered: the others are excluded implicitly.
        builder.author_constraint(author_a)
        by_author = builder.get_author_filter()
        self.assertTrue(by_author(first))
        for element in (second, third):
            self.assertFalse(by_author(element))

        # The second id is registered as an explicit exclusion.
        builder.id_constraint(str(uuid_b), True)
        by_id = builder.get_id_filter()
        self.assertTrue(by_id(first))
        self.assertFalse(by_id(second))
        self.assertTrue(by_id(third))
Esempio n. 2
0
    def test_collective_disjunctive_filter(self):
        """Check multi-valued matchers when their constraints are evaluated in disjunction."""
        trio = ImageMetadata(uuid4(), URI("a.png"), "ghi", None,
                             ["al", "john", "jack"], None)
        duo = ImageMetadata(uuid4(), URI("b.png"), "nsh", None,
                            ["jm", "jr"], None)
        solo = ImageMetadata(uuid4(), URI("c.png"), "ShT", None,
                             ["jr"], None)

        builder = FilterBuilder()

        # Disjunction of two inclusions.
        accepts = (builder.character_constraint("jm")
                   .character_constraint("al")
                   .characters_as_disjunctive(True)
                   .get_character_filter())
        self.assertTrue(accepts(trio))
        self.assertTrue(accepts(duo))
        self.assertFalse(accepts(solo))

        # Same builder with one exclusion added on top of the inclusions.
        accepts = builder.character_constraint("jack", True).get_character_filter()
        for element in (trio, duo, solo):
            self.assertTrue(accepts(element))

        # Fresh builder holding only exclusions.
        builder = FilterBuilder()
        accepts = (builder.characters_as_disjunctive(True)
                   .character_constraint("john", True)
                   .character_constraint("jack", True)
                   .get_character_filter())
        self.assertFalse(accepts(trio))
        self.assertTrue(accepts(duo))
        self.assertTrue(accepts(solo))
Esempio n. 3
0
def load_meta(img_file: Path) -> ImageMetadata:
    """
    Load the metadata tuple associated with an image file.

    A blank metadata tuple (name-based id derived from the image URI, no other
    fields set) is returned when the metadata file does not exist, cannot be
    read, or cannot be parsed.

    :arg img_file: a path pointing to a managed image for which we want to load metadata
    :return: the associated metadata as a tuple, or a blank metadata tuple
    """

    def _blank() -> ImageMetadata:
        # Fallback record: only the id and the file URI are populated.
        return ImageMetadata(uuid3(NAMESPACE_URL, str(URI(img_file))),
                             URI(img_file), None, None, None, None)

    meta_file = _construct_metadata_path(img_file)
    if not meta_file.exists():
        return _blank()

    try:
        with meta_file.open() as handle:
            loaded = parse_xml(handle.read())

            # Older schemas did not store 'file' as a URI with a scheme; upgrade those records.
            if loaded.file.scheme is None:
                loaded = _old_to_new_schema(img_file, loaded)
    except (OSError, ParseError):
        return _blank()

    return loaded
Esempio n. 4
0
    def test_none_match_collective(self):
        """A None constraint on a multi-valued matcher accepts only the element without tags."""
        untagged = ImageMetadata(uuid4(), URI('aaa'), None, None, None, None)
        tagged = ImageMetadata(uuid4(), URI('yyy'), None, None, None, ["fta"])

        builder = FilterBuilder()
        matches_none = builder.tag_constraint(None).get_tag_filter()

        self.assertTrue(matches_none(untagged))
        self.assertFalse(matches_none(tagged))
Esempio n. 5
0
    def test_none_match_single(self):
        """A None constraint on a single-valued matcher accepts only the element without a universe."""
        with_universe = ImageMetadata(uuid4(), URI('fff'), None, 'u', None, None)
        without_universe = ImageMetadata(uuid4(), URI('zzz'), None, None, None, None)

        builder = FilterBuilder()
        matches_none = builder.universe_constraint(None).get_universe_filter()

        self.assertFalse(matches_none(with_universe))
        self.assertTrue(matches_none(without_universe))
Esempio n. 6
0
    def test_empty_filter_collective(self):
        """With no constraints, a multi-valued matcher accepts every element."""
        tagged = ImageMetadata(uuid4(), URI('xxx'), None, None, None, ["nl", "ll"])
        untagged = ImageMetadata(uuid4(), URI('kkk'), None, None, None, None)

        unconstrained = FilterBuilder().get_tag_filter()
        for element in (tagged, untagged):
            self.assertTrue(unconstrained(element))
Esempio n. 7
0
    def test_empty_filter_single(self):
        """With no constraints, a single-valued matcher accepts every element."""
        one = ImageMetadata(uuid4(), URI('xxx'), None, None, None, None)
        other = ImageMetadata(uuid4(), URI('kkk'), None, None, None, None)

        unconstrained = FilterBuilder().get_id_filter()
        for element in (one, other):
            self.assertTrue(unconstrained(element))
def test_issue_003_path_like_division_trailing():
	"""Dividing onto a base keeps its last segment only when the base ends with a slash."""
	cases = (
		# (base text, expected text after dividing by "bar.html")
		("http://example.com/foo/", "http://example.com/foo/bar.html"),
		("http://example.com/foo", "http://example.com/bar.html"),
	)
	
	for origin, joined in cases:
		base = URI(origin)
		assert str(base) == origin
		assert str(base / "bar.html") == joined
def test_issue_003_path_on_path_division():
	"""A relative URI divided onto an absolute base is resolved against that base."""
	listing = URI("http://ats.example.com/job/listing")
	
	# A job link scraped from the listing page; note that it is relative.
	relative_job = URI("detail/sample-job")
	
	resolved = listing / relative_job
	assert str(resolved) == "http://ats.example.com/job/detail/sample-job"
Esempio n. 10
0
def test_mixin_add_link() -> None:
    """Can add a new entry."""
    entries = (("link1", "/test1"), ("link2", "/test2"))

    sut = LinksMixin()
    for key, target in entries:
        sut.add_link(key=key, link=Link(href=URI(target)))

    assert sut.links is not None
    for key, target in entries:
        assert sut.links[key] is not None
        assert sut.links[key].href == URI(target)
Esempio n. 11
0
def test_link_init() -> None:
    """Can init a new link."""
    # href only
    bare = Link(href=URI("/test"))
    assert bare is not None
    assert bare.href == URI("/test")

    # href together with a relation list
    related = Link(href=URI("/test"), rel=["self"])
    assert related is not None
    assert related.href == URI("/test")
    assert related.rel == ["self"]
Esempio n. 12
0
    def to_uri(set_uri: str, scheme=None):
        """Build a URI from a string, escaping spaces as ``%20``.

        With an explicit ``scheme`` the string is used as the path (a leading
        ``/`` is added when missing). Without one the string is parsed as-is,
        and if the result carries no scheme it is wrapped as a ``string:`` URI.
        """
        AssertUtils.assert_not_null("to_uri", set_uri)

        if scheme and not set_uri.startswith("/"):
            set_uri = "/" + set_uri
        escaped = set_uri.replace(" ", "%20")

        if scheme:
            candidate = URI(scheme=scheme, path=escaped)
        else:
            candidate = URI(escaped)

        if candidate.scheme:
            return candidate
        return URI(scheme="string", path=escaped)
Esempio n. 13
0
def _old_to_new_schema(img_path: Path, old_meta: ImageMetadata):
    """Return a copy of ``old_meta`` whose ``file`` is a URI built from ``img_path``.

    Every other field is carried over unchanged.
    """
    upgraded_file = URI(img_path)
    return ImageMetadata(
        img_id=old_meta.img_id,
        file=upgraded_file,
        author=old_meta.author,
        universe=old_meta.universe,
        characters=old_meta.characters,
        tags=old_meta.tags,
    )
Esempio n. 14
0
def parse_xml(data: str) -> ImageMetadata:
    """Build an image metadata object from its XML representation.

    :param data: a string containing valid image metadata
    :return: an image metadata object"""

    root = ElTree.fromstring(data)

    def _text_of(path):
        # Text of the first matching child, or None when the child is absent.
        node = root.find(path)
        return None if node is None else node.text

    # Legacy documents carry the location under 'filename' rather than 'file'.
    location = root.get('file')
    if location is None:
        location = root.get('filename')

    character_names = [
        node.text for node in root.findall("./characters/character")
    ]
    tag_names = [node.text for node in root.findall("./tags/tag")]

    # Empty collections are reported as None rather than as empty lists.
    return ImageMetadata(
        img_id=UUID(root.get('id')),
        file=URI(location),
        author=_text_of("./author"),
        universe=_text_of("./universe"),
        characters=character_names or None,
        tags=tag_names or None)
Esempio n. 15
0
def test_issue_003_path_like_division_operators():
	"""Exercise ``/`` and ``//`` on a URI with sibling, host, absolute, fragment and full-URL operands."""
	origin = URI("http://example.com/foo/bar.html")
	
	sibling = origin / "baz.html"
	assert str(sibling) == 'http://example.com/foo/baz.html'
	
	other_host = origin // "cdn.example.com" / "baz.html"
	assert str(other_host) == 'http://cdn.example.com/baz.html'
	
	rooted = origin / "/diz"
	assert str(rooted) == 'http://example.com/diz'
	
	fragment = origin / "#diz"
	assert str(fragment) == 'http://example.com/foo/bar.html#diz'
	
	replaced = origin / "https://example.com"
	assert str(replaced) == 'https://example.com/'
Esempio n. 16
0
class CloudAppClient:
    """Client interface holding one HTTP session used for every API call."""

    base: ClassVar[URI] = URI('https://my.cl.ly')
    # Used for Accept and Content-Type headers.
    serialization: ClassVar[str] = "application/json"

    session: Session

    def __init__(self) -> None:
        """Initialize the client interface.

        Creates the session, installs the default headers and, when
        ``CLOUDAPP_USER`` is present in the environment, authenticates with
        ``CLOUDAPP_USER`` / ``CLOUDAPP_PASSWORD``.
        """

        super().__init__()

        self.session = Session()

        default_headers = (
            ('User-Agent', 'Ruby.CloudApp.API'),
            ('Accept', self.serialization),
            ('Content-Type', self.serialization),
        )
        for header, content in default_headers:
            self.session.headers[header] = content

        if 'CLOUDAPP_USER' not in environ:
            return

        self.authenticate(environ['CLOUDAPP_USER'],
                          environ['CLOUDAPP_PASSWORD'])

    def authenticate(self, email: str, password: str):  # -> CloudAppClient:
        """Preserve authentication credentials for later use by RPC calls.

        Any explicit ``Authorization`` header is dropped in favour of digest
        authentication on the session. Returns ``self`` to allow chaining.
        """

        self.session.headers.pop('Authorization', None)
        self.session.auth = HTTPDigestAuth(email, password)

        return self

    # Internal Mechanisms

    def __call__(self, path: str, method='get', **params):
        """Issue an API call against ``base / path`` and return the session's response."""
        target: URI = self.base / path
        return self.session.request(method, target, params)

    def __getitem__(self, slug: str) -> Drop:
        """Retrieve a Drop by slug."""

        return Drop(slug, self)

    def __iter__(self) -> Iterable[Drop]:
        """Iterate all known drops."""

        return Drop[self]

    def _parse_errors(self, result):
        """Normalise an error payload into a collection of messages.

        Strings become a one-element list, mappings become ``"key: value"``
        strings, any other collection is returned unchanged, and everything
        else yields an empty list.
        """
        if isinstance(result, str):
            return [result]

        if isinstance(result, Mapping):
            messages = []
            for field, problem in result.items():
                messages.append(f"{field}: {problem}")
            return messages

        if isinstance(result, Collection):
            return result

        return []
Esempio n. 17
0
    def meta_extractor(v: View) -> ImageMetadata:
        """Collect what a view currently exposes into an ImageMetadata.

        Characters and tags are read as ``', '``-separated strings and split
        into lists; a ``None`` value is passed through as ``None``.
        """
        raw_characters = v.get_characters()
        if raw_characters is None:
            character_list = None
        else:
            character_list = raw_characters.split(', ')

        raw_tags = v.get_tags()
        if raw_tags is None:
            tag_list = None
        else:
            tag_list = raw_tags.split(', ')

        return ImageMetadata(v.image_id,
                             URI(v._image_path),
                             v.get_author(),
                             v.get_universe(),
                             character_list,
                             tag_list)
Esempio n. 18
0
def test_mixin_init() -> None:
    """Can init a new mixin."""
    # Without arguments
    empty = LinksMixin()
    assert empty is not None

    # With an initial mapping of links
    self_link = Link(href=URI("/test"))
    populated = LinksMixin(links={"self": self_link})
    assert populated is not None
    assert populated.links is not None
    assert populated.links["self"] == self_link
Esempio n. 19
0
    def write(self) -> None:
        """
        Persist the updated metadata.

        Builds a metadata record from this object's current fields and hands
        it to ``write_meta`` together with the image path.

        :raise OSError: when the metadata file couldn't be opened
        """

        snapshot = ImageMetadata(img_id=self._id,
                                 file=URI(self._image_path),
                                 author=self.author,
                                 universe=self.universe,
                                 characters=self.characters,
                                 tags=self.tags)
        write_meta(snapshot, self._image_path)
Esempio n. 20
0
    def test_legacy_load(self):
        """A legacy XML using 'filename' loads with 'file' set to the image URI."""
        image_uri = URI(self.test_path / "test.png")
        legacy_xml = '<image id="97ed6183-73a0-46ea-b51d-0721b0fbd357" filename="test.png"></image>'
        with (self.test_path / "test.xml").open('w') as xml_file:
            xml_file.write(legacy_xml)

        loaded = load_meta(Path(image_uri.path))

        expected = ImageMetadata(UUID('97ed6183-73a0-46ea-b51d-0721b0fbd357'),
                                 image_uri, None, None, None, None)
        self.assertEqual(expected, loaded)
Esempio n. 21
0
    def test_collective_conjunctive_filter(self):
        """Check multi-valued matchers when their constraints are evaluated in conjunction."""
        first = ImageMetadata(uuid4(), URI("a.png"), "ghi", None, None,
                              ["y", "an", "hry"])
        second = ImageMetadata(uuid4(), URI("b.png"), "nsh", None, None,
                               ["an", "mb", "sty", "rp"])
        third = ImageMetadata(uuid4(), URI("c.png"), "ShT", None, None,
                              ["ll", "vnl"])

        builder = FilterBuilder()

        # A single inclusion.
        accepts = builder.tag_constraint("an").get_tag_filter()
        self.assertTrue(accepts(first))
        self.assertTrue(accepts(second))
        self.assertFalse(accepts(third))

        # The same builder with an exclusion added.
        accepts = builder.tag_constraint("y", True).get_tag_filter()
        self.assertFalse(accepts(first))
        self.assertTrue(accepts(second))
        self.assertFalse(accepts(third))
Esempio n. 22
0
    def test_load_actual(self):
        """A complete current-schema XML loads into the matching ImageMetadata."""
        image_uri = URI(self.test_path / "test.png")
        document = "".join((
            "<image id=\"97ed6183-73a0-46ea-b51d-0721b0fbd357\" file=\"",
            str(image_uri),
            "\">",
            "<author>a</author><universe>u</universe>",
            "<characters><character>x</character><character>y</character></characters>",
            "<tags><tag>f</tag><tag>a</tag></tags></image>",
        ))
        with (self.test_path / "test.xml").open('w') as xml_file:
            xml_file.write(document)

        expected = ImageMetadata(UUID('97ed6183-73a0-46ea-b51d-0721b0fbd357'),
                                 image_uri, "a", "u", ["x", "y"], ["f", "a"])
        self.assertEqual(expected,
                         load_meta(Path(self.test_dir.name) / "test.png"))
Esempio n. 23
0
    def test_metadata_read(self):
        """A view reports stored metadata for one image and blank metadata for the other."""
        # Persist metadata for the first image only.
        first_image = self.test_path / '01.png'
        stored = ImageMetadata(
            img_id=UUID('f32ed6ad-1162-4ea6-b243-1e6c91fb7eda'),
            file=URI(first_image),
            author="a",
            universe="p",
            characters=["x", "y"],
            tags=["t", "f"])
        write_meta(stored, first_image)

        specimen = GtkView(self.test_path)

        # Step through two images, recording what the view exposes for each.
        seen = {}
        for _ in range(2):
            specimen.load_next()
            seen[specimen.filename] = TestView.meta_extractor(specimen)

        self.assertEqual(stored, seen["01.png"])

        # The second image has no metadata file: only id and file are populated.
        blank = ImageMetadata(seen["02.png"].img_id,
                              URI(self.test_path / "02.png"),
                              None, None, None, None)
        self.assertEqual(blank, seen["02.png"])
Esempio n. 24
0
    def test_store(self):
        """Write a metadata record and verify the XML stored in test.xml."""
        image_uri = URI(self.test_path / "test.png")
        write_meta(
            ImageMetadata(UUID('97ed6183-73a0-46ea-b51d-0721b0fbd357'),
                          image_uri, "a", "u", ["x", "y"], ["f", "a"]),
            Path(self.test_dir.name) / "test.png")

        # Read the file directly. Using a Path object itself as a context
        # manager has been a no-op since Python 3.9 and is no longer supported
        # on Python 3.13+, so the former `with path as f:` form breaks there.
        result = (self.test_path / "test.xml").read_text()

        self.assertEqual(
            "<image id=\"97ed6183-73a0-46ea-b51d-0721b0fbd357\" file=\"" +
            str(image_uri) + "\">" +
            "<author>a</author><universe>u</universe>" +
            "<characters><character>x</character><character>y</character></characters>"
            + "<tags><tag>f</tag><tag>a</tag></tags></image>", result)
 def split_uri(uri_str):
     """Split a URI string into a lower-cased ``[scheme, value]`` pair.

     For http/https the value is the input with the leading ``scheme://``
     removed. For any other scheme the namespace found before the first colon
     of the hierarchical part is appended to the scheme (``scheme:namespace``).
     """
     uri = URI(uri_str)
     scheme_name = uri.scheme.name

     if scheme_name in ('http', 'https'):
         # Strip the scheme textually instead of using the hierarchical part,
         # so that query strings are preserved.
         remainder = uri_str.replace(scheme_name + '://', '', 1)
         return [scheme_name.lower(), remainder.lower()]

     # e.g. uri.heirarchical == 'doi:10.11647/obp.0130';
     # we are assuming the namespace is everything before the first colon.
     namespace, value = uri.heirarchical.split(':', 1)
     if namespace == "isbn":
         # ISBNs are stored without hyphens - remove them from the input.
         value = value.replace("-", "")

     # URIs are stored lower-cased, so lower-case both parts.
     qualified_scheme = f"{scheme_name}:{namespace}"
     return [qualified_scheme.lower(), value.lower()]
Esempio n. 26
0
		def inner(environ:WSGIEnvironment, start_response:WSGIStartResponse):
			"""WSGI callable guarding the wrapped ``app`` with a blacklist and heuristic checks.
			
			Flow, as implemented below:
			
			1. Build the request and its URI; any failure is answered with HTTPBadRequest.
			2. If the client address is already in ``self.blacklist``, answer with HTTPClose.
			3. Run every heuristic; an HTTPClose raised by one is logged and propagated.
			4. Otherwise call the wrapped ``app``.
			5. Any HTTPClose raised in steps 3-4 blacklists the client (unless its address is
			   in ``self.exempt``) and is then used as the response.
			"""
			try:
				request: Request = Request(environ)  # This will be remembered and re-used as a singleton later.
				uri: URI = URI(request.url)
			
			except Exception as e:  # Protect against de-serialization errors.
				return HTTPBadRequest(f"Encountered error de-serializing the request: {e!r}")(environ, start_response)
			
			# https://docs.pylonsproject.org/projects/webob/en/stable/api/request.html#webob.request.BaseRequest.client_addr
			# Ref: https://www.nginx.com/resources/wiki/start/topics/examples/forwarded/
			# NOTE(review): `client` is assigned here but the code below re-reads request.client_addr each time.
			client: str = request.client_addr
			
			try:
				# Immediately reject known bad actors.
				# The blacklist is keyed by the inet_aton() form of the address, not by its text.
				if inet_aton(request.client_addr) in self.blacklist:
					return HTTPClose()(environ, start_response)  # No need to re-blacklist.
				
				# Validate the heuristic rules.
				for heuristic in self.heuristics:
					try:
						heuristic(environ, uri)
					except HTTPClose as e:
						# Log which heuristic fired, then let the outer handler blacklist the client.
						log.error(f"{heuristic} {e.args[0].lower()}")
						raise
				
				# Invoke the wrapped application if everything seems OK.  Note that this pattern of wrapping permits
				# your application to raise HTTPClose if wishing to blacklist the active connection for any reason.
				return app(environ, start_response)
			
			except HTTPClose as e:
				# Exemptions are matched against the textual address; blacklist entries use inet_aton().
				if request.client_addr not in self.exempt:
					log.warning(f"Blacklisting: {request.client_addr}")
					self.blacklist.add(inet_aton(request.client_addr))
				
				# __debug__ is False when Python runs with -O; in that case a fresh HTTPClose replaces the original.
				if not __debug__: e = HTTPClose()  # Do not disclose the reason in production environments.
				elif ': ' in e.args[0]:  # XXX: Not currently effective.
					# Only the part after ': ' is passed through escape(); the part before it is inserted as-is.
					left, _, right = e.args[0].partition(': ')
					e.args = (f"<strong>{left}:</strong> <tt>{escape(right)}</tt>", )
				
				return e(environ, start_response)
Esempio n. 27
0
    def test_metadata_update(self):
        """Metadata set through the view is normalised and persisted by write()."""
        target_filename = '02.png'
        specimen = GtkView(self.test_path)

        # Advance until the view shows the target image.
        while True:
            specimen.load_next()
            if specimen.filename == target_filename:
                break

        # The target starts without any tags.
        self.assertIsNone(specimen.get_tags())

        # Set metadata containing stray whitespace/control characters, then persist it.
        specimen.set_author(":DD")
        specimen.set_universe("\tu")
        specimen.set_characters("3, f,p\n")
        specimen.set_tags("fa,s \vjo,\u200dl")
        specimen.write()

        expected = ImageMetadata(specimen.image_id,
                                 URI(self.test_path / target_filename),
                                 ":DD", "u",
                                 ["3", "f", "p"], ["fa", "s jo", "l"])
        self.assertEqual(expected,
                         load_meta(self.test_path / target_filename))
Esempio n. 28
0
 def __init__(self, uri: URI, expected_outputs: List[str]) -> None:
     """Store the target URI and the expected outputs, then run the parent initialiser.

     :param uri: wrapped in ``URI(...)`` and kept as ``self.uri``
     :param expected_outputs: kept unchanged as ``self.expected_outputs``
     """
     self.uri = URI(uri)
     self.expected_outputs = expected_outputs
     # Attributes are assigned before the parent initialiser runs.
     super().__init__()
 def test_http_get(self):
     """Fetch a page with method "get" and check its content type and status code."""
     target = URI("https://www.qq.com")
     options = {"method": "get"}

     contents = self.sucker.get_contents(target, options)
     content = contents[0]

     self.assertEqual(content.get_header("content-type"),
                      "text/html; charset=GB2312")
     self.assertEqual(content.get_status_code(), 200)
Esempio n. 30
0
class Drop:
    """A single drop, populated from the JSON metadata returned by the API.

    A drop is built either from a slug (its metadata is then fetched from
    ``base / slug``) or directly from an already-decoded metadata mapping
    (``json=True``). ``Drop[api]`` yields every drop known to an API client.
    """

    base: ClassVar[URI] = URI(
        'https://cl.ly/'
    )  # The ID is appended to this as the first path element.
    _storage: str = '{self.uploaded.year}/{self.uploaded.month}/{self.uploaded.day}/{self.id}--{self.slug}--{self.type}--{self.original}'  # Name (format string) to use when saving locally.

    id: int  # The internal integer identifier.
    type: str  # The meta-type (bulk grouping) of the uploaded drop.
    slug: str  # The URL slug used to access this drop.

    name: str  # Current file name.
    original: str  # Original uploaded file name.
    target: Optional[URI]  # Target URI if a "short link" redirection.
    content: Optional[URI]  # Content URI otherwise.

    size: int  # Total file size.
    views: int  # View counter.
    uploaded: datetime  # The date and time of initial upload.
    favourite: bool = False  # Has this been marked as a favourite?

    index: Optional[int]
    total: Optional[int]

    _data: Dict
    _stats: URI

    # Maps JSON keys to attribute names. A tuple key lists alternative JSON
    # keys tried in order; the first one holding a non-None value wins.
    _json_map: Dict[str, str] = {
        'id': 'id',
        'slug': 'slug',
        'created_at': 'uploaded',
        'item_type': 'type',
        'name': 'name',
        'redirect_url': 'target',
        ('file_name', 'name'): 'original',
        'view_counter': 'views',
        ('source_url', 'remote_url'): 'content',
        'stats_url': '_stats',
        'content_length': 'size',
        'favourite': 'favourite',
    }

    def __repr__(self) -> str:
        return f"Drop({'⚠️ ' if self.favourite else ''}{self.slug}, {self.type}, '{self.original}', size={self.size}, uploaded={self.uploaded.isoformat()})"

    def __init__(self, slug, api, json: bool = False) -> None:
        """Create a drop from a slug, or from decoded metadata when ``json`` is true.

        :param slug: the drop's slug, or the metadata mapping itself when ``json`` is true
        :param api: the API client; only a proxy to it is kept on the instance
        :param json: treat ``slug`` as already-retrieved metadata and skip the request
        :raise ValueError: when the metadata request does not return an OK status
        """
        self._api = proxy(api)

        if json:
            self._apply(slug)
            return

        self.slug = slug

        result = api.session.get(self.uri)

        if result.status_code != codes.ok:
            raise ValueError(
                f"Received {result.status_code!s} attempting to retrieve drop metadata."
            )

        self._apply(result.json())

    def __class_getitem__(Drop, api) -> Generator:
        """Fetch an iterator of all available drops over the given authenticated API instance.

        Each listed record is re-fetched by slug; when that fetch fails with
        ``ValueError`` the raw record is yielded instead of a ``Drop``.
        Successfully built drops get ``index`` (running counter) and ``total``
        (the count reported by the current page) set before being yielded.

        Args: page, per_page, type (image, bookmark, text, archive, audio, video, unknown), deleted
        """

        result = api('/v3/items').json()

        counter = count()

        while True:
            # Process the current page BEFORE looking for a next link, so the
            # final page (which has no next link) is not skipped.
            for record in result.get('data') or ():
                try:
                    drop = Drop(record['slug'], api)
                except ValueError:
                    yield record
                    continue

                drop.index = next(counter)
                drop.total = result['meta']['count']
                yield drop

            # 'links' / 'next_url' may be absent or null on the last page.
            next_href = ((result.get('links') or {}).get('next_url')
                         or {}).get('href')
            if not next_href:
                break

            result = api.session.get(next_href).json()

    @property
    def uri(self) -> URI:
        """The metadata address of this drop: ``base`` joined with the slug."""
        return self.base / self.slug

    def save(self, path: Optional[Path] = None):
        """Save the drop locally, next to a ``.info.json`` copy of its metadata.

        Bookmarks are written as a binary ``.webloc`` property list holding
        the target URL. Anything else is downloaded from ``content`` unless
        the file already exists with the expected size. The file's access and
        modification times are set to the upload time.

        :param path: destination; defaults to the ``_storage`` format string
        """
        if not path:
            path = self._storage.format(self=self)

        target = Path(path).absolute()
        target.parent.mkdir(parents=True, exist_ok=True)

        # First, write out the .info.json for this drop.
        with target.with_suffix('.info.json').open('w',
                                                   encoding='utf-8') as out:
            out.write(dumps(self._data, indent=4, sort_keys=True))

        if self.type == 'bookmark':
            target = target.with_suffix('.webloc')
            target.write_bytes(plist({'URL': self.target}, fmt=FMT_BINARY))

        else:
            # Re-download when the file is missing or its size differs from
            # the reported one (a falsy reported size disables the comparison).
            if not target.exists() or (self.size
                                       and getsize(target) != self.size):
                with target.open('wb', buffering=8192) as out:
                    with self._api.session.get(self.content,
                                               stream=True) as req:
                        stream(req.raw, out)

        uploaded = mktime(self.uploaded.timetuple())
        utime(target, (uploaded, uploaded))

    def _apply(self, metadata) -> None:
        """Clean up ``metadata`` in place, keep it as ``_data`` and copy it onto attributes.

        For every ``_json_map`` entry the destination attribute receives the
        first non-None value among its origin keys, or ``None`` if none has one.
        """
        self._process(metadata)
        self._data = metadata

        for origin, destination in self._json_map.items():
            if isinstance(origin, tuple):
                origins = origin
            else:
                origins = (origin, )

            for origin in origins:
                if metadata.get(origin, None) is not None:
                    setattr(self, destination, metadata.get(origin))
                    break
            else:
                # No candidate key carried a value.
                setattr(self, destination, None)

    def _process(self, data: dict) -> None:
        """Perform minor additional typecasting or cleanup work after retrieval of a drop's metadata.

        Only string values are touched: ``file_name`` is URL-unquoted and
        non-empty ``*_at`` values are parsed into ``datetime`` objects. Values
        are replaced under their existing keys, so ``data`` keeps its size
        while being iterated.
        """

        for key, value in data.items():
            if not isinstance(value, str): continue

            if key in ('file_name', ):
                value = unquote(value)
            elif key.endswith('_at') and value:
                try:
                    value = datetime.strptime(
                        value.rstrip('Z'),
                        '%Y-%m-%dT%H:%M:%S')  # Try with optional trailing Z.
                except ValueError:
                    value = datetime.strptime(
                        value, '%Y-%m-%d')  # Attempt without time component.

            data[key] = value