Exemple #1
0
 def testURI(self):
     """Checks that URI constructor keyword arguments become attributes."""
     components = (
         ("transport", "http"),
         ("host", "google.com"),
         ("path", "/index"),
         ("query", "q=hi"),
         ("fragment", "anchor1"),
     )
     uri = rdf_standard.URI(**dict(components))

     # Every component passed to the constructor must read back unchanged.
     for name, expected in components:
         self.assertEqual(getattr(uri, name), expected)
Exemple #2
0
    def testHumanReadable(self):
        """Parses a human-readable URL, checks its parts and the round trip."""
        uri = rdf_standard.URI()
        url = "http://google.com:443/search?query=hi#anchor2"
        uri.ParseFromHumanReadable(url)

        expected_components = (
            ("transport", "http"),
            ("host", "google.com:443"),
            ("path", "/search"),
            ("query", "query=hi"),
            ("fragment", "anchor2"),
        )
        for name, expected in expected_components:
            self.assertEqual(getattr(uri, name), expected)

        # Serializing back must reproduce the input URL exactly.
        round_tripped = uri.SerializeToHumanReadable()
        self.assertEqual(round_tripped, url)
Exemple #3
0
    def testByteString(self):
        """Parses a UTF-8 encoded non-ASCII URL and checks the round trip."""
        raw_uri = "http://gógiel.pl:1337/znajdź?frazę=🦋#nagłówek"

        parsed = rdf_standard.URI()
        encoded = raw_uri.encode("utf-8")
        parsed.ParseFromBytes(encoded)

        expected_components = (
            ("transport", "http"),
            ("host", "gógiel.pl:1337"),
            ("path", "/znajdź"),
            ("query", "frazę=🦋"),
            ("fragment", "nagłówek"),
        )
        for name, expected in expected_components:
            self.assertEqual(getattr(parsed, name), expected)

        # Deserializing the serialized form must compare equal to the source.
        serialized = parsed.SerializeToBytes()
        restored = parsed.FromSerializedBytes(serialized)
        self.assertEqual(restored, parsed)
Exemple #4
0
    def testURI(self):
        """Checks constructor-supplied components and the serialized string."""
        components = (
            ("transport", "http"),
            ("host", "google.com"),
            ("path", "/index"),
            ("query", "q=hi"),
            ("fragment", "anchor1"),
        )
        uri = rdf_standard.URI(**dict(components))

        # Every component passed to the constructor must read back unchanged.
        for name, expected in components:
            self.assertEqual(getattr(uri, name), expected)

        expected_url = "http://google.com/index?q=hi#anchor1"
        serialized = uri.SerializeToString()
        self.assertEqual(serialized, expected_url)
Exemple #5
0
  def Parse(self, stat, file_obj, unused_knowledge_base):
    """Yields one AttributedDict holding the URIs found in `file_obj`.

    Args:
      stat: Object whose `pathspec.path` is reported as the filename.
      file_obj: Passed to `FindPotentialURIs` to obtain candidate strings.
      unused_knowledge_base: Not used by this method.

    Yields:
      A single `rdf_protodict.AttributedDict` built from `filename` and
      `uris` keyword arguments.
    """
    valid_uris = []
    for candidate in self.FindPotentialURIs(file_obj):
      parsed = rdf_standard.URI()
      parsed.ParseFromString(candidate)

      # A candidate without a transport is not a valid URL, and a valid URL
      # additionally needs either a host or a path.
      if not parsed.transport or not (parsed.host or parsed.path):
        continue
      valid_uris.append(parsed)

    source_path = stat.pathspec.path
    yield rdf_protodict.AttributedDict(
        filename=source_path,
        uris=valid_uris)
Exemple #6
0
  def ParseFile(self, knowledge_base, pathspec, filedesc):
    """Yields one AttributedDict holding the URIs found in `filedesc`.

    Args:
      knowledge_base: Not used by this method.
      pathspec: Object whose `path` attribute is reported as the filename.
      filedesc: Passed to `FindPotentialURIs` to obtain candidate strings.

    Yields:
      A single `rdf_protodict.AttributedDict` built from `filename` and
      `uris` keyword arguments.
    """
    del knowledge_base  # Unused.

    valid_uris = []
    for candidate in self.FindPotentialURIs(filedesc):
      parsed = rdf_standard.URI()
      parsed.ParseFromHumanReadable(candidate)

      # A candidate without a transport is not a valid URL, and a valid URL
      # additionally needs either a host or a path.
      if not parsed.transport or not (parsed.host or parsed.path):
        continue
      valid_uris.append(parsed)

    source_path = pathspec.path
    yield rdf_protodict.AttributedDict(
        filename=source_path,
        uris=valid_uris)
Exemple #7
0
 def GenerateSample(self, number=0):
     """Returns an http URI whose host is `<number>.example.com`."""
     hostname = "%s.example.com" % number
     return rdf_standard.URI(transport="http", host=hostname)