Exemple #1
0
    def Stat(self, path):
        '''Returns the StatInfo for |path|, found by fetching the listing of
        its parent directory through |self._stat_fetcher| and parsing it with
        _CreateStatInfo.

        |path| is treated as a directory when it is empty (the root) or ends
        with '/'; in that case the StatInfo of the listing itself is returned.
        Otherwise a StatInfo built from the version of the named child is
        returned.

        Raises FileNotFoundError if the fetch returns any non-200 status or if
        the file is not in the listing. Raises FileSystemError if the fetch
        itself throws or the directory version cannot be found.
        '''
        directory, filename = posixpath.split(path)
        directory += '/'
        if self._revision is not None:
            # |stat_fetch| uses viewvc which uses pathrev= for version.
            directory += '?pathrev=%s' % self._revision

        try:
            result = self._stat_fetcher.Fetch(directory)
        except Exception:
            raise FileSystemError('Error fetching %s for Stat: %s' %
                                  (path, traceback.format_exc()))

        if result.status_code == 404:
            raise FileNotFoundError(
                'Got 404 when fetching %s for Stat, content %s' %
                (path, result.content))
        if result.status_code != 200:
            # NOTE(review): every non-200 status is reported as
            # FileNotFoundError here, not only 404. Kept as-is so callers
            # that catch FileNotFoundError keep working.
            raise FileNotFoundError(
                'Got %s when fetching %s for Stat, content %s' %
                (result.status_code, path, result.content))

        stat_info = _CreateStatInfo(result.content)
        if stat_info.version is None:
            raise FileSystemError('Failed to find version of dir %s' %
                                  directory)
        # posixpath.split('') yields an empty filename, so the root path must
        # be handled as a directory rather than looked up as a child.
        if path == '' or path.endswith('/'):
            return stat_info
        if filename not in stat_info.child_versions:
            raise FileNotFoundError(
                '%s from %s was not in child versions for Stat' %
                (filename, path))
        return StatInfo(stat_info.child_versions[filename])
Exemple #2
0
def _CreateStatInfo(html):
    '''Parses a viewvc directory listing page |html| and returns a StatInfo
    built from the directory's own revision and a dict mapping each child
    name to its revision (as a string).

    The directory revision is None if no "Directory revision:" row is found.

    Raises FileSystemError if the "Directory revision:" row does not contain
    exactly 2 links, or if more than one such row is found. Raises ValueError
    if the directory revision text is not an integer.
    '''
    parent_version = None
    child_versions = {}

    # Try all of the tables until we find the ones that contain the data (the
    # directory and file versions are in different tables).
    for table in _ParseHTML(html).getElementsByTagName('table'):
        # Within the table there is a list of files. However, there may be some
        # things beforehand; a header, "parent directory" list, etc. We will deal
        # with that below by being generous and just ignoring such rows.
        rows = table.getElementsByTagName('tr')

        for row in rows:
            cells = row.getElementsByTagName('td')

            # The version of the directory will eventually appear in the soup of
            # table rows, like this:
            #
            # <tr>
            #   <td>Directory revision:</td>
            #   <td><a href=... title="Revision 214692">214692</a> (of...)</td>
            # </tr>
            #
            # So look out for that.
            if len(cells) == 2 and _InnerText(
                    cells[0]) == 'Directory revision:':
                links = cells[1].getElementsByTagName('a')
                if len(links) != 2:
                    raise FileSystemError(
                        ('ViewVC assumption invalid: directory '
                         'revision content did not have 2 <a> '
                         ' elements, instead %s') % _InnerText(cells[1]))
                this_parent_version = _InnerText(links[0])
                int(this_parent_version)  # sanity check; raises ValueError
                if parent_version is not None:
                    # The whole message must be one string before applying %;
                    # '%' binds tighter than '+', so formatting only the last
                    # fragment with a 2-tuple would raise TypeError.
                    raise FileSystemError(
                        'There was already a parent version %s, and '
                        'we just found a second at %s' %
                        (parent_version, this_parent_version))
                parent_version = this_parent_version

            # The version of each file is a list of rows with 5 cells: name, version,
            # age, author, and last log entry. Maybe the columns will change; we're
            # at the mercy of viewvc, but this constant can be easily updated.
            if len(cells) != 5:
                continue
            name_element, version_element = cells[0], cells[1]

            name = _InnerText(
                name_element)  # note: will end in / for directories
            version_text = _InnerText(version_element)
            try:
                version = int(version_text)
            except (TypeError, ValueError):
                # Not a file row (e.g. a header); skip it.
                continue
            child_versions[name] = str(version)

        if parent_version and child_versions:
            break

    return StatInfo(parent_version, child_versions)
Exemple #3
0
    def Get(self):
        '''Resolves every (path, future) pair in |self._fetches|, stores the
        processed content in |self._value| keyed by path, and returns
        |self._value|.

        Paths ending in '/' are passed through |self._ListDir|; other paths
        are converted with ToUnicode unless |self._binary| is set.

        Raises FileNotFoundError on a 404 response, FileSystemError if a
        fetch throws or returns any other non-200 status, and re-raises
        |self._error| after the loop if it is set.
        '''
        for path, pending in self._fetches:
            try:
                response = pending.Get()
            except Exception:
                raise FileSystemError('Error fetching %s for Get: %s' %
                                      (path, traceback.format_exc()))

            status = response.status_code
            if status == 404:
                raise FileNotFoundError(
                    'Got 404 when fetching %s for Get, content %s' %
                    (path, response.content))
            if status != 200:
                raise FileSystemError(
                    'Got %s when fetching %s for Get, content %s' %
                    (status, path, response.content))

            # Pick the representation for this path, then store it once.
            content = response.content
            if path.endswith('/'):
                content = self._ListDir(content)
            elif not self._binary:
                content = ToUnicode(content)
            self._value[path] = content

        if self._error is not None:
            raise self._error
        return self._value
Exemple #4
0
    def fetch_from_blobstore():
      '''Reads this repo's blob from blobstore and returns a Future holding
      the _GithubZipFile created from it.

      If no blob is stored, or _GithubZipFile.Create returns None for it, the
      result of FileSystemError.RaiseInFuture is returned instead.
      '''
      stored_blob = self._blobstore.Get(repo_url, _GITHUB_REPOS_NAMESPACE)
      if stored_blob is None:
        return FileSystemError.RaiseInFuture(
            'No blob for %s found in datastore' % repo_key)

      zip_file = _GithubZipFile.Create(repo_key, stored_blob)
      if zip_file is not None:
        return Future(value=zip_file)

      return FileSystemError.RaiseInFuture(
          'Blob for %s was corrupted in blobstore!?' % repo_key)
        def resolve():
            '''Resolves |result_future| and turns the fetched directory
            listing into the StatInfo for |path|.

            Returns the listing's own StatInfo when |path| is empty or ends
            with '/', otherwise a StatInfo built from the version of
            |filename| in the listing.

            Raises FileNotFoundError for download errors, any non-200 status,
            or a file missing from the listing; raises FileSystemError for
            other fetch failures or when the directory version is missing.
            '''
            try:
                response = result_future.Get()
            except Exception as e:
                if IsDownloadError(e):
                    error_class = FileNotFoundError
                else:
                    error_class = FileSystemError
                raise error_class(
                    '%s fetching %s for Stat: %s' %
                    (type(e).__name__, path, traceback.format_exc()))

            status = response.status_code
            if status == 404:
                raise FileNotFoundError('Got 404 when fetching %s for Stat, '
                                        'content %s' %
                                        (path, response.content))
            if status != 200:
                raise FileNotFoundError(
                    'Got %s when fetching %s for Stat, content %s' %
                    (status, path, response.content))

            listing = _CreateStatInfo(response.content)
            if listing.version is None:
                raise FileSystemError('Failed to find version of dir %s' %
                                      directory)

            is_directory = path == '' or path.endswith('/')
            if is_directory:
                return listing
            if filename in listing.child_versions:
                return StatInfo(listing.child_versions[filename])
            raise FileNotFoundError(
                '%s from %s was not in child versions for Stat' %
                (filename, path))
 def resolve():
     '''Resolves each (path, future) pair in |fetches| and returns a dict
     mapping path to its content; paths ending in '/' are passed through
     |list_dir|.

     When |skip_not_found| is set, download errors and 404 responses are
     skipped rather than raised. Otherwise download errors and 404s raise
     FileNotFoundError. Other fetch failures and other non-200 statuses
     always raise FileSystemError.
     '''
     results = {}
     for path, pending in fetches:
         try:
             response = pending.Get()
         except Exception as e:
             if skip_not_found and IsDownloadError(e):
                 continue
             if IsDownloadError(e):
                 error_class = FileNotFoundError
             else:
                 error_class = FileSystemError
             raise error_class(
                 '%s fetching %s for Get: %s' %
                 (type(e).__name__, path, traceback.format_exc()))

         status = response.status_code
         if status == 404:
             if skip_not_found:
                 continue
             raise FileNotFoundError(
                 'Got 404 when fetching %s for Get, content %s' %
                 (path, response.content))
         if status != 200:
             raise FileSystemError(
                 'Got %s when fetching %s for Get, content %s' %
                 (status, path, response.content))

         content = response.content
         results[path] = list_dir(content) if path.endswith('/') else content
     return results
      def get_zip(github_zip):
        '''Builds a _GithubZipFile from the content of |github_zip|, stores
        the raw blob in blobstore, updates the up-to-date and stat caches for
        |repo_key|, and returns the zip file.

        Raises FileSystemError if reading the content raises
        urlfetch.DownloadError or if _GithubZipFile.Create returns None.
        '''
        try:
          zip_bytes = github_zip.content
        except urlfetch.DownloadError:
          raise FileSystemError('Failed to download repo %s file from %s' %
                                (repo_key, repo_url))

        parsed_zip = _GithubZipFile.Create(repo_key, zip_bytes)
        if parsed_zip is None:
          raise FileSystemError('Blob for %s was fetched corrupted from %s' %
                                (repo_key, repo_url))

        # Only persist and update the caches once the blob is known good.
        self._blobstore.Set(self._repo_url, zip_bytes, _GITHUB_REPOS_NAMESPACE)
        self._up_to_date_cache.Set(repo_key, True)
        self._stat_cache.Set(repo_key, version)
        return parsed_zip
Exemple #8
0
 def stat(content):
   '''Parses the directory listing |content| and returns the StatInfo for
   |path|: the listing's own StatInfo when IsDirectory(path) is true,
   otherwise a StatInfo built from the version of |filename|.

   Raises FileSystemError if the listing has no version, and
   FileNotFoundError if |filename| is not among its children.
   '''
   listing = _CreateStatInfo(content)
   if listing.version is None:
     raise FileSystemError('Failed to find version of dir %s' % dir_)
   if IsDirectory(path):
     return listing
   if filename in listing.child_versions:
     return StatInfo(listing.child_versions[filename])
   raise FileNotFoundError(
       '%s from %s was not in child versions for Stat' % (filename, path))
Exemple #9
0
 def get_content(result):
   '''Returns the content of the fetch |result| for |path|.

   A 404 returns None when |skip_not_found| is set and raises
   FileNotFoundError otherwise; any other non-200 status raises
   FileSystemError.
   '''
   status = result.status_code
   if status == 200:
     return result.content
   if status != 404:
     raise FileSystemError(
         'Got %s when fetching %s for Get from %s, content %s' %
         (status, path, self._base_url, result.content))
   if skip_not_found:
     return None
   raise FileNotFoundError('Got 404 when fetching %s for Get from %s' %
                           (path, self._base_url))
  def _FetchLiveVersion(self, username, password):
    '''Fetches the current repository version from github.com and returns it.
    The version is a 'sha' hash value.
    '''
    # TODO(kalman): Do this asynchronously (use FetchAsync).
    result = self._fetcher.Fetch(
        'commits/HEAD', username=username, password=password)

    try:
      return json.loads(result.content)['sha']
    except (KeyError, ValueError):
      raise FileSystemError('Error parsing JSON from repo %s: %s' %
                            (self._repo_url, traceback.format_exc()))