def Stat(self, path):
    '''Returns the StatInfo for |path|, derived from the listing of its parent
    directory as returned by |self._stat_fetcher|.

    If |path| ends in '/' the StatInfo built from the whole listing is
    returned; otherwise a StatInfo holding only the version listed for the
    file is returned.

    Raises FileSystemError if the fetch itself throws or if no version can be
    found for the directory. Raises FileNotFoundError for any non-200 response
    and when the file is missing from the directory's child versions.
    '''
    directory, filename = posixpath.split(path)
    if self._revision is None:
        directory = directory + '/'
    else:
        # |stat_fetch| uses viewvc which uses pathrev= for version.
        directory = '%s/?pathrev=%s' % (directory, self._revision)

    try:
        result = self._stat_fetcher.Fetch(directory)
    except Exception:
        # Any failure to fetch is reported together with its traceback.
        raise FileSystemError(
            'Error fetching %s for Stat: %s' % (path, traceback.format_exc()))

    status = result.status_code
    if status == 404:
        raise FileNotFoundError(
            'Got 404 when fetching %s for Stat, content %s' %
            (path, result.content))
    if status != 200:
        raise FileNotFoundError(
            'Got %s when fetching %s for Stat, content %s' %
            (status, path, result.content))

    dir_stat = _CreateStatInfo(result.content)
    if dir_stat.version is None:
        raise FileSystemError('Failed to find version of dir %s' % directory)

    # Directories are answered with the StatInfo of the listing itself.
    if path.endswith('/'):
        return dir_stat

    versions = dir_stat.child_versions
    if filename in versions:
        return StatInfo(versions[filename])
    raise FileNotFoundError(
        '%s from %s was not in child versions for Stat' % (filename, path))
def _CreateStatInfo(html):
    '''Builds a StatInfo from the HTML of a ViewVC directory listing.

    Every <table> in |html| is scanned row by row for two kinds of data:
      - a 2-cell row labelled 'Directory revision:', which supplies the
        version of the directory itself;
      - 5-cell rows whose second cell parses as an integer, which supply the
        versions of the directory's children, keyed by the text of the first
        cell.

    Returns StatInfo(parent_version, child_versions); |parent_version| is None
    if no 'Directory revision:' row was found.

    Raises FileSystemError if the directory revision cell does not hold
    exactly 2 links, or if a second directory revision row is found. A
    non-numeric directory revision raises ValueError from the sanity check.
    '''
    parent_version = None
    child_versions = {}

    # Try all of the tables until we find the ones that contain the data (the
    # directory and file versions are in different tables).
    for table in _ParseHTML(html).getElementsByTagName('table'):
        # Within the table there is a list of files. However, there may be
        # some things beforehand; a header, "parent directory" list, etc. We
        # deal with that below by being generous and just ignoring such rows.
        for row in table.getElementsByTagName('tr'):
            cells = row.getElementsByTagName('td')

            # The version of the directory will eventually appear in the soup
            # of table rows, like this:
            #
            # <tr>
            #   <td>Directory revision:</td>
            #   <td><a href=... title="Revision 214692">214692</a> (of...)</td>
            # </tr>
            #
            # So look out for that.
            if (len(cells) == 2 and
                    _InnerText(cells[0]) == 'Directory revision:'):
                links = cells[1].getElementsByTagName('a')
                if len(links) != 2:
                    raise FileSystemError(
                        'ViewVC assumption invalid: directory '
                        'revision content did not have 2 <a> '
                        ' elements, instead %s' % _InnerText(cells[1]))
                this_parent_version = _InnerText(links[0])
                int(this_parent_version)  # sanity check
                if parent_version is not None:
                    # Adjacent literals are joined before '%' is applied, so
                    # both placeholders get filled. Joining the pieces with
                    # '+' would format only the last piece and raise
                    # TypeError instead of the intended error.
                    raise FileSystemError(
                        'There was already a parent version %s, and '
                        ' we just found a second at %s' %
                        (parent_version, this_parent_version))
                parent_version = this_parent_version

            # The version of each file is a list of rows with 5 cells: name,
            # version, age, author, and last log entry. Maybe the columns will
            # change; we're at the mercy of viewvc, but this constant can be
            # easily updated.
            if len(cells) != 5:
                continue
            name_element, version_element = cells[0], cells[1]

            name = _InnerText(name_element)  # will end in / for directories
            try:
                version = int(_InnerText(version_element))
            except Exception:
                # Deliberately generous: a row whose version cell can't be
                # read as an integer is skipped rather than treated as fatal.
                # (Exception rather than StandardError so that this also
                # works on Python 3.)
                continue
            child_versions[name] = str(version)

        # Stop scanning tables once both pieces of data have been found.
        if parent_version and child_versions:
            break

    return StatInfo(parent_version, child_versions)
def Get(self):
    '''Resolves every pending fetch in |self._fetches| and returns
    |self._value|, a dict filled in with one entry per fetched path.

    Paths ending in '/' are stored as the result of |self._ListDir| on the
    response content. Other paths are stored as the raw content when
    |self._binary| is set, and as ToUnicode(content) otherwise.

    Raises FileSystemError if a fetch throws or returns a status other than
    200/404, FileNotFoundError on a 404, and |self._error| if one is set once
    all fetches have been processed.
    '''
    for path, future in self._fetches:
        try:
            result = future.Get()
        except Exception:
            raise FileSystemError(
                'Error fetching %s for Get: %s' %
                (path, traceback.format_exc()))

        status = result.status_code
        if status == 404:
            raise FileNotFoundError(
                'Got 404 when fetching %s for Get, content %s' %
                (path, result.content))
        if status != 200:
            raise FileSystemError(
                'Got %s when fetching %s for Get, content %s' %
                (status, path, result.content))

        content = result.content
        if path.endswith('/'):
            entry = self._ListDir(content)
        elif self._binary:
            entry = content
        else:
            entry = ToUnicode(content)
        self._value[path] = entry

    if self._error is not None:
        raise self._error
    return self._value
def fetch_from_blobstore():
    '''Returns a Future which resolves to the _GithubZipFile for this repo
    fetched from blobstore.

    If there is no blob stored for the repo, or _GithubZipFile.Create returns
    None for the stored blob, the returned Future comes from
    FileSystemError.RaiseInFuture instead. |self|, |repo_url| and |repo_key|
    are taken from the enclosing scope.
    '''
    stored_blob = self._blobstore.Get(repo_url, _GITHUB_REPOS_NAMESPACE)
    if stored_blob is None:
        message = 'No blob for %s found in datastore' % repo_key
        return FileSystemError.RaiseInFuture(message)

    zip_file = _GithubZipFile.Create(repo_key, stored_blob)
    if zip_file is None:
        message = 'Blob for %s was corrupted in blobstore!?' % repo_key
        return FileSystemError.RaiseInFuture(message)

    return Future(value=zip_file)
def resolve():
    '''Resolves |result_future| into the StatInfo for |path|.

    |result_future|, |path|, |directory| and |filename| are taken from the
    enclosing scope. For the empty path or a path ending in '/', the StatInfo
    built from the whole response is returned; otherwise a StatInfo holding
    only the version listed for |filename| is returned.

    Raises FileNotFoundError if the fetch fails with a download error, on any
    non-200 response, or when |filename| is not among the child versions.
    Raises FileSystemError for any other fetch failure and when no version can
    be found for the directory.
    '''
    try:
        result = result_future.Get()
    except Exception as e:
        # Download errors are reported as "not found"; anything else is a
        # general file system failure.
        if IsDownloadError(e):
            exc_type = FileNotFoundError
        else:
            exc_type = FileSystemError
        raise exc_type(
            '%s fetching %s for Stat: %s' %
            (type(e).__name__, path, traceback.format_exc()))

    status = result.status_code
    if status == 404:
        raise FileNotFoundError(
            'Got 404 when fetching %s for Stat, content %s' %
            (path, result.content))
    if status != 200:
        raise FileNotFoundError(
            'Got %s when fetching %s for Stat, content %s' %
            (status, path, result.content))

    dir_stat = _CreateStatInfo(result.content)
    if dir_stat.version is None:
        raise FileSystemError('Failed to find version of dir %s' % directory)

    if path == '' or path.endswith('/'):
        return dir_stat

    versions = dir_stat.child_versions
    if filename in versions:
        return StatInfo(versions[filename])
    raise FileNotFoundError(
        '%s from %s was not in child versions for Stat' % (filename, path))
def resolve():
    '''Resolves every (path, future) pair in |fetches| and returns a dict
    mapping each path to its content.

    |fetches|, |skip_not_found| and |list_dir| are taken from the enclosing
    scope. Paths ending in '/' map to list_dir(content); all other paths map
    to the raw response content. When |skip_not_found| is set, paths whose
    fetch fails with a download error or returns a 404 are left out of the
    result instead of raising.

    Raises FileNotFoundError for download errors and 404s (unless skipped),
    and FileSystemError for any other fetch failure or non-200 status.
    '''
    contents = {}
    for path, future in fetches:
        try:
            result = future.Get()
        except Exception as e:
            if skip_not_found and IsDownloadError(e):
                continue
            if IsDownloadError(e):
                exc_type = FileNotFoundError
            else:
                exc_type = FileSystemError
            raise exc_type(
                '%s fetching %s for Get: %s' %
                (type(e).__name__, path, traceback.format_exc()))

        status = result.status_code
        if status == 404:
            if skip_not_found:
                continue
            raise FileNotFoundError(
                'Got 404 when fetching %s for Get, content %s' %
                (path, result.content))
        if status != 200:
            raise FileSystemError(
                'Got %s when fetching %s for Get, content %s' %
                (status, path, result.content))

        body = result.content
        contents[path] = list_dir(body) if path.endswith('/') else body
    return contents
def get_zip(github_zip):
    '''Turns the fetched |github_zip| into a _GithubZipFile and records it.

    Reads |github_zip.content| and passes it to _GithubZipFile.Create. On
    success the raw content is written to the blobstore, |repo_key| is set to
    True in the up-to-date cache and to |version| in the stat cache, and the
    created zip file is returned. |self|, |repo_key|, |repo_url| and |version|
    are taken from the enclosing scope.

    Raises FileSystemError if reading the content raises
    urlfetch.DownloadError, or if _GithubZipFile.Create returns None.
    '''
    try:
        zip_bytes = github_zip.content
    except urlfetch.DownloadError:
        raise FileSystemError(
            'Failed to download repo %s file from %s' % (repo_key, repo_url))

    zip_file = _GithubZipFile.Create(repo_key, zip_bytes)
    if zip_file is None:
        raise FileSystemError(
            'Blob for %s was fetched corrupted from %s' %
            (repo_key, repo_url))

    # Nothing is stored or cached until the zip is known to be usable.
    self._blobstore.Set(self._repo_url, zip_bytes, _GITHUB_REPOS_NAMESPACE)
    self._up_to_date_cache.Set(repo_key, True)
    self._stat_cache.Set(repo_key, version)
    return zip_file
def stat(content):
    '''Builds the StatInfo for |path| from the fetched listing |content|.

    |path|, |filename| and |dir_| are taken from the enclosing scope. When
    IsDirectory(path) holds, the StatInfo built from the whole listing is
    returned; otherwise a StatInfo holding only the version listed for
    |filename| is returned.

    Raises FileSystemError if the listing has no version, and
    FileNotFoundError if |filename| is not among the child versions.
    '''
    dir_stat = _CreateStatInfo(content)
    if dir_stat.version is None:
        raise FileSystemError('Failed to find version of dir %s' % dir_)

    if IsDirectory(path):
        return dir_stat

    versions = dir_stat.child_versions
    if filename in versions:
        return StatInfo(versions[filename])
    raise FileNotFoundError(
        '%s from %s was not in child versions for Stat' % (filename, path))
def get_content(result):
    '''Returns the content of the fetch |result|, checking its status first.

    |path|, |skip_not_found| and |self| are taken from the enclosing scope.
    A 200 response yields its content. A 404 yields None when
    |skip_not_found| is set and raises FileNotFoundError otherwise. Any other
    status raises FileSystemError.
    '''
    status = result.status_code
    if status == 200:
        return result.content

    if status != 404:
        raise FileSystemError(
            'Got %s when fetching %s for Get from %s, content %s' %
            (status, path, self._base_url, result.content))

    if skip_not_found:
        return None
    raise FileNotFoundError(
        'Got 404 when fetching %s for Get from %s' % (path, self._base_url))
def _FetchLiveVersion(self, username, password):
    '''Fetches the current repository version from github.com and returns it.
    The version is a 'sha' hash value.

    |username| and |password| are passed through to |self._fetcher.Fetch|.

    Raises FileSystemError if the response content is not JSON, is JSON but
    not an object, or is an object without a 'sha' key.
    '''
    # TODO(kalman): Do this asynchronously (use FetchAsync).
    result = self._fetcher.Fetch(
        'commits/HEAD', username=username, password=password)
    try:
        return json.loads(result.content)['sha']
    except (KeyError, TypeError, ValueError):
        # ValueError: the content is not valid JSON.
        # KeyError: a JSON object without a 'sha' entry.
        # TypeError: valid JSON that is not an object (a list, null, ...), so
        # it cannot be looked up by key.
        raise FileSystemError(
            'Error parsing JSON from repo %s: %s' %
            (self._repo_url, traceback.format_exc()))