def fstat_from_server(depot_path, upper, lower, nearby=None):
    '''
    Stream fstat lines for depot_path from the configured fstat server.

    This is a generator; nothing (including the HTTP request) happens
    until it is first iterated.

    depot_path: Perforce depot path; every '//' and '/...' is stripped
                before it is put into the request URL.
    upper:      Changelist number placed in the request URL.
    lower:      If not None, iteration stops at the first line whose
                changelist is below this value.
    nearby:     Optional value passed to the server as ?nearby=...

    Yields the line part returned by fstat_cl() for every line that has
    a truthy changelist; other lines are skipped.

    Raises:
      Exception        if no fstat server is configured, or on HTTP 404.
      FstatRedirection carrying the changelist parsed from the Location
                       header on any 3xx response.
      FstatServerError on any other non-200 status.
    '''
    import requests

    if not o4_config.fstat_server():
        raise Exception('fstat_server is not configured')
    depot_path = depot_path.replace('//', '').replace('/...', '')
    url = f'{o4_config.fstat_server()}/o4-http/fstat/{upper}/{depot_path}'
    if nearby:
        url += f'?nearby={nearby}'
    server = requests.get(url,
                          stream=True,
                          allow_redirects=False,
                          auth=o4_config.fstat_server_auth(),
                          verify=o4_config.fstat_server_cert())
    # With stream=True the connection stays checked out until the body is
    # fully consumed or the response is closed. Every exit from here on
    # (error status, early stop at `lower`, generator being closed by the
    # caller) must therefore release it explicitly.
    try:
        if server.status_code == 404:
            raise Exception(f'Unknown fstat request: {url}')
        if server.status_code // 100 == 3:
            # The changelist is the path element two positions after
            # 'o4-http' in the redirect target.
            redir = server.headers['Location'].split('/')
            cl = int(redir[redir.index('o4-http') + 2])  # Throws ValueError on miss
            raise FstatRedirection(cl)
        if server.status_code != 200:
            print(f'*** WARNING: Status {server.status_code} from {url}', file=sys.stderr)
            raise FstatServerError()
        # The body is read as a gzip stream straight off the raw socket.
        for buf in gzip.GzipFile(fileobj=server.raw):
            cl, line = fstat_cl(buf.decode('utf8'))
            if cl:
                if lower is not None and cl < lower:
                    return
                yield line
    finally:
        server.close()
def _print_fstat_summary(summary):
    '''
    Print to stderr a table of requested/provided changelist ranges for
    each fstat source, every row prefixed with "*** INFO: ".

    summary maps each of 'Perforce', 'Fstat server' and 'Local cache' to
    None (source not used) or to a pair of pairs
    ((requested_hi, requested_lo), (provided_hi, provided_lo)); None
    members of a range are shown as 0.
    '''
    from texttable import Texttable

    table = Texttable()
    table.set_cols_align(['l', 'l', 'l'])
    table.set_header_align(['l', 'l', 'l'])
    table.header(['Fstat source', 'Requested', 'Provided'])
    table.set_chars(['-', '|', '+', '-'])
    table.set_deco(table.HEADER)
    for k in 'Perforce', 'Fstat server', 'Local cache':
        v = summary[k]
        if v:
            data = ('{:10,} - {:10,}'.format((v[0][0] or 0), (v[0][1] or 0)),
                    '{:10,} - {:10,}'.format((v[1][0] or 0), (v[1][1] or 0)))
        else:
            data = ('Not used', '')
        table.add_row([k, data[0], data[1]])
    table = '\n'.join('*** INFO: ' + row for row in table.draw().split('\n'))
    print(table, file=sys.stderr)


def fstat_iter(depot_path, to_changelist, from_changelist=0, cache_dir='.o4'):
    '''
    Return the needed fstat data by combining three possible sources:
    perforce, the fstat server, and local fstat cache files.

    Note that the local files and the fstat server are guaranteed to
    return lines in (descending) changelist order, while the Perforce
    data may not be.

    The three sources are ordered [fstat server, perforce, fstat server,
    local]; each one may or may not be used, and the fstat server will
    not be used twice. In the order read, each subset will contain only
    changelist numbers less than all that have been read in previous
    subsets.

    The local cache file created should not have more than one entry for
    any filename. Such duplication may come about due to a file having
    been changed in more than one of the changelist subsets being
    queried; a row for a file that has been seen already (and thus, at a
    higher changelist) must be ignored.

    Beware: do not break out of the returned generator! This will
    prevent local cache files from being created, causing superfluous
    access to perforce and/or fstat server.

    depot_path:      Depot path handed to the fstat server and Perforce.
    to_changelist:   Upper changelist bound (converted with int()).
    from_changelist: Lower changelist bound (converted with int()). When
                     sources are merged, only rows with
                     from_changelist < cl <= to_changelist are yielded.
    cache_dir:       Directory holding the '<changelist>.fstat.gz' cache
                     files; the new cache file is written here.

    Yields fstat lines (strings). Calls sys.exit() on a Perforce
    'Request too large' error or when all Perforce attempts time out.

    NOTE(review): when the request is satisfied entirely from the local
    cache, rows with cl == from_changelist are yielded as well, unlike
    the merge path, which excludes that boundary. Confirm which is intended.
    '''
    from tempfile import mkstemp
    from o4_pyforce import P4TimeoutError, P4Error

    to_changelist, from_changelist = int(to_changelist), int(from_changelist)
    cache_cl, cache_fname = get_fstat_cache(to_changelist, cache_dir)
    # Paths already emitted from a higher-changelist source.
    all_filenames = set()
    # ESC [2K followed by carriage return.
    CLR = '%c[2K\r' % chr(27)

    summary = {'Perforce': None, 'Fstat server': None, 'Local cache': None}

    try:
        # Initialised first so that the finally block can always test them.
        fout = temp_fname = None
        highest_written_cl = 0
        _first = _last = 0  # These are local and re-used in various blocks below
        fh, temp_fname = mkstemp(dir=cache_dir)
        os.close(fh)
        fout = gzip.open(temp_fname, 'wt', encoding='utf8', compresslevel=9)
        print("# COLUMNS: F_CHANGELIST, F_PATH, F_REVISION, F_FILE_SIZE, F_CHECKSUM", file=fout)

        if cache_cl == to_changelist:
            # The cache already ends exactly where we want; no merging and
            # no new cache file (the temp file is removed in finally).
            print(f'*** INFO: Satisfied from local cache {cache_fname}', file=sys.stderr)
            for cl, line in fstat_from_csv(cache_fname, fstat_cl):
                if not cl:
                    continue
                if cl < from_changelist:
                    break
                yield line
            return

        # Ranges are (highest, lowest) changelist pairs.
        missing_range = (to_changelist, cache_cl + 1)
        o4server_range = (None, None)

        if o4_config.fstat_server():
            _first = _last = 0
            try:
                for line in fstat_from_server(depot_path, missing_range[0], missing_range[1],
                                              o4_config.fstat_server_nearby()):
                    cl, path, line = fstat_cl_path(line)
                    if not cl:
                        continue
                    _last = cl
                    _first = _first or cl
                    all_filenames.add(path)
                    print(line, file=fout)
                    if from_changelist < cl <= to_changelist:
                        yield line
                summary['Fstat server'] = (missing_range, (int(_first), int(_last)))
                # The server covered the whole gap; nothing left for Perforce.
                missing_range = (None, None)
            except FstatRedirection as e:
                print(f'*** INFO: Fstat server redirected to changelist {e.cl}', file=sys.stderr)
                if e.cl > to_changelist:
                    print(f'*** WARNING: Fstat server redirected to {e.cl} which is greater',
                          f'than {to_changelist}.',
                          file=sys.stderr)
                    print(' Please contact [email protected].', file=sys.stderr)
                elif e.cl > cache_cl:
                    # Perforce fills in above the redirect target; the
                    # server is asked again for the part below it.
                    missing_range = (to_changelist, e.cl + 1)
                    o4server_range = (e.cl, cache_cl + 1)
            except FstatServerError:
                summary['Fstat server'] = (missing_range, (0, 0))

            highest_written_cl = max(highest_written_cl, int(_first))

        perforce_filenames = {}
        if missing_range[0]:
            retry = 3
            while retry:
                retry -= 1
                try:
                    for f in fstat_from_perforce(depot_path, missing_range[0], missing_range[1]):
                        if f[F_PATH] and f[F_PATH] not in all_filenames:
                            if from_changelist < int(f[F_CHANGELIST]) <= to_changelist:
                                yield fstat_join(f)
                            # Stored as int so the rows sort numerically
                            # below (presumably F_CHANGELIST is index 0).
                            f[0] = int(f[0])
                            perforce_filenames[f[F_PATH]] = f
                    break
                except P4Error as e:
                    done = False
                    for a in e.args:
                        fix = False
                        if 'Too many rows scanned' in a.get('data', ''):
                            if cache_cl:
                                msg = f"Maxrowscan occurred, ignoring cache {cache_fname}."
                                msg += ' This is probably due to a bad Root in your clientspec;'
                                msg += ' if not, contact the Perforce admins and let them know.'
                                print(f"{CLR}*** WARNING: {msg}", file=sys.stderr)
                                fix = True
                                # Retry without a lower bound; this attempt
                                # does not count against the retry budget.
                                missing_range = (to_changelist, None)
                                retry += 1
                        elif 'Request too large' in a.get('data', ''):
                            msg = f"*** ERROR: 'Request too large'. {depot_path} may be too broad."
                            if depot_path == '//...':
                                msg += ' This is almost certainly due to a bad Root in your clientspec.'
                            else:
                                msg += ' This may be due to a bad Root in your clientspec.'
                            sys.exit(f"{CLR}{msg}")
                        elif 'no such file' in a.get('data', ''):
                            print(f"{CLR}*** INFO: Empty changelist range ({missing_range}).",
                                  file=sys.stderr)
                            # Just an empty range of changelists, we are done
                            done = True
                            break
                        if not fix:
                            raise
                    if done:
                        break
                except P4TimeoutError:
                    # Drop the partial result before trying again.
                    perforce_filenames.clear()
                    print(f"{CLR}*** WARNING: ({retry+1}/3) P4 Timeout while getting fstat",
                          file=sys.stderr)
            else:
                # Reached only when the loop ran out of retries without a break.
                sys.exit(f"{CLR}*** ERROR: "
                         f"Too many P4 Timeouts for p4 fstat "
                         f"{depot_path}@{from_changelist},@{to_changelist}")

        all_filenames.update(perforce_filenames)
        if perforce_filenames:
            # Perforce rows may arrive unordered; the cache file needs them
            # in descending changelist order.
            perforce_rows = sorted(perforce_filenames.values(), reverse=True)
            summary['Perforce'] = (missing_range, (int(perforce_rows[0][F_CHANGELIST]),
                                                   int(perforce_rows[-1][F_CHANGELIST])))
            highest_written_cl = max(highest_written_cl, int(perforce_rows[0][F_CHANGELIST]))
            for f in perforce_rows:
                print(fstat_join(f), file=fout)
        del perforce_filenames

        if o4server_range[0]:
            # Second fstat server pass, only set up after a redirection.
            _first = _last = 0
            for line in fstat_from_server(depot_path, o4server_range[0], o4server_range[1]):
                cl, path, line = fstat_cl_path(line)
                if not cl:
                    continue
                _last = cl
                _first = _first or cl
                if path not in all_filenames:
                    all_filenames.add(path)
                    print(line, file=fout)
                    if from_changelist < cl <= to_changelist:
                        yield line
            summary['Fstat server'] = (o4server_range, (int(_first), int(_last)))
            highest_written_cl = max(highest_written_cl, int(_first))

        if cache_cl:
            _first = _last = 0
            for cl, path, line in fstat_from_csv(cache_fname, fstat_cl_path):
                if not cl:
                    continue
                _last = cl
                _first = _first or cl
                if path not in all_filenames:
                    print(line, file=fout)
                    if from_changelist < cl <= to_changelist:
                        yield line
                else:
                    # Superseded by a newer row that was already written.
                    all_filenames.remove(path)
            summary['Local cache'] = ((cache_cl, 1), (int(_first), int(_last)))
            highest_written_cl = max(highest_written_cl, int(_first))

        fout.close()
        fout = None
        if highest_written_cl:
            # Publish the merged data as the new read-only cache file.
            os.chmod(temp_fname, 0o444)
            os.rename(temp_fname, f'{cache_dir}/{highest_written_cl}.fstat.gz')
    finally:
        if fout:
            fout.close()
        try:
            # After a successful rename the temp name no longer exists.
            if temp_fname:
                os.unlink(temp_fname)
        except FileNotFoundError:
            pass
        # NOTE(review): nesting was reconstructed from a whitespace-collapsed
        # source; confirm that the summary belongs inside `finally`.
        _print_fstat_summary(summary)