def list_or_tuple_of(sub_schema):
    """Validate a homogeneous list *or* tuple whose items match *sub_schema*."""
    as_tuple = (sub_schema,)
    as_list = [sub_schema]
    return schema.Or(as_tuple, as_list)
def validate(data):
    """Validate and coerce a raw test-set mapping into domain objects.

    Fixture/step/guard/test sub-documents are converted into Fixture,
    Step, Condition and Test instances via ``schema.Use``.  Raises
    ``schema.SchemaError`` when *data* does not conform.
    """

    def is_file(path):
        # Predicate applied after absolute_path coercion (Path instance).
        return path.is_file()

    def to_fixture(data):
        return Fixture(**data)

    def to_step(data):
        return Step(**data)

    def guard_to_condition(guard):
        return Condition(guard["guard"])

    def to_test(data):
        # The YAML key uses a dash; the Test constructor expects snake_case.
        data["config_file"] = data.pop("config-file")
        return Test(**data)

    def absolute_path(path):
        # Expand "~" and resolve relative paths against the test-set dir.
        absolute = Path(os.path.expanduser(path))
        if not absolute.is_absolute():
            absolute = (SET_DIR / path).resolve()
        return absolute

    def replace_path(raw_command):
        # "@." is a placeholder for the test-set directory inside commands.
        return raw_command.replace("@.", str(SET_DIR))

    def to_command(raw_command):
        return shlex.split(replace_path(raw_command))

    def to_result(raw_result):
        return Result[raw_result.upper()]

    fixture = schema.Schema(
        schema.And(
            {
                "enter": schema.And(str, len),
                "exit": schema.And(str, len)
            },
            schema.Use(to_fixture),
        ))
    fixtures = schema.Schema({schema.And(str, len): fixture})
    step = schema.Schema(
        schema.And(
            {
                # Const keeps the original string while Use builds argv.
                "command":
                schema.And(schema.Const(schema.And(str, len)),
                           schema.Use(to_command)),
                schema.Optional("input", default=None):
                schema.And(schema.Use(absolute_path), is_file),
                schema.Optional("transformation", default=None):
                schema.Use(replace_path),
                schema.Optional("expected_result", default=Result.SUCCESS):
                schema.Use(to_result),
            },
            schema.Use(to_step),
        ))
    guard = schema.Schema(
        schema.And({"guard": str}, schema.Use(guard_to_condition)))
    test = schema.Schema(
        schema.And(
            {
                schema.Optional("tags", default=None): [str],
                schema.Optional("condition", default=None):
                schema.Use(Condition),
                schema.Optional("config-file", default=None):
                schema.Use(absolute_path),
                schema.Optional("fixture", default=None): str,
                "steps": [schema.Or(step, guard)],
            },
            schema.Use(to_test),
        ))
    tests = schema.Schema({schema.And(str, len): test})
    sch = schema.Schema({
        schema.Optional("config-file", default=None):
        schema.Use(absolute_path),
        schema.Optional("fixtures", default=None): fixtures,
        "tests": tests,
    })
    return sch.validate(data)
class Dependency(object):
    """A path (file or directory) a stage depends on, tracked by md5."""

    PARAM_RELPATH = 'relpath'
    PARAM_PATH = 'path'
    PARAM_MD5 = 'md5'

    MD5_DIR_SUFFIX = '.dir'

    SCHEMA = {
        PARAM_PATH: str,
        schema.Optional(PARAM_MD5): schema.Or(str, None),
    }

    def __init__(self, project, path, md5=None):
        self.project = project
        # Canonical absolute path with symlinks resolved.
        self.path = os.path.abspath(os.path.realpath(path))

        if not self.path.startswith(self.project.root_dir):
            raise CmdOutputOutsideOfRepoError(self.rel_path)

        self.md5 = md5

    @property
    def rel_path(self):
        """Path relative to the current working directory (for messages)."""
        return os.path.relpath(self.path)

    def _changed_md5(self):
        if not os.path.exists(self.path):
            return True

        return self.project.state.changed(self.path, self.md5)

    @staticmethod
    def _changed_msg(changed):
        if changed:
            return 'changed'
        return "didn't change"

    def changed(self):
        """Return True when the path's md5 no longer matches the saved one."""
        ret = self._changed_md5()

        msg = u'Dependency \'{}\' {}'.format(self.rel_path,
                                             self._changed_msg(ret))
        self.project.logger.debug(msg)

        return ret

    def status(self):
        if self.changed():
            # FIXME better msgs
            return {self.rel_path: 'changed'}
        return {}

    @staticmethod
    def is_dir_cache(cache):
        # BUG FIX: previously consulted Output.MD5_DIR_SUFFIX (copy-paste
        # from the Output class); use this class' own constant.
        return cache.endswith(Dependency.MD5_DIR_SUFFIX)

    def save(self):
        """Record the current md5 of the path in the project state."""
        if not os.path.exists(self.path):
            raise CmdOutputDoesNotExistError(self.rel_path)

        if not os.path.isfile(self.path) and not os.path.isdir(self.path):
            raise CmdOutputIsNotFileOrDirError(self.rel_path)

        self.md5 = self.project.state.update(self.path)

    @staticmethod
    def unixpath(path):
        # Serialized paths are always relative and use forward slashes.
        assert not ntpath.isabs(path)
        assert not posixpath.isabs(path)
        return path.replace('\\', '/')

    def dumpd(self, cwd):
        """Serialize to a dict whose path is relative to *cwd*."""
        # BUG FIX: previously used Output.PARAM_* keys; use our own.
        return {
            Dependency.PARAM_PATH:
                self.unixpath(os.path.relpath(self.path, cwd)),
            Dependency.PARAM_MD5: self.md5,
        }

    @classmethod
    def loadd(cls, project, d, cwd=os.curdir):
        # BUG FIX: previously used Output.unixpath/Output.PARAM_*; resolve
        # through cls so subclasses pick up their own constants.
        relpath = os.path.normpath(cls.unixpath(d[cls.PARAM_PATH]))
        path = os.path.join(cwd, relpath)
        md5 = d.get(cls.PARAM_MD5, None)
        return cls(project, path, md5=md5)

    @classmethod
    def loadd_from(cls, project, d_list, cwd=os.curdir):
        return [cls.loadd(project, x, cwd=cwd) for x in d_list]

    @classmethod
    def loads(cls, project, s, cwd=os.curdir):
        return cls(project, os.path.join(cwd, s), md5=None)

    @classmethod
    def loads_from(cls, project, s_list, cwd=os.curdir):
        return [cls.loads(project, x, cwd=cwd) for x in s_list]

    def stage(self):
        """Return the stage that produces this path as an output, if any."""
        for stage in self.project.stages():
            for out in stage.outs:
                if self.path == out.path:
                    return stage
        return None
class SvnScm(Scm):
    """Subversion SCM backend: checkout/update, Jenkins config and status."""

    SCHEMA = schema.Schema({
        'scm' : 'svn',
        'url' : str,
        schema.Optional('dir') : str,
        schema.Optional('if') : schema.Or(str, IfExpression),
        schema.Optional('revision') : schema.Or(int, str),
        schema.Optional('sslVerify') : bool,
    })

    def __init__(self, spec, overrides=[]):
        super().__init__(spec, overrides)
        self.__url = spec["url"]
        self.__dir = spec.get("dir", ".")
        self.__revision = spec.get("revision")
        self.__sslVerify = spec.get('sslVerify', True)

    def getProperties(self, isJenkins):
        """Augment base properties with the svn-specific settings."""
        ret = super().getProperties(isJenkins)
        ret.update({
            'scm' : 'svn',
            "url" : self.__url,
            "dir" : self.__dir,
            'sslVerify' : self.__sslVerify,
        })
        if self.__revision:
            ret["revision"] = self.__revision
        return ret

    async def invoke(self, invoker):
        """Check out or update the working copy via the svn CLI."""
        # Common options; never prompt interactively.
        options = [ "--non-interactive" ]
        if not self.__sslVerify:
            options += [ "--trust-server-cert-failures=unknown-ca,cn-mismatch,expired,not-yet-valid,other" ]
        if self.__revision:
            options += [ "-r", str(self.__revision) ]
        if os.path.isdir(invoker.joinPath(self.__dir, ".svn")):
            # Existing working copy: update, but never update tag checkouts
            # (tags are expected to be immutable).
            if "/tags/" not in self.__url:
                await invoker.checkCommand(["svn", "up"] + options,
                                           cwd=self.__dir)
        else:
            await invoker.checkCommand(["svn", "co"] + options +
                                       [self.__url, self.__dir])

    def asDigestScript(self):
        """Return forward compatible stable string describing this svn module.

        The module is represented as "url[@rev] > dir".
        """
        return (self.__url +
                ( ("@"+str(self.__revision)) if self.__revision else "" ) +
                " > " + self.__dir)

    def asJenkins(self, workPath, credentials, options):
        """Build the hudson.scm.SubversionSCM XML element for a Jenkins job."""
        scm = ElementTree.Element("scm", attrib={
            "class" : "hudson.scm.SubversionSCM",
            "plugin" : "[email protected]",
        })
        locations = ElementTree.SubElement(scm, "locations")
        location = ElementTree.SubElement(
            locations, "hudson.scm.SubversionSCM_-ModuleLocation")

        # Jenkins pins revisions via the url@rev syntax.
        url = self.__url
        if self.__revision:
            url += ( "@" + str(self.__revision) )

        ElementTree.SubElement(location, "remote").text = url
        credentialsId = ElementTree.SubElement(location, "credentialsId")
        if credentials:
            credentialsId.text = credentials
        ElementTree.SubElement(location, "local").text = (
            os.path.normpath(os.path.join(workPath, self.__dir)) )
        ElementTree.SubElement(location, "depthOption").text = "infinity"
        ElementTree.SubElement(location, "ignoreExternalsOption").text = "true"

        # Empty filter/exclusion elements are still required by the plugin.
        ElementTree.SubElement(scm, "excludedRegions")
        ElementTree.SubElement(scm, "includedRegions")
        ElementTree.SubElement(scm, "excludedUsers")
        ElementTree.SubElement(scm, "excludedRevprop")
        ElementTree.SubElement(scm, "excludedCommitMessages")
        ElementTree.SubElement(
            scm, "workspaceUpdater",
            attrib={"class":"hudson.scm.subversion.UpdateUpdater"})
        ElementTree.SubElement(scm, "ignoreDirPropChanges").text = "false"
        ElementTree.SubElement(scm, "filterChangelog").text = "false"

        return scm

    def getDirectory(self):
        return self.__dir

    def isDeterministic(self):
        # Only a pinned numeric revision yields reproducible checkouts.
        return str(self.__revision).isnumeric()

    def hasJenkinsPlugin(self):
        return True

    def callSubversion(self, workspacePath, *args):
        """Run an svn subcommand in the checkout; return stripped stdout.

        Raises BuildError on non-zero exit or when svn cannot be executed.
        """
        cmdLine = ['svn']
        cmdLine.extend(args)

        cwd = os.path.join(workspacePath, self.__dir)
        try:
            output = subprocess.check_output(cmdLine, cwd=cwd,
                universal_newlines=True, stderr=subprocess.DEVNULL)
        except subprocess.CalledProcessError as e:
            raise BuildError("svn error:\n Directory: '{}'\n Command: '{}'\n'{}'".format(
                cwd, " ".join(cmdLine), e.output.rstrip()))
        except OSError as e:
            raise BuildError("Error calling svn: " + str(e))
        return output.strip()

    def status(self, workspacePath):
        """Compare the working copy against the configured url/revision."""
        status = ScmStatus()
        try:
            output = self.callSubversion(workspacePath, 'status')
            if output:
                status.add(ScmTaint.modified,
                           joinLines("> modified:", indent(output, ' ')))

            output = self.callSubversion(workspacePath, 'info', '--xml')
            info = ElementTree.fromstring(output)
            entry = info.find('entry')
            url = entry.find('url').text
            revision = entry.attrib['revision']

            if self.__url != url:
                status.add(ScmTaint.switched,
                           "> URL: configured: '{}', actual: '{}'".format(
                               self.__url, url))
            if self.__revision is not None and \
               int(revision) != int(self.__revision):
                status.add(ScmTaint.switched,
                           "> revision: configured: {}, actual: {}".format(
                               self.__revision, revision))
        except BuildError as e:
            status.add(ScmTaint.error, e.slogan)

        return status

    def getAuditSpec(self):
        return ("svn", self.__dir, {})
class FlatpakPackagesPlugin(_AbstractFlatpakPlugin):
    """Install flatpak applications/runtimes from remotes, refs or bundles."""

    key = 'flatpak-packages'

    # Each configured entry is a bare package name/URL or a dict of options.
    schema = [
        str,
        {
            schema.Optional('package'): str,
            schema.Optional('type'): schema.Or('bundle', 'ref', 'app',
                                               'runtime'),
            schema.Optional('target'): schema.Or('system', 'user'),
            schema.Optional('remote'): str
        }
    ]

    @staticmethod
    def _download_package(url: str, suffix: str = '.flatpakref') -> str:
        """Download *url* into a persistent temp file; return its path.

        BUG FIX: the previous version created a NamedTemporaryFile, then
        opened file.name a second time to write, leaving the first handle
        open (a file-descriptor leak). Write through the temp file itself
        and let the context manager close it.
        """
        with tempfile.NamedTemporaryFile('wb', prefix='download_',
                                         suffix=suffix,
                                         delete=False) as out_file:
            with urllib.request.urlopen(url) as response:
                shutil.copyfileobj(response, out_file)
            return out_file.name

    @staticmethod
    def _get_flatpakref_application_name(filepath: str) -> str:
        """Parse the application's ``Name=`` entry from a .flatpakref file."""
        with open(filepath) as file:
            text = file.read()
        for line in text.splitlines():
            if line.startswith('Name='):
                return line.split('=')[1].strip()
        raise Exception('Error parsing flatpakref file: {}'.format(filepath))

    def _check_is_application_installed(self, name: str) -> bool:
        # Crude substring search over "flatpak list" output.
        output = self.run_command('flatpak', 'list')
        return output.find(name) != -1

    def _install_flatpak_package(self, app: str, remote: str = None,
                                 type_: str = None, target: str = 'system'):
        """Install *app* (name, ref or bundle path), optionally from *remote*."""
        # Flatpak considers it an error to install already
        # installed applications.
        # Workaround by using "--reinstall" which uninstalls
        # the application first if it is already installed.
        # FIXME: At least with .flatpak bundles, the "--reinstall"
        # option doesn't seem to fix the issue currently.
        cmd = ['flatpak', 'install', '--reinstall', '-y']
        if target == 'user':
            cmd += ['--user']
        else:
            cmd += ['--system']
        if type_ is not None:
            if type_ == 'ref':
                cmd += ['--from']
            elif type_ == 'bundle':
                cmd += ['--bundle']
            elif type_ == 'runtime':
                cmd += ['--runtime']
            elif type_ == 'app':
                cmd += ['--app']
        if remote is not None:
            cmd += [remote]
        cmd += [app]
        if target == 'system':
            self.run_command_sudo(*cmd)
        else:
            self.run_command(*cmd)

    def perform(self):
        """Ensure flatpak is available, then install each configured package."""
        # Install flatpak if not already installed
        if not self._check_is_flatpak_installed():
            self._install_flatpak()
            assert self._check_is_flatpak_installed()

        for flatpak in self.config:
            target = 'system'
            type_ = None
            remote = None
            if isinstance(flatpak, dict):
                package = flatpak['package']
                if 'target' in flatpak:
                    target = flatpak['target']
                if 'type' in flatpak:
                    type_ = flatpak['type']
                if 'remote' in flatpak:
                    remote = flatpak['remote']
            else:
                package = flatpak

            # Determine type based on the package argument's extension.
            if type_ is None:
                if package[package.rfind('.') + 1:] == 'flatpakref':
                    type_ = 'ref'
                elif package[package.rfind('.') + 1:] == 'flatpak':
                    type_ = 'bundle'
                else:
                    type_ = 'app'

            # Download remote bundles or refs.
            # This is required for bundles because it is not currently
            # supported by Flatpak to download remote .flatpak bundles. We
            # also download remote .flatpakref files to check whether the
            # application is already installed before attempting an
            # installation.
            if type_ in ('ref', 'bundle'):
                is_remote_package = bool(
                    urllib.parse.urlparse(package).scheme)
                if is_remote_package:
                    package = self._download_package(
                        package,
                        suffix='.flatpakref' if type_ == 'ref'
                        else '.flatpak')
                else:
                    # Consider the package to be a local file,
                    # therefore expand the path:
                    package = self._expand_path(package)

            # In case of .flatpakref files, we will only perform
            # the installation if the application is not already installed.
            if type_ == 'ref':
                app_name = self._get_flatpakref_application_name(package)
                if self._check_is_application_installed(app_name):
                    continue

            # NOTE: Doesn't check whether the application is
            # already installed or not. Will perform a reinstall
            # if the application is already installed.
            self._install_flatpak_package(package, remote, type_, target)
class GitAudit(ScmAudit):
    """Audit record of a git workspace: remotes, commit, dirtiness, submodules."""

    SCHEMA = schema.Schema({
        'type': 'git',
        'dir': str,
        'remotes': {
            schema.Optional(str): str
        },
        'commit': str,
        'description': str,
        'dirty': bool,
        schema.Optional('submodules'): schema.Or(bool, [str]),
        schema.Optional('recurseSubmodules'): bool,
    })

    async def _scanDir(self, workspace, dir, extra):
        """Snapshot the git state of ``workspace/dir`` into this audit."""
        self.__dir = dir
        self.__submodules = extra.get('submodules', False)
        self.__recurseSubmodules = extra.get('recurseSubmodules', False)
        dir = os.path.join(workspace, dir)
        try:
            remotes = (await check_output(
                ["git", "remote", "-v"],
                cwd=dir, universal_newlines=True)).split("\n")
            # Each fetch line looks like "<name>\t<url> (fetch)"; strip the
            # 8-char " (fetch)" suffix and split name from url.
            remotes = (r[:-8].split("\t") for r in remotes
                       if r.endswith("(fetch)"))
            self.__remotes = {remote: url for (remote, url) in remotes}

            self.__commit = (await check_output(
                ["git", "rev-parse", "HEAD"],
                cwd=dir, universal_newlines=True)).strip()
            self.__description = (await check_output(
                ["git", "describe", "--always", "--dirty=-dirty"],
                cwd=dir, universal_newlines=True)).strip()
            subDirty = await self.__scanSubmodules(dir, self.__submodules)
            self.__dirty = subDirty or self.__description.endswith("-dirty")
        except subprocess.CalledProcessError as e:
            raise BuildError("Git audit failed: " + str(e))
        except OSError as e:
            raise BuildError("Error calling git: " + str(e))

    async def __scanSubmodules(self, dir, shouldExist, base="."):
        """Recursively check submodule state below *base*.

        Returns True on any deviation: a submodule is missing, present but
        not expected, at the wrong commit, or locally modified.
        """
        if not os.path.exists(os.path.join(dir, base, ".gitmodules")):
            return False

        # List all paths as per .gitmodules. This gives us the list of all
        # known submodules.
        try:
            allPaths = await check_output(
                ["git", "-C", base, "config", "-f", ".gitmodules", "-z",
                 "--get-regexp", "path"],
                cwd=dir, universal_newlines=True)
        except subprocess.CalledProcessError:
            allPaths = ""  # No key found in file. Probably empty
        # NUL-separated "key\nvalue" records; keep the value (the path).
        allPaths = [p.split("\n")[1] for p in allPaths.split("\0") if p]
        if not allPaths:
            return False

        # Fetch the respective commits as per git ls-tree
        allPaths = await check_output(
            ["git", "-C", base, "ls-tree", "-z", "HEAD"] + allPaths,
            cwd=dir, universal_newlines=True)
        # Keep only gitlink ("commit") entries: path -> recorded commit id.
        allPaths = {
            path: attribs.split(' ')[2]
            for attribs, path in (p.split('\t')
                                  for p in allPaths.split('\0') if p)
            if attribs.split(' ')[1] == "commit"
        }

        # Normalize subset of submodules
        if isinstance(shouldExist, list):
            shouldExist = set(normPath(p) for p in shouldExist)
        elif shouldExist:
            shouldExist = set(normPath(p) for p in allPaths.keys())
        else:
            shouldExist = set()

        # Check each submodule for their commit and modifications.
        # Unconditionally recurse to even see if something is there even
        # though it shouldn't. Bail out on first modification.
        for path, commit in sorted(allPaths.items()):
            subPath = os.path.join(base, path)
            subShouldExist = normPath(path) in shouldExist
            if not os.path.exists(os.path.join(dir, subPath, ".git")):
                if subShouldExist:
                    return True  # submodule is missing
                elif not dirIsEmpty(os.path.join(dir, subPath)):
                    return True  # something in submodule which should not be there
                else:
                    continue
            elif not subShouldExist:
                # submodule checked out even though it shouldn't
                return True

            realCommit = (await check_output(
                ["git", "-C", subPath, "rev-parse", "HEAD"],
                cwd=dir, universal_newlines=True)).strip()
            if commit != realCommit:
                return True  # different commit checked out

            proc = await run(
                ["git", "-C", subPath, "diff-index", "--quiet", "HEAD", "--"],
                cwd=dir)
            if proc.returncode != 0:
                return True  # dirty

            if await self.__scanSubmodules(dir, self.__recurseSubmodules,
                                           subPath):
                return True  # sub-submodule modified

        return False

    def _load(self, data):
        """Restore audit state from a previously dumped dict (see dump())."""
        self.__dir = data["dir"]
        self.__remotes = data["remotes"]
        self.__commit = data["commit"]
        self.__description = data["description"]
        self.__dirty = data["dirty"]
        self.__submodules = data.get("submodules", False)
        self.__recurseSubmodules = data.get("recurseSubmodules", False)

    def dump(self):
        """Serialize to a dict matching SCHEMA; optional keys only if set."""
        ret = {
            "type": "git",
            "dir": self.__dir,
            "remotes": self.__remotes,
            "commit": self.__commit,
            "description": self.__description,
            "dirty": self.__dirty,
        }
        if self.__submodules:
            ret["submodules"] = self.__submodules
        if self.__recurseSubmodules:
            ret["recurseSubmodules"] = True
        return ret

    def getStatusLine(self):
        return self.__description
class Stage(object):
    """A single DVC pipeline stage: a command plus its deps and outputs."""

    STAGE_FILE = 'Dvcfile'
    STAGE_FILE_SUFFIX = '.dvc'

    PARAM_CMD = 'cmd'
    PARAM_DEPS = 'deps'
    PARAM_OUTS = 'outs'

    SCHEMA = {
        schema.Optional(PARAM_CMD): schema.Or(str, None),
        schema.Optional(PARAM_DEPS):
            schema.Or(schema.And(list, schema.Schema([Dependency.SCHEMA])),
                      None),
        schema.Optional(PARAM_OUTS):
            schema.Or(schema.And(list, schema.Schema([Output.SCHEMA])),
                      None),
    }

    def __init__(self, project, path=None, cmd=None, cwd=None, deps=None,
                 outs=None):
        # BUG FIX: 'deps'/'outs' previously used mutable default arguments
        # ([]) shared between all calls; None sentinels preserve the API.
        self.project = project
        self.path = path
        self.cmd = cmd
        self.cwd = cwd
        self.outs = [] if outs is None else outs
        self.deps = [] if deps is None else deps

    @property
    def relpath(self):
        """Stage file path relative to the current working directory."""
        return os.path.relpath(self.path)

    @property
    def is_data_source(self):
        # A stage without a command only tracks data.
        return self.cmd is None

    @staticmethod
    def is_stage_file(path):
        """True when *path* is an existing stage file by name/suffix."""
        if not os.path.isfile(path):
            return False

        if not path.endswith(Stage.STAGE_FILE_SUFFIX) and \
           os.path.basename(path) != Stage.STAGE_FILE:
            return False

        return True

    def changed(self):
        """Return True when any output or dependency changed."""
        ret = False
        for entry in itertools.chain(self.outs, self.deps):
            if entry.changed():
                ret = True

        if ret:
            self.project.logger.debug(u'Dvc file \'{}\' changed'.format(
                self.relpath))
        else:
            self.project.logger.debug(
                u'Dvc file \'{}\' didn\'t change'.format(self.relpath))

        return ret

    def remove_outs(self):
        for out in self.outs:
            out.remove()
            if out.use_cache:
                self.project.scm.ignore_remove(out.path)

    def remove(self):
        self.remove_outs()
        os.unlink(self.path)

    def reproduce(self, force=False):
        """Re-run the stage; returns self when run, None when up to date."""
        if not self.changed() and not force:
            return None

        if self.cmd:
            # Removing outputs only if we actually have command to reproduce
            self.remove_outs()

        self.run()

        return self

    @staticmethod
    def validate(d):
        """Raise StageFileFormatError when *d* violates SCHEMA."""
        try:
            schema.Schema(Stage.SCHEMA).validate(d)
        except schema.SchemaError as exc:
            Logger.debug(str(exc))
            raise StageFileFormatError()

    @staticmethod
    def loadd(project, d, path):
        """Build a Stage from an already-parsed stage-file dict."""
        Stage.validate(d)

        path = os.path.abspath(path)
        cwd = os.path.dirname(path)
        cmd = d.get(Stage.PARAM_CMD, None)
        deps = Dependency.loadd_from(project, d.get(Stage.PARAM_DEPS, []),
                                     cwd=cwd)
        outs = Output.loadd_from(project, d.get(Stage.PARAM_OUTS, []),
                                 cwd=cwd)

        return Stage(project=project, path=path, cmd=cmd, cwd=cwd,
                     deps=deps, outs=outs)

    @staticmethod
    def loads(project=None, cmd=None, deps=None, outs=None,
              outs_no_cache=None, fname=None, cwd=os.curdir):
        """Build a Stage from raw string paths (same mutable-default fix)."""
        deps = [] if deps is None else deps
        outs = [] if outs is None else outs
        outs_no_cache = [] if outs_no_cache is None else outs_no_cache

        cwd = os.path.abspath(cwd)
        path = os.path.join(cwd, fname)

        outputs = Output.loads_from(project, outs, use_cache=True, cwd=cwd)
        outputs += Output.loads_from(project, outs_no_cache,
                                     use_cache=False, cwd=cwd)
        dependencies = Dependency.loads_from(project, deps, cwd=cwd)

        return Stage(project=project, path=path, cmd=cmd, cwd=cwd,
                     outs=outputs, deps=dependencies)

    @staticmethod
    def load(project, fname):
        with open(fname, 'r') as fd:
            return Stage.loadd(project, yaml.safe_load(fd), fname)

    def dumpd(self):
        """Serialize to a dict suitable for the stage file (empty keys omitted)."""
        deps = [x.dumpd(self.cwd) for x in self.deps]
        outs = [x.dumpd(self.cwd) for x in self.outs]

        ret = {}
        if self.cmd is not None:  # BUG FIX: was "!= None"
            ret[Stage.PARAM_CMD] = self.cmd

        if deps:
            ret[Stage.PARAM_DEPS] = deps

        if outs:
            ret[Stage.PARAM_OUTS] = outs

        return ret

    def dump(self, fname=None):
        if not fname:
            fname = self.path

        with open(fname, 'w') as fd:
            yaml.safe_dump(self.dumpd(), fd, default_flow_style=False)

    def save(self):
        for dep in self.deps:
            dep.save()

        for out in self.outs:
            out.save()
            if out.use_cache:
                self.project.scm.ignore(out.path)

    def run(self):
        """Execute the stage command (or verify data sources) and save state."""
        if not self.is_data_source:
            self.project.logger.info(u'Reproducing \'{}\':\n\t{}'.format(
                self.relpath, self.cmd))

            # NOTE: shell=True is intentional — self.cmd is a user-supplied
            # shell command line taken from the stage file.
            p = subprocess.Popen(self.cmd, cwd=self.cwd, shell=True)
            p.communicate()
            if p.returncode != 0:
                raise StageCmdFailedError(self)

            self.save()
            self.project.logger.debug(u'\'{}\' was reproduced'.format(
                self.relpath))
        else:
            self.project.logger.info(
                u'Verifying data sources in \'{}\''.format(self.relpath))
            self.check_missing_outputs()
            self.save()

    def check_missing_outputs(self):
        missing_outs = [
            out.rel_path for out in self.outs
            if not os.path.exists(out.rel_path)
        ]
        if missing_outs:
            raise MissingDataSource(missing_outs)

    def checkout(self):
        for out in self.outs:
            out.checkout()

    def _status(self, entries, name):
        ret = {}

        for entry in entries:
            ret.update(entry.status())

        if ret:
            return {name: ret}

        return {}

    def status(self):
        """Aggregate status of deps and outs keyed by the stage's relpath."""
        ret = {}

        ret.update(self._status(self.deps, 'deps'))
        ret.update(self._status(self.outs, 'outs'))

        if ret:
            return {self.relpath: ret}

        return {}
async def track(self):
    """ Do the polling for finished jobs. """
    while True:
        # no lock, atomic, copy() because keys() returns an iterable view
        # instead of a fresh new list in python3
        running_jobs = self.running_jobs.copy().keys()

        # somewhat inefficient for the potential number of requests per
        # poll round but more efficient in that it does not download an
        # ever growing list of jobs using tasks/list. Might also take
        # quite a while to get to all jobs if retries happen on each
        # request.
        #
        # A call to get data about multiple tasks in one go would be nice
        # here. tasks/list could be used with the minimum job number as
        # offset and spread between highest and lowest job id as limit *if*
        # its output was sorted by job ID. Apparently limit and offset are
        # only meant to iterate over the job list in blocks but not to
        # return data about a specific range of job IDs from that list.
        for job_id in running_jobs:
            # report is an extended version of job status, so we can
            # optimise the number of requests here
            request_url = urllib.parse.urljoin(
                self.url, f'/api/job/{job_id}/report')
            try:
                async for attempt in self.retrier:
                    with attempt:
                        async with self.session.get(
                                request_url) as response:
                            json_resp = await response.json()

                cortexjob = schema.Schema(
                    {
                        'status': str,
                        # only to make sure the key is there,
                        # is status string while not finished,
                        # dict afterwards
                        'report': schema.Or(
                            {}, str, ignore_extra_keys=True),
                    },
                    ignore_extra_keys=True).validate(json_resp)
            except (ValueError, schema.SchemaError) as error:
                logger.error('Invalid JSON in job status: %s', error)
                continue
            except tenacity.RetryError as error:
                logger.error('Querying Cortex job status failed: %s',
                             error)
                continue

            job_status = cortexjob['status']
            if job_status in ['Success']:
                # pass original report element from json response for
                # validation and storage
                await self.resubmit_with_analyzer_report(
                    job_id, json_resp['report'])
                continue

            if job_status in ['Failure', 'Deleted']:
                logger.warning(
                    "Dropping job %s because it has failed "
                    "in Cortex", job_id)
                await self.resubmit_as_failed(job_id)
                continue

            # drop jobs which have been running for too long. This is
            # mainly to prevent accumulation of jobs in our job list which
            # will never finish. We still want to wait for jobs to finish
            # even though our client might not be interested any more so
            # that we have the result cached for the next time we get the
            # same sample.
            await self.resubmit_as_failed_if_too_old(
                job_id, self.max_job_age)

        await asyncio.sleep(self.poll_interval)

    # NOTE(review): the loop above has no break, so this line appears
    # unreachable unless the task is cancelled — and CancelledError would
    # skip it as well. Confirm the intended shutdown path.
    logger.debug("Cortex job tracker shut down.")
Sync the docker daemon clock up to current clock. ''' with self.logger.info('sync_clock'): cmd = 'date +%s -s @' + str(int(time.time())) self.executor.docker.run( CENTOS, cmd, rm=None, privileged=None ).interactive() _EMPTY = attr.make_class('Empty', [])() # represents a value that should be dropped from the YAML _DEPENDS_ON_ALL = '*' # special value for depends_on for connecting to everything else # (mainly for jupyter notebook or similar debugging tool) SK_CENTRAL_REMOTE = 'USER@GITLAB_HOSTNAME:REPO' _ENV_SCHEMA = schema.Or( [schema.Regex('[_a-zA-Z][_a-zA-Z0-9]*(=[^=]+)?')], {schema.Regex('[_a-zA-Z][_a-zA-Z0-9]*'): schema.Or(str, None)}) _PORT_SCHEMA = schema.Regex(r'\d+(\:\d+)?') ''' Environment variables in docker-compose: https://docs.docker.com/compose/compose-file/#/environment environment: RACK_ENV: development SHOW: 'true' SESSION_SECRET: environment: - RACK_ENV=development
if min_count == max_count: raise ValueError( f'Expecting a sequence of {min_count} values ({type_})' ) raise ValueError( f'Expecting a sequence betweeen {min_count} and {max_count} values' f' ({type_})' ) return validate common_schema = { Optional('tags'): {schema.Or(*tags.get_valid_tags())}, } _default_bitmask_style = dict(shape='rectangle', on_color='green', off_color='gray') schema_by_category = { 'command': schema.Schema({ 'variety': schema.Or(*varieties_by_category['command']), Optional('value', default=1): schema.Or(float, int, str), Optional('enum_strings'): [str], Optional('enum_dict'): dict, **common_schema }),
class FileInfoAnalyzerReport(CortexAnalyzerReport):
    """ Represents a Cortex FileInfo_8_0 analysis JSON report. """

    report_schema = schema.Schema({
        "summary": {
            "taxonomies": [
                schema.Schema(
                    {
                        "level": schema.Or("info", "malicious", "safe"),
                        "namespace": "FileInfo",
                        # "predicate": str,
                        # "value": str
                    },
                    ignore_extra_keys=True)
            ]
        },
        "full": {
            "results": [{
                "submodule_name": "Basic properties",
                "results": [
                    {
                        "submodule_section_header": "Hashes",
                        "submodule_section_content": {
                            "md5": schema.Regex(r'^[0-9a-z]{32}$'),
                            "sha1": schema.Regex(r'^[0-9a-z]{40}$'),
                            "sha256": schema.Regex(r'^[0-9a-z]{64}$'),
                            "ssdeep": schema.Regex(r'^[0-9A-Za-z:+/]*$'),
                        }
                    },
                    {
                        # We consume further structures submodule_sections
                        # and explicitly check the submodule_section_header
                        # to not be "Hashes" or it will accept
                        # "Hashes"-structures with malformed hashes.
                        "submodule_section_header":
                            schema.And(str, lambda s: s != "Hashes"),
                        "submodule_section_content":
                            schema.Schema({}, ignore_extra_keys=True)
                    },
                ],
                "summary": {
                    "taxonomies": [
                        schema.Schema(
                            {
                                "level": schema.Or("info", "malicious",
                                                   "safe"),
                                "namespace": "FileInfo",
                                # "predicate": str,
                                # "value": str
                            },
                            ignore_extra_keys=True)
                    ]
                }
            }]
        },
        "success": bool,
        "artifacts": CortexAnalyzerReport.report_schema_artifacts,
        "operations": []
    })

    def __init__(self, unvalidated_report=None):
        """ @param report: hash with report data from Cortex FileInfo
        Analyzer """
        super().__init__(unvalidated_report)
        # BUG FIX: the fallback defaults were swapped — .get('full', [])
        # returned a *list* whose .get() does not exist, crashing whenever
        # 'full' was absent. Fall back to a dict for 'full' and a list for
        # 'results'.
        basic_properties = self.get_element_from_list_of_dicts(
            self.report.get('full', {}).get('results', []),
            'submodule_name', 'Basic properties').get('results', [])
        self._hashes = self.get_element_from_list_of_dicts(
            basic_properties, 'submodule_section_header',
            'Hashes').get('submodule_section_content', {})

    @property
    def sha256sum(self):
        """ Return the sha256 sum. """
        return self._hashes.get('sha256')

    @property
    def md5sum(self):
        """ Return the md5 sum. """
        return self._hashes.get('md5')

    @property
    def ssdeepsum(self):
        """ Return the ssdeep sum. """
        # TODO: think about if we want to compare ssdeep hashes
        return self._hashes.get('ssdeep')
class CaseTransformation(BaseTransformation):
    """SQL CASE-like transformation: WHEN/ELSE conditions applied to a field."""

    schema = schema.Schema({
        "field": str,
        "cond": [
            schema.Or(
                {
                    "case": str,
                    "operator": str,
                    "value": object,
                    "result": object
                },
                {
                    "case": str,
                    "result": object
                },
                only_one=False,
            )
        ],
    })

    @staticmethod
    def sort_by_case(condition):
        """Sort key: WHEN first (0), ELSE next (1), anything else after (2/3)."""
        try:
            cond = condition["case"].lower()
            if cond == "when":
                return 0
            elif cond == "else":
                return 1
            else:
                return 2
        except Exception:
            # BUG FIX: was a bare "except:", which also swallowed
            # KeyboardInterrupt/SystemExit. Exception keeps the same
            # malformed-condition fallback without that hazard.
            return 3

    def apply(self, row: Dict, **kwargs):
        """Return the result of the first matching WHEN branch, else ELSE.

        Returns None implicitly when no branch matches.
        """
        field_value = row.get(self.args["field"], None)
        # BUG FIX: previously sorted self.args["cond"] in place, mutating
        # the shared transformation config as a side effect of apply().
        # sorted() yields the same evaluation order without the mutation.
        conditions = sorted(self.args["cond"], key=self.sort_by_case)
        for cond in conditions:
            l_cond = cond["case"].lower()
            if l_cond == "when":
                operator = COMPARISON_OPERATORS_MAPPING.get(
                    cond["operator"], None)
                if operator is not None:
                    field_value = self.cast_field(field_value)
                    cond_value = self.cast_field(cond["value"])
                    if field_value is None and cond_value is None:
                        # Two NULLs only match under equality.
                        if cond["operator"] == "=":
                            return cond["result"]
                        else:
                            continue
                    if field_value is None and cond_value is not None:
                        continue
                    if field_value is not None and cond_value is None:
                        continue
                    try:
                        if operator(field_value, cond_value):
                            return cond["result"]
                    except Exception:
                        # Incomparable types: treat as a non-match.
                        continue
            elif l_cond == "else":
                return cond["result"]
def default_if_none(sub_schema, default_factory):
    """Coerce ``None`` inputs to a freshly built default value."""
    def _make_default(_ignored):
        # The validated value (always None here) is irrelevant.
        return default_factory()

    coerce_none = schema.And(None, schema.Use(_make_default))
    return schema.Or(coerce_none, sub_schema)
def none_or(sub_schema):
    """Accept ``None`` or any value matching *sub_schema*."""
    alternatives = (None, sub_schema)
    return schema.Or(*alternatives)
class DownloadInfo: #pylint: disable=too-few-public-methods
    """Representation of an downloads.ini file for downloading files"""

    # Supported checksum algorithm keys within a section.
    _hashes = ('md5', 'sha1', 'sha256', 'sha512')
    # A hash_url value contains exactly two of these delimiters
    # (see _is_hash_url).
    hash_url_delimiter = '|'
    _nonempty_keys = ('url', 'download_filename')
    _optional_keys = (
        'version',
        'strip_leading_dirs',
    )
    # Section keys exposed verbatim as attributes on _DownloadsProperties.
    _passthrough_properties = (*_nonempty_keys, *_optional_keys, 'extractor',
                               'output_path')
    # Interpolation defaults injected into every INI section.
    _ini_vars = {
        '_chromium_version': get_chromium_version(),
    }

    @staticmethod
    def _is_hash_url(value):
        # A valid hash URL has exactly two delimiters and a recognized
        # hash-list kind as its first component.
        return value.count(
            DownloadInfo.hash_url_delimiter) == 2 and value.split(
                DownloadInfo.hash_url_delimiter)[0] in iter(HashesURLEnum)

    _schema = schema.Schema({
        schema.Optional(schema.And(str, len)): {
            **{x: schema.And(str, len)
               for x in _nonempty_keys},
            # output_path must be a safe relative path (no escaping '..').
            'output_path': (lambda x: str(Path(x).relative_to(''))),
            **{
                schema.Optional(x): schema.And(str, len)
                for x in _optional_keys
            },
            schema.Optional('extractor'):
            schema.Or(ExtractorEnum.TAR, ExtractorEnum.SEVENZIP,
                      ExtractorEnum.WINRAR),
            schema.Optional(schema.Or(*_hashes)):
            schema.And(str, len),
            schema.Optional('hash_url'):
            lambda x: DownloadInfo._is_hash_url(x), #pylint: disable=unnecessary-lambda
        }
    })

    class _DownloadsProperties: #pylint: disable=too-few-public-methods
        """Attribute-style read-only view over one INI section."""

        def __init__(self, section_dict, passthrough_properties, hashes):
            self._section_dict = section_dict
            self._passthrough_properties = passthrough_properties
            self._hashes = hashes

        def has_hash_url(self):
            """
            Returns a boolean indicating whether the current download
            has a hash URL"""
            return 'hash_url' in self._section_dict

        def __getattr__(self, name):
            # Passthrough keys map straight to the section (None if unset).
            if name in self._passthrough_properties:
                return self._section_dict.get(name, fallback=None)
            if name == 'hashes':
                # Collect all present hash entries; hash_url values are
                # split on the delimiter into their components.
                hashes_dict = dict()
                for hash_name in (*self._hashes, 'hash_url'):
                    value = self._section_dict.get(hash_name, fallback=None)
                    if value:
                        if hash_name == 'hash_url':
                            value = value.split(
                                DownloadInfo.hash_url_delimiter)
                        hashes_dict[hash_name] = value
                return hashes_dict
            raise AttributeError('"{}" has no attribute "{}"'.format(
                type(self).__name__, name))

    def _parse_data(self, path):
        """
        Parses an INI file located at path

        Raises schema.SchemaError if validation fails
        """

        def _section_generator(data):
            # Yield (section, items) pairs, hiding the injected
            # interpolation variables from validation and consumers.
            for section in data:
                if section == configparser.DEFAULTSECT:
                    continue
                yield section, dict(
                    filter(lambda x: x[0] not in self._ini_vars,
                           data.items(section)))

        new_data = configparser.ConfigParser(defaults=self._ini_vars)
        with path.open(encoding=ENCODING) as ini_file:
            new_data.read_file(ini_file, source=str(path))
        try:
            self._schema.validate(dict(_section_generator(new_data)))
        except schema.SchemaError as exc:
            get_logger().error(
                'downloads.ini failed schema validation (located in %s)',
                path)
            raise exc
        return new_data

    def __init__(self, ini_paths):
        """Reads an iterable of pathlib.Path to download.ini files"""
        self._data = configparser.ConfigParser()
        for path in ini_paths:
            self._data.read_dict(self._parse_data(path))

    def __getitem__(self, section):
        """
        Returns an object with keys as attributes and
        values already pre-processed strings
        """
        return self._DownloadsProperties(self._data[section],
                                         self._passthrough_properties,
                                         self._hashes)

    def __contains__(self, item):
        """
        Returns True if item is a name of a section; False otherwise.
        """
        return self._data.has_section(item)

    def __iter__(self):
        """Returns an iterator over the section names"""
        return iter(self._data.sections())

    def properties_iter(self):
        """Iterator for the download properties sorted by output path"""
        return sorted(map(lambda x: (x, self[x]), self),
                      key=(lambda x: str(Path(x[1].output_path))))
} SCHEMA = { OutputBase.PARAM_PATH: str, # NOTE: currently there are only 3 possible checksum names: # # 1) md5 (LOCAL, SSH, GS); # 2) etag (S3); # 3) checksum (HDFS); # # so when a few types of outputs share the same name, we only need # specify it once. schema.Optional(RemoteLOCAL.PARAM_CHECKSUM): schema.Or(str, None), schema.Optional(RemoteS3.PARAM_CHECKSUM): schema.Or(str, None), schema.Optional(RemoteHDFS.PARAM_CHECKSUM): schema.Or(str, None), schema.Optional(OutputBase.PARAM_CACHE): bool, schema.Optional(OutputBase.PARAM_METRIC): OutputBase.METRIC_SCHEMA, } def _get(stage, p, info, cache, metric): parsed = urlparse(p) if parsed.scheme == 'remote': name = Config.SECTION_REMOTE_FMT.format(parsed.netloc)
class GitScm(Scm):
    """SCM handler for git repositories.

    Resolves a recipe spec (url plus branch/tag/commit/rev and various
    checkout options) into git commands that create and update a workspace
    checkout, including optional submodule handling, and provides the
    digest/audit/live-build-id plumbing on top of that checkout.
    """

    # Recipe schema: exactly one of branch/tag/commit/rev selects the
    # revision; "remote-<name>" keys declare additional git remotes.
    SCHEMA = schema.Schema({
        'scm': 'git',
        'url': str,
        schema.Optional('dir'): str,
        schema.Optional('if'): schema.Or(str, IfExpression),
        schema.Optional('branch'): str,
        schema.Optional('tag'): str,
        schema.Optional('commit'): str,
        schema.Optional('rev'): str,
        schema.Optional(schema.Regex('^remote-.*')): str,
        schema.Optional('sslVerify'): bool,
        schema.Optional('singleBranch'): bool,
        schema.Optional('shallow'): schema.Or(int, str),
        schema.Optional('submodules'): schema.Or(bool, [str]),
        schema.Optional('recurseSubmodules'): bool,
        schema.Optional('shallowSubmodules'): bool,
    })

    REMOTE_PREFIX = "remote-"

    def __init__(self, spec, overrides=[], secureSSL=None, stripUser=None):
        """Parse and validate the spec dictionary.

        A generic "rev" is decomposed into branch/tag/commit first; explicit
        "branch"/"tag"/"commit" keys then take precedence over it. Raises
        ParseError on malformed revisions, commit ids or remote names.
        """
        super().__init__(spec, overrides)
        self.__url = spec["url"]
        self.__branch = None
        self.__tag = None
        self.__commit = None
        self.__remotes = {}
        if "rev" in spec:
            rev = spec["rev"]
            if rev.startswith("refs/heads/"):
                self.__branch = rev[11:]
            elif rev.startswith("refs/tags/"):
                self.__tag = rev[10:]
            elif len(rev) == 40:
                # a bare 40-char string is taken as a full SHA1 commit id
                self.__commit = rev
            else:
                raise ParseError("Invalid rev format: " + rev)
        self.__branch = spec.get("branch", self.__branch)
        self.__tag = spec.get("tag", self.__tag)
        self.__commit = spec.get("commit", self.__commit)
        if self.__commit:
            # validate commit
            if re.match("^[0-9a-f]{40}$", self.__commit) is None:
                raise ParseError("Invalid commit id: " + str(self.__commit))
        elif not self.__branch and not self.__tag:
            # nothing specified at all -> master branch
            self.__branch = "master"
        self.__dir = spec.get("dir", ".")
        # convert remotes into separate dictionary
        for key, val in spec.items():
            if key.startswith(GitScm.REMOTE_PREFIX):
                stripped_key = key[len(GitScm.REMOTE_PREFIX):]  # remove prefix
                # "origin" is reserved for the primary url
                if stripped_key == "origin":
                    raise ParseError("Invalid remote name: " + stripped_key)
                self.__remotes.update({stripped_key: val})
        self.__sslVerify = spec.get('sslVerify', secureSSL)
        self.__singleBranch = spec.get('singleBranch')
        self.__shallow = spec.get('shallow')
        self.__submodules = spec.get('submodules', False)
        self.__recurseSubmodules = spec.get('recurseSubmodules', False)
        self.__shallowSubmodules = spec.get('shallowSubmodules', True)
        self.__stripUser = stripUser

    def getProperties(self, isJenkins):
        """Return the full property dictionary of this SCM.

        The synthetic 'rev' key is recomputed from commit/tag/branch in that
        priority order; remotes are flattened back to "remote-<name>" keys.
        """
        properties = super().getProperties(isJenkins)
        properties.update({
            'scm': 'git',
            'url': self.__url,
            'branch': self.__branch,
            'tag': self.__tag,
            'commit': self.__commit,
            'dir': self.__dir,
            'rev': (self.__commit if self.__commit else
                    (("refs/tags/" + self.__tag) if self.__tag else
                     ("refs/heads/" + self.__branch))),
            'sslVerify': self.__sslVerify,
            'singleBranch': self.__singleBranch,
            'shallow': self.__shallow,
            'submodules': self.__submodules,
            'recurseSubmodules': self.__recurseSubmodules,
            'shallowSubmodules': self.__shallowSubmodules,
        })
        for key, val in self.__remotes.items():
            properties.update({GitScm.REMOTE_PREFIX + key: val})
        return properties

    async def invoke(self, invoker, switch=False):
        """Create or update the checkout in the workspace.

        Initializes the repository if needed, reconciles remotes with the
        spec, builds the fetch command (shallow/singleBranch aware) and then
        delegates to the tag/commit or branch checkout strategy. With
        switch=True an in-place ref change is forced.
        """
        # make sure the git directory exists
        if not os.path.isdir(invoker.joinPath(self.__dir, ".git")):
            await invoker.checkCommand(["git", "init", self.__dir])

        # Shallow implies singleBranch
        if self.__singleBranch is None:
            singleBranch = self.__shallow is not None
        else:
            singleBranch = self.__singleBranch
        # single-branch mode only makes sense when a branch is configured
        singleBranch = singleBranch and (self.__branch is not None)

        # setup and update remotes
        remotes = {"origin": self.__url}
        remotes.update(self.__remotes)
        existingRemotes = await invoker.checkOutputCommand(["git", "remote"],
                                                           cwd=self.__dir)
        for remote in existingRemotes.split("\n"):
            if remote in remotes:
                # only rewrite the url if it actually changed
                cfgUrl = remotes[remote]
                realUrl = await invoker.checkOutputCommand(
                    ["git", "ls-remote", "--get-url", remote], cwd=self.__dir)
                if cfgUrl != realUrl:
                    await invoker.checkCommand(
                        ["git", "remote", "set-url", remote, cfgUrl],
                        cwd=self.__dir)
                del remotes[remote]

        # add remaining (new) remotes
        for remote, url in remotes.items():
            addCmd = ["git", "remote", "add", remote, url]
            if singleBranch:
                addCmd += ["-t", self.__branch]
            await invoker.checkCommand(addCmd, cwd=self.__dir)

        # relax security if requested
        if not self.__sslVerify:
            await invoker.checkCommand(
                ["git", "config", "http.sslVerify", "false"], cwd=self.__dir)

        # Calculate refspec that is used internally. For the user a regular
        # refspec is kept in the git config.

        # Base fetch command with shallow support
        fetchCmd = ["git", "-c", "submodule.recurse=0", "fetch", "-p"]
        if isinstance(self.__shallow, int):
            fetchCmd.append("--depth={}".format(self.__shallow))
        elif isinstance(self.__shallow, str):
            fetchCmd.append("--shallow-since={}".format(self.__shallow))
        fetchCmd.append("origin")

        # Calculate appropriate refspec (all/singleBranch/tag)
        if singleBranch:
            fetchCmd += [
                "+refs/heads/{0}:refs/remotes/origin/{0}".format(self.__branch)
            ]
        else:
            fetchCmd += ["+refs/heads/*:refs/remotes/origin/*"]
        if self.__tag:
            fetchCmd.append("refs/tags/{0}:refs/tags/{0}".format(self.__tag))

        # do the checkout
        if self.__tag or self.__commit:
            await self.__checkoutTag(invoker, fetchCmd, switch)
        else:
            await self.__checkoutBranch(invoker, fetchCmd, switch)

    async def __checkoutTag(self, invoker, fetchCmd, switch):
        """Fetch and check out a fixed tag or commit.

        Only acts when the workspace has no valid HEAD yet or when a switch
        is forced; a tag/commit checkout is otherwise immutable.
        """
        # checkout only if HEAD is invalid
        head = await invoker.callCommand(
            ["git", "rev-parse", "--verify", "-q", "HEAD"],
            stdout=False, cwd=self.__dir)
        if head or switch:
            await invoker.checkCommand(fetchCmd, cwd=self.__dir)
            await invoker.checkCommand([
                "git", "checkout", "-q", "--no-recurse-submodules",
                self.__commit if self.__commit else "tags/" + self.__tag
            ], cwd=self.__dir)
            # FIXME: will not be called again if interrupted!
            await self.__checkoutSubmodules(invoker)

    async def __checkoutBranch(self, invoker, fetchCmd, switch):
        """Fetch and check out / fast-forward the configured branch.

        Fresh workspaces get a local branch tracking origin; existing ones
        are only fast-forwarded when still on the configured branch. With
        switch=True the branch is changed in place (submodules must be off).
        """
        await invoker.checkCommand(fetchCmd, cwd=self.__dir)
        if await invoker.callCommand(
                ["git", "rev-parse", "--verify", "-q", "HEAD"],
                stdout=False, cwd=self.__dir):
            # checkout only if HEAD is invalid
            await invoker.checkCommand([
                "git", "checkout", "--no-recurse-submodules", "-b",
                self.__branch, "remotes/origin/" + self.__branch
            ], cwd=self.__dir)
            await self.__checkoutSubmodules(invoker)
        elif switch:
            # We're switching the ref. There we will actively change the branch which
            # is normally forbidden.
            assert not self.__submodules
            # NOTE(review): unlike every other git invocation in this class,
            # this callCommand passes no cwd=self.__dir — looks like it runs
            # show-ref in the wrong directory; confirm against the invoker's
            # default cwd.
            if await invoker.callCommand([
                    "git", "show-ref", "-q", "--verify",
                    "refs/heads/" + self.__branch
            ]):
                # Branch does not exist. Create and checkout.
                await invoker.checkCommand([
                    "git", "checkout", "--no-recurse-submodules", "-b",
                    self.__branch, "remotes/origin/" + self.__branch
                ], cwd=self.__dir)
            else:
                # Branch exists already. Checkout and fast forward...
                await invoker.checkCommand([
                    "git", "checkout", "--no-recurse-submodules", self.__branch
                ], cwd=self.__dir)
                await invoker.checkCommand([
                    "git", "-c", "submodule.recurse=0", "merge", "--ff-only",
                    "refs/remotes/origin/" + self.__branch
                ], cwd=self.__dir)
        elif (await invoker.checkOutputCommand(
                ["git", "rev-parse", "--abbrev-ref", "HEAD"],
                cwd=self.__dir)) == self.__branch:
            # pull only if on original branch
            preUpdate = await self.__updateSubmodulesPre(invoker)
            await invoker.checkCommand([
                "git", "-c", "submodule.recurse=0", "merge", "--ff-only",
                "refs/remotes/origin/" + self.__branch
            ], cwd=self.__dir)
            await self.__updateSubmodulesPost(invoker, preUpdate)
        else:
            invoker.warn("Not updating", self.__dir,
                         "because branch was changed manually...")

    async def __checkoutSubmodules(self, invoker):
        """Initial submodule checkout honoring shallow/recursive/subset options."""
        if not self.__submodules:
            return
        args = ["git", "submodule", "update", "--init"]
        if self.__shallowSubmodules:
            args += ["--depth", "1"]
        if self.__recurseSubmodules:
            args += ["--recursive"]
        if isinstance(self.__submodules, list):
            # restrict to the user supplied subset of submodules
            args.append("--")
            args.extend(self.__submodules)
        await invoker.checkCommand(args, cwd=self.__dir)

    async def __updateSubmodulesPre(self, invoker, base="."):
        """Query the status of the currently checked out submodules.

        Returns a map with the paths of all checked out submodules as keys.
        The value will be True if the submodule looks untouched by the user
        and is deemed to be updateable. If the value is False the submodule is
        different from the expected vanilla checkout state. The list may only
        be a sub-set of all known submodules.
        """
        if not self.__submodules:
            return {}

        # List all active and checked out submodules. This way we know the
        # state of all submodules and compare them later to the expected state.
        args = [
            "git", "-C", base, "submodule", "-q", "foreach",
            "printf '%s\\t%s\\n' \"$sm_path\" \"$(git rev-parse HEAD)\""
        ]
        checkedOut = await invoker.checkOutputCommand(args, cwd=self.__dir)
        checkedOut = {
            path: commit
            for path, commit in (line.split("\t")
                                 for line in checkedOut.split("\n") if line)
        }
        if not checkedOut:
            return {}

        # List commits from git tree of all paths for checked out submodules.
        # This is what should be checked out.
        args = ["git", "-C", base, "ls-tree", "-z", "HEAD"] + sorted(
            checkedOut.keys())
        allPaths = await invoker.checkOutputCommand(args, cwd=self.__dir)
        allPaths = {
            normPath(path): attribs.split(' ')[2]
            for attribs, path in (p.split('\t')
                                  for p in allPaths.split('\0') if p)
            if attribs.split(' ')[1] == "commit"
        }

        # Calculate which paths are in the right state. They must match the
        # commit and must be in detached HEAD state.
        ret = {}
        for path, commit in checkedOut.items():
            path = normPath(path)
            if allPaths.get(path) != commit:
                ret[path] = False
                continue
            # symbolic-ref succeeds (code 0) only when HEAD is attached to a
            # branch, i.e. the user switched away from the detached state
            code = await invoker.callCommand(
                ["git", "symbolic-ref", "-q", "HEAD"],
                cwd=os.path.join(self.__dir, base, path))
            if code == 0:
                ret[path] = False
                continue
            ret[path] = True

        return ret

    async def __updateSubmodulesPost(self, invoker, oldState, base="."):
        """Update all submodules that are safe.

        Will update all submodules that are either new or have not been
        touched by the user. This will be done recursively if that is enabled.
        """
        if not self.__submodules:
            return {}
        if not os.path.exists(invoker.joinPath(self.__dir, base,
                                               ".gitmodules")):
            return {}

        # Sync remote URLs into our config in case they were changed
        args = ["git", "-C", base, "submodule", "sync"]
        await invoker.checkCommand(args, cwd=self.__dir)

        # List all paths as per .gitmodules. This gives us the list of all
        # known submodules. Optionally restrict to user specified subset.
        args = [
            "git", "-C", base, "config", "-f", ".gitmodules", "-z",
            "--get-regexp", "path"
        ]
        finishedProc = await invoker.runCommand(args, cwd=self.__dir,
                                                stdout=True)
        allPaths = finishedProc.stdout.rstrip(
        ) if finishedProc.returncode == 0 else ""
        allPaths = [p.split("\n")[1] for p in allPaths.split("\0") if p]
        if isinstance(self.__submodules, list):
            subset = set(normPath(p) for p in self.__submodules)
            allPaths = [p for p in allPaths if normPath(p) in subset]

        # Update only new or unmodified paths
        updatePaths = [p for p in allPaths if oldState.get(normPath(p), True)]
        for p in sorted(set(allPaths) - set(updatePaths)):
            invoker.warn("Not updating submodule",
                         os.path.join(self.__dir, base, p),
                         "because its HEAD has been switched...")
        if not updatePaths:
            return

        # If we recurse into sub-submodules get their potential state up-front
        if self.__recurseSubmodules:
            # Explicit loop because of Python 3.5: "'await' expressions in
            # comprehensions are not supported".
            subMods = {}
            for p in updatePaths:
                subMods[p] = await self.__updateSubmodulesPre(
                    invoker, os.path.join(base, p))

        # Do the update of safe submodules
        args = ["git", "-C", base, "submodule", "update", "--init"]
        if self.__shallowSubmodules:
            args += ["--depth", "1"]
        args.append("--")
        args += updatePaths
        await invoker.checkCommand(args, cwd=self.__dir)

        # Update sub-submodules if requested
        if self.__recurseSubmodules:
            for p in updatePaths:
                await self.__updateSubmodulesPost(invoker, subMods[p],
                                                  os.path.join(base, p))

    def canSwitch(self, oldSpec):
        """Decide whether an in-place switch from oldSpec is possible.

        Returns True only when the remaining spec differences are limited to
        the revision selection (branch/tag/commit/rev/url) and no submodules
        are involved.
        """
        diff = self._diffSpec(oldSpec)

        # Filter irrelevant properties
        diff -= {"sslVerify", 'singleBranch', 'shallow', 'shallowSubmodules'}
        diff = set(prop for prop in diff if not prop.startswith("remote-"))

        # Enabling "submodules" and/or "recurseSubmodules" is ok. The
        # additional content will be checked out in invoke().
        if not oldSpec.get("submodules", False) and self.__submodules:
            diff.discard("submodules")
        # NOTE(review): the spec/property key everywhere else in this class is
        # "recurseSubmodules", but the three occurrences below say
        # "recursiveSubmodules" — presumably a typo that makes these discards
        # dead; confirm against _diffSpec's key set.
        if not oldSpec.get("recursiveSubmodules",
                           False) and self.__recurseSubmodules:
            diff.discard("recursiveSubmodules")

        # Without submodules the recursiveSubmodules property is irrelevant
        if not self.__submodules:
            diff.discard("recursiveSubmodules")

        # For the rest we can try a inline switch. Git does not handle
        # vanishing submodules well and neither do we. So if submodules are
        # enabled then we do not do an in-place update.
        if not diff:
            return True
        if not diff.issubset({"branch", "tag", "commit", "rev", "url"}):
            return False
        if self.__submodules:
            return False
        return True

    async def switch(self, invoker, oldSpec):
        """Perform the in-place switch by re-running invoke() in switch mode."""
        # Try to checkout new state in old workspace. If something fails the
        # old attic logic will take over.
        await self.invoke(invoker, True)
        return True

    def asDigestScript(self):
        """Return forward compatible stable string describing this git module.

        The format is "url rev-spec dir" where rev-spec depends on the given
        reference.
        """
        if self.__stripUser:
            filt = removeUserFromUrl
        else:
            filt = lambda x: x
        if self.__commit:
            # a fixed commit uniquely identifies the content; url not needed
            ret = self.__commit + " " + self.__dir
        elif self.__tag:
            ret = filt(
                self.__url) + " refs/tags/" + self.__tag + " " + self.__dir
        else:
            ret = filt(
                self.__url) + " refs/heads/" + self.__branch + " " + self.__dir
        if self.__submodules:
            ret += " submodules"
            if isinstance(self.__submodules, list):
                ret += "[{}]".format(",".join(self.__submodules))
            if self.__recurseSubmodules:
                ret += " recursive"
        return ret

    def asJenkins(self, workPath, credentials, options):
        """Build the XML <scm> configuration for the Jenkins git plugin.

        Raises BuildError on invalid 'scm.git.shallow'/'scm.git.timeout'
        option values.
        """
        scm = ElementTree.Element("scm", attrib={
            "class": "hudson.plugins.git.GitSCM",
            "plugin": "[email protected]",
        })
        ElementTree.SubElement(scm, "configVersion").text = "2"

        userconfigs = ElementTree.SubElement(
            ElementTree.SubElement(scm, "userRemoteConfigs"),
            "hudson.plugins.git.UserRemoteConfig")

        url = ElementTree.SubElement(userconfigs, "url")
        url.text = self.__url

        if credentials:
            credentialsId = ElementTree.SubElement(userconfigs,
                                                   "credentialsId")
            credentialsId.text = credentials

        branch = ElementTree.SubElement(
            ElementTree.SubElement(ElementTree.SubElement(scm, "branches"),
                                   "hudson.plugins.git.BranchSpec"), "name")
        if self.__commit:
            branch.text = self.__commit
        elif self.__tag:
            branch.text = "refs/tags/" + self.__tag
        else:
            branch.text = "refs/heads/" + self.__branch

        ElementTree.SubElement(
            scm, "doGenerateSubmoduleConfigurations").text = "false"
        ElementTree.SubElement(scm, "submoduleCfg", attrib={"class": "list"})
        extensions = ElementTree.SubElement(scm, "extensions")
        ElementTree.SubElement(
            ElementTree.SubElement(
                extensions,
                "hudson.plugins.git.extensions.impl.RelativeTargetDirectory"),
            "relativeTargetDir").text = os.path.normpath(
                os.path.join(workPath, self.__dir))
        # remove untracked files and stale branches
        ElementTree.SubElement(
            extensions, "hudson.plugins.git.extensions.impl.CleanCheckout")
        ElementTree.SubElement(
            extensions, "hudson.plugins.git.extensions.impl.PruneStaleBranch")
        # set git clone options
        if isinstance(self.__shallow, int):
            shallow = str(self.__shallow)
        else:
            shallow = options.get("scm.git.shallow")
        timeout = options.get("scm.git.timeout")
        if shallow is not None or timeout is not None:
            co = ElementTree.SubElement(
                extensions, "hudson.plugins.git.extensions.impl.CloneOption")
            if shallow is not None:
                try:
                    shallow = int(shallow)
                    if shallow < 0:
                        raise ValueError()
                except ValueError:
                    raise BuildError("Invalid 'git.shallow' option: " +
                                     str(shallow))
                if shallow > 0:
                    ElementTree.SubElement(co, "shallow").text = "true"
                    ElementTree.SubElement(co, "noTags").text = "false"
                    ElementTree.SubElement(co, "reference").text = ""
                    ElementTree.SubElement(co, "depth").text = str(shallow)
                    ElementTree.SubElement(co, "honorRefspec").text = "false"

            if timeout is not None:
                try:
                    timeout = int(timeout)
                    if timeout < 0:
                        raise ValueError()
                except ValueError:
                    raise BuildError("Invalid 'git.timeout' option: " +
                                     str(timeout))
                if timeout > 0:
                    ElementTree.SubElement(co, "timeout").text = str(timeout)

        if self.__submodules:
            assert isinstance(self.__submodules, bool)
            sub = ElementTree.SubElement(
                extensions,
                "hudson.plugins.git.extensions.impl.SubmoduleOption")
            if self.__recurseSubmodules:
                ElementTree.SubElement(sub,
                                       "recursiveSubmodules").text = "true"
            if self.__shallowSubmodules:
                ElementTree.SubElement(sub, "shallow").text = "true"
                ElementTree.SubElement(sub, "depth").text = "1"
            if timeout is not None:
                ElementTree.SubElement(sub, "timeout").text = str(timeout)

        if isTrue(options.get("scm.ignore-hooks", "0")):
            ElementTree.SubElement(
                extensions,
                "hudson.plugins.git.extensions.impl.IgnoreNotifyCommit")

        return scm

    def getDirectory(self):
        """Return the checkout directory relative to the workspace."""
        return self.__dir

    def isDeterministic(self):
        """Tags and commits are deterministic; branches are not."""
        return bool(self.__tag) or bool(self.__commit)

    def hasJenkinsPlugin(self):
        # Cloning a subset of submodules is not supported by the Jenkins
        # git-plugin. Fall back to our implementation in this case.
        return not isinstance(self.__submodules, list)

    def callGit(self, workspacePath, *args, check=True):
        """Run git synchronously in the checkout directory.

        Returns stripped stdout. With check=True a failing git command raises
        BuildError; with check=False it returns "". OS-level failures always
        raise BuildError.
        """
        cmdLine = ['git']
        cmdLine.extend(args)
        cwd = os.path.join(workspacePath, self.__dir)
        try:
            output = subprocess.check_output(cmdLine, cwd=cwd,
                                             universal_newlines=True,
                                             stderr=subprocess.DEVNULL)
        except subprocess.CalledProcessError as e:
            if check:
                raise BuildError(
                    "git error:\n Directory: '{}'\n Command: '{}'\n'{}'".
                    format(cwd, " ".join(cmdLine), e.output.rstrip()))
            else:
                return ""
        except OSError as e:
            raise BuildError("Error calling git: " + str(e))
        return output.strip()

    def status(self, workspacePath):
        """Compare the workspace state against the spec.

        Checks the remote url, the checked out commit/tag/branch, local
        modifications and unpushed commits and reports everything as taints
        on the returned ScmStatus. Any git failure is folded into an error
        taint instead of propagating.
        """
        status = ScmStatus()
        try:
            onCorrectBranch = False
            onTag = False
            output = self.callGit(workspacePath, 'ls-remote', '--get-url')
            if output != self.__url:
                status.add(
                    ScmTaint.switched,
                    "> URL: configured: '{}', actual: '{}'".format(
                        self.__url, output))

            if self.__commit:
                output = self.callGit(workspacePath, 'rev-parse', 'HEAD')
                if output != self.__commit:
                    status.add(
                        ScmTaint.switched,
                        "> commit: configured: '{}', actual: '{}'".format(
                            self.__commit, output))
            elif self.__tag:
                output = self.callGit(workspacePath, 'tag', '--points-at',
                                      'HEAD').splitlines()
                if self.__tag not in output:
                    actual = ("'" + ", ".join(output) +
                              "'") if output else "not on any tag"
                    status.add(
                        ScmTaint.switched,
                        "> tag: configured: '{}', actual: {}".format(
                            self.__tag, actual))

                # Need to check if the tag still exists. Otherwise the "git
                # log" command at the end will trip.
                try:
                    self.callGit(workspacePath, 'rev-parse',
                                 'tags/' + self.__tag)
                    onTag = True
                except BuildError:
                    pass
            elif self.__branch:
                output = self.callGit(workspacePath, 'rev-parse',
                                      '--abbrev-ref', 'HEAD')
                if output != self.__branch:
                    status.add(
                        ScmTaint.switched,
                        "> branch: configured: '{}', actual: '{}'".format(
                            self.__branch, output))
                else:
                    output = self.callGit(
                        workspacePath, 'log', '--oneline',
                        'refs/remotes/origin/' + self.__branch + '..HEAD')
                    if output:
                        status.add(
                            ScmTaint.unpushed_main,
                            joinLines(
                                "> unpushed commits on {}:".format(
                                    self.__branch), indent(output, '   ')))
                    onCorrectBranch = True

            # Check for modifications wrt. checked out commit
            output = self.callGit(workspacePath, 'status', '--porcelain',
                                  '--ignore-submodules=all')
            if output:
                status.add(ScmTaint.modified,
                           joinLines("> modified:", indent(output, '   ')))

            # The following shows all unpushed commits reachable by any ref
            # (local branches, stash, detached HEAD, etc).
            # Exclude HEAD if the configured branch is checked out to not
            # double-count them. Does not mark the SCM as dirty. Exclude the
            # configured tag too if it is checked out. Otherwise the tag would
            # count as unpushed if it is not on a remote branch.
            what = ['--all', '--not', '--remotes']
            if onCorrectBranch:
                what.append('HEAD')
            if onTag:
                what.append("tags/" + self.__tag)
            output = self.callGit(workspacePath, 'log', '--oneline',
                                  '--decorate', *what)
            if output:
                status.add(
                    ScmTaint.unpushed_local,
                    joinLines("> unpushed local commits:",
                              indent(output, '   ')))

            # Dive into submodules
            self.__statusSubmodule(workspacePath, status, self.__submodules)
        except BuildError as e:
            status.add(ScmTaint.error, e.slogan)

        return status

    def __statusSubmodule(self, workspacePath, status, shouldExist, base="."):
        """Get the status of submodules and possibly sub-submodules.

        The regular "git status" command is not sufficient for our case. In
        case the submodule is not initialized "git status" will completely
        ignore it. Using "git submodule status" would help but its output is
        not meant to be parsed by tools. So we first get the list of all
        possible submodules with their tracked commit. Then the actual commit
        is compared and any further modifications and unpushed commits are
        checked.
        """
        if not os.path.exists(os.path.join(workspacePath, base,
                                           ".gitmodules")):
            return

        # List all paths as per .gitmodules. This gives us the list of all
        # known submodules.
        allPaths = self.callGit(workspacePath, "-C", base, "config", "-f",
                                ".gitmodules", "-z", "--get-regexp", "path",
                                check=False)
        allPaths = [p.split("\n")[1] for p in allPaths.split("\0") if p]
        if not allPaths:
            return

        # Fetch the respective commits as per git ls-tree
        allPaths = self.callGit(workspacePath, "-C", base, "ls-tree", "-z",
                                "HEAD", *allPaths)
        allPaths = {
            path: attribs.split(' ')[2]
            for attribs, path in (p.split('\t')
                                  for p in allPaths.split('\0') if p)
            if attribs.split(' ')[1] == "commit"
        }

        # Normalize subset of submodules
        if isinstance(shouldExist, list):
            shouldExist = set(normPath(p) for p in shouldExist)
        elif shouldExist:
            shouldExist = set(normPath(p) for p in allPaths.keys())
        else:
            shouldExist = set()

        # Check each submodule for their commit, modifications and unpushed
        # stuff. Unconditionally recurse to even see if something is there even
        # though it shouldn't.
        for path, commit in sorted(allPaths.items()):
            subPath = os.path.join(base, path)
            subShouldExist = normPath(path) in shouldExist
            if not os.path.exists(
                    os.path.join(workspacePath, subPath, ".git")):
                if subShouldExist:
                    status.add(ScmTaint.modified,
                               "> submodule not checked out: " + subPath)
                elif not dirIsEmpty(os.path.join(workspacePath, subPath)):
                    status.add(ScmTaint.modified,
                               "> ignored submodule not empty: " + subPath)
                continue
            elif not subShouldExist:
                status.add(ScmTaint.modified,
                           "> submodule checked out: " + subPath)

            realCommit = self.callGit(workspacePath, "-C", subPath,
                                      "rev-parse", "HEAD")
            if commit != realCommit:
                status.add(
                    ScmTaint.switched,
                    "> submodule '{}' switched commit: configured: '{}', actual: '{}'"
                    .format(subPath, commit, realCommit))

            output = self.callGit(workspacePath, "-C", subPath, 'status',
                                  '--porcelain', '--ignore-submodules=all')
            if output:
                status.add(
                    ScmTaint.modified,
                    joinLines("> submodule '{}' modified:".format(subPath),
                              indent(output, '   ')))

            output = self.callGit(workspacePath, "-C", subPath, 'log',
                                  '--oneline', '--decorate', '--all', '--not',
                                  '--remotes')
            if output:
                status.add(
                    ScmTaint.unpushed_local,
                    joinLines(
                        "> submodule '{}' unpushed local commits:".format(
                            subPath), indent(output, '   ')))

            self.__statusSubmodule(workspacePath, status,
                                   self.__recurseSubmodules, subPath)

    def getAuditSpec(self):
        """Return the audit tuple (engine, directory, extra options)."""
        extra = {}
        if self.__submodules:
            extra['submodules'] = self.__submodules
            if self.__recurseSubmodules:
                extra['recurseSubmodules'] = True
        return ("git", self.__dir, extra)

    def hasLiveBuildId(self):
        return True

    async def predictLiveBuildId(self, step):
        """Predict the live build id without a workspace via ls-remote.

        Returns the commit id as bytes or None if the prediction failed.
        """
        if self.__commit:
            return bytes.fromhex(self.__commit)

        with stepAction(step, "LS-REMOTE", self.__url, (INFO, TRACE)) as a:
            if self.__tag:
                # Annotated tags are objects themselves. We need the commit object!
                refs = [
                    "refs/tags/" + self.__tag + '^{}',
                    "refs/tags/" + self.__tag
                ]
            else:
                refs = ["refs/heads/" + self.__branch]
            cmdLine = ['git', 'ls-remote', self.__url] + refs
            try:
                stdout = await check_output(cmdLine,
                                            stderr=subprocess.DEVNULL,
                                            universal_newlines=True)
                output = stdout.strip()
            except subprocess.CalledProcessError as e:
                a.fail("exit {}".format(e.returncode), WARNING)
                return None
            except OSError as e:
                a.fail("error ({})".format(e))
                return None

            # have we found anything at all?
            if not output:
                a.fail("unknown", WARNING)
                return None

            # See if we got one of our intended refs. Git is generating lines with
            # the following format:
            #
            # <sha1>\t<refname>
            #
            # Put the output into a dict with the refname as key. Be extra careful
            # and strip out lines not matching this pattern.
            output = {
                commitAndRef[1].strip(): bytes.fromhex(commitAndRef[0].strip())
                for commitAndRef in (line.split('\t')
                                     for line in output.split('\n'))
                if len(commitAndRef) == 2
            }
            for ref in refs:
                if ref in output:
                    return output[ref]

            # uhh, should not happen...
            a.fail("unknown", WARNING)
            return None

    def calcLiveBuildId(self, workspacePath):
        """Return the checked out commit id as bytes."""
        if self.__commit:
            return bytes.fromhex(self.__commit)
        else:
            output = self.callGit(workspacePath, 'rev-parse', 'HEAD').strip()
            return bytes.fromhex(output)

    def getLiveBuildIdSpec(self, workspacePath):
        """Return the live-build-id spec: "=<commit>" or "g<path>"."""
        if self.__commit:
            return "=" + self.__commit
        else:
            return "g" + os.path.join(workspacePath, self.__dir)

    @staticmethod
    def processLiveBuildIdSpec(dir):
        """Resolve a "g<path>" live-build-id spec to the HEAD commit id."""
        try:
            return subprocess.check_output(["git", "rev-parse", "HEAD"],
                                           cwd=dir,
                                           universal_newlines=True).strip()
        except subprocess.CalledProcessError as e:
            raise BuildError("Git audit failed: " + str(e))
        except OSError as e:
            raise BuildError("Error calling git: " + str(e))
schema.Optional("type", default="context-free"): "context-free", "span_encoding_dim": schema.Use(int), "word_embeddings_dim": schema.Use(int), "chars_embeddings_dim": schema.Use(int), "hidden_dim": schema.Use(int), schema.Optional("soft_dropout_rate", default=0.3): schema.Use(float), schema.Optional("hard_dropout_rate", default=0.6): schema.Use(float), schema.Optional("features", default=None): [schema.Or(raw_features_schema, categorical_features_schema)], schema.Optional("token_features", default=None): [schema.Or(raw_features_schema, categorical_features_schema)], schema.Optional("external_boundaries", default=False): schema.Use(bool), }) elmo_encoder_schema = schema.Schema({ "type": "elmo", "span_encoding_dim": schema.Use(int), "elmo_options_file": schema.Use(str), "elmo_weight_file": schema.Use(str),
class ImportScm(Scm):
    """SCM that copies a local directory tree into the workspace.

    The source is either copied directly from ``url`` or, when a packed
    ``__data`` payload is present in the spec, unpacked from that payload.
    """

    # Recipe schema for the "import" SCM type.
    SCHEMA = schema.Schema({
        'scm': 'import',
        'url': str,
        schema.Optional('dir'): str,
        schema.Optional('if'): schema.Or(str, IfExpression),
        schema.Optional('prune'): bool,
    })

    def __init__(self, spec, overrides=[]):
        """Pick the relevant keys out of the validated spec."""
        super().__init__(spec, overrides)
        self.__url = spec["url"]
        self.__dir = spec.get("dir", ".")
        self.__prune = spec.get("prune", False)
        # internal key carrying a pre-packed tree, if any
        self.__data = spec.get("__data")

    def getProperties(self):
        """Return the spec properties, embedding the packed source tree."""
        props = super().getProperties()
        props['scm'] = 'import'
        props['url'] = self.__url
        props['dir'] = self.__dir
        props['prune'] = self.__prune
        props['__data'] = packTree(self.__url)
        return props

    async def invoke(self, invoker):
        """Materialize the imported tree below the workspace directory."""
        dest = invoker.joinPath(self.__dir)
        os.makedirs(dest, exist_ok=True)
        if self.__prune:
            # wipe the destination before (re-)importing
            emptyDirectory(dest)
        if self.__data is not None:
            unpackTree(self.__data, dest)
        else:
            if not os.path.isdir(self.__url):
                invoker.fail("Cannot import '{}': not a directory!".format(
                    self.__url))
            copyTree(self.__url, dest, invoker)

    def asDigestScript(self):
        """The source path itself is the stable digest contribution."""
        return self.__url

    def getDirectory(self):
        """Return the checkout directory relative to the workspace."""
        return self.__dir

    def isDeterministic(self):
        # A local directory can change at any time.
        return False

    def hasLiveBuildId(self):
        return True

    async def predictLiveBuildId(self, step):
        """Hash the source directory to predict the live build id."""
        with stepAction(step, "HASH", self.__url, (INFO, TRACE)) as a:
            return hashDirectory(self.__url)

    def calcLiveBuildId(self, workspacePath):
        """Hash the imported tree inside the workspace."""
        return hashDirectory(os.path.join(workspacePath, self.__dir))

    def getAuditSpec(self):
        """Return the audit tuple (engine, directory, extra options)."""
        return ("import", self.__dir, {"url": self.__url})

    def getLiveBuildIdSpec(self, workspacePath):
        """Return the "#<path>" live-build-id spec."""
        return "#" + os.path.join(workspacePath, self.__dir)
import logging import schema from sklearn.model_selection import ParameterSampler logger = logging.getLogger("sampling") SCHEMA_SAMPLING = schema.Schema( { "hdock_distr": schema.Or(schema.And(str, lambda s: get_distribution(s) is not None)), "hdock_samples": schema.And(int, lambda n: n >= 0), schema.Optional("hdock_seed", default=0): int, schema.Optional("hdock_distr_kwargs", default={}): dict, }, ignore_extra_keys=True, ) def sample_values(value): """ A random sampling search. Using the special keys for distributions. { "parameter": { "hdock_distr": <str> (From scipy.stats.distributions), "hdock_distr_kwargs": {
def check_api_schema(api):
    """ Checks the schema of the API and raises exceptions if something
    does not match.

    Validation is performed with small callable validator objects (schema's
    duck-typed ``validate`` protocol) so that cross-references between API
    entries and the recursive paragraph structure can be expressed.

    :param api: The API dictionary
    :raises schema.SchemaError: if any entry fails validation
    """

    # Schema for checking we have a string in a Python 2 and 3 compatible way
    class StringSchema(object):
        def validate(self, data):
            # Check the basic properties
            schema.Schema(schema.Or(*six.string_types)).validate(data)

            # No empty strings either
            if not data:
                raise schema.SchemaError("String is empty")
            return data

    string_schema = StringSchema()

    # Schema for checking the location
    location_schema = schema.Schema({
        'path': string_schema,
        schema.Optional('include'): string_schema,
        'line-start': int,
        'line-end': schema.Or(int, None)
    })

    # Check that members's 'unique-name' is in the API
    class MemberInAPI(object):
        def __init__(self, api):
            self.api = api

        def validate(self, data):
            if data not in self.api:
                raise schema.SchemaError("%r not found in the API "
                                         "valid keys are %r" %
                                         (data, self.api.keys()))
            return data

    # Link schema
    class LinkSchema(object):
        def __init__(self, api):
            self.api = api

        def validate(self, data):
            # Check the basic properties
            schema.Schema({
                'url': bool,
                'value': string_schema,
            }).validate(data)

            # For url we are done
            if data['url']:
                return data

            # Check that if non url we have the link in the API
            if data['value'] not in self.api:
                raise schema.SchemaError("Link value %r not found in the API "
                                         "valid keys are %r" %
                                         (data, self.api.keys()))
            return data

    # Paragraphs text schema
    paragraphs_text_schema = schema.Schema({
        'kind': 'text',
        'content': string_schema,
        schema.Optional('link'): LinkSchema(api=api)
    })

    # Paragraphs code schema
    paragraphs_code_schema = schema.Schema({
        'kind': 'code',
        'content': string_schema,
        'is_block': bool
    })

    # Paragraphs list schema
    class ItemsParagraphs(object):
        def __init__(self):
            # filled in after paragraphs_schema exists (recursive structure)
            self.use_schema = None

        def validate(self, data):
            return self.use_schema.validate(data)

    # We define a validator object but defer the initialization of the schema to
    # use. The reason is the items kind is itself a list of paragraphs so we
    # have a recursive dependency.
    items_paragraphs = ItemsParagraphs()

    paragraphs_list_schema = schema.Schema({
        'kind': 'list',
        'ordered': bool,
        'items': [items_paragraphs]
    })

    # Paragraphs schema
    paragraphs_schema = schema.Schema([
        schema.Or(paragraphs_text_schema, paragraphs_code_schema,
                  paragraphs_list_schema)
    ])

    # Initialize the items schema which itself is a list of paragraphs
    items_paragraphs.use_schema = paragraphs_schema

    # type schema
    type_schema = schema.Schema([{
        'value': string_schema,
        schema.Optional('link'): LinkSchema(api=api)
    }])

    # template parameter schema
    template_parameter_schema = schema.Schema([{
        'type': type_schema,
        'name': string_schema,
        schema.Optional('default'): type_schema,
        schema.Optional('description'): paragraphs_schema
    }])

    # Schema for checking the namespace kind
    namespace_schema = schema.Schema({
        'kind': 'namespace',
        'name': string_schema,
        'scope': schema.Or(string_schema, None),
        'members': [MemberInAPI(api=api)],
        'briefdescription': paragraphs_schema,
        'detaileddescription': paragraphs_schema,
        'inline': bool
    })

    # Schema for checking classes and structs
    class_struct_schema = schema.Schema({
        'kind': schema.Or('class', 'struct'),
        'name': string_schema,
        'location': location_schema,
        'scope': schema.Or(string_schema, None),
        'access': schema.Or('public', 'protected', 'private'),
        schema.Optional('template_parameters'): template_parameter_schema,
        'members': [MemberInAPI(api=api)],
        'briefdescription': paragraphs_schema,
        'detaileddescription': paragraphs_schema
    })

    # Enum schema
    enum_schema = schema.Schema({
        'kind': 'enum',
        'name': string_schema,
        'location': location_schema,
        'scope': schema.Or(string_schema, None),
        'access': schema.Or('public', 'protected', 'private'),
        'values': [{
            'name': string_schema,
            'briefdescription': paragraphs_schema,
            'detaileddescription': paragraphs_schema,
            schema.Optional('value'): string_schema
        }],
        'briefdescription': paragraphs_schema,
        'detaileddescription': paragraphs_schema
    })

    # Type schema
    # NOTE(review): this rebinding shadows the type_schema defined above with
    # an identical definition — presumably a copy/paste leftover; harmless.
    type_schema = schema.Schema([{
        'value': string_schema,
        schema.Optional('link'): LinkSchema(api=api)
    }])

    # Typedef / using schema
    typedef_using_schema = schema.Schema({
        'kind': schema.Or('typedef', 'using'),
        'name': string_schema,
        'location': location_schema,
        'scope': schema.Or(string_schema, None),
        'access': schema.Or('public', 'protected', 'private'),
        'type': type_schema,
        'briefdescription': paragraphs_schema,
        'detaileddescription': paragraphs_schema
    })

    # Function schema
    function_schema = schema.Schema({
        'kind': 'function',
        'name': string_schema,
        'location': location_schema,
        'scope': schema.Or(string_schema, None),
        schema.Optional('return'): {
            'type': type_schema,
            'description': paragraphs_schema
        },
        'signature': string_schema,
        schema.Optional('template_parameters'): template_parameter_schema,
        'is_const': bool,
        'is_static': bool,
        'is_virtual': bool,
        'is_explicit': bool,
        'is_inline': bool,
        'is_constructor': bool,
        'is_destructor': bool,
        'access': schema.Or('public', 'protected', 'private'),
        'briefdescription': paragraphs_schema,
        'detaileddescription': paragraphs_schema,
        'parameters': [{
            'type': type_schema,
            schema.Optional('name'): string_schema,
            'description': paragraphs_schema
        }],
    })

    # variable schema
    variable_schema = schema.Schema({
        'kind': 'variable',
        'name': string_schema,
        schema.Optional('value'): string_schema,
        'type': type_schema,
        'location': location_schema,
        'is_static': bool,
        'is_mutable': bool,
        'is_volatile': bool,
        'is_const': bool,
        'is_constexpr': bool,
        'scope': schema.Or(string_schema, None),
        'access': schema.Or('public', 'protected', 'private'),
        'briefdescription': paragraphs_schema,
        'detaileddescription': paragraphs_schema,
    })

    # Dispatch to the "right" kind of schema. We could do this with a
    # schema.Or(...) clause but it makes the error output hard to read
    api_schemas = {
        'namespace': namespace_schema,
        'class': class_struct_schema,
        'struct': class_struct_schema,
        'enum': enum_schema,
        'typedef': typedef_using_schema,
        'using': typedef_using_schema,
        'function': function_schema,
        'variable': variable_schema
    }

    class SchemaApi(object):
        def validate(self, data):
            if 'kind' not in data:
                raise schema.SchemaError(
                    "Required 'kind' key not found in %r" % data)
            if data['kind'] not in api_schemas:
                raise schema.SchemaError(
                    "Unknown 'kind' key in %r valid kinds are %r" %
                    (data, api_schemas.keys()))
            return api_schemas[data['kind']].validate(data)

    schema.Schema({str: SchemaApi()}).validate(api)
class UrlScm(Scm):
    """Download (and optionally extract) a single file from a URL.

    The checkout is expressed as a shell script built around ``curl``. If a
    SHA1/SHA256 digest is configured it is verified after download and also
    serves as the deterministic live-build-id of this SCM.
    """

    SCHEMA = schema.Schema({
        'scm': 'url',
        'url': str,
        schema.Optional('dir'): str,
        schema.Optional('if'): str,
        schema.Optional('digestSHA1'): str,
        schema.Optional('digestSHA256'): str,
        schema.Optional('extract'): schema.Or(bool, str),
        schema.Optional('fileName'): str,
        schema.Optional('stripComponents'): int,
        schema.Optional('sslVerify'): bool,
    })

    # Maps known file-name suffixes to the extractor tool key in EXTRACTORS.
    # Order matters: the more specific ".tar.gz" must come before ".gz".
    EXTENSIONS = [
        (".tar.gz", "tar"),
        (".tar.xz", "tar"),
        (".tar.bz2", "tar"),
        (".tar.bzip2", "tar"),
        (".tgz", "tar"),
        (".tar", "tar"),
        (".gz", "gzip"),
        (".xz", "xz"),
        (".7z", "7z"),
        (".zip", "zip"),
    ]

    # Extractor command line and, if the tool supports it, the template for
    # the strip-components option (None -> 'stripComponents' unsupported).
    EXTRACTORS = {
        "tar": ("tar -x --no-same-owner --no-same-permissions -f",
                "--strip-components={}"),
        "gzip": ("gunzip -kf", None),
        "xz": ("unxz -kf", None),
        "7z": ("7z x -y", None),
        "zip": ("unzip -o", None),
    }

    def __init__(self, spec, overrides=None, tidy=None):
        """Construct from a validated spec dictionary.

        :param spec: SCM spec matching :data:`SCHEMA`.
        :param overrides: optional list of applied overrides (avoid a shared
            mutable default by using None as the sentinel).
        :param tidy: if set, getDirectory() returns only the directory
            instead of the full file path.
        :raises ParseError: on malformed SHA1/SHA256 digests.
        """
        super().__init__(spec, [] if overrides is None else overrides)
        self.__url = spec["url"]
        self.__digestSha1 = spec.get("digestSHA1")
        if self.__digestSha1:
            # validate digest: exactly 40 lower-case hex characters
            if re.match("^[0-9a-f]{40}$", self.__digestSha1) is None:
                raise ParseError("Invalid SHA1 digest: " + str(self.__digestSha1))
        self.__digestSha256 = spec.get("digestSHA256")
        if self.__digestSha256:
            # validate digest: exactly 64 lower-case hex characters
            if re.match("^[0-9a-f]{64}$", self.__digestSha256) is None:
                raise ParseError("Invalid SHA256 digest: " + str(self.__digestSha256))
        self.__dir = spec.get("dir", ".")
        self.__fn = spec.get("fileName")
        if not self.__fn:
            # default file name: last path component of the URL
            self.__fn = self.__url.split("/")[-1]
        self.__extract = spec.get("extract", "auto")
        self.__tidy = tidy
        self.__strip = spec.get("stripComponents", 0)
        self.__sslVerify = spec.get('sslVerify', True)

    def getProperties(self):
        """Return the full property dictionary describing this SCM."""
        ret = super().getProperties()
        ret.update({
            'scm': 'url',
            'url': self.__url,
            'digestSHA1': self.__digestSha1,
            'digestSHA256': self.__digestSha256,
            'dir': self.__dir,
            'fileName': self.__fn,
            'extract': self.__extract,
            'stripComponents': self.__strip,
            'sslVerify': self.__sslVerify,
        })
        return ret

    def asScript(self):
        """Return the shell script that downloads, verifies and extracts.

        :raises ParseError: if extraction was requested but no extractor is
            known for the file, or the extract mode is invalid, or the chosen
            extractor does not support 'stripComponents'.
        """
        options = "-sSgLf"
        if not self.__sslVerify:
            # -k: skip certificate verification
            options += "k"
        # Download into a temp file first so an interrupted transfer never
        # leaves a truncated file behind; -z re-downloads only if newer.
        ret = """
{HEADER}
mkdir -p {DIR}
cd {DIR}
if [ -e {FILE} ] ; then
    curl {OPTIONS} -o {FILE} -z {FILE} {URL}
else
    (
        F=$(mktemp)
        trap 'rm -f $F' EXIT
        set -e
        curl {OPTIONS} -o $F {URL}
        mv $F {FILE}
    )
fi
""".format(HEADER=super().asScript(), DIR=quote(self.__dir),
           URL=quote(self.__url), FILE=quote(self.__fn), OPTIONS=options)

        # sha1sum/sha256sum -c expect "<digest>  <file>"; the escaped spaces
        # keep both separator spaces intact in the shell. (Escapes doubled to
        # produce the same runtime string without a deprecated "\ " sequence.)
        if self.__digestSha1:
            ret += "echo {DIGEST}\\ \\ {FILE} | sha1sum -c\n".format(
                DIGEST=self.__digestSha1, FILE=self.__fn)
        if self.__digestSha256:
            ret += "echo {DIGEST}\\ \\ {FILE} | sha256sum -c\n".format(
                DIGEST=self.__digestSha256, FILE=self.__fn)

        extractor = None
        if self.__extract in ["yes", "auto", True]:
            # pick the extractor by file extension
            for (ext, tool) in UrlScm.EXTENSIONS:
                if self.__fn.endswith(ext):
                    extractor = UrlScm.EXTRACTORS[tool]
                    break
            if not extractor and self.__extract != "auto":
                raise ParseError("Don't know how to extract '" + self.__fn +
                                 "' automatically.")
        elif self.__extract in UrlScm.EXTRACTORS:
            # BUG FIX: this branch previously indexed EXTRACTORS with the loop
            # variable 'tool', which is unbound here (NameError at runtime).
            # The user explicitly named the extractor in 'extract'.
            extractor = UrlScm.EXTRACTORS[self.__extract]
        elif self.__extract not in ["no", False]:
            raise ParseError("Invalid extract mode: " + self.__extract)

        if extractor:
            if self.__strip > 0:
                if extractor[1] is None:
                    raise ParseError(
                        "Extractor does not support 'stripComponents'!")
                strip = " " + extractor[1].format(self.__strip)
            else:
                strip = ""
            # Re-extract only when the downloaded file is newer than the
            # marker file from the last extraction.
            ret += """
if [ {FILE} -nt .{FILE}.extracted ] ; then
    {TOOL} {FILE}{STRIP}
    touch .{FILE}.extracted
fi
""".format(FILE=quote(self.__fn), TOOL=extractor[0], STRIP=strip)

        return ret

    def asDigestScript(self):
        """Return forward compatible stable string describing this url.

        The format is "digest dir extract" if a SHA checksum was specified.
        Otherwise it is "url dir extract". A "s#" is appended if leading paths
        are stripped where # is the number of stripped elements.
        """
        return (
            self.__digestSha256 if self.__digestSha256
            else (self.__digestSha1 if self.__digestSha1 else self.__url)
        ) + " " + os.path.join(self.__dir, self.__fn) + " " + str(self.__extract) + \
            (" s{}".format(self.__strip) if self.__strip > 0 else "")

    def getDirectory(self):
        """Return checkout location: directory only in 'tidy' mode."""
        return self.__dir if self.__tidy else os.path.join(self.__dir, self.__fn)

    def isDeterministic(self):
        """A URL checkout is deterministic iff a digest pins the content."""
        return (self.__digestSha1 is not None) or (self.__digestSha256 is not None)

    def getAuditSpec(self):
        return ("url", os.path.join(self.__dir, self.__fn))

    def hasLiveBuildId(self):
        return self.isDeterministic()

    async def predictLiveBuildId(self, step):
        # The digest is known up front; no need to look at the workspace.
        return self.calcLiveBuildId(None)

    def calcLiveBuildId(self, workspacePath):
        """Return the configured digest as bytes, preferring SHA256."""
        if self.__digestSha256:
            return bytes.fromhex(self.__digestSha256)
        elif self.__digestSha1:
            return bytes.fromhex(self.__digestSha1)
        else:
            return None

    def getLiveBuildIdSpec(self, workspacePath):
        # "=<digest>" means the build id is a literal value.
        if self.__digestSha256:
            return "=" + self.__digestSha256
        elif self.__digestSha1:
            return "=" + self.__digestSha1
        else:
            return None
import schema as sc AttrsSchema = sc.Schema({sc.Optional(str): sc.Or(str, int, float, bool)}) RecordSchema = sc.Schema({ str: sc.Or(None, list), sc.Optional("attrs", default={}): AttrsSchema, }) RootSchema = sc.Schema({"root": str, sc.Optional("attrs"): AttrsSchema}) MapSchema = sc.Schema([sc.Or(RecordSchema, RootSchema)]) EvalPredicateSchema = sc.Schema({ "type": "eval", "target": str, }) RegexPredicateSchema = sc.Schema({ "type": "regex", "target": str, "pattern": str, }) NamePredicateSchema = sc.Schema({ "type": "name", "pattern": str, }) PredicateSchema = sc.Or(EvalPredicateSchema, RegexPredicateSchema, NamePredicateSchema) StyleSchema = sc.Schema({ "predicate": PredicateSchema, sc.Optional("attrs", default={}): AttrsSchema, sc.Optional("order", default=0): int, sc.Optional("transform"): str, })
class Stage(object):
    """A single pipeline stage described by a Dvcfile / '.dvc' file.

    A stage couples a command with its dependencies and outputs and knows how
    to (de)serialize itself to YAML, detect changes and reproduce itself.
    """

    STAGE_FILE = 'Dvcfile'
    STAGE_FILE_SUFFIX = '.dvc'
    PARAM_MD5 = 'md5'
    PARAM_CMD = 'cmd'
    PARAM_DEPS = 'deps'
    PARAM_OUTS = 'outs'
    PARAM_LOCKED = 'locked'

    SCHEMA = {
        schema.Optional(PARAM_MD5): schema.Or(str, None),
        schema.Optional(PARAM_CMD): schema.Or(str, None),
        schema.Optional(PARAM_DEPS):
            schema.Or(schema.And(list, schema.Schema([dependency.SCHEMA])), None),
        schema.Optional(PARAM_OUTS):
            schema.Or(schema.And(list, schema.Schema([output.SCHEMA])), None),
        schema.Optional(PARAM_LOCKED): bool,
    }

    def __init__(self, project, path=None, cmd=None, cwd=os.curdir,
                 deps=None, outs=None, md5=None, locked=False):
        """Create a stage.

        'deps'/'outs' default to fresh empty lists (previously mutable
        default arguments, which would be shared across instances).
        """
        self.project = project
        self.path = path
        self.cmd = cmd
        self.cwd = cwd
        self.outs = [] if outs is None else outs
        self.deps = [] if deps is None else deps
        self.md5 = md5
        self.locked = locked

    @property
    def relpath(self):
        """Stage file path relative to the current directory."""
        return os.path.relpath(self.path)

    @property
    def is_data_source(self):
        """A stage without a command only tracks data."""
        return self.cmd is None

    @staticmethod
    def is_stage_file(path):
        """Return True if 'path' is an existing stage file by name."""
        if not os.path.isfile(path):
            return False
        if not path.endswith(Stage.STAGE_FILE_SUFFIX) and \
                os.path.basename(path) != Stage.STAGE_FILE:
            return False
        return True

    def changed_md5(self):
        """Return True if the stored md5 no longer matches the current one."""
        md5 = self.dumpd().get(self.PARAM_MD5, None)

        # backward compatibility: old stage files carry no md5 at all
        if self.md5 is None:
            return False

        if self.md5 and md5 and self.md5 == md5:
            return False

        msg = "Dvc file '{}' md5 changed(expected '{}', actual '{}')"
        self.project.logger.debug(msg.format(self.relpath, self.md5, md5))
        return True

    @property
    def is_callback(self):
        # A command with no dependencies must always be considered changed.
        return not self.is_data_source and len(self.deps) == 0

    @property
    def is_import(self):
        # Pure import: no command, exactly one dep copied to one out.
        return not self.cmd and \
            len(self.deps) == 1 and \
            len(self.outs) == 1

    def changed(self):
        """Return True if any dep/out or the stage file itself changed."""
        ret = False

        if self.is_callback:
            ret = True

        if self.locked:
            # locked stages ignore their dependencies
            entries = self.outs
        else:
            entries = itertools.chain(self.outs, self.deps)

        for entry in entries:
            if entry.changed():
                ret = True

        if self.changed_md5():
            ret = True

        if ret:
            self.project.logger.debug(u'Dvc file \'{}\' changed'.format(
                self.relpath))
        else:
            self.project.logger.debug(u'Dvc file \'{}\' didn\'t change'.format(
                self.relpath))

        return ret

    def remove_outs(self, ignore_remove=False):
        """Remove all output files/dirs of this stage."""
        for out in self.outs:
            out.remove(ignore_remove=ignore_remove)

    def remove(self):
        """Remove outputs and the stage file itself."""
        self.remove_outs(ignore_remove=True)
        os.unlink(self.path)

    def reproduce(self, force=False):
        """Re-run the stage if it changed (or unconditionally with 'force').

        :returns: self when the stage was run, None when nothing was done.
        """
        if not self.changed() and not force:
            return None

        if (self.cmd or self.is_import) and not self.locked:
            # Removing outputs only if we actually have command to reproduce
            self.remove_outs(ignore_remove=False)

        self.project.logger.info(u'Reproducing \'{}\''.format(self.relpath))
        self.run()
        self.project.logger.debug(u'\'{}\' was reproduced'.format(
            self.relpath))
        return self

    @staticmethod
    def validate(d):
        """Validate a loaded stage dict; raise StageFileFormatError if bad."""
        try:
            schema.Schema(Stage.SCHEMA).validate(d)
        except schema.SchemaError as exc:
            Logger.debug(str(exc))
            # chain the cause so debugging retains the schema error
            raise StageFileFormatError() from exc

    @staticmethod
    def loadd(project, d, path):
        """Build a Stage from an already-parsed dict 'd' read from 'path'."""
        Stage.validate(d)

        path = os.path.abspath(path)
        cwd = os.path.dirname(path)
        cmd = d.get(Stage.PARAM_CMD, None)
        md5 = d.get(Stage.PARAM_MD5, None)
        locked = d.get(Stage.PARAM_LOCKED, False)

        stage = Stage(project=project, path=path, cmd=cmd, cwd=cwd,
                      md5=md5, locked=locked)

        stage.deps = dependency.loadd_from(stage, d.get(Stage.PARAM_DEPS, []))
        stage.outs = output.loadd_from(stage, d.get(Stage.PARAM_OUTS, []))

        return stage

    @classmethod
    def _stage_fname_cwd(cls, fname, cwd, outs, add):
        """Derive the stage file name and cwd from the first output."""
        if fname and cwd:
            return (fname, cwd)

        if not outs:
            return (cls.STAGE_FILE, cwd if cwd else os.getcwd())

        out = outs[0]
        if out.path_info['scheme'] == 'local':
            path = os.path
        else:
            path = posixpath

        fname = fname if fname else path.basename(out.path) + cls.STAGE_FILE_SUFFIX
        cwd = path.dirname(out.path) if not cwd or add else cwd
        return (fname, cwd)

    @staticmethod
    def loads(project=None, cmd=None, deps=None, outs=None,
              outs_no_cache=None, metrics_no_cache=None, fname=None,
              cwd=os.curdir, locked=False, add=False):
        """Build a Stage from individual arguments (e.g. the 'run' command).

        List parameters default to fresh empty lists (previously mutable
        default arguments, which would be shared across calls).
        """
        deps = [] if deps is None else deps
        outs = [] if outs is None else outs
        outs_no_cache = [] if outs_no_cache is None else outs_no_cache
        metrics_no_cache = [] if metrics_no_cache is None else metrics_no_cache

        stage = Stage(project=project, cwd=cwd, cmd=cmd, locked=locked)

        stage.outs = output.loads_from(stage, outs, use_cache=True)
        stage.outs += output.loads_from(stage, outs_no_cache, use_cache=False)
        stage.outs += output.loads_from(stage, metrics_no_cache,
                                        use_cache=False, metric=True)
        stage.deps = dependency.loads_from(stage, deps)

        fname, cwd = Stage._stage_fname_cwd(fname, cwd, stage.outs, add=add)

        cwd = os.path.abspath(cwd)
        path = os.path.join(cwd, fname)

        stage.cwd = cwd
        stage.path = path

        return stage

    @staticmethod
    def load(project, fname):
        """Load and parse a stage file from disk."""
        with open(fname, 'r') as fd:
            return Stage.loadd(project, yaml.safe_load(fd), fname)

    def dumpd(self):
        """Serialize to a dict; PARAM_MD5 covers all other serialized keys."""
        deps = [x.dumpd() for x in self.deps]
        outs = [x.dumpd() for x in self.outs]

        ret = {}
        if self.cmd is not None:
            ret[Stage.PARAM_CMD] = self.cmd

        if deps:
            ret[Stage.PARAM_DEPS] = deps

        if outs:
            ret[Stage.PARAM_OUTS] = outs

        if self.locked:
            ret[Stage.PARAM_LOCKED] = self.locked

        ret[Stage.PARAM_MD5] = dict_md5(ret)

        return ret

    def dump(self, fname=None):
        """Write the serialized stage to 'fname' (default: its own path)."""
        if not fname:
            fname = self.path

        with open(fname, 'w') as fd:
            yaml.safe_dump(self.dumpd(), fd, default_flow_style=False)

    def save(self):
        """Snapshot the current state of all deps and outs."""
        for dep in self.deps:
            dep.save()

        for out in self.outs:
            out.save()

    def run(self):
        """Execute the stage: verify, import, or run the shell command."""
        if self.locked:
            self.project.logger.info(
                u'Verifying outputs in locked stage \'{}\''.format(
                    self.relpath))
            self.check_missing_outputs()
        elif self.is_import:
            msg = u'Importing \'{}\' -> \'{}\''
            self.project.logger.info(
                msg.format(self.deps[0].path, self.outs[0].path))
            self.deps[0].download(self.outs[0].path_info)
        elif self.is_data_source:
            self.project.logger.info(
                u'Verifying data sources in \'{}\''.format(self.relpath))
            self.check_missing_outputs()
        else:
            self.project.logger.info(u'Running command:\n\t{}'.format(
                self.cmd))
            # NOTE: shell=True is intentional -- 'cmd' is a user-authored
            # shell command from the stage file, not untrusted input.
            p = subprocess.Popen(self.cmd,
                                 cwd=self.cwd,
                                 shell=True,
                                 env=os.environ,
                                 executable=os.getenv('SHELL'))
            p.communicate()
            if p.returncode != 0:
                raise StageCmdFailedError(self)

        self.save()

    def check_missing_outputs(self):
        """Raise MissingDataSource if any declared output does not exist."""
        outs = [out for out in self.outs if not out.exists]
        paths = [
            out.path if out.path_info['scheme'] != 'local' else out.rel_path
            for out in outs
        ]
        if paths:
            raise MissingDataSource(paths)

    def checkout(self):
        """Check out all outputs from cache into the workspace."""
        for out in self.outs:
            out.checkout()

    def _status(self, entries, name):
        # Collect per-entry status dicts under a single 'name' key.
        ret = {}

        for entry in entries:
            ret.update(entry.status())

        if ret:
            return {name: ret}
        return {}

    def status(self):
        """Return {relpath: {...}} when anything changed, else {}."""
        ret = {}

        if not self.locked:
            ret.update(self._status(self.deps, 'deps'))

        ret.update(self._status(self.outs, 'outs'))

        if ret or self.changed_md5() or self.is_callback:
            return {self.relpath: ret}

        return {}
def __init__(self, report=None, url=None):
    """
    @param report: hash with report data from Cuckoo
    @type report: dict
    @param url: URL where the report was retrieved from
    @type url: string
    """
    self._url = url
    raw = {} if report is None else report

    # schema fragments reused in several places below
    dns_entry = {'request': str}
    signature_entry = {'description': str}

    # The schema library does not validate the defaults of optional keys, so
    # their validators cannot introduce further defaults; only the presence
    # of the top-level keys with defaults can be relied upon. To avoid
    # confusion we therefore make no assumptions about optional key existence
    # and only about schema compliance. We still use the validated result
    # because ignore_extra_keys has stripped extraneous data from it, which
    # protects us somewhat from accidentally processing it.
    report_schema = schema.Schema(
        {
            schema.Optional('network', default={}, ignore_extra_keys=True): {
                schema.Optional('dns', default=[]):
                    schema.Or([dns_entry], (dns_entry,),
                              ignore_extra_keys=True),
            },
            schema.Optional('signatures', default=[]):
                schema.Or([signature_entry], (signature_entry,),
                          ignore_extra_keys=True),
            schema.Optional('info', default={}): {
                schema.Optional('score', default=0.0):
                    schema.Or(int, float),
            },
            schema.Optional('debug', default={}): {
                schema.Optional('errors', default=[]):
                    schema.Or([str], (str,)),
                schema.Optional('cuckoo', default=[]):
                    schema.Or([str], (str,)),
            },
        }, ignore_extra_keys=True)
    validated = report_schema.validate(raw)

    self._requested_domains = [
        entry['request']
        for entry in validated.get('network', {}).get('dns', [])
    ]
    self._signature_descriptions = [
        entry['description']
        for entry in validated.get('signatures', [])
    ]

    # Convert to the types of our external API explicitly here, since we
    # accept multiple input types. (schema.Use could convert as well, but it
    # runs before validation in duck-typing fashion, which could make us
    # accept unintended types, e.g. a string because it can be converted to
    # a list because it's iterable.)
    self._score = float(validated.get('info', {}).get('score', 0.0))
    debug_section = validated.get('debug', {})
    self._errors = list(debug_section.get('errors', []))
    self._server_messages = list(debug_section.get('cuckoo', []))
elif evaluation == 'old-matlab': benchmark = CAT2000Matlab() elif evaluation == 'new': benchmark = CAT2000() else: raise ValueError(evaluation) elif dataset.lower() == 'mit1003': assert evaluation == 'new' benchmark = MIT1003() else: raise ValueError(dataset) return benchmark, model MaybeString = schema.Or(str, None) display_schema = schema.Schema({ schema.Optional('name', default=None): str, schema.Optional('published', default=''): str, schema.Optional('code', default=''): MaybeString, schema.Optional('evaluation_comment', default=''): str, schema.Optional('first_tested', default=None): str, }) config_schema = schema.Schema({ 'model': { 'name': str, 'filename': str, 'probabilistic': bool, schema.Optional('loss', default=False): schema.Or('AUC', 'sAUC', 'IG', 'NSS', 'CC', 'KLDiv', 'SIM'),
class SvnScm(Scm):
    """Subversion checkout of a single URL into a sub-directory.

    Supports an optional pinned revision, optional SSL relaxation, a Jenkins
    SCM configuration and a workspace status report.
    """

    SCHEMA = schema.Schema({
        'scm': 'svn',
        'url': str,
        schema.Optional('dir'): str,
        schema.Optional('if'): str,
        # revision may be given numerically or as a string (e.g. "HEAD")
        schema.Optional('revision'): schema.Or(int, str),
        schema.Optional('sslVerify'): bool,
    })

    def __init__(self, spec, overrides=[]):
        """Construct from a validated spec dictionary."""
        super().__init__(spec, overrides)
        self.__url = spec["url"]
        self.__dir = spec.get("dir", ".")
        self.__revision = spec.get("revision")
        self.__sslVerify = spec.get('sslVerify', True)

    def getProperties(self):
        """Return the property dictionary; 'revision' only when pinned."""
        ret = super().getProperties()
        ret.update({
            'scm': 'svn',
            "url": self.__url,
            "dir": self.__dir,
            'sslVerify': self.__sslVerify,
        })
        if self.__revision:
            ret["revision"] = self.__revision
        return ret

    def asScript(self):
        """Return the shell script performing checkout or update.

        Updates an existing working copy unless the URL points into tags/
        (tags are assumed immutable); a failed fresh checkout removes the
        partial working copy and aborts.
        """
        options = "--non-interactive"
        if not self.__sslVerify:
            # accept common certificate problems without prompting
            options += " --trust-server-cert-failures=unknown-ca,cn-mismatch,expired,not-yet-valid,other"
        return """
{HEADER}
if [[ -d {SUBDIR}/.svn ]] ; then
    if [[ {URL} != */tags/* ]] ; then
        svn up {OPTIONS} {REVISION_ARG} {SUBDIR}
    fi
else
    if ! svn co {OPTIONS} {REVISION_ARG} {URL} {SUBDIR} ; then
        rm -rf {SUBDIR}
        exit 1
    fi
fi
""".format(HEADER=super().asScript(),
           OPTIONS=options,
           URL=quote(self.__url),
           SUBDIR=quote(self.__dir),
           REVISION_ARG=(("-r " + quote(str(self.__revision))) if self.__revision else ''))

    def asDigestScript(self):
        """Return forward compatible stable string describing this svn module.

        The module is represented as "url[@rev] > dir".
        """
        return (self.__url + (("@" + str(self.__revision)) if self.__revision else "")
                + " > " + self.__dir)

    def asJenkins(self, workPath, credentials, options):
        """Build the Jenkins <scm> XML element for the Subversion plugin."""
        scm = ElementTree.Element("scm", attrib={
            "class": "hudson.scm.SubversionSCM",
            "plugin": "[email protected]",
        })

        locations = ElementTree.SubElement(scm, "locations")
        location = ElementTree.SubElement(
            locations, "hudson.scm.SubversionSCM_-ModuleLocation")

        # a pinned revision is encoded into the remote URL (peg syntax)
        url = self.__url
        if self.__revision:
            url += ("@" + str(self.__revision))

        ElementTree.SubElement(location, "remote").text = url
        credentialsId = ElementTree.SubElement(location, "credentialsId")
        if credentials:
            credentialsId.text = credentials
        ElementTree.SubElement(location, "local").text = (
            os.path.normpath(os.path.join(workPath, self.__dir)))
        ElementTree.SubElement(location, "depthOption").text = "infinity"
        ElementTree.SubElement(location, "ignoreExternalsOption").text = "true"

        ElementTree.SubElement(scm, "excludedRegions")
        ElementTree.SubElement(scm, "includedRegions")
        ElementTree.SubElement(scm, "excludedUsers")
        ElementTree.SubElement(scm, "excludedRevprop")
        ElementTree.SubElement(scm, "excludedCommitMessages")
        ElementTree.SubElement(scm, "workspaceUpdater",
                               attrib={"class": "hudson.scm.subversion.UpdateUpdater"})
        ElementTree.SubElement(scm, "ignoreDirPropChanges").text = "false"
        ElementTree.SubElement(scm, "filterChangelog").text = "false"

        return scm

    def getDirectories(self):
        return {self.__dir: hashString(self.asDigestScript())}

    def isDeterministic(self):
        # only a numeric (pinned) revision makes the checkout reproducible
        return str(self.__revision).isnumeric()

    def hasJenkinsPlugin(self):
        return True

    def callSubversion(self, workspacePath, *args):
        """Run 'svn <args>' inside the checkout; raise BuildError on failure."""
        cmdLine = ['svn']
        cmdLine.extend(args)

        cwd = os.path.join(workspacePath, self.__dir)
        try:
            output = subprocess.check_output(cmdLine, cwd=cwd,
                                             universal_newlines=True,
                                             stderr=subprocess.DEVNULL)
        except subprocess.CalledProcessError as e:
            raise BuildError(
                "svn error:\n Directory: '{}'\n Command: '{}'\n'{}'".format(
                    cwd, " ".join(cmdLine), e.output.rstrip()))
        except OSError as e:
            raise BuildError("Error calling svn: " + str(e))
        return output

    # Get SvnSCM status. The purpose of this function is to return the status of the given directory
    #
    # return values:
    #  - error: the scm is in a error state. Use this if svn call returns a error code.
    #  - dirty: SCM is dirty. Could be: modified files, switched to another URL or revision
    #  - clean: same URL and revision as specified in the recipe and no local changes.
    #  - empty: directory is not existing
    #
    # This function is called when build with --clean-checkout. 'error' and 'dirty' scm's are moved to attic,
    # while empty and clean directories are not.
    def status(self, workspacePath):
        """Return (status, shortStatus, longStatus) for the working copy."""
        if not os.path.exists(os.path.join(workspacePath, self.__dir)):
            return 'empty', '', ''

        status = 'clean'
        shortStatus = ''
        longStatus = ''

        def setStatus(shortMsg, longMsg, dirty=True):
            # accumulate unique flags; any dirty flag taints the whole SCM
            nonlocal status, shortStatus, longStatus
            if (shortMsg not in shortStatus):
                shortStatus += shortMsg
                longStatus += longMsg
                if (dirty):
                    status = 'dirty'

        try:
            # local modifications?
            svnoutput = self.callSubversion(workspacePath, 'status')
            if len(svnoutput):
                longMsg = colorize("> modified:\n", "33")
                for line in svnoutput.split('\n'):
                    longMsg += ' ' + line.rstrip()
                setStatus('M', longMsg)

            # switched URL or revision?
            svnoutput = self.callSubversion(workspacePath, 'info', '--xml')
            info = ElementTree.fromstring(svnoutput)
            entry = info.find('entry')
            url = entry.find('url').text
            revision = entry.attrib['revision']

            if self.__url != url:
                setStatus('S', colorize(
                    "> URLs do not match!\n recipe:\t{}\n svn info:\t{}"
                    .format(self.__url, url), "33"))
            if self.__revision is not None and int(revision) != int(self.__revision):
                setStatus('S', colorize(
                    "> wrong revision: recipe: {} svn info: {}".format(
                        self.__revision, revision), "33"))

        except BuildError as e:
            print(e)
            status = 'error'

        return status, shortStatus, longStatus

    def getAuditSpec(self):
        return ("svn", self.__dir)
class GitScm(Scm):
    """Git checkout of a branch, tag or fixed commit into a sub-directory.

    Extra remotes can be configured via 'remote-<name>' keys. The SCM is
    deterministic only when pinned to a tag or commit.
    """

    SCHEMA = schema.Schema({
        'scm' : 'git',
        'url' : str,
        schema.Optional('dir') : str,
        schema.Optional('if') : schema.Or(str, IfExpression),
        schema.Optional('branch') : str,
        schema.Optional('tag') : str,
        schema.Optional('commit') : str,
        schema.Optional('rev') : str,
        # additional remotes: remote-<name> -> url
        schema.Optional(schema.Regex('^remote-.*')) : str,
        schema.Optional('sslVerify') : bool,
    })

    REMOTE_PREFIX = "remote-"

    def __init__(self, spec, overrides=[], secureSSL=None):
        """Construct from a validated spec dictionary.

        'rev' is a convenience key that is parsed into branch/tag/commit;
        explicit 'branch'/'tag'/'commit' keys take precedence over it.

        :raises ParseError: on malformed rev/commit or a reserved remote name.
        """
        super().__init__(spec, overrides)
        self.__url = spec["url"]
        self.__branch = None
        self.__tag = None
        self.__commit = None
        self.__remotes = {}
        if "rev" in spec:
            rev = spec["rev"]
            if rev.startswith("refs/heads/"):
                self.__branch = rev[11:]
            elif rev.startswith("refs/tags/"):
                self.__tag = rev[10:]
            elif len(rev) == 40:
                self.__commit = rev
            else:
                raise ParseError("Invalid rev format: " + rev)
        self.__branch = spec.get("branch", self.__branch)
        self.__tag = spec.get("tag", self.__tag)
        self.__commit = spec.get("commit", self.__commit)
        if self.__commit:
            # validate commit: exactly 40 lower-case hex characters
            if re.match("^[0-9a-f]{40}$", self.__commit) is None:
                raise ParseError("Invalid commit id: " + str(self.__commit))
        elif not self.__branch and not self.__tag:
            # nothing specified at all -> master branch
            self.__branch = "master"
        self.__dir = spec.get("dir", ".")
        # convert remotes into separate dictionary
        for key, val in spec.items():
            if key.startswith(GitScm.REMOTE_PREFIX):
                stripped_key = key[len(GitScm.REMOTE_PREFIX):] # remove prefix
                if stripped_key == "origin":
                    # "origin" is reserved for the primary URL
                    raise ParseError("Invalid remote name: " + stripped_key)
                self.__remotes.update({stripped_key : val})
        self.__sslVerify = spec.get('sslVerify', secureSSL)

    def getProperties(self):
        """Return the property dictionary including the synthesized 'rev'."""
        properties = super().getProperties()
        properties.update({
            'scm' : 'git',
            'url' : self.__url,
            'branch' : self.__branch,
            'tag' : self.__tag,
            'commit' : self.__commit,
            'dir' : self.__dir,
            'rev' : ( self.__commit if self.__commit else
                (("refs/tags/" + self.__tag) if self.__tag else
                 ("refs/heads/" + self.__branch))
            ),
            'sslVerify' : self.__sslVerify,
        })
        for key, val in self.__remotes.items():
            properties.update({GitScm.REMOTE_PREFIX + key : val})
        return properties

    async def invoke(self, invoker):
        """Perform or update the checkout in the invoker's workspace."""
        # make sure the git directory exists
        if not os.path.isdir(invoker.joinPath(self.__dir, ".git")):
            await invoker.checkCommand(["git", "init", self.__dir])

        # setup and update remotes
        remotes = { "origin" : self.__url }
        remotes.update(self.__remotes)
        existingRemotes = await invoker.checkOutputCommand(["git", "remote"], cwd=self.__dir)
        for remote in existingRemotes.split("\n"):
            if remote in remotes:
                cfgUrl = remotes[remote]
                realUrl = await invoker.checkOutputCommand(
                    ["git", "ls-remote", "--get-url", remote], cwd=self.__dir)
                if cfgUrl != realUrl:
                    await invoker.checkCommand(
                        ["git", "remote", "set-url", remote, cfgUrl], cwd=self.__dir)
                del remotes[remote]

        # add remaining (new) remotes
        for remote, url in remotes.items():
            await invoker.checkCommand(["git", "remote", "add", remote, url], cwd=self.__dir)

        # relax security if requested
        if not self.__sslVerify:
            await invoker.checkCommand(["git", "config", "http.sslVerify", "false"],
                                       cwd=self.__dir)

        # do the checkout
        if self.__tag or self.__commit:
            refSpec = ["+refs/heads/*:refs/remotes/origin/*"]
            if self.__tag:
                refSpec.append("refs/tags/{0}:refs/tags/{0}".format(self.__tag))
            # checkout only if HEAD is invalid (rev-parse exits non-zero then)
            head = await invoker.callCommand(["git", "rev-parse", "--verify", "-q", "HEAD"],
                                             stdout=False, cwd=self.__dir)
            if head:
                await invoker.checkCommand(["git", "fetch", "origin"] + refSpec,
                                           cwd=self.__dir)
                await invoker.checkCommand(["git", "checkout", "-q",
                    self.__commit if self.__commit else "tags/" + self.__tag],
                    cwd=self.__dir)
        else:
            await invoker.checkCommand(["git", "fetch", "-p", "origin"], cwd=self.__dir)
            if await invoker.callCommand(["git", "rev-parse", "--verify", "-q", "HEAD"],
                                         stdout=False, cwd=self.__dir):
                # checkout only if HEAD is invalid
                await invoker.checkCommand(["git", "checkout", "-b", self.__branch,
                    "remotes/origin/" + self.__branch], cwd=self.__dir)
            elif (await invoker.checkOutputCommand(
                    ["git", "rev-parse", "--abbrev-ref", "HEAD"],
                    cwd=self.__dir)) == self.__branch:
                # pull only if on original branch
                await invoker.checkCommand(["git", "merge", "--ff-only",
                    "refs/remotes/origin/" + self.__branch], cwd=self.__dir)
            else:
                invoker.warn("Not updating", self.__dir,
                             "because branch was changed manually...")

    def asDigestScript(self):
        """Return forward compatible stable string describing this git module.

        The format is "url rev-spec dir" where rev-spec depends on the given
        reference.
        """
        if self.__commit:
            return self.__commit + " " + self.__dir
        elif self.__tag:
            return self.__url + " refs/tags/" + self.__tag + " " + self.__dir
        else:
            return self.__url + " refs/heads/" + self.__branch + " " + self.__dir

    def asJenkins(self, workPath, credentials, options):
        """Build the Jenkins <scm> XML element for the Git plugin.

        :raises BuildError: on invalid 'scm.git.shallow'/'scm.git.timeout'.
        """
        scm = ElementTree.Element("scm", attrib={
            "class" : "hudson.plugins.git.GitSCM",
            "plugin" : "[email protected]",
        })
        ElementTree.SubElement(scm, "configVersion").text = "2"

        userconfigs = ElementTree.SubElement(
            ElementTree.SubElement(scm, "userRemoteConfigs"),
            "hudson.plugins.git.UserRemoteConfig")

        url = ElementTree.SubElement(userconfigs, "url")
        url.text = self.__url

        if credentials:
            credentialsId = ElementTree.SubElement(userconfigs, "credentialsId")
            credentialsId.text = credentials

        branch = ElementTree.SubElement(
            ElementTree.SubElement(
                ElementTree.SubElement(scm, "branches"),
                "hudson.plugins.git.BranchSpec"),
            "name")
        if self.__commit:
            branch.text = self.__commit
        elif self.__tag:
            branch.text = "refs/tags/" + self.__tag
        else:
            branch.text = "refs/heads/" + self.__branch

        ElementTree.SubElement(scm, "doGenerateSubmoduleConfigurations").text = "false"
        ElementTree.SubElement(scm, "submoduleCfg", attrib={"class" : "list"})

        extensions = ElementTree.SubElement(scm, "extensions")
        ElementTree.SubElement(
            ElementTree.SubElement(extensions,
                "hudson.plugins.git.extensions.impl.RelativeTargetDirectory"),
            "relativeTargetDir").text = os.path.normpath(os.path.join(workPath, self.__dir))
        # remove untracked files and stale branches
        ElementTree.SubElement(extensions,
            "hudson.plugins.git.extensions.impl.CleanCheckout")
        ElementTree.SubElement(extensions,
            "hudson.plugins.git.extensions.impl.PruneStaleBranch")
        # set git clone options
        shallow = options.get("scm.git.shallow")
        timeout = options.get("scm.git.timeout")
        if shallow is not None or timeout is not None:
            co = ElementTree.SubElement(extensions,
                "hudson.plugins.git.extensions.impl.CloneOption")
            if shallow is not None:
                try:
                    shallow = int(shallow)
                    if shallow < 0:
                        raise ValueError()
                except ValueError:
                    raise BuildError("Invalid 'git.shallow' option: " + str(shallow))
                if shallow > 0:
                    ElementTree.SubElement(co, "shallow").text = "true"
                    ElementTree.SubElement(co, "noTags").text = "false"
                    ElementTree.SubElement(co, "reference").text = ""
                    ElementTree.SubElement(co, "depth").text = str(shallow)
                    ElementTree.SubElement(co, "honorRefspec").text = "false"
            if timeout is not None:
                try:
                    timeout = int(timeout)
                    if timeout < 0:
                        raise ValueError()
                except ValueError:
                    # BUG FIX: the message previously reported 'shallow'
                    # instead of the offending 'timeout' value.
                    raise BuildError("Invalid 'git.timeout' option: " + str(timeout))
                if timeout > 0:
                    ElementTree.SubElement(co, "timeout").text = str(timeout)

        if isTrue(options.get("scm.ignore-hooks", "0")):
            ElementTree.SubElement(extensions,
                "hudson.plugins.git.extensions.impl.IgnoreNotifyCommit")

        return scm

    def getDirectory(self):
        return self.__dir

    def isDeterministic(self):
        # only tags and fixed commits are stable references
        return bool(self.__tag) or bool(self.__commit)

    def hasJenkinsPlugin(self):
        return True

    def callGit(self, workspacePath, *args):
        """Run 'git <args>' inside the checkout; raise BuildError on failure."""
        cmdLine = ['git']
        cmdLine.extend(args)
        cwd = os.path.join(workspacePath, self.__dir)
        try:
            output = subprocess.check_output(cmdLine, cwd=cwd,
                universal_newlines=True, stderr=subprocess.DEVNULL)
        except subprocess.CalledProcessError as e:
            raise BuildError("git error:\n Directory: '{}'\n Command: '{}'\n'{}'".format(
                cwd, " ".join(cmdLine), e.output.rstrip()))
        except OSError as e:
            raise BuildError("Error calling git: " + str(e))
        return output.strip()

    def status(self, workspacePath):
        """Return a ScmStatus describing how the workspace deviates."""
        status = ScmStatus()
        try:
            onCorrectBranch = False
            onTag = False
            output = self.callGit(workspacePath, 'ls-remote' ,'--get-url')
            if output != self.__url:
                status.add(ScmTaint.switched,
                    "> URL: configured: '{}', actual: '{}'".format(self.__url, output))

            if self.__commit:
                output = self.callGit(workspacePath, 'rev-parse', 'HEAD')
                if output != self.__commit:
                    status.add(ScmTaint.switched,
                        "> commit: configured: '{}', actual: '{}'".format(
                            self.__commit, output))
            elif self.__tag:
                output = self.callGit(workspacePath, 'tag', '--points-at', 'HEAD').splitlines()
                if self.__tag not in output:
                    actual = ("'" + ", ".join(output) + "'") if output else "not on any tag"
                    status.add(ScmTaint.switched,
                        "> tag: configured: '{}', actual: {}".format(self.__tag, actual))
                # Need to check if the tag still exists. Otherwise the "git
                # log" command at the end will trip.
                try:
                    self.callGit(workspacePath, 'rev-parse', 'tags/' + self.__tag)
                    onTag = True
                except BuildError:
                    pass
            elif self.__branch:
                output = self.callGit(workspacePath, 'rev-parse', '--abbrev-ref', 'HEAD')
                if output != self.__branch:
                    status.add(ScmTaint.switched,
                        "> branch: configured: '{}', actual: '{}'".format(
                            self.__branch, output))
                else:
                    output = self.callGit(workspacePath, 'log', '--oneline',
                        'refs/remotes/origin/' + self.__branch + '..HEAD')
                    if output:
                        status.add(ScmTaint.unpushed_main,
                            joinLines("> unpushed commits on {}:".format(self.__branch),
                                indent(output, ' ')))
                    onCorrectBranch = True

            # Check for modifications wrt. checked out commit
            output = self.callGit(workspacePath, 'status', '--porcelain')
            if output:
                status.add(ScmTaint.modified, joinLines("> modified:",
                    indent(output, ' ')))

            # The following shows all unpushed commits reachable by any ref
            # (local branches, stash, detached HEAD, etc).
            # Exclude HEAD if the configured branch is checked out to not
            # double-count them. Does not mark the SCM as dirty. Exclude the
            # configured tag too if it is checked out. Otherwise the tag would
            # count as unpushed if it is not on a remote branch.
            what = ['--all', '--not', '--remotes']
            if onCorrectBranch: what.append('HEAD')
            if onTag: what.append("tags/" + self.__tag)
            output = self.callGit(workspacePath, 'log', '--oneline', '--decorate', *what)
            if output:
                status.add(ScmTaint.unpushed_local,
                    joinLines("> unpushed local commits:", indent(output, ' ')))
        except BuildError as e:
            status.add(ScmTaint.error, e.slogan)

        return status

    def getAuditSpec(self):
        return ("git", self.__dir, {})

    def hasLiveBuildId(self):
        return True

    async def predictLiveBuildId(self, step):
        """Predict the checked-out commit without touching the workspace.

        A pinned commit is returned directly; otherwise the remote is queried
        via 'git ls-remote'. Returns None when the prediction fails.
        """
        if self.__commit:
            return bytes.fromhex(self.__commit)

        with stepAction(step, "LS-REMOTE", self.__url, (INFO, TRACE)) as a:
            if self.__tag:
                # Annotated tags are objects themselves. We need the commit object!
                refs = ["refs/tags/" + self.__tag + '^{}', "refs/tags/" + self.__tag]
            else:
                refs = ["refs/heads/" + self.__branch]
            cmdLine = ['git', 'ls-remote', self.__url] + refs
            try:
                proc = await asyncio.create_subprocess_exec(*cmdLine,
                    stdout=asyncio.subprocess.PIPE,
                    stderr=subprocess.DEVNULL)
                try:
                    stdout, stderr = await proc.communicate()
                    rc = await proc.wait()
                finally:
                    # make sure the child never outlives us
                    try:
                        proc.terminate()
                    except ProcessLookupError:
                        pass
                if rc != 0:
                    a.fail("exit {}".format(rc), WARNING)
                    return None
                output = stdout.decode(locale.getpreferredencoding(False)).strip()
            except (subprocess.CalledProcessError, OSError) as e:
                a.fail("error ({})".format(e))
                return None

            # have we found anything at all?
            if not output:
                a.fail("unknown", WARNING)
                return None

            # See if we got one of our intended refs. Git is generating lines with
            # the following format:
            #
            #   <sha1>\t<refname>
            #
            # Put the output into a dict with the refname as key. Be extra careful
            # and strip out lines not matching this pattern.
            output = {
                commitAndRef[1].strip() : bytes.fromhex(commitAndRef[0].strip())
                for commitAndRef
                in (line.split('\t') for line in output.split('\n'))
                if len(commitAndRef) == 2
            }
            for ref in refs:
                if ref in output:
                    return output[ref]

            # uhh, should not happen...
            a.fail("unknown", WARNING)
            return None

    def calcLiveBuildId(self, workspacePath):
        """Return the commit actually checked out in the workspace."""
        if self.__commit:
            return bytes.fromhex(self.__commit)
        else:
            output = self.callGit(workspacePath, 'rev-parse', 'HEAD').strip()
            return bytes.fromhex(output)

    def getLiveBuildIdSpec(self, workspacePath):
        # "=<commit>" is a literal id, "g<dir>" asks for a git query later
        if self.__commit:
            return "=" + self.__commit
        else:
            return "g" + os.path.join(workspacePath, self.__dir)

    @staticmethod
    def processLiveBuildIdSpec(dir):
        """Resolve a 'g<dir>' live-build-id spec to the HEAD commit."""
        try:
            return subprocess.check_output(["git", "rev-parse", "HEAD"],
                cwd=dir, universal_newlines=True).strip()
        except subprocess.CalledProcessError as e:
            raise BuildError("Git audit failed: " + str(e))
        except OSError as e:
            raise BuildError("Error calling git: " + str(e))
Schemes.S3: OutputS3, Schemes.GS: OutputGS, Schemes.SSH: OutputSSH, Schemes.LOCAL: OutputLOCAL, } # NOTE: currently there are only 3 possible checksum names: # # 1) md5 (LOCAL, SSH, GS); # 2) etag (S3); # 3) checksum (HDFS); # # so when a few types of outputs share the same name, we only need # specify it once. CHECKSUM_SCHEMA = { schema.Optional(RemoteLOCAL.PARAM_CHECKSUM): schema.Or(str, None), schema.Optional(RemoteS3.PARAM_CHECKSUM): schema.Or(str, None), schema.Optional(RemoteHDFS.PARAM_CHECKSUM): schema.Or(str, None), } TAGS_SCHEMA = {schema.Optional(str): CHECKSUM_SCHEMA} SCHEMA = CHECKSUM_SCHEMA.copy() SCHEMA[OutputBase.PARAM_PATH] = str SCHEMA[schema.Optional(OutputBase.PARAM_CACHE)] = bool SCHEMA[schema.Optional(OutputBase.PARAM_METRIC)] = OutputBase.METRIC_SCHEMA SCHEMA[schema.Optional(OutputBase.PARAM_TAGS)] = TAGS_SCHEMA SCHEMA[schema.Optional(OutputBase.PARAM_PERSIST)] = bool def _get(stage, p, info, cache, metric, persist=False, tags=None):
from datetime import datetime import schema as sm from .client import HttpClient, ClientError from .provider import ExchangeRatesProvider, ExchangeRateLoadError from ..models import Currency, Rate _POSSIBLE_CCY = [ccy.value for ccy in Currency] _ccy_validator = sm.Or(*_POSSIBLE_CCY) _rate_validator = sm.And(sm.Use(float), lambda n: n >= 0) _date_validator = sm.Regex(r"\d{4}-\d{2}-\d{2}") _ecb_rate_scm = sm.Schema({ "rates": { _ccy_validator: _rate_validator }, "base": _ccy_validator, "date": _date_validator }) class ECBProvider(ExchangeRatesProvider): API_ENDPOINT_CONFIG_NAME = "API_ENDPOINT" @classmethod def create(cls, endpoint: str): if not endpoint: raise ValueError("API endpoint is required")