Example #1
0
def list_or_tuple_of(sub_schema):
    """Build a schema that accepts either a tuple or a list of sub_schemas."""
    as_tuple = (sub_schema,)
    as_list = [sub_schema]
    return schema.Or(as_tuple, as_list)
Example #2
0
def validate(data):
    """Validate a raw test-set mapping and convert it into domain objects.

    Returns the validated structure with fixtures turned into ``Fixture``
    instances, steps into ``Step``/``Condition`` instances and tests into
    ``Test`` instances.  Raises ``schema.SchemaError`` on invalid input.
    """
    def is_file(path):
        # NOTE(review): expects a Path-like argument; 'input' values are run
        # through absolute_path (below) before reaching this check.
        return path.is_file()

    def to_fixture(data):
        return Fixture(**data)

    def to_step(data):
        return Step(**data)

    def guard_to_condition(guard):
        return Condition(guard["guard"])

    def to_test(data):
        # The YAML key is 'config-file'; the Test constructor wants the
        # Python-identifier form 'config_file'.
        data["config_file"] = data.pop("config-file")
        return Test(**data)

    def absolute_path(path):
        # Expand '~' first; paths that are still relative are resolved
        # against the test-set directory.
        absolute = Path(os.path.expanduser(path))
        if not absolute.is_absolute():
            absolute = (SET_DIR / path).resolve()
        return absolute

    def replace_path(raw_command):
        # '@.' is a placeholder for the test-set directory inside commands.
        return raw_command.replace("@.", str(SET_DIR))

    def to_command(raw_command):
        # Split like a shell would, after placeholder substitution.
        return shlex.split(replace_path(raw_command))

    def to_result(raw_result):
        # Look up the Result enum member by (upper-cased) name.
        return Result[raw_result.upper()]

    # A fixture is a pair of non-empty 'enter'/'exit' strings.
    fixture = schema.Schema(
        schema.And(
            {
                "enter": schema.And(str, len),
                "exit": schema.And(str, len)
            },
            schema.Use(to_fixture),
        ))
    # Fixtures are keyed by non-empty names.
    fixtures = schema.Schema({schema.And(str, len): fixture})
    # A step: mandatory command plus optional input file, transformation
    # and expected result (defaults to SUCCESS).
    step = schema.Schema(
        schema.And(
            {
                "command":
                schema.And(schema.Const(schema.And(str, len)),
                           schema.Use(to_command)),
                schema.Optional("input", default=None):
                schema.And(schema.Use(absolute_path), is_file),
                schema.Optional("transformation", default=None):
                schema.Use(replace_path),
                schema.Optional("expected_result", default=Result.SUCCESS):
                schema.Use(to_result),
            },
            schema.Use(to_step),
        ))
    # A guard step carries only a condition expression.
    guard = schema.Schema(
        schema.And({"guard": str}, schema.Use(guard_to_condition)))
    # A test: optional metadata plus a non-empty list of steps/guards.
    test = schema.Schema(
        schema.And(
            {
                schema.Optional("tags", default=None): [str],
                schema.Optional("condition", default=None):
                schema.Use(Condition),
                schema.Optional("config-file", default=None):
                schema.Use(absolute_path),
                schema.Optional("fixture", default=None):
                str,
                "steps": [schema.Or(step, guard)],
            },
            schema.Use(to_test),
        ))
    tests = schema.Schema({schema.And(str, len): test})
    # Top-level document: optional config-file and fixtures, mandatory tests.
    sch = schema.Schema({
        schema.Optional("config-file", default=None):
        schema.Use(absolute_path),
        schema.Optional("fixtures", default=None):
        fixtures,
        "tests":
        tests,
    })
    return sch.validate(data)
Example #3
0
class Dependency(object):
    """A stage dependency: an absolute repo path plus its recorded md5."""

    PARAM_RELPATH = 'relpath'
    PARAM_PATH = 'path'
    PARAM_MD5 = 'md5'
    MD5_DIR_SUFFIX = '.dir'

    # Serialized form: mandatory path, optional md5 (None until computed).
    SCHEMA = {
        PARAM_PATH: str,
        schema.Optional(PARAM_MD5): schema.Or(str, None),
    }

    def __init__(self, project, path, md5=None):
        self.project = project
        # Normalize to an absolute, symlink-free path before the repo check.
        self.path = os.path.abspath(os.path.realpath(path))

        if not self.path.startswith(self.project.root_dir):
            raise CmdOutputOutsideOfRepoError(self.rel_path)

        self.md5 = md5

    @property
    def rel_path(self):
        """Path relative to the current working directory (for messages)."""
        return os.path.relpath(self.path)

    def _changed_md5(self):
        # A missing file always counts as changed.
        if not os.path.exists(self.path):
            return True

        return self.project.state.changed(self.path, self.md5)

    @staticmethod
    def _changed_msg(changed):
        if changed:
            return 'changed'
        return "didn't change"

    def changed(self):
        """Return True when the dependency differs from its recorded state."""
        ret = self._changed_md5()

        msg = u'Dependency \'{}\' {}'.format(self.rel_path,
                                             self._changed_msg(ret))
        self.project.logger.debug(msg)

        return ret

    def status(self):
        if self.changed():
            # FIXME better msgs
            return {self.rel_path: 'changed'}
        return {}

    @staticmethod
    def is_dir_cache(cache):
        # Fixed: use this class's own constant instead of reaching into
        # Output (the class declares MD5_DIR_SUFFIX itself).
        return cache.endswith(Dependency.MD5_DIR_SUFFIX)

    def save(self):
        """Compute and record the current md5 of the dependency path."""
        if not os.path.exists(self.path):
            raise CmdOutputDoesNotExistError(self.rel_path)

        if not os.path.isfile(self.path) and not os.path.isdir(self.path):
            raise CmdOutputIsNotFileOrDirError(self.rel_path)

        self.md5 = self.project.state.update(self.path)

    @staticmethod
    def unixpath(path):
        """Convert a relative native path to forward-slash form."""
        assert not ntpath.isabs(path)
        assert not posixpath.isabs(path)
        return path.replace('\\', '/')

    def dumpd(self, cwd):
        """Serialize to a dict with the path made relative to *cwd*."""
        # Fixed: serialize using this class's own keys and helper rather
        # than Output's (keeps the class self-contained and subclass-safe).
        return {
            self.PARAM_PATH: self.unixpath(os.path.relpath(self.path, cwd)),
            self.PARAM_MD5: self.md5,
        }

    @classmethod
    def loadd(cls, project, d, cwd=os.curdir):
        """Deserialize a single dict produced by dumpd()."""
        relpath = os.path.normpath(cls.unixpath(d[cls.PARAM_PATH]))
        path = os.path.join(cwd, relpath)
        md5 = d.get(cls.PARAM_MD5, None)
        return cls(project, path, md5=md5)

    @classmethod
    def loadd_from(cls, project, d_list, cwd=os.curdir):
        return [cls.loadd(project, x, cwd=cwd) for x in d_list]

    @classmethod
    def loads(cls, project, s, cwd=os.curdir):
        return cls(project, os.path.join(cwd, s), md5=None)

    @classmethod
    def loads_from(cls, project, s_list, cwd=os.curdir):
        return [cls.loads(project, x, cwd=cwd) for x in s_list]

    def stage(self):
        """Find the stage that produces this path as an output, if any."""
        for stage in self.project.stages():
            for out in stage.outs:
                if self.path == out.path:
                    return stage
        return None
Example #4
0
class SvnScm(Scm):
    """Subversion SCM handler: checkout/update, Jenkins config, status."""

    # Recipe schema for an 'svn' SCM entry.
    SCHEMA = schema.Schema({
        'scm' : 'svn',
        'url' : str,
        schema.Optional('dir') : str,
        schema.Optional('if') : schema.Or(str, IfExpression),
        schema.Optional('revision') : schema.Or(int, str),
        schema.Optional('sslVerify') : bool,
    })

    def __init__(self, spec, overrides=[]):
        super().__init__(spec, overrides)
        self.__url = spec["url"]
        self.__dir = spec.get("dir", ".")
        self.__revision = spec.get("revision")
        self.__sslVerify = spec.get('sslVerify', True)

    def getProperties(self, isJenkins):
        """Return the property dict; 'revision' only when configured."""
        ret = super().getProperties(isJenkins)
        ret.update({
            'scm' : 'svn',
            "url" : self.__url,
            "dir" : self.__dir,
            'sslVerify' : self.__sslVerify,
        })
        if self.__revision:
            ret["revision"] = self.__revision
        return ret

    async def invoke(self, invoker):
        """Check out or update the working copy via the svn CLI."""
        options = [ "--non-interactive" ]
        if not self.__sslVerify:
            options += [ "--trust-server-cert-failures=unknown-ca,cn-mismatch,expired,not-yet-valid,other" ]
        if self.__revision:
            options += [ "-r", str(self.__revision) ]

        if os.path.isdir(invoker.joinPath(self.__dir, ".svn")):
            # Existing working copy: update, but never touch tag checkouts.
            if "/tags/" not in self.__url:
                await invoker.checkCommand(["svn", "up"] + options, cwd=self.__dir)
        else:
            await invoker.checkCommand(["svn", "co"] + options + [self.__url, self.__dir])

    def asDigestScript(self):
        """Return forward compatible stable string describing this svn module.

        The module is represented as "url[@rev] > dir".
        """
        return (self.__url + ( ("@"+str(self.__revision)) if self.__revision else "" ) + " > "
                + self.__dir)

    def asJenkins(self, workPath, credentials, options):
        """Build the hudson.scm.SubversionSCM XML element for a Jenkins job."""
        scm = ElementTree.Element("scm", attrib={
            "class" : "hudson.scm.SubversionSCM",
            "plugin" : "[email protected]",
        })

        locations = ElementTree.SubElement(scm, "locations")
        location = ElementTree.SubElement(locations,
            "hudson.scm.SubversionSCM_-ModuleLocation")

        # Jenkins takes a pinned revision as an '@REV' URL suffix.
        url = self.__url
        if self.__revision:
            url += ( "@" + str(self.__revision) )

        ElementTree.SubElement(location, "remote").text = url
        credentialsId = ElementTree.SubElement(location, "credentialsId")
        if credentials: credentialsId.text = credentials
        ElementTree.SubElement(location, "local").text = (
            os.path.normpath(os.path.join(workPath, self.__dir)) )
        ElementTree.SubElement(location, "depthOption").text = "infinity"
        ElementTree.SubElement(location, "ignoreExternalsOption").text = "true"

        # Remaining elements are required by the plugin but left at defaults.
        ElementTree.SubElement(scm, "excludedRegions")
        ElementTree.SubElement(scm, "includedRegions")
        ElementTree.SubElement(scm, "excludedUsers")
        ElementTree.SubElement(scm, "excludedRevprop")
        ElementTree.SubElement(scm, "excludedCommitMessages")
        ElementTree.SubElement(scm, "workspaceUpdater",
            attrib={"class":"hudson.scm.subversion.UpdateUpdater"})
        ElementTree.SubElement(scm, "ignoreDirPropChanges").text = "false"
        ElementTree.SubElement(scm, "filterChangelog").text = "false"

        return scm

    def getDirectory(self):
        return self.__dir

    def isDeterministic(self):
        # Only a purely numeric revision pins the checkout exactly.
        return str(self.__revision).isnumeric()

    def hasJenkinsPlugin(self):
        return True

    def callSubversion(self, workspacePath, *args):
        """Run 'svn *args' in the module directory; return stripped stdout.

        Raises BuildError on a non-zero exit status or if svn cannot be run.
        """
        cmdLine = ['svn']
        cmdLine.extend(args)
        cwd = os.path.join(workspacePath, self.__dir)
        try:
            output = subprocess.check_output(cmdLine, cwd=cwd,
                universal_newlines=True, stderr=subprocess.DEVNULL)
        except subprocess.CalledProcessError as e:
            raise BuildError("svn error:\n Directory: '{}'\n Command: '{}'\n'{}'".format(
                cwd, " ".join(cmdLine), e.output.rstrip()))
        except OSError as e:
            raise BuildError("Error calling svn: " + str(e))
        return output.strip()

    def status(self, workspacePath):
        """Compare the working copy against the recipe (modified/switched)."""
        status = ScmStatus()
        try:
            output = self.callSubversion(workspacePath, 'status')
            if output:
                status.add(ScmTaint.modified, joinLines("> modified:", indent(output, '   ')))

            # 'svn info --xml' yields the actual URL and checked-out revision.
            output = self.callSubversion(workspacePath, 'info', '--xml')
            info = ElementTree.fromstring(output)
            entry = info.find('entry')
            url = entry.find('url').text
            revision = entry.attrib['revision']

            if self.__url != url:
                status.add(ScmTaint.switched,
                    "> URL: configured: '{}', actual: '{}'".format(self.__url, url))
            if self.__revision is not None and int(revision) != int(self.__revision):
                status.add(ScmTaint.switched,
                    "> revision: configured: {}, actual: {}".format(self.__revision, revision))

        except BuildError as e:
            status.add(ScmTaint.error, e.slogan)

        return status

    def getAuditSpec(self):
        return ("svn", self.__dir, {})
Example #5
0
class FlatpakPackagesPlugin(_AbstractFlatpakPlugin):
    """Install Flatpak packages (apps, runtimes, refs or bundles)."""

    key = 'flatpak-packages'
    # Each entry is either a bare package string or a dict with options.
    schema = [
        str, {
            schema.Optional('package'): str,
            schema.Optional('type'): schema.Or('bundle', 'ref', 'app',
                                               'runtime'),
            schema.Optional('target'): schema.Or('system', 'user'),
            schema.Optional('remote'): str
        }
    ]

    @staticmethod
    def _download_package(url: str, suffix: str = '.flatpakref') -> str:
        """Download *url* to a persistent temp file and return its path.

        Fixed: the original left the NamedTemporaryFile handle open and
        re-opened the file by name; now we write through the temp file
        handle directly and close it deterministically.
        """
        # delete=False: the caller needs the file to outlive this function.
        with tempfile.NamedTemporaryFile('wb',
                                         prefix='download_',
                                         suffix=suffix,
                                         delete=False) as out_file:
            with urllib.request.urlopen(url) as response:
                shutil.copyfileobj(response, out_file)
            return out_file.name

    @staticmethod
    def _get_flatpakref_application_name(filepath: str) -> str:
        """Extract the 'Name=' value from a .flatpakref file."""
        with open(filepath) as file:
            text = file.read()
            for line in text.splitlines():
                if line.startswith('Name='):
                    return line.split('=')[1].strip()
        raise Exception('Error parsing flatpakref file: {}'.format(filepath))

    def _check_is_application_installed(self, name: str) -> bool:
        """Return True when *name* appears in 'flatpak list' output."""
        output = self.run_command('flatpak', 'list')
        return output.find(name) != -1

    def _install_flatpak_package(self,
                                 app: str,
                                 remote: str = None,
                                 type_: str = None,
                                 target: str = 'system'):
        """Install one package via the flatpak CLI (sudo for system target)."""
        # Flatpak considers it an error to install already
        # installed applications
        # Workaround by using "--reinstall" which uninstalls
        # the application first if it is already installed.
        # FIXME: At least with .flatpak bundles, the "--reinstall"
        # option doesn't seem to fix the issue currently.
        cmd = ['flatpak', 'install', '--reinstall', '-y']
        cmd += ['--user'] if target == 'user' else ['--system']
        if type_ is not None:
            type_flags = {
                'ref': '--from',
                'bundle': '--bundle',
                'runtime': '--runtime',
                'app': '--app',
            }
            if type_ in type_flags:
                cmd += [type_flags[type_]]
        if remote is not None:
            cmd += [remote]
        cmd += [app]
        if target == 'system':
            self.run_command_sudo(*cmd)
        else:
            self.run_command(*cmd)

    def perform(self):
        """Ensure flatpak is present, then install every configured package."""
        # Install flatpak if not already installed
        if not self._check_is_flatpak_installed():
            self._install_flatpak()

        assert self._check_is_flatpak_installed()

        for flatpak in self.config:
            target = 'system'
            type_ = None
            remote = None

            if isinstance(flatpak, dict):
                package = flatpak['package']
                if 'target' in flatpak:
                    target = flatpak['target']
                if 'type' in flatpak:
                    type_ = flatpak['type']
                if 'remote' in flatpak:
                    remote = flatpak['remote']
            else:
                package = flatpak

            # Determine type based on the suffix after the last dot
            # (when there is no dot, the whole name is compared and the
            # type falls through to 'app').
            if type_ is None:
                if package[package.rfind('.') + 1:] == 'flatpakref':
                    type_ = 'ref'
                elif package[package.rfind('.') + 1:] == 'flatpak':
                    type_ = 'bundle'
                else:
                    type_ = 'app'

            # Download remote bundles or refs
            # This is required for bundles because it is not currently supported
            # by Flatpak to download remote .flatpak bundles. We also download
            # remote .flatpakref files to check whether the application is already
            # installed before attempting an installation.
            if type_ in ('ref', 'bundle'):
                is_remote_package = bool(urllib.parse.urlparse(package).scheme)
                if is_remote_package:
                    package = self._download_package(
                        package,
                        suffix='.flatpakref' if type_ == 'ref' else '.flatpak')
                else:
                    # Consider the package to be a local file,
                    # therefore expand the path:
                    package = self._expand_path(package)

            # In case of .flatpakref files, we will only perform
            # the installation if the application is not already installed
            if type_ == 'ref':
                app_name = self._get_flatpakref_application_name(package)
                if self._check_is_application_installed(app_name):
                    continue

            # NOTE: Doesn't check whether the application is
            # already installed or not. Will perform a reinstall
            # if the application is already installed.
            self._install_flatpak_package(package, remote, type_, target)
Example #6
0
class GitAudit(ScmAudit):
    """Audit record of a git checkout: remotes, commit, dirtiness, submodules."""

    # Serialized audit record layout.
    SCHEMA = schema.Schema({
        'type':
        'git',
        'dir':
        str,
        'remotes': {
            schema.Optional(str): str
        },
        'commit':
        str,
        'description':
        str,
        'dirty':
        bool,
        schema.Optional('submodules'):
        schema.Or(bool, [str]),
        schema.Optional('recurseSubmodules'):
        bool,
    })

    async def _scanDir(self, workspace, dir, extra):
        """Inspect the checkout in workspace/dir and capture its state.

        Raises BuildError when git fails or cannot be executed.
        """
        self.__dir = dir
        self.__submodules = extra.get('submodules', False)
        self.__recurseSubmodules = extra.get('recurseSubmodules', False)
        dir = os.path.join(workspace, dir)
        try:
            # Parse 'git remote -v' fetch lines into {name: url}.
            remotes = (await check_output(["git", "remote", "-v"],
                                          cwd=dir,
                                          universal_newlines=True)).split("\n")
            # Each fetch line ends in ' (fetch)' (8 chars), hence r[:-8].
            remotes = (r[:-8].split("\t") for r in remotes
                       if r.endswith("(fetch)"))
            self.__remotes = {remote: url for (remote, url) in remotes}

            self.__commit = (await
                             check_output(["git", "rev-parse", "HEAD"],
                                          cwd=dir,
                                          universal_newlines=True)).strip()
            self.__description = (await check_output(
                ["git", "describe", "--always", "--dirty=-dirty"],
                cwd=dir,
                universal_newlines=True)).strip()
            # The tree is dirty if either the top level or any submodule is.
            subDirty = await self.__scanSubmodules(dir, self.__submodules)
            self.__dirty = subDirty or self.__description.endswith("-dirty")
        except subprocess.CalledProcessError as e:
            raise BuildError("Git audit failed: " + str(e))
        except OSError as e:
            raise BuildError("Error calling git: " + str(e))

    async def __scanSubmodules(self, dir, shouldExist, base="."):
        """Return True when any submodule under *base* deviates.

        *shouldExist* is either a bool (all/none expected checked out) or a
        list of expected submodule paths.
        """
        if not os.path.exists(os.path.join(dir, base, ".gitmodules")):
            return False

        # List all paths as per .gitmodules. This gives us the list of all
        # known submodules.
        try:
            allPaths = await check_output([
                "git", "-C", base, "config", "-f", ".gitmodules", "-z",
                "--get-regexp", "path"
            ],
                                          cwd=dir,
                                          universal_newlines=True)
        except subprocess.CalledProcessError:
            allPaths = ""  # No key found in file. Probably empty
        # NUL-separated 'key\nvalue' records; keep the value (the path).
        allPaths = [p.split("\n")[1] for p in allPaths.split("\0") if p]
        if not allPaths:
            return False

        # Fetch the respecive commits as per git ls-tree
        allPaths = await check_output(
            ["git", "-C", base, "ls-tree", "-z", "HEAD"] + allPaths,
            cwd=dir,
            universal_newlines=True)
        # Keep only 'commit' (gitlink) entries: {path: commit-hash}.
        allPaths = {
            path: attribs.split(' ')[2]
            for attribs, path in (p.split('\t') for p in allPaths.split('\0')
                                  if p) if attribs.split(' ')[1] == "commit"
        }

        # Normalize subset of submodules
        if isinstance(shouldExist, list):
            shouldExist = set(normPath(p) for p in shouldExist)
        elif shouldExist:
            shouldExist = set(normPath(p) for p in allPaths.keys())
        else:
            shouldExist = set()

        # Check each submodule for their commit and modifications.
        # Unconditionally recurse to even see if something is there even tough
        # it shouldn't. Bail out on first modification.
        for path, commit in sorted(allPaths.items()):
            subPath = os.path.join(base, path)
            subShouldExist = normPath(path) in shouldExist
            if not os.path.exists(os.path.join(dir, subPath, ".git")):
                if subShouldExist:
                    return True  # submodule is missing
                elif not dirIsEmpty(os.path.join(dir, subPath)):
                    return True  # something in submodule which should not be there
                else:
                    continue
            elif not subShouldExist:
                # submodule checked out even though it shouldn't
                return True

            realCommit = (await check_output(
                ["git", "-C", subPath, "rev-parse", "HEAD"],
                cwd=dir,
                universal_newlines=True)).strip()
            if commit != realCommit:
                return True  # different commit checked out
            # diff-index --quiet exits non-zero on working tree changes.
            proc = await run(
                ["git", "-C", subPath, "diff-index", "--quiet", "HEAD", "--"],
                cwd=dir)
            if proc.returncode != 0:
                return True  # dirty

            if await self.__scanSubmodules(dir, self.__recurseSubmodules,
                                           subPath):
                return True  # sub-submodule modified

        return False

    def _load(self, data):
        """Restore state from a dict previously produced by dump()."""
        self.__dir = data["dir"]
        self.__remotes = data["remotes"]
        self.__commit = data["commit"]
        self.__description = data["description"]
        self.__dirty = data["dirty"]
        self.__submodules = data.get("submodules", False)
        self.__recurseSubmodules = data.get("recurseSubmodules", False)

    def dump(self):
        """Serialize to a dict; submodule keys are emitted only when set."""
        ret = {
            "type": "git",
            "dir": self.__dir,
            "remotes": self.__remotes,
            "commit": self.__commit,
            "description": self.__description,
            "dirty": self.__dirty,
        }
        if self.__submodules:
            ret["submodules"] = self.__submodules
            if self.__recurseSubmodules:
                ret["recurseSubmodules"] = True
        return ret

    def getStatusLine(self):
        return self.__description
Example #7
0
class Stage(object):
    """A pipeline stage loaded from / saved to a stage file (Dvcfile/*.dvc)."""

    STAGE_FILE = 'Dvcfile'
    STAGE_FILE_SUFFIX = '.dvc'

    PARAM_CMD = 'cmd'
    PARAM_DEPS = 'deps'
    PARAM_OUTS = 'outs'

    # Serialized layout: optional command plus optional deps/outs lists.
    SCHEMA = {
        schema.Optional(PARAM_CMD):
        schema.Or(str, None),
        schema.Optional(PARAM_DEPS):
        schema.Or(schema.And(list, schema.Schema([Dependency.SCHEMA])), None),
        schema.Optional(PARAM_OUTS):
        schema.Or(schema.And(list, schema.Schema([Output.SCHEMA])), None),
    }

    def __init__(self,
                 project,
                 path=None,
                 cmd=None,
                 cwd=None,
                 deps=None,
                 outs=None):
        self.project = project
        self.path = path
        self.cmd = cmd
        self.cwd = cwd
        # Fixed: mutable default arguments ([]) replaced with None sentinels
        # so instances never share the same default list object.
        self.outs = outs if outs is not None else []
        self.deps = deps if deps is not None else []

    @property
    def relpath(self):
        """Stage file path relative to the current working directory."""
        return os.path.relpath(self.path)

    @property
    def is_data_source(self):
        # A stage without a command only tracks existing data.
        return self.cmd is None

    @staticmethod
    def is_stage_file(path):
        """Return True when *path* looks like a stage file."""
        if not os.path.isfile(path):
            return False

        if not path.endswith(Stage.STAGE_FILE_SUFFIX) and os.path.basename(
                path) != Stage.STAGE_FILE:
            return False

        return True

    def changed(self):
        """Return True when any dependency or output changed.

        Every entry is checked (no early exit) so each one gets a chance to
        log its own state.
        """
        ret = False
        for entry in itertools.chain(self.outs, self.deps):
            if entry.changed():
                ret = True

        if ret:
            self.project.logger.debug(u'Dvc file \'{}\' changed'.format(
                self.relpath))
        else:
            self.project.logger.debug(u'Dvc file \'{}\' didn\'t change'.format(
                self.relpath))

        return ret

    def remove_outs(self):
        """Delete all outputs and drop cached ones from SCM ignore lists."""
        for out in self.outs:
            out.remove()
            if out.use_cache:
                self.project.scm.ignore_remove(out.path)

    def remove(self):
        """Delete the outputs and the stage file itself."""
        self.remove_outs()
        os.unlink(self.path)

    def reproduce(self, force=False):
        """Re-run the stage if it changed (or *force*); return self or None."""
        if not self.changed() and not force:
            return None

        if self.cmd:
            # Removing outputs only if we actually have command to reproduce
            self.remove_outs()

        self.run()

        return self

    @staticmethod
    def validate(d):
        """Raise StageFileFormatError when *d* doesn't match SCHEMA."""
        try:
            schema.Schema(Stage.SCHEMA).validate(d)
        except schema.SchemaError as exc:
            Logger.debug(str(exc))
            raise StageFileFormatError()

    @staticmethod
    def loadd(project, d, path):
        """Build a Stage from a deserialized stage-file dict."""
        Stage.validate(d)

        path = os.path.abspath(path)
        cwd = os.path.dirname(path)
        cmd = d.get(Stage.PARAM_CMD, None)
        deps = Dependency.loadd_from(project,
                                     d.get(Stage.PARAM_DEPS, []),
                                     cwd=cwd)
        outs = Output.loadd_from(project, d.get(Stage.PARAM_OUTS, []), cwd=cwd)

        return Stage(project=project,
                     path=path,
                     cmd=cmd,
                     cwd=cwd,
                     deps=deps,
                     outs=outs)

    @staticmethod
    def loads(project=None,
              cmd=None,
              deps=None,
              outs=None,
              outs_no_cache=None,
              fname=None,
              cwd=os.curdir):
        """Build a Stage from in-memory arguments rather than a stage file."""
        # Fixed: mutable default arguments replaced with None sentinels.
        deps = deps if deps is not None else []
        outs = outs if outs is not None else []
        outs_no_cache = outs_no_cache if outs_no_cache is not None else []
        cwd = os.path.abspath(cwd)
        path = os.path.join(cwd, fname)
        outputs = Output.loads_from(project, outs, use_cache=True, cwd=cwd)
        outputs += Output.loads_from(project,
                                     outs_no_cache,
                                     use_cache=False,
                                     cwd=cwd)
        dependencies = Dependency.loads_from(project, deps, cwd=cwd)

        return Stage(project=project,
                     path=path,
                     cmd=cmd,
                     cwd=cwd,
                     outs=outputs,
                     deps=dependencies)

    @staticmethod
    def load(project, fname):
        """Load and validate a stage file from disk."""
        with open(fname, 'r') as fd:
            return Stage.loadd(project, yaml.safe_load(fd), fname)

    def dumpd(self):
        """Serialize the stage to a dict, omitting empty/absent fields."""
        deps = [x.dumpd(self.cwd) for x in self.deps]
        outs = [x.dumpd(self.cwd) for x in self.outs]

        ret = {}
        # Fixed: identity comparison with None ('is not' instead of '!=').
        if self.cmd is not None:
            ret[Stage.PARAM_CMD] = self.cmd

        if deps:
            ret[Stage.PARAM_DEPS] = deps

        if outs:
            ret[Stage.PARAM_OUTS] = outs

        return ret

    def dump(self, fname=None):
        """Write the serialized stage as YAML to *fname* (default: self.path)."""
        if not fname:
            fname = self.path

        with open(fname, 'w') as fd:
            yaml.safe_dump(self.dumpd(), fd, default_flow_style=False)

    def save(self):
        """Record checksums for all deps and outs; SCM-ignore cached outs."""
        for dep in self.deps:
            dep.save()

        for out in self.outs:
            out.save()
            if out.use_cache:
                self.project.scm.ignore(out.path)

    def run(self):
        """Execute the stage command (or verify data sources) and save state."""
        if not self.is_data_source:
            self.project.logger.info(u'Reproducing \'{}\':\n\t{}'.format(
                self.relpath, self.cmd))

            # NOTE(review): cmd comes from the stage file; shell=True assumes
            # the stage file is trusted.
            p = subprocess.Popen(self.cmd, cwd=self.cwd, shell=True)
            p.communicate()
            if p.returncode != 0:
                raise StageCmdFailedError(self)

            self.save()

            self.project.logger.debug(u'\'{}\' was reproduced'.format(
                self.relpath))
        else:
            self.project.logger.info(
                u'Verifying data sources in \'{}\''.format(self.relpath))
            self.check_missing_outputs()
            self.save()

    def check_missing_outputs(self):
        """Raise MissingDataSource when any declared output is absent."""
        missing_outs = [
            out.rel_path for out in self.outs
            if not os.path.exists(out.rel_path)
        ]
        if missing_outs:
            raise MissingDataSource(missing_outs)

    def checkout(self):
        for out in self.outs:
            out.checkout()

    def _status(self, entries, name):
        """Collect per-entry status dicts under *name*; {} when unchanged."""
        ret = {}

        for entry in entries:
            ret.update(entry.status())

        if ret:
            return {name: ret}

        return {}

    def status(self):
        """Aggregate deps/outs status keyed by the stage file's relpath."""
        ret = {}
        ret.update(self._status(self.deps, 'deps'))
        ret.update(self._status(self.outs, 'outs'))

        if ret:
            return {self.relpath: ret}

        return {}
Example #8
0
    async def track(self):
        """ Do the polling for finished jobs. """
        while True:
            # no lock, atomic, copy() because keys() returns an iterable view
            # instead of a fresh new list in python3
            running_jobs = self.running_jobs.copy().keys()

            # somewhat inefficient for the potential number of requests per
            # poll round but more efficient in that it does not download an
            # ever growing list of jobs using tasks/list. Might also take
            # quite a while to get to all jobs if retries happen on each
            # request.
            # A call to get data about multiple tasks in one go would be nice
            # here. tasks/list could be used with the minimum job number as
            # offset and spread between highest and lowest job id as limit *if*
            # its output was sorted by job ID. Apparently limit and offset are
            # only meant to iterate over the job list in blocks but not to
            # return data about a specific range of job IDs from that list.
            for job_id in running_jobs:
                # report is an extended version of job status, so we can
                # optimise the number of requests here
                request_url = urllib.parse.urljoin(
                    self.url, f'/api/job/{job_id}/report')
                try:
                    # retrier re-issues the request on transient failures;
                    # giving up raises tenacity.RetryError (handled below).
                    async for attempt in self.retrier:
                        with attempt:
                            async with self.session.get(
                                    request_url) as response:
                                json_resp = await response.json()
                                cortexjob = schema.Schema(
                                    {
                                        'status':
                                        str,
                                        # only to make sure the key is there,
                                        # is status string while not finished,
                                        # dict afterwards
                                        'report':
                                        schema.Or(
                                            {}, str, ignore_extra_keys=True),
                                    },
                                    ignore_extra_keys=True).validate(json_resp)
                except (ValueError, schema.SchemaError) as error:
                    # malformed response: skip this job, retry next poll round
                    logger.error('Invalid JSON in job status: %s', error)
                    continue
                except tenacity.RetryError as error:
                    logger.error('Querying Cortex job status failed: %s',
                                 error)
                    continue

                job_status = cortexjob['status']
                if job_status in ['Success']:
                    # pass original report element from json response for
                    # validation and storage
                    await self.resubmit_with_analyzer_report(
                        job_id, json_resp['report'])
                    continue

                if job_status in ['Failure', 'Deleted']:
                    logger.warning(
                        "Dropping job %s because it has failed "
                        "in Cortex", job_id)
                    await self.resubmit_as_failed(job_id)
                    continue

                # drop jobs which have been running for too long. This is
                # mainly to prevent accumulation of jobs in our job list which
                # will never finish. We still want to wait for jobs to finish
                # even though our client might not be interested any more so
                # that we have the result cached for the next time we get the
                # same sample.
                await self.resubmit_as_failed_if_too_old(
                    job_id, self.max_job_age)

            await asyncio.sleep(self.poll_interval)

        # NOTE(review): unreachable — the 'while True' above never breaks;
        # presumably a shutdown condition existed (or was planned) here.
        logger.debug("Cortex job tracker shut down.")
Example #9
0
        Sync the docker daemon clock up to current clock.
        '''
        with self.logger.info('sync_clock'):
            cmd = 'date +%s -s @' + str(int(time.time()))
            self.executor.docker.run(
                CENTOS, cmd, rm=None, privileged=None
            ).interactive()


# Sentinel instance: represents a value that should be dropped from the YAML.
_EMPTY = attr.make_class('Empty', [])()
# Special value for depends_on for connecting to everything else
# (mainly for jupyter notebook or similar debugging tool).
_DEPENDS_ON_ALL = '*'
# Placeholder remote spec; replace USER/GITLAB_HOSTNAME/REPO before use.
SK_CENTRAL_REMOTE = 'USER@GITLAB_HOSTNAME:REPO'

# Environment variables: either a list of 'NAME' / 'NAME=value' strings, or a
# mapping from variable names to string (or None) values.
_ENV_SCHEMA = schema.Or(
    [schema.Regex('[_a-zA-Z][_a-zA-Z0-9]*(=[^=]+)?')],
    {schema.Regex('[_a-zA-Z][_a-zA-Z0-9]*'): schema.Or(str, None)})

# Port mapping: 'PORT' or 'HOST:CONTAINER'.
_PORT_SCHEMA = schema.Regex(r'\d+(\:\d+)?')

'''
Environment variables in docker-compose:
https://docs.docker.com/compose/compose-file/#/environment

environment:
  RACK_ENV: development
  SHOW: 'true'
  SESSION_SECRET:

environment:
  - RACK_ENV=development
Example #10
0
        if min_count == max_count:
            raise ValueError(
                f'Expecting a sequence of {min_count} values ({type_})'
            )

        raise ValueError(
            f'Expecting a sequence betweeen {min_count} and {max_count} values'
            f' ({type_})'
        )

    return validate


# Schema keys shared by every category; merged into each per-category schema.
common_schema = {
    # NOTE(review): value is a set containing one Or-schema over the valid
    # tags — presumably validating a set of known tags; confirm semantics.
    Optional('tags'): {schema.Or(*tags.get_valid_tags())},
}


_default_bitmask_style = dict(shape='rectangle', on_color='green',
                              off_color='gray')

schema_by_category = {
    'command': schema.Schema({
        'variety': schema.Or(*varieties_by_category['command']),
        Optional('value', default=1): schema.Or(float, int, str),
        Optional('enum_strings'): [str],
        Optional('enum_dict'): dict,
        **common_schema
    }),
Example #11
0
class FileInfoAnalyzerReport(CortexAnalyzerReport):
    """ Represents a Cortex FileInfo_8_0 analysis JSON report. """

    # Schema the raw report must satisfy. Hash values are strictly validated;
    # other submodule sections are accepted as-is but must not claim to be
    # the "Hashes" section (see comment below).
    report_schema = schema.Schema({
        "summary": {
            "taxonomies": [
                schema.Schema(
                    {
                        "level": schema.Or("info", "malicious", "safe"),
                        "namespace": "FileInfo",
                        #    "predicate": str,
                        #    "value": str
                    },
                    ignore_extra_keys=True)
            ]
        },
        "full": {
            "results": [{
                "submodule_name":
                "Basic properties",
                "results": [
                    {
                        "submodule_section_header": "Hashes",
                        "submodule_section_content": {
                            "md5": schema.Regex(r'^[0-9a-z]{32}$'),
                            "sha1": schema.Regex(r'^[0-9a-z]{40}$'),
                            "sha256": schema.Regex(r'^[0-9a-z]{64}$'),
                            "ssdeep": schema.Regex(r'^[0-9A-Za-z:+/]*$'),
                        }
                    },
                    {
                        # We consume further structures submodule_sections and
                        # explicitly check the submodule_section_header to not
                        # be "Hashes" or it will accept "Hashes"-structures with
                        # malformed hashes.
                        "submodule_section_header":
                        schema.And(str, lambda s: s != "Hashes"),
                        "submodule_section_content":
                        schema.Schema({}, ignore_extra_keys=True)
                    },
                ],
                "summary": {
                    "taxonomies": [
                        schema.Schema(
                            {
                                "level": schema.Or("info", "malicious",
                                                   "safe"),
                                "namespace": "FileInfo",
                                #    "predicate": str,
                                #    "value": str
                            },
                            ignore_extra_keys=True)
                    ]
                }
            }]
        },
        "success": bool,
        "artifacts": CortexAnalyzerReport.report_schema_artifacts,
        "operations": []
    })

    def __init__(self, unvalidated_report=None):
        """
        @param unvalidated_report: hash with report data from Cortex FileInfo
                                   Analyzer
        """
        super().__init__(unvalidated_report)

        # BUGFIX: the fallback defaults were swapped ('full' fell back to a
        # list, on which the chained .get() would raise AttributeError, and
        # 'results' fell back to a dict). Use {} for the mapping lookup and
        # [] for the list of results so a sparse report degrades gracefully.
        basic_properties = self.get_element_from_list_of_dicts(
            self.report.get('full', {}).get('results', []), 'submodule_name',
            'Basic properties').get('results', [])
        self._hashes = self.get_element_from_list_of_dicts(
            basic_properties, 'submodule_section_header',
            'Hashes').get('submodule_section_content', {})

    @property
    def sha256sum(self):
        """ Return the sha256 sum. """
        return self._hashes.get('sha256')

    @property
    def md5sum(self):
        """ Return the md5 sum. """
        return self._hashes.get('md5')

    @property
    def ssdeepsum(self):
        """ Return the ssdeep sum. """
        # TODO: think about if we want to compare ssdeep hashes
        return self._hashes.get('ssdeep')
class CaseTransformation(BaseTransformation):
    """SQL CASE/WHEN-style transformation.

    Evaluates the configured conditions against a row field and returns the
    first matching condition's "result". "when" clauses are evaluated before
    the "else" clause regardless of their configured order.
    """

    # A condition is either a full WHEN clause (operator + value) or a bare
    # clause (e.g. ELSE) that only carries a result.
    schema = schema.Schema({
        "field":
        str,
        "cond": [
            schema.Or(
                {
                    "case": str,
                    "operator": str,
                    "value": object,
                    "result": object
                },
                {
                    "case": str,
                    "result": object
                },
                only_one=False,
            )
        ],
    })

    @staticmethod
    def sort_by_case(condition):
        """Sort key: "when" first (0), then "else" (1), then others (2).

        Malformed conditions (missing or non-string "case") sort last (3)
        instead of raising.
        """
        try:
            cond = condition["case"].lower()
        except (TypeError, KeyError, AttributeError):
            # Was a bare `except:` which also swallowed SystemExit and
            # KeyboardInterrupt; narrowed to the failures a malformed
            # condition can actually produce.
            return 3
        if cond == "when":
            return 0
        if cond == "else":
            return 1
        return 2

    def apply(self, row: Dict, **kwargs):
        """Return the result of the first matching condition, else None.

        NOTE: sorts self.args["cond"] in place so "when" clauses precede
        "else".
        """
        field_value = row.get(self.args["field"], None)
        conditions = self.args["cond"]
        conditions.sort(key=self.sort_by_case, reverse=False)

        for cond in conditions:
            l_cond = cond["case"].lower()
            if l_cond == "when":
                operator = COMPARISON_OPERATORS_MAPPING.get(
                    cond["operator"], None)
                if operator is None:
                    # unknown operator -> clause can never match
                    continue
                field_value = self.cast_field(field_value)
                cond_value = self.cast_field(cond["value"])
                if field_value is None and cond_value is None:
                    # NULL = NULL is the only comparison defined on two Nones
                    if cond["operator"] == "=":
                        return cond["result"]
                    continue
                if field_value is None or cond_value is None:
                    # comparing None against a value is undefined -> no match
                    continue
                try:
                    if operator(field_value, cond_value):
                        return cond["result"]
                except Exception:
                    # incomparable types: treat as non-match (was `as e`
                    # with the binding never used)
                    continue
            elif l_cond == "else":
                return cond["result"]
Exemple #13
0
def default_if_none(sub_schema, default_factory):
    """Coerce ``None`` inputs to ``default_factory()``; validate others."""
    # The None branch is tried first: it matches only a literal None and
    # replaces it with a freshly built default value.
    none_branch = schema.And(None, schema.Use(lambda _value: default_factory()))
    return schema.Or(none_branch, sub_schema)
Exemple #14
0
def none_or(sub_schema):
    """Accept either ``None`` or a value matching *sub_schema*."""
    alternatives = (None, sub_schema)
    return schema.Or(*alternatives)
Exemple #15
0
class DownloadInfo:  #pylint: disable=too-few-public-methods
    """Representation of an downloads.ini file for downloading files"""

    # Known hash algorithm names that may appear as per-download INI keys.
    _hashes = ('md5', 'sha1', 'sha256', 'sha512')
    # Separator inside a 'hash_url' value; a valid value contains exactly two
    # delimiters (see _is_hash_url).
    hash_url_delimiter = '|'
    # Keys that must be present and non-empty in every section.
    _nonempty_keys = ('url', 'download_filename')
    # Keys that may be omitted from a section.
    _optional_keys = (
        'version',
        'strip_leading_dirs',
    )
    # Keys exposed verbatim via _DownloadsProperties attribute access.
    _passthrough_properties = (*_nonempty_keys, *_optional_keys, 'extractor',
                               'output_path')
    # Variables made available for INI interpolation; filtered back out of
    # the data before schema validation (see _section_generator).
    _ini_vars = {
        '_chromium_version': get_chromium_version(),
    }

    @staticmethod
    def _is_hash_url(value):
        """Return True if value looks like "<name>|<x>|<y>" with a known name."""
        return value.count(
            DownloadInfo.hash_url_delimiter) == 2 and value.split(
                DownloadInfo.hash_url_delimiter)[0] in iter(HashesURLEnum)

    # Schema for a parsed downloads.ini: each section name (non-empty string)
    # maps to its download description. 'output_path' must be a relative path
    # (Path.relative_to('') raises for absolute paths).
    _schema = schema.Schema({
        schema.Optional(schema.And(str, len)): {
            **{x: schema.And(str, len)
               for x in _nonempty_keys},
            'output_path': (lambda x: str(Path(x).relative_to(''))),
            **{
                schema.Optional(x): schema.And(str, len)
                for x in _optional_keys
            },
            schema.Optional('extractor'):
            schema.Or(ExtractorEnum.TAR, ExtractorEnum.SEVENZIP,
                      ExtractorEnum.WINRAR),
            schema.Optional(schema.Or(*_hashes)): schema.And(str, len),
            schema.Optional('hash_url'):
            lambda x: DownloadInfo._is_hash_url(x),  #pylint: disable=unnecessary-lambda
        }
    })

    class _DownloadsProperties:  #pylint: disable=too-few-public-methods
        """Attribute-style read-only view over one INI section."""

        def __init__(self, section_dict, passthrough_properties, hashes):
            self._section_dict = section_dict
            self._passthrough_properties = passthrough_properties
            self._hashes = hashes

        def has_hash_url(self):
            """
            Returns a boolean indicating whether the current
            download has a hash URL"""
            return 'hash_url' in self._section_dict

        def __getattr__(self, name):
            # Passthrough keys resolve directly from the INI section; the
            # special 'hashes' attribute aggregates all hash values present.
            if name in self._passthrough_properties:
                return self._section_dict.get(name, fallback=None)
            if name == 'hashes':
                hashes_dict = dict()
                for hash_name in (*self._hashes, 'hash_url'):
                    value = self._section_dict.get(hash_name, fallback=None)
                    if value:
                        if hash_name == 'hash_url':
                            # Split "<name>|<x>|<y>" into its components.
                            value = value.split(
                                DownloadInfo.hash_url_delimiter)
                        hashes_dict[hash_name] = value
                return hashes_dict
            raise AttributeError('"{}" has no attribute "{}"'.format(
                type(self).__name__, name))

    def _parse_data(self, path):
        """
        Parses an INI file located at path

        Raises schema.SchemaError if validation fails
        """
        def _section_generator(data):
            # Yield (section, values) pairs, skipping the DEFAULT section and
            # dropping the interpolation-only _ini_vars entries.
            for section in data:
                if section == configparser.DEFAULTSECT:
                    continue
                yield section, dict(
                    filter(lambda x: x[0] not in self._ini_vars,
                           data.items(section)))

        new_data = configparser.ConfigParser(defaults=self._ini_vars)
        with path.open(encoding=ENCODING) as ini_file:
            new_data.read_file(ini_file, source=str(path))
        try:
            self._schema.validate(dict(_section_generator(new_data)))
        except schema.SchemaError as exc:
            get_logger().error(
                'downloads.ini failed schema validation (located in %s)', path)
            raise exc
        return new_data

    def __init__(self, ini_paths):
        """Reads an iterable of pathlib.Path to download.ini files"""
        self._data = configparser.ConfigParser()
        for path in ini_paths:
            self._data.read_dict(self._parse_data(path))

    def __getitem__(self, section):
        """
        Returns an object with keys as attributes and
        values already pre-processed strings
        """
        return self._DownloadsProperties(self._data[section],
                                         self._passthrough_properties,
                                         self._hashes)

    def __contains__(self, item):
        """
        Returns True if item is a name of a section; False otherwise.
        """
        return self._data.has_section(item)

    def __iter__(self):
        """Returns an iterator over the section names"""
        return iter(self._data.sections())

    def properties_iter(self):
        """Iterator for the download properties sorted by output path"""
        return sorted(map(lambda x: (x, self[x]), self),
                      key=(lambda x: str(Path(x[1].output_path))))
Exemple #16
0
}

# Output entry schema: mandatory path plus optional per-remote checksum,
# cache and metric settings.
SCHEMA = {
    OutputBase.PARAM_PATH: str,

    # NOTE: currently there are only 3 possible checksum names:
    #
    #    1) md5 (LOCAL, SSH, GS);
    #    2) etag (S3);
    #    3) checksum (HDFS);
    #
    # so when a few types of outputs share the same name, we only need
    # specify it once.
    schema.Optional(RemoteLOCAL.PARAM_CHECKSUM): schema.Or(str, None),
    schema.Optional(RemoteS3.PARAM_CHECKSUM): schema.Or(str, None),
    schema.Optional(RemoteHDFS.PARAM_CHECKSUM): schema.Or(str, None),
    schema.Optional(OutputBase.PARAM_CACHE): bool,
    schema.Optional(OutputBase.PARAM_METRIC): OutputBase.METRIC_SCHEMA,
}


def _get(stage, p, info, cache, metric):
    parsed = urlparse(p)
    if parsed.scheme == 'remote':
        name = Config.SECTION_REMOTE_FMT.format(parsed.netloc)
Exemple #17
0
class GitScm(Scm):

    # Schema validating the user-facing git SCM spec. 'rev' is an alternate
    # way to give branch/tag/commit and is decoded in __init__. Keys matching
    # "remote-*" define additional git remotes besides origin.
    SCHEMA = schema.Schema({
        'scm':
        'git',
        'url':
        str,
        schema.Optional('dir'):
        str,
        schema.Optional('if'):
        schema.Or(str, IfExpression),
        schema.Optional('branch'):
        str,
        schema.Optional('tag'):
        str,
        schema.Optional('commit'):
        str,
        schema.Optional('rev'):
        str,
        schema.Optional(schema.Regex('^remote-.*')):
        str,
        schema.Optional('sslVerify'):
        bool,
        schema.Optional('singleBranch'):
        bool,
        schema.Optional('shallow'):
        schema.Or(int, str),
        schema.Optional('submodules'):
        schema.Or(bool, [str]),
        schema.Optional('recurseSubmodules'):
        bool,
        schema.Optional('shallowSubmodules'):
        bool,
    })
    # Prefix of spec keys that define extra remotes ("remote-<name>": url).
    REMOTE_PREFIX = "remote-"

    def __init__(self, spec, overrides=[], secureSSL=None, stripUser=None):
        """Decode a validated git spec.

        A 'rev' entry is translated into branch/tag/commit; explicit
        branch/tag/commit entries take precedence over it. If nothing is
        given the "master" branch is assumed.

        NOTE(review): the mutable default ``overrides=[]`` is only passed to
        super() here, but would be shared across calls if ever mutated —
        TODO confirm the base class does not mutate it.
        """
        super().__init__(spec, overrides)
        self.__url = spec["url"]
        self.__branch = None
        self.__tag = None
        self.__commit = None
        self.__remotes = {}
        if "rev" in spec:
            rev = spec["rev"]
            if rev.startswith("refs/heads/"):
                self.__branch = rev[11:]
            elif rev.startswith("refs/tags/"):
                self.__tag = rev[10:]
            elif len(rev) == 40:
                # a full 40-digit hex string is taken as a commit id
                self.__commit = rev
            else:
                raise ParseError("Invalid rev format: " + rev)
        # explicit keys override whatever 'rev' decoded to
        self.__branch = spec.get("branch", self.__branch)
        self.__tag = spec.get("tag", self.__tag)
        self.__commit = spec.get("commit", self.__commit)
        if self.__commit:
            # validate commit
            if re.match("^[0-9a-f]{40}$", self.__commit) is None:
                raise ParseError("Invalid commit id: " + str(self.__commit))
        elif not self.__branch and not self.__tag:
            # nothing specified at all -> master branch
            self.__branch = "master"
        self.__dir = spec.get("dir", ".")
        # convert remotes into separate dictionary
        for key, val in spec.items():
            if key.startswith(GitScm.REMOTE_PREFIX):
                stripped_key = key[len(GitScm.REMOTE_PREFIX):]  # remove prefix
                if stripped_key == "origin":
                    # "origin" is reserved for the primary url
                    raise ParseError("Invalid remote name: " + stripped_key)
                self.__remotes.update({stripped_key: val})
        self.__sslVerify = spec.get('sslVerify', secureSSL)
        self.__singleBranch = spec.get('singleBranch')
        self.__shallow = spec.get('shallow')
        self.__submodules = spec.get('submodules', False)
        self.__recurseSubmodules = spec.get('recurseSubmodules', False)
        self.__shallowSubmodules = spec.get('shallowSubmodules', True)
        self.__stripUser = stripUser

    def getProperties(self, isJenkins):
        """Return the flat property dict describing this git checkout."""
        properties = super().getProperties(isJenkins)

        # Derive the canonical rev-spec from whichever reference is set.
        if self.__commit:
            rev = self.__commit
        elif self.__tag:
            rev = "refs/tags/" + self.__tag
        else:
            rev = "refs/heads/" + self.__branch

        properties.update({
            'scm': 'git',
            'url': self.__url,
            'branch': self.__branch,
            'tag': self.__tag,
            'commit': self.__commit,
            'dir': self.__dir,
            'rev': rev,
            'sslVerify': self.__sslVerify,
            'singleBranch': self.__singleBranch,
            'shallow': self.__shallow,
            'submodules': self.__submodules,
            'recurseSubmodules': self.__recurseSubmodules,
            'shallowSubmodules': self.__shallowSubmodules,
        })

        # Extra remotes keep their prefixed key names.
        for name, url in self.__remotes.items():
            properties[GitScm.REMOTE_PREFIX + name] = url

        return properties

    async def invoke(self, invoker, switch=False):
        """Clone/update the workspace to the configured ref.

        Initializes the repository if needed, reconciles remotes with the
        spec, fetches the relevant refspec and delegates the checkout to the
        tag or branch helper. ``switch`` signals an in-place ref change.
        """
        # make sure the git directory exists
        if not os.path.isdir(invoker.joinPath(self.__dir, ".git")):
            await invoker.checkCommand(["git", "init", self.__dir])

        # Shallow implies singleBranch
        if self.__singleBranch is None:
            singleBranch = self.__shallow is not None
        else:
            singleBranch = self.__singleBranch
        # single-branch mode only makes sense when a branch is configured
        singleBranch = singleBranch and (self.__branch is not None)

        # setup and update remotes
        remotes = {"origin": self.__url}
        remotes.update(self.__remotes)
        existingRemotes = await invoker.checkOutputCommand(["git", "remote"],
                                                           cwd=self.__dir)
        # fix up URLs of remotes that already exist; drop them from `remotes`
        # so only the genuinely new ones remain
        for remote in existingRemotes.split("\n"):
            if remote in remotes:
                cfgUrl = remotes[remote]
                realUrl = await invoker.checkOutputCommand(
                    ["git", "ls-remote", "--get-url", remote], cwd=self.__dir)
                if cfgUrl != realUrl:
                    await invoker.checkCommand(
                        ["git", "remote", "set-url", remote, cfgUrl],
                        cwd=self.__dir)
                del remotes[remote]

        # add remaining (new) remotes
        for remote, url in remotes.items():
            addCmd = ["git", "remote", "add", remote, url]
            if singleBranch: addCmd += ["-t", self.__branch]
            await invoker.checkCommand(addCmd, cwd=self.__dir)

        # relax security if requested
        if not self.__sslVerify:
            await invoker.checkCommand(
                ["git", "config", "http.sslVerify", "false"], cwd=self.__dir)

        # Calculate refspec that is used internally. For the user a regular
        # refspec is kept in the git config.

        # Base fetch command with shallow support
        fetchCmd = ["git", "-c", "submodule.recurse=0", "fetch", "-p"]
        if isinstance(self.__shallow, int):
            fetchCmd.append("--depth={}".format(self.__shallow))
        elif isinstance(self.__shallow, str):
            fetchCmd.append("--shallow-since={}".format(self.__shallow))
        fetchCmd.append("origin")

        # Calculate appropriate refspec (all/singleBranch/tag)
        if singleBranch:
            fetchCmd += [
                "+refs/heads/{0}:refs/remotes/origin/{0}".format(self.__branch)
            ]
        else:
            fetchCmd += ["+refs/heads/*:refs/remotes/origin/*"]
        if self.__tag:
            fetchCmd.append("refs/tags/{0}:refs/tags/{0}".format(self.__tag))

        # do the checkout
        if self.__tag or self.__commit:
            await self.__checkoutTag(invoker, fetchCmd, switch)
        else:
            await self.__checkoutBranch(invoker, fetchCmd, switch)

    async def __checkoutTag(self, invoker, fetchCmd, switch):
        """Fetch and check out a fixed tag or commit.

        Acts only when HEAD is invalid (fresh clone) or when switching to a
        new spec; an existing valid checkout is left untouched otherwise.
        """
        # checkout only if HEAD is invalid
        head = await invoker.callCommand(
            ["git", "rev-parse", "--verify", "-q", "HEAD"],
            stdout=False,
            cwd=self.__dir)
        if head or switch:
            await invoker.checkCommand(fetchCmd, cwd=self.__dir)
            await invoker.checkCommand([
                "git", "checkout", "-q", "--no-recurse-submodules",
                self.__commit if self.__commit else "tags/" + self.__tag
            ],
                                       cwd=self.__dir)
            # FIXME: will not be called again if interrupted!
            await self.__checkoutSubmodules(invoker)

    async def __checkoutBranch(self, invoker, fetchCmd, switch):
        """Fetch and check out / fast-forward the configured branch.

        Fresh checkouts create the local branch; on a switch the branch may
        be actively changed; otherwise only fast-forward if the user is still
        on the configured branch.
        """
        await invoker.checkCommand(fetchCmd, cwd=self.__dir)
        if await invoker.callCommand(
            ["git", "rev-parse", "--verify", "-q", "HEAD"],
                stdout=False,
                cwd=self.__dir):
            # checkout only if HEAD is invalid
            await invoker.checkCommand([
                "git", "checkout", "--no-recurse-submodules", "-b",
                self.__branch, "remotes/origin/" + self.__branch
            ],
                                       cwd=self.__dir)
            await self.__checkoutSubmodules(invoker)
        elif switch:
            # We're switching the ref. There we will actively change the branch which
            # is normally forbidden.
            assert not self.__submodules
            if await invoker.callCommand([
                    "git", "show-ref", "-q", "--verify",
                    "refs/heads/" + self.__branch
            ]):
                # Branch does not exist. Create and checkout.
                await invoker.checkCommand([
                    "git", "checkout", "--no-recurse-submodules", "-b",
                    self.__branch, "remotes/origin/" + self.__branch
                ],
                                           cwd=self.__dir)
            else:
                # Branch exists already. Checkout and fast forward...
                await invoker.checkCommand([
                    "git", "checkout", "--no-recurse-submodules", self.__branch
                ],
                                           cwd=self.__dir)
                await invoker.checkCommand([
                    "git", "-c", "submodule.recurse=0", "merge", "--ff-only",
                    "refs/remotes/origin/" + self.__branch
                ],
                                           cwd=self.__dir)
        elif (await invoker.checkOutputCommand(
            ["git", "rev-parse", "--abbrev-ref", "HEAD"],
                cwd=self.__dir)) == self.__branch:
            # pull only if on original branch
            preUpdate = await self.__updateSubmodulesPre(invoker)
            await invoker.checkCommand([
                "git", "-c", "submodule.recurse=0", "merge", "--ff-only",
                "refs/remotes/origin/" + self.__branch
            ],
                                       cwd=self.__dir)
            await self.__updateSubmodulesPost(invoker, preUpdate)
        else:
            invoker.warn("Not updating", self.__dir,
                         "because branch was changed manually...")

    async def __checkoutSubmodules(self, invoker):
        """Initialize and update submodules according to the spec, if enabled."""
        if not self.__submodules:
            return

        cmd = ["git", "submodule", "update", "--init"]
        if self.__shallowSubmodules:
            cmd.extend(["--depth", "1"])
        if self.__recurseSubmodules:
            cmd.append("--recursive")
        if isinstance(self.__submodules, list):
            # restrict to the user-selected subset of submodules
            cmd.append("--")
            cmd.extend(self.__submodules)
        await invoker.checkCommand(cmd, cwd=self.__dir)

    async def __updateSubmodulesPre(self, invoker, base="."):
        """Query the status of the currently checked out submodules.

        Returns a map with the paths of all checked out submodules as keys.
        The value will be True if the submodule looks untouched by the user and
        is deemed to be updateable. If the value is False the submodule is
        different from the expected vanilla checkout state. The list may only
        be a sub-set of all known submodules.
        """

        if not self.__submodules:
            return {}

        # List all active and checked out submodules. This way we know the
        # state of all submodules and compare them later to the expected state.
        args = [
            "git", "-C", base, "submodule", "-q", "foreach",
            "printf '%s\\t%s\\n' \"$sm_path\" \"$(git rev-parse HEAD)\""
        ]
        checkedOut = await invoker.checkOutputCommand(args, cwd=self.__dir)
        # parse "path<TAB>commit" lines into a dict
        checkedOut = {
            path: commit
            for path, commit in (line.split("\t")
                                 for line in checkedOut.split("\n") if line)
        }
        if not checkedOut: return {}

        # List commits from git tree of all paths for checked out submodules.
        # This is what should be checked out.
        args = ["git", "-C", base, "ls-tree", "-z", "HEAD"] + sorted(
            checkedOut.keys())
        allPaths = await invoker.checkOutputCommand(args, cwd=self.__dir)
        # keep only "commit" (gitlink) entries: path -> expected commit id
        allPaths = {
            normPath(path): attribs.split(' ')[2]
            for attribs, path in (p.split('\t') for p in allPaths.split('\0')
                                  if p) if attribs.split(' ')[1] == "commit"
        }

        # Calculate which paths are in the right state. They must match the
        # commit and must be in detached HEAD state.
        ret = {}
        for path, commit in checkedOut.items():
            path = normPath(path)
            if allPaths.get(path) != commit:
                ret[path] = False
                continue

            # exit code 0 means HEAD is a symbolic ref, i.e. the user switched
            # to a branch -> treat as modified
            code = await invoker.callCommand(
                ["git", "symbolic-ref", "-q", "HEAD"],
                cwd=os.path.join(self.__dir, base, path))
            if code == 0:
                ret[path] = False
                continue

            ret[path] = True

        return ret

    async def __updateSubmodulesPost(self, invoker, oldState, base="."):
        """Update all submodules that are safe.

        Will update all submodules that are either new or have not been touched
        by the user. This will be done recursively if that is enabled.

        ``oldState`` is the map produced by __updateSubmodulesPre: paths
        mapping to True are considered safe to update.
        """
        if not self.__submodules:
            return {}
        if not os.path.exists(invoker.joinPath(self.__dir, base,
                                               ".gitmodules")):
            # no submodules defined at this level
            return {}

        # Sync remote URLs into our config in case they were changed
        args = ["git", "-C", base, "submodule", "sync"]
        await invoker.checkCommand(args, cwd=self.__dir)

        # List all paths as per .gitmodules. This gives us the list of all
        # known submodules. Optionally restrict to user specified subset.
        args = [
            "git", "-C", base, "config", "-f", ".gitmodules", "-z",
            "--get-regexp", "path"
        ]
        finishedProc = await invoker.runCommand(args,
                                                cwd=self.__dir,
                                                stdout=True)
        allPaths = finishedProc.stdout.rstrip(
        ) if finishedProc.returncode == 0 else ""
        # each NUL-separated record is "key\npath"; keep the path part
        allPaths = [p.split("\n")[1] for p in allPaths.split("\0") if p]
        if isinstance(self.__submodules, list):
            subset = set(normPath(p) for p in self.__submodules)
            allPaths = [p for p in allPaths if normPath(p) in subset]

        # Update only new or unmodified paths
        updatePaths = [p for p in allPaths if oldState.get(normPath(p), True)]
        for p in sorted(set(allPaths) - set(updatePaths)):
            invoker.warn("Not updating submodule",
                         os.path.join(self.__dir, base, p),
                         "because its HEAD has been switched...")
        if not updatePaths:
            return

        # If we recurse into sub-submodules get their potential state up-front
        if self.__recurseSubmodules:
            # Explicit loop because of Python 3.5: "'await' expressions in
            # comprehensions are not supported".
            subMods = {}
            for p in updatePaths:
                subMods[p] = await self.__updateSubmodulesPre(
                    invoker, os.path.join(base, p))

        # Do the update of safe submodules
        args = ["git", "-C", base, "submodule", "update", "--init"]
        if self.__shallowSubmodules:
            args += ["--depth", "1"]
        args.append("--")
        args += updatePaths
        await invoker.checkCommand(args, cwd=self.__dir)

        # Update sub-submodules if requested
        if self.__recurseSubmodules:
            for p in updatePaths:
                await self.__updateSubmodulesPost(invoker, subMods[p],
                                                  os.path.join(base, p))

    def canSwitch(self, oldSpec):
        """Decide whether an in-place switch from ``oldSpec`` is possible.

        Only changes of the checked-out ref (branch/tag/commit/rev/url) can
        be applied in place, and only when submodules are disabled.
        """
        diff = self._diffSpec(oldSpec)

        # Filter irrelevant properties
        diff -= {"sslVerify", 'singleBranch', 'shallow', 'shallowSubmodules'}
        diff = set(prop for prop in diff if not prop.startswith("remote-"))

        # Enabling "submodules" and/or "recurseSubmodules" is ok. The
        # additional content will be checked out in invoke().
        if not oldSpec.get("submodules", False) and self.__submodules:
            diff.discard("submodules")
        # BUGFIX: the property is named "recurseSubmodules" (see SCHEMA and
        # getProperties); the old key "recursiveSubmodules" never matched, so
        # these discards were no-ops and wrongly blocked in-place switches.
        if not oldSpec.get("recurseSubmodules",
                           False) and self.__recurseSubmodules:
            diff.discard("recurseSubmodules")

        # Without submodules the recurseSubmodules property is irrelevant
        if not self.__submodules:
            diff.discard("recurseSubmodules")

        # For the rest we can try an in-place switch. Git does not handle
        # vanishing submodules well and neither do we. So if submodules are
        # enabled then we do not do an in-place update.
        if not diff:
            return True
        if not diff.issubset({"branch", "tag", "commit", "rev", "url"}):
            return False
        if self.__submodules:
            return False
        return True

    async def switch(self, invoker, oldSpec):
        """Check out the new spec directly in the existing workspace.

        If anything fails, the regular attic logic takes over and moves the
        old workspace aside.
        """
        await self.invoke(invoker, switch=True)
        return True

    def asDigestScript(self):
        """Return forward compatible stable string describing this git module.

        The format is "url rev-spec dir" where rev-spec depends on the given reference.
        """
        # Optionally strip credentials from the URL for the digest.
        filt = removeUserFromUrl if self.__stripUser else (lambda x: x)

        if self.__commit:
            ret = self.__commit + " " + self.__dir
        elif self.__tag:
            ret = filt(self.__url) + " refs/tags/" + self.__tag + " " + self.__dir
        else:
            ret = filt(self.__url) + " refs/heads/" + self.__branch + " " + self.__dir

        # Append submodule configuration so it influences the digest too.
        if self.__submodules:
            ret += " submodules"
            if isinstance(self.__submodules, list):
                ret += "[{}]".format(",".join(self.__submodules))
            if self.__recurseSubmodules:
                ret += " recursive"

        return ret

    def asJenkins(self, workPath, credentials, options):
        scm = ElementTree.Element("scm",
                                  attrib={
                                      "class": "hudson.plugins.git.GitSCM",
                                      "plugin": "[email protected]",
                                  })
        ElementTree.SubElement(scm, "configVersion").text = "2"

        userconfigs = ElementTree.SubElement(
            ElementTree.SubElement(scm, "userRemoteConfigs"),
            "hudson.plugins.git.UserRemoteConfig")

        url = ElementTree.SubElement(userconfigs, "url")
        url.text = self.__url

        if credentials:
            credentialsId = ElementTree.SubElement(userconfigs,
                                                   "credentialsId")
            credentialsId.text = credentials

        branch = ElementTree.SubElement(
            ElementTree.SubElement(ElementTree.SubElement(scm, "branches"),
                                   "hudson.plugins.git.BranchSpec"), "name")
        if self.__commit:
            branch.text = self.__commit
        elif self.__tag:
            branch.text = "refs/tags/" + self.__tag
        else:
            branch.text = "refs/heads/" + self.__branch

        ElementTree.SubElement(
            scm, "doGenerateSubmoduleConfigurations").text = "false"
        ElementTree.SubElement(scm, "submoduleCfg", attrib={"class": "list"})

        extensions = ElementTree.SubElement(scm, "extensions")
        ElementTree.SubElement(
            ElementTree.SubElement(
                extensions,
                "hudson.plugins.git.extensions.impl.RelativeTargetDirectory"),
            "relativeTargetDir").text = os.path.normpath(
                os.path.join(workPath, self.__dir))
        # remove untracked files and stale branches
        ElementTree.SubElement(
            extensions, "hudson.plugins.git.extensions.impl.CleanCheckout")
        ElementTree.SubElement(
            extensions, "hudson.plugins.git.extensions.impl.PruneStaleBranch")
        # set git clone options
        if isinstance(self.__shallow, int):
            shallow = str(self.__shallow)
        else:
            shallow = options.get("scm.git.shallow")
        timeout = options.get("scm.git.timeout")
        if shallow is not None or timeout is not None:
            co = ElementTree.SubElement(
                extensions, "hudson.plugins.git.extensions.impl.CloneOption")
            if shallow is not None:
                try:
                    shallow = int(shallow)
                    if shallow < 0: raise ValueError()
                except ValueError:
                    raise BuildError("Invalid 'git.shallow' option: " +
                                     str(shallow))
                if shallow > 0:
                    ElementTree.SubElement(co, "shallow").text = "true"
                    ElementTree.SubElement(co, "noTags").text = "false"
                    ElementTree.SubElement(co, "reference").text = ""
                    ElementTree.SubElement(co, "depth").text = str(shallow)
                    ElementTree.SubElement(co, "honorRefspec").text = "false"

            if timeout is not None:
                try:
                    timeout = int(timeout)
                    if timeout < 0: raise ValueError()
                except ValueError:
                    raise BuildError("Invalid 'git.timeout' option: " +
                                     str(timeout))
                if timeout > 0:
                    ElementTree.SubElement(co, "timeout").text = str(timeout)

        if self.__submodules:
            assert isinstance(self.__submodules, bool)
            sub = ElementTree.SubElement(
                extensions,
                "hudson.plugins.git.extensions.impl.SubmoduleOption")
            if self.__recurseSubmodules:
                ElementTree.SubElement(sub,
                                       "recursiveSubmodules").text = "true"
            if self.__shallowSubmodules:
                ElementTree.SubElement(sub, "shallow").text = "true"
                ElementTree.SubElement(sub, "depth").text = "1"
            if timeout is not None:
                ElementTree.SubElement(sub, "timeout").text = str(timeout)

        if isTrue(options.get("scm.ignore-hooks", "0")):
            ElementTree.SubElement(
                extensions,
                "hudson.plugins.git.extensions.impl.IgnoreNotifyCommit")

        return scm

    def getDirectory(self):
        # Workspace-relative directory this SCM checks out into.
        return self.__dir

    def isDeterministic(self):
        """Return True iff the checkout is pinned to an immutable revision.

        A configured commit id or tag fully determines the content; a
        branch head may move, so it does not.
        """
        pinned = self.__tag or self.__commit
        return bool(pinned)

    def hasJenkinsPlugin(self):
        """Return whether the Jenkins git-plugin can handle this SCM.

        Cloning only a subset of submodules (a list value) is not
        supported by the plugin; we fall back to our own implementation
        in that case.
        """
        wantsSubset = isinstance(self.__submodules, list)
        return not wantsSubset

    def callGit(self, workspacePath, *args, check=True):
        """Run a git command in this SCM's checkout and return its stdout.

        :param workspacePath: workspace root; the command runs in the SCM
            directory below it
        :param args: arguments passed to ``git``
        :param check: if True, a failing command raises BuildError;
            otherwise an empty string is returned
        """
        cwd = os.path.join(workspacePath, self.__dir)
        cmdLine = ['git', *args]
        try:
            stdout = subprocess.check_output(cmdLine,
                                             cwd=cwd,
                                             universal_newlines=True,
                                             stderr=subprocess.DEVNULL)
        except OSError as e:
            # e.g. git executable missing
            raise BuildError("Error calling git: " + str(e))
        except subprocess.CalledProcessError as e:
            if not check:
                return ""
            raise BuildError(
                "git error:\n Directory: '{}'\n Command: '{}'\n'{}'".
                format(cwd, " ".join(cmdLine), e.output.rstrip()))
        return stdout.strip()

    def status(self, workspacePath):
        """Compare the workspace checkout against the configured SCM state.

        Returns a ScmStatus collecting every deviation: switched URL,
        commit, tag or branch; local modifications; unpushed commits
        (on the configured branch and on any other ref); and the same
        checks recursively for submodules. A git failure is recorded as
        ScmTaint.error instead of being raised.
        """
        status = ScmStatus()
        try:
            onCorrectBranch = False
            onTag = False
            # Compare the remote URL first.
            output = self.callGit(workspacePath, 'ls-remote', '--get-url')
            if output != self.__url:
                status.add(
                    ScmTaint.switched,
                    "> URL: configured: '{}', actual: '{}'".format(
                        self.__url, output))

            # Exactly one of commit/tag/branch applies, in this priority.
            if self.__commit:
                output = self.callGit(workspacePath, 'rev-parse', 'HEAD')
                if output != self.__commit:
                    status.add(
                        ScmTaint.switched,
                        "> commit: configured: '{}', actual: '{}'".format(
                            self.__commit, output))
            elif self.__tag:
                output = self.callGit(workspacePath, 'tag', '--points-at',
                                      'HEAD').splitlines()
                if self.__tag not in output:
                    actual = ("'" + ", ".join(output) +
                              "'") if output else "not on any tag"
                    status.add(
                        ScmTaint.switched,
                        "> tag: configured: '{}', actual: {}".format(
                            self.__tag, actual))

                # Need to check if the tag still exists. Otherwise the "git
                # log" command at the end will trip.
                try:
                    self.callGit(workspacePath, 'rev-parse',
                                 'tags/' + self.__tag)
                    onTag = True
                except BuildError:
                    pass
            elif self.__branch:
                output = self.callGit(workspacePath, 'rev-parse',
                                      '--abbrev-ref', 'HEAD')
                if output != self.__branch:
                    status.add(
                        ScmTaint.switched,
                        "> branch: configured: '{}', actual: '{}'".format(
                            self.__branch, output))
                else:
                    # On the right branch: look for commits that are not
                    # on the remote tracking branch yet.
                    output = self.callGit(
                        workspacePath, 'log', '--oneline',
                        'refs/remotes/origin/' + self.__branch + '..HEAD')
                    if output:
                        status.add(
                            ScmTaint.unpushed_main,
                            joinLines(
                                "> unpushed commits on {}:".format(
                                    self.__branch), indent(output, '   ')))
                    onCorrectBranch = True

            # Check for modifications wrt. checked out commit
            output = self.callGit(workspacePath, 'status', '--porcelain',
                                  '--ignore-submodules=all')
            if output:
                status.add(ScmTaint.modified,
                           joinLines("> modified:", indent(output, '   ')))

            # The following shows all unpushed commits reachable by any ref
            # (local branches, stash, detached HEAD, etc).
            # Exclude HEAD if the configured branch is checked out to not
            # double-count them. Does not mark the SCM as dirty. Exclude the
            # configured tag too if it is checked out. Otherwise the tag would
            # count as unpushed if it is not on a remote branch.
            what = ['--all', '--not', '--remotes']
            if onCorrectBranch: what.append('HEAD')
            if onTag: what.append("tags/" + self.__tag)
            output = self.callGit(workspacePath, 'log', '--oneline',
                                  '--decorate', *what)
            if output:
                status.add(
                    ScmTaint.unpushed_local,
                    joinLines("> unpushed local commits:",
                              indent(output, '   ')))

            # Dive into submodules
            self.__statusSubmodule(workspacePath, status, self.__submodules)

        except BuildError as e:
            status.add(ScmTaint.error, e.slogan)

        return status

    def __statusSubmodule(self, workspacePath, status, shouldExist, base="."):
        """Get the status of submodules and possibly sub-submodules.

        The regular "git status" command is not sufficient for our case. In
        case the submodule is not initialized "git status" will completely
        ignore it. Using "git submodule status" would help but its output is
        not meant to be parsed by tools.

        So we first get the list of all possible submodules with their tracked
        commit. Then the actual commit is compared and any further
        modifications and unpushed commits are checked.

        :param status: ScmStatus that findings are added to
        :param shouldExist: True/False or a list of submodule paths that
            are expected to be checked out
        :param base: directory (relative to workspacePath) whose
            submodules are examined; used for recursion
        """
        if not os.path.exists(os.path.join(workspacePath, base,
                                           ".gitmodules")):
            return

        # List all paths as per .gitmodules. This gives us the list of all
        # known submodules.
        allPaths = self.callGit(workspacePath,
                                "-C",
                                base,
                                "config",
                                "-f",
                                ".gitmodules",
                                "-z",
                                "--get-regexp",
                                "path",
                                check=False)
        allPaths = [p.split("\n")[1] for p in allPaths.split("\0") if p]
        if not allPaths:
            return

        # Fetch the respective commits as per git ls-tree
        allPaths = self.callGit(workspacePath, "-C", base, "ls-tree", "-z",
                                "HEAD", *allPaths)
        # Keep only "commit" (gitlink) entries: path -> tracked commit id.
        allPaths = {
            path: attribs.split(' ')[2]
            for attribs, path in (p.split('\t') for p in allPaths.split('\0')
                                  if p) if attribs.split(' ')[1] == "commit"
        }

        # Normalize subset of submodules
        if isinstance(shouldExist, list):
            shouldExist = set(normPath(p) for p in shouldExist)
        elif shouldExist:
            shouldExist = set(normPath(p) for p in allPaths.keys())
        else:
            shouldExist = set()

        # Check each submodule for their commit, modifications and unpushed
        # stuff. Unconditionally recurse to even see if something is there
        # even though it shouldn't.
        for path, commit in sorted(allPaths.items()):
            subPath = os.path.join(base, path)
            subShouldExist = normPath(path) in shouldExist
            # A missing .git means the submodule is not checked out.
            if not os.path.exists(os.path.join(workspacePath, subPath,
                                               ".git")):
                if subShouldExist:
                    status.add(ScmTaint.modified,
                               "> submodule not checked out: " + subPath)
                elif not dirIsEmpty(os.path.join(workspacePath, subPath)):
                    status.add(ScmTaint.modified,
                               "> ignored submodule not empty: " + subPath)
                continue
            elif not subShouldExist:
                status.add(ScmTaint.modified,
                           "> submodule checked out: " + subPath)

            # Compare the actual commit with the tracked one.
            realCommit = self.callGit(workspacePath, "-C", subPath,
                                      "rev-parse", "HEAD")
            if commit != realCommit:
                status.add(
                    ScmTaint.switched,
                    "> submodule '{}' switched commit: configured: '{}', actual: '{}'"
                    .format(subPath, commit, realCommit))

            output = self.callGit(workspacePath, "-C", subPath, 'status',
                                  '--porcelain', '--ignore-submodules=all')
            if output:
                status.add(
                    ScmTaint.modified,
                    joinLines("> submodule '{}' modified:".format(subPath),
                              indent(output, '   ')))

            output = self.callGit(workspacePath, "-C", subPath, 'log',
                                  '--oneline', '--decorate', '--all', '--not',
                                  '--remotes')
            if output:
                status.add(
                    ScmTaint.unpushed_local,
                    joinLines(
                        "> submodule '{}' unpushed local commits:".format(
                            subPath), indent(output, '   ')))

            self.__statusSubmodule(workspacePath, status,
                                   self.__recurseSubmodules, subPath)

    def getAuditSpec(self):
        """Describe this SCM for the audit trail.

        Returns a ("git", directory, extra) tuple where extra carries the
        submodule settings when they are enabled.
        """
        if not self.__submodules:
            return ("git", self.__dir, {})
        extra = {'submodules': self.__submodules}
        if self.__recurseSubmodules:
            extra['recurseSubmodules'] = True
        return ("git", self.__dir, extra)

    def hasLiveBuildId(self):
        # A live build id (the checked out commit id) can always be
        # determined for git; see calcLiveBuildId.
        return True

    async def predictLiveBuildId(self, step):
        """Predict the live build id without a workspace checkout.

        A configured commit is the answer itself. Otherwise the remote is
        queried via "git ls-remote" for the configured tag or branch.
        Returns the commit id as bytes, or None if the prediction failed.
        """
        if self.__commit:
            return bytes.fromhex(self.__commit)

        with stepAction(step, "LS-REMOTE", self.__url, (INFO, TRACE)) as a:
            if self.__tag:
                # Annotated tags are objects themselves. We need the commit object!
                refs = [
                    "refs/tags/" + self.__tag + '^{}',
                    "refs/tags/" + self.__tag
                ]
            else:
                refs = ["refs/heads/" + self.__branch]
            cmdLine = ['git', 'ls-remote', self.__url] + refs
            try:
                stdout = await check_output(cmdLine,
                                            stderr=subprocess.DEVNULL,
                                            universal_newlines=True)
                output = stdout.strip()
            except subprocess.CalledProcessError as e:
                a.fail("exit {}".format(e.returncode), WARNING)
                return None
            except OSError as e:
                a.fail("error ({})".format(e))
                return None

            # have we found anything at all?
            if not output:
                a.fail("unknown", WARNING)
                return None

            # See if we got one of our intended refs. Git is generating lines with
            # the following format:
            #
            #   <sha1>\t<refname>
            #
            # Put the output into a dict with the refname as key. Be extra careful
            # and strip out lines not matching this pattern.
            output = {
                commitAndRef[1].strip(): bytes.fromhex(commitAndRef[0].strip())
                for commitAndRef in (line.split('\t')
                                     for line in output.split('\n'))
                if len(commitAndRef) == 2
            }
            # Prefer the dereferenced tag object ('^{}') if present.
            for ref in refs:
                if ref in output: return output[ref]

            # uhh, should not happen...
            a.fail("unknown", WARNING)
            return None

    def calcLiveBuildId(self, workspacePath):
        """Return the checked out commit id as bytes.

        A configured commit is returned directly; otherwise git is asked
        for the HEAD of the workspace checkout.
        """
        if not self.__commit:
            head = self.callGit(workspacePath, 'rev-parse', 'HEAD').strip()
            return bytes.fromhex(head)
        return bytes.fromhex(self.__commit)

    def getLiveBuildIdSpec(self, workspacePath):
        """Return the live-build-id spec string.

        "=<sha1>" embeds the known commit directly; "g<path>" defers the
        lookup to processLiveBuildIdSpec which runs git in that directory.
        """
        if self.__commit:
            spec = "=" + self.__commit
        else:
            spec = "g" + os.path.join(workspacePath, self.__dir)
        return spec

    @staticmethod
    def processLiveBuildIdSpec(dir):
        """Resolve a "g<path>" spec by asking git for HEAD in *dir*.

        :raises BuildError: if git fails or cannot be executed
        """
        cmdLine = ["git", "rev-parse", "HEAD"]
        try:
            head = subprocess.check_output(cmdLine,
                                           cwd=dir,
                                           universal_newlines=True)
        except subprocess.CalledProcessError as e:
            raise BuildError("Git audit failed: " + str(e))
        except OSError as e:
            raise BuildError("Error calling git: " + str(e))
        return head.strip()
Exemple #18
0
    schema.Optional("type", default="context-free"):
    "context-free",
    "span_encoding_dim":
    schema.Use(int),
    "word_embeddings_dim":
    schema.Use(int),
    "chars_embeddings_dim":
    schema.Use(int),
    "hidden_dim":
    schema.Use(int),
    schema.Optional("soft_dropout_rate", default=0.3):
    schema.Use(float),
    schema.Optional("hard_dropout_rate", default=0.6):
    schema.Use(float),
    schema.Optional("features", default=None):
    [schema.Or(raw_features_schema, categorical_features_schema)],
    schema.Optional("token_features", default=None):
    [schema.Or(raw_features_schema, categorical_features_schema)],
    schema.Optional("external_boundaries", default=False):
    schema.Use(bool),
})

elmo_encoder_schema = schema.Schema({
    "type":
    "elmo",
    "span_encoding_dim":
    schema.Use(int),
    "elmo_options_file":
    schema.Use(str),
    "elmo_weight_file":
    schema.Use(str),
Exemple #19
0
class ImportScm(Scm):
    """SCM that "checks out" by copying a local directory into the workspace.

    The source ('url') is a plain directory on the local machine. When the
    spec was re-serialized via getProperties() the directory content is
    carried in the packed '__data' blob instead and unpacked on invoke().
    """

    # Accepted recipe keys. NOTE(review): the '__data' key read in
    # __init__ is not part of this schema -- presumably it only appears
    # in internally re-serialized specs; confirm against the caller.
    SCHEMA = schema.Schema({
        'scm': 'import',
        'url': str,
        schema.Optional('dir'): str,
        schema.Optional('if'): schema.Or(str, IfExpression),
        schema.Optional('prune'): bool,
    })

    def __init__(self, spec, overrides=[]):
        super().__init__(spec, overrides)
        self.__url = spec["url"]  # source directory
        self.__dir = spec.get("dir", ".")  # destination below the workspace
        self.__prune = spec.get("prune", False)  # empty destination first?
        self.__data = spec.get("__data")  # packed tree, if re-serialized

    def getProperties(self):
        """Return the serializable representation of this SCM.

        Note that this packs the whole source tree into '__data' on every
        call (see packTree).
        """
        ret = super().getProperties()
        ret.update({
            'scm': 'import',
            'url': self.__url,
            'dir': self.__dir,
            'prune': self.__prune,
            '__data': packTree(self.__url),
        })
        return ret

    async def invoke(self, invoker):
        """Materialize the import into the workspace directory."""
        dest = invoker.joinPath(self.__dir)
        os.makedirs(dest, exist_ok=True)
        if self.__prune: emptyDirectory(dest)
        if self.__data is None:
            # Local mode: copy the directory as-is.
            if not os.path.isdir(self.__url):
                invoker.fail("Cannot import '{}': not a directory!".format(
                    self.__url))
            copyTree(self.__url, dest, invoker)
        else:
            # Remote mode: unpack the transported tree.
            unpackTree(self.__data, dest)

    def asDigestScript(self):
        # Only the source path identifies this SCM in the digest.
        return self.__url

    def getDirectory(self):
        return self.__dir

    def isDeterministic(self):
        # A local directory may change at any time.
        return False

    def hasLiveBuildId(self):
        return True

    async def predictLiveBuildId(self, step):
        # Hash the source directory directly -- no workspace needed.
        with stepAction(step, "HASH", self.__url, (INFO, TRACE)) as a:
            return hashDirectory(self.__url)

    def calcLiveBuildId(self, workspacePath):
        return hashDirectory(os.path.join(workspacePath, self.__dir))

    def getAuditSpec(self):
        return ("import", self.__dir, {"url": self.__url})

    def getLiveBuildIdSpec(self, workspacePath):
        # "#<path>" spec: hash the given directory.
        return "#" + os.path.join(workspacePath, self.__dir)
Exemple #20
0
import logging

import schema
from sklearn.model_selection import ParameterSampler

logger = logging.getLogger("sampling")

# Validation schema for the random-sampling configuration of a parameter.
# Extra keys are tolerated (ignore_extra_keys=True) so this can validate a
# subsection of a larger options dictionary.
SCHEMA_SAMPLING = schema.Schema(
    {
        # Distribution name; must resolve via get_distribution() (defined
        # elsewhere in this module).
        "hdock_distr":
        schema.Or(schema.And(str, lambda s: get_distribution(s) is not None)),
        # Non-negative number of samples to draw.
        "hdock_samples":
        schema.And(int, lambda n: n >= 0),
        # RNG seed for reproducible sampling.
        schema.Optional("hdock_seed", default=0):
        int,
        # Keyword arguments forwarded to the distribution.
        schema.Optional("hdock_distr_kwargs", default={}):
        dict,
    },
    ignore_extra_keys=True,
)


def sample_values(value):
    """
    A random sampling search.

    Using the special keys for distributions.
    {
        "parameter": {
            "hdock_distr": <str> (From scipy.stats.distributions),
            "hdock_distr_kwargs": {
Exemple #21
0
def check_api_schema(api):
    """Check the schema of the API and raise an exception if something
    does not match.

    :param api: The API dictionary. Keys are unique-names, values are
        element dictionaries whose 'kind' key selects the schema used to
        validate them.
    :raises schema.SchemaError: if any element fails validation
    """

    # Schema for checking we have a string in a Python 2 and 3 compatible way

    # String schema
    class StringSchema(object):
        """Validates that data is a non-empty string."""
        def validate(self, data):

            # Check the basic properties
            schema.Schema(schema.Or(*six.string_types)).validate(data)

            # No empty strings either
            if not data:
                raise schema.SchemaError("String is empty")

            return data

    string_schema = StringSchema()

    # Schema for checking the location
    location_schema = schema.Schema({
        'path': string_schema,
        schema.Optional('include'): string_schema,
        'line-start': int,
        'line-end': schema.Or(int, None)
    })

    # Check that members' 'unique-name' is in the API
    class MemberInAPI(object):
        """Validates that a member reference exists as a key in the API."""
        def __init__(self, api):
            self.api = api

        def validate(self, data):

            if data not in self.api:
                raise schema.SchemaError("%r not found in the API "
                                         "valid keys are %r" %
                                         (data, self.api.keys()))

            return data

    # Link schema
    class LinkSchema(object):
        """Validates a link: either an external URL or an in-API reference."""
        def __init__(self, api):
            self.api = api

        def validate(self, data):

            # Check the basic properties
            schema.Schema({
                'url': bool,
                'value': string_schema,
            }).validate(data)

            # For url we are done
            if data['url']:
                return data

            # Check that if non url we have the link in the API
            if data['value'] not in self.api:
                raise schema.SchemaError("Link value %r not found in the API "
                                         "valid keys are %r" %
                                         (data, self.api.keys()))

            return data

    # Paragraphs text schema
    paragraphs_text_schema = schema.Schema({
        'kind':
        'text',
        'content':
        string_schema,
        schema.Optional('link'):
        LinkSchema(api=api)
    })

    # Paragraphs code schema
    paragraphs_code_schema = schema.Schema({
        'kind': 'code',
        'content': string_schema,
        'is_block': bool
    })

    # Paragraphs list schema

    class ItemsParagraphs(object):
        """Deferred validator; its schema is assigned after creation."""
        def __init__(self):
            self.use_schema = None

        def validate(self, data):
            return self.use_schema.validate(data)

    # We define a validator object but defer the initialization of the schema
    # to use. The reason is the items kind is itself a list of paragraphs so
    # we have a recursive dependency.
    items_paragraphs = ItemsParagraphs()

    paragraphs_list_schema = schema.Schema({
        'kind': 'list',
        'ordered': bool,
        'items': [items_paragraphs]
    })

    # Paragraphs schema
    paragraphs_schema = schema.Schema([
        schema.Or(paragraphs_text_schema, paragraphs_code_schema,
                  paragraphs_list_schema)
    ])

    # Initialize the items schema which itself is a list of paragraphs
    items_paragraphs.use_schema = paragraphs_schema

    # type schema (used by template parameters, typedefs, functions and
    # variables below; the original file defined this twice -- the dead
    # duplicate has been removed)
    type_schema = schema.Schema([{
        'value': string_schema,
        schema.Optional('link'): LinkSchema(api=api)
    }])

    # template parameter schema
    template_parameter_schema = schema.Schema([{
        'type':
        type_schema,
        'name':
        string_schema,
        schema.Optional('default'):
        type_schema,
        schema.Optional('description'):
        paragraphs_schema
    }])

    # Schema for checking the namespace kind
    namespace_schema = schema.Schema({
        'kind': 'namespace',
        'name': string_schema,
        'scope': schema.Or(string_schema, None),
        'members': [MemberInAPI(api=api)],
        'briefdescription': paragraphs_schema,
        'detaileddescription': paragraphs_schema,
        'inline': bool
    })

    # Schema for checking classes and structs
    class_struct_schema = schema.Schema({
        'kind':
        schema.Or('class', 'struct'),
        'name':
        string_schema,
        'location':
        location_schema,
        'scope':
        schema.Or(string_schema, None),
        'access':
        schema.Or('public', 'protected', 'private'),
        schema.Optional('template_parameters'):
        template_parameter_schema,
        'members': [MemberInAPI(api=api)],
        'briefdescription':
        paragraphs_schema,
        'detaileddescription':
        paragraphs_schema
    })

    # Enum schema
    enum_schema = schema.Schema({
        'kind':
        'enum',
        'name':
        string_schema,
        'location':
        location_schema,
        'scope':
        schema.Or(string_schema, None),
        'access':
        schema.Or('public', 'protected', 'private'),
        'values': [{
            'name': string_schema,
            'briefdescription': paragraphs_schema,
            'detaileddescription': paragraphs_schema,
            schema.Optional('value'): string_schema
        }],
        'briefdescription':
        paragraphs_schema,
        'detaileddescription':
        paragraphs_schema
    })

    # Typedef / using schema
    typedef_using_schema = schema.Schema({
        'kind':
        schema.Or('typedef', 'using'),
        'name':
        string_schema,
        'location':
        location_schema,
        'scope':
        schema.Or(string_schema, None),
        'access':
        schema.Or('public', 'protected', 'private'),
        'type':
        type_schema,
        'briefdescription':
        paragraphs_schema,
        'detaileddescription':
        paragraphs_schema
    })

    # Function schema
    function_schema = schema.Schema({
        'kind':
        'function',
        'name':
        string_schema,
        'location':
        location_schema,
        'scope':
        schema.Or(string_schema, None),
        schema.Optional('return'): {
            'type': type_schema,
            'description': paragraphs_schema
        },
        'signature':
        string_schema,
        schema.Optional('template_parameters'):
        template_parameter_schema,
        'is_const':
        bool,
        'is_static':
        bool,
        'is_virtual':
        bool,
        'is_explicit':
        bool,
        'is_inline':
        bool,
        'is_constructor':
        bool,
        'is_destructor':
        bool,
        'access':
        schema.Or('public', 'protected', 'private'),
        'briefdescription':
        paragraphs_schema,
        'detaileddescription':
        paragraphs_schema,
        'parameters': [{
            'type': type_schema,
            schema.Optional('name'): string_schema,
            'description': paragraphs_schema
        }],
    })

    # variable schema
    variable_schema = schema.Schema({
        'kind':
        'variable',
        'name':
        string_schema,
        schema.Optional('value'):
        string_schema,
        'type':
        type_schema,
        'location':
        location_schema,
        'is_static':
        bool,
        'is_mutable':
        bool,
        'is_volatile':
        bool,
        'is_const':
        bool,
        'is_constexpr':
        bool,
        'scope':
        schema.Or(string_schema, None),
        'access':
        schema.Or('public', 'protected', 'private'),
        'briefdescription':
        paragraphs_schema,
        'detaileddescription':
        paragraphs_schema,
    })

    # Dispatch to the "right" kind of schema. We could do this with a
    # schema.Or(...) clause but it makes the error output hard to read
    api_schemas = {
        'namespace': namespace_schema,
        'class': class_struct_schema,
        'struct': class_struct_schema,
        'enum': enum_schema,
        'typedef': typedef_using_schema,
        'using': typedef_using_schema,
        'function': function_schema,
        'variable': variable_schema
    }

    class SchemaApi(object):
        """Validates one API element by dispatching on its 'kind' key."""
        def validate(self, data):

            if 'kind' not in data:
                raise schema.SchemaError(
                    "Required 'kind' key not found in %r" % data)

            if data['kind'] not in api_schemas:
                raise schema.SchemaError(
                    "Unknown 'kind' key in %r valid kinds are %r" %
                    (data, api_schemas.keys()))

            return api_schemas[data['kind']].validate(data)

    schema.Schema({str: SchemaApi()}).validate(api)
Exemple #22
0
class UrlScm(Scm):
    """SCM that downloads a single file via curl and optionally extracts it.

    The download may be verified against configured SHA1/SHA256 digests.
    Files with a known archive extension are extracted unless 'extract'
    says otherwise.
    """

    SCHEMA = schema.Schema({
        'scm': 'url',
        'url': str,
        schema.Optional('dir'): str,
        schema.Optional('if'): str,
        schema.Optional('digestSHA1'): str,
        schema.Optional('digestSHA256'): str,
        schema.Optional('extract'): schema.Or(bool, str),
        schema.Optional('fileName'): str,
        schema.Optional('stripComponents'): int,
        schema.Optional('sslVerify'): bool,
    })

    # File-name extension -> extractor tool key (see EXTRACTORS).
    EXTENSIONS = [
        (".tar.gz", "tar"),
        (".tar.xz", "tar"),
        (".tar.bz2", "tar"),
        (".tar.bzip2", "tar"),
        (".tgz", "tar"),
        (".tar", "tar"),
        (".gz", "gzip"),
        (".xz", "xz"),
        (".7z", "7z"),
        (".zip", "zip"),
    ]

    # Tool key -> (command line, optional strip-components option template).
    EXTRACTORS = {
        "tar": ("tar -x --no-same-owner --no-same-permissions -f",
                "--strip-components={}"),
        "gzip": ("gunzip -kf", None),
        "xz": ("unxz -kf", None),
        "7z": ("7z x -y", None),
        "zip": ("unzip -o", None),
    }

    def __init__(self, spec, overrides=[], tidy=None):
        """Parse and validate the recipe spec.

        :raises ParseError: on malformed SHA1/SHA256 digests
        """
        super().__init__(spec, overrides)
        self.__url = spec["url"]
        self.__digestSha1 = spec.get("digestSHA1")
        if self.__digestSha1:
            # validate digest
            if re.match("^[0-9a-f]{40}$", self.__digestSha1) is None:
                raise ParseError("Invalid SHA1 digest: " +
                                 str(self.__digestSha1))
        self.__digestSha256 = spec.get("digestSHA256")
        if self.__digestSha256:
            # validate digest
            if re.match("^[0-9a-f]{64}$", self.__digestSha256) is None:
                raise ParseError("Invalid SHA256 digest: " +
                                 str(self.__digestSha256))
        self.__dir = spec.get("dir", ".")
        self.__fn = spec.get("fileName")
        if not self.__fn:
            # Default to the last path component of the URL.
            self.__fn = self.__url.split("/")[-1]
        self.__extract = spec.get("extract", "auto")
        self.__tidy = tidy
        self.__strip = spec.get("stripComponents", 0)
        self.__sslVerify = spec.get('sslVerify', True)

    def getProperties(self):
        """Return the serializable representation of this SCM."""
        ret = super().getProperties()
        ret.update({
            'scm': 'url',
            'url': self.__url,
            'digestSHA1': self.__digestSha1,
            'digestSHA256': self.__digestSha256,
            'dir': self.__dir,
            'fileName': self.__fn,
            'extract': self.__extract,
            'stripComponents': self.__strip,
            'sslVerify': self.__sslVerify,
        })
        return ret

    def asScript(self):
        """Return the shell script that downloads, verifies and extracts
        the file.

        :raises ParseError: on invalid/unsupported 'extract' settings
        """
        options = "-sSgLf"
        if not self.__sslVerify: options += "k"
        # Download into a temp file first so an aborted transfer never
        # leaves a half-written target. An existing file is refreshed
        # only if the server has a newer version (-z).
        ret = """
{HEADER}
mkdir -p {DIR}
cd {DIR}
if [ -e {FILE} ] ; then
    curl {OPTIONS} -o {FILE} -z {FILE} {URL}
else
    (
        F=$(mktemp)
        trap 'rm -f $F' EXIT
        set -e
        curl {OPTIONS} -o $F {URL}
        mv $F {FILE}
    )
fi
""".format(HEADER=super().asScript(),
           DIR=quote(self.__dir),
           URL=quote(self.__url),
           FILE=quote(self.__fn),
           OPTIONS=options)

        # "\\ \\ " produces backslash-escaped spaces so the unquoted echo
        # argument keeps the two spaces "shaXsum -c" expects. (Originally
        # written as the deprecated invalid escape "\ \ " -- same bytes.)
        if self.__digestSha1:
            ret += "echo {DIGEST}\\ \\ {FILE} | sha1sum -c\n".format(
                DIGEST=self.__digestSha1, FILE=self.__fn)
        if self.__digestSha256:
            ret += "echo {DIGEST}\\ \\ {FILE} | sha256sum -c\n".format(
                DIGEST=self.__digestSha256, FILE=self.__fn)

        extractor = None
        if self.__extract in ["yes", "auto", True]:
            for (ext, tool) in UrlScm.EXTENSIONS:
                if self.__fn.endswith(ext):
                    extractor = UrlScm.EXTRACTORS[tool]
                    break
            if not extractor and self.__extract != "auto":
                raise ParseError("Don't know how to extract '" + self.__fn +
                                 "' automatically.")
        elif self.__extract in UrlScm.EXTRACTORS:
            # BUG FIX: this used to index EXTRACTORS with the loop variable
            # 'tool' which is unbound in this branch (NameError at runtime).
            extractor = UrlScm.EXTRACTORS[self.__extract]
        elif self.__extract not in ["no", False]:
            raise ParseError("Invalid extract mode: " + self.__extract)

        if extractor:
            if self.__strip > 0:
                if extractor[1] is None:
                    raise ParseError(
                        "Extractor does not support 'stripComponents'!")
                strip = " " + extractor[1].format(self.__strip)
            else:
                strip = ""
            # Re-extract only when the archive is newer than the marker.
            ret += """
if [ {FILE} -nt .{FILE}.extracted ] ; then
    {TOOL} {FILE}{STRIP}
    touch .{FILE}.extracted
fi
""".format(FILE=quote(self.__fn), TOOL=extractor[0], STRIP=strip)

        return ret

    def asDigestScript(self):
        """Return forward compatible stable string describing this url.

        The format is "digest dir extract" if a SHA checksum was specified.
        Otherwise it is "url dir extract". A "s#" is appended if leading paths
        are stripped where # is the number of stripped elements.
        """
        return ( self.__digestSha256 if self.__digestSha256
                 else (self.__digestSha1 if self.__digestSha1 else self.__url)
                    ) + " " + os.path.join(self.__dir, self.__fn) + " " + str(self.__extract) + \
                    ( " s{}".format(self.__strip) if self.__strip > 0 else "" )

    def getDirectory(self):
        return self.__dir if self.__tidy else os.path.join(
            self.__dir, self.__fn)

    def isDeterministic(self):
        # Only a digest pins the downloaded content.
        return (self.__digestSha1 is not None) or (self.__digestSha256
                                                   is not None)

    def getAuditSpec(self):
        return ("url", os.path.join(self.__dir, self.__fn))

    def hasLiveBuildId(self):
        return self.isDeterministic()

    async def predictLiveBuildId(self, step):
        # The digest is known up front -- no download needed.
        return self.calcLiveBuildId(None)

    def calcLiveBuildId(self, workspacePath):
        # Prefer the stronger digest; None if nothing was configured.
        if self.__digestSha256:
            return bytes.fromhex(self.__digestSha256)
        elif self.__digestSha1:
            return bytes.fromhex(self.__digestSha1)
        else:
            return None

    def getLiveBuildIdSpec(self, workspacePath):
        if self.__digestSha256:
            return "=" + self.__digestSha256
        elif self.__digestSha1:
            return "=" + self.__digestSha1
        else:
            return None
Exemple #23
0
# Third-party "schema" library: declarative validation of the map/config data.
import schema as sc

# Free-form attributes: any string key mapped to a scalar value.
AttrsSchema = sc.Schema({sc.Optional(str): sc.Or(str, int, float, bool)})
# A record maps names to either None or a list and may carry optional
# attributes (defaulting to an empty dict when absent).
RecordSchema = sc.Schema({
    str: sc.Or(None, list),
    sc.Optional("attrs", default={}): AttrsSchema,
})
# The root entry designates the tree root by name, with optional attributes.
RootSchema = sc.Schema({"root": str, sc.Optional("attrs"): AttrsSchema})
# A map document is a list of record and/or root entries.
MapSchema = sc.Schema([sc.Or(RecordSchema, RootSchema)])
# Predicate variants used to select nodes for styling:
# "eval" evaluates against a target expression...
EvalPredicateSchema = sc.Schema({
    "type": "eval",
    "target": str,
})
# ..."regex" matches a pattern against a target...
RegexPredicateSchema = sc.Schema({
    "type": "regex",
    "target": str,
    "pattern": str,
})
# ...and "name" matches a pattern against the node name.
NamePredicateSchema = sc.Schema({
    "type": "name",
    "pattern": str,
})
PredicateSchema = sc.Or(EvalPredicateSchema, RegexPredicateSchema,
                        NamePredicateSchema)
# A style pairs a node predicate with attributes to apply; "order" breaks
# ties between multiple matching styles.
StyleSchema = sc.Schema({
    "predicate": PredicateSchema,
    sc.Optional("attrs", default={}): AttrsSchema,
    sc.Optional("order", default=0): int,
    sc.Optional("transform"): str,
})
Exemple #24
0
class Stage(object):
    """A single pipeline stage: an optional command plus its dependencies
    and outputs, (de)serialized to a YAML stage file.

    Stage files are either named 'Dvcfile' or carry the '.dvc' suffix and
    must conform to SCHEMA.
    """

    STAGE_FILE = 'Dvcfile'
    STAGE_FILE_SUFFIX = '.dvc'

    PARAM_MD5 = 'md5'
    PARAM_CMD = 'cmd'
    PARAM_DEPS = 'deps'
    PARAM_OUTS = 'outs'
    PARAM_LOCKED = 'locked'

    # On-disk layout of a stage file. Every key is optional; 'deps' and
    # 'outs' are validated element-wise against their sub-schemas.
    SCHEMA = {
        schema.Optional(PARAM_MD5): schema.Or(str, None),
        schema.Optional(PARAM_CMD): schema.Or(str, None),
        schema.Optional(PARAM_DEPS):
        schema.Or(schema.And(list, schema.Schema([dependency.SCHEMA])), None),
        schema.Optional(PARAM_OUTS):
        schema.Or(schema.And(list, schema.Schema([output.SCHEMA])), None),
        schema.Optional(PARAM_LOCKED): bool,
    }

    def __init__(self,
                 project,
                 path=None,
                 cmd=None,
                 cwd=os.curdir,
                 deps=None,
                 outs=None,
                 md5=None,
                 locked=False):
        """Create a stage.

        @param project: owning project (used for logging)
        @param path: absolute path of the stage file
        @param cmd: shell command reproducing the stage (None for data
                    sources)
        @param cwd: working directory the command runs in
        @param deps: list of dependency objects (default: empty list)
        @param outs: list of output objects (default: empty list)
        @param md5: md5 checksum recorded in the stage file
        @param locked: locked stages are only verified, never reproduced
        """
        self.project = project
        self.path = path
        self.cmd = cmd
        self.cwd = cwd
        # Use fresh lists instead of mutable default arguments so stages
        # never share (and accidentally mutate) the same list object.
        self.outs = [] if outs is None else outs
        self.deps = [] if deps is None else deps
        self.md5 = md5
        self.locked = locked

    @property
    def relpath(self):
        """Stage file path relative to the current working directory."""
        return os.path.relpath(self.path)

    @property
    def is_data_source(self):
        """A stage without a command only tracks data, it is not computed."""
        return self.cmd is None

    @staticmethod
    def is_stage_file(path):
        """Check whether 'path' looks like a stage file.

        A stage file must exist and either carry the '.dvc' suffix or be
        named 'Dvcfile'.
        """
        if not os.path.isfile(path):
            return False

        if not path.endswith(Stage.STAGE_FILE_SUFFIX) and os.path.basename(
                path) != Stage.STAGE_FILE:
            return False

        return True

    def changed_md5(self):
        """Check whether the recorded md5 differs from the current one."""
        md5 = self.dumpd().get(self.PARAM_MD5, None)

        # backward compatibility: old stage files carry no md5 at all
        if self.md5 is None:
            return False

        if self.md5 and md5 and self.md5 == md5:
            return False

        msg = "Dvc file '{}' md5 changed(expected '{}', actual '{}')"
        self.project.logger.debug(msg.format(self.relpath, self.md5, md5))
        return True

    @property
    def is_callback(self):
        """A command stage without dependencies must always be re-run."""
        return not self.is_data_source and len(self.deps) == 0

    @property
    def is_import(self):
        """An import stage copies one dependency to one output, no command."""
        return not self.cmd and \
               len(self.deps) == 1 and \
               len(self.outs) == 1

    def changed(self):
        """Check whether the stage needs to be reproduced.

        Deliberately evaluates changed() on every entry (no short-circuit)
        so each entry can report its own state.
        """
        ret = False

        if self.is_callback:
            ret = True

        if self.locked:
            # locked stages ignore dependency changes
            entries = self.outs
        else:
            entries = itertools.chain(self.outs, self.deps)

        for entry in entries:
            if entry.changed():
                ret = True

        if self.changed_md5():
            ret = True

        if ret:
            self.project.logger.debug(u'Dvc file \'{}\' changed'.format(
                self.relpath))
        else:
            self.project.logger.debug(u'Dvc file \'{}\' didn\'t change'.format(
                self.relpath))

        return ret

    def remove_outs(self, ignore_remove=False):
        """Remove all outputs of this stage from the workspace."""
        for out in self.outs:
            out.remove(ignore_remove=ignore_remove)

    def remove(self):
        """Remove the stage: delete its outputs and its stage file."""
        self.remove_outs(ignore_remove=True)
        os.unlink(self.path)

    def reproduce(self, force=False):
        """Re-run the stage if it changed (or unconditionally with force).

        Returns the stage itself when it was reproduced, None when nothing
        had to be done.
        """
        if not self.changed() and not force:
            return None

        if (self.cmd or self.is_import) and not self.locked:
            # Removing outputs only if we actually have command to reproduce
            self.remove_outs(ignore_remove=False)

        self.project.logger.info(u'Reproducing \'{}\''.format(self.relpath))

        self.run()

        self.project.logger.debug(u'\'{}\' was reproduced'.format(
            self.relpath))

        return self

    @staticmethod
    def validate(d):
        """Validate dict 'd' against SCHEMA.

        Raises StageFileFormatError on any violation; the underlying schema
        error is only logged at debug level.
        """
        try:
            schema.Schema(Stage.SCHEMA).validate(d)
        except schema.SchemaError as exc:
            Logger.debug(str(exc))
            raise StageFileFormatError()

    @staticmethod
    def loadd(project, d, path):
        """Create a stage from dict 'd' read from stage file 'path'."""
        Stage.validate(d)

        path = os.path.abspath(path)
        cwd = os.path.dirname(path)
        cmd = d.get(Stage.PARAM_CMD, None)
        md5 = d.get(Stage.PARAM_MD5, None)
        locked = d.get(Stage.PARAM_LOCKED, False)

        stage = Stage(project=project,
                      path=path,
                      cmd=cmd,
                      cwd=cwd,
                      md5=md5,
                      locked=locked)

        stage.deps = dependency.loadd_from(stage, d.get(Stage.PARAM_DEPS, []))
        stage.outs = output.loadd_from(stage, d.get(Stage.PARAM_OUTS, []))

        return stage

    @classmethod
    def _stage_fname_cwd(cls, fname, cwd, outs, add):
        """Derive stage file name and working directory from the outputs."""
        if fname and cwd:
            return (fname, cwd)

        if not outs:
            return (cls.STAGE_FILE, cwd if cwd else os.getcwd())

        out = outs[0]
        # Pick the path flavor matching the first output's scheme so that
        # remote (URL-style) outputs are split with posix rules.
        if out.path_info['scheme'] == 'local':
            path = os.path
        else:
            path = posixpath

        fname = fname if fname else path.basename(
            out.path) + cls.STAGE_FILE_SUFFIX
        cwd = path.dirname(out.path) if not cwd or add else cwd

        return (fname, cwd)

    @staticmethod
    def loads(project=None,
              cmd=None,
              deps=None,
              outs=None,
              outs_no_cache=None,
              metrics_no_cache=None,
              fname=None,
              cwd=os.curdir,
              locked=False,
              add=False):
        """Create a stage from in-memory arguments (e.g. dvc run/add)."""
        # Guard against shared mutable default arguments.
        deps = [] if deps is None else deps
        outs = [] if outs is None else outs
        outs_no_cache = [] if outs_no_cache is None else outs_no_cache
        metrics_no_cache = [] if metrics_no_cache is None else metrics_no_cache

        stage = Stage(project=project, cwd=cwd, cmd=cmd, locked=locked)

        stage.outs = output.loads_from(stage, outs, use_cache=True)
        stage.outs += output.loads_from(stage, outs_no_cache, use_cache=False)
        stage.outs += output.loads_from(stage,
                                        metrics_no_cache,
                                        use_cache=False,
                                        metric=True)
        stage.deps = dependency.loads_from(stage, deps)

        fname, cwd = Stage._stage_fname_cwd(fname, cwd, stage.outs, add=add)

        cwd = os.path.abspath(cwd)
        path = os.path.join(cwd, fname)

        stage.cwd = cwd
        stage.path = path

        return stage

    @staticmethod
    def load(project, fname):
        """Load a stage from the YAML stage file 'fname'."""
        with open(fname, 'r') as fd:
            return Stage.loadd(project, yaml.safe_load(fd), fname)

    def dumpd(self):
        """Serialize the stage into a dict matching SCHEMA.

        The md5 is computed over all other serialized fields, so changing
        any of them invalidates it.
        """
        deps = [x.dumpd() for x in self.deps]
        outs = [x.dumpd() for x in self.outs]

        ret = {}
        if self.cmd is not None:
            ret[Stage.PARAM_CMD] = self.cmd

        if deps:
            ret[Stage.PARAM_DEPS] = deps

        if outs:
            ret[Stage.PARAM_OUTS] = outs

        if self.locked:
            ret[Stage.PARAM_LOCKED] = self.locked

        ret[Stage.PARAM_MD5] = dict_md5(ret)

        return ret

    def dump(self, fname=None):
        """Write the serialized stage to 'fname' (default: its own path)."""
        if not fname:
            fname = self.path

        with open(fname, 'w') as fd:
            yaml.safe_dump(self.dumpd(), fd, default_flow_style=False)

    def save(self):
        """Persist the current state of all dependencies and outputs."""
        for dep in self.deps:
            dep.save()

        for out in self.outs:
            out.save()

    def run(self):
        """Execute the stage, then save the state of its deps and outs.

        Locked and data-source stages are only verified, import stages
        download their dependency, anything else runs its command.
        """
        if self.locked:
            self.project.logger.info(
                u'Verifying outputs in locked stage \'{}\''.format(
                    self.relpath))
            self.check_missing_outputs()
        elif self.is_import:
            msg = u'Importing \'{}\' -> \'{}\''
            self.project.logger.info(
                msg.format(self.deps[0].path, self.outs[0].path))

            self.deps[0].download(self.outs[0].path_info)
        elif self.is_data_source:
            self.project.logger.info(
                u'Verifying data sources in \'{}\''.format(self.relpath))
            self.check_missing_outputs()
        else:
            self.project.logger.info(u'Running command:\n\t{}'.format(
                self.cmd))

            # NOTE: shell=True is intentional - the command comes from the
            # user's own stage file and may rely on shell features.
            p = subprocess.Popen(self.cmd,
                                 cwd=self.cwd,
                                 shell=True,
                                 env=os.environ,
                                 executable=os.getenv('SHELL'))
            p.communicate()
            if p.returncode != 0:
                raise StageCmdFailedError(self)

        self.save()

    def check_missing_outputs(self):
        """Raise MissingDataSource if any declared output does not exist."""
        outs = [out for out in self.outs if not out.exists]
        paths = [
            out.path if out.path_info['scheme'] != 'local' else out.rel_path
            for out in outs
        ]
        if paths:
            raise MissingDataSource(paths)

    def checkout(self):
        """Check out all outputs of the stage from cache."""
        for out in self.outs:
            out.checkout()

    def _status(self, entries, name):
        """Collect the combined status of 'entries' under key 'name'."""
        ret = {}

        for entry in entries:
            ret.update(entry.status())

        if ret:
            return {name: ret}

        return {}

    def status(self):
        """Return a dict describing what changed in this stage, if anything."""
        ret = {}

        if not self.locked:
            ret.update(self._status(self.deps, 'deps'))

        ret.update(self._status(self.outs, 'outs'))

        if ret or self.changed_md5() or self.is_callback:
            return {self.relpath: ret}

        return {}
Exemple #25
0
    def __init__(self, report=None, url=None):
        """Parse and validate a Cuckoo report and extract the fields we use.

        @param report: hash with report data from Cuckoo
        @type report: dict
        @param url: URL where the report was retrieved from
        @type url: string
        """
        self._url = url

        if report is None:
            report = {}

        # some common building blocks for reuse
        dns_element_schema = {'request': str}
        description_element_schema = {'description': str}

        # defaults of optional keys are not validated. Therefore their
        # validators can't set more default values. So we can only rely on the
        # validation result to contain the top-level key defaults. To avoid
        # confusion make no assumptions about optional key existance at all and
        # only schema compliance. We still use the result though because
        # ignore_extra_keys has stripped it of extraneous data which protects
        # us somewhat from accidentally processing it.
        report = schema.Schema(
            {
                schema.Optional('network', default={}, ignore_extra_keys=True):
                {
                    schema.Optional('dns', default=[]):
                    schema.Or(list([dns_element_schema]),
                              tuple([dns_element_schema]),
                              ignore_extra_keys=True),
                },
                schema.Optional('signatures', default=[]):
                schema.Or(list([description_element_schema]),
                          tuple([description_element_schema]),
                          ignore_extra_keys=True),
                schema.Optional('info', default={}): {
                    schema.Optional('score', default=0.0): schema.Or(
                        int, float),
                },
                schema.Optional('debug', default={}): {
                    schema.Optional('errors', default=[]):
                    schema.Or(list([str]), tuple([str])),
                    schema.Optional('cuckoo', default=[]):
                    schema.Or(list([str]), tuple([str])),
                },
            },
            ignore_extra_keys=True).validate(report)

        # domains the sample tried to resolve during analysis
        self._requested_domains = [
            domain['request']
            for domain in report.get('network', {}).get('dns', [])
        ]

        # human-readable descriptions of matched Cuckoo signatures
        self._signature_descriptions = [
            sig['description'] for sig in report.get('signatures', [])
        ]

        # explicitly convert to the types of our external API here if we accept
        # multiple types as input (schema.Use could convert as well but does it
        # before validation in duck-typing fashion which could make us accept
        # unintended types, e.g. a string because it can be converted to a list
        # because it's iterable).
        self._score = float(report.get('info', {}).get('score', 0.0))

        debug = report.get('debug', {})
        self._errors = list(debug.get('errors', []))
        self._server_messages = list(debug.get('cuckoo', []))
        elif evaluation == 'old-matlab':
            benchmark = CAT2000Matlab()
        elif evaluation == 'new':
            benchmark = CAT2000()
        else:
            raise ValueError(evaluation)
    elif dataset.lower() == 'mit1003':
        assert evaluation == 'new'
        benchmark = MIT1003()
    else:
        raise ValueError(dataset)

    return benchmark, model


# A value that is either a plain string or absent (None).
MaybeString = schema.Or(str, None)

# Schema for the "display" metadata of a model entry; every key is optional
# and gets a benign default.
display_schema = schema.Schema({
    schema.Optional('name', default=None): str,
    schema.Optional('published', default=''): str,
    schema.Optional('code', default=''): MaybeString,
    schema.Optional('evaluation_comment', default=''): str,
    schema.Optional('first_tested', default=None): str,
})

config_schema = schema.Schema({
    'model': {
        'name': str,
        'filename': str,
        'probabilistic': bool,
        schema.Optional('loss', default=False): schema.Or('AUC', 'sAUC', 'IG', 'NSS', 'CC', 'KLDiv', 'SIM'),
Exemple #27
0
class SvnScm(Scm):
    """Subversion SCM handler: checkout, digest, Jenkins and status support."""

    # Recipe schema of an 'svn' SCM entry.
    SCHEMA = schema.Schema({
        'scm': 'svn',
        'url': str,
        schema.Optional('dir'): str,
        schema.Optional('if'): str,
        schema.Optional('revision'): schema.Or(int, str),
        schema.Optional('sslVerify'): bool,
    })

    # NOTE(review): 'overrides=[]' is a mutable default argument; it is only
    # forwarded to super() here, but confirm it is never mutated downstream.
    def __init__(self, spec, overrides=[]):
        """Initialize from an already validated spec dict."""
        super().__init__(spec, overrides)
        self.__url = spec["url"]
        self.__dir = spec.get("dir", ".")
        self.__revision = spec.get("revision")
        self.__sslVerify = spec.get('sslVerify', True)

    def getProperties(self):
        """Return a dict describing this SCM for serialization."""
        ret = super().getProperties()
        ret.update({
            'scm': 'svn',
            "url": self.__url,
            "dir": self.__dir,
            'sslVerify': self.__sslVerify,
        })
        # only emitted when set; absent means "head revision"
        if self.__revision:
            ret["revision"] = self.__revision
        return ret

    def asScript(self):
        """Return a bash snippet that checks out or updates the module.

        Existing checkouts are updated in place unless they point at a tag
        (tags are immutable); failed fresh checkouts are cleaned up.
        """
        options = "--non-interactive"
        if not self.__sslVerify:
            options += " --trust-server-cert-failures=unknown-ca,cn-mismatch,expired,not-yet-valid,other"
        return """
{HEADER}
if [[ -d {SUBDIR}/.svn ]] ; then
    if [[ {URL} != */tags/* ]] ; then
        svn up {OPTIONS} {REVISION_ARG} {SUBDIR}
    fi
else
    if ! svn co {OPTIONS} {REVISION_ARG} {URL} {SUBDIR} ; then
        rm -rf {SUBDIR}
        exit 1
    fi
fi
""".format(HEADER=super().asScript(),
           OPTIONS=options,
           URL=quote(self.__url),
           SUBDIR=quote(self.__dir),
           REVISION_ARG=(("-r " + quote(str(self.__revision)))
                         if self.__revision else ''))

    def asDigestScript(self):
        """Return forward compatible stable string describing this svn module.

        The module is represented as "url[@rev] > dir".
        """
        return (self.__url +
                (("@" + str(self.__revision)) if self.__revision else "") +
                " > " + self.__dir)

    def asJenkins(self, workPath, credentials, options):
        """Build the Jenkins SubversionSCM XML configuration element."""
        scm = ElementTree.Element("scm",
                                  attrib={
                                      "class": "hudson.scm.SubversionSCM",
                                      "plugin": "[email protected]",
                                  })

        locations = ElementTree.SubElement(scm, "locations")
        location = ElementTree.SubElement(
            locations, "hudson.scm.SubversionSCM_-ModuleLocation")

        # Jenkins expects a pegged URL ("url@rev") for fixed revisions.
        url = self.__url
        if self.__revision:
            url += ("@" + str(self.__revision))

        ElementTree.SubElement(location, "remote").text = url
        credentialsId = ElementTree.SubElement(location, "credentialsId")
        if credentials: credentialsId.text = credentials
        ElementTree.SubElement(location, "local").text = (os.path.normpath(
            os.path.join(workPath, self.__dir)))
        ElementTree.SubElement(location, "depthOption").text = "infinity"
        ElementTree.SubElement(location, "ignoreExternalsOption").text = "true"

        ElementTree.SubElement(scm, "excludedRegions")
        ElementTree.SubElement(scm, "includedRegions")
        ElementTree.SubElement(scm, "excludedUsers")
        ElementTree.SubElement(scm, "excludedRevprop")
        ElementTree.SubElement(scm, "excludedCommitMessages")
        ElementTree.SubElement(
            scm,
            "workspaceUpdater",
            attrib={"class": "hudson.scm.subversion.UpdateUpdater"})
        ElementTree.SubElement(scm, "ignoreDirPropChanges").text = "false"
        ElementTree.SubElement(scm, "filterChangelog").text = "false"

        return scm

    def getDirectories(self):
        """Map the checkout directory to a hash of its digest script."""
        return {self.__dir: hashString(self.asDigestScript())}

    def isDeterministic(self):
        """Only numeric revisions are stable; HEAD or symbolic revs are not."""
        return str(self.__revision).isnumeric()

    def hasJenkinsPlugin(self):
        """Jenkins jobs can handle this SCM natively via its svn plugin."""
        return True

    def callSubversion(self, workspacePath, *args):
        """Run an svn command in the module's checkout directory.

        Returns the command's stdout; raises BuildError if svn fails or
        cannot be executed.
        """
        cmdLine = ['svn']
        cmdLine.extend(args)
        cwd = os.path.join(workspacePath, self.__dir)
        try:
            output = subprocess.check_output(cmdLine,
                                             cwd=cwd,
                                             universal_newlines=True,
                                             stderr=subprocess.DEVNULL)
        except subprocess.CalledProcessError as e:
            raise BuildError(
                "svn error:\n Directory: '{}'\n Command: '{}'\n'{}'".format(
                    cwd, " ".join(cmdLine), e.output.rstrip()))
        except OSError as e:
            raise BuildError("Error calling svn: " + str(e))
        return output

    # Get SvnSCM status. The purpose of this function is to return the status of the given directory
    #
    # return values:
    #  - error: the scm is in an error state. Use this if the svn call returns an error code.
    #  - dirty: SCM is dirty. Could be: modified files, switched to another URL or revision
    #  - clean: same URL and revision as specified in the recipe and no local changes.
    #  - empty: directory does not exist
    #
    # This function is called when building with --clean-checkout. 'error' and 'dirty' scm's are moved to attic,
    # while empty and clean directories are not.
    def status(self, workspacePath):
        if not os.path.exists(os.path.join(workspacePath, self.__dir)):
            return 'empty', '', ''

        status = 'clean'
        shortStatus = ''
        longStatus = ''

        # Accumulate per-check flags/messages; any dirty check flips the
        # overall status.
        def setStatus(shortMsg, longMsg, dirty=True):
            nonlocal status, shortStatus, longStatus
            if (shortMsg not in shortStatus):
                shortStatus += shortMsg
            longStatus += longMsg
            if (dirty):
                status = 'dirty'

        try:
            # 'M': local modifications present
            svnoutput = self.callSubversion(workspacePath, 'status')
            if len(svnoutput):
                longMsg = colorize("> modified:\n", "33")
                for line in svnoutput.split('\n'):
                    longMsg += '  ' + line.rstrip()
                setStatus('M', longMsg)

            # 'S': workspace switched to another URL or revision
            svnoutput = self.callSubversion(workspacePath, 'info', '--xml')
            info = ElementTree.fromstring(svnoutput)
            entry = info.find('entry')
            url = entry.find('url').text
            revision = entry.attrib['revision']

            if self.__url != url:
                setStatus(
                    'S',
                    colorize(
                        "> URLs do not match!\n     recipe:\t{}\n     svn info:\t{}"
                        .format(self.__url, url), "33"))
            if self.__revision is not None and int(revision) != int(
                    self.__revision):
                setStatus(
                    'S',
                    colorize(
                        "> wrong revision: recipe: {} svn info: {}".format(
                            self.__revision, revision), "33"))

        except BuildError as e:
            print(e)
            status = 'error'

        return status, shortStatus, longStatus

    def getAuditSpec(self):
        """Return the audit specification: type "svn" and the checkout dir."""
        return ("svn", self.__dir)
Exemple #28
0
class GitScm(Scm):
    """Git SCM handler."""

    # Recipe schema of a 'git' SCM entry. 'rev' is a unified reference
    # (refs/heads/..., refs/tags/... or a 40 character commit id) while
    # 'branch'/'tag'/'commit' select one explicitly. Additional remotes can
    # be configured via 'remote-<name>' keys.
    SCHEMA = schema.Schema({
        'scm' : 'git',
        'url' : str,
        schema.Optional('dir') : str,
        schema.Optional('if') : schema.Or(str, IfExpression),
        schema.Optional('branch') : str,
        schema.Optional('tag') : str,
        schema.Optional('commit') : str,
        schema.Optional('rev') : str,
        schema.Optional(schema.Regex('^remote-.*')) : str,
        schema.Optional('sslVerify') : bool,
    })
    # Spec keys starting with this prefix define extra git remotes.
    REMOTE_PREFIX = "remote-"

    def __init__(self, spec, overrides=[], secureSSL=None):
        """Initialize from an already validated spec dict.

        A unified 'rev' entry is decomposed into branch, tag or commit;
        explicit 'branch'/'tag'/'commit' keys take precedence over it. If
        nothing is given at all, the master branch is tracked.
        """
        super().__init__(spec, overrides)
        self.__url = spec["url"]
        self.__branch = None
        self.__tag = None
        self.__commit = None
        self.__remotes = {}
        if "rev" in spec:
            rev = spec["rev"]
            if rev.startswith("refs/heads/"):
                self.__branch = rev[11:]
            elif rev.startswith("refs/tags/"):
                self.__tag = rev[10:]
            elif len(rev) == 40:
                # a bare 40 character string is taken as a full commit id
                self.__commit = rev
            else:
                raise ParseError("Invalid rev format: " + rev)
        self.__branch = spec.get("branch", self.__branch)
        self.__tag = spec.get("tag", self.__tag)
        self.__commit = spec.get("commit", self.__commit)
        if self.__commit:
            # validate commit: must be a full lower-case hex SHA1
            if re.match("^[0-9a-f]{40}$", self.__commit) is None:
                raise ParseError("Invalid commit id: " + str(self.__commit))
        elif not self.__branch and not self.__tag:
            # nothing specified at all -> master branch
            self.__branch = "master"
        self.__dir = spec.get("dir", ".")
        # convert remotes into separate dictionary
        for key, val in spec.items():
            if key.startswith(GitScm.REMOTE_PREFIX):
                stripped_key = key[len(GitScm.REMOTE_PREFIX):] # remove prefix
                if stripped_key == "origin":
                    # "origin" is reserved for the main 'url'
                    raise ParseError("Invalid remote name: " + stripped_key)
                self.__remotes.update({stripped_key : val})
        self.__sslVerify = spec.get('sslVerify', secureSSL)

    def getProperties(self):
        """Return a dict describing this SCM for serialization.

        Besides the explicit branch/tag/commit fields a unified 'rev' entry
        is emitted, preferring commit over tag over branch. Extra remotes
        are flattened back into 'remote-<name>' keys.
        """
        ret = super().getProperties()
        if self.__commit:
            rev = self.__commit
        elif self.__tag:
            rev = "refs/tags/" + self.__tag
        else:
            rev = "refs/heads/" + self.__branch
        ret.update({
            'scm' : 'git',
            'url' : self.__url,
            'branch' : self.__branch,
            'tag' : self.__tag,
            'commit' : self.__commit,
            'dir' : self.__dir,
            'rev' : rev,
            'sslVerify' : self.__sslVerify,
        })
        for name, remoteUrl in self.__remotes.items():
            ret[GitScm.REMOTE_PREFIX + name] = remoteUrl
        return ret

    async def invoke(self, invoker):
        """Check out or update the git workspace through the invoker.

        Initializes the repository if needed, synchronizes all configured
        remotes and then fetches/checks out the configured branch, tag or
        commit. An existing checkout is only fast-forwarded when it is
        still on the configured branch.
        """
        # make sure the git directory exists
        if not os.path.isdir(invoker.joinPath(self.__dir, ".git")):
            await invoker.checkCommand(["git", "init", self.__dir])

        # setup and update remotes
        remotes = { "origin" : self.__url }
        remotes.update(self.__remotes)
        existingRemotes = await invoker.checkOutputCommand(["git", "remote"], cwd=self.__dir)
        for remote in existingRemotes.split("\n"):
            if remote in remotes:
                # already present: re-point the URL only if it diverged
                cfgUrl = remotes[remote]
                realUrl = await invoker.checkOutputCommand(
                    ["git", "ls-remote", "--get-url", remote], cwd=self.__dir)
                if cfgUrl != realUrl:
                    await invoker.checkCommand(["git", "remote", "set-url", remote, cfgUrl], cwd=self.__dir)
                del remotes[remote]

        # add remaining (new) remotes
        for remote,url in remotes.items():
            await invoker.checkCommand(["git", "remote", "add", remote, url], cwd=self.__dir)

        # relax security if requested
        if not self.__sslVerify:
            await invoker.checkCommand(["git", "config", "http.sslVerify", "false"], cwd=self.__dir)

        # do the checkout
        if self.__tag or self.__commit:
            refSpec = ["+refs/heads/*:refs/remotes/origin/*"]
            if self.__tag:
                refSpec.append("refs/tags/{0}:refs/tags/{0}".format(self.__tag))
            # checkout only if HEAD is invalid
            head = await invoker.callCommand(["git", "rev-parse", "--verify", "-q", "HEAD"],
                stdout=False, cwd=self.__dir)
            if head:
                await invoker.checkCommand(["git", "fetch", "origin"] + refSpec, cwd=self.__dir)
                await invoker.checkCommand(["git", "checkout", "-q",
                    self.__commit if self.__commit else "tags/"+self.__tag], cwd=self.__dir)
        else:
            await invoker.checkCommand(["git", "fetch", "-p", "origin"], cwd=self.__dir)
            if await invoker.callCommand(["git", "rev-parse", "--verify", "-q", "HEAD"],
                           stdout=False, cwd=self.__dir):
                # checkout only if HEAD is invalid
                await invoker.checkCommand(["git", "checkout", "-b", self.__branch,
                    "remotes/origin/"+self.__branch], cwd=self.__dir)
            elif (await invoker.checkOutputCommand(["git", "rev-parse", "--abbrev-ref", "HEAD"], cwd=self.__dir)) == self.__branch:
                # pull only if on original branch
                await invoker.checkCommand(["git", "merge", "--ff-only", "refs/remotes/origin/"+self.__branch], cwd=self.__dir)
            else:
                invoker.warn("Not updating", self.__dir, "because branch was changed manually...")


    def asDigestScript(self):
        """Return forward compatible stable string describing this git module.

        The format is "url rev-spec dir" where rev-spec depends on the given
        reference. A commit id alone is globally unique, so the URL is
        omitted in that case.
        """
        if self.__commit:
            parts = [self.__commit, self.__dir]
        elif self.__tag:
            parts = [self.__url, "refs/tags/" + self.__tag, self.__dir]
        else:
            parts = [self.__url, "refs/heads/" + self.__branch, self.__dir]
        return " ".join(parts)

    def asJenkins(self, workPath, credentials, options):
        """Build the Jenkins GitSCM XML configuration element.

        Translates the configured reference, directory, credentials and the
        'scm.git.shallow'/'scm.git.timeout'/'scm.ignore-hooks' options into
        the plugin's configuration structure.
        """
        scm = ElementTree.Element("scm", attrib={
            "class" : "hudson.plugins.git.GitSCM",
            "plugin" : "[email protected]",
        })
        ElementTree.SubElement(scm, "configVersion").text = "2"

        userconfigs =  ElementTree.SubElement(
                ElementTree.SubElement(scm, "userRemoteConfigs"),
                "hudson.plugins.git.UserRemoteConfig")

        url = ElementTree.SubElement(userconfigs,
            "url")
        url.text = self.__url

        if credentials:
            credentialsId = ElementTree.SubElement(userconfigs,
                         "credentialsId")
            credentialsId.text = credentials

        # branch spec: commit id, tag ref or branch ref
        branch = ElementTree.SubElement(
            ElementTree.SubElement(
                ElementTree.SubElement(scm, "branches"),
                "hudson.plugins.git.BranchSpec"),
            "name")
        if self.__commit:
            branch.text = self.__commit
        elif self.__tag:
            branch.text = "refs/tags/" + self.__tag
        else:
            branch.text = "refs/heads/" + self.__branch

        ElementTree.SubElement(scm, "doGenerateSubmoduleConfigurations").text = "false"
        ElementTree.SubElement(scm, "submoduleCfg", attrib={"class" : "list"})

        extensions = ElementTree.SubElement(scm, "extensions")
        ElementTree.SubElement(
            ElementTree.SubElement(extensions,
                "hudson.plugins.git.extensions.impl.RelativeTargetDirectory"),
            "relativeTargetDir").text = os.path.normpath(os.path.join(workPath, self.__dir))
        # remove untracked files and stale branches
        ElementTree.SubElement(extensions,
            "hudson.plugins.git.extensions.impl.CleanCheckout")
        ElementTree.SubElement(extensions,
            "hudson.plugins.git.extensions.impl.PruneStaleBranch")
        # set git clone options
        shallow = options.get("scm.git.shallow")
        timeout = options.get("scm.git.timeout")
        if shallow is not None or timeout is not None:
            co = ElementTree.SubElement(extensions,
                    "hudson.plugins.git.extensions.impl.CloneOption")
            if shallow is not None:
                try:
                    shallow = int(shallow)
                    if shallow < 0: raise ValueError()
                except ValueError:
                    raise BuildError("Invalid 'git.shallow' option: " + str(shallow))
                if shallow > 0:
                    ElementTree.SubElement(co, "shallow").text = "true"
                    ElementTree.SubElement(co, "noTags").text = "false"
                    ElementTree.SubElement(co, "reference").text = ""
                    ElementTree.SubElement(co, "depth").text = str(shallow)
                    ElementTree.SubElement(co, "honorRefspec").text = "false"

            if timeout is not None:
                try:
                    timeout = int(timeout)
                    if timeout < 0: raise ValueError()
                except ValueError:
                    raise BuildError("Invalid 'git.timeout' option: " + str(shallow))
                if timeout > 0:
                    ElementTree.SubElement(co, "timeout").text = str(timeout)

        if isTrue(options.get("scm.ignore-hooks", "0")):
            ElementTree.SubElement(extensions,
                "hudson.plugins.git.extensions.impl.IgnoreNotifyCommit")

        return scm

    def getDirectory(self):
        """Return the workspace-relative checkout directory of this module."""
        return self.__dir

    def isDeterministic(self):
        """Tags and commit ids are immutable references; branches are not."""
        return bool(self.__tag or self.__commit)

    def hasJenkinsPlugin(self):
        """Jenkins jobs can handle this SCM natively via the git plugin."""
        return True

    def callGit(self, workspacePath, *args):
        """Run a git command inside the module's checkout directory.

        Returns the stripped stdout of the command. Raises BuildError when
        git exits with a non-zero status or cannot be executed at all.
        """
        cmdLine = ['git', *args]
        cwd = os.path.join(workspacePath, self.__dir)
        try:
            result = subprocess.check_output(cmdLine, cwd=cwd,
                universal_newlines=True, stderr=subprocess.DEVNULL)
        except subprocess.CalledProcessError as e:
            raise BuildError("git error:\n Directory: '{}'\n Command: '{}'\n'{}'".format(
                cwd, " ".join(cmdLine), e.output.rstrip()))
        except OSError as e:
            raise BuildError("Error calling git: " + str(e))
        return result.strip()

    def status(self, workspacePath):
        """Compare the checkout in *workspacePath* against the configured state.

        Returns a ScmStatus tainted with:
          * ScmTaint.switched - remote URL, commit, tag or branch deviate from
            the recipe configuration,
          * ScmTaint.modified - local modifications ('git status --porcelain'),
          * ScmTaint.unpushed_main - commits on the configured branch that are
            not on its origin counterpart,
          * ScmTaint.unpushed_local - commits reachable by any other local ref
            but not by any remote,
          * ScmTaint.error - any git invocation failed.
        """
        status = ScmStatus()
        try:
            onCorrectBranch = False
            onTag = False
            # Verify the remote URL first; everything below assumes we are
            # talking about the configured repository.
            output = self.callGit(workspacePath, 'ls-remote' ,'--get-url')
            if output != self.__url:
                status.add(ScmTaint.switched,
                    "> URL: configured: '{}', actual: '{}'".format(self.__url, output))

            # Exactly one of commit/tag/branch is checked, in that order of
            # precedence (mirrors the checkout logic).
            if self.__commit:
                output = self.callGit(workspacePath, 'rev-parse', 'HEAD')
                if output != self.__commit:
                    status.add(ScmTaint.switched,
                        "> commit: configured: '{}', actual: '{}'".format(self.__commit, output))
            elif self.__tag:
                output = self.callGit(workspacePath, 'tag', '--points-at', 'HEAD').splitlines()
                if self.__tag not in output:
                    actual = ("'" + ", ".join(output) + "'") if output else "not on any tag"
                    status.add(ScmTaint.switched,
                        "> tag: configured: '{}', actual: {}".format(self.__tag, actual))

                # Need to check if the tag still exists. Otherwise the "git
                # log" command at the end will trip.
                try:
                    self.callGit(workspacePath, 'rev-parse', 'tags/'+self.__tag)
                    onTag = True
                except BuildError:
                    pass
            elif self.__branch:
                output = self.callGit(workspacePath, 'rev-parse', '--abbrev-ref', 'HEAD')
                if output != self.__branch:
                    status.add(ScmTaint.switched,
                        "> branch: configured: '{}', actual: '{}'".format(self.__branch, output))
                else:
                    # On the right branch: report commits that origin does not
                    # have yet.
                    output = self.callGit(workspacePath, 'log', '--oneline',
                        'refs/remotes/origin/'+self.__branch+'..HEAD')
                    if output:
                        status.add(ScmTaint.unpushed_main,
                            joinLines("> unpushed commits on {}:".format(self.__branch),
                                indent(output, '   ')))
                    onCorrectBranch = True

            # Check for modifications wrt. checked out commit
            output = self.callGit(workspacePath, 'status', '--porcelain')
            if output:
                status.add(ScmTaint.modified, joinLines("> modified:",
                    indent(output, '   ')))

            # The following shows all unpushed commits reachable by any ref
            # (local branches, stash, detached HEAD, etc).
            # Exclude HEAD if the configured branch is checked out to not
            # double-count them. Does not mark the SCM as dirty. Exclude the
            # configured tag too if it is checked out. Otherwise the tag would
            # count as unpushed if it is not on a remote branch.
            what = ['--all', '--not', '--remotes']
            if onCorrectBranch: what.append('HEAD')
            if onTag: what.append("tags/"+self.__tag)
            output = self.callGit(workspacePath, 'log', '--oneline', '--decorate',
                *what)
            if output:
                status.add(ScmTaint.unpushed_local,
                    joinLines("> unpushed local commits:", indent(output, '   ')))

        except BuildError as e:
            status.add(ScmTaint.error, e.slogan)

        return status

    def getAuditSpec(self):
        """Return the audit specification: SCM kind, checkout directory and
        extra properties (none are needed for git)."""
        return ("git", self.__dir, {})

    def hasLiveBuildId(self):
        """Git always supports live build-ids (derived from the HEAD commit)."""
        return True

    async def predictLiveBuildId(self, step):
        """Predict the live build-id without touching the workspace.

        A configured commit is returned directly. Otherwise 'git ls-remote'
        is queried for the configured tag or branch. Returns the commit hash
        as bytes, or None if the prediction failed (remote unreachable, ref
        not found, or unparsable output).
        """
        if self.__commit:
            return bytes.fromhex(self.__commit)

        with stepAction(step, "LS-REMOTE", self.__url, (INFO, TRACE)) as a:
            if self.__tag:
                # Annotated tags are objects themselves. We need the commit object!
                refs = ["refs/tags/" + self.__tag + '^{}', "refs/tags/" + self.__tag]
            else:
                refs = ["refs/heads/" + self.__branch]
            cmdLine = ['git', 'ls-remote', self.__url] + refs
            try:
                proc = await asyncio.create_subprocess_exec(*cmdLine,
                    stdout=asyncio.subprocess.PIPE,
                    stderr=subprocess.DEVNULL)
                try:
                    # stderr is sent to DEVNULL above, so the second element
                    # of communicate() is None.
                    stdout, stderr = await proc.communicate()
                    rc = await proc.wait()
                finally:
                    # Make sure the child is gone even if we were cancelled;
                    # it may have exited already, hence ProcessLookupError.
                    try:
                        proc.terminate()
                    except ProcessLookupError:
                        pass
                if rc != 0:
                    a.fail("exit {}".format(rc), WARNING)
                    return None
                output = stdout.decode(locale.getpreferredencoding(False)).strip()
            except (subprocess.CalledProcessError, OSError) as e:
                a.fail("error ({})".format(e))
                return None

            # have we found anything at all?
            if not output:
                a.fail("unknown", WARNING)
                return None

            # See if we got one of our intended refs. Git is generating lines with
            # the following format:
            #
            #   <sha1>\t<refname>
            #
            # Put the output into a dict with the refname as key. Be extra careful
            # and strip out lines not matching this pattern.
            output = {
                commitAndRef[1].strip() : bytes.fromhex(commitAndRef[0].strip())
                for commitAndRef
                in (line.split('\t') for line in output.split('\n'))
                if len(commitAndRef) == 2 }
            # refs is ordered by preference (dereferenced annotated tag first).
            for ref in refs:
                if ref in output: return output[ref]

            # uhh, should not happen...
            a.fail("unknown", WARNING)
            return None

    def calcLiveBuildId(self, workspacePath):
        """Compute the live build-id of the checkout in *workspacePath*.

        A configured commit is used directly; otherwise the current HEAD of
        the workspace is resolved via git. Returns the commit hash as bytes.
        """
        if self.__commit:
            return bytes.fromhex(self.__commit)
        head = self.callGit(workspacePath, 'rev-parse', 'HEAD').strip()
        return bytes.fromhex(head)

    def getLiveBuildIdSpec(self, workspacePath):
        """Return a textual live build-id spec.

        "=<hash>" denotes a fixed commit; "g<path>" tells the consumer to
        resolve HEAD of the given git directory later (see
        processLiveBuildIdSpec).
        """
        if self.__commit:
            return "=" + self.__commit
        return "g" + os.path.join(workspacePath, self.__dir)

    @staticmethod
    def processLiveBuildIdSpec(dir):
        """Resolve a "g<path>" live build-id spec: return HEAD of *dir*.

        Raises BuildError if git fails or cannot be executed.
        """
        cmdLine = ["git", "rev-parse", "HEAD"]
        try:
            output = subprocess.check_output(cmdLine, cwd=dir,
                universal_newlines=True)
        except subprocess.CalledProcessError as e:
            raise BuildError("Git audit failed: " + str(e))
        except OSError as e:
            raise BuildError("Error calling git: " + str(e))
        return output.strip()
Exemple #29
0
    Schemes.S3: OutputS3,
    Schemes.GS: OutputGS,
    Schemes.SSH: OutputSSH,
    Schemes.LOCAL: OutputLOCAL,
}

# NOTE: currently there are only 3 possible checksum names:
#
#    1) md5 (LOCAL, SSH, GS);
#    2) etag (S3);
#    3) checksum (HDFS);
#
# so when a few types of outputs share the same name, we only need
# specify it once.
CHECKSUM_SCHEMA = {
    schema.Optional(remote.PARAM_CHECKSUM): schema.Or(str, None)
    for remote in (RemoteLOCAL, RemoteS3, RemoteHDFS)
}

# Per-tag checksum information, keyed by tag name.
TAGS_SCHEMA = {schema.Optional(str): CHECKSUM_SCHEMA}

# Full output schema: the checksum keys plus the output's own attributes.
SCHEMA = {
    **CHECKSUM_SCHEMA,
    OutputBase.PARAM_PATH: str,
    schema.Optional(OutputBase.PARAM_CACHE): bool,
    schema.Optional(OutputBase.PARAM_METRIC): OutputBase.METRIC_SCHEMA,
    schema.Optional(OutputBase.PARAM_TAGS): TAGS_SCHEMA,
    schema.Optional(OutputBase.PARAM_PERSIST): bool,
}


def _get(stage, p, info, cache, metric, persist=False, tags=None):
Exemple #30
0
from datetime import datetime

import schema as sm

from .client import HttpClient, ClientError
from .provider import ExchangeRatesProvider, ExchangeRateLoadError
from ..models import Currency, Rate

# All currency codes supported by the application, taken from the Currency
# enum of the models package.
_POSSIBLE_CCY = [ccy.value for ccy in Currency]

# A currency field must be one of the known codes.
_ccy_validator = sm.Or(*_POSSIBLE_CCY)
# A rate must be coercible to a non-negative float.
_rate_validator = sm.And(sm.Use(float), lambda n: n >= 0)
# Dates are expected in ISO "YYYY-MM-DD" form (format only, not range-checked).
_date_validator = sm.Regex(r"\d{4}-\d{2}-\d{2}")

# Schema for an ECB exchange-rates payload — presumably the JSON response of
# the configured API endpoint; verify against the actual API contract.
_ecb_rate_scm = sm.Schema({
    "rates": {
        _ccy_validator: _rate_validator
    },
    "base": _ccy_validator,
    "date": _date_validator
})


class ECBProvider(ExchangeRatesProvider):
    API_ENDPOINT_CONFIG_NAME = "API_ENDPOINT"

    @classmethod
    def create(cls, endpoint: str):
        if not endpoint:
            raise ValueError("API endpoint is required")