コード例 #1
0
ファイル: gitmanager.py プロジェクト: shaikidris/dgit
    def init(self, username, reponame, force, backend=None):
        """
        Create a bare "server" repo on the local filesystem and clone it.

        Parameters
        ----------

        username, reponame : together they identify the repo
        force : when true, existing server/local directories are removed
            and recreated instead of raising
        backend : optional backend object (e.g. s3). When given, its
            init_repo() is called on the server repo directory and its
            url() supplies the remote URL of the returned repo.

        Returns
        -------

        The Repo object, after it has been handed to self.add()

        Raises
        ------

        RepositoryExists : the server repo directory exists and force is false
        Exception : the local repo directory exists and force is false
        """
        # The return value is not used in this method
        self.key(username, reponame)

        # Directory of the bare repo that acts as the remote
        server_repodir = self.server_rootdir(username, reponame, create=False)

        # An existing server repo is only wiped when force is set
        if os.path.exists(server_repodir):
            if not force:
                raise RepositoryExists()
            shutil.rmtree(server_repodir)
        os.makedirs(server_repodir)

        with cd(server_repodir):
            git.init(".", "--bare")

        has_backend = backend is not None
        if has_backend:
            backend.init_repo(server_repodir)

        # Directory of the working copy cloned from the server repo
        repodir = self.rootdir(username, reponame, create=False)

        # Same policy for the working copy: wipe only when forced
        if os.path.exists(repodir):
            if not force:
                raise Exception("Local repo already exists")
            shutil.rmtree(repodir)
        os.makedirs(repodir)

        # The clone is issued from the parent of the working copy
        parentdir = os.path.dirname(repodir)
        with cd(parentdir):
            git.clone(server_repodir, '--no-hardlinks')

        # Remote URL comes from the backend when there is one
        url = backend.url(username, reponame) if has_backend else server_repodir

        repo = Repo(username, reponame)
        repo.manager = self
        repo.remoteurl = url
        repo.rootdir = self.rootdir(username, reponame)

        self.add(repo)
        return repo
コード例 #2
0
ファイル: gitmanager.py プロジェクト: gitter-badger/dgit
    def init(self, username, reponame, force, backend=None):
        """
        Create a bare "server" repo on the local filesystem and clone it.

        Parameters
        ----------

        username, reponame : together they identify the repo
        force : when true, existing server/local directories are removed
            and recreated instead of raising
        backend : optional backend object (e.g. s3). When given, its
            init_repo() is called on the server repo directory and its
            url() supplies the remote URL of the returned repo.

        Returns
        -------

        The Repo object, after it has been handed to self.add()

        Raises
        ------

        RepositoryExists : the server repo directory exists and force is false
        Exception : the local repo directory exists and force is false
        """
        # The return value is not used in this method
        self.key(username, reponame)

        # Directory of the bare repo that acts as the remote
        server_repodir = self.server_rootdir(username, reponame, create=False)

        # An existing server repo is only wiped when force is set
        if os.path.exists(server_repodir):
            if not force:
                raise RepositoryExists()
            shutil.rmtree(server_repodir)
        os.makedirs(server_repodir)

        with cd(server_repodir):
            git.init(".", "--bare")

        has_backend = backend is not None
        if has_backend:
            backend.init_repo(server_repodir)

        # Directory of the working copy cloned from the server repo
        repodir = self.rootdir(username, reponame, create=False)

        # Same policy for the working copy: wipe only when forced
        if os.path.exists(repodir):
            if not force:
                raise Exception("Local repo already exists")
            shutil.rmtree(repodir)
        os.makedirs(repodir)

        # The clone is issued from the parent of the working copy
        parentdir = os.path.dirname(repodir)
        with cd(parentdir):
            git.clone(server_repodir, '--no-hardlinks')

        # Remote URL comes from the backend when there is one
        url = backend.url(username, reponame) if has_backend else server_repodir

        repo = Repo(username, reponame)
        repo.manager = self
        repo.remoteurl = url
        repo.rootdir = self.rootdir(username, reponame)

        self.add(repo)
        return repo
コード例 #3
0
ファイル: gitmanager.py プロジェクト: gitter-badger/dgit
 def add_raw(self, repo, files):
     """
     Run the `add` subcommand for the given files, best-effort.

     The command is issued through self._run from repo.rootdir. A failing
     command is ignored on purpose; the command output is discarded and
     the method always returns None.

     Parameters
     ----------
     repo : object whose rootdir attribute is the directory to run in
     files : list of paths appended to the `add` command
     """
     with cd(repo.rootdir):
         try:
             self._run(["add"] + files)
         except Exception:
             # Best-effort: a failed add is deliberately ignored, but
             # KeyboardInterrupt/SystemExit must still propagate, which
             # a bare `except:` would have swallowed.
             pass
コード例 #4
0
    def clone_repo(self, url, gitdir):
        """
        Sync a remote s3 location into a local directory.

        Parameters
        ----------
        url : remote location; must pass self.url_is_valid()
        gitdir : local directory that receives the sync'd content; it is
            created if it does not exist yet

        Raises
        ------
        Exception : ("Invalid URL") when the URL fails validation
        OSError : when gitdir does not exist and cannot be created
        """

        if not self.url_is_valid(url):
            raise Exception("Invalid URL")

        try:
            os.makedirs(gitdir)
        except OSError:
            # An already-existing directory is fine. Anything else
            # (permissions, a file in the way, ...) would only fail
            # later with a less helpful error, so surface it here.
            if not os.path.isdir(gitdir):
                raise

        print("Syncing into local directory", gitdir)
        with cd(gitdir):
            # The sync tool is picked by self.client
            if self.client == 'aws':
                cmd = ["aws", "s3", "sync", '--delete', url + "/", "."]
            else:
                cmd = ["s3cmd", "-c", self.s3cfg, "sync", url + "/", "."]
            self.run(cmd)
            print("Sync'd dataset with s3")

        # Make sure that the hook has correct permissions; if the hook
        # file is missing, fall back to self.init_repo() on the directory
        postrecv_filename = os.path.join(gitdir, 'hooks', 'post-receive')
        if os.path.exists(postrecv_filename):
            self.make_hook_executable(postrecv_filename)
        else:
            self.init_repo(gitdir)
コード例 #5
0
 def add_raw(self, repo, files):
     """
     Run the `add` subcommand for the given files, best-effort.

     The command is issued through self._run from repo.rootdir. A failing
     command is ignored on purpose; the command output is discarded and
     the method always returns None.

     Parameters
     ----------
     repo : object whose rootdir attribute is the directory to run in
     files : list of paths appended to the `add` command
     """
     with cd(repo.rootdir):
         try:
             self._run(["add"] + files)
         except Exception:
             # Best-effort: a failed add is deliberately ignored, but
             # KeyboardInterrupt/SystemExit must still propagate, which
             # a bare `except:` would have swallowed.
             pass
コード例 #6
0
ファイル: gitmanager.py プロジェクト: gitter-badger/dgit
    def _run_generic_command(self, repo, cmd):
        """
        Run a generic command from the repo's root directory and report
        the outcome as a dict instead of raising.

        Parameters
        ----------
        repo : object whose rootdir attribute is the directory to run in
        cmd : command handed unchanged to self._run

        Returns
        -------
        dict with keys 'cmd', 'status' and 'message'. On success the
        status is 'success' and the message is the output of self._run;
        if self._run raises, the status is 'error' and the message is
        str() of the exception.
        """

        result = None
        with cd(repo.rootdir):
            # Dont use sh. It is not collecting the stdout of all
            # child processes.
            try:
                # The call has to be inside the try: it is the only
                # statement that can fail, and the error branch below
                # is unreachable otherwise.
                output = self._run(cmd)
            except Exception as e:
                result = {
                    'cmd': cmd,
                    'status': 'error',
                    'message': str(e)
                }
            else:
                result = {
                    'cmd': cmd,
                    'status': 'success',
                    'message': output,
                }

        return result
コード例 #7
0
def executable_commit(filename,
                      what=['commit', 'username', 'useremail', 'date']):
    """
    Collect attributes of the most recent commit that touched a file.

    Runs `git log -n 1` restricted to `filename` from the file's
    directory and splits the comma-separated output.

    Parameters
    ----------
    filename : path of the file whose last commit is inspected
    what : names of the attributes to collect; each must be one of
        'commit', 'username', 'useremail', 'date'

    Returns
    -------
    dict mapping every requested name to the matching output field

    Raises
    ------
    Exception : ("Invalid specification") when `what` contains a name
        that is not supported
    """
    mapping = {
        'commit': '%H',
        # Committer name, matching the committer email/date placeholders
        # below. The '******' literal found here was a masked value, not
        # a git format placeholder.
        'username': '%cn',
        'useremail': '%ce',
        'date': '%cd'
    }

    # Collect the unknown names themselves; looking them up in `mapping`
    # would raise KeyError before the intended error could be reported.
    missing = [w for w in what if w not in mapping]
    if len(missing) > 0:
        print("Cannot gather commit attributes of executable", missing)
        raise Exception("Invalid specification")

    codes = ",".join([mapping[w] for w in what])

    with cd(os.path.dirname(filename)):
        cmd = 'git log -n 1  --date=iso --pretty="%s" -- %s ' % (codes,
                                                                 filename)
        output = run(cmd)
        output = output.strip()
        # Fields come back in the order the placeholders were requested
        output = output.split(",")
        return {w: output[i] for i, w in enumerate(what)}
コード例 #8
0
def executable_repopath(filename):
    """
    Build the path of a file from the prefix git reports for its directory.

    Runs `git rev-parse --show-prefix` from the file's directory and
    joins the stripped output with the file's base name.

    Returns
    -------
    dict with the single key 'path'
    """
    directory, basename = os.path.split(filename)
    with cd(directory):
        prefix = run('git rev-parse --show-prefix').strip()
        return {'path': os.path.join(prefix, basename)}
コード例 #9
0
ファイル: gitmanager.py プロジェクト: shaikidris/dgit
    def _run_generic_command(self, repo, cmd):
        """
        Run a generic command from the repo's root directory and report
        the outcome as a dict instead of raising.

        Parameters
        ----------
        repo : object whose rootdir attribute is the directory to run in
        cmd : command handed unchanged to self._run

        Returns
        -------
        dict with keys 'cmd', 'status' and 'message'. On success the
        status is 'success' and the message is the output of self._run;
        if self._run raises, the status is 'error' and the message is
        str() of the exception.
        """

        result = None
        with cd(repo.rootdir):
            # Dont use sh. It is not collecting the stdout of all
            # child processes.
            try:
                # The call has to be inside the try: it is the only
                # statement that can fail, and the error branch below
                # is unreachable otherwise.
                output = self._run(cmd)
            except Exception as e:
                result = {
                    'cmd': cmd,
                    'status': 'error',
                    'message': str(e)
                }
            else:
                result = {
                    'cmd': cmd,
                    'status': 'success',
                    'message': output,
                }

        return result
コード例 #10
0
    def evaluate(self, repo, spec, args):
        """
        Check the integrity of the datapackage.json

        Compares the files matched by the spec (via
        repo.find_matching_files) with the regular files found on disk
        under repo.rootdir, excluding datapackage.json itself.

        Parameters
        ----------
        repo : repo object; provides rootdir, find_matching_files() and
            get_resource()
        spec : dict; the optional 'files' entry lists the patterns to
            match (defaults to ['*'])
        args : not used by this method

        Returns
        -------
        list of dicts, one per file in sorted order, with keys 'target',
        'rules', 'validator', 'description', 'status' and 'message'.
        Status is 'OK' only when the file is in both sets and its
        computed sha256 equals the one recorded in its resource.
        """

        def entry(target, state, message):
            # Every record has the same shape; only these three vary
            return {
                'target': target,
                'rules': "",
                'validator': self.name,
                'description': self.description,
                'status': state,
                'message': message
            }

        status = []
        with cd(repo.rootdir):
            patterns = spec.get('files', ['*'])
            resource_files = repo.find_matching_files(patterns)
            disk_files = [f for f in glob2.glob("**/*")
                          if os.path.isfile(f) and f != "datapackage.json"]

            # Sets keep the per-file membership tests constant-time
            in_resources = set(resource_files)
            on_disk = set(disk_files)

            for f in sorted(in_resources | on_disk):
                if f in in_resources and f in on_disk:
                    r = repo.get_resource(f)
                    coded_sha256 = r['sha256']
                    computed_sha256 = compute_sha256(f)
                    if computed_sha256 != coded_sha256:
                        status.append(entry(
                            f, 'ERROR',
                            "Mismatch in checksum on disk and in datapackage.json"))
                    else:
                        status.append(entry(f, 'OK', ""))
                elif f in in_resources:
                    status.append(entry(
                        f, 'ERROR', "In datapackage.json but not in repo"))
                else:
                    status.append(entry(
                        f, 'ERROR', "In repo but not in datapackage.json"))

        return status
コード例 #11
0
def executable_filetype(filename):
    """
    Describe a file using the output of /usr/bin/file.

    The command is run from the file's directory; everything after the
    first colon of the stripped output is returned.

    Returns
    -------
    dict with the single key 'filetype'
    """
    directory = os.path.dirname(filename)
    with cd(directory):
        report = run('/usr/bin/file ' + filename).strip()
        # Drop everything up to and including the first colon
        colon_at = report.index(":")
        return {'filetype': report[colon_at + 1:]}
コード例 #12
0
    def evaluate(self, repo, spec, force=False, args=[]):
        """
        Evaluate an SQL query, cache the results in server

        Each file listed in the spec is read as a query and run through
        self.execute(); the resulting info, schema and data are written
        to the repo cache under "<file>.info", "<file>.schema" and
        "<file>.data".

        Parameters
        ----------
        repo : repo object; provides rootdir, cache_path(), cache_check()
            and cache_write()
        spec : dict; the 'files' entry lists the query files
        force : when true, run the query even if a cached "<file>.data"
            entry already exists
        args : not used by this method

        Returns
        -------
        list of dicts (keys 'target', 'transformer', 'status',
        'message'), one per file; empty when the spec lists no files
        """

        files = spec.get('files', [])

        if not files:
            # Nothing to do, so do not even open a connection
            return []

        db = MySQLdb.connect(host=self.host,
                             port=self.port,
                             db=self.db,
                             user=self.username,
                             passwd=self.password)
        try:
            cur = db.cursor()
            try:
                result = []
                with cd(repo.rootdir):
                    for f in files:

                        cachepath = repo.cache_path(self.name, f + '.data')
                        if not force and repo.cache_check(cachepath):
                            result.append({
                                'target': f,
                                'transformer': self.name,
                                'status': 'OK',
                                'message': 'Result already cached ({})'.format(cachepath['relative'])
                            })
                            continue

                        # Not cached (or forced): run the query
                        with open(f) as fd:
                            query = fd.read()
                        (info, schema, data) = self.execute(cur, query)

                        # Save the results
                        for suffix, content in (('info', info),
                                                ('schema', schema),
                                                ('data', data)):
                            cachepath = repo.cache_path(self.name,
                                                        f + "." + suffix)
                            repo.cache_write(cachepath, content)

                        result.append({
                            # Report the file that was just processed,
                            # not always the first one in the list
                            'target': f,
                            'transformer': self.name,
                            'status': 'OK',
                            'message': 'Executed the query'
                        })
            finally:
                cur.close()
        finally:
            # Release the connection even when a query fails
            db.close()

        return result
コード例 #13
0
    def delete(self, repo, args=[]):
        """
        Remove files from the repo by running the `rm` subcommand.

        The command is issued through self._run from repo.rootdir.

        Parameters
        ----------
        repo : object whose rootdir attribute is the directory to run in
        args : arguments appended to the `rm` command

        Returns
        -------
        dict with keys 'status' and 'message': 'success' with the
        command output, or 'error' with str() of the raised exception
        """
        with cd(repo.rootdir):
            try:
                output = self._run(['rm'] + list(args))
            except Exception as e:
                return {'status': 'error', 'message': str(e)}
            return {'status': 'success', 'message': output}
コード例 #14
0
def repo_origin(filename, what=['Push  URL']):
    """
    Pick selected fields out of `git remote show origin`.

    The command is run from the file's directory. The first output line
    is skipped; for every remaining line that contains one of the labels
    in `what`, the text after the line's first colon is recorded.

    Parameters
    ----------
    filename : path whose directory is used to run the command
    what : labels to look for in the output lines

    Returns
    -------
    dict mapping each label that was found to the text after the colon
    """
    with cd(os.path.dirname(filename)):
        listing = run("git remote show origin")
        # Shape of the output being parsed:
        #   * remote origin
        #   Fetch URL: <url>
        #   Push  URL: <url>
        #   HEAD branch: master
        #   Remote branches:

        response = {}
        # The "* remote origin" header line carries no field
        for line in listing.split("\n")[1:]:
            for label in what:
                if label not in line:
                    continue
                response[label] = line[line.index(":") + 1:]

    return response
コード例 #15
0
ファイル: gitmanager.py プロジェクト: gitter-badger/dgit
    def delete(self, repo, args=[]):
        """
        Remove files from the repo by running the `rm` subcommand.

        The command is issued through self._run from repo.rootdir.

        Parameters
        ----------
        repo : object whose rootdir attribute is the directory to run in
        args : arguments appended to the `rm` command

        Returns
        -------
        dict with keys 'status' and 'message': 'success' with the
        command output, or 'error' with str() of the raised exception
        """
        with cd(repo.rootdir):
            try:
                message = self._run(['rm'] + list(args))
            except Exception as e:
                outcome, message = 'error', str(e)
            else:
                outcome = 'success'

            return {'status': outcome, 'message': message}
コード例 #16
0
 def add_files(self, repo, files):
     """
     Add files to the repo

     Each file is copied to its relative path under repo.rootdir and
     then staged with the `add` subcommand through self._run.

     Parameters
     ----------
     repo : object whose rootdir attribute is the repo directory
     files : iterable of dicts with keys 'relativepath' (destination
         relative to the repo root) and 'localfullpath' (source file,
         or None to skip the entry)

     Raises
     ------
     OSError : when a destination directory is missing and cannot be
         created
     """
     rootdir = repo.rootdir
     for f in files:
         relativepath = f['relativepath']
         sourcepath = f['localfullpath']
         if sourcepath is None:
             # This can happen if the relative path is a URL
             continue

         # Prepare the target path
         targetpath = os.path.join(rootdir, relativepath)
         targetdir = os.path.dirname(targetpath)
         try:
             os.makedirs(targetdir)
         except OSError:
             # Only "already exists" is harmless; any other failure
             # would make the copy below fail with a vaguer error.
             if not os.path.isdir(targetdir):
                 raise

         print("Updating: {}".format(relativepath))
         shutil.copyfile(sourcepath, targetpath)
         with cd(rootdir):
             self._run(['add', relativepath])
コード例 #17
0
ファイル: gitmanager.py プロジェクト: gitter-badger/dgit
 def add_files(self, repo, files):
     """
     Add files to the repo

     Each file is copied to its relative path under repo.rootdir and
     then staged with the `add` subcommand through self._run.

     Parameters
     ----------
     repo : object whose rootdir attribute is the repo directory
     files : iterable of dicts with keys 'relativepath' (destination
         relative to the repo root) and 'localfullpath' (source file,
         or None to skip the entry)

     Raises
     ------
     OSError : when a destination directory is missing and cannot be
         created
     """
     rootdir = repo.rootdir
     for f in files:
         relativepath = f['relativepath']
         sourcepath = f['localfullpath']
         if sourcepath is None:
             # This can happen if the relative path is a URL
             continue

         # Prepare the target path
         targetpath = os.path.join(rootdir, relativepath)
         targetdir = os.path.dirname(targetpath)
         try:
             os.makedirs(targetdir)
         except OSError:
             # Only "already exists" is harmless; any other failure
             # would make the copy below fail with a vaguer error.
             if not os.path.isdir(targetdir):
                 raise

         print("Updating: {}".format(relativepath))
         shutil.copyfile(sourcepath, targetpath)
         with cd(rootdir):
             self._run(['add', relativepath])
コード例 #18
0
    def clone(self, url, backend=None):
        """
        Clone a URL

        Parameters
        ----------

        url : URL of the repo. Supports s3://, git@, http://
        backend : optional backend object. When None the URL is cloned
            directly; otherwise backend.clone_repo() first syncs the URL
            into the server repo directory (only if that directory does
            not exist yet) and the clone is made from that directory.

        Returns
        -------

        Whatever self.add() returns for the newly built Repo object
        """

        # s3://bucket/git/username/repo.git
        username = self.username
        reponame = url.split("/")[-1]  # with git
        # Drop only a trailing ".git". Removing every occurrence would
        # mangle names such as "my.github.io.git" and make rootdir point
        # somewhere other than the directory `git clone` creates.
        if reponame.endswith(".git"):
            reponame = reponame[:-len(".git")]

        # In local filesystem-based server, add a repo
        server_repodir = self.server_rootdir(username, reponame, create=False)

        rootdir = self.rootdir(username, reponame, create=False)

        if backend is None:
            # Backend is standard git repo (https://, git@...)
            with cd(os.path.dirname(rootdir)):
                self._run(['clone', '--no-hardlinks', url])
        else:
            # Backend is s3
            # Sync if needed.
            if not os.path.exists(server_repodir):
                # s3 -> .dgit/git/pingali/hello.git -> .dgit/datasets/pingali/hello
                backend.clone_repo(url, server_repodir)

            # After sync clone,
            with cd(os.path.dirname(rootdir)):
                self._run(['clone', '--no-hardlinks', server_repodir])

        # Add a fetch refspec for refs/notes/* next to the default one,
        # unless the config already mentions notes
        configfile = os.path.join(rootdir, '.git', 'config')
        with open(configfile) as fd:
            content = fd.read()
        original = "fetch = +refs/heads/*:refs/remotes/origin/*"
        replacement = original + "\n        fetch = +refs/notes/*:refs/notes/*"
        if "notes" not in content:
            content = content.replace(original, replacement)
            with open(configfile, 'w') as fd:
                fd.write(content)

        # Pull the notes if any as well..
        with cd(rootdir):
            self._run(['pull', 'origin'])

        # Insert the object into the internal table we maintain...
        r = Repo(username, reponame)
        r.rootdir = rootdir
        r.remoteurl = url
        r.manager = self

        # Load datapackage.json with its key order preserved
        package = os.path.join(r.rootdir, 'datapackage.json')
        with open(package) as fd:
            r.package = json.load(
                fd, object_pairs_hook=collections.OrderedDict)

        return self.add(r)
コード例 #19
0
    def evaluate(self, repo, spec, args):
        """
        Check the R-squared value reported in model output files against
        a minimum taken from the rules.

        Parameters
        ----------
        repo : object whose rootdir attribute is the directory in which
            the files are read
        spec : dict. 'files' lists the model output files. Rules come
            from 'rules-files' (JSON files, one rule set per file, keyed
            by file name) or else from the inline 'rules' entry.
        args : not used by this method

        Returns
        -------
        list of dicts with keys 'target', 'validator', 'description',
        'rules', 'status' and 'message'; one per (rule set, file) pair
        for every rule set that defines 'min-r2'. Empty when the spec
        lists no files.

        Raises
        ------
        InvalidParameters : when no rules could be found
        """

        status = []

        # Do we have to do anything at all?
        if not spec['files']:
            return status

        def record(target, rule, state, message):
            # Every record has the same shape; only these four vary
            return {
                'target': target,
                'validator': self.name,
                'description': self.description,
                'rules': rule,
                'status': state,
                'message': message
            }

        # The dot is escaped so that only a literal decimal point matches
        r2_pattern = re.compile(r"R-squared:\s+(\d\.\d+)")

        with cd(repo.rootdir):

            rules = None
            if 'rules-files' in spec and len(spec['rules-files']) > 0:
                rules = {}
                for rulesfile in spec['rules-files']:
                    with open(rulesfile) as fd:
                        rules[rulesfile] = json.load(fd)
            elif 'rules' in spec:
                rules = {
                    'inline': spec['rules']
                }

            if rules is None or len(rules) == 0:
                print("Regression quality validation has been enabled but no rules file has been specified")
                print("Example: { 'min-r2': 0.25 }. Put this either in file or in dgit.json")
                raise InvalidParameters("Regression quality checking rules missing")

            # Read every model output once, up front
            files = {}
            for f in spec['files']:
                with open(f) as fd:
                    files[f] = fd.read()

            for r in rules:
                if 'min-r2' not in rules[r]:
                    continue
                minr2 = float(rules[r]['min-r2'])
                for f in files:
                    match = r2_pattern.search(files[f])
                    if match is None:
                        status.append(
                            record(f, r, "ERROR", "Invalid model output"))
                        continue

                    r2 = float(match.group(1))
                    if r2 > minr2:
                        status.append(record(f, r, "OK", "Acceptable R2"))
                    else:
                        status.append(record(f, r, "ERROR", "R2 is too low"))

        return status
コード例 #20
0
def repo_remote_url(filename):
    """
    Read the `remote.origin.url` git setting for a file's repository.

    Runs `git config --get remote.origin.url` from the file's directory.

    Returns
    -------
    dict with the single key 'remote.origin.url' holding the stripped
    command output
    """
    directory = os.path.dirname(filename)
    with cd(directory):
        url = run("git config --get remote.origin.url").strip()
        return {'remote.origin.url': url}
コード例 #21
0
ファイル: gitmanager.py プロジェクト: gitter-badger/dgit
    def clone(self, url, backend=None):
        """
        Clone a URL

        Parameters
        ----------

        url : URL of the repo. Supports s3://, git@, http://
        backend : optional backend object. When None the URL is cloned
            directly; otherwise backend.clone_repo() first syncs the URL
            into the server repo directory (only if that directory does
            not exist yet) and the clone is made from that directory.

        Returns
        -------

        Whatever self.add() returns for the newly built Repo object
        """

        # s3://bucket/git/username/repo.git
        username = self.username
        reponame = url.split("/")[-1]  # with git
        # Drop only a trailing ".git". Removing every occurrence would
        # mangle names such as "my.github.io.git" and make rootdir point
        # somewhere other than the directory `git clone` creates.
        if reponame.endswith(".git"):
            reponame = reponame[:-len(".git")]

        # In local filesystem-based server, add a repo
        server_repodir = self.server_rootdir(username,
                                             reponame,
                                             create=False)

        rootdir = self.rootdir(username, reponame, create=False)

        if backend is None:
            # Backend is standard git repo (https://, git@...)
            with cd(os.path.dirname(rootdir)):
                self._run(['clone', '--no-hardlinks', url])
        else:
            # Backend is s3
            # Sync if needed.
            if not os.path.exists(server_repodir):
                # s3 -> .dgit/git/pingali/hello.git -> .dgit/datasets/pingali/hello
                backend.clone_repo(url, server_repodir)

            # After sync clone,
            with cd(os.path.dirname(rootdir)):
                self._run(['clone', '--no-hardlinks', server_repodir])

        # Add a fetch refspec for refs/notes/* next to the default one,
        # unless the config already mentions notes
        configfile = os.path.join(rootdir, '.git', 'config')
        with open(configfile) as fd:
            content = fd.read()
        original = "fetch = +refs/heads/*:refs/remotes/origin/*"
        replacement = original + "\n        fetch = +refs/notes/*:refs/notes/*"
        if "notes" not in content:
            content = content.replace(original, replacement)
            with open(configfile, 'w') as fd:
                fd.write(content)

        # Pull the notes if any as well..
        with cd(rootdir):
            self._run(['pull', 'origin'])

        # Insert the object into the internal table we maintain...
        r = Repo(username, reponame)
        r.rootdir = rootdir
        r.remoteurl = url
        r.manager = self

        # Load datapackage.json with its key order preserved
        package = os.path.join(r.rootdir, 'datapackage.json')
        with open(package) as fd:
            r.package = json.load(
                fd, object_pairs_hook=collections.OrderedDict)

        return self.add(r)
コード例 #22
0
    def evaluate(self, repo, spec, args):
        """
        Check the R-squared value reported in model output files against
        a minimum taken from the rules.

        Parameters
        ----------
        repo : object whose rootdir attribute is the directory in which
            the files are read
        spec : dict. 'files' lists the model output files. Rules come
            from 'rules-files' (JSON files whose top-level entries are
            merged into one dict, later files overriding earlier ones)
            or else from the inline 'rules' entry.
        args : not used by this method

        Returns
        -------
        list of dicts with keys 'target', 'validator', 'description',
        'rules', 'status' and 'message'; one per (rule set, file) pair
        for every rule set that defines 'min-r2'. Empty when the spec
        lists no files.

        Raises
        ------
        InvalidParameters : when no rules could be found
        """

        status = []

        # Do we have to do anything at all?
        if not spec['files']:
            return status

        def record(target, rule, state, message):
            # Every record has the same shape; only these four vary
            return {
                'target': target,
                'validator': self.name,
                'description': self.description,
                'rules': rule,
                'status': state,
                'message': message
            }

        # The dot is escaped so that only a literal decimal point matches
        r2_pattern = re.compile(r"R-squared:\s+(\d\.\d+)")

        with cd(repo.rootdir):

            rules = None
            if 'rules-files' in spec and len(spec['rules-files']) > 0:
                rules = {}
                for rulesfile in spec['rules-files']:
                    with open(rulesfile) as fd:
                        rules.update(json.load(fd))
            elif 'rules' in spec:
                rules = {'inline': spec['rules']}

            if rules is None or len(rules) == 0:
                print(
                    "Regression quality validation has been enabled but no rules file has been specified"
                )
                print(
                    "Example: { 'min-r2': 0.25 }. Put this either in file or in dgit.json"
                )
                raise InvalidParameters(
                    "Regression quality checking rules missing")

            # Read every model output once, up front
            files = {}
            for f in spec['files']:
                with open(f) as fd:
                    files[f] = fd.read()

            for r in rules:
                if 'min-r2' not in rules[r]:
                    continue
                minr2 = float(rules[r]['min-r2'])
                for f in files:
                    match = r2_pattern.search(files[f])
                    if match is None:
                        status.append(
                            record(f, r, "ERROR", "Invalid model output"))
                        continue

                    r2 = float(match.group(1))
                    if r2 > minr2:
                        status.append(record(f, r, "OK", "Acceptable R2"))
                    else:
                        status.append(record(f, r, "ERROR", "R2 is too low"))

        return status