Esempio n. 1
0
    def store_path(self, artifact, path, name=None, checksum=True, max_objects=None):
        """Build manifest entries for an S3 object, or for all objects under a prefix.

        If ``checksum`` is falsy a single digest-less reference entry is
        returned. Otherwise the key is first tried as a single object; on a
        404 it is treated as a prefix and up to ``max_objects`` objects are
        enumerated instead.
        """
        self.init_boto()
        bucket, key = self._parse_uri(path)
        max_objects = max_objects or DEFAULT_MAX_OBJECTS
        if not checksum:
            return [ArtifactManifestEntry(name or key, path, digest=path)]

        candidate = self._s3.Object(bucket, key)
        multi = False
        start_time = None
        try:
            candidate.load()
            objs = [candidate]
        except self._botocore.exceptions.ClientError as e:
            code = e.response['Error']['Code']
            if code != "404":
                raise CommError("Unable to connect to S3 (%s): %s" % (code, e.response['Error']['Message']))
            # Key is not a single object; enumerate everything under it as a prefix.
            multi = True
            start_time = time.time()
            termlog('Generating checksum for up to %i objects with prefix "%s"... ' % (max_objects, key), newline=False)
            objs = self._s3.Bucket(bucket).objects.filter(Prefix=key).limit(max_objects)

        # Weird iterator scoping makes us assign this to a local function
        size = self._size_from_obj
        entries = [
            self._entry_from_obj(obj, path, name, prefix=key, multi=multi)
            for obj in objs
            if size(obj) > 0
        ]
        if start_time is not None:
            termlog('Done. %.1fs' % (time.time() - start_time), prefix=False)
        if len(entries) >= max_objects:
            raise ValueError('Exceeded %i objects tracked, pass max_objects to add_reference' % max_objects)
        return entries
Esempio n. 2
0
    def store_path(self, artifact, path, name=None, checksum=True, max_objects=None):
        """Build manifest entries for a local file or directory reference.

        Args:
            artifact: the artifact the entries are destined for (unused here).
            path: ``file://``-style URI (or plain path) being referenced.
            name: optional logical name override for a single-file reference.
            checksum: when falsy, return one digest-less reference entry.
            max_objects: cap on files tracked; defaults to DEFAULT_MAX_OBJECTS.

        Returns:
            A list of ArtifactManifestEntry.

        Raises:
            ValueError: if the path is neither file nor directory, or more
                than ``max_objects`` files are encountered.
        """
        url = urlparse(path)
        local_path = '%s%s' % (url.netloc, url.path)
        max_objects = max_objects or DEFAULT_MAX_OBJECTS
        # We have a single file or directory
        # Note, we follow symlinks for files contained within the directory
        entries = []
        if not checksum:
            return [ArtifactManifestEntry(name or os.path.basename(path), path, digest=path)]

        if os.path.isdir(local_path):
            i = 0
            start_time = time.time()
            termlog('Generating checksum for up to %i files in "%s"...' % (max_objects, local_path), newline=False)
            for root, dirs, files in os.walk(local_path):
                for sub_path in files:
                    i += 1
                    if i >= max_objects:
                        raise ValueError('Exceeded %i objects tracked, pass max_objects to add_reference' % max_objects)
                    # BUG FIX: size/digest were previously computed from the
                    # bare filename, which only worked when the cwd happened
                    # to be the walked directory — use the real on-disk path.
                    physical_path = os.path.join(root, sub_path)
                    # Keep the directory structure in the logical name and the
                    # reference path so nested files neither collide nor point
                    # at the wrong location. (For files directly in local_path
                    # this is identical to the old basename behavior.)
                    relative_path = os.path.relpath(physical_path, start=local_path)
                    entry = ArtifactManifestEntry(
                        relative_path,
                        os.path.join(path, relative_path),
                        size=os.path.getsize(physical_path),
                        digest=md5_file_b64(physical_path),
                    )
                    entries.append(entry)
            termlog('Done. %.1fs' % (time.time() - start_time), prefix=False)
        elif os.path.isfile(local_path):
            name = name or os.path.basename(local_path)
            entry = ArtifactManifestEntry(name, path, size=os.path.getsize(local_path), digest=md5_file_b64(local_path))
            entries.append(entry)
        else:
            # TODO: update error message if we don't allow directories.
            raise ValueError('Path "%s" must be a valid file or directory path' % path)
        return entries
Esempio n. 3
0
 def check_anonymous(self):
     """Offer to log runs anonymously when no API key is configured.

     Only prompts when attached to a tty and when the ANONYMOUS env var is
     explicitly set to "enable". Returns True if an anonymous key was
     created and installed, False otherwise.
     """
     # Nothing to do when a key already exists or we cannot prompt the user.
     if self.api.api_key or not sys.stdin.isatty():
         return False
     # Require anonymous mode to be explicitly enabled for now
     if os.environ.get(env.ANONYMOUS) != "enable":
         return False
     termlog(
         'No API key found. Would you like to log runs anonymously to {}? (y/n)'
         .format(self.api.app_url))
     answer = input().lower().strip()
     while answer not in ('y', 'n'):
         termlog('Invalid response. Please enter y/n.')
         answer = input().lower().strip()
     if answer != 'y':
         return False
     key = self.api.create_anonymous_api_key()
     url = self.api.app_url + '/login?apiKey={}'.format(key)
     termlog(
         'Your anonymous login link: {}. Do not share or lose this link!'
         .format(url))
     # Install the key everywhere the client looks for credentials.
     os.environ[env.API_KEY] = key
     self.api.set_setting('anonymous', True)
     util.write_netrc(self.api.api_url, "user", key)
     util.write_settings(settings=self.api.settings())
     self.api.reauth()
     return True
Esempio n. 4
0
    def add_dir(self, local_path, name=None):
        """Add every file under ``local_path`` to the artifact manifest.

        Symlinks inside the directory are followed. Logical paths are taken
        relative to ``local_path`` and optionally prefixed with ``name``.
        Digests are computed in parallel on a small thread pool.

        Args:
            local_path: directory whose contents are added.
            name: optional logical prefix for every entry.

        Raises:
            ValueError: if ``local_path`` is not a directory.
        """
        self._ensure_can_add()
        if not os.path.isdir(local_path):
            raise ValueError('Path is not a directory: %s' % local_path)

        termlog('Adding directory to artifact (%s)... ' %
            os.path.join('.', os.path.normpath(local_path)), newline=False)
        start_time = time.time()

        # Collect (logical, physical) pairs first so the thread pool only
        # does hashing work, not directory traversal.
        paths = []
        for dirpath, _, filenames in os.walk(local_path, followlinks=True):
            for fname in filenames:
                physical_path = os.path.join(dirpath, fname)
                logical_path = os.path.relpath(physical_path, start=local_path)
                if name is not None:
                    logical_path = os.path.join(name, logical_path)
                paths.append((logical_path, physical_path))

        def add_manifest_file(log_phy_path):
            # Hash one file and record it in the manifest.
            logical_path, physical_path = log_phy_path
            self._manifest.add_entry(
                ArtifactManifestEntry(
                    logical_path,
                    None,
                    digest=md5_file_b64(physical_path),
                    size=os.path.getsize(physical_path),
                    local_path=physical_path
                )
            )

        import multiprocessing.dummy  # this uses threads
        NUM_THREADS = 8
        pool = multiprocessing.dummy.Pool(NUM_THREADS)
        # BUG FIX: previously the pool was leaked (never closed/joined) if
        # pool.map raised, e.g. when a file disappears mid-hash.
        try:
            pool.map(add_manifest_file, paths)
        finally:
            pool.close()
            pool.join()

        termlog('Done. %.1fs' % (time.time() - start_time), prefix=False)
Esempio n. 5
0
    def store_path(self, artifact, path, name=None, checksum=True, max_objects=None):
        """Build manifest entries for a GCS blob, or for all blobs under a prefix.

        If ``checksum`` is falsy a single digest-less reference entry is
        returned. Otherwise the key is first looked up as a single blob; when
        no such blob exists it is treated as a prefix and up to
        ``max_objects`` blobs are listed instead.

        Raises:
            ValueError: if ``max_objects`` or more entries were produced
                (the listing may have been truncated).
        """
        self.init_gcs()
        bucket, key = self._parse_uri(path)
        max_objects = max_objects or DEFAULT_MAX_OBJECTS

        # Idiom fix: use truthiness (`not checksum`) instead of `== False`,
        # consistent with the S3 handler's store_path.
        if not checksum:
            return [ArtifactManifestEntry(name or key, path, digest=path)]
        start_time = None
        obj = self._client.bucket(bucket).get_blob(key)
        # get_blob returns None when the key is not a single blob — fall back
        # to treating it as a prefix.
        multi = obj is None
        if multi:
            start_time = time.time()
            termlog('Generating checksum for up to %i objects with prefix "%s"... ' % (max_objects, key), newline=False)
            objects = self._client.bucket(bucket).list_blobs(prefix=key, max_results=max_objects)
        else:
            objects = [obj]

        entries = [self._entry_from_obj(obj, path, name, prefix=key, multi=multi) for obj in objects]
        if start_time is not None:
            termlog('Done. %.1fs' % (time.time() - start_time), prefix=False)
        if len(entries) >= max_objects:
            raise ValueError('Exceeded %i objects tracked, pass max_objects to add_reference' % max_objects)
        return entries