def store_path(self, artifact, path, name=None, checksum=True, max_objects=None):
    """Create manifest entries for the S3 object (or prefix of objects) at `path`.

    If `checksum` is falsy, a single reference entry is returned without
    contacting S3. Otherwise the object at `key` is loaded; a 404 means
    `key` is a prefix, in which case up to `max_objects` objects under it
    are enumerated and checksummed.
    """
    self.init_boto()
    bucket, key = self._parse_uri(path)
    max_objects = max_objects or DEFAULT_MAX_OBJECTS

    # Without checksumming we just record the reference as-is.
    if not checksum:
        return [ArtifactManifestEntry(name or key, path, digest=path)]

    multi = False       # becomes True when `key` turns out to be a prefix
    start_time = None
    objs = [self._s3.Object(bucket, key)]
    try:
        objs[0].load()
    except self._botocore.exceptions.ClientError as err:
        code = err.response['Error']['Code']
        if code != "404":
            raise CommError("Unable to connect to S3 (%s): %s" % (
                code, err.response['Error']['Message']))
        # No single object lives at `key`: treat it as a prefix listing.
        multi = True
        start_time = time.time()
        termlog('Generating checksum for up to %i objects with prefix "%s"... ' % (
            max_objects, key), newline=False)
        objs = self._s3.Bucket(bucket).objects.filter(Prefix=key).limit(max_objects)

    # Weird iterator scoping makes us assign this to a local function
    get_size = self._size_from_obj
    entries = [
        self._entry_from_obj(obj, path, name, prefix=key, multi=multi)
        for obj in objs
        if get_size(obj) > 0
    ]
    if start_time is not None:
        termlog('Done. %.1fs' % (time.time() - start_time), prefix=False)
    if len(entries) >= max_objects:
        raise ValueError('Exceeded %i objects tracked, pass max_objects to add_reference' % max_objects)
    return entries
def store_path(self, artifact, path, name=None, checksum=True, max_objects=None):
    """Create manifest entries for a local file or directory reference.

    `path` is a file:// style URI; its netloc+path is the local filesystem
    location. With `checksum` disabled a single opaque reference entry is
    returned. Directories are walked (symlinked files inside are followed
    by os.walk's default file handling) up to `max_objects` files.
    """
    url = urlparse(path)
    local_path = '%s%s' % (url.netloc, url.path)
    max_objects = max_objects or DEFAULT_MAX_OBJECTS
    # We have a single file or directory
    # Note, we follow symlinks for files contained within the directory
    entries = []
    if not checksum:
        return [ArtifactManifestEntry(name or os.path.basename(path), path, digest=path)]
    if os.path.isdir(local_path):
        i = 0
        start_time = time.time()
        termlog('Generating checksum for up to %i files in "%s"...' % (max_objects, local_path), newline=False)
        for root, dirs, files in os.walk(local_path):
            for sub_path in files:
                i += 1
                if i >= max_objects:
                    raise ValueError('Exceeded %i objects tracked, pass max_objects to add_reference' % max_objects)
                # BUG FIX: `sub_path` is a bare filename from os.walk; it must be
                # joined with `root` before stat/hashing, otherwise getsize and
                # md5_file_b64 resolve against the CWD and fail (or hash the
                # wrong file) for anything not at the top level.
                physical_path = os.path.join(root, sub_path)
                # Use the path relative to the walk root so files in nested
                # directories get unique entry names instead of colliding on
                # their basename.
                relative_path = os.path.relpath(physical_path, start=local_path)
                entry = ArtifactManifestEntry(
                    relative_path,
                    os.path.join(path, relative_path),
                    size=os.path.getsize(physical_path),
                    digest=md5_file_b64(physical_path))
                entries.append(entry)
        termlog('Done. %.1fs' % (time.time() - start_time), prefix=False)
    elif os.path.isfile(local_path):
        name = name or os.path.basename(local_path)
        entry = ArtifactManifestEntry(name, path, size=os.path.getsize(local_path), digest=md5_file_b64(local_path))
        entries.append(entry)
    else:
        # TODO: update error message if we don't allow directories.
        raise ValueError('Path "%s" must be a valid file or directory path' % path)
    return entries
def check_anonymous(self):
    """Offer to log runs anonymously when no API key is configured.

    Returns True only when the user accepts and an anonymous key is
    provisioned and persisted; False in every other case (key already
    set, no tty, anonymous mode not enabled, or user declines).
    """
    # If there's no API key set, ask if the run should be logged anonymously. Only launch this prompt in
    # environments with a tty.
    if not self.api.api_key and sys.stdin.isatty():
        # Require anonymous mode to be explicitly enabled for now
        if os.environ.get(env.ANONYMOUS) != "enable":
            return False
        termlog(
            'No API key found. Would you like to log runs anonymously to {}? (y/n)'
            .format(self.api.app_url))
        resp = str(input().lower().strip())
        # Re-prompt until we get an explicit yes/no answer.
        while not (resp == 'y' or resp == 'n'):
            termlog('Invalid response. Please enter y/n.')
            resp = str(input()).lower().strip()
        if resp == 'y':
            # Provision a throwaway key and surface the one-time login link.
            key = self.api.create_anonymous_api_key()
            url = self.api.app_url + '/login?apiKey={}'.format(key)
            termlog(
                'Your anonymous login link: {}. Do not share or lose this link!'
                .format(url))
            # Persist the key for this process, the netrc, and saved settings,
            # then re-authenticate the API client with it.
            os.environ[env.API_KEY] = key
            self.api.set_setting('anonymous', True)
            util.write_netrc(self.api.api_url, "user", key)
            util.write_settings(settings=self.api.settings())
            self.api.reauth()
            return True
    return False
def add_dir(self, local_path, name=None):
    """Add every file under `local_path` (symlinks followed) to the artifact.

    Each file becomes a manifest entry named by its path relative to
    `local_path`, optionally prefixed with `name`. Hashing is fanned out
    across a small thread pool since it is I/O bound.
    """
    self._ensure_can_add()
    if not os.path.isdir(local_path):
        raise ValueError('Path is not a directory: %s' % local_path)

    termlog('Adding directory to artifact (%s)... ' % os.path.join('.', os.path.normpath(local_path)), newline=False)
    start_time = time.time()

    # Collect (logical, physical) path pairs for every file in the tree.
    pairs = []
    for dirpath, _, filenames in os.walk(local_path, followlinks=True):
        for filename in filenames:
            physical = os.path.join(dirpath, filename)
            logical = os.path.relpath(physical, start=local_path)
            if name is not None:
                logical = os.path.join(name, logical)
            pairs.append((logical, physical))

    def hash_and_add(pair):
        # Checksum one file and record it in the manifest.
        logical, physical = pair
        self._manifest.add_entry(
            ArtifactManifestEntry(
                logical,
                None,
                digest=md5_file_b64(physical),
                size=os.path.getsize(physical),
                local_path=physical,
            )
        )

    import multiprocessing.dummy  # thread-backed Pool, not processes
    pool = multiprocessing.dummy.Pool(8)
    pool.map(hash_and_add, pairs)
    pool.close()
    pool.join()
    termlog('Done. %.1fs' % (time.time() - start_time), prefix=False)
def store_path(self, artifact, path, name=None, checksum=True, max_objects=None):
    """Create manifest entries for the GCS blob (or prefix of blobs) at `path`.

    If `checksum` is falsy, a single reference entry is returned without
    contacting GCS. Otherwise the blob at `key` is fetched; if it does not
    exist, `key` is treated as a prefix and up to `max_objects` blobs under
    it are listed and checksummed.
    """
    self.init_gcs()
    bucket, key = self._parse_uri(path)
    max_objects = max_objects or DEFAULT_MAX_OBJECTS
    # Idiom/consistency fix: use truthiness like the S3 handler instead of
    # comparing against False with `==`.
    if not checksum:
        return [ArtifactManifestEntry(name or key, path, digest=path)]

    start_time = None
    obj = self._client.bucket(bucket).get_blob(key)
    # get_blob returns None when no single blob exists at `key`, meaning the
    # reference points at a prefix of multiple objects.
    multi = obj is None
    if multi:
        start_time = time.time()
        termlog('Generating checksum for up to %i objects with prefix "%s"... ' % (max_objects, key), newline=False)
        objects = self._client.bucket(bucket).list_blobs(prefix=key, max_results=max_objects)
    else:
        objects = [obj]
    entries = [self._entry_from_obj(obj, path, name, prefix=key, multi=multi) for obj in objects]
    if start_time is not None:
        termlog('Done. %.1fs' % (time.time() - start_time), prefix=False)
    if len(entries) >= max_objects:
        raise ValueError('Exceeded %i objects tracked, pass max_objects to add_reference' % max_objects)
    return entries