def bundle_command(args, rc):
    import os
    import shutil
    import yaml
    from databundles.identity import Identity
    from databundles.identity import DatasetNumber

    if args.subcommand == 'new':
        # Build the directory name from the name parts, dropping the
        # creator code and revision.
        name = '-'.join(Identity.name_parts(args)[:-2])

        if not os.path.exists(name):
            os.makedirs(name)
        elif not os.path.isdir(name):
            raise IOError("Path exists, but is not a directory: " + name)

        config = {'identity': {
            'id': str(DatasetNumber()),
            'source': args.source,
            'creator': args.creator,
            'dataset': args.dataset,
            'subset': args.subset,
            'variation': args.variation,
            'revision': args.revision
        }}

        # Write the identity configuration to the new bundle's bundle.yaml.
        file_ = os.path.join(name, 'bundle.yaml')
        with open(file_, 'w') as f:
            yaml.dump(config, f, indent=4, default_flow_style=False)

        # Copy the skeleton bundle.py into the new bundle directory.
        bundle_file = os.path.join(os.path.dirname(__file__), 'support', 'bundle.py')
        shutil.copy(bundle_file, name)
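# For illustration only: with hypothetical arguments (source 'example.com',
# dataset 'crime', subset 'incidents', variation 'orig', creator 'ek',
# revision 1), the bundle.yaml written above would look roughly like the
# following; yaml.dump sorts the keys, and the id is a generated DatasetNumber.
#
#   identity:
#       creator: ek
#       dataset: crime
#       id: <generated DatasetNumber>
#       revision: 1
#       source: example.com
#       subset: incidents
#       variation: orig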
def dependencies(self):
    '''Return a dict mapping each source bundle name to the set of bundle
    names it depends on.'''
    from collections import defaultdict
    import os
    from databundles.identity import Identity
    from databundles.run import import_file

    if not self._dependencies:
        depset = defaultdict(set)

        # Walk the source directory, loading each bundle and collecting the
        # dependencies declared in its library configuration.
        for root, _, files in os.walk(self.dir_):
            if 'bundle.yaml' in files:
                rp = os.path.realpath(os.path.join(root, 'bundle.py'))
                mod = import_file(rp)
                bundle = mod.Bundle(root)

                deps = bundle.library.dependencies
                for _, v in deps.items():
                    # Parse the name to strip the revision.
                    ident = Identity.parse_name(v)
                    depset[bundle.identity.name].add(ident.name)

        self._dependencies = depset

    return dict(self._dependencies.items())
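# A minimal sketch, with hypothetical bundle names, of the shape of the dict
# the dependencies property returns: each source bundle name maps to the set
# of bundle names it depends on. Nothing here is part of the library.
_example_dependencies = {
    'example.com-geo': set(),
    'example.com-crime': {'example.com-geo'},
    'example.com-report': {'example.com-crime', 'example.com-geo'},
}

assert _example_dependencies['example.com-report'] == {'example.com-crime', 'example.com-geo'}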
def bundle_deps(self, name, reverse=False):
    '''Return the dependencies for a particular bundle, deepest first. With
    reverse=True, return the bundles that depend on it instead.'''
    from databundles.identity import Identity

    ident = Identity.parse_name(name)
    name = ident.name

    out = []
    all_deps = self.dependencies

    if reverse:
        # Walk up the graph: find every bundle that, directly or
        # transitively, depends on this one.
        out = set()

        def reverse_set(name):
            o = set()
            for k, v in all_deps.items():
                if name in v:
                    o.add(k)
            return o

        deps = reverse_set(name)

        while len(deps):
            out.update(deps)

            next_deps = set()
            for dep in deps:
                next_deps.update(reverse_set(dep))

            deps = next_deps

        out = list(out)

    else:
        # Walk down the graph, breadth-first, collecting everything this
        # bundle depends on.
        deps = all_deps.get(ident.name, [])

        while len(deps) > 0:
            out += deps

            next_deps = []
            for d in deps:
                if d in all_deps:
                    next_deps += all_deps[d]

            deps = next_deps

    # Deduplicate from the far end, so the deepest dependencies come first.
    final = []
    for n in reversed(out):
        if n not in final:
            final.append(n)

    return final
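# A simplified, self-contained sketch of the forward traversal that
# bundle_deps performs, using a hypothetical three-bundle graph. It is not
# part of the library; it only illustrates the ordering guarantee: the
# deduplication over reversed(out) puts the deepest dependencies first, so
# every bundle appears before anything that depends on it.
def _sketch_forward_deps(all_deps, name):
    out = []
    deps = list(all_deps.get(name, []))

    # Breadth-first walk down the dependency graph.
    while deps:
        out += deps
        next_deps = []
        for d in deps:
            next_deps += all_deps.get(d, [])
        deps = next_deps

    # Deduplicate from the far end, keeping the deepest dependencies first.
    final = []
    for n in reversed(out):
        if n not in final:
            final.append(n)
    return final

# Example: the report's dependencies come back in build order, deepest first:
# ['example.com-geo', 'example.com-crime']
print(_sketch_forward_deps(
    {'example.com-report': {'example.com-crime', 'example.com-geo'},
     'example.com-crime': {'example.com-geo'},
     'example.com-geo': set()},
    'example.com-report'))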
def create(self):
    """Create the database from the base SQL"""
    if not self.exists():
        import databundles  #@UnresolvedImport
        from databundles.orm import Dataset
        from databundles.identity import Identity

        try:
            script_str = os.path.join(os.path.dirname(databundles.__file__),
                                      Database.PROTO_SQL_FILE)
        except:
            # Not sure where to find pkg_resources, so this will probably fail.
            from pkg_resources import resource_string  #@UnresolvedImport
            script_str = resource_string(databundles.__name__, Database.PROTO_SQL_FILE)

        dir_ = os.path.dirname(self.path)
        if not os.path.isdir(dir_):
            os.makedirs(dir_)

        self.load_sql(script_str)

        # Create the Dataset
        s = self.session
        ds = Dataset(**self.bundle.config.identity)
        ds.name = Identity.name_str(ds)

        s.add(ds)
        s.commit()

        # Call the post-create function
        if self._post_create:
            self._post_create(self)

    return self
def get_ref(self, bp_id):
    '''Resolve a reference, either an object number or a name, to a dataset
    and partition, checking the local library first and then the remote.'''
    from databundles.identity import (ObjectNumber, DatasetNumber,
                                      PartitionNumber, Identity)

    if isinstance(bp_id, Identity):
        if bp_id.id_:
            bp_id = bp_id.id_
        else:
            bp_id = bp_id.name

    # If dataset is not None, it means the file already is in the cache.
    dataset = None
    partition = None

    # First, try the reference as an object number.
    try:
        on = ObjectNumber.parse(bp_id)

        if not (isinstance(on, DatasetNumber) or isinstance(on, PartitionNumber)):
            raise ValueError("Object number must be for a Dataset or Partition: {}".format(bp_id))

        dataset, partition = self._get_bundle_path_from_id(bp_id)  #@UnusedVariable
    except:
        pass

    # Try it as a dataset name
    if not dataset:
        r = self.find(QueryCommand().identity(name=bp_id))

        if len(r) > 1:
            raise Exception("Got more than one result")
        elif len(r) == 0:
            r = None
        else:
            r = r.pop()

        if r:
            dataset, partition = self._get_bundle_path_from_id(r.id_)

    # Try the name as a partition name
    if not dataset:
        q = self.find(QueryCommand().partition(name=bp_id))

        if q:
            r = q.pop(0)

            if r:
                dataset, partition = self._get_bundle_path_from_id(r.id_)

    # No luck so far, so now try to get it from the remote library
    if not dataset and self.remote:
        import socket

        try:
            r = self.remote.find(bp_id)

            if r:
                r = r[0]

                if r.is_partition:
                    dataset = r.as_dataset
                    partition = r
                else:
                    dataset = r
                    partition = None
        except socket.error:
            self.logger.error("Connection to remote library failed")

    elif dataset:
        from databundles.identity import new_identity

        dataset = Identity(**dataset.to_dict())
        partition = new_identity(partition.to_dict()) if partition else None

    if not dataset:
        return False, False

    return dataset, partition
def source_build(args, rc, src):
    '''Build a single bundle, or a set of bundles in a directory. The build
    process will build all dependencies for each bundle before building the
    bundle itself.'''
    from databundles.identity import Identity
    from ..source.repository import new_repository

    repo = new_repository(rc.sourcerepo(args.name))

    dir_ = None
    name = None

    if args.dir:
        if os.path.exists(args.dir):
            dir_ = args.dir
            name = None
        else:
            name = args.dir
            try:
                Identity.parse_name(name)
            except:
                err("Argument '{}' must be either a bundle name or a directory", name)
                return

    if not dir_:
        dir_ = rc.sourcerepo.dir

    def build(bundle_dir):
        from databundles.library import new_library
        from databundles.source.repository.git import GitShellService

        # Stash must happen before pull, and pull must happen before the
        # class is loaded in load_bundle, otherwise the class can't be
        # updated by the pull. And we have to use the GitShellService
        # directly, because the new_repository route will load the bundle.
        gss = GitShellService(bundle_dir)

        if args.stash:
            prt("{} Stashing ", bundle_dir)
            gss.stash()

        if args.pull:
            prt("{} Pulling ", bundle_dir)
            gss.pull()

        # Import the bundle file from the directory
        bundle_class = load_bundle(bundle_dir)
        bundle = bundle_class(bundle_dir)

        l = new_library(rc.library(args.library))

        if l.get(bundle.identity.vid) and not args.force:
            prt("{} Bundle is already in library", bundle.identity.name)
            return
        elif bundle.is_built and not args.force and not args.clean:
            prt("{} Bundle is already built", bundle.identity.name)
            return
        else:
            if args.dryrun:
                prt("{} Would build but in dry run ", bundle.identity.name)
                return

            repo.bundle = bundle

            if args.clean:
                bundle.clean()

            # Re-create after cleaning is important for something ...
            bundle = bundle_class(bundle_dir)

            prt("{} Building ", bundle.identity.name)

            if not bundle.run_prepare():
                err("{} Prepare failed", bundle.identity.name)

            if not bundle.run_build():
                err("{} Build failed", bundle.identity.name)

        if args.install and not args.dryrun:
            if not bundle.run_install(force=True):
                err('{} Install failed', bundle.identity.name)

    build_dirs = {}

    # Find all of the dependencies for the named bundle, and make those first.
    for root, _, files in os.walk(rc.sourcerepo.dir):
        if 'bundle.yaml' in files:
            bundle_class = load_bundle(root)
            bundle = bundle_class(root)
            build_dirs[bundle.identity.name] = root

    if name:
        deps = repo.bundle_deps(name)
        deps.append(name)
    else:
        deps = []

        # Walk the subdirectory for the files to build, and add all of
        # their dependencies.
        for root, _, files in os.walk(dir_):
            if 'bundle.yaml' in files:
                bundle_class = load_bundle(root)
                bundle = bundle_class(root)

                for dep in repo.bundle_deps(bundle.identity.name):
                    if dep not in deps:
                        deps.append(dep)

                deps.append(bundle.identity.name)

    for n in deps:
        try:
            dir_ = build_dirs[n]
        except KeyError:
            err("Failed to find directory for bundle {}".format(n))

        prt('')
        prt("{} Building in {}".format(n, dir_))
        build(dir_)