def store_datasets(dataset_names, remove_local = False, elements = None): datasetnames = dataset_names if len(args.datasets)>5: if not args.all: print tc.colored( "%d datasets, showing first 5, use --all to show all" % len(args.datasets), "red", "on_white") datasetnames = args.datasets[:5] for fname in datasetnames: print " DATASET: %s" % tc.colored(fname, attrs=["bold"]) setref = GeneralData.create_data_object(fname) if setref == None: continue setref.put("_data.warehouse.types", setref.get_types()) if elements: setref.put("_data.warehouse.elements", elements) # populate_region may rely on the elements if hasattr(setref, "populate_region"): setref.populate_region() pkg = package_class(setref=setref) setref.put("_data.warehouse.store_path", pkg.get_store_path(setref, elements = elements)) setref.put("_data.warehouse.store_dir", os.path.dirname(pkg.get_store_path(setref))) print " TYPES: %s" % tc.colored(", ".join( setref.get_types() ) , "blue", "on_white") print "STORE_KEY: %s" % pkg.get_store_path(setref) print "STORE_DIR: %s" % os.path.dirname(pkg.get_store_path(setref)) setref.do_write_header() if args.store or args.archive: pkg.transport_to_warehouse(remove_local = remove_local)
def sample_shape(self, line): args = line.split() options = parser.parse_args(args = args) phrase = options.phrase print "Looking for '%s' in fieldname" % (phrase) mdb = MDBStorage(collection_name = "shapes") reg = ".*?%s.*?" % phrase cursor = mdb.collection.find({"geojson.feature.properties.fieldname": {"$regex": reg} } ) numfound = cursor.count() if numfound == 0: print tc.colored("NO SHAPES MATCH", "red") return None print "numfound = %d selecting 0'th" % numfound if options.verbose: outd = [] for shape in cursor: outd.append(shape["_id"]) print ks.dict2pretty("shapes found", outd) hbshape = HBShapeData({"setref":cursor[0]}) addn = {"hbshape": hbshape, "shape_id": hbshape.meta("_id"), "loaded_from_mongo":True} addnmsg = {"hbshape":type(hbshape), "shape_id": addn["shape_id"], "loaded_from_mongo":True} print ks.dict2pretty("adding to namespace", addnmsg) self.ipython.push(addn) return None
def terminal_string(pl):
    """Render each part of *pl* as one padded text row; terminal parts get a
    yellow-highlighted background. Returns the rows joined by newlines."""
    rendered = []
    for part in pl.parts:
        row = "".join([
            ksutil._pad_str(str(part.terminal()), 7),
            ksutil._pad_str(str(part.key), 10),
            ksutil._pad_str(str(part.struct), 20),
            "key is %s" % type(part.key),
        ])
        if part.terminal():
            # highlight terminal parts, then reset coloring with a plain space
            row = tc.colored(row, color=None, on_color="on_yellow", attrs=[])
            row += tc.colored(" ", color=None, on_color=None)
        rendered.append(row)
    return "\n".join(rendered)
def pretty_string(self, start_indent=0):
    """Return a three-line, indented summary of this data object: its
    filename, its data types, and its Python class."""
    indent = ksutil.calc_fulltab(start_indent)
    name_line = "%s" % tc.colored("filename :", attrs=["bold"]) \
        + " %s/%s" % (self.dirname, tc.colored(self.basename, attrs=["bold"]))
    types_line = "%s" % (tc.colored("data types:", attrs=["bold"])
                         + " %s" % repr(self.get_types()))
    repr_line = "%s" % (tc.colored("data_obj :", attrs=["bold"])
                        + " %s" % type(self))
    return "\n".join(["%s%s" % (indent, name_line),
                      "%s%s" % (indent, types_line),
                      "%s%s" % (indent, repr_line)])
def terminal_string(pl):
    """Build a printable dump of *pl*'s parts, one padded line per part,
    with terminal parts drawn on a yellow background."""
    out = []
    for idx in range(len(pl.parts)):
        part = pl.parts[idx]
        columns = [ksutil._pad_str(str(part.terminal()), 7),
                   ksutil._pad_str(str(part.key), 10),
                   ksutil._pad_str(str(part.struct), 20),
                   "key is %s" % type(part.key)]
        text = "".join(columns)
        if part.terminal():
            # colorize the row, then append an uncolored space as a reset
            text = tc.colored(text, color=None, on_color="on_yellow", attrs=[])
            text += tc.colored(" ", color=None, on_color=None)
        out.append(text)
    return "\n".join(out)
def pretty_string(self, start_indent=0):
    """Summarize filename, data types and object class as indented,
    newline-separated text."""
    pieces = []
    bold_base = tc.colored(self.basename, attrs=["bold"])
    pieces.append("%s" % tc.colored("filename :", attrs=["bold"])
                  + " %s/%s" % (self.dirname, bold_base))
    pieces.append("%s" % (tc.colored("data types:", attrs=["bold"])
                          + " %s" % repr(self.get_types())))
    pieces.append("%s" % (tc.colored("data_obj :", attrs=["bold"])
                          + " %s" % type(self)))
    retstr = ""
    sep = ""
    for piece in pieces:
        retstr += "%s%s%s" % (sep, ksutil.calc_fulltab(start_indent), piece)
        sep = "\n"
    return retstr
def sample_shape(self, line): args = line.split() options = parser.parse_args(args=args) phrase = options.phrase print "Looking for '%s' in fieldname" % (phrase) mdb = MDBStorage(collection_name="shapes") reg = ".*?%s.*?" % phrase cursor = mdb.collection.find( {"geojson.feature.properties.fieldname": { "$regex": reg }}) numfound = cursor.count() if numfound == 0: print tc.colored("NO SHAPES MATCH", "red") return None print "numfound = %d selecting 0'th" % numfound if options.verbose: outd = [] for shape in cursor: outd.append(shape["_id"]) print ks.dict2pretty("shapes found", outd) hbshape = HBShapeData({"setref": cursor[0]}) addn = { "hbshape": hbshape, "shape_id": hbshape.meta("_id"), "loaded_from_mongo": True } addnmsg = { "hbshape": type(hbshape), "shape_id": addn["shape_id"], "loaded_from_mongo": True } print ks.dict2pretty("adding to namespace", addnmsg) self.ipython.push(addn) return None
def showInputs(self, rc):
    """Primitive: log every input in *rc*, numbered, as either its JSON
    repr or its pretty string depending on rc["use_repr"]."""
    # log.fullinfo("helloWorld")
    from astrodata.adutils import termcolor as tc
    inputs = rc.get_inputs()
    # print "primitives_NOVEM: JSONPrimitives.helloWorld(..)"
    log.status("%d inputs" % len(inputs))
    counter = 0
    for inp in inputs:
        shown = repr(inp.json) if rc["use_repr"] else inp.pretty_string()
        counter += 1
        log.status(tc.colored("#%d" % counter, "grey", "on_white"))
        log.status(shown)
        log.debug(inp.pretty_setref())
    yield rc
def showInputs(self, rc):
    """Primitive generator: emit a numbered status line per input, showing
    JSON repr or pretty string per rc["use_repr"], then yield rc."""
    # log.fullinfo("helloWorld")
    from astrodata.adutils import termcolor as tc
    inps = rc.get_inputs()
    # print "primitives_NOVEM: JSONPrimitives.helloWorld(..)"
    log.status("%d inputs" % len(inps))
    for idx, inp in enumerate(inps, start=1):
        if rc["use_repr"]:
            shown = repr(inp.json)
        else:
            shown = inp.pretty_string()
        log.status(tc.colored("#%d" % idx, "grey", "on_white"))
        log.status(shown)
        log.debug(inp.pretty_setref())
    yield rc
def ingestion_loop_iterator(watch):
    """Generator that repeatedly polls *watch*: when a check is due and
    produces a result, yields a "watching" status dict; otherwise yields
    None so the caller can idle.

    NOTE(review): ``done`` is never set to True, so the loop never exits on
    its own and the trailing "exiting" yield appears unreachable — confirm
    whether the consumer is expected to break out externally.
    """
    done = False
    while not done:
        outcome = None
        is_due = watch.is_due_for_check()
        if is_due:
            sys.stdout.write(
                tc.colored("\rchecking at %s (d_p243)%s" % (datetime.now(), ""),
                           "blue"))  # "\033[J"),
            sys.stdout.flush()
            outcome = watch.do_check()
        # print "d_p58: due?", watchdue
        if outcome:
            yield {"state": "watching",
                   "was_due": is_due,
                   "prefix": watch.prefix,
                   "result": outcome}
        else:
            yield None
    yield {"state": "exiting"}
def write(self, suffix=None, ** args): # make our filename relative to the output dir oldname = self.filename outname = os.path.join(self.output_directory, self.basename) self.filename = outname newname = self.filename if suffix: newname = self.add_suffix(suffix) if os.path.exists(newname): if ("be_quiet" not in args) or (args["be_quiet"] != True): print "(gd225) %s already exists" % newname # check exists_policy if self.allow_extant_write(): # e.g.: SetrefData simply moves the extant out of the way using a ";N" postfix if ("be_quiet" not in args) or (args["be_quiet"] != True): print " but %s allows extant write" % tc.colored(repr(type(self)), attrs=["bold"]) else: raise GD_OperationNotAllowed("General Data does not allow overwriting data by default.") self.do_write(newname) self._saved = True return True
def dict2pretty(name, var, indent=0, namewidth = None, complete = False, say_type = None):
    """Recursively render ``var`` as an indented, terminal-colored pretty
    string labeled ``name``.

    Dicts recurse per sorted key; lists either collapse to a one-line
    string (short, all-reprs fit in 120 chars), show first/middle/last
    (long lists, unless ``complete``), or render every element; anything
    else prints as ``name = value <type>``.

    :param name: label for this node
    :param var: value to render
    :param indent: nesting depth; at 0 the final result is stripped
    :param namewidth: pad width for the label column (computed when None)
    :param complete: when True, long lists are shown in full
    :param say_type: override the displayed type (used when a list was
        flattened into a single line so the list type is still reported)
    """
    #retstr = pformat(var)
    retstr = u""
    fulltab = calc_fulltab(indent)
    tabspc = calc_fulltab(1)
    _o_namewidth = namewidth
    if not namewidth:
        namewidth = len(str(name))
    if isinstance(var, dict):
        # header line: bold key plus dimmed type
        retstr += "\n%(indent)s%(key)s %(type)s:" % {
            "indent":fulltab,
            "key": tc.colored(name, attrs=["bold"]),
            "type": tc.colored(repr(type(var)), attrs=["dark"]),
            "extra": tabspc  # NOTE(review): unused by the format string (harmless)
        }
        # children share one label width so values line up
        sub_namewidth = maxkeylen(var)
        #print "ks19: sub_nw=", sub_namewidth
        if len(var) == 0:
            retstr += "\n%(indent)s%(tab)s:::empty:::" % {"indent":fulltab, "tab":tabspc }
        keys = var.keys()
        keys.sort()
        for key in keys:
            value = var[key]
            #print key,value
            newstr = dict2pretty(key, value, indent+1, namewidth = sub_namewidth )
            #print "ks28: indent =", indent, namewidth, _o_namewidth
            #print "ks31: newstr", newstr
            retstr += newstr
    elif isinstance(var, list):
        retstr += "\n%(indent)s%(key)s %(type)s:" % {
            "indent":fulltab,
            "key": tc.colored(name, attrs=["bold"]),
            "type": tc.colored(repr(type(var)), attrs=["dark"]),
        }
        listlen = len(var)
        if len(var) < 50:
            # try to collapse a short list onto one line; strings are joined
            # verbatim, anything else via repr
            allstr = True
            reprline = []
            for v in var:
                reprline.append( repr(v))
                if not isinstance(v, basestring):
                    allstr = False
            if allstr:
                oneline = ", ".join(var)
            else:
                oneline = ", ".join(reprline)
            if len(oneline)<120:
                # early return: re-render the flattened line as a scalar but
                # keep reporting the original list type via say_type
                return dict2pretty(name, oneline, indent, say_type = type(var), namewidth = namewidth)
        if listlen > 10 and not complete == True:
            # long list: sample first, middle and last elements only
            last = listlen - 1
            mid = int(last/2);
            retstr += dict2pretty("[0]", var[0], indent+1, namewidth = namewidth)
            retstr += dict2pretty("[%d]"%mid, var[mid],indent+1, namewidth = namewidth)
            retstr += dict2pretty("[%d]"%last, var[last], indent+1, namewidth = namewidth)
        else:
            for i in range(0, listlen):
                key = "[%d]"%i
                value = var[i];
                if hasattr(value, "pretty_string"):
                    # objects that know how to render themselves do so
                    retstr += tc.colored("\n[%d]" % i, attrs=["bold"])
                    retstr += value.pretty_string(start_indent = indent+1)
                else:
                    retstr += dict2pretty(key, value, indent+1, namewidth = namewidth)
    else:
        # scalar leaf: decide the displayed type string
        if say_type:
            stype = repr(say_type)
        else:
            vtype = type(var)
            if vtype.__name__ not in dir(__builtins__):
                # NOTE(review): this "<object>" value is dead — it is
                # immediately overwritten on the next line, so non-builtin
                # types still show their repr; confirm intended behavior
                stype ="<object>"
                stype = repr(type(var))
            else:
                stype = repr(type(var))
        if isinstance(var, basestring):
            pvar = var.strip()
        else:
            pvar = repr(var)
        retstr += "\n%(indent)s%(key)s = %(val)s %(type)s" % {
            "indent": fulltab,
            "key": tc.colored( _pad_str(name, namewidth) , attrs=["bold"]),
            "type": tc.colored( _pad_str(stype, len(stype)), attrs=["dark"]),
            "val": unicode(_pad_str(pvar,20), errors="replace")
        }
    if indent == 0:
        # top level: drop the leading newline introduced by the templates
        retstr = retstr.strip()
    return retstr
def do_check(self): ### get files now = datetime.now() self._last_check = now ret = {} slist = self.ingest_package.get_store_list(elements = self.source["elements"]) for filnam in slist: basename = os.path.basename(filnam) if len(basename)>0: if "transferred_files" not in ret: ret["transferred_files"] = [] indivpkg = self.ingest_package.__class__(storename = filnam) indivpkg.deliver_from_warehouse(move = "_ingested") ret["transferred_files"].append(filnam) ## RUN COMMANDS # if document is a known type (see warehouse_daemon.py in lookups) xfers = None if "transferred_files" in ret: xfers = ret["transferred_files"] if xfers: if "commands" in self.source: commands = self.source["commands"] for command in commands: patt = command["file_pattern"] for filename in xfers: if re.match(patt, filename): if False: # if command["clean_working_directory"]: # @@WARN: possibly dangerous, Ideally isolate # the daemon with it's own account/permisions rmcontents = os.getcwd() shutil.rmtree(rmcontents) os.mkdir(rmcontent) for command_line in command["command_lines"]: vargs = {"dataset": os.path.basename(filename), "context": ConfigSpace.get_current_default_context() } command_line = command_line.format(**vargs) print tc.colored("-"*(len(command_line)+len("running:")+2), None, "on_green") print tc.colored("running:", None, "on_green"), "%s" % command_line print tc.colored("-"*(len(command_line)+len("running:")+2), None, "on_green") cmdparts = command_line.split() # check parts you want to glob convparts = [] for part in cmdparts: if "*" in part: convparts.extend(glob(part)) else: convparts.append(part) exit_code = subprocess.call(convparts) print " exit_code = %s" % exit_code if command["clean_working_directory"]: # @@WARN: possibly dangerous, Ideally isolate # the daemon with it's own account/permisions rmcontents = os.getcwd() clear_directory(rmcontents) if not ret: return None else: return ret
def sample_tiff(self, line): import glob args = line.split() options = parser.parse_args(args=args) date_r = options.date_range phrase = options.phrase verbose = options.verbose index = int(options.index) settype = options.settype make_list = options.make_list directory = options.dir canddict = {} # options.verbose used below print "Using date_range = %s and phrase = %s" % (date_r, phrase) globpart = ( "*%(datestr)s*%(phrase)s*.tif" % { "datestr": date_r, # can't be range atm "phrase": phrase }) if not options.cwd: dirs = self.get_config_paths() datadir = dirs["processed_data"] % os.environ else: datadir = os.path.abspath(os.curdir) print "Data Directory: %s" % datadir if options.subdir: datadir = os.path.join(datadir, options.subdir) globpath = os.path.join(datadir, globpart) fils = glob.glob(globpath) if len(fils) == 0: print tc.colored("globbed: %s" % globpath, "blue") print tc.colored("NO FILES MATCH", "red") return None hbgeolist = [] for fil in fils: canddict[fil] = {"path": fil, "basename": os.path.basename(fil)} if make_list: hbgeolist.append(HBGeoTIFF(fil)) if settype or verbose: for fil in fils: dat = HBGeoTIFF(fil) canddict[fil]["types"] = dat.get_types() dat = None if verbose: print ks.dict2pretty("found", [{ "name": canddict[fil]["basename"], "types": canddict[fil]["types"] } for fil in fils]) print "found %d files, choosing image index = %d" % (len(fils), index) fil = fils[index] hbgeo = HBGeoTIFF(fil) ipython = self.ipython addn = {"hbgeo": hbgeo, "tiffname": fil} addn["hbgeos"] = hbgeolist addnmsg = {"hbgeo": type(hbgeo), "tiffname": os.path.basename(fil)} print ks.dict2pretty("adding to namespace", addnmsg) ipython.push(addn) return None
def sample_tiff(self, line): import glob args = line.split() options = parser.parse_args(args = args) date_r = options.date_range phrase = options.phrase verbose = options.verbose index = int(options.index) settype = options.settype make_list = options.make_list directory = options.dir canddict = {} # options.verbose used below print "Using date_range = %s and phrase = %s" % (date_r, phrase) globpart = ("*%(datestr)s*%(phrase)s*.tif" % { "datestr":date_r, # can't be range atm "phrase":phrase } ) if not options.cwd: dirs = self.get_config_paths() datadir = dirs["processed_data"]% os.environ else: datadir = os.path.abspath(os.curdir) print "Data Directory: %s" % datadir if options.subdir: datadir = os.path.join(datadir, options.subdir) globpath = os.path.join(datadir, globpart) fils = glob.glob(globpath) if len(fils) == 0: print tc.colored("globbed: %s" %globpath, "blue") print tc.colored("NO FILES MATCH", "red") return None hbgeolist = [] for fil in fils: canddict[fil] = { "path": fil, "basename": os.path.basename(fil) } if make_list: hbgeolist.append(HBGeoTIFF(fil)) if settype or verbose: for fil in fils: dat = HBGeoTIFF(fil) canddict[fil]["types"] = dat.get_types() dat = None if verbose: print ks.dict2pretty("found", [ { "name" :canddict[fil]["basename"], "types":canddict[fil]["types"] } for fil in fils]) print "found %d files, choosing image index = %d" % (len(fils), index) fil = fils[index] hbgeo = HBGeoTIFF(fil) ipython = self.ipython addn = {"hbgeo": hbgeo, "tiffname": fil} addn["hbgeos"] = hbgeolist addnmsg = {"hbgeo":type(hbgeo), "tiffname": os.path.basename(fil)} print ks.dict2pretty("adding to namespace", addnmsg) ipython.push(addn) return None
remove_local = False if args.remove_local != None: remove_local = args.remove_local for key in package_class_struct: package_key = key package_class = package_class_struct[key] break; # only one supported atm, always first, controlled by path order if args.info: print ks.dict2pretty("contributing files", package_classes["_contributors"]) for i in range(len(package_class_list)): package_def = package_class_list[i] print ks.dict2pretty("packager #%d" % i, package_def) print "choosing %s package class %s" % (tc.colored(key, attrs=["bold"]), tc.colored(package_class, attrs=["dark"])) pkg = package_class() print ks.dict2pretty("shelf_addresses", pkg.shelf_addresses) print ks.dict2pretty("type_shelf_names", pkg.type_shelf_names) print ks.dict2pretty("type_store_precedence", pkg.type_store_precedence) print ks.dict2pretty("daemon_settings: ingest_sources", dp.ingest_sources) if args.fetch : args.manifest = True if args.manifest: elements = { "shelf_name" : args.shelf, "out_shelf_name": args.out_shelf, } pkg = package_class() if args.date_range: