def toDatasets(dsstrs, delim=r'\s', eqdelim='='):
    """
    Convert dataset description strings into Dataset objects.

    Each string is split into tokens on any character matched by delim.
    A token that does not contain eqdelim names the dataset type; a token
    of the form NAME=VALUE (with eqdelim as the separator) supplies an
    identifier.  An identifier value is stored as an int when it parses
    as one and as the original string otherwise.

    @param dsstrs   the dataset description strings to convert
    @param delim    the token delimiters, written as the contents of a
                      regular expression character class
    @param eqdelim  the string separating an identifier name from its value
    @return list  one Dataset per input string, in input order.  A string
                      containing no type token gets the type "unknown".
    """
    out = []
    splitter = re.compile(r'[%s]' % delim)
    for dsstr in dsstrs:
        # Leading, trailing or repeated delimiters make split() return
        # empty tokens; drop them so that they are not mistaken for
        # (additional) dataset type names.
        args = [tok for tok in splitter.split(dsstr) if tok]

        types = [tok for tok in args if eqdelim not in tok]
        if len(types) > 1:
            # NOTE(review): fail() is defined elsewhere; if it returns
            # rather than aborting, the first type found is used below.
            fail("Dataset with multiple types specified: %s", dsstr)
        if not types:
            types = ["unknown"]

        ds = Dataset(types[0])
        ds.ids = {}
        for tok in args:
            if eqdelim not in tok:
                continue
            name, value = tok.split(eqdelim, 1)
            try:
                value = int(value)
            except ValueError:
                # not an integer: keep the value as a string
                pass
            ds.ids[name] = value

        out.append(ds)

    return out
def toDatasets(lines, ctrl, intids=None):
    """
    Convert one or more text lines into a list of Dataset objects.

    When ctrl["format"] is set, each line is handed to its parse() method
    and the results are collected.  Otherwise each line is split into
    tokens (on ctrl["iddelim"] if set, on whitespace if not): a token
    without ctrl["eqdelim"] names the dataset type and a NAME=VALUE token
    (with ctrl["eqdelim"] as the separator) supplies an identifier.

    @param lines   a line or a list of lines; anything that is not a list
                     is treated as a single line
    @param ctrl    a dictionary-like object providing the "format",
                     "iddelim", "eqdelim" and "intids" entries
    @param intids  the names of the identifiers whose values should be
                     converted to integers; if None, ctrl["intids"] is used
    @return list  the datasets, one per line, in input order
    @throws ValueError  if a line names more than one dataset type or if
                     an identifier listed in intids is not an integer
    """
    if not isinstance(lines, list):
        lines = [lines]
    if intids is None:
        intids = ctrl["intids"]

    if ctrl["format"]:
        # an explicit format object does all of the parsing
        return [ctrl["format"].parse(line) for line in lines]

    eqdelim = ctrl["eqdelim"]
    iddelim = ctrl["iddelim"]

    out = []
    for line in lines:
        if iddelim:
            args = line.split(iddelim)
        else:
            args = line.split()

        types = [arg for arg in args if eqdelim not in arg]
        if len(types) > 1:
            raise ValueError("Multiple dataset types given: " +
                             " ".join(types))
        if not types:
            types = ["unknown"]

        ds = Dataset(types[0])
        ds.ids = {}
        for arg in args:
            if eqdelim not in arg:
                continue
            name, value = arg.split(eqdelim, 1)
            ds.ids[name] = value

        if intids:
            # convert the values of the selected ids to integers
            for idname in intids:
                if idname in ds.ids:
                    try:
                        ds.ids[idname] = int(ds.ids[idname])
                    except ValueError:
                        raise ValueError("ID %s value is not an int: %s" %
                                         (idname, ds.ids[idname]))

        out.append(ds)

    return out
def _determineJobIdentity(self, outputs, inputs=None): # return an identifier for the job implied by the outputs and inputs. # this identifier is returned in the form of a Dataset type (even # though, semantically, it represents a job. if inputs is None: inputs = [] if self.jobIdConf: # determine our template dataset for our identity template = None if self.jobIdConf.exists("templateType"): # find first dataset (in output, then input) matching # this dataset type. type = self.jobIdConf.getString("templateType") selecttype = lambda d: d.type == type template = filter(selecttype, outputs) if len(template) == 0: template = filter(selecttype, inputs) if len(template) > 0: template = template[0] if not template: # default to the first output (then input) dataset template = len(outputs) > 0 and outputs[0] or inputs[0] out = Dataset(template.type) if self.jobIdConf.exists("type"): out.type = self.jobIdConf.getString("type") if self.jobIdConf.exists("id"): out.ids = {} for id in self.jobIdConf.getStringArray("id"): out.ids[id] = template.ids[id] # the identity dataset is complete return out elif len(outputs) > 0: return outputs[0] elif len(inputs) > 0: return inputs[0] else: return Dataset("unknown")
def parse(self, line):
    """
    Parse a line of text into a Dataset.

    The line is matched with self.search(); the named groups of the match
    become the dataset's ids, except for the group named "type", which
    gives the dataset type.  Values whose entry in self.ids is 'i' are
    converted to int and those whose entry is 'f' to float; a value that
    cannot be converted is left as a string and a warning is issued.

    @param line   the text to parse
    @return Dataset  the dataset described by the line; its type is
                    "unknown" if the match provides no type
    @throws ValueError  if the line does not match the format
    """
    m = self.search(line)
    if m is None:
        raise ValueError("Line does not match the dataset format: %s" %
                         line)
    data = m.groupdict()

    # Remove the type from the ids.  The pattern need not define a "type"
    # group (or the group may not have taken part in the match), in
    # which case the type defaults to "unknown".
    tp = data.pop("type", None)
    if tp is None:
        tp = "unknown"

    out = Dataset(tp)
    out.ids = data

    for key in list(out.ids):
        # NOTE(review): assumes self.ids has an entry for every named
        # group in the pattern -- confirm against the class constructor.
        kind = self.ids[key]
        if kind == 'i':
            try:
                out.ids[key] = int(out.ids[key])
            except ValueError:
                warn("Value is not an integer: %s", out.ids[key])
        elif kind == 'f':
            try:
                out.ids[key] = float(out.ids[key])
            except ValueError:
                warn("Value is not a floating-point number: %s",
                     out.ids[key])

    return out