Ejemplo n.º 1
0
 def start_element(self, name, attrs):
     """
     Function called by the parser every time a new element starts.

     Updates the parsing-state flags kept on ``self`` and copies the
     attributes listed for this element in ``self._ks_elements`` into
     ``self._keys``.

     :param name: name of the element being opened
     :param attrs: attribute name -> value mapping for the element
                   (expected to be dict-like)
     """
     wanted = self._ks_elements
     keys = self._keys

     def copy_wanted_attrs(target):
         # Copy the attributes configured for this element, when present
         for attr_name in wanted.get(name, ()):
             if attr_name in attrs:
                 target[attr_name] = attrs[attr_name]

     # Keep track if we are parsing the main job / machine element
     if name == "mainjob":
         self._parsing_main_job = True
     if name == "machine":
         self._parsing_machine = True
     # Keep track if we are inside one of the job elements
     if name in ("setup", "prejob", "mainjob", "postjob", "cleanup"):
         self._parsing_job_element = True

     if name == "argument-vector" and name in wanted:
         # Start parsing arguments
         self._parsing_arguments = True
     elif name == "cwd" and name in wanted:
         # Start parsing cwd
         self._parsing_cwd = True
     elif name == "checksum" and name in wanted:
         # PM-1180 <checksum type="sha256" value="f2307670158c64c4407971f8fad67772724b0bad92bfb48f386b0814ba24e3af"/>
         keys[name] = {}
         copy_wanted_attrs(keys[name])
     elif name == "data":
         # Start parsing data for stdout and stderr output
         self._parsing_data = True
     elif name == "file" and name in wanted:
         # Special case for name inside the mainjob element (will change this later)
         if self._parsing_main_job:
             copy_wanted_attrs(keys)
     elif name in ("ram", "uname") and name in wanted:
         # Special case for ram/uname inside the machine element (will change this later)
         if self._parsing_machine:
             copy_wanted_attrs(keys)
     elif name == "signalled":
         # PM-1109 grab the attributes we are interested in
         self._parsing_signalled = True
         signalled = {"action": ""}  # action is grabbed later in char data
         # .get(): do not fail when "signalled" is not a configured element
         wanted_here = wanted.get(name, ())
         for attr in attrs:
             if attr in wanted_here:
                 signalled[attr] = attrs[attr]
         keys[name] = signalled  # a dictionary indexed by attributes
     elif name == "statcall":
         statcall_id = attrs.get("id")
         if statcall_id == "stdout" and "stdout" in wanted:
             self._parsing_stdout = True
         elif statcall_id == "stderr" and "stderr" in wanted:
             self._parsing_stderr = True
         elif statcall_id == "final":
             self._parsing_final_statcall = True
             # A missing lfn is reported as a malformed record when the
             # statinfo element is handled, so it must not raise here
             self._lfn = attrs.get("lfn")
     elif name == "statinfo":
         if self._parsing_final_statcall is True:
             statinfo = FileMetadata()
             copy_wanted = wanted.get(name, ())
             for attr_name in copy_wanted:
                 if attr_name in attrs:
                     statinfo.add_attribute(attr_name, attrs[attr_name])
             lfn = self._lfn
             statinfo.set_id(lfn)
             if lfn is None or not statinfo:
                 logger.warning(
                     "Malformed/Empty stat record for output lfn %s %s",
                     lfn, statinfo)
             # "outputs" is a dictionary indexed by lfn
             keys.setdefault("outputs", {})[lfn] = statinfo
     elif name == "usage" and name in wanted:
         if self._parsing_job_element:
             # Special case for handling utime and stime, which need to be
             # added up over all the job elements
             for attr_name in wanted[name]:
                 if attr_name not in attrs:
                     continue
                 try:
                     value = float(attrs[attr_name])
                 except ValueError:
                     logger.warning(
                         "cannot convert element %s to float!", attr_name)
                     continue
                 if attr_name in keys:
                     keys[attr_name] = keys[attr_name] + value
                 else:
                     keys[attr_name] = value
     else:
         # For all other elements, check if we want them
         copy_wanted_attrs(keys)
Ejemplo n.º 2
0
 def start_element(self, name, attrs):
     """
     Function called by the parser every time a new element starts.

     Sets the parsing-state flags on ``self`` for the element being opened
     and stores in ``self._keys`` the attributes that ``self._ks_elements``
     lists for it.

     :param name: name of the element being opened
     :param attrs: attribute name -> value mapping for the element
                   (expected to be dict-like)
     """
     # Keep track if we are parsing the main job or the machine element
     if name == "mainjob":
         self._parsing_main_job = True
     elif name == "machine":
         self._parsing_machine = True
     # Keep track if we are inside one of the job elements
     if name in ("setup", "prejob", "mainjob", "postjob", "cleanup"):
         self._parsing_job_element = True

     # Attributes we want to keep for this element; empty when the element
     # is not configured, so the branches below never fail on a lookup
     wanted = self._ks_elements.get(name, ())

     # --- elements handled whether or not they are configured ---
     if name == "data":
         # Start parsing data for stdout and stderr output
         self._parsing_data = True
         return
     if name == "signalled":
         # PM-1109 grab the attributes we are interested in
         self._parsing_signalled = True
         signalled = {"action": ""}  # action is grabbed later in char data
         for attr in attrs:
             if attr in wanted:
                 signalled[attr] = attrs[attr]
         self._keys[name] = signalled  # a dictionary indexed by attributes
         return
     if name == "statcall":
         statcall_id = attrs.get("id")
         if statcall_id == "stdout" and "stdout" in self._ks_elements:
             self._parsing_stdout = True
         elif statcall_id == "stderr" and "stderr" in self._ks_elements:
             self._parsing_stderr = True
         elif statcall_id == "final":
             self._parsing_final_statcall = True
             # No lfn attribute -> None; the statinfo handling below
             # reports that as a malformed record instead of raising here
             self._lfn = attrs.get("lfn")
         return
     if name == "statinfo":
         if self._parsing_final_statcall is True:
             statinfo = FileMetadata()
             for attr_name in wanted:
                 if attr_name in attrs:
                     statinfo.add_attribute(attr_name, attrs[attr_name])
             if "outputs" not in self._keys:
                 self._keys["outputs"] = {}  # a dictionary indexed by lfn
             lfn = self._lfn
             statinfo.set_id(lfn)
             if lfn is None or not statinfo:
                 logger.warning(
                     "Malformed/Empty stat record for output lfn %s %s",
                     lfn, statinfo)
             self._keys["outputs"][lfn] = statinfo
         return

     # --- everything below only applies to configured elements ---
     if name not in self._ks_elements:
         return
     if name == "argument-vector":
         # Start parsing arguments
         self._parsing_arguments = True
         return
     if name == "cwd":
         # Start parsing cwd
         self._parsing_cwd = True
         return
     if name == "checksum":
         # PM-1180 <checksum type="sha256" value="f2307670158c64c4407971f8fad67772724b0bad92bfb48f386b0814ba24e3af"/>
         self._keys[name] = {
             attr_name: attrs[attr_name]
             for attr_name in wanted if attr_name in attrs
         }
         return
     if name == "usage":
         if self._parsing_job_element:
             # Special case for handling utime and stime, which need to be added
             for attr_name in wanted:
                 if attr_name not in attrs:
                     continue
                 try:
                     value = float(attrs[attr_name])
                 except ValueError:
                     logger.warning(
                         "cannot convert element %s to float!", attr_name)
                     continue
                 if attr_name in self._keys:
                     self._keys[attr_name] = self._keys[attr_name] + value
                 else:
                     self._keys[attr_name] = value
         return
     # Special cases (will change this later): "file" only counts inside
     # the mainjob element, "ram" and "uname" only inside the machine element
     if name == "file" and not self._parsing_main_job:
         return
     if name in ("ram", "uname") and not self._parsing_machine:
         return

     # For all remaining elements, keep the attributes we want
     for attr_name in wanted:
         if attr_name in attrs:
             self._keys[attr_name] = attrs[attr_name]
Ejemplo n.º 3
0
    def map_yaml_to_ver2_format(self, data):
        """
        Translate a record in the new yaml dict layout into the old v2
        layout that was used with the xml records.

        :param data: record in the new yaml dict format
        :return: dict in the old v2 format; metadata of the output files
                 is stored under "outputs", indexed by lfn
        """
        # Not mappable from the new format:
        #  "file": ["name"]

        # (path in the new format, path in the old format)
        translation = [
            (["hostname"], ["hostname"]),
            (["resource"], ["resource"]),
            (["user"], ["user"]),
            (["hostaddr"], ["hostaddr"]),
            (["transformation"], ["transformation"]),
            (["derivation"], ["derivation"]),
            (["mainjob", "duration"], ["duration"]),
            (["mainjob", "start"], ["start"]),
            (["usage", "utime"], ["utime"]),
            (["usage", "stime"], ["stime"]),
            (["machine", "ram_total"], ["ram"]),
            (["machine", "uname_system"], ["system"]),
            (["machine", "uname_release"], ["release"]),
            (["machine", "uname_machine"], ["machine"]),
            (["mainjob", "executable", "file_name"], ["name"]),
            (["mainjob", "status", "raw"], ["raw"]),
            (["mainjob", "status", "signalled_signal"], ["signal"]),
            (["mainjob", "status", "signalled_name"], ["action"]),
            (["mainjob", "status", "corefile"], ["corefile"]),
            (["mainjob", "status", "regular_exitcode"], ["exitcode"]),
            (["cwd"], ["cwd"]),
            (["files", "stdout", "data"], ["stdout"]),
            (["files", "stderr", "data"], ["stderr"]),
        ]

        # For reference, the elements/attributes of the old format:
        #   invocation: hostname, resource, user, hostaddr,
        #               transformation, derivation
        #   mainjob: duration, start
        #   usage: utime, stime
        #   ram: total
        #   uname: system, release, machine
        #   file: name
        #   status: raw
        #   signalled: signal, corefile, action
        #              (action is the char data in the signalled element)
        #   regular: exitcode
        #   argument-vector, cwd, stdout, stderr: (no attributes)
        #   statinfo: lfn, size, ctime, user
        #   checksum: type, value, timing
        #   type: type, value

        new_data = {"invocation": True, "checksum": {}, "outputs": {}}
        for new_path, old_path in translation:
            self.dicts_remap(data, new_path, new_data, old_path)

        # The remaining mappings are per lfn
        if "files" not in data:
            return new_data

        files = data["files"]
        for lfn in files:
            file_data = files[lfn]
            # only entries flagged as outputs are carried over
            if "output" not in file_data.keys() or not file_data["output"]:
                continue

            meta = FileMetadata()
            meta._id = lfn
            # Add whatever the 4.9 attributes are, e.g.
            #   {
            #     "_type": "file",
            #     "_id": "f.b2",
            #     "_attributes": {
            #       "ctime": "2019-02-19T16:42:52-08:00",
            #       "checksum.timing": "0.144",
            #       "user": "******",
            #       "checksum.type": "sha256",
            #       "checksum.value": "4a77bee20a28a446506ef7531ffc038053f52e5211d93a95fe5193746af8d23a",
            #       "size": "123"
            #     }
            #   },
            for stringified in ("user", "size"):
                if stringified in file_data:
                    meta.add_attribute(stringified,
                                       str(file_data[stringified]))
            if "ctime" in file_data:
                meta.add_attribute("ctime", file_data["ctime"])
            if "sha256" in file_data:
                meta.add_attribute("checksum.type", "sha256")
                meta.add_attribute("checksum.value", file_data["sha256"])
                if "checksum_timing" in file_data:
                    timing = str(file_data["checksum_timing"])
                    meta.add_attribute("checksum.timing", timing)
            # what else?

            new_data["outputs"][lfn] = meta

        return new_data
Ejemplo n.º 4
0
    def map_yaml_to_ver2_format(self, data):
        """
        Maps from new yaml dict format to old v2 format we used with the xml records

        :param data: record in the new yaml dict format
        :return: dict in the old v2 format; metadata of the files flagged
                 as outputs is stored under "outputs", indexed by lfn
        """
        # unmappable:
        #  "file": ["name"]

        # new format -> old format
        my_map = [
            [["hostname"], ["hostname"]],
            [["resource"], ["resource"]],
            [["user"], ["user"]],
            [["hostaddr"], ["hostaddr"]],
            [["transformation"], ["transformation"]],
            [["derivation"], ["derivation"]],
            [["mainjob", "duration"], ["duration"]],
            [["mainjob", "start"], ["start"]],
            [["usage", "utime"], ["utime"]],
            [["usage", "stime"], ["stime"]],
            [["machine", "ram_total"], ["ram"]],
            [["machine", "uname_system"], ["system"]],
            [["machine", "uname_release"], ["release"]],
            [["machine", "uname_machine"], ["machine"]],
            [["mainjob", "executable", "file_name"], ["name"]],
            [["mainjob", "status", "raw"], ["raw"]],
            [["mainjob", "status", "signalled_signal"], ["signal"]],
            [["mainjob", "status", "signalled_name"], ["action"]],
            [["mainjob", "status", "corefile"], ["corefile"]],
            [["mainjob", "status", "regular_exitcode"], ["exitcode"]],
            [["cwd"], ["cwd"]],
            [["files", "stdout", "data"], ["stdout"]],
            [["files", "stderr", "data"], ["stderr"]],
        ]

        # For reference, the old-format elements and their attributes:
        #   stampede_elements = {"invocation": ["hostname", "resource", "user", "hostaddr", "transformation", "derivation"],
        #                        "mainjob": ["duration", "start"],
        #                        "usage": ["utime", "stime"],
        #                        "ram": ["total"],
        #                        "uname": ["system", "release", "machine"],
        #                        "file": ["name"],
        #                        "status": ["raw"],
        #                        "signalled": ["signal", "corefile", "action"], #action is the char data in signalled element
        #                        "regular": ["exitcode"],
        #                        "argument-vector": [],
        #                        "cwd": [],
        #                        "stdout": [],
        #                        "stderr": [],
        #                        "statinfo": ["lfn", "size", "ctime", "user" ],
        #                        "checksum": ["type", "value", "timing"],
        #                        "type": ["type", "value"]}

        new_data = {"invocation": True, "checksum": {}, "outputs": {}}
        for new_path, old_path in my_map:
            self.dicts_remap(data, new_path, new_data, old_path)

        # some mappings are based on lfns
        if "files" in data:
            for lfn, file_data in data["files"].items():
                # only files flagged as outputs are carried over
                if not file_data.get("output", False):
                    continue
                meta = FileMetadata()
                meta._id = lfn

                # add whatever 4.9 attributes are
                #   {
                #     "_type": "file",
                #     "_id": "f.b2",
                #     "_attributes": {
                #       "ctime": "2019-02-19T16:42:52-08:00",
                #       "checksum.timing": "0.144",
                #       "user": "******",
                #       "checksum.type": "sha256",
                #       "checksum.value": "4a77bee20a28a446506ef7531ffc038053f52e5211d93a95fe5193746af8d23a",
                #       "size": "123"
                #     }
                #   },
                if "user" in file_data:
                    meta.add_attribute("user", str(file_data["user"]))
                if "size" in file_data:
                    meta.add_attribute("size", str(file_data["size"]))
                if "ctime" in file_data:
                    meta.add_attribute("ctime", file_data["ctime"])
                if "sha256" in file_data:
                    meta.add_attribute("checksum.type", "sha256")
                    meta.add_attribute("checksum.value", file_data["sha256"])
                    if "checksum_timing" in file_data:
                        # The yaml key is "checksum_timing", but the
                        # attribute uses the dotted "checksum.timing" name,
                        # like checksum.type / checksum.value
                        meta.add_attribute("checksum.timing",
                                           str(file_data["checksum_timing"]))
                # what else?

                new_data["outputs"][lfn] = meta

        return new_data