def compareOtherOutputs( self, report_name, ref_path, test_path, failures ):
    """Compare a reference and a test output file by MD5 hash only.

    Args:
        report_name: display name of the report being compared.
        ref_path: path to the reference output file.
        test_path: path to the freshly generated test output file.
        failures: unused here; kept for the call signature shared with
            the other compare* methods.

    Returns:
        (False, "") when the hashes match, otherwise (True, <message>).
    """
    ref_md5 = ru.md5_hash_of_file( ref_path )
    test_md5 = ru.md5_hash_of_file( test_path )
    if ref_md5 == test_md5:
        return False, ""
    else:
        print( self.config_id + " completed but did not match reference! (" + str(self.duration) + ") - " + report_name )
        # Typo fix: original message said "Failes MD5 check."
        return True, "Fails MD5 check."
def compareOtherOutputs( self, report_name, ref_path, test_path, failures ):
    """Compare a reference and a test output file by MD5 hash only.

    Args:
        report_name: display name of the report being compared.
        ref_path: path to the reference output file.
        test_path: path to the freshly generated test output file.
        failures: unused here; kept for the call signature shared with
            the other compare* methods.

    Returns:
        (False, "") when the hashes match, otherwise (True, <message>).
    """
    ref_md5 = ru.md5_hash_of_file( ref_path )
    test_md5 = ru.md5_hash_of_file( test_path )
    if ref_md5 == test_md5:
        return False, ""
    else:
        print( self.scenario_path + " completed but did not match reference! (" + str(self.duration) + ") - " + report_name )
        # Typo fix: original message said "Failes MD5 check."
        return True, "Fails MD5 check."
def compareCsvOutputs(self, ref_path, test_path, failures):
    """Compare two CSV files: MD5 first, then line count, then cell by cell.

    Args:
        ref_path: path to the reference CSV.
        test_path: path to the test CSV.
        failures: unused here; kept for the call signature shared with
            the other compare* methods.

    Returns:
        (False, "") when the files match, otherwise (True, <message>)
        describing the first difference found.
    """
    # Cheap whole-file check first.
    ref_md5 = ru.md5_hash_of_file(ref_path)
    test_md5 = ru.md5_hash_of_file(test_path)
    if ref_md5 == test_md5:
        return False, ""

    fail_validation = False
    err_msg = ""
    # MD5 differs: compare line counts, then walk both files in lockstep.
    ref_length = ru.file_len(ref_path)
    test_length = ru.file_len(test_path)
    if ref_length != test_length:
        fail_validation = True
        err_msg = "Reference output {0} has {1} lines but test output {2} has {3} lines".format(
            ref_path, ref_length, test_path, test_length)
    else:
        with open(ref_path, "r") as ref_file, open(test_path, "r") as test_file:
            line_num = 0
            for ref_line in ref_file:
                line_num += 1
                test_line = test_file.readline()
                if ref_line != test_line:
                    ref_line_tokens = ref_line.split(',')
                    test_line_tokens = test_line.split(',')
                    # Walk to the first differing column, without indexing past
                    # the shorter line (the original could raise IndexError
                    # when the test line had fewer columns).
                    col_idx = 0
                    while (col_idx < len(ref_line_tokens)
                           and col_idx < len(test_line_tokens)
                           and ref_line_tokens[col_idx] == test_line_tokens[col_idx]):
                        col_idx += 1
                    ref_tok = ref_line_tokens[col_idx] if col_idx < len(ref_line_tokens) else "<missing>"
                    test_tok = test_line_tokens[col_idx] if col_idx < len(test_line_tokens) else "<missing>"
                    err_msg = "First mismatch at line {0} of {1} column {2}: reference line...\n{3}vs test line...\n{4}{5} vs {6}".format(
                        line_num, ref_path, col_idx, ref_line, test_line, ref_tok, test_tok)
                    fail_validation = True
                    break
    print(err_msg)
    failure_txt = err_msg
    return fail_validation, failure_txt
def hash_for_file(self, filename):
    """Return the MD5 hash of ``filename``, memoized in ``self.known_files``.

    The hash is computed at most once per file; subsequent calls reuse
    the cached value.
    """
    if filename in self.known_files:
        self.log("Found hash for '{0}' in known files.".format(filename))
    else:
        self.log("Calculating hash for '{0}'.".format(filename))
        self.known_files[filename] = ru.md5_hash_of_file(filename)
    return self.known_files[filename]
def hash_for_file(self, filename):
    """Memoized MD5 lookup: compute and cache on first request, reuse after."""
    try:
        # Fast path: we have hashed this file before.
        result = self.known_files[filename]
        self.log("Found hash for '{0}' in known files.".format(filename))
    except KeyError:
        self.log("Calculating hash for '{0}'.".format(filename))
        result = ru.md5_hash_of_file(filename)
        self.known_files[filename] = result
    return result
def compareCsvOutputs( self, ref_path, test_path, failures ):
    """Compare two CSV files: MD5 first, then line count, then cell by cell.

    Args:
        ref_path: path to the reference CSV.
        test_path: path to the test CSV.
        failures: unused here; kept for the call signature shared with
            the other compare* methods.

    Returns:
        (False, "") when the files match, otherwise (True, <message>)
        describing the first difference found.
    """
    # Cheap whole-file check first.
    ref_md5 = ru.md5_hash_of_file( ref_path )
    test_md5 = ru.md5_hash_of_file( test_path )
    if ref_md5 == test_md5:
        return False, ""

    fail_validation = False
    err_msg = ""
    # MD5 differs: compare line counts, then walk both files in lockstep.
    ref_length = ru.file_len( ref_path )
    test_length = ru.file_len( test_path )
    if ref_length != test_length:
        fail_validation = True
        err_msg = "Reference output {0} has {1} lines but test output {2} has {3} lines".format(
            ref_path, ref_length, test_path, test_length )
    else:
        # 'with' guarantees the handles are closed even if an exception is
        # raised mid-comparison; the original closed them manually and only
        # on the mismatch path.
        with open( ref_path ) as ref_file, open( test_path ) as test_file:
            line_num = 0
            for ref_line in ref_file:
                line_num += 1
                test_line = test_file.readline()
                if ref_line != test_line:
                    ref_line_tokens = ref_line.split(',')
                    test_line_tokens = test_line.split(',')
                    # Find the first differing column without indexing past
                    # the shorter line (original risked IndexError when the
                    # test line had fewer columns).
                    col_idx = 0
                    while ( col_idx < len( ref_line_tokens )
                            and col_idx < len( test_line_tokens )
                            and ref_line_tokens[col_idx] == test_line_tokens[col_idx] ):
                        col_idx += 1
                    ref_tok = ref_line_tokens[col_idx] if col_idx < len( ref_line_tokens ) else "<missing>"
                    test_tok = test_line_tokens[col_idx] if col_idx < len( test_line_tokens ) else "<missing>"
                    err_msg = "First mismatch at line {0} of {1} column {2}: reference line...\n{3}vs test line...\n{4}{5} vs {6}".format(
                        line_num, ref_path, col_idx, ref_line, test_line, ref_tok, test_tok )
                    fail_validation = True
                    break
    print( err_msg )
    failure_txt = err_msg
    return fail_validation, failure_txt
def __init__(self, params):
    """Capture run parameters, hash the executable, and stage emodules (DLLs).

    Args:
        params: parsed run parameters; must provide executable_path,
            dll_root and use_dlls (plus the fields copyEModulesOver reads).
    """
    self.params = params
    # Hash of the binary under test, used to identify the build.
    self.dtk_hash = ru.md5_hash_of_file(self.params.executable_path)
    # Serializes simulation-directory work across threads.
    self.sim_dir_sem = threading.Semaphore()
    # Create the map as an instance attribute before populating it. The
    # original assigned keys into self.emodules_map without creating it
    # here, which relies on it existing elsewhere -- presumably a shared
    # class-level dict, a cross-instance mutable-state hazard.
    self.emodules_map = {
        "interventions": [],
        "disease_plugins": [],
        "reporter_plugins": [],
    }
    if params.dll_root is not None and params.use_dlls is True:
        self.copyEModulesOver(params)
    else:
        print("Not using DLLs")
def __init__(self, params):
    """Store run parameters, hash the executable under test, and optionally copy emodules."""
    self.params = params
    self.dtk_hash = ru.md5_hash_of_file(self.params.executable_path)
    self.sim_dir_sem = threading.Semaphore()
    # Reset the three plugin categories of the emodule map.
    # NOTE(review): emodules_map itself is not created in this method --
    # presumably defined elsewhere (e.g. a class attribute); confirm.
    for category in ("interventions", "disease_plugins", "reporter_plugins"):
        self.emodules_map[category] = []
    use_dlls = params.dll_root is not None and params.use_dlls is True
    if not use_dlls:
        print("Not using DLLs")
        return
    self.copyEModulesOver(params)
def copyEModulesOver(self, params):
    """Locate emodule DLLs locally and copy any new ones to the cluster dll_root.

    Each DLL found under <emodule_dir>/<subdir>/*.dll is hashed; if
    <dll_root>/<subdir>/<md5> does not yet exist (and params.sec does not
    skip the check) the directory is created and the DLL copied there.
    The remote path is recorded in self.emodules_map[<subdir>].

    Args:
        params: run parameters providing src_root, dll_path, dll_root,
            scons, debug, quick_start and sec.
    """
    # Python 3 fix: the original used Python-2 print statements, which are
    # syntax errors under Python 3 (the rest of the file uses print()).
    print("src_root = " + params.src_root)
    if params.dll_path is not None:
        emodule_dir = params.dll_path
    else:
        # Derive the build output directory from the build flavor.
        if params.scons:
            emodule_dir = os.path.join(params.src_root, "build")
            emodule_dir = os.path.join(emodule_dir, "x64")
        else:
            emodule_dir = os.path.join(params.src_root, "x64")
        if params.debug:
            emodule_dir = os.path.join(emodule_dir, "Debug")
        elif params.quick_start:
            emodule_dir = os.path.join(emodule_dir, "QuickStart")
        else:
            emodule_dir = os.path.join(emodule_dir, "Release")
    print('Assuming emodules (dlls) are in local directory: ' + emodule_dir)
    if not os.path.exists(emodule_dir):
        print("Except that directory does not exist! Not copying emodules.")
        return

    dll_dirs = ["disease_plugins", "reporter_plugins", "interventions"]
    for dll_subdir in dll_dirs:
        dlls = glob.glob(os.path.join(emodule_dir, dll_subdir, "*.dll"))
        for dll in dlls:
            # 1) calc md5 of dll
            # 2) check for existence of <dll_root>/<subdir>/<md5> on the cluster
            # 3) if it does not exist, create it and copy the dll over
            # 4) put the full remote path in emodules_map
            dll_hash = ru.md5_hash_of_file(dll)
            try:
                target_dir = os.path.join(params.dll_root, dll_subdir, dll_hash)
                if params.sec:
                    print(dll + " will be used without checking 'new-ness'.")
                elif not os.path.isdir(target_dir):
                    print(dll + ": copying to cluster")
                else:
                    print(dll + ": Already on cluster")
                # sec = command-line option to skip the copy/newness check
                if not os.path.isdir(target_dir) and not params.sec:
                    os.makedirs(target_dir)
                    shutil.copy(dll, os.path.join(target_dir, os.path.basename(dll)))
                self.emodules_map[dll_subdir].append(
                    os.path.join(target_dir, os.path.basename(dll)))
            except IOError:
                # Bug fix: the original message used dll_dirs[1] (always
                # "reporter_plugins") instead of the subdir actually being
                # processed.
                failed_dest = os.path.join(params.dll_root, dll_subdir, os.path.basename(dll))
                msg = "Failed to copy dll " + dll + " to " + failed_dest
                print(msg)
                ru.final_warnings += msg + "\n"
def copyEModulesOver(self, params):
    """Locate emodule DLLs locally and copy any new ones to the cluster dll_root.

    Each DLL found under <emodule_dir>/<subdir>/*.dll is hashed; if
    <dll_root>/<subdir>/<md5> does not yet exist (and params.sec does not
    skip the check) the directory is created and the DLL copied there.
    The remote path is recorded in self.emodules_map[<subdir>].

    Args:
        params: run parameters providing src_root, dll_path, dll_root,
            scons, debug, quick_start and sec.
    """
    # Python 3 fix: the original used Python-2 print statements here and in
    # the IOError handler; the rest of the file uses print().
    print("src_root = " + params.src_root)
    if params.dll_path is not None:
        emodule_dir = params.dll_path
    else:
        # Derive the build output directory from the build flavor.
        if params.scons:
            emodule_dir = os.path.join(params.src_root, "build")
            emodule_dir = os.path.join(emodule_dir, "x64")
        else:
            emodule_dir = os.path.join(params.src_root, "x64")
        if params.debug:
            emodule_dir = os.path.join(emodule_dir, "Debug")
        elif params.quick_start:
            emodule_dir = os.path.join(emodule_dir, "QuickStart")
        else:
            emodule_dir = os.path.join(emodule_dir, "Release")
    print('Assuming emodules (dlls) are in local directory: ' + emodule_dir)
    if not os.path.exists(emodule_dir):
        print("Except that directory does not exist! Not copying emodules.")
        return

    dll_dirs = ["disease_plugins", "reporter_plugins", "interventions"]
    for dll_subdir in dll_dirs:
        dlls = glob.glob(os.path.join(emodule_dir, dll_subdir, "*.dll"))
        for dll in dlls:
            # 1) calc md5 of dll
            # 2) check for existence of <dll_root>/<subdir>/<md5> on the cluster
            # 3) if it does not exist, create it and copy the dll over
            # 4) put the full remote path in emodules_map
            dll_hash = ru.md5_hash_of_file(dll)
            try:
                target_dir = os.path.join(params.dll_root, dll_subdir, dll_hash)
                if params.sec:
                    print(dll + " will be used without checking 'new-ness'.")
                elif not os.path.isdir(target_dir):
                    print(dll + ": copying to cluster")
                else:
                    print(dll + ": Already on cluster")
                # sec = command-line option to skip the copy/newness check
                if not os.path.isdir(target_dir) and not params.sec:
                    os.makedirs(target_dir)
                    shutil.copy(dll, os.path.join(target_dir, os.path.basename(dll)))
                self.emodules_map[dll_subdir].append(
                    os.path.join(target_dir, os.path.basename(dll)))
            except IOError:
                # Bug fix: the original message used dll_dirs[1] (always
                # "reporter_plugins") instead of the subdir actually being
                # processed.
                failed_dest = os.path.join(params.dll_root, dll_subdir, os.path.basename(dll))
                msg = "Failed to copy dll " + dll + " to " + failed_dest
                print(msg)
                ru.final_warnings += msg + "\n"
def compareJsonOutputs(self, sim_dir, report_name, ref_path, test_path, failures):
    """Deep-compare a JSON report against its reference.

    Channel-bearing reports ("Channels" key present) are compared by
    channel-data hash, channel sets, timestep counts, and finally channel
    by channel; other JSON files fall back to an MD5 comparison.

    Args:
        sim_dir: simulation output directory.
        report_name: display name of the report.
        ref_path: path to the reference JSON (relative).
        test_path: path to the test JSON (relative).
        failures: list that the per-channel comparators append messages to.

    Returns:
        (fail_validation, failure_txt): (False, "") on a match, otherwise
        True plus a description of the differences.
    """
    fail_validation = False
    failure_txt = ""
    try:
        # Sanity-parse the reference before doing anything else.
        ru.load_json(os.path.join(ru.cache_cwd, ref_path))
    except Exception:
        msg = "Exception {0} {1} loading json file: {2}.".format(
            sys.exc_info()[0], sys.exc_info()[1],
            (os.path.join(ru.cache_cwd, ref_path)))
        print(msg)
        # Bug fix: the original did a bare 'return' here (i.e. None),
        # breaking callers that unpack the (fail, text) tuple.
        return True, msg
    # NOTE(review): the sanity load above uses ru.cache_cwd but the real
    # load below uses sim_dir -- confirm which root is intended (the older
    # variant of this method loads from cache_cwd both times).
    ref_json = ru.load_json(os.path.join(sim_dir, ref_path))
    if "Channels" not in ref_json.keys():
        # No channel data: plain MD5 comparison.
        ref_md5 = ru.md5_hash_of_file(ref_path)
        test_md5 = ru.md5_hash_of_file(test_path)
        if ref_md5 == test_md5:
            return False, ""
        else:
            print(self.scenario_path + " completed but did not match reference! (" + str(self.duration) + ") - " + report_name)
            return True, "Non-Channel JSON failed MD5."
    else:
        test_json = ru.load_json(os.path.join(sim_dir, test_path))
        if "Channels" not in test_json.keys():
            return True, "Reference has Channel data and Test file does not."
        ref_md5 = self.get_json_data_hash(ref_json["Channels"])
        test_md5 = self.get_json_data_hash(test_json["Channels"])
        ref_channels = set(ref_json["Channels"])
        test_channels = set(test_json["Channels"])
        if ref_md5 == test_md5:
            return False, ""

        missing_channels = ref_channels - test_channels
        new_channels = test_channels - ref_channels
        if len(missing_channels) > 0:
            fail_validation = True
            print("ERROR: Missing channels - " + ', '.join(missing_channels))
            failure_txt += "Missing channels:\n" + '\n'.join(missing_channels) + "\n"
            self.report.addFailingTest(
                self.scenario_path, failure_txt,
                os.path.join(sim_dir, ("output/" + report_name)),
                self.scenario_type)
        if len(new_channels) > 0:
            print("WARNING: The test " + report_name + " has " + str(len(new_channels)) + " channels not found in the reference. Please update the reference " + report_name + ".")
            ru.final_warnings += self.scenario_path + " - New channels not found in reference:\n " + '\n '.join(new_channels) + "\nPlease update reference from " + os.path.join(sim_dir, os.path.join("output", "InsetChart.json")) + "!\n"
            self.report.addFailingTest(
                self.scenario_path, failure_txt,
                os.path.join(sim_dir, ("output/" + report_name)),
                self.scenario_type)

        if "Header" in ref_json.keys() and ref_json["Header"]["Timesteps"] != test_json["Header"]["Timesteps"]:
            warning_msg = "WARNING: test " + report_name + " has timesteps " + str(test_json["Header"]["Timesteps"]) + " DIFFERRING from ref " + report_name + " timesteps " + str(ref_json["Header"]["Timesteps"]) + "!\n"
            if self.params.hide_graphs:
                # Automated/nightly mode: a timestep mismatch is a failure.
                fail_validation = True
                failure_txt += warning_msg
            else:
                # Manual mode: only warn.
                ru.final_warnings += warning_msg
            print(warning_msg)

        if not fail_validation:
            # Nothing conclusive yet: deep-dive channel by channel.
            # BinnedReport and derived classes carry "Subchannel_Metadata".
            if "Header" in ref_json.keys() and "Subchannel_Metadata" in ref_json["Header"].keys():
                self.compareBinnedReportType(ref_json, test_json, failures)
            elif "Header" in ref_json.keys() and "Report_Type" in ref_json["Header"].keys() and ref_json["Header"]["Report_Type"] == "InsetChart":
                # Assuming a BaseChannelReport
                self.compareChannelReportType(ref_json, test_json, failures)
            else:
                fail_validation = True
                failures.append(report_name + " - Files are different but cannot do deep dive.")

        if len(failures) > 0:
            fail_validation = True
            failure_txt += "Channel Timestep Reference_Value Test_Value\n" + ''.join(failures)
            print(self.scenario_path + " completed but did not match reference! (" + str(self.duration) + ") - " + report_name)
    return fail_validation, failure_txt
def compareJsonOutputs( self, sim_dir, report_name, ref_path, test_path, failures ):
    """Deep-compare a JSON report against its reference.

    Channel-bearing reports ("Channels" key present) are compared by
    channel-data hash, channel sets, timestep counts, and finally channel
    by channel; other JSON files fall back to an MD5 comparison.

    Args:
        sim_dir: simulation output directory.
        report_name: display name of the report.
        ref_path: path to the reference JSON (relative).
        test_path: path to the test JSON (relative).
        failures: list that the per-channel comparators append messages to.

    Returns:
        (fail_validation, failure_txt): (False, "") on a match, otherwise
        True plus a description of the differences.
    """
    fail_validation = False
    failure_txt = ""
    # Load the reference once with the handle properly closed. The original
    # parsed it twice via json.loads(open(...).read()) and never closed any
    # of its file handles.
    try:
        with open( os.path.join( ru.cache_cwd, ref_path ) ) as ref_file:
            ref_json = json.load( ref_file )
    except Exception as ex:
        msg = "Exception {0} loading json file: {1}.".format( ex, ( os.path.join( ru.cache_cwd, ref_path ) ) )
        print( msg )
        # Bug fix: the original did a bare 'return' here (i.e. None),
        # breaking callers that unpack the (fail, text) tuple.
        return True, msg

    if "Channels" not in ref_json.keys():
        # No channel data: plain MD5 comparison.
        ref_md5 = ru.md5_hash_of_file( ref_path )
        test_md5 = ru.md5_hash_of_file( test_path )
        if ref_md5 == test_md5:
            return False, ""
        else:
            print( self.scenario_path + " completed but did not match reference! (" + str(self.duration) + ") - " + report_name )
            return True, "Non-Channel JSON failed MD5."
    else:
        with open( os.path.join( sim_dir, test_path ) ) as test_file:
            test_json = json.load( test_file )
        if "Channels" not in test_json.keys():
            return True, "Reference has Channel data and Test file does not."
        ref_md5 = self.get_json_data_hash( ref_json["Channels"] )
        test_md5 = self.get_json_data_hash( test_json["Channels"] )
        ref_channels = set(ref_json["Channels"])
        test_channels = set(test_json["Channels"])
        if ref_md5 == test_md5:
            return False, ""

        missing_channels = ref_channels - test_channels
        new_channels = test_channels - ref_channels
        if len(missing_channels) > 0:
            fail_validation = True
            print("ERROR: Missing channels - " + ', '.join(missing_channels))
            failure_txt += "Missing channels:\n" + '\n'.join(missing_channels) + "\n"
            self.report.addFailingTest( self.scenario_path, failure_txt, os.path.join( sim_dir, ( "output/" + report_name ) ), self.scenario_type )
        if len(new_channels) > 0:
            print("WARNING: The test "+report_name+" has " + str(len(new_channels)) + " channels not found in the reference. Please update the reference "+report_name+".")
            ru.final_warnings += self.scenario_path + " - New channels not found in reference:\n " + '\n '.join(new_channels) + "\nPlease update reference from " + os.path.join( sim_dir, os.path.join( "output", "InsetChart.json" ) ) + "!\n"
            self.report.addFailingTest( self.scenario_path, failure_txt, os.path.join( sim_dir, ( "output/" + report_name ) ), self.scenario_type )

        if "Header" in ref_json.keys() and ref_json["Header"]["Timesteps"] != test_json["Header"]["Timesteps"]:
            warning_msg = "WARNING: test "+report_name+" has timesteps " + str(test_json["Header"]["Timesteps"]) + " DIFFERRING from ref "+report_name+" timesteps " + str(ref_json["Header"]["Timesteps"]) + "!\n"
            if self.params.hide_graphs:
                # Automated/nightly mode: a timestep mismatch is a failure.
                fail_validation = True
                failure_txt += warning_msg
            else:
                # Manual mode: only warn.
                ru.final_warnings += warning_msg
            print(warning_msg)

        if not fail_validation:
            # Nothing conclusive yet: deep-dive channel by channel.
            # BinnedReport and derived classes carry "Subchannel_Metadata".
            if "Header" in ref_json.keys() and "Subchannel_Metadata" in ref_json["Header"].keys():
                self.compareBinnedReportType( ref_json, test_json, failures )
            elif "Header" in ref_json.keys() and "Report_Type" in ref_json["Header"].keys() and ref_json["Header"]["Report_Type"] == "InsetChart":
                # Assuming a BaseChannelReport
                self.compareChannelReportType( ref_json, test_json, failures )
            else:
                fail_validation = True
                failures.append(report_name + " - Files are different but cannot do deep dive.")

        if len(failures) > 0:
            fail_validation = True
            failure_txt += "Channel Timestep Reference_Value Test_Value\n" + ''.join(failures)
            print( self.scenario_path + " completed but did not match reference! (" + str(self.duration) + ") - " + report_name )
    return fail_validation, failure_txt