def __eq__(self, other):
    """Decide whether two parsed nodes count as the same element.

    Rules, in order:

    * Nodes with different ``tag`` values are never equal.
    * ``script``: when ``self.src`` is set (not the string ``'None'``) the
      nodes are equal iff the ``src`` values match. An inline script never
      equals an external one. Two inline scripts whose ``tp`` both mention
      ``json`` are equal when their top-level JSON objects share at least
      two keys and more than half of ``self``'s keys; otherwise the
      decision is delegated to ``compare_two_string`` on the bodies.
    * ``link``: when ``self.href`` is set the nodes are equal iff the
      ``href`` values match; with no ``href`` on either side, ``tp`` and
      ``data_reactid`` are compared.
    * Anything else: ``cls`` and ``val`` are compared.

    Missing attributes are represented by the string ``'None'``, not the
    ``None`` object; that is what every check below tests against.

    NOTE: under Python 2 ``!=`` is not derived from ``__eq__``; callers
    should write ``not a == b`` unless the class also defines ``__ne__``.
    """
    if self.tag != other.tag:
        return False

    if self.tag == "script":
        if self.src != 'None':
            # External script: the URL is the identity.
            return self.src == other.src
        if other.src != 'None':
            # One inline script, one external script.
            return False

        # Both scripts are inline.
        if 'json' in self.tp and 'json' in other.tp:
            try:
                keys1 = set(json.loads(self.val).keys())
                keys2 = set(json.loads(other.val).keys())
            except Exception as e:
                # Best effort: invalid JSON or a non-object top-level value
                # falls through to the plain string comparison below.
                print("failed parse JSON: %s " % str(e))
            else:
                common = keys1.intersection(keys2)
                # "2 * common > total" is "common > total / 2" without
                # relying on Python 2 integer division.
                return len(common) >= 2 and 2 * len(common) > len(keys1)
        # NOTE(review): the original layout was lost; this fallback is
        # assumed to apply to every inline script, not only to JSON parse
        # failures - confirm against the upstream file.
        return compare_two_string(self.val, other.val)

    if self.tag == "link":
        if self.href != 'None':
            # Fixed: this used to compare against other.src, so two links
            # to the same stylesheet were reported as different.
            return self.href == other.href
        if other.href != 'None':
            return False
        self_data = "%s-%s" % (self.tp, self.data_reactid)
        other_data = "%s-%s" % (other.tp, other.data_reactid)
        return self_data == other_data

    self_data = "%s-%s" % (self.cls, self.val)
    other_data = "%s-%s" % (other.cls, other.val)
    return self_data == other_data
def __eq__(self, other):
    """Decide whether two parsed nodes count as the same element.

    Rules, in order:

    * Nodes with different ``tag`` values are never equal.
    * ``script``: when ``self.src`` is set (not the string ``'None'``) the
      nodes are equal iff the ``src`` values match. An inline script never
      equals an external one. Two inline scripts whose ``tp`` both mention
      ``json`` are equal when their top-level JSON objects share at least
      two keys and more than half of ``self``'s keys; otherwise the
      decision is delegated to ``compare_two_string`` on the bodies.
    * ``link``: when ``self.href`` is set the nodes are equal iff the
      ``href`` values match; with no ``href`` on either side, ``tp`` and
      ``data_reactid`` are compared.
    * Anything else: ``cls`` and ``val`` are compared.

    Missing attributes are represented by the string ``'None'``, not the
    ``None`` object; that is what every check below tests against.

    NOTE: under Python 2 ``!=`` is not derived from ``__eq__``; callers
    should write ``not a == b`` unless the class also defines ``__ne__``.
    """
    if self.tag != other.tag:
        return False

    if self.tag == "script":
        if self.src != 'None':
            # External script: the URL is the identity.
            return self.src == other.src
        if other.src != 'None':
            # One inline script, one external script.
            return False

        # Both scripts are inline.
        if 'json' in self.tp and 'json' in other.tp:
            try:
                keys1 = set(json.loads(self.val).keys())
                keys2 = set(json.loads(other.val).keys())
            except Exception as e:
                # Best effort: invalid JSON or a non-object top-level value
                # falls through to the plain string comparison below.
                print("failed parse JSON: %s " % str(e))
            else:
                common = keys1.intersection(keys2)
                # "2 * common > total" is "common > total / 2" without
                # relying on Python 2 integer division.
                return len(common) >= 2 and 2 * len(common) > len(keys1)
        # NOTE(review): the original layout was lost; this fallback is
        # assumed to apply to every inline script, not only to JSON parse
        # failures - confirm against the upstream file.
        return compare_two_string(self.val, other.val)

    if self.tag == "link":
        if self.href != 'None':
            # Fixed: this used to compare against other.src, so two links
            # to the same stylesheet were reported as different.
            return self.href == other.href
        if other.href != 'None':
            return False
        self_data = "%s-%s" % (self.tp, self.data_reactid)
        other_data = "%s-%s" % (other.tp, other.data_reactid)
        return self_data == other_data

    self_data = "%s-%s" % (self.cls, self.val)
    other_data = "%s-%s" % (other.cls, other.val)
    return self_data == other_data
def mmdiffR(src_ld, dst_ld, D,
            src_script_hosts, src_script_contents,
            dst_script_hosts, dst_script_contents):
    """Walk the cost table backwards, log the edit script, return the cost.

    Starting at ``D[M-1][N-1]`` the walk classifies every step as a
    deletion (DEL), insertion (INS), update (UPD) or match, printing one
    line per edit. Deleted and inserted ``script`` nodes are collected; a
    script that was deleted but is still present in the destination (or
    inserted but already present in the source) is refunded
    ``SCRIPT_COST - 1`` from the total.

    Args:
        src_ld, dst_ld: node sequences. Index 0 is never visited (the walk
            stops at ``i == 0`` / ``j == 0``). Nodes expose ``tag``,
            ``level``, ``src``, ``val`` and ``toString()``.
        D: table indexed ``D[i][j]``; presumably the edit-distance table
            built by the forward pass over the same sequences - confirm
            with the caller.
        src_script_hosts, dst_script_hosts: containers of external script
            URLs present on each side (tested with ``in``).
        src_script_contents, dst_script_contents: iterables of inline
            script bodies present on each side.

    Returns:
        ``(D[M-1][N-1] - refund) / (M + N)`` as a float.

    All output uses single-argument ``print(...)`` so the text is the same
    under Python 2 and Python 3.
    """
    M = len(src_ld)  # visited indices: 1 .. M-1
    N = len(dst_ld)  # visited indices: 1 .. N-1
    i = M - 1
    j = N - 1
    ins_scripts_hosts = set()
    del_scripts_hosts = set()
    ins_scripts_contents = set()
    del_scripts_contents = set()
    updated_scripts_count = 0

    while i > 0 and j > 0:
        src_node = src_ld[i]
        dst_node = dst_ld[j]
        if (D[i][j] == D[i - 1][j] + costDelete(src_node)) and \
                (j == N - 1 or dst_ld[j + 1].level <= src_node.level):
            print("DEL: %s" % (src_node.tag,))
            # NOTE(review): the original indentation was lost; the detail
            # line is assumed to be printed for scripts only - confirm.
            if src_node.tag == "script":
                _record_script(src_node, del_scripts_hosts,
                               del_scripts_contents)
                _show_script(" DEL:%s", src_node)
            i -= 1
        elif (D[i][j] == D[i][j - 1] + costInsert(dst_node)) and \
                (i == M - 1 or src_ld[i + 1].level <= dst_node.level):
            print("INS: %s" % (dst_node.tag,))
            if dst_node.tag == "script":
                _record_script(dst_node, ins_scripts_hosts,
                               ins_scripts_contents)
                _show_script(" INS:%s", dst_node)
            j -= 1
        else:
            # Diagonal step: an update when the nodes differ, else a match.
            # "not a == b" is deliberate: only __eq__ is known to exist.
            if not src_node == dst_node:
                print("UPD: %s => %s " % (src_node.tag, dst_node.tag))
                if src_node.tag == "script" and dst_node.tag == "script":
                    updated_scripts_count += 1
                    try:
                        print(" REPLACE SRC: %s"
                              % src_node.toString().replace('\n', '\t'))
                        print(" DST: %s"
                              % dst_node.toString().replace('\n', '\t'))
                        print(" RS: %s END" % (compare_two_string(
                            src_node.val, dst_node.val),))
                    except Exception as e:
                        print(" Error displaying contents:  %s" % str(e))
            i -= 1
            j -= 1

    # Whatever is left on one side once the other is exhausted is a pure
    # run of deletions or insertions. Fixed: scripts met here used to be
    # printed but never recorded, so they missed the moved-script refund
    # and the summary counts below.
    while i > 0:
        print("DEL: %s" % (src_ld[i].tag,))
        _record_script(src_ld[i], del_scripts_hosts, del_scripts_contents)
        i -= 1
    while j > 0:
        print("INS: %s" % (dst_ld[j].tag,))
        _record_script(dst_ld[j], ins_scripts_hosts, ins_scripts_contents)
        j -= 1

    cost = _moved_script_discount(del_scripts_hosts, del_scripts_contents,
                                  dst_script_hosts, dst_script_contents)
    cost += _moved_script_discount(ins_scripts_hosts, ins_scripts_contents,
                                   src_script_hosts, src_script_contents)

    print("inserted hosts scripts: %d" % len(ins_scripts_hosts))
    print("deleted hosts scripts: %d" % len(del_scripts_hosts))
    print("inserted hosts contents: %d" % len(ins_scripts_contents))
    print("deleted hosts contents: %d" % len(del_scripts_contents))
    print("updated scripts: %d" % updated_scripts_count)

    rs = float(D[M - 1][N - 1] - cost)
    norm = float(M + N)
    print("Final cost: %f[%f]" % (rs / norm, D[M - 1][N - 1] / norm))
    return rs / norm


def _record_script(node, hosts, contents):
    """Remember an edited script: by URL if external, else by non-empty body."""
    if node.tag != "script":
        return
    if node.src != 'None':
        hosts.add(node.src)
    elif node.val != "":
        contents.add(node.val)


def _show_script(template, node):
    """Print a node's text on one line; never let a display error escape."""
    try:
        print(template % node.toString().replace('\n', '\t'))
    except Exception as e:
        print(" Error displaying contents:  %s" % str(e))


def _moved_script_discount(hosts, contents, other_hosts, other_contents):
    """Return the refund for edited scripts that still exist on the other side."""
    discount = 0
    for host in hosts:
        if host in other_hosts:
            discount += SCRIPT_COST - 1
    for body in contents:
        # any() stops at the first match, like the original inner break.
        if any(compare_two_string(body, other) for other in other_contents):
            discount += SCRIPT_COST - 1
    return discount
def mmdiffR(src_ld, dst_ld, D,
            src_script_hosts, src_script_contents,
            dst_script_hosts, dst_script_contents):
    """Walk the cost table backwards, log the edit script, return the cost.

    Starting at ``D[M-1][N-1]`` the walk classifies every step as a
    deletion (DEL), insertion (INS), update (UPD) or match, printing one
    line per edit. Deleted and inserted ``script`` nodes are collected; a
    script that was deleted but is still present in the destination (or
    inserted but already present in the source) is refunded
    ``SCRIPT_COST - 1`` from the total.

    Args:
        src_ld, dst_ld: node sequences. Index 0 is never visited (the walk
            stops at ``i == 0`` / ``j == 0``). Nodes expose ``tag``,
            ``level``, ``src``, ``val`` and ``toString()``.
        D: table indexed ``D[i][j]``; presumably the edit-distance table
            built by the forward pass over the same sequences - confirm
            with the caller.
        src_script_hosts, dst_script_hosts: containers of external script
            URLs present on each side (tested with ``in``).
        src_script_contents, dst_script_contents: iterables of inline
            script bodies present on each side.

    Returns:
        ``(D[M-1][N-1] - refund) / (M + N)`` as a float.

    All output uses single-argument ``print(...)`` so the text is the same
    under Python 2 and Python 3.
    """
    M = len(src_ld)  # visited indices: 1 .. M-1
    N = len(dst_ld)  # visited indices: 1 .. N-1
    i = M - 1
    j = N - 1
    ins_scripts_hosts = set()
    del_scripts_hosts = set()
    ins_scripts_contents = set()
    del_scripts_contents = set()
    updated_scripts_count = 0

    while i > 0 and j > 0:
        src_node = src_ld[i]
        dst_node = dst_ld[j]
        if (D[i][j] == D[i - 1][j] + costDelete(src_node)) and \
                (j == N - 1 or dst_ld[j + 1].level <= src_node.level):
            print("DEL: %s" % (src_node.tag,))
            # NOTE(review): the original indentation was lost; the detail
            # line is assumed to be printed for scripts only - confirm.
            if src_node.tag == "script":
                _record_script(src_node, del_scripts_hosts,
                               del_scripts_contents)
                _show_script(" DEL:%s", src_node)
            i -= 1
        elif (D[i][j] == D[i][j - 1] + costInsert(dst_node)) and \
                (i == M - 1 or src_ld[i + 1].level <= dst_node.level):
            print("INS: %s" % (dst_node.tag,))
            if dst_node.tag == "script":
                _record_script(dst_node, ins_scripts_hosts,
                               ins_scripts_contents)
                _show_script(" INS:%s", dst_node)
            j -= 1
        else:
            # Diagonal step: an update when the nodes differ, else a match.
            # "not a == b" is deliberate: only __eq__ is known to exist.
            if not src_node == dst_node:
                print("UPD: %s => %s " % (src_node.tag, dst_node.tag))
                if src_node.tag == "script" and dst_node.tag == "script":
                    updated_scripts_count += 1
                    try:
                        print(" REPLACE SRC: %s"
                              % src_node.toString().replace('\n', '\t'))
                        print(" DST: %s"
                              % dst_node.toString().replace('\n', '\t'))
                        print(" RS: %s END" % (compare_two_string(
                            src_node.val, dst_node.val),))
                    except Exception as e:
                        print(" Error displaying contents:  %s" % str(e))
            i -= 1
            j -= 1

    # Whatever is left on one side once the other is exhausted is a pure
    # run of deletions or insertions. Fixed: scripts met here used to be
    # printed but never recorded, so they missed the moved-script refund
    # and the summary counts below.
    while i > 0:
        print("DEL: %s" % (src_ld[i].tag,))
        _record_script(src_ld[i], del_scripts_hosts, del_scripts_contents)
        i -= 1
    while j > 0:
        print("INS: %s" % (dst_ld[j].tag,))
        _record_script(dst_ld[j], ins_scripts_hosts, ins_scripts_contents)
        j -= 1

    cost = _moved_script_discount(del_scripts_hosts, del_scripts_contents,
                                  dst_script_hosts, dst_script_contents)
    cost += _moved_script_discount(ins_scripts_hosts, ins_scripts_contents,
                                   src_script_hosts, src_script_contents)

    print("inserted hosts scripts: %d" % len(ins_scripts_hosts))
    print("deleted hosts scripts: %d" % len(del_scripts_hosts))
    print("inserted hosts contents: %d" % len(ins_scripts_contents))
    print("deleted hosts contents: %d" % len(del_scripts_contents))
    print("updated scripts: %d" % updated_scripts_count)

    rs = float(D[M - 1][N - 1] - cost)
    norm = float(M + N)
    print("Final cost: %f[%f]" % (rs / norm, D[M - 1][N - 1] / norm))
    return rs / norm


def _record_script(node, hosts, contents):
    """Remember an edited script: by URL if external, else by non-empty body."""
    if node.tag != "script":
        return
    if node.src != 'None':
        hosts.add(node.src)
    elif node.val != "":
        contents.add(node.val)


def _show_script(template, node):
    """Print a node's text on one line; never let a display error escape."""
    try:
        print(template % node.toString().replace('\n', '\t'))
    except Exception as e:
        print(" Error displaying contents:  %s" % str(e))


def _moved_script_discount(hosts, contents, other_hosts, other_contents):
    """Return the refund for edited scripts that still exist on the other side."""
    discount = 0
    for host in hosts:
        if host in other_hosts:
            discount += SCRIPT_COST - 1
    for body in contents:
        # any() stops at the first match, like the original inner break.
        if any(compare_two_string(body, other) for other in other_contents):
            discount += SCRIPT_COST - 1
    return discount