예제 #1
0
    def __eq__(self, other):
        """Return True when this node and *other* are treated as the same element.

        Nodes with different tags are never equal.  Beyond that the rule
        depends on the tag:

        * "script": when this node has a src, the two src values decide.
          When neither node has a src, the contents are compared: if both
          type attributes mention 'json' the top-level JSON keys are
          compared, otherwise (or when JSON parsing fails) the decision is
          delegated to compare_two_string(self.val, other.val).
        * "link": when this node has an href, the two href values decide.
          When neither node has an href, type and data_reactid must match.
        * anything else: class and value must match.

        A missing attribute is represented by the string 'None' (this is
        what the comparisons below test for).
        """
        if self.tag != other.tag:
            return False

        if self.tag == "script":
            if self.src != 'None':
                # External script: identity is the src attribute alone.
                return self.src == other.src
            if other.src != 'None':
                # One inline script and one external script.
                return False

            # Both scripts are inline.
            if 'json' in self.tp and 'json' in other.tp:
                try:
                    keys1 = set(json.loads(self.val).keys())
                    keys2 = set(json.loads(other.val).keys())
                except Exception as e:
                    # Deliberately best effort: unparsable JSON, or JSON
                    # whose top level is not an object, falls through to
                    # the plain string comparison below.
                    print("failed parse JSON: %s " % str(e))
                else:
                    shared = keys1 & keys2
                    # Equal when at least two keys are shared and they make
                    # up more than half of this node's keys.  For integer
                    # lengths the result is the same with floor or true
                    # division.
                    return len(shared) >= 2 and len(shared) > len(keys1) / 2
            return compare_two_string(self.val, other.val)

        if self.tag == "link":
            if self.href != 'None':
                # Compare href with href.  The previous code compared
                # against other.src, which is a script attribute.
                return self.href == other.href
            if other.href != 'None':
                return False
            self_data = "%s-%s" % (self.tp, self.data_reactid)
            other_data = "%s-%s" % (other.tp, other.data_reactid)
            return self_data == other_data

        self_data = "%s-%s" % (self.cls, self.val)
        other_data = "%s-%s" % (other.cls, other.val)
        return self_data == other_data
예제 #2
0
	def __eq__(self, other):
		"""Return True when this node and *other* are treated as the same element.

		Nodes with different tags are never equal.  Beyond that the rule
		depends on the tag:

		* "script": when this node has a src, the two src values decide.
		  When neither node has a src, the contents are compared: if both
		  type attributes mention 'json' the top-level JSON keys are
		  compared, otherwise (or when JSON parsing fails) the decision is
		  delegated to compare_two_string(self.val, other.val).
		* "link": when this node has an href, the two href values decide.
		  When neither node has an href, type and data_reactid must match.
		* anything else: class and value must match.

		A missing attribute is represented by the string 'None' (this is
		what the comparisons below test for).
		"""
		if self.tag != other.tag:
			return False

		if self.tag == "script":
			if self.src != 'None':
				# External script: identity is the src attribute alone.
				return self.src == other.src
			if other.src != 'None':
				# One inline script and one external script.
				return False

			# Both scripts are inline.
			if 'json' in self.tp and 'json' in other.tp:
				try:
					keys1 = set(json.loads(self.val).keys())
					keys2 = set(json.loads(other.val).keys())
				except Exception as e:
					# Deliberately best effort: unparsable JSON, or JSON
					# whose top level is not an object, falls through to
					# the plain string comparison below.
					print("failed parse JSON: %s " % str(e))
				else:
					shared = keys1 & keys2
					# Equal when at least two keys are shared and they make
					# up more than half of this node's keys.  For integer
					# lengths the result is the same with floor or true
					# division.
					return len(shared) >= 2 and len(shared) > len(keys1) / 2
			return compare_two_string(self.val, other.val)

		if self.tag == "link":
			if self.href != 'None':
				# Compare href with href.  The previous code compared
				# against other.src, which is a script attribute.
				return self.href == other.href
			if other.href != 'None':
				return False
			self_data = "%s-%s" % (self.tp, self.data_reactid)
			other_data = "%s-%s" % (other.tp, other.data_reactid)
			return self_data == other_data

		self_data = "%s-%s" % (self.cls, self.val)
		other_data = "%s-%s" % (other.cls, other.val)
		return self_data == other_data
예제 #3
0
def mmdiffR(src_ld, dst_ld, D, \
 src_script_hosts, src_script_contents, \
 dst_script_hosts, dst_script_contents):
    M = len(src_ld)  #[1 ... M-1]
    N = len(dst_ld)  #[1 ... N-1]
    i = M - 1
    j = N - 1
    ins_scripts_hosts = set()
    del_scripts_hosts = set()

    ins_scripts_contents = set()
    del_scripts_contents = set()
    updated_scripts_count = 0

    while i > 0 and j > 0:
        if (D[i][j] == D[i-1][j] + costDelete(src_ld[i])) and \
         (j == N-1 or dst_ld[j+1].level <= src_ld[i].level ):
            print "DEL:", src_ld[i].tag
            if src_ld[i].tag == "script":
                if src_ld[i].src != 'None':
                    del_scripts_hosts.add(src_ld[i].src)
                elif src_ld[i].val != "":
                    del_scripts_contents.add(src_ld[i].val)
                try:
                    print "  DEL:%s" % src_ld[i].toString().replace('\n', '\t')
                except Exception as e:
                    print "  Error displaying contents: ", str(e)
            i = i - 1
        elif (D[i][j] == D[i][j-1] + costInsert(dst_ld[j])) and \
         (i == M-1 or src_ld[i+1].level <= dst_ld[j].level):
            print "INS:", dst_ld[j].tag
            if dst_ld[j].tag == "script":
                if dst_ld[j].src != 'None':
                    ins_scripts_hosts.add(dst_ld[j].src)
                elif dst_ld[j].val != "":
                    ins_scripts_contents.add(dst_ld[j].val)
                try:
                    print "  INS:%s" % dst_ld[j].toString().replace('\n', '\t')
                except Exception as e:
                    print "  Error displaying contents: ", str(e)
            j = j - 1
        elif not src_ld[i] == dst_ld[j]:
            print "UPD: %s => %s " % (src_ld[i].tag, dst_ld[j].tag)
            if src_ld[i].tag == "script" and dst_ld[j].tag == "script":
                updated_scripts_count += 1
                try:
                    print "  REPLACE SRC:", src_ld[i].toString().replace(
                        '\n', '\t')
                    print "          DST:", dst_ld[j].toString().replace(
                        '\n', '\t')
                    print "           RS:", compare_two_string(
                        src_ld[i].val, dst_ld[j].val), "END"
                except Exception as e:
                    print "  Error displaying contents: ", str(e)
            i = i - 1
            j = j - 1
        else:
            i = i - 1
            j = j - 1

    while i > 0:
        print "DEL:", src_ld[i].tag
        i = i - 1

    while j > 0:
        print "INS:", dst_ld[j].tag
        j = j - 1

    #print "insert hosts:"
    #for item in ins_scripts_hosts:
    #	print item
    #print "delete hosts"
    cost = 0
    for item in del_scripts_hosts:
        if item in dst_script_hosts:
            cost += (SCRIPT_COST - 1)
        #print item
    for item in del_scripts_contents:
        for other in dst_script_contents:
            if compare_two_string(item, other):
                cost += (SCRIPT_COST - 1)
                break
    for item in ins_scripts_hosts:
        if item in src_script_hosts:
            cost += (SCRIPT_COST - 1)
        #print item
    for item in ins_scripts_contents:
        for other in src_script_contents:
            if compare_two_string(item, other):
                cost += (SCRIPT_COST - 1)
                break

    print "inserted hosts scripts: %d" % len(ins_scripts_hosts)
    print "deleted hosts scripts: %d" % len(del_scripts_hosts)
    print "inserted hosts contents: %d" % len(ins_scripts_contents)
    print "deleted hosts contents: %d" % len(del_scripts_contents)
    print "updated scripts: %d" % updated_scripts_count
    '''
	print "insert contents:"
	for item in ins_scripts_contents:
		try:
			print item
		except Exception as e:
			print "Error: ",str(e)
	print "delete contentes:"
	for item in del_scripts_contents:
		try:
			print item
		except Exception as e:
			print "Error: ",str(e)
	'''
    rs = float(D[M - 1][N - 1] - cost)
    norm = float(M + N)
    print "Final cost: %f[%f]" % (rs / norm, D[M - 1][N - 1] / norm)
    return rs / norm
예제 #4
0
def mmdiffR(src_ld, dst_ld, D, \
	src_script_hosts, src_script_contents, \
	dst_script_hosts, dst_script_contents):
	M = len(src_ld) #[1 ... M-1]
	N = len(dst_ld) #[1 ... N-1]
	i = M - 1
	j = N - 1
	ins_scripts_hosts = set()
	del_scripts_hosts = set()

	ins_scripts_contents = set()
	del_scripts_contents = set()
	updated_scripts_count = 0

	while i >0 and j > 0:
		if (D[i][j] == D[i-1][j] + costDelete(src_ld[i])) and \
			(j == N-1 or dst_ld[j+1].level <= src_ld[i].level ):
			print "DEL:",src_ld[i].tag
			if src_ld[i].tag == "script":
				if src_ld[i].src != 'None':
					del_scripts_hosts.add(src_ld[i].src)
				elif src_ld[i].val != "":
					del_scripts_contents.add(src_ld[i].val)
				try:
					print "  DEL:%s" % src_ld[i].toString().replace('\n','\t')
				except Exception as e:
					print "  Error displaying contents: ",str(e)
			i = i - 1
		elif (D[i][j] == D[i][j-1] + costInsert(dst_ld[j])) and \
			(i == M-1 or src_ld[i+1].level <= dst_ld[j].level):
			print "INS:",dst_ld[j].tag
			if dst_ld[j].tag == "script":
				if dst_ld[j].src != 'None':
					ins_scripts_hosts.add(dst_ld[j].src)
				elif dst_ld[j].val != "":
					ins_scripts_contents.add(dst_ld[j].val)
				try:	
					print "  INS:%s" % dst_ld[j].toString().replace('\n','\t')
				except Exception as e:
					print "  Error displaying contents: ",str(e)
			j = j - 1
		elif not src_ld[i] == dst_ld[j]:
			print "UPD: %s => %s " % (src_ld[i].tag, dst_ld[j].tag)
			if src_ld[i].tag == "script" and dst_ld[j].tag == "script":
				updated_scripts_count += 1
				try:
					print "  REPLACE SRC:",src_ld[i].toString().replace('\n','\t')
					print "          DST:",dst_ld[j].toString().replace('\n','\t')
					print "           RS:",compare_two_string(src_ld[i].val, dst_ld[j].val), "END"
				except Exception as e:
					print "  Error displaying contents: ",str(e)
			i = i - 1
			j = j - 1
		else:
			i = i - 1
			j = j - 1

	while i > 0:
		print "DEL:",src_ld[i].tag
		i = i - 1

	while j > 0:
		print "INS:",dst_ld[j].tag
		j = j - 1

	
	#print "insert hosts:"
	#for item in ins_scripts_hosts:
	#	print item
	#print "delete hosts"
	cost = 0
	for item in del_scripts_hosts:
		if item in dst_script_hosts:
			cost += (SCRIPT_COST-1)
		#print item
	for item in del_scripts_contents:
		for other in dst_script_contents:
			if compare_two_string(item, other):
				cost += (SCRIPT_COST-1)
				break
	for item in ins_scripts_hosts:
		if item in src_script_hosts:
			cost += (SCRIPT_COST-1)
		#print item
	for item in ins_scripts_contents:
		for other in src_script_contents:
			if compare_two_string(item, other):
				cost += (SCRIPT_COST-1)
				break

	print "inserted hosts scripts: %d" % len(ins_scripts_hosts)
	print "deleted hosts scripts: %d" % len(del_scripts_hosts)
	print "inserted hosts contents: %d" % len(ins_scripts_contents)
	print "deleted hosts contents: %d" % len(del_scripts_contents)
	print "updated scripts: %d" % updated_scripts_count

	'''
	print "insert contents:"
	for item in ins_scripts_contents:
		try:
			print item
		except Exception as e:
			print "Error: ",str(e)
	print "delete contentes:"
	for item in del_scripts_contents:
		try:
			print item
		except Exception as e:
			print "Error: ",str(e)
	'''
	rs = float(D[M-1][N-1] - cost)
	norm = float(M + N)
	print "Final cost: %f[%f]" %(rs/norm, D[M-1][N-1]/norm)
	return rs/norm