def loadFeature(self, name, file):
		"""Load one pickled feature table and attach its values to the raw edges.

		The pickle at `file` is expected to hold a mapping of docpair -> value.
		Every docpair whose value is not None gets that value stored under
		`name` on its Edge in self.docPairsToRawEdges; an Edge is created for
		docpairs that are not present yet. Entries whose value is None are
		only counted. Totals are printed to stdout when loading is done.

		Args:
			name: feature name passed to Edge.addFeature.
			file: path of the pickle file to read (opened in binary mode).
		"""
		print("* loading feature: " + str(file))
		sys.stdout.flush()

		# NOTE(security): pickle.load can execute arbitrary code while
		# deserializing, so `file` must come from a trusted source.
		# The with-block guarantees the handle is closed even if loading fails.
		with open(file, "rb") as fileObj:
			tmp = pickle.load(fileObj)

		numNone = 0
		numTotal = 0
		for docpair, val in tmp.items():
			numTotal += 1

			if val is None:
				numNone += 1
				continue

			# membership is tested on the dict itself (hash lookup) instead of
			# on a freshly built key list
			if docpair in self.docPairsToRawEdges:
				self.docPairsToRawEdges[docpair].addFeature(name, val)
			else:
				e = Edge(docpair)
				e.addFeature(name, val)
				self.docPairsToRawEdges[docpair] = e

		print("total: " + str(numTotal))
		print("none: " + str(numNone))
		sys.stdout.flush()
	def normalizeFeatures(self):
		"""Min-max normalize every feature and fill in missing values.

		For each name in self.featureNames this makes two passes over
		self.docPairsToRawEdges:

		1. collect the values that are present and print min / max / average
		   and the share of docpairs missing the feature;
		2. for every docpair, store the normalized value
		   (value - min) / (max - min) on the matching Edge in
		   self.docPairsToNormEdges (creating the Edge if needed). A docpair
		   that lacks the feature first receives a value sampled at random
		   from the observed ones; that value is written to the raw Edge and
		   its normalized form to the normalized Edge.

		A feature with no observed value at all is reported and skipped, and a
		feature whose observed values are all equal normalizes to 0.0; both
		cases would otherwise divide by zero.
		"""
		rawEdges = self.docPairsToRawEdges
		normEdges = self.docPairsToNormEdges

		# goes through each feature
		for feature in self.featureNames:
			print("----------\nfeature :" + str(feature))
			sys.stdout.flush()

			# observed values only; missing entries are sampled from this list,
			# so it must be complete before any raw edge is filled in
			values = [e.features[feature] for e in rawEdges.values() if feature in e.features]

			# the sum of these two totals the size of self.docPairsToRawEdges
			hadValue = len(values)
			missingValue = len(rawEdges) - hadValue

			if not values:
				# nothing to average, normalize against, or sample from
				print("no values found for feature, skipping")
				sys.stdout.flush()
				continue

			# bounds come from the data itself, so values of any magnitude work
			lowest = min(values)
			highest = max(values)
			avg = float(sum(values)) / float(hadValue)
			print("min: " + str(lowest))
			print("max: " + str(highest))
			print("avg: " + str(avg))
			print(str(missingValue) + " missing values, out of " + str(len(rawEdges)) + " total (" + str(float(missingValue) / float(len(rawEdges))) + ")")
			sys.stdout.flush()
			denom = highest - lowest

			# goes through all docpairs again, to:
			# - normalize all values; and
			# - fill in missing values by randomly sampling (for both raw and normalized values)
			for docpair, e in rawEdges.items():
				if feature in e.features:
					rawVal = e.features[feature]
				else:
					# this docpair edge doesn't have the feature, let's randomly pick a value
					rawVal = values[randint(0, len(values) - 1)]
					e.addFeature(feature, rawVal)

				if denom == 0:
					# every observed value is identical; there is no range to scale by
					normVal = 0.0
				else:
					normVal = float(rawVal - lowest) / float(denom)

				# reuse the normalized Edge for this docpair when it already exists
				if docpair in normEdges:
					normEdges[docpair].addFeature(feature, normVal)
				else:
					enorm = Edge(docpair)
					enorm.addFeature(feature, normVal)
					normEdges[docpair] = enorm

			print("done: ")
			print("size of rawedges: " + str(len(rawEdges)))
			print("size of normedges: " + str(len(normEdges)))
			sys.stdout.flush()