def computeBiggestCoordinatesFromResults( self, resCandidates ):
		''' Reduce a set of candidates to the one matching the centre of the biggest group.

			The string coordinates of every candidate are handed to
			computeBiggestGroupOfCoordinates(); the first candidate whose
			getCoord() equals the returned centre is returned, wrapped in a new
			ResCandidates.  When no centre comes back, or no candidate matches
			it, the result holds a single entry with score 0.0,
			self.defaultCoordinates and the label "no results".
		'''
		# candidates look like [ [5.0, [lat,lon], "tags"] ]
		selected = ResCandidates()
		# computeBiggestGroupOfCoordinates works on coordinates in string format
		# (computeYaelKmeansOfCoordinates would take res.getCoord() instead)
		coordStrings = [ cand.getCoordStr() for cand in resCandidates.getList() ]
		if self.verbose:
			print >> sys.stderr, ""
			print >> sys.stderr, "computeBiggestCoordinatesFromResults] coordList:", coordStrings
		# the second returned value is not needed here
		centerCoo, _ = self.computeBiggestGroupOfCoordinates( coordStrings )
		# NOTE(review): the group is computed on getCoordStr() values while the
		# lookup below compares getCoord(); confirm both share a representation
		if len(centerCoo) > 0:
			for cand in resCandidates.getList():
				if cand.getCoord() == centerCoo:
					# keep the whole candidate, with score and label
					selected.addObj( cand )
					return selected
		# no centre, or nobody matched it: something went wrong
		selected.add( 0.0, self.defaultCoordinates, "no results" )
		return selected
 def getContentVideoResults(self, filename):
     ''' Return the content-matching candidate stored for a video file.

         The result is an empty ResCandidates when self.FnRes has no entry
         for filename; otherwise it holds one entry labelled
         "_byContentMatching" built from the two values returned by
         identifyCloserCoordinatesInTuple().
     '''
     contentResults = ResCandidates()
     if filename not in self.FnRes:
         return contentResults
     # pick the best coordinates among the ones stored for this file
     bestCoo, sumOfD = self.identifyCloserCoordinatesInTuple(self.FnRes[filename])
     contentResults.add(sumOfD, bestCoo, "_byContentMatching")
     return contentResults
	def getContentVideoResults(self, filename):
		''' Return the content-matching candidate stored for a video file.

			The result is an empty ResCandidates when self.FnRes has no entry
			for filename; otherwise it holds one entry labelled
			"_byContentMatching" built from the two values returned by
			identifyCloserCoordinatesInTuple().
		'''
		contentResults = ResCandidates()
		if filename not in self.FnRes:
			return contentResults
		# pick the best coordinates among the ones stored for this file
		bestCoo, sumOfD = self.identifyCloserCoordinatesInTuple( self.FnRes[ filename ] )
		contentResults.add( sumOfD, bestCoo, "_byContentMatching" )
		return contentResults
    def getUserMostProbableLocation(self, uId):
        ''' Return the most probable location of the given user id.

            self.UserPlaces[uId] is a sequence of entries indexed as
            [lat, lon, acc, freq].  The coordinates with the highest freq are
            selected (the latest entry wins a tie) and returned as a single
            candidate with score -1 and label uId + "_userPlaces".

            The returned ResCandidates is empty when the user is unknown,
            has no entry, or when the selected coordinates are equal to
            self.defaultCoordinates.
        '''
        resCandidates = ResCandidates()
        if uId not in self.UserPlaces:
            return resCandidates
        # look for the coordinates with the highest frequency
        # (assumes freq is numeric -- TODO confirm against the UserPlaces loader)
        maxFreq = 0
        bestCoord = self.defaultCoordinates
        for coordFreq in self.UserPlaces[uId]:
            freq = coordFreq[3]
            if freq >= maxFreq:
                # remember the new maximum, otherwise every entry would win
                maxFreq = freq
                bestCoord = [coordFreq[0], coordFreq[1]]
        if bestCoord != self.defaultCoordinates:
            resCandidates.add(-1, bestCoord, uId + "_userPlaces")
        return resCandidates
	def getUserMostProbableLocation( self, uId ):
		''' Return the most probable location of the given user id.

			self.UserPlaces[uId] is a sequence of entries indexed as
			[lat, lon, acc, freq].  The coordinates with the highest freq are
			selected (the latest entry wins a tie) and returned as a single
			candidate with score -1 and label uId + "_userPlaces".

			The returned ResCandidates is empty when the user is unknown,
			has no entry, or when the selected coordinates are equal to
			self.defaultCoordinates.
		'''
		resCandidates = ResCandidates()
		if uId not in self.UserPlaces:
			return resCandidates
		# look for the coordinates with the highest frequency
		# (assumes freq is numeric -- TODO confirm against the UserPlaces loader)
		maxFreq = 0
		bestCoord = self.defaultCoordinates
		for coordFreq in self.UserPlaces[uId]:
			freq = coordFreq[3]
			if freq >= maxFreq:
				# remember the new maximum, otherwise every entry would win
				maxFreq = freq
				bestCoord = [ coordFreq[0], coordFreq[1] ]
		if bestCoord != self.defaultCoordinates:
			resCandidates.add( -1, bestCoord, uId+"_userPlaces" )
		return resCandidates
    def computeBiggestCoordinatesFromResults(self, resCandidates):
        ''' Reduce a set of candidates to the one matching the centre of the biggest group.

            The string coordinates of every candidate are handed to
            computeBiggestGroupOfCoordinates(); the first candidate whose
            getCoord() equals the returned centre is returned, wrapped in a new
            ResCandidates.  When no centre comes back, or no candidate matches
            it, the result holds a single entry with score 0.0,
            self.defaultCoordinates and the label "no results".
        '''
        # candidates look like [ [5.0, [lat,lon], "tags"] ]
        selected = ResCandidates()
        # computeBiggestGroupOfCoordinates works on coordinates in string format
        # (computeYaelKmeansOfCoordinates would take res.getCoord() instead)
        coordStrings = [cand.getCoordStr() for cand in resCandidates.getList()]
        if self.verbose:
            print >> sys.stderr, ""
            print >> sys.stderr, "computeBiggestCoordinatesFromResults] coordList:", coordStrings
        # the second returned value is not needed here
        centerCoo, _ = self.computeBiggestGroupOfCoordinates(coordStrings)
        # NOTE(review): the group is computed on getCoordStr() values while the
        # lookup below compares getCoord(); confirm both share a representation
        if len(centerCoo) > 0:
            for cand in resCandidates.getList():
                if cand.getCoord() == centerCoo:
                    # keep the whole candidate, with score and label
                    selected.addObj(cand)
                    return selected
        # no centre, or nobody matched it: something went wrong
        selected.add(0.0, self.defaultCoordinates, "no results")
        return selected
    def multithreadTestVideosParser(self, line, varLock=""):
        ''' Geolocate one line of the test set and serialize the outcome.

            The line is tab-separated; this method reads field 3 (tags),
            field 4 (machine tags), field 5 (real location, "lat|lon|...")
            and field 6 (owner info, "ownerId|hometown").  Candidates are
            searched with a cascade of methods, each one tried only while
            there are still no results:
              1. tags matched against the groups of tags (TrainSet),
              2. the most probable location of the owner,
              3. the owner's hometown used as tags,
              4. the default coordinates.
            More than two candidates are reduced to one with
            computeBiggestCoordinatesFromResults().  The candidates, the real
            coordinates and the name of the last method tried are finally
            passed to serialize().

            line    -- one raw line of the test set
            varLock -- not used by this method; kept for existing callers
        '''
        methodUsed = "All Methods"
        blk = line.strip().split("\t")
        # get the tags
        tags = clean_line(blk[3])
        mtags = clean_line(blk[4])
        print >> sys.stderr, "---"
        print >> sys.stderr, "* input tags: %s [mtags: %s]" % (tags, mtags)
        # Read Real Location: "45.516639|-122.681053|16|Portland|Oregon|etats-Unis|16"
        geo = blk[5].strip().split("|")
        realCoord = [float(geo[0]), float(geo[1])]
        resCandidates = ResCandidates()

        # 1st METHOD: check tags in training set
        if len(tags) > 1:
            methodUsed = "TrainSet"
            resCandidates = self.matchWithGroupsOfTags(tags, mtags)
        else:
            # no usable tags: reported under the initial "All Methods" label
            self.updateLockVariables(methodUsed)
        if self.verbose:
            print >> sys.stderr, "*> parseTestVideos] got %d results with" % resCandidates.size(), methodUsed

        # 2nd METHOD: (if there are NOT RESULTS) use the most probable location for this user
        if resCandidates.size() == 0:
            methodUsed = "UserCommonLocation"
            ownId = blk[6].split("|")[0]
            # NOTE(review): parseTestVideos lower-cases the owner id before this
            # lookup; confirm which form the keys of UserPlaces use
            resCandidates = self.getUserMostProbableLocation(ownId)
            if resCandidates.size() > 0:
                self.updateLockVariables(methodUsed)
            if self.verbose:
                print >> sys.stderr, "*> parseTestVideos] got %d results with" % resCandidates.size(), methodUsed

        # 3rd METHOD: (if there are NOT RESULTS) USE the USER HOMETOWN as tags
        if resCandidates.size() == 0:
            methodUsed = "HomeTown"
            # Owner location: ['14678786@N00', 'milwaukee, United States']
            ownGeo = blk[6].split("|")
            # an owner field without the hometown part simply yields no tags
            ht = ownGeo[1].split(",") if len(ownGeo) > 1 else []
            # keep only the pieces longer than one character
            hometownTags = " ".join([w for w in ht if len(w) > 1])
            # test the length: comparing the string itself with a number is
            # always true in Python 2
            if len(hometownTags) >= 1:
                # check with the new set of tags
                resCandidates = self.matchWithGroupsOfTags(hometownTags, "")
                if resCandidates.size() > 0:
                    self.updateLockVariables(methodUsed)
                if self.verbose:
                    print >> sys.stderr, "*> parseTestVideos] got %d results with" % resCandidates.size(), methodUsed

        # 4th METHOD: (if there are NOT RESULTS) define zero position
        if resCandidates.size() == 0:
            methodUsed = "DefaultCoordinates"
            self.updateLockVariables(methodUsed)
            resCandidates.add(-1.0, self.defaultCoordinates, "no results")
            if self.verbose:
                print >> sys.stderr, "*> parseTestVideos] defined default coordinates"

        # return the bigger coordinates group
        if resCandidates.size() > 2:
            # always defined, so the second verbose section can rely on it
            candies = []
            if self.verbose:
                print >> sys.stderr, "*> parseTestVideos] BiggestCoordinatesGroup: from %d results got 1" % resCandidates.size()
                for res in resCandidates.getList():
                    candies.append(
                        str(res.getScore()) + "," + str(res.getCoord()[0]) +
                        "|" + str(res.getCoord()[1]))
            resCandidates = self.computeBiggestCoordinatesFromResults(resCandidates)
            if self.verbose:
                for can in candies:
                    print >> sys.stderr, can
                print >> sys.stderr, "*> parseTestVideos] FINAL => ", resCandidates.getList()[0].getCoord()

        # write the buffer into a file
        self.serialize(resCandidates, realCoord, methodUsed)
        print >> sys.stderr, "---\n"
    def parseTestVideos(self, withMtags=True, officialRun=False):
        ''' Geolocate every video of the test set and serialize the predictions.

            Each line of self.TestFile is tab-separated
            (videoId, title, url, tags, mtags, location, ownerLocation);
            the filename is the text before the first "|" of field 1 with
            '.jpg' removed, the url is field 2, and tags, machine tags, real
            location and owner info come from getInfoFromTestVideo().
            Candidates are searched with a cascade of methods, each one tried
            only while there are still no results:
              1. tags matched against the groups of tags (TrainSet),
              2. the most probable location of the owner,
              3. the owner's hometown used as tags,
              4. the content-based results stored for the filename,
              5. the default coordinates.
            More than two candidates are reduced to one with
            computeBiggestCoordinatesFromResults().  The test file is closed
            at the end.

            withMtags   -- forwarded to getInfoFromTestVideo()
            officialRun -- when True the real coordinates are not read and
                           only serializeFormatted() is called; otherwise
                           serialize() is called as well and the per-radius
                           counters of self.stats are reported at the end
        '''
        print >> sys.stderr, "*> [SelectCoordMethod: %s] [ScoreMetric: %s] [MatchingType: %s]" % (
            self.selectCoordMethod, self.scoreMetric, self.matchingType)
        print >> sys.stderr, "*> [TestSet]",
        t1 = time.time()
        userMostProbableLocation = 0
        userHomeCoordinates = 0
        nullCoordinates = 0
        contentbased = 0
        noTags = 0
        # hard-coded total, only the denominator of the progress percentage
        totLines = 4532.0
        lines = 0
        # videoId <t> title <t> url <t> tags <t> mtags <t> location <t> ownerLocation
        for line in self.TestFile:
            lines += 1
            print >> sys.stderr, "---"
            methodUsed = "All Methods"
            # get filename and url
            blk = line.strip().split("\t")
            url = blk[2].strip()
            # str.replace returns a new string, so the result has to be kept
            filename = blk[1].split("|")[0].replace('.jpg', '')
            # get the tags, mtags, geo and ownGeo
            tags, mtags, geo, ownGeo = self.getInfoFromTestVideo(line, withMtags)
            realCoord = [float(geo[0]), float(geo[1])] if not officialRun else ""
            resCandidates = ResCandidates()

            # 1st METHOD: check tags in training set
            if len(tags) > 1:
                methodUsed = "TrainSet"
                resCandidates = self.matchWithGroupsOfTags(tags, mtags)
            else:
                noTags += 1
            if self.verbose:
                print >> sys.stderr, "*> parseTestVideos] got %d results with" % resCandidates.size(), methodUsed

            # 2nd METHOD: (if there are NOT RESULTS) use the most probable location for this user
            if resCandidates.size() == 0:
                methodUsed = "UserCommonLocation"
                ownId = ownGeo[0]
                resCandidates = self.getUserMostProbableLocation(ownId.lower())
                if resCandidates.size() > 0:
                    print >> sys.stderr, "*> Used UserCommonLocation for user %s" % ownId
                    userMostProbableLocation += 1
                else:
                    print >> sys.stderr, "*> NO UserCommonLocation for user %s" % ownId
                if self.verbose:
                    print >> sys.stderr, "*> parseTestVideos] got %d results with" % resCandidates.size(), methodUsed

            # 3rd METHOD: (if there are NOT RESULTS) USE the USER HOMETOWN as tags
            if resCandidates.size() == 0:
                methodUsed = "HomeTown"
                # an owner record without the hometown part simply yields no tags
                ht = ownGeo[1].split(",") if len(ownGeo) > 1 else []
                # keep only the pieces longer than one character
                hometownTags = " ".join([w for w in ht if len(w) > 1])
                # Without usable hometown tags the lookup is skipped, but the
                # video still goes through the remaining fallbacks and gets
                # serialized (a 'continue' here would drop it from the output)
                if len(hometownTags) >= 2:
                    resCandidates = self.matchWithGroupsOfTags(hometownTags, "")
                    if resCandidates.size() > 0:
                        userHomeCoordinates += 1
                if self.verbose:
                    print >> sys.stderr, "*> parseTestVideos] got %d results with" % resCandidates.size(), methodUsed

            # 4th METHOD: Check Content-Video results
            # NOTE(review): methodUsed is not updated here, so a content-based
            # hit is serialized under the "HomeTown" label; confirm it is wanted
            if resCandidates.size() == 0:
                resCandidates = self.getContentVideoResults(filename)
                if resCandidates.size() > 0:
                    print >> sys.stderr, "*> Used Content-Based Approach"
                    contentbased += 1
                else:
                    print >> sys.stderr, "*> NO Content-Based Information"

            # 5th METHOD: (if there are NOT RESULTS) define zero position
            if resCandidates.size() == 0:
                methodUsed = "DefaultCoordinates"
                nullCoordinates += 1
                resCandidates.add(-1.0, self.defaultCoordinates, "no results")
                if self.verbose:
                    print >> sys.stderr, "*> parseTestVideos] defined default coordinates"

            # return the bigger coordinates group
            if resCandidates.size() > 2:
                # always defined, so the second verbose section can rely on it
                candies = []
                if self.verbose:
                    print >> sys.stderr, "*> parseTestVideos] BiggestCoordinatesGroup: from %d results got 1" % resCandidates.size()
                    for res in resCandidates.getList():
                        candies.append(
                            str(res.getScore()) + "," +
                            str(res.getCoord()[0]) + "|" +
                            str(res.getCoord()[1]))
                resCandidates = self.computeBiggestCoordinatesFromResults(resCandidates)
                if self.verbose:
                    for can in candies:
                        print >> sys.stderr, can
                    print >> sys.stderr, "*> parseTestVideos] FINAL => ", resCandidates.getList()[0].getCoord()

            # write the buffer into a file: the evaluation output needs the
            # real coordinates, so it is skipped in the official run
            if not officialRun:
                self.serialize(resCandidates, realCoord, methodUsed)
            self.serializeFormatted(resCandidates, filename, url)
            # progress report every 100 lines
            if (lines % 100) == 0:
                print >> sys.stderr, "\r*> [TestSet] %2.2f%s parsed [%d usrLoc, %d usrHome, %d content-based, %d defCoordinates, %d noTags]" % (
                    (lines / totLines * 100), '%', userMostProbableLocation,
                    userHomeCoordinates, contentbased, nullCoordinates,
                    noTags),
        # final info
        t2 = time.time()
        print >> sys.stderr, "\r*> [TestSet]  %d videos [%d userLocation, %d hometown, %d content-based, %d defaultCoordinates, %d with no tags] ~%.2fs" % (
            lines, userMostProbableLocation, userHomeCoordinates, contentbased,
            nullCoordinates, noTags, (t2 - t1) * 1.0)
        # compute the average of the statistics
        print >> sys.stderr, "---"
        # an empty test file has no percentage to report (avoids dividing by zero)
        if not officialRun and lines > 0:
            for k in self.limits:
                p = float(self.stats[k]) / lines * 100
                print >> sys.stderr, "*> [TestSet] %d videos (%.2f%s) inside a radius of %dkm" % (
                    self.stats[k], p, '%', k)
        # close test file
        self.TestFile.close()
    def matchWithGroupsOfTags(self, tagsLine, mtagsLine):
        ''' Compare the test-video tags and machine tags with the TrainSet and GeoNames DB.

            Machine tags have priority: when present, the candidate
            GroupOfTags are collected only from the index entries of the
            machine tags; the tags (merged with the machine tags) are used
            only when that gave nothing.  Every candidate is then re-checked
            against all the tags to count the matches, the counts are turned
            into scores and the coordinates of the top-N candidates are
            returned.

            tagsLine  -- space-separated tags
            mtagsLine -- space-separated machine tags (may be empty)

            Returns a ResCandidates, empty when nothing matched.
        '''
        # create output with all the top Result Candidates
        resCandidates = ResCandidates()
        matches = MatchCandidates()
        tags = tagsLine.strip().split(" ")
        # both indexes are queried the same way, TrainSet first
        indexes = (self.TrainTagIndex, self.GeoNamesIndex)

        ########################################################
        # Get the GoT where the index matches with mtag or tag #
        ########################################################
        # If there are Machine Tags, retrieve the GroupOfTags just from them
        if len(mtagsLine.strip()) > 2:
            mtags = mtagsLine.strip().split(" ")
            tags = tags + mtags  # Merge the mtags with the tags
            for mtag in mtags:
                for index in indexes:
                    if mtag in index:
                        for groupOfTags in index[mtag]:
                            matches.update(groupOfTags, 0)
        # If the mtags didn't find any match, use the tags
        if matches.size() == 0:
            for tag in tags:
                for index in indexes:
                    if tag in index:
                        for groupOfTags in index[tag]:
                            matches.update(groupOfTags, 0)

        # if there are no matches, return the empty result
        if matches.size() == 0:
            return resCandidates

        #########################################################################
        # Count the number of matches between the tags+mtags and the candidates #
        #########################################################################
        # the matching type is an instance setting, like the other options
        matchingType = self.matchingType
        for tag in tags:
            for groupOfTags in matches.getKeys():
                if matchingType == 'partial':
                    # partial match = the tag is a substring of the whole group
                    if tag in groupOfTags:
                        matches.update(groupOfTags, 1)
                elif matchingType == 'perfect':
                    # perfect match = the tag is equal to one word of the
                    # group; every equal word counts
                    for got in groupOfTags.strip().split(" "):
                        if tag == got:
                            matches.update(groupOfTags, 1)

        # computing the score for each groups of tags
        matches.computeScores(self.scoreMetric, len(tags))
        # get the scores of the topN matches
        maxScores = matches.getTopNScores(self.topn)
        # get the keys of the topN matches
        topnKeys = matches.getKeysWithGivenScores(maxScores)
        if self.verbose:
            print >> sys.stderr, "matchWithGroupOfTags] we are here, maxScores: %d, topNelements: %d" % (
                maxScores[0], len(topnKeys))

        ########################################################################
        # From all the topN candidates compute the most likelihood coordinates #
        ########################################################################
        for got in topnKeys:
            # the key comes either from the TrainTags or from GeoNames
            if got in self.TrainTagCoord:
                coord = self.getMostLikelihoodCoordinates(self.TrainTagCoord[got])
            elif got in self.GeoNames:
                coord = self.getMostLikelihoodCoordinates(self.GeoNames[got])
            else:
                continue
            # skip the candidates without coordinates
            if len(coord) == 0:
                continue
            resCandidates.add(matches.getValue(got), coord, got)
        return resCandidates
	def multithreadTestVideosParser(self, line, varLock=""):
		''' Geolocate one line of the test set and serialize the outcome.

			The line is tab-separated; this method reads field 3 (tags),
			field 4 (machine tags), field 5 (real location, "lat|lon|...")
			and field 6 (owner info, "ownerId|hometown").  Candidates are
			searched with a cascade of methods, each one tried only while
			there are still no results:
			  1. tags matched against the groups of tags (TrainSet),
			  2. the most probable location of the owner,
			  3. the owner's hometown used as tags,
			  4. the default coordinates.
			More than two candidates are reduced to one with
			computeBiggestCoordinatesFromResults().  The candidates, the real
			coordinates and the name of the last method tried are finally
			passed to serialize().

			line    -- one raw line of the test set
			varLock -- not used by this method; kept for existing callers
		'''
		methodUsed = "All Methods"
		blk = line.strip().split("\t")
		# get the tags
		tags = clean_line( blk[3] )
		mtags = clean_line( blk[4] )
		print >> sys.stderr, "---"
		print >> sys.stderr, "* input tags: %s [mtags: %s]" % (tags, mtags)
		# Read Real Location: "45.516639|-122.681053|16|Portland|Oregon|etats-Unis|16"
		geo = blk[5].strip().split("|")
		realCoord = [float(geo[0]), float(geo[1])]
		resCandidates = ResCandidates()

		# 1st METHOD: check tags in training set
		if len(tags) > 1:
			methodUsed = "TrainSet"
			resCandidates = self.matchWithGroupsOfTags( tags, mtags )
		else:
			# no usable tags: reported under the initial "All Methods" label
			self.updateLockVariables( methodUsed )
		if self.verbose:
			print >> sys.stderr, "*> parseTestVideos] got %d results with" % resCandidates.size(), methodUsed

		# 2nd METHOD: (if there are NOT RESULTS) use the most probable location for this user
		if resCandidates.size() == 0:
			methodUsed = "UserCommonLocation"
			ownId = blk[6].split("|")[0]
			# NOTE(review): parseTestVideos lower-cases the owner id before this
			# lookup; confirm which form the keys of UserPlaces use
			resCandidates = self.getUserMostProbableLocation( ownId )
			if resCandidates.size() > 0:
				self.updateLockVariables( methodUsed )
			if self.verbose:
				print >> sys.stderr, "*> parseTestVideos] got %d results with" % resCandidates.size(), methodUsed

		# 3rd METHOD: (if there are NOT RESULTS) USE the USER HOMETOWN as tags
		if resCandidates.size() == 0:
			methodUsed = "HomeTown"
			# Owner location: ['14678786@N00', 'milwaukee, United States']
			ownGeo = blk[6].split("|")
			# an owner field without the hometown part simply yields no tags
			ht = ownGeo[1].split(",") if len(ownGeo) > 1 else []
			# keep only the pieces longer than one character
			hometownTags = " ".join( [ w for w in ht if len(w) > 1 ] )
			# test the length: comparing the string itself with a number is
			# always true in Python 2
			if len(hometownTags) >= 1:
				# check with the new set of tags
				resCandidates = self.matchWithGroupsOfTags( hometownTags, "" )
				if resCandidates.size() > 0:
					self.updateLockVariables( methodUsed )
				if self.verbose:
					print >> sys.stderr, "*> parseTestVideos] got %d results with" % resCandidates.size(), methodUsed

		# 4th METHOD: (if there are NOT RESULTS) define zero position
		if resCandidates.size() == 0:
			methodUsed = "DefaultCoordinates"
			self.updateLockVariables( methodUsed )
			resCandidates.add( -1.0, self.defaultCoordinates, "no results" )
			if self.verbose:
				print >> sys.stderr, "*> parseTestVideos] defined default coordinates"

		# return the bigger coordinates group
		if resCandidates.size() > 2:
			# always defined, so the second verbose section can rely on it
			candies = []
			if self.verbose:
				print >> sys.stderr, "*> parseTestVideos] BiggestCoordinatesGroup: from %d results got 1" % resCandidates.size()
				for res in resCandidates.getList():
					candies.append( str(res.getScore()) +","+ str(res.getCoord()[0]) +"|"+ str(res.getCoord()[1]) )
			resCandidates = self.computeBiggestCoordinatesFromResults( resCandidates )
			if self.verbose:
				for can in candies:
					print >> sys.stderr, can
				print >> sys.stderr, "*> parseTestVideos] FINAL => ", resCandidates.getList()[0].getCoord()

		# write the buffer into a file
		self.serialize( resCandidates, realCoord, methodUsed )
		print >> sys.stderr, "---\n"
	def parseTestVideos(self, withMtags=True, officialRun=False ):
		''' Geolocate every video of the test set and serialize the predictions.

			Each line of self.TestFile is tab-separated
			(videoId, title, url, tags, mtags, location, ownerLocation);
			the filename is the text before the first "|" of field 1 with
			'.jpg' removed, the url is field 2, and tags, machine tags, real
			location and owner info come from getInfoFromTestVideo().
			Candidates are searched with a cascade of methods, each one tried
			only while there are still no results:
			  1. tags matched against the groups of tags (TrainSet),
			  2. the most probable location of the owner,
			  3. the owner's hometown used as tags,
			  4. the content-based results stored for the filename,
			  5. the default coordinates.
			More than two candidates are reduced to one with
			computeBiggestCoordinatesFromResults().  The test file is closed
			at the end.

			withMtags   -- forwarded to getInfoFromTestVideo()
			officialRun -- when True the real coordinates are not read and
			               only serializeFormatted() is called; otherwise
			               serialize() is called as well and the per-radius
			               counters of self.stats are reported at the end
		'''
		print >> sys.stderr, "*> [SelectCoordMethod: %s] [ScoreMetric: %s] [MatchingType: %s]" % ( self.selectCoordMethod, self.scoreMetric, self.matchingType )
		print >> sys.stderr, "*> [TestSet]",
		t1 = time.time()
		userMostProbableLocation = 0
		userHomeCoordinates = 0
		nullCoordinates = 0
		contentbased = 0
		noTags = 0
		# hard-coded total, only the denominator of the progress percentage
		totLines = 4532.0
		lines = 0
		# videoId <t> title <t> url <t> tags <t> mtags <t> location <t> ownerLocation
		for line in self.TestFile:
			lines += 1
			print >> sys.stderr, "---"
			methodUsed = "All Methods"
			# get filename and url
			blk = line.strip().split("\t")
			url = blk[2].strip()
			# str.replace returns a new string, so the result has to be kept
			filename = blk[1].split("|")[0].replace('.jpg', '')
			# get the tags, mtags, geo and ownGeo
			tags, mtags, geo, ownGeo = self.getInfoFromTestVideo( line, withMtags )
			realCoord = [float(geo[0]), float(geo[1])] if not officialRun else ""
			resCandidates = ResCandidates()

			# 1st METHOD: check tags in training set
			if len(tags) > 1:
				methodUsed = "TrainSet"
				resCandidates = self.matchWithGroupsOfTags( tags, mtags )
			else:
				noTags += 1
			if self.verbose:
				print >> sys.stderr, "*> parseTestVideos] got %d results with" % resCandidates.size(), methodUsed

			# 2nd METHOD: (if there are NOT RESULTS) use the most probable location for this user
			if resCandidates.size() == 0:
				methodUsed = "UserCommonLocation"
				ownId = ownGeo[0]
				resCandidates = self.getUserMostProbableLocation( ownId.lower() )
				if resCandidates.size() > 0:
					print >> sys.stderr, "*> Used UserCommonLocation for user %s" % ownId
					userMostProbableLocation += 1
				else:
					print >> sys.stderr, "*> NO UserCommonLocation for user %s" % ownId
				if self.verbose:
					print >> sys.stderr, "*> parseTestVideos] got %d results with" % resCandidates.size(), methodUsed

			# 3rd METHOD: (if there are NOT RESULTS) USE the USER HOMETOWN as tags
			if resCandidates.size() == 0:
				methodUsed = "HomeTown"
				# an owner record without the hometown part simply yields no tags
				ht = ownGeo[1].split(",") if len(ownGeo) > 1 else []
				# keep only the pieces longer than one character
				hometownTags = " ".join( [ w for w in ht if len(w) > 1 ] )
				# Without usable hometown tags the lookup is skipped, but the
				# video still goes through the remaining fallbacks and gets
				# serialized (a 'continue' here would drop it from the output)
				if len(hometownTags) >= 2:
					resCandidates = self.matchWithGroupsOfTags( hometownTags, "" )
					if resCandidates.size() > 0:
						userHomeCoordinates += 1
				if self.verbose:
					print >> sys.stderr, "*> parseTestVideos] got %d results with" % resCandidates.size(), methodUsed

			# 4th METHOD: Check Content-Video results
			# NOTE(review): methodUsed is not updated here, so a content-based
			# hit is serialized under the "HomeTown" label; confirm it is wanted
			if resCandidates.size() == 0:
				resCandidates = self.getContentVideoResults( filename )
				if resCandidates.size() > 0:
					print >> sys.stderr, "*> Used Content-Based Approach"
					contentbased += 1
				else:
					print >> sys.stderr, "*> NO Content-Based Information"

			# 5th METHOD: (if there are NOT RESULTS) define zero position
			if resCandidates.size() == 0:
				methodUsed = "DefaultCoordinates"
				nullCoordinates += 1
				resCandidates.add( -1.0, self.defaultCoordinates, "no results" )
				if self.verbose:
					print >> sys.stderr, "*> parseTestVideos] defined default coordinates"

			# return the bigger coordinates group
			if resCandidates.size() > 2:
				# always defined, so the second verbose section can rely on it
				candies = []
				if self.verbose:
					print >> sys.stderr, "*> parseTestVideos] BiggestCoordinatesGroup: from %d results got 1" % resCandidates.size()
					for res in resCandidates.getList():
						candies.append( str(res.getScore()) +","+ str(res.getCoord()[0]) +"|"+ str(res.getCoord()[1]) )
				resCandidates = self.computeBiggestCoordinatesFromResults( resCandidates )
				if self.verbose:
					for can in candies:
						print >> sys.stderr, can
					print >> sys.stderr, "*> parseTestVideos] FINAL => ", resCandidates.getList()[0].getCoord()

			# write the buffer into a file: the evaluation output needs the
			# real coordinates, so it is skipped in the official run
			if not officialRun:
				self.serialize( resCandidates, realCoord, methodUsed )
			self.serializeFormatted( resCandidates, filename, url )
			# progress report every 100 lines
			if (lines % 100) == 0:
				print >> sys.stderr, "\r*> [TestSet] %2.2f%s parsed [%d usrLoc, %d usrHome, %d content-based, %d defCoordinates, %d noTags]" % ( (lines/totLines*100), '%', userMostProbableLocation, userHomeCoordinates, contentbased, nullCoordinates, noTags ),
		# final info
		t2 = time.time()
		print >> sys.stderr, "\r*> [TestSet]  %d videos [%d userLocation, %d hometown, %d content-based, %d defaultCoordinates, %d with no tags] ~%.2fs" % ( lines, userMostProbableLocation, userHomeCoordinates, contentbased, nullCoordinates, noTags, (t2-t1)*1.0 )
		# compute the average of the statistics
		print >> sys.stderr, "---"
		# an empty test file has no percentage to report (avoids dividing by zero)
		if not officialRun and lines > 0:
			for k in self.limits:
				p = float(self.stats[k])/lines*100
				print >> sys.stderr, "*> [TestSet] %d videos (%.2f%s) inside a radius of %dkm" % (self.stats[k], p, '%', k)
		# close test file
		self.TestFile.close()
	def matchWithGroupsOfTags( self, tagsLine, mtagsLine ):
		''' Compare the test-video tags and machine tags with the TrainSet and GeoNames DB.

			Machine tags have priority: when present, the candidate
			GroupOfTags are collected only from the index entries of the
			machine tags; the tags (merged with the machine tags) are used
			only when that gave nothing.  Every candidate is then re-checked
			against all the tags to count the matches, the counts are turned
			into scores and the coordinates of the top-N candidates are
			returned.

			tagsLine  -- space-separated tags
			mtagsLine -- space-separated machine tags (may be empty)

			Returns a ResCandidates, empty when nothing matched.
		'''
		# create output with all the top Result Candidates
		resCandidates = ResCandidates()
		matches = MatchCandidates()
		tags = tagsLine.strip().split(" ")
		# both indexes are queried the same way, TrainSet first
		indexes = ( self.TrainTagIndex, self.GeoNamesIndex )

		########################################################
		# Get the GoT where the index matches with mtag or tag #
		########################################################
		# If there are Machine Tags, retrieve the GroupOfTags just from them
		if len(mtagsLine.strip()) > 2:
			mtags = mtagsLine.strip().split(" ")
			tags = tags + mtags # Merge the mtags with the tags
			for mtag in mtags:
				for index in indexes:
					if mtag in index:
						for groupOfTags in index[mtag]:
							matches.update(groupOfTags, 0)
		# If the mtags didn't find any match, use the tags
		if matches.size() == 0:
			for tag in tags:
				for index in indexes:
					if tag in index:
						for groupOfTags in index[tag]:
							matches.update(groupOfTags, 0)

		# if there are no matches, return the empty result
		if matches.size() == 0:
			return resCandidates

		#########################################################################
		# Count the number of matches between the tags+mtags and the candidates #
		#########################################################################
		# the matching type is an instance setting, like the other options
		matchingType = self.matchingType
		for tag in tags:
			for groupOfTags in matches.getKeys():
				if matchingType == 'partial':
					# partial match = the tag is a substring of the whole group
					if tag in groupOfTags:
						matches.update(groupOfTags, 1)
				elif matchingType == 'perfect':
					# perfect match = the tag is equal to one word of the
					# group; every equal word counts
					for got in groupOfTags.strip().split(" "):
						if tag == got:
							matches.update(groupOfTags, 1)

		# computing the score for each groups of tags
		matches.computeScores( self.scoreMetric, len(tags) )
		# get the scores of the topN matches
		maxScores = matches.getTopNScores( self.topn )
		# get the keys of the topN matches
		topnKeys = matches.getKeysWithGivenScores( maxScores )
		if self.verbose:
			print >> sys.stderr, "matchWithGroupOfTags] we are here, maxScores: %d, topNelements: %d" % ( maxScores[0], len(topnKeys))

		########################################################################
		# From all the topN candidates compute the most likelihood coordinates #
		########################################################################
		for got in topnKeys:
			# the key comes either from the TrainTags or from GeoNames
			if got in self.TrainTagCoord:
				coord = self.getMostLikelihoodCoordinates( self.TrainTagCoord[got] )
			elif got in self.GeoNames:
				coord = self.getMostLikelihoodCoordinates( self.GeoNames[got] )
			else:
				continue
			# skip the candidates without coordinates
			if len(coord) == 0:
				continue
			resCandidates.add( matches.getValue(got), coord, got )
		return resCandidates