def multithreadTestVideosParser(self, line, varLock=""):
        ''' Geolocate one test video described by a tab-separated meta-data line.

            Candidate locations are searched with a cascade of methods, each
            one tried only when the previous ones returned no candidates:
              1. match the video tags against the training set;
              2. use the most probable location of the video owner;
              3. use the owner hometown as tags;
              4. fall back to self.defaultCoordinates.
            When more than two candidates remain they are reduced with
            self.computeBiggestCoordinatesFromResults(); the result is then
            passed to self.serialize() together with the real coordinates
            and the name of the method that produced it.

            line    -- tab-separated record: field 3 holds the tags, field 4
                       the machine tags, field 5 the real location
                       ("lat|lon|...") and field 6 the owner ("id|hometown")
            varLock -- accepted for interface compatibility, not used here
        '''
        methodUsed = "All Methods"
        blk = line.strip().split("\t")
        # get the tags; mtags (geo machine tags) are handed to the matcher
        # separately instead of being appended to the plain tags
        tags = clean_line(blk[3])
        mtags = clean_line(blk[4])
        print >> sys.stderr, "---"
        print >> sys.stderr, "* input tags: %s [mtags: %s]" % (tags, mtags)
        # Read Real Location: "45.516639|-122.681053|16|Portland|Oregon|etats-Unis|16"
        geo = blk[5].strip().split("|")
        realCoord = [float(geo[0]), float(geo[1])]
        resCandidates = ResCandidates()

        # 1st METHOD: check tags in training set
        if len(tags) > 1:
            methodUsed = "TrainSet"
            resCandidates = self.matchWithGroupsOfTags(tags, mtags)
        else:
            # no usable tags: the counter is updated under "All Methods"
            self.updateLockVariables(methodUsed)
        # print out info
        if self.verbose:
            print >> sys.stderr, "*> parseTestVideos] got %d results with" % resCandidates.size(), methodUsed

        # 2nd METHOD: (if there are NOT RESULTS) use the most probable location for this user
        if resCandidates.size() == 0:
            methodUsed = "UserCommonLocation"
            ownId = blk[6].split("|")[0]
            resCandidates = self.getUserMostProbableLocation(ownId)
            if resCandidates.size() > 0:
                self.updateLockVariables(methodUsed)
            # print out info
            if self.verbose:
                print >> sys.stderr, "*> parseTestVideos] got %d results with" % resCandidates.size(), methodUsed

        # 3rd METHOD: (if there are NOT RESULTS) USE the USER HOMETOWN as tags
        if resCandidates.size() == 0:
            methodUsed = "HomeTown"
            # Owner location: ['14678786@N00', 'milwaukee, United States']
            ownGeo = blk[6].split("|")
            # keep only the comma-separated hometown parts longer than one character
            hometownTags = " ".join([w for w in ownGeo[1].split(",") if len(w) > 1])
            # test the length, not the string itself: comparing a str with an
            # int is always True in Python 2, so empty hometowns were matched too
            if len(hometownTags) >= 1:
                # check with the new set of tags
                resCandidates = self.matchWithGroupsOfTags(hometownTags, "")
                if resCandidates.size() > 0:
                    self.updateLockVariables(methodUsed)
                # print out info
                if self.verbose:
                    print >> sys.stderr, "*> parseTestVideos] got %d results with" % resCandidates.size(), methodUsed

        # 4th METHOD: (if there are NOT RESULTS) define zero position
        if resCandidates.size() == 0:
            methodUsed = "DefaultCoordinates"
            self.updateLockVariables(methodUsed)
            resCandidates.add(-1.0, self.defaultCoordinates, "no results")
            # print out info
            if self.verbose:
                print >> sys.stderr, "*> parseTestVideos] defined default coordinates"

        # return the bigger coordinates group
        if resCandidates.size() > 2:
            # always bound, so the second verbose block below cannot hit an
            # undefined name
            candies = []
            # print out info
            if self.verbose:
                print >> sys.stderr, "*> parseTestVideos] BiggestCoordinatesGroup: from %d results got 1" % resCandidates.size()
                # remember the candidates before they are reduced
                for res in resCandidates.getList():
                    coord = res.getCoord()
                    candies.append(str(res.getScore()) + "," + str(coord[0]) + "|" + str(coord[1]))
            resCandidates = self.computeBiggestCoordinatesFromResults(resCandidates)
            # print out info
            if self.verbose:
                for can in candies:
                    print >> sys.stderr, can
                print >> sys.stderr, "*> parseTestVideos] FINAL => ", resCandidates.getList()[0].getCoord()

        # write the buffer into a file
        self.serialize(resCandidates, realCoord, methodUsed)
        print >> sys.stderr, "---\n"
	def multithreadTestVideosParser(self, line, varLock=""):
		''' Geolocate one test video described by a tab-separated meta-data line.

			Candidate locations are searched with a cascade of methods, each
			one tried only when the previous ones returned no candidates:
			  1. match the video tags against the training set;
			  2. use the most probable location of the video owner;
			  3. use the owner hometown as tags;
			  4. fall back to self.defaultCoordinates.
			When more than two candidates remain they are reduced with
			self.computeBiggestCoordinatesFromResults(); the result is then
			passed to self.serialize() together with the real coordinates
			and the name of the method that produced it.

			line    -- tab-separated record: field 3 holds the tags, field 4
			           the machine tags, field 5 the real location
			           ("lat|lon|...") and field 6 the owner ("id|hometown")
			varLock -- accepted for interface compatibility, not used here
		'''
		methodUsed = "All Methods"
		blk = line.strip().split("\t")
		# get the tags; mtags (geo machine tags) are handed to the matcher
		# separately instead of being appended to the plain tags
		tags = clean_line( blk[3] )
		mtags = clean_line( blk[4] )
		print >> sys.stderr, "---"
		print >> sys.stderr, "* input tags: %s [mtags: %s]" % (tags, mtags)
		# Read Real Location: "45.516639|-122.681053|16|Portland|Oregon|etats-Unis|16"
		geo = blk[5].strip().split("|")
		realCoord = [float(geo[0]), float(geo[1])]
		resCandidates = ResCandidates()

		# 1st METHOD: check tags in training set
		if len(tags) > 1:
			methodUsed = "TrainSet"
			resCandidates = self.matchWithGroupsOfTags( tags, mtags )
		else:
			# no usable tags: the counter is updated under "All Methods"
			self.updateLockVariables( methodUsed )
		# print out info
		if self.verbose:
			print >> sys.stderr, "*> parseTestVideos] got %d results with" % resCandidates.size(), methodUsed

		# 2nd METHOD: (if there are NOT RESULTS) use the most probable location for this user
		if resCandidates.size() == 0:
			methodUsed = "UserCommonLocation"
			ownId = blk[6].split("|")[0]
			resCandidates = self.getUserMostProbableLocation( ownId )
			if resCandidates.size() > 0:
				self.updateLockVariables( methodUsed )
			# print out info
			if self.verbose:
				print >> sys.stderr, "*> parseTestVideos] got %d results with" % resCandidates.size(), methodUsed

		# 3rd METHOD: (if there are NOT RESULTS) USE the USER HOMETOWN as tags
		if resCandidates.size() == 0:
			methodUsed = "HomeTown"
			# Owner location: ['14678786@N00', 'milwaukee, United States']
			ownGeo = blk[6].split("|")
			# keep only the comma-separated hometown parts longer than one character
			hometownTags = " ".join( [w for w in ownGeo[1].split(",") if len(w) > 1] )
			# test the length, not the string itself: comparing a str with an
			# int is always True in Python 2, so empty hometowns were matched too
			if len(hometownTags) >= 1:
				# check with the new set of tags
				resCandidates = self.matchWithGroupsOfTags( hometownTags, "" )
				if resCandidates.size() > 0:
					self.updateLockVariables( methodUsed )
				# print out info
				if self.verbose:
					print >> sys.stderr, "*> parseTestVideos] got %d results with" % resCandidates.size(), methodUsed

		# 4th METHOD: (if there are NOT RESULTS) define zero position
		if resCandidates.size() == 0:
			methodUsed = "DefaultCoordinates"
			self.updateLockVariables( methodUsed )
			resCandidates.add( -1.0, self.defaultCoordinates, "no results" )
			# print out info
			if self.verbose:
				print >> sys.stderr, "*> parseTestVideos] defined default coordinates"

		# return the bigger coordinates group
		if resCandidates.size() > 2:
			# always bound, so the second verbose block below cannot hit an
			# undefined name
			candies = []
			# print out info
			if self.verbose:
				print >> sys.stderr, "*> parseTestVideos] BiggestCoordinatesGroup: from %d results got 1" % resCandidates.size()
				# remember the candidates before they are reduced
				for res in resCandidates.getList():
					coord = res.getCoord()
					candies.append( str(res.getScore()) +","+ str(coord[0]) +"|"+ str(coord[1]) )
			resCandidates = self.computeBiggestCoordinatesFromResults( resCandidates )
			# print out info
			if self.verbose:
				for can in candies:
					print >> sys.stderr, can
				print >> sys.stderr, "*> parseTestVideos] FINAL => ", resCandidates.getList()[0].getCoord()

		# write the buffer into a file
		self.serialize( resCandidates, realCoord, methodUsed )
		print >> sys.stderr, "---\n"
    def parseTestVideos(self, withMtags=True, officialRun=False):
        ''' Given the TestSet file, read and parse each video meta-data,
            select the tags and retrieve the most suitable places for those tags.

            For every line of self.TestFile the candidate locations are
            searched with a cascade of methods, each one tried only when the
            previous ones returned no candidates: training-set tags, the most
            probable location of the owner, the owner hometown used as tags,
            the content-based results and finally self.defaultCoordinates.
            Progress and summary counters are printed on stderr and
            self.TestFile is closed at the end.

            withMtags   -- forwarded to self.getInfoFromTestVideo()
            officialRun -- when true the real coordinates are not read, only
                           self.serializeFormatted() is called for each video
                           and the per-radius statistics are not printed
        '''
        print >> sys.stderr, "*> [SelectCoordMethod: %s] [ScoreMetric: %s] [MatchingType: %s]" % (
            self.selectCoordMethod, self.scoreMetric, self.matchingType)
        print >> sys.stderr, "*> [TestSet]",
        t1 = time.time()
        userMostProbableLocation = 0
        userHomeCoordinates = 0
        nullCoordinates = 0
        contentbased = 0
        noTags = 0
        # NOTE(review): hard-coded total, used only for the progress
        # percentage; presumably the size of the test set -- confirm
        totLines = 4532.0
        lines = 0
        # videoId <t> title <t> url <t> tags <t> mtags <t> location <t> ownerLocation
        for line in self.TestFile:
            lines += 1
            print >> sys.stderr, "---"
            methodUsed = "All Methods"
            ################
            # get filename and url
            blk = line.strip().split("\t")
            url = blk[2].strip()
            # str.replace returns a new string, so the result must be kept
            # for the extension to be actually removed
            filename = blk[1].split("|")[0].replace('.jpg', '')
            ################
            # get the tags, mtags, geo and ownGeo
            tags, mtags, geo, ownGeo = self.getInfoFromTestVideo(line, withMtags)
            realCoord = [float(geo[0]), float(geo[1])] if not officialRun else ""
            resCandidates = ResCandidates()

            # 1st METHOD: check tags in training set
            if len(tags) > 1:
                methodUsed = "TrainSet"
                resCandidates = self.matchWithGroupsOfTags(tags, mtags)
            else:
                noTags += 1
            # print out info
            if self.verbose:
                print >> sys.stderr, "*> parseTestVideos] got %d results with" % resCandidates.size(), methodUsed

            # 2nd METHOD: (if there are NOT RESULTS) use the most probable location for this user
            if resCandidates.size() == 0:
                methodUsed = "UserCommonLocation"
                ownId = ownGeo[0]
                resCandidates = self.getUserMostProbableLocation(ownId.lower())
                if resCandidates.size() > 0:
                    print >> sys.stderr, "*> Used UserCommonLocation for user %s" % ownId
                    userMostProbableLocation += 1
                else:
                    print >> sys.stderr, "*> NO UserCommonLocation for user %s" % ownId
                # print out info
                if self.verbose:
                    print >> sys.stderr, "*> parseTestVideos] got %d results with" % resCandidates.size(), methodUsed

            # 3rd METHOD: (if there are NOT RESULTS) USE the USER HOMETOWN as tags
            if resCandidates.size() == 0:
                methodUsed = "HomeTown"
                # keep only the comma-separated hometown parts longer than one character
                hometownTags = " ".join([w for w in ownGeo[1].split(",") if len(w) > 1])
                # test the length, not the string itself (a str/int comparison
                # has a fixed outcome in Python 2); a too short hometown skips
                # only this method, so the video still reaches the next
                # fallbacks and gets serialized
                if len(hometownTags) >= 2:
                    # check with the new set of tags
                    resCandidates = self.matchWithGroupsOfTags(hometownTags, "")
                    if resCandidates.size() > 0:
                        userHomeCoordinates += 1
                    # print out info
                    if self.verbose:
                        print >> sys.stderr, "*> parseTestVideos] got %d results with" % resCandidates.size(), methodUsed

            # 4th METHOD: Check Content-Video results
            # NOTE(review): methodUsed is not updated here, so content-based
            # results are serialized under the "HomeTown" label -- confirm
            if resCandidates.size() == 0:
                resCandidates = self.getContentVideoResults(filename)
                if resCandidates.size() > 0:
                    print >> sys.stderr, "*> Used Content-Based Approach"
                    contentbased += 1
                else:
                    print >> sys.stderr, "*> NO Content-Based Information"

            # 5th METHOD: (if there are NOT RESULTS) define zero position
            if resCandidates.size() == 0:
                methodUsed = "DefaultCoordinates"
                nullCoordinates += 1
                resCandidates.add(-1.0, self.defaultCoordinates, "no results")
                # print out info
                if self.verbose:
                    print >> sys.stderr, "*> parseTestVideos] defined default coordinates"

            # return the bigger coordinates group
            if resCandidates.size() > 2:
                # always bound, so the second verbose block below cannot hit
                # an undefined or stale name
                candies = []
                # print out info
                if self.verbose:
                    print >> sys.stderr, "*> parseTestVideos] BiggestCoordinatesGroup: from %d results got 1" % resCandidates.size()
                    # remember the candidates before they are reduced
                    for res in resCandidates.getList():
                        coord = res.getCoord()
                        candies.append(str(res.getScore()) + "," + str(coord[0]) + "|" + str(coord[1]))
                resCandidates = self.computeBiggestCoordinatesFromResults(resCandidates)
                # print out info
                if self.verbose:
                    for can in candies:
                        print >> sys.stderr, can
                    print >> sys.stderr, "*> parseTestVideos] FINAL => ", resCandidates.getList()[0].getCoord()

            # write the buffer into a file: the evaluation output needs the
            # real coordinates, the formatted output is always produced
            if not officialRun:
                self.serialize(resCandidates, realCoord, methodUsed)
            self.serializeFormatted(resCandidates, filename, url)
            # print info
            if (lines % 100) == 0:
                print >> sys.stderr, "\r*> [TestSet] %2.2f%s parsed [%d usrLoc, %d usrHome, %d content-based, %d defCoordinates, %d noTags]" % (
                    (lines / totLines * 100), '%', userMostProbableLocation,
                    userHomeCoordinates, contentbased, nullCoordinates,
                    noTags),
        # final info
        t2 = time.time()
        print >> sys.stderr, "\r*> [TestSet]  %d videos [%d userLocation, %d hometown, %d content-based, %d defaultCoordinates, %d with no tags] ~%.2fs" % (
            lines, userMostProbableLocation, userHomeCoordinates, contentbased,
            nullCoordinates, noTags, (t2 - t1) * 1.0)
        # compute the average of the statistics
        print >> sys.stderr, "---"
        if not officialRun:
            for k in self.limits:
                # an empty test file must not raise a ZeroDivisionError
                p = float(self.stats[k]) / lines * 100 if lines else 0.0
                print >> sys.stderr, "*> [TestSet] %d videos (%.2f%s) inside a radius of %dkm" % (
                    self.stats[k], p, '%', k)
        # close test file
        self.TestFile.close()
	def parseTestVideos(self, withMtags=True, officialRun=False ):
		''' Given the TestSet file, read and parse each video meta-data,
			select the tags and retrieve the most suitable places for those tags.

			For every line of self.TestFile the candidate locations are
			searched with a cascade of methods, each one tried only when the
			previous ones returned no candidates: training-set tags, the most
			probable location of the owner, the owner hometown used as tags,
			the content-based results and finally self.defaultCoordinates.
			Progress and summary counters are printed on stderr and
			self.TestFile is closed at the end.

			withMtags   -- forwarded to self.getInfoFromTestVideo()
			officialRun -- when true the real coordinates are not read, only
			               self.serializeFormatted() is called for each video
			               and the per-radius statistics are not printed
		'''
		print >> sys.stderr, "*> [SelectCoordMethod: %s] [ScoreMetric: %s] [MatchingType: %s]" % ( self.selectCoordMethod, self.scoreMetric, self.matchingType )
		print >> sys.stderr, "*> [TestSet]",
		t1 = time.time()
		userMostProbableLocation = 0
		userHomeCoordinates = 0
		nullCoordinates = 0
		contentbased = 0
		noTags = 0
		# NOTE(review): hard-coded total, used only for the progress
		# percentage; presumably the size of the test set -- confirm
		totLines = 4532.0
		lines = 0
		# videoId <t> title <t> url <t> tags <t> mtags <t> location <t> ownerLocation
		for line in self.TestFile:
			lines += 1
			print >> sys.stderr, "---"
			methodUsed = "All Methods"
			################
			# get filename and url
			blk = line.strip().split("\t")
			url = blk[2].strip()
			# str.replace returns a new string, so the result must be kept
			# for the extension to be actually removed
			filename = blk[1].split("|")[0].replace('.jpg', '')
			################
			# get the tags, mtags, geo and ownGeo
			tags, mtags, geo, ownGeo = self.getInfoFromTestVideo( line, withMtags )
			realCoord = [float(geo[0]), float(geo[1])] if not officialRun else ""
			resCandidates = ResCandidates()

			# 1st METHOD: check tags in training set
			if len(tags) > 1:
				methodUsed = "TrainSet"
				resCandidates = self.matchWithGroupsOfTags( tags, mtags )
			else:
				noTags += 1
			# print out info
			if self.verbose:
				print >> sys.stderr, "*> parseTestVideos] got %d results with" % resCandidates.size(), methodUsed

			# 2nd METHOD: (if there are NOT RESULTS) use the most probable location for this user
			if resCandidates.size() == 0:
				methodUsed = "UserCommonLocation"
				ownId = ownGeo[0]
				resCandidates = self.getUserMostProbableLocation( ownId.lower() )
				if resCandidates.size() > 0:
					print >> sys.stderr, "*> Used UserCommonLocation for user %s" % ownId
					userMostProbableLocation += 1
				else:
					print >> sys.stderr, "*> NO UserCommonLocation for user %s" % ownId
				# print out info
				if self.verbose:
					print >> sys.stderr, "*> parseTestVideos] got %d results with" % resCandidates.size(), methodUsed

			# 3rd METHOD: (if there are NOT RESULTS) USE the USER HOMETOWN as tags
			if resCandidates.size() == 0:
				methodUsed = "HomeTown"
				# keep only the comma-separated hometown parts longer than one character
				hometownTags = " ".join( [w for w in ownGeo[1].split(",") if len(w) > 1] )
				# test the length, not the string itself (a str/int comparison
				# has a fixed outcome in Python 2); a too short hometown skips
				# only this method, so the video still reaches the next
				# fallbacks and gets serialized
				if len(hometownTags) >= 2:
					# check with the new set of tags
					resCandidates = self.matchWithGroupsOfTags( hometownTags, "" )
					if resCandidates.size() > 0:
						userHomeCoordinates += 1
					# print out info
					if self.verbose:
						print >> sys.stderr, "*> parseTestVideos] got %d results with" % resCandidates.size(), methodUsed

			# 4th METHOD: Check Content-Video results
			# NOTE(review): methodUsed is not updated here, so content-based
			# results are serialized under the "HomeTown" label -- confirm
			if resCandidates.size() == 0:
				resCandidates = self.getContentVideoResults( filename )
				if resCandidates.size() > 0:
					print >> sys.stderr, "*> Used Content-Based Approach"
					contentbased += 1
				else:
					print >> sys.stderr, "*> NO Content-Based Information"

			# 5th METHOD: (if there are NOT RESULTS) define zero position
			if resCandidates.size() == 0:
				methodUsed = "DefaultCoordinates"
				nullCoordinates += 1
				resCandidates.add( -1.0, self.defaultCoordinates, "no results" )
				# print out info
				if self.verbose:
					print >> sys.stderr, "*> parseTestVideos] defined default coordinates"

			# return the bigger coordinates group
			if resCandidates.size() > 2:
				# always bound, so the second verbose block below cannot hit
				# an undefined or stale name
				candies = []
				# print out info
				if self.verbose:
					print >> sys.stderr, "*> parseTestVideos] BiggestCoordinatesGroup: from %d results got 1" % resCandidates.size()
					# remember the candidates before they are reduced
					for res in resCandidates.getList():
						coord = res.getCoord()
						candies.append( str(res.getScore()) +","+ str(coord[0]) +"|"+ str(coord[1]) )
				resCandidates = self.computeBiggestCoordinatesFromResults( resCandidates )
				# print out info
				if self.verbose:
					for can in candies:
						print >> sys.stderr, can
					print >> sys.stderr, "*> parseTestVideos] FINAL => ", resCandidates.getList()[0].getCoord()

			# write the buffer into a file: the evaluation output needs the
			# real coordinates, the formatted output is always produced
			if not officialRun:
				self.serialize( resCandidates, realCoord, methodUsed )
			self.serializeFormatted( resCandidates, filename, url )
			# print info
			if (lines % 100) == 0:
				print >> sys.stderr, "\r*> [TestSet] %2.2f%s parsed [%d usrLoc, %d usrHome, %d content-based, %d defCoordinates, %d noTags]" % ( (lines/totLines*100), '%', userMostProbableLocation, userHomeCoordinates, contentbased, nullCoordinates, noTags ),
		# final info
		t2 = time.time()
		print >> sys.stderr, "\r*> [TestSet]  %d videos [%d userLocation, %d hometown, %d content-based, %d defaultCoordinates, %d with no tags] ~%.2fs" % ( lines, userMostProbableLocation, userHomeCoordinates, contentbased, nullCoordinates, noTags, (t2-t1)*1.0 )
		# compute the average of the statistics
		print >> sys.stderr, "---"
		if not officialRun:
			for k in self.limits:
				# an empty test file must not raise a ZeroDivisionError
				p = float(self.stats[k])/lines*100 if lines else 0.0
				print >> sys.stderr, "*> [TestSet] %d videos (%.2f%s) inside a radius of %dkm" % (self.stats[k], p, '%', k)
		# close test file
		self.TestFile.close()