class MovieTensor:
    """Build a 3-mode 0/1 tensor from the movie database and print its latent semantics.

    ``model == 1`` builds an actor x movie x year tensor and ``model == 2``
    a tag x movie x rating tensor. The tensor is decomposed with
    ``dec.parafac(..., rank=5)`` and, for every component of every mode, the
    entries whose loading is at or above that component's mean are printed.
    Any other ``model`` value makes getTensor() do nothing.
    """
    model = None
    db = None
    tfIdf = None

    def __init__(self, model):
        """Remember the model number and open the database handle."""
        self.model = model
        self.db = DBConnect()
        # The TFIDF instance is used here only for its getCount() helper,
        # which turns a stringified result row into a bare number string.
        self.tfIdf = TFIDF("", "", "_actor_")

    def getListAsString(self, moviesList):
        """Return str(moviesList) with the square brackets turned into
        parentheses, e.g. [1, 2] -> "(1, 2)", for use in an SQL IN clause."""
        return str(moviesList).replace('[', '(').replace(']', ')')

    def getTensor(self):
        """Build, decompose and print the tensor selected by ``self.model``."""
        if self.model == 1:
            self._actorMovieYearSemantics()
        elif self.model == 2:
            self._tagMovieRatingSemantics()

    def _fetchCount(self, query):
        """Run a single-value count query and return the value as an int."""
        res = self.db.executeQuery(query)
        return int(self.tfIdf.getCount(str(res[0])))

    def _printLatentSemantics(self, matrix, modes):
        """Decompose ``matrix`` and print the above-mean members of each component.

        ``modes`` holds one ``(heading, size, labelOf)`` triple per tensor
        mode, in mode order: ``heading`` is a format string receiving the
        1-based component number, ``size`` the number of entries along that
        mode and ``labelOf`` maps an index along the mode to what is printed.
        """
        # NOTE(review): these two cells are forced to 1 regardless of the
        # data (and raise IndexError if any dimension is smaller than 2).
        # They look like a leftover from testing; kept to preserve the
        # current output - confirm whether they can be removed.
        matrix[0][0][0] = 1
        matrix[1][1][1] = 1

        decomposed = dec.parafac(tl.tensor(matrix), rank=5)
        for modeIndex, (heading, size, labelOf) in enumerate(modes):
            factor = decomposed[modeIndex]
            for component in range(factor.shape[1]):
                loadings = factor[:, component]
                mean = np.mean(loadings)
                print(heading.format(component + 1))
                for j in range(size):
                    if loadings[j] >= mean:
                        print(labelOf(j))

    def _actorMovieYearSemantics(self):
        """Model 1: cell (actor, movie, year) is 1 when the actor is linked to
        the movie in movie_actor and the movie row carries that year."""
        noOfDistinctYear = self._fetchCount(
            "select count(distinct year) from mlmovies")
        noOfActors = self._fetchCount("select count(*) from imdb_actor_info  ")
        noOfMovies = self._fetchCount("select count(*) from mlmovies  ")

        # Movie indices; column 0 is the id, column 1 the name, column 2 the
        # year. The year is remembered here so that no per-actor query
        # against mlmovies is needed later.
        movieIdVsIndex = {}
        movieIndexVsName = {}
        movieIdVsYear = {}
        movies = self.db.executeQuery("select * from mlmovies order by movieid")
        for movieIndex, movie in enumerate(movies):
            movieIdVsIndex[movie[0]] = movieIndex
            movieIndexVsName[movieIndex] = movie[1]
            movieIdVsYear[movie[0]] = movie[2]

        # Year indices, keyed by the string form of the year.
        yearVsIndex = {}
        yearIndexVsYear = {}
        years = self.db.executeQuery(
            "select distinct year from mlmovies order by year")
        for yearIndex, yearRow in enumerate(years):
            yearVsIndex[str(yearRow[0])] = yearIndex
            yearIndexVsYear[yearIndex] = yearRow[0]

        actorMovieYearMatrix = np.zeros(
            (noOfActors, noOfMovies, noOfDistinctYear))

        actorIndexVsName = {}
        actors = self.db.executeQuery(
            "select * from imdb_actor_info order by actorid ")
        for actorIndex, actor in enumerate(actors):
            actorIndexVsName[actorIndex] = actor[1]
            actorMovies = self.db.executeQuery(
                "select * from movie_actor where actorid = " + str(actor[0]))
            for row in actorMovies:
                movieId = row[0]
                # Movies absent from mlmovies are ignored, exactly as the
                # former "movieid in (...)" lookup did; an actor without
                # movies no longer produces an invalid "in ()" query.
                if movieId not in movieIdVsYear:
                    continue
                yearIndex = yearVsIndex[str(movieIdVsYear[movieId])]
                actorMovieYearMatrix[actorIndex][movieIdVsIndex[movieId]][
                    yearIndex] = 1

        self._printLatentSemantics(actorMovieYearMatrix, [
            ("ACTORS GROUPED UNDER LATENT SEMANTICS {0} ", noOfActors,
             lambda j: actorIndexVsName[j]),
            ("MOVIES GROUPED UNDER LATENT SEMANTICS {0}", noOfMovies,
             lambda j: movieIndexVsName[j]),
            ("YEARS GROUPED UNDER LATENT SEMANTICS {0}", noOfDistinctYear,
             lambda j: yearIndexVsYear[j]),
        ])

    def _tagMovieRatingSemantics(self):
        """Model 2: cell (tag, movie, r) is 1 when the movie carries the tag
        and the movie's average rating is <= r (r = 1..number of ratings)."""
        noOfTags = self._fetchCount("select count(*) from genome_tags")
        noOfMovies = self._fetchCount("select count(*) from mlmovies  ")
        noOfRatings = self._fetchCount(
            "select count(distinct rating) from mlratings")

        tagMovieRatingMatrix = np.zeros((noOfTags, noOfMovies, noOfRatings))

        # Tag indices; column 0 is the id, column 1 the name.
        tagIdVsIndex = {}
        tagIndexVsName = {}
        tags = self.db.executeQuery("select * from genome_tags order by tagid")
        for tagIndex, tag in enumerate(tags):
            tagIdVsIndex[tag[0]] = tagIndex
            tagIndexVsName[tagIndex] = tag[1]

        movieIndexVsName = {}
        movies = self.db.executeQuery(
            "select * from mlmovies order  by movieid")
        for movieIndex, movie in enumerate(movies):
            movieid = movie[0]
            movieIndexVsName[movieIndex] = movie[1]

            movieTags = self.db.executeQuery(
                "select * from mltags where movieid = " + str(movieid))
            movieTagsList = [movieTag[2] for movieTag in movieTags]
            if not movieTagsList:
                # Without tags no cell can be set, so the rating queries
                # would be wasted work.
                continue

            totalRatings = self._fetchCount(
                "select count(*) from mlratings where movieid = " +
                str(movieid))
            if totalRatings == 0:
                # No ratings means no average; previously this divided by
                # zero and aborted the whole run.
                continue

            sumQ = ("select movieid, sum(rating) from mlratings  where movieid = "
                    + str(movieid) + " group by movieid")
            sumRating = 0
            for r in self.db.executeQuery(sumQ):
                sumRating = sumRating + r[1]
            avgRating = float(sumRating) / totalRatings

            # The rating slots depend only on the movie, so compute them
            # once instead of once per tag.
            ratingSlots = [
                i - 1 for i in range(1, noOfRatings + 1)
                if avgRating <= float(i)
            ]
            for tag in movieTagsList:
                tagIndex = tagIdVsIndex[tag]
                for slot in ratingSlots:
                    tagMovieRatingMatrix[tagIndex][movieIndex][slot] = 1

        self._printLatentSemantics(tagMovieRatingMatrix, [
            (" TAGS GROUPED UNDER LATENT SEMANTICS {0} ", noOfTags,
             lambda j: tagIndexVsName[j]),
            ("MOVIES GROUPED UNDER LATENT SEMANTICS {0}", noOfMovies,
             lambda j: movieIndexVsName[j]),
            ("RATINGS GROUPED UNDER LATENT SEMANTICS {0}", noOfRatings,
             lambda j: j + 1),
        ])
class TFIDF:
    model = None
    entityid = None
    commandStr = None
    relation = None
    db = None
    tableName = None
    epoch = datetime.utcfromtimestamp(0)

    def __init__(self, modelP, entityidP, commandstrP):
        """Store the run parameters, open a DB handle and derive the relation.

        The relation is "actor", "genre" or "user", chosen by the first of
        the markers "_actor_", "_genre_", "_user_" (checked in that order)
        found in the command string. When no marker is present
        ``self.relation`` is left at its class-level default.
        """
        self.model = modelP
        self.entityid = entityidP
        self.commandStr = commandstrP
        self.db = DBConnect()
        for relationName in ("actor", "genre", "user"):
            if "_" + relationName + "_" in self.commandStr:
                self.relation = relationName
                break

    def getEntityMovieTableName(self):
        if self.relation == "actor":
            return "movie_actor"
        elif self.relation == "genre":
            return "mlmovies"
        elif self.relation == "user":
            return "mlratings"

    def getWeight(self, rank, totalCount):
        """Return ``1 - rank / totalCount`` as a float.

        A smaller rank therefore gives a larger weight. ``totalCount`` must
        be non-zero; a zero value raises ZeroDivisionError.
        """
        # float() forces true division. With two ints Python 2 floor-divides,
        # which made the ratio 0 (and the weight 1) for every rank below
        # totalCount.
        return 1 - float(rank) / totalCount

    def getMillis(self, timestampDiff):
        if timestampDiff == None:
            return None
        #print "getMillis : "+str(timestampDiff)
        #timestampDiffStr = str(timestampDiff)
        return (timestampDiff - self.epoch).total_seconds() * 1000.0
#        d = datetime.strptime(timestampDiffStr, "%Y-%m-%d %H:%M:%S,%f").strftime('%s')
#        d_in_ms = int(d)*1000
#return d_in_ms

    def getCount(self, countString):
        """Strip result-row punctuation from ``countString``.

        Every occurrence of ``(``, ``)``, ``[``, ``]``, ``,``, ``'`` and the
        letter ``L`` is removed, so that e.g. "(5L,)" becomes "5". Other
        characters, including whitespace, are left untouched.
        """
        # 'L' is presumably the Python 2 long-integer suffix - TODO confirm.
        for unwanted in "([])L,'":
            countString = countString.replace(unwanted, '')
        return countString

    def calcTFIDFApproach2(self, movies):
        """Print tag names ranked by a per-movie TF times IDF score.

        ``movies`` is an iterable of rows whose column 0 is the movie id
        and, for the "actor" relation, whose column 2 is the actor's rank in
        that movie.

        For every movie its tags are read from ``mltags`` (newest first;
        column 1 movie id, column 2 tag id, column 3 timestamp). Each tag
        gets a recency weight ``(ts - oldest) / (latest - oldest)``, or 0.9
        when the movie has a single tag row or all its tags share one
        timestamp. For the actor relation the rank weight from getWeight()
        is added. A tag's TF within a movie is its weight divided by the sum
        of the movie's tag weights; TFs are summed over all movies. IDF is
        ``number of movies / number of (movie, tag) rows for the tag``
        (no logarithm). The sorted (name, TF*IDF) list is printed.
        """
        isActor = self.relation == "actor"
        # Callers normally set self.tableName first; fall back to the
        # relation's table so a direct call does not fail on None.
        tableName = self.tableName or self.getEntityMovieTableName()

        moviesCount = 0
        globalTagIdVsTF = {}
        tagIdVsMovieList = {}

        for tup in movies:
            movieid = tup[0]
            moviesCount = moviesCount + 1

            weight = 0
            if isActor:
                # Weight for the actor's rank within this movie's cast.
                countQuery = ("select count(*) from " + tableName +
                              " where movieid = " + str(movieid))
                res = self.db.executeQuery(countQuery)
                castSize = int(self.getCount(str(res[0])))
                weight = self.getWeight(tup[2], castSize + 1)

            tagsQuery = ("select * from mltags where movieid =" + str(movieid)
                         + " order by mtimestamp desc")
            tags = self.db.executeQuery(tagsQuery)
            if not tags:
                continue

            tagIdVsTimeStamp = {}
            for tag in tags:
                tagid = tag[2]
                # For a repeated tag the last (oldest) row's timestamp wins.
                tagIdVsTimeStamp[tagid] = tag[3]
                # NOTE(review): a tag applied several times to one movie adds
                # that movie several times, which lowers the IDF below -
                # confirm whether distinct movies were intended.
                tagIdVsMovieList.setdefault(tagid, []).append(tag[1])

            # Rows are ordered newest first.
            latestMillis = self.getMillis(tags[0][3])
            oldestMillis = self.getMillis(tags[-1][3])
            timeStampDiff = latestMillis - oldestMillis

            tagIdVsWeight = {}
            totalTagWeights = 0.00
            for tagId, timeStamp in tagIdVsTimeStamp.items():
                if len(tags) == 1 or timeStampDiff == 0:
                    # A zero time range cannot be normalised; treat it like
                    # the single-tag case instead of dividing by zero.
                    tagWeight = 0.9
                else:
                    tagWeight = (self.getMillis(timeStamp) -
                                 oldestMillis) / timeStampDiff
                combinedWeight = weight + tagWeight  # rank weight + tag weight
                tagIdVsWeight[tagId] = combinedWeight
                totalTagWeights = totalTagWeights + combinedWeight

            # TF calculation
            for tagId, combinedWeight in tagIdVsWeight.items():
                tf = 0
                if totalTagWeights != 0.0:
                    tf = combinedWeight / totalTagWeights
                globalTagIdVsTF[tagId] = globalTagIdVsTF.get(tagId, 0) + tf

        # IDF calculation; float() avoids Python 2 integer division.
        globalTagIdVsIDF = {}
        for tagId, tagMovies in tagIdVsMovieList.items():
            globalTagIdVsIDF[tagId] = float(moviesCount) / len(tagMovies)

        tagIdVsName = {}
        tagsList = list(tagIdVsMovieList)
        if tagsList:
            # An empty list would render as "in ()", which is not valid SQL.
            tagsListStr = str(tagsList).replace('[', '(').replace(']', ')')
            tagsQuery = "select * from genome_tags where tagid in " + tagsListStr
            for tag in self.db.executeQuery(tagsQuery):
                tagIdVsName[tag[0]] = tag[1]

        globalTagNameVsTFIDF = {}
        for tagId, name in tagIdVsName.items():
            globalTagNameVsTFIDF[
                name] = globalTagIdVsTF[tagId] * globalTagIdVsIDF[tagId]

        print("TF IDF SORTED")
        sortedTagVsIDF = sorted(globalTagNameVsTFIDF.items(),
                                key=operator.itemgetter(1),
                                reverse=True)
        print("" + str(sortedTagVsIDF))

    def calcTFIDFApproach1(self, movies):
        """Print tags ranked by TF (model "TF") or by TF-IDF (any other model).

        ``movies`` is an iterable of rows whose column 0 is the movie id
        and, for the "actor" relation, whose column 2 is the actor's rank.

        All ``mltags`` rows of those movies are read newest first (column 1
        movie id, column 2 tag id, column 3 timestamp). Each row contributes
        ``(ts - oldest) / (newest - oldest)`` plus, for actors, the movie's
        rank weight; when newest == oldest the position-based getWeight()
        value is used instead. A tag's TF is its summed weight divided by
        the sum over all tags. With ``self.model == "TF"`` the sorted TF
        list is printed and the method returns. Otherwise IDF is
        ``log(totalDocs / related)``, where ``related`` counts the distinct
        actors, genres or users associated with the tag, and the sorted
        TF-IDF list is printed.
        """
        isActor = self.relation == "actor"
        # Callers normally set self.tableName first; fall back to the
        # relation's table so a direct call does not fail on None.
        tableName = self.tableName or self.getEntityMovieTableName()

        movieVsWeight = {}
        moviesList = []
        for movie in movies:
            movieid = movie[0]
            weight = 0
            if isActor:
                # Weight for the actor's rank within this movie's cast.
                countQuery = ("select count(*) from " + tableName +
                              " where movieid = " + str(movieid))
                res = self.db.executeQuery(countQuery)
                castSize = int(self.getCount(str(res[0])))
                weight = self.getWeight(movie[2], castSize + 1)
            movieVsWeight[movieid] = weight
            moviesList.append(movieid)

        tags = []
        oldesMillis = None
        if moviesList:
            # An empty list would render as "in ()", which is not valid SQL.
            moviesListStr = str(moviesList).replace('[', '(').replace(']', ')')
            oldestTagQuery = ("select * from mltags where movieid in " +
                              moviesListStr + " order by mtimestamp limit 1")
            for oldTag in self.db.executeQuery(oldestTagQuery):
                oldesMillis = self.getMillis(oldTag[3])
            tagsQuery = ("select * from mltags where movieid in " +
                         moviesListStr + " order by mtimestamp desc")
            tags = self.db.executeQuery(tagsQuery)

        actorTagsCount = len(tags)
        tagVsTotalWeight = {}
        timeRange = None
        for n, tag in enumerate(tags, 1):
            movieid = tag[1]
            tagid = tag[2]
            timestamp = tag[3]
            if n == 1:
                # Rows are newest first, so the first row fixes the range.
                timeRange = self.getMillis(timestamp) - oldesMillis

            if timeRange != 0:
                tagWeight = (self.getMillis(timestamp) -
                             oldesMillis) / timeRange
            else:
                # All tags share one timestamp; fall back to the position
                # based weight (as calcSVD does) instead of dividing by zero.
                tagWeight = self.getWeight(n, actorTagsCount + 1)

            combinedWeight = tagWeight + movieVsWeight[movieid]
            tagVsTotalWeight[tagid] = (tagVsTotalWeight.get(tagid, 0) +
                                       combinedWeight)

        # Tag id -> name for the tags that were seen.
        tagIdVsName = {}
        if tagVsTotalWeight:
            tagsListStr = str(list(tagVsTotalWeight))
            tagsListStr = tagsListStr.replace('[', '(').replace(']', ')')
            tagsQuery = "select * from genome_tags where tagid in " + tagsListStr
            for tag in self.db.executeQuery(tagsQuery):
                tagIdVsName[tag[0]] = tag[1]

        # TF for each tag.
        totalWeight = sum(tagVsTotalWeight.values())
        tagVsTF = {}
        tagIdVsTF = {}
        for key, val in tagVsTotalWeight.items():
            tf = float(val) / totalWeight if totalWeight else 0.0
            tagVsTF[tagIdVsName[key]] = tf
            tagIdVsTF[key] = tf

        if self.model == "TF":
            sortedTagVsTF = sorted(tagVsTF.items(),
                                   key=operator.itemgetter(1),
                                   reverse=True)
            print("TAG Vs TF " + str(sortedTagVsTF))
            print("model = TF")
            return

        # Total number of documents per relation. The constants are
        # presumably the corpus totals of actors, genres and users - TODO
        # confirm against the data set.
        totalDocsCount = {"genre": 19, "user": 71567}.get(self.relation, 27279)

        tagIdVsTFIDF = {}
        for tagid, tf in tagIdVsTF.items():
            totalRelatedWithThisTag = 0

            if isActor or self.relation == "genre":
                relatedMovies = self.db.executeQuery(
                    "select movieid from mltags where tagid = " + str(tagid))
                relatedIds = [mov[0] for mov in relatedMovies]
                movListStr = str(relatedIds).replace('[', '(').replace(
                    ']', ')')
                if not relatedIds:
                    pass  # nothing related; handled by the zero check below
                elif isActor:
                    actorIds = ("select count(distinct actorid ) from movie_actor where movieid in "
                                + movListStr)
                    res = self.db.executeQuery(actorIds)
                    totalRelatedWithThisTag = int(self.getCount(str(res)))
                else:
                    genreSet = set()
                    genres = "select * from mlmovies where movieid in " + movListStr
                    for genre in self.db.executeQuery(genres):
                        genreSet.update(genre[2].split('|'))
                    totalRelatedWithThisTag = len(genreSet)
            elif self.relation == "user":
                users = ("select count(distinct userid) from mltags where tagid ="
                         + str(tagid))
                countUsers = self.db.executeQuery(users)
                totalRelatedWithThisTag = int(self.getCount(str(countUsers)))

            if totalRelatedWithThisTag == 0:
                # IDF is undefined without any related entity; skip the tag
                # rather than divide by zero.
                continue

            # float() avoids Python 2 integer division, which could reduce
            # the ratio to 0 and make math.log() raise ValueError.
            idf = math.log(float(totalDocsCount) / totalRelatedWithThisTag)
            tagIdVsTFIDF[tagIdVsName[tagid]] = tf * idf

        print("Tag vs TF-IDF ")
        sortedTagVsIDF = sorted(tagIdVsTFIDF.items(),
                                key=operator.itemgetter(1),
                                reverse=True)
        print("" + str(sortedTagVsIDF))

    def calcSVD(self, movies):
        """Build a movie x tag TF-IDF matrix and print it with its SVD.

        ``movies`` is a sequence of rows whose column 0 is the movie id.
        Matrix columns follow ``genome_tags`` ordered by tag id. For each
        movie its ``mltags`` rows are read newest first (column 2 tag id,
        column 3 timestamp); a row's weight is
        ``(ts - oldest) / (newest - oldest)``, or the position-based
        getWeight() value when newest == oldest. TF is the tag's summed
        weight over the movie's total, and IDF is
        ``log(len(movies) / distinct movies carrying the tag)``. The matrix,
        the singular values ("S") and the right factor ("V") from
        ``np.linalg.svd(..., full_matrices=False)`` are printed.
        """
        tagIdVsIndex = {}
        tags = self.db.executeQuery("select * from genome_tags order by tagid")
        for tagIndex, tag in enumerate(tags):
            tagIdVsIndex[tag[0]] = tagIndex

        noOfMovies = len(movies)
        res = self.db.executeQuery("select count(*) from genome_tags")
        noOfTags = int(self.getCount(str(res[0])))
        movieTFIDF = np.zeros((noOfMovies, noOfTags))

        movieIdVsIndex = {}
        # Document frequency per tag does not depend on the movie, so each
        # tag is queried only once.
        tagIdVsDocFreq = {}

        for movieIndex, movie in enumerate(movies):
            movieid = movie[0]
            movieIdVsIndex[movieid] = movieIndex

            # NOTE(review): this method orders by "time_stamp" while the
            # other methods of this class order mltags by "mtimestamp" -
            # confirm which column name matches the schema.
            oldestTagQuery = ("select * from mltags where movieid = " +
                              str(movieid) + " order by time_stamp limit 1")
            oldestTimeStamp = None
            for oldTag in self.db.executeQuery(oldestTagQuery):
                oldestTimeStamp = oldTag[3]
            oldesMillis = self.getMillis(oldestTimeStamp)

            tagsQuery = ("select * from mltags where movieid = " +
                         str(movieid) + " order by time_stamp desc")
            movieTags = self.db.executeQuery(tagsQuery)
            movieTagsCount = len(movieTags)

            tagVsTotalWeight = {}
            timeRange = None
            for n, tag in enumerate(movieTags, 1):
                tagid = tag[2]
                timestamp = tag[3]
                if n == 1:
                    # Rows are newest first, so the first row fixes the range.
                    timeRange = self.getMillis(timestamp) - oldesMillis

                if timeRange != 0:
                    tagWeight = (self.getMillis(timestamp) -
                                 oldesMillis) / timeRange
                else:
                    # Zero range: use the position-based weight instead.
                    tagWeight = self.getWeight(n, movieTagsCount + 1)

                tagVsTotalWeight[tagid] = (tagVsTotalWeight.get(tagid, 0) +
                                           tagWeight)

            totalWeight = sum(tagVsTotalWeight.values())

            for tagid, weightSum in tagVsTotalWeight.items():
                # Guard against an all-zero weight sum.
                tf = float(weightSum) / totalWeight if totalWeight else 0.0

                if tagid not in tagIdVsDocFreq:
                    countQuery = ("select count(distinct movieid) from mltags where tagid ="
                                  + str(tagid))
                    countRes = self.db.executeQuery(countQuery)
                    tagIdVsDocFreq[tagid] = int(self.getCount(str(countRes)))
                docFreq = tagIdVsDocFreq[tagid]
                if docFreq == 0:
                    continue  # IDF undefined; leave the cell at 0

                # float() avoids Python 2 integer division before the log.
                idf = math.log(float(noOfMovies) / docFreq)
                movieTFIDF[movieIndex][tagIdVsIndex[tagid]] = tf * idf

        print("Movie tag Matrix")
        print(movieTFIDF)
        # Spot check of one hard-coded (movie, tag) cell; printed only when
        # both ids exist so other data sets do not raise KeyError.
        if 7247 in movieIdVsIndex and 1128 in tagIdVsIndex:
            print(movieTFIDF[movieIdVsIndex[7247]][tagIdVsIndex[1128]])
        u, s, v = np.linalg.svd(movieTFIDF, full_matrices=False)
        print("S")

        print(s)
        print("V")
        print(v)

    def calcUserVector(self):
        """Fetch the ``mlratings`` rows of user ``self.entityid`` and pass
        them to calcTFIDFApproach1()."""
        userRatings = self.db.executeQuery(
            "select * from mlratings where userid = " + str(self.entityid))
        self.calcTFIDFApproach1(userRatings)

    def calcGenreVector(self):
        """Fetch the ``mlmovies`` rows whose genres contain ``self.entityid``
        and pass them to calcTFIDFApproach1(). Always returns None."""
        self.tableName = self.getEntityMovieTableName()
        genrePattern = "'%" + str(self.entityid) + "%'"
        genreMovies = self.db.executeQuery(
            "select * from mlmovies where genres like " + genrePattern)
        self.calcTFIDFApproach1(genreMovies)
        return None

    def calcMoviesVector(self):
        """Load every mlmovies row and pass the result to ``self.calcSVD``.

        ``self.tableName`` is refreshed from ``getEntityMovieTableName()``
        before the query runs. Always returns None.
        """
        self.tableName = self.getEntityMovieTableName()
        allMovieRows = self.db.executeQuery("select * from mlmovies ")
        self.calcSVD(allMovieRows)

    def calcActorVector(self):
        """Fetch the rows of actor ``self.entityid`` from the entity table.

        The table name comes from ``getEntityMovieTableName()``; the rows are
        handed unchanged to ``self.calcTFIDFApproach1``.
        """
        entityTable = self.getEntityMovieTableName()
        actorQuery = ("select * from " + entityTable +
                      " where actorid = " + str(self.entityid))
        actorRows = self.db.executeQuery(actorQuery)
        self.calcTFIDFApproach1(actorRows)

    def getWeightedTagVector(self):
        """Run the vector calculation that matches ``self.relation``.

        ``"actor"`` selects ``calcActorVector``, ``"genre"`` selects
        ``calcGenreVector`` and every other value falls back to
        ``calcUserVector``. ``self.tableName`` is refreshed from
        ``getEntityMovieTableName()`` before dispatching.
        """
        self.tableName = self.getEntityMovieTableName()

        if self.relation == "actor":
            self.calcActorVector()
            return
        if self.relation == "genre":
            self.calcGenreVector()
            return
        # Anything that is neither an actor nor a genre is treated as a user.
        self.calcUserVector()

    def pDiff1(self, model, genre1, genre2):
        """Print the tags of ``genre1`` ranked by a probabilistic weight.

        Every tag attached to at least one ``genre1`` movie gets the weight
        ``log((x * (1 - y)) / (y * (1 - x)) * |x - y|)`` with
        ``x = (r + m / M) / (R + 1)`` and
        ``y = (m - r + m / M) / (M - R + 1)``.

        ``M`` is always the number of distinct movies in either genre. The
        other counts depend on ``model``:

        * ``"P-DIFF1"``: ``r`` = genre1 movies carrying the tag, ``m`` =
          movies of either genre carrying the tag, ``R`` = genre1 movies.
        * ``"P-DIFF2"``: ``r`` = genre2 movies having an mltags row with a
          different tag id, ``m`` = the same count over the movies of either
          genre, ``R`` = genre2 movies.

        Args:
            model: Either ``"P-DIFF1"`` or ``"P-DIFF2"``.
            genre1: Genre whose tags are ranked (matched with SQL ``like``).
            genre2: Genre that ``genre1`` is compared against.

        Returns:
            None. The ``(tag name, weight)`` pairs are printed, sorted by
            descending weight, prefixed with ``"Tag vs Weight = "``.

        Raises:
            ValueError: If ``model`` is not one of the two supported names.
            KeyError: If a tag id has no row in ``genome_tags``.
        """
        if model not in ("P-DIFF1", "P-DIFF2"):
            # Previously an unknown model left r = m = R = M = 0 and blew up
            # later with an unrelated ZeroDivisionError.
            raise ValueError("model must be 'P-DIFF1' or 'P-DIFF2', got " +
                             repr(model))

        def toSqlList(values):
            # "in ()" is a syntax error in several SQL dialects, whereas
            # "in (NULL)" is valid SQL and matches no row.
            if not values:
                return "(NULL)"
            return str(values).replace('[', '(').replace(']', ')')

        def moviesOfGenre(genre):
            rows = self.db.executeQuery(
                "select distinct movieid from mlmovies where genres like '%" +
                str(genre) + "%'")
            return [row[0] for row in rows]

        genre1MovieList = moviesOfGenre(genre1)
        genre2MovieList = moviesOfGenre(genre2)
        noOfMoviesGenre1 = len(genre1MovieList)
        noOfMoviesGenre2 = len(genre2MovieList)
        totalMovies = len(set(genre1MovieList) | set(genre2MovieList))

        movListStr1 = toSqlList(genre1MovieList)
        movListStr2 = toSqlList(genre2MovieList)

        # Only tags that occur on genre1 movies are weighted.
        tags = self.db.executeQuery(
            "select distinct tagid from mltags where movieid in " +
            movListStr1)
        tagsList = []
        genre1TagVsWeight = {}
        for tag in tags:
            tagid = tag[0]
            tagsList.append(tagid)
            if model == "P-DIFF1":
                res = self.db.executeQuery(
                    "select distinct movieid from mltags where movieid in " +
                    movListStr1 + " and tagid = " + str(tagid))
                moviesWithTag = set(movie[0] for movie in res)
                r = len(moviesWithTag)

                res = self.db.executeQuery(
                    "select distinct movieid from mltags where movieid in " +
                    movListStr2 + " and tagid = " + str(tagid))
                moviesWithTag.update(movie[0] for movie in res)
                m = len(moviesWithTag)
                R = noOfMoviesGenre1
            else:
                res = self.db.executeQuery(
                    "select count(distinct movieid) from mltags where "
                    "movieid in " + movListStr2 + " and tagid != " +
                    str(tagid))
                r = int(self.getCount(str(res)))

                # The OR must be parenthesised: AND binds tighter than OR,
                # so without it the tag filter only applied to genre2.
                res = self.db.executeQuery(
                    "select count(distinct movieid) from mltags where "
                    "(movieid in " + movListStr1 + " or movieid in " +
                    movListStr2 + ") and tagid != " + str(tagid))
                m = int(self.getCount(str(res)))
                R = noOfMoviesGenre2
            M = totalMovies

            x = float(r + float(m) / float(M)) / float(R + 1)
            y = float(m - r + float(m) / M) / float(M - R + 1)
            spread = math.fabs(x - y)
            oddsNumerator = x * (1 - y)
            oddsDenominator = y * (1 - x)
            if spread == 0 or oddsNumerator == 0:
                # The product inside the log is exactly 0 (e.g. x == y when
                # both genres select the same movies); log(0) would raise.
                w = float("-inf")
            elif oddsDenominator == 0:
                # x == 1 with y < 1: the odds ratio is unbounded.
                w = float("inf")
            else:
                w = math.log(
                    float((oddsNumerator / oddsDenominator) * spread))
            genre1TagVsWeight[tagid] = w

        tagsQuery = ("select * from genome_tags where tagid in " +
                     toSqlList(tagsList))
        tagIdVsName = {}
        for tag in self.db.executeQuery(tagsQuery):
            tagIdVsName[tag[0]] = tag[1]

        genre1TagNamVsWeight = {}
        for tagid, weight in genre1TagVsWeight.items():
            genre1TagNamVsWeight[tagIdVsName[tagid]] = weight
        sortedTagVsWeight = sorted(genre1TagNamVsWeight.items(),
                                   key=operator.itemgetter(1),
                                   reverse=True)

        # Single-argument parenthesised form prints the same on Python 2.
        print("Tag vs Weight = " + str(sortedTagVsWeight))

    def tfIdfDiff(self, genre1, genre2):
        """Print the tags of ``genre1`` ranked by a TF-IDF-DIFF weight.

        TF of a tag is the number of mltags rows carrying it on ``genre1``
        movies divided by the number of all mltags rows on ``genre1`` movies.
        IDF is ``log(N / n)`` where ``N`` is the number of distinct movies in
        ``genre1`` or ``genre2`` and ``n`` is how many of those movies carry
        the tag.

        Args:
            genre1: Genre whose tags are ranked (matched with SQL ``like``).
            genre2: Genre that widens the movie set used for the IDF.

        Returns:
            None. The ``(tag name, weight)`` pairs are printed, sorted by
            descending weight, prefixed with ``"Tag vs TF-IDF-DIFF = "``.

        Raises:
            KeyError: If a tag id has no row in ``genome_tags``.
        """

        def toSqlList(values):
            # "in ()" is a syntax error in several SQL dialects, whereas
            # "in (NULL)" is valid SQL and matches no row.
            if not values:
                return "(NULL)"
            return str(values).replace('[', '(').replace(']', ')')

        def moviesOfGenre(genre):
            rows = self.db.executeQuery(
                "select distinct movieid from mlmovies where genres like '%" +
                str(genre) + "%'")
            return [row[0] for row in rows]

        genre1MovieList = moviesOfGenre(genre1)
        genre2MovieList = moviesOfGenre(genre2)
        allMovies = sorted(set(genre1MovieList) | set(genre2MovieList))
        totalMovies = len(allMovies)
        allMoviesStr = toSqlList(allMovies)

        # One row per (movie, tag) assignment, so a tag can repeat.
        tagRows = self.db.executeQuery(
            "select tagid from mltags where movieid in " +
            toSqlList(genre1MovieList))
        totaltags = 0
        tagIdVsFreq = {}
        for tag in tagRows:
            totaltags = totaltags + 1
            # The old membership test looked in .items() and never matched,
            # so every frequency stayed at 1.
            tagIdVsFreq[tag[0]] = tagIdVsFreq.get(tag[0], 0) + 1

        tagIdVsTF = {}
        tagIdVsIDF = {}
        for tagid, freq in tagIdVsFreq.items():
            # float() keeps Python 2 from truncating the ratio to 0.
            tagIdVsTF[tagid] = float(freq) / totaltags

            # Count the tag over the same movie set N is taken from, so the
            # document frequency can never exceed N.
            count = self.db.executeQuery(
                "select count(distinct movieid) from mltags where tagid = " +
                str(tagid) + " and movieid in " + allMoviesStr)
            moviesWithThisTag = int(self.getCount(str(count)))
            tagIdVsIDF[tagid] = math.log(
                float(totalMovies) / moviesWithThisTag)

        tagsQuery = ("select * from genome_tags where tagid in " +
                     toSqlList(sorted(tagIdVsFreq)))
        tagIdVsName = {}
        for tag in self.db.executeQuery(tagsQuery):
            tagIdVsName[tag[0]] = tag[1]

        tagVsTFIDFDIFF = {}
        for tagid, tf in tagIdVsTF.items():
            tagVsTFIDFDIFF[tagIdVsName[tagid]] = tf * tagIdVsIDF[tagid]

        sortedTagVsWeight = sorted(tagVsTFIDFDIFF.items(),
                                   key=operator.itemgetter(1),
                                   reverse=True)
        # Single-argument parenthesised form prints the same on Python 2.
        print("Tag vs TF-IDF-DIFF = " + str(sortedTagVsWeight))