class VectorSpaceModel:
    """Rank documents against a free-text query using tf-idf cosine similarity.

    Term statistics come from a ``Frequency`` helper; the model turns the
    query and every document into a tf-idf vector over the same vocabulary
    and scores each document by the cosine of the angle between the two.
    """

    def __init__(self, N):
        """Create a model over ``N`` documents (numbered 1..N)."""
        self.frequency = Frequency()
        self.noOfDocs = N

    def userInterface(self):
        """Run the interactive menu loop until the user enters ``'0'``."""
        op = '1'
        while op != '0':
            print("vector Space Model")
            print("-----------------------------")
            print("1. Execute Query")
            print("0. Exit")

            op = input("Enter input: ")

            self.inputQuery(op)

    def inputQuery(self, op):
        """Handle one menu choice; only ``'1'`` (execute a query) does work.

        Reads the query from stdin, (re)builds the word -> idf table into
        ``self.data``, and prints the ranked documents.
        """
        if op != '1':
            return

        query = input("Enter query: ")
        # split() without an argument ignores leading/trailing/repeated
        # whitespace, so no empty-string tokens end up in the query.
        queryArr = query.split()
        self.data = self.createTable()

        qVector = self.getVector(queryArr)
        docVectors = self.getDocumentVectors()

        rankings = self.generateRankings(docVectors, qVector)
        print(self.formatRankings(rankings))

    def formatRankings(self, rankings, threshold=0.005):
        """Drop rows whose ``sim`` is not above ``threshold`` and sort the rest.

        Returns a new DataFrame ordered by descending similarity.
        """
        relevant = rankings.loc[rankings['sim'] > threshold]
        return relevant.sort_values(by=['sim'], ascending=False)

    def generateRankings(self, docs, q):
        """Build a DataFrame with one row per document and its similarity to ``q``.

        ``docs`` maps document ids 1..noOfDocs to their vectors; the ``docs``
        column holds the matching ``'<id>.txt'`` names.
        """
        docIds = range(1, self.noOfDocs + 1)
        return pd.DataFrame({
            'docs': [str(docId) + '.txt' for docId in docIds],
            'sim': [self.sim(docs[docId], q) for docId in docIds],
        })

    def sim(self, d, q):
        """Return the cosine similarity of vectors ``d`` and ``q``.

        Returns 0.0 when either vector has zero length (all zeros or empty)
        instead of dividing by zero, so documents or queries that share no
        vocabulary terms simply score as not similar.
        """
        x = np.asarray(d, dtype=float)
        y = np.asarray(q, dtype=float)

        denominator = np.linalg.norm(x) * np.linalg.norm(y)
        if denominator == 0:
            return 0.0

        return float(np.dot(x, y) / denominator)

    def createTable(self):
        """Return a dict mapping each dictionary word to its idf value.

        NOTE(review): this reloads the documents and rebuilds the dictionary
        on every call (i.e. on every query). Whether ``Frequency`` tolerates
        repeated loading is not visible from here -- confirm, and consider
        building the table once.
        """
        self.frequency.loadDocuments()
        self.frequency.buildDictionary()

        keys = self.frequency.getWords()
        values = self.frequency.getIdf(self.noOfDocs)

        return dict(zip(keys, values))

    def getVector(self, array, docId=0):
        """Return the tf-idf vector of ``array`` over the words in ``self.data``.

        ``docId == 0`` means ``array`` holds query terms and term frequencies
        are counted from it directly; otherwise term frequencies are looked
        up in ``self.frequency`` for document ``docId``. Words that do not
        occur in ``array`` contribute 0.
        """
        isQuery = docId == 0
        # Count the query terms once rather than once per vocabulary word.
        queryTf = self.getQueryFrequency(array) if isQuery else None
        terms = queryTf if isQuery else array

        vector = []
        for word, idf in self.data.items():
            if word not in terms:
                vector.append(0)
                continue

            if isQuery:
                tf = queryTf[word]
            else:
                tf = self.frequency.getTermFrequency(word)[docId]
            vector.append(self.tf_idf(tf, idf))
        return vector

    def getDocumentVectors(self):
        """Return a dict mapping document id (1-based) to its tf-idf vector."""
        collection = self.frequency.collection
        return {
            index + 1: self.getVector(collection[index], index + 1)
            for index in range(self.noOfDocs)
        }

    def getQueryFrequency(self, queryArr):
        """Return a dict mapping each query term to its number of occurrences."""
        tf = {}
        for term in queryArr:
            tf[term] = tf.get(term, 0) + 1
        return tf

    def tf_idf(self, tf, idf):
        """Return the tf-idf weight: term frequency times inverse document frequency."""
        return tf * idf