# Example No. 1
0
def AnalyzeResultforQuestionWithIDF(utterence,no):
    """Find the training question closest to ``utterence`` and dump all scores to Excel.

    The utterance is vectorized with ``wordvec.vectorize_query`` against the
    module-level ``IDFset`` and compared with every entry of
    ``Question_vectors`` using cosine distance; the entry with the smallest
    distance wins.  Each training question, its cleaned form and its distance
    are also written to ``TestSet<no>.xlsx``.

    Args:
        utterence: The user query text.
        no: Suffix for the Excel file name.  It is converted with ``str()``,
            so both strings and integers are accepted.

    Returns:
        A tuple ``(query, question, answer, score)``: the cleaned utterance,
        the best-matching training question, its answer, and the minimum
        cosine distance.

    Raises:
        ValueError: If ``Question_vectors`` is empty, so there is nothing to
            match against (the Excel file is still written in that case).
    """
    query_vector = wordvec.vectorize_query(utterence, IDFset)
    query = Data_Cleaner(utterence)

    scores = []
    processed_questions = []
    for i in range(len(Question_vectors)):
        candidate = np.array(Question_vectors[i])
        score = spatial.distance.cosine(candidate, query_vector)
        processed_questions.append(Data_Cleaner(TrainingSet.Question[i]))
        # A NaN distance (presumably from an all-zero vector -- TODO confirm)
        # is replaced by 1 so it can never be picked over a real match.
        scores.append(1 if math.isnan(score) else score)

    TestSet = pd.DataFrame(
        list(zip(TrainingSet.Question, processed_questions, scores)),
        columns=['Question', 'Processed Question', 'Score'],
    )
    # str() lets callers pass the run number as an int as well as a string.
    excel_name = "TestSet" + str(no) + ".xlsx"
    TestSet.to_excel(excel_name)

    if not scores:
        raise ValueError("Question_vectors is empty; no question to match against")

    # Compute the minimum once and reuse it for both the lookup and the result.
    best_score = np.min(scores)
    index = scores.index(best_score)
    print("Index",index)
    answer = TrainingSet.Answer[index]
    question = TrainingSet.Question[index]

    return query, question, answer, best_score
def getAnswer(utterence):
    """Return the training question/answer pair nearest to ``utterence``.

    The utterance is vectorized with ``wordvec.vectorize_query`` against the
    module-level ``IDFset`` and compared with every entry of
    ``Question_vectors`` using Euclidean distance.  The position of the
    smallest distance is printed and used to look up the result in
    ``TrainingSet``.

    Args:
        utterence: The user query text.

    Returns:
        A tuple ``(question, answer)`` taken from ``TrainingSet`` at the
        position of the smallest distance.
    """
    query_vector = wordvec.vectorize_query(utterence, IDFset)

    def _distance_to(position):
        # Distance between one stored question vector and the query;
        # a NaN result is replaced by 1.
        candidate = np.array(Question_vectors[position])
        distance = spatial.distance.euclidean(candidate, query_vector)
        return 1 if math.isnan(distance) else distance

    distances = [_distance_to(position) for position in range(len(Question_vectors))]

    best = distances.index(np.min(distances))
    print("Index", best)
    matched_answer = TrainingSet.Answer[best]
    matched_question = TrainingSet.Question[best]

    return matched_question, matched_answer