Code example #1
def predict(q, nlpServer_Port):
    print()
    print("Query = " + q)

    allMovies = AllfilesInFolder.getAllFilesInFolder(r"Data\Plots")

    # Compute the individual feature scores for the query against every movie.
    c_score = character_score(q, nlpServer_Port)
    en_score = entity_score(q, nlpServer_Port)
    ev_score = event_score(q, nlpServer_Port)
    ne_score = ner_score(q, nlpServer_Port)
    s_score = semantic_score(q)

    # Sum the feature scores per movie and report the breakdown.
    total_score = [0] * len(allMovies)
    print()
    print("Final Score:")
    for i in range(len(allMovies)):
        total_score[i] = round((c_score[i] + en_score[i] + ev_score[i] +
                                ne_score[i] + s_score[i]), 5)
        print(allMovies[i] + "  Character Score: " + str(c_score[i]) +
              "  Entity Score: " + str(en_score[i]) + "  Event Score: " +
              str(ev_score[i]) + "  NE score: " + str(ne_score[i]) +
              "  Semantic score: " + str(s_score[i]) + "   Total Score: " +
              str(total_score[i]))

    # Normalise the totals into a probability-like distribution.
    norm_factor = sum(total_score)
    prob = [
        round(x / norm_factor, 5) if norm_factor > 0 else 0
        for x in total_score
    ]
    topPicks = ranking(allMovies, prob, topPicksCount)
    return topPicks
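
A minimal usage sketch of predict, assuming the module-level globals threshold and topPicksCount and the helper modules used above are already in place; the server port and query string are illustrative values, not taken from the project.

if __name__ == "__main__":
    nlpServer_Port = 9000  # assumed port of a locally running NLP server (illustrative)
    query = "A detective hunts a serial killer in a rainy city"
    topPicks = predict(query, nlpServer_Port)
    # The exact structure of topPicks depends on ranking(), which is not shown here.
    print(topPicks)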
Code example #2
def matchEntities(allMovies, query_entities, threshold):
    mFolder = r"Data\Entities"
    mFile = str(AllfilesInFolder.getAllFilesInFolder(mFolder)[0])
    with open(mFolder + '/' + mFile, 'r') as f:
        allMovieEntities = json.loads(f.read())

    score = [0] * len(allMovies)
    if len(query_entities) == 0:
        return score

    for query_entity in query_entities:
        for movie in allMovieEntities:
            movie_entities = movie["Entities"].split(",")
            # Keep the best WordNet (Wu-Palmer) similarity between the query
            # entity and any entity stored for this movie.
            en_score = 0
            for movie_entity in movie_entities:
                a1 = wn.synsets(movie_entity)
                a2 = wn.synsets(query_entity)
                try:
                    sim = a1[0].wup_similarity(a2[0])
                    if sim is not None and sim > en_score:
                        en_score = sim
                except IndexError:
                    # No synset found for one of the terms; skip this pair.
                    pass
            if en_score >= threshold:
                score[allMovies.index(movie["movieTitle"])] += en_score
                print("Entity '" + query_entity + "' found in " + movie["movieTitle"])

    return score
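
The matching above relies on NLTK's WordNet interface; the snippet below is a self-contained illustration of the same Wu-Palmer similarity call, independent of the project's data files. The example words are placeholders.

from nltk.corpus import wordnet as wn

a1 = wn.synsets("ship")   # synsets for a stored movie entity
a2 = wn.synsets("boat")   # synsets for a query entity
if a1 and a2:
    # Wu-Palmer similarity is in [0, 1] (or None when the synsets are unrelated);
    # closely related nouns score near 1.
    sim = a1[0].wup_similarity(a2[0])
    print(sim)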
Code example #3
def extractVerbsFromDB(nlpServer_Port):
    plotFolder = r'Data\Plots'
    verbFolder = r'Data\Event\AllVerbs'
    movies = AllfilesInFolder.getAllFilesInFolder(plotFolder)
    print("Extracting events from plots")
    for movie in tqdm(movies):
        # Read each plot, extract its verbs, and write them to the event folder.
        with open(plotFolder + '/' + movie, 'r') as r:
            plot = r.read()
        verbs = getVerbs(plot, nlpServer_Port)
        with open(verbFolder + '/' + movie, 'w') as w:
            w.writelines(["%s " % vb for vb in verbs])
Code example #4
def event_score(q, nlpServer_Port):
    allMovies = AllfilesInFolder.getAllFilesInFolder(r"Data\Plots")

    # Events
    print()
    query_events = list(set(extractEvents.getVerbs(q, nlpServer_Port)))
    if len(query_events) > 0:
        print("Events identified in query: " + str(query_events))
        ev_score = Score.matchEvent(allMovies, query_events, threshold)
        # Normalise by the number of events found in the query.
        ev_score = [round(x / len(query_events), 5) for x in ev_score]
    else:
        print("No events identified in query")
        ev_score = [0] * len(allMovies)
    return ev_score
Code example #5
def ner_score(q, nlpServer_Port):
    allMovies = AllfilesInFolder.getAllFilesInFolder(r"Data\Plots")

    # NE
    print()
    query_ne = extractNER.getNE(q, nlpServer_Port)
    if len(query_ne) > 0:
        print("NE identified in query: " + str(query_ne))
        ne_score = Score.matchNE(allMovies, query_ne)
        # Normalise by the number of named entities found in the query.
        ne_score = [round(x / len(query_ne), 5) for x in ne_score]
    else:
        print("No NE identified in query")
        ne_score = [0] * len(allMovies)

    return ne_score
Code example #6
def extractMovieCharacersfromDB(nlpServer_Port):
    dataFolder = r"Data\Plots"
    plotFiles = AllfilesInFolder.getAllFilesInFolder(dataFolder)
    characters = []
    print("Extracting characters from plots")
    for movie in tqdm(plotFiles):
        with open(dataFolder + "/" + movie, 'r') as f:
            plot = f.read()
        # Store each movie's characters as a comma-separated string.
        allCharacters = getCharacterNames(plot, nlpServer_Port)
        movieChar = {"movieTitle": movie, "characters": ",".join(allCharacters)}
        characters.append(movieChar)
    result = json.dumps(characters)
    with open(r'Data\Characters\Characters.txt', 'w') as f:
        f.write(result)
    print("Characters file successfully created")
Code example #7
def entity_score(q, nlpServer_Port):
    allMovies = AllfilesInFolder.getAllFilesInFolder(r"Data\Plots")

    # Entities
    print()
    query_entities = extractEntities.getEntities(q, nlpServer_Port)
    if len(query_entities) > 0:
        print("Entities identified in query: " + str(query_entities))
        en_score = Score.matchEntities(allMovies, query_entities, threshold)
        # Normalise by the number of entities found in the query.
        en_score = [round(x / len(query_entities), 5) for x in en_score]
    else:
        print("No entities identified in query")
        en_score = [0] * len(allMovies)
    return en_score
Code example #8
def character_score(q, nlpServer_Port):
    allMovies = AllfilesInFolder.getAllFilesInFolder(r"Data\Plots")

    # Characters
    print()
    query_characters = extractCharacters.getCharacterNames(q, nlpServer_Port)
    if len(query_characters) > 0:
        print("Characters identified in query: " + str(query_characters))
        characters_score = Score.matchCharacters(allMovies, query_characters)
        # Normalise by the number of characters found in the query.
        characters_score = [
            round(x / len(query_characters), 5) for x in characters_score
        ]
    else:
        print("No characters identified in query")
        characters_score = [0] * len(allMovies)

    return characters_score
Code example #9
def cleanUpDB():
    readFolder = r"Data\RawPlots"
    writeFolder = r"Data\Plots"

    movies = AllfilesInFolder.getAllFilesInFolder(readFolder)

    for movie in movies:
        # Read the raw plot, clean it, and write the result to the plots folder.
        with open(readFolder + "/" + movie, 'r', encoding="utf8") as f:
            plot = f.read()
        cleanPlot = cleanUp(plot)
        if len(cleanPlot) > 0:
            with open(writeFolder + "/" + movie, 'w', encoding="utf8") as w:
                w.write(cleanPlot)
        else:
            print(movie + " is empty")
    print("Movie plots cleaned up successfully")
Code example #10
def semantic_score(q):
    allMovies = AllfilesInFolder.getAllFilesInFolder(r"Data\Plots")

    # Semantics
    query_semantics = extractSemantics.extractSemanctic(q)
    if len(query_semantics) > 0:
        print("Semantics found in query:")
        for idx, query_semantic in enumerate(query_semantics):
            print(str(idx + 1) + ". " + query_semantic.toString())
        s_score = Score.matchSemantics(allMovies, query_semantics, allMovies,
                                       threshold)
        s_score = [round(x, 5) for x in s_score]
    else:
        print("No Semantics found in query")
        s_score = [0] * len(allMovies)

    return s_score
Code example #11
def extractEntitiesFromDB(nlpServer_Port):
    dataFolder = r"Data\Plots"
    plotFiles = AllfilesInFolder.getAllFilesInFolder(dataFolder)
    movieEntities = []
    print("Extracting Entities from plots")
    for movie in tqdm(plotFiles):
        with open(dataFolder + "/" + movie, 'r') as f:
            plot = f.read()
        # Store each movie's entities as a comma-separated string.
        Entities_list = getEntities(plot, nlpServer_Port)
        Entities = {"movieTitle": movie, "Entities": ",".join(Entities_list)}
        movieEntities.append(Entities)
    result = json.dumps(movieEntities)
    with open(r"Data\Entities\Entities.txt", "w") as f:
        f.write(result)
    print("Entity file successfully created")
Code example #12
def matchNE(allMovies, query_NEs):
    mFolder = r"Data\NE"
    mFile = str(AllfilesInFolder.getAllFilesInFolder(mFolder)[0])
    with open(mFolder + '/' + mFile, 'r') as f:
        allMovieNE = json.loads(f.read())

    score = [0] * len(allMovies)
    if len(query_NEs) == 0:
        return score
    for movie in allMovieNE:
        movie_NEs = movie["NE"].split(",")
        for query_NE in query_NEs:
            # Count one hit per query NE that appears in the movie's NE list.
            for movie_NE in movie_NEs:
                if query_NE in movie_NE:
                    score[allMovies.index(movie["movieTitle"])] += 1
                    print("NE '" + query_NE + "' found in " + movie["movieTitle"])
                    break
    return score
Code example #13
def matchCharacters(allMovies, query_characters):
    mFolder = r"Data\Characters"
    mFile = str(AllfilesInFolder.getAllFilesInFolder(mFolder)[0])
    with open(mFolder + '/' + mFile, 'r') as f:
        allMovieCharacters = json.loads(f.read())

    score = [0] * len(allMovies)
    if len(query_characters) == 0:
        return score
    for movie in allMovieCharacters:
        movie_chars = movie["characters"].split(",")
        for query_char in query_characters:
            # Count one hit per query character that appears in the movie's cast.
            for movie_char in movie_chars:
                if query_char in movie_char:
                    score[allMovies.index(movie["movieTitle"])] += 1
                    print("Character '" + query_char + "' found in " + movie["movieTitle"])
                    break
    return score
Code example #14
def extractSemanticFromDB():
    dataFolder = r"Data\Plots"

    plotFiles = AllfilesInFolder.getAllFilesInFolder(dataFolder)
    MongoDB.delete_all()
    print("Extracting semantic roles from plots")
    for movie in tqdm(plotFiles):
        with open(dataFolder + "/" + movie, 'r') as f:
            plot = f.read()
        sentences = ParagraphToSentences.PtoS(plot)
        semantics = semanticRoleSimilarity(sentences)
        # Flatten each semantic role into a (root, properties, weight, movie) row.
        semanticsMongo = []
        for semantic in semantics:
            r = semantic.root
            p = semantic.properties
            w = semantic.weight
            semanticsMongo.append([r, p, w, movie])
        MongoDB.bulk_insert(semanticsMongo)
        print(str(MongoDB.recordCount()) + " semantic roles present in DB")
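
The MongoDB helper module used above is not shown; the sketch below is one plausible pymongo-backed implementation of its delete_all, bulk_insert, and recordCount calls, consistent with how they are invoked here. The connection string and the database/collection names are assumptions.

from pymongo import MongoClient

_client = MongoClient("mongodb://localhost:27017/")  # assumed connection string
_collection = _client["movies"]["semanticRoles"]     # assumed db/collection names


def delete_all():
    # Remove every stored semantic role.
    _collection.delete_many({})


def bulk_insert(rows):
    # rows are [root, properties, weight, movieTitle] lists, as built above.
    docs = [{"root": r, "properties": p, "weight": w, "movieTitle": m}
            for r, p, w, m in rows]
    if docs:
        _collection.insert_many(docs)


def recordCount():
    return _collection.count_documents({})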
Code example #15
def extractNEFromDB(nlpServer_Port):
    dataFolder = r"Data\Plots"

    plotFiles = AllfilesInFolder.getAllFilesInFolder(dataFolder)

    all_NE = []
    print("Extracting NE from plots")
    for movie in tqdm(plotFiles):
        with open(dataFolder + "/" + movie, 'r') as f:
            plot = f.read()
        allNE = getNE(plot, nlpServer_Port)
        # Deduplicate the named entities while preserving their order.
        trunc_NE = []
        for NE in allNE:
            if NE not in trunc_NE:
                trunc_NE.append(NE)
        NE = {"movieTitle": movie, "NE": ",".join(trunc_NE)}
        all_NE.append(NE)
    result = json.dumps(all_NE)
    with open(r"Data\NE\MovieNE.txt", "w") as f:
        f.write(result)
    print("NE file successfully created")
Code example #16
def extractTfidFromDB():
    verbFolder = r'Data\Event\AllVerbs'
    movies = AllfilesInFolder.getAllFilesInFolder(verbFolder)
    rawDocs = []
    print("Building TF-IDF for the events")
    for movie in movies:
        with open(verbFolder + '/' + movie, 'r') as r:
            rawDocs.append(tb(r.read()))
    for i, doc in enumerate(tqdm(rawDocs)):
        # Score every verb in the document against the whole corpus and keep
        # only the verbs with a non-negligible TF-IDF weight.
        scores = {word: tfidf(word, doc, rawDocs) for word in doc.words}
        sorted_words = sorted(scores.items(), key=lambda x: x[1], reverse=True)
        Tf_Score = []
        for word, score in sorted_words:
            if score > 0.00001:
                Tf_Score.append({"Event": word, "Score": round(score, 5)})
        with open(r'Data\Event\TFIDF' + '/' + movies[i], 'w') as w:
            w.write(json.dumps(Tf_Score))
    print("Event files successfully created")
Code example #17
def matchEvent(allMovies, queryEvents, threshold):
    score = [0] * len(allMovies)
    mFolder = r"Data\Event\TFIDF"
    mFiles = AllfilesInFolder.getAllFilesInFolder(mFolder)
    for idx, mFile in enumerate(mFiles):
        print("Movie: " + mFile)
        with open(mFolder + '/' + mFile, 'r') as f:
            allMovieEvents = json.loads(f.read())
        for queryEvent in queryEvents:
            # Wu-Palmer similarity of the query event against every movie event.
            eScore = []
            for movieEvent in allMovieEvents:
                mEvent = movieEvent["Event"]
                a1 = wn.synsets(mEvent)
                a2 = wn.synsets(queryEvent)
                try:
                    sim = a1[0].wup_similarity(a2[0])
                    if sim is not None and sim >= threshold:
                        eScore.append(round(sim, 5))
                    else:
                        eScore.append(0)
                except IndexError:
                    # No synset found for one of the verbs.
                    eScore.append(0)

            if len(eScore) > 0:
                max_value = max(eScore)
                if max_value > 0:
                    # Weight the best similarity by that event's TF-IDF score.
                    max_index = eScore.index(max_value)
                    print("       Event = '" + queryEvent + "' matched with '" +
                          allMovieEvents[max_index]['Event'] + "'. Similarity Score: " +
                          str(max_value) + "     Tf-Idf Score: " +
                          str(allMovieEvents[max_index]['Score']))
                    score[idx] += allMovieEvents[max_index]['Score'] * max_value
                else:
                    print("       No matches found for Event = '" + queryEvent + "'")
            else:
                print("       No Events were found in the movie")
    return score