def get_model_similarities_results(self, questions, number_of_models):
        # Collect model file names, skipping numpy array files and the Readme.
        models_dir = os.path.dirname(os.path.realpath(__file__)) + "/LeaningAlgoImpl/Models"
        not_wanted = ['npy', 'Readme.md']
        name_array = [f for f in listdir(models_dir)
                      if isfile(join(models_dir, f))
                      and not any(pattern in f for pattern in not_wanted)]

        # print(name_array)
        random.shuffle(name_array)
        results = []
        # Evaluate at most number_of_models randomly chosen models.
        for name in name_array[:number_of_models]:
            print(name)
            finished_model = FM.Finished_Models()
            finished_model.get_model(models_dir + "/" + name)
            results.append(finished_model.model_similarity_results(questions))

        # print(results)
        return results
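# A minimal usage sketch for the method above (assumption: it lives on a model
# evaluation class, here called "ModelTester" purely for illustration, and the
# argument values are illustrative as well):
#
#     tester = ModelTester()
#     similarity_results = tester.get_model_similarities_results("wordsim353.tsv", number_of_models=5)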
Example 2
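# Imports assumed throughout these snippets (they are not shown in the original
# excerpts, apart from the script example further below):
import os
import random
import csv
import numpy as np
from os import listdir
from os.path import isfile, join
import LeaningAlgoImpl.Finished_Models as FM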
def get_model_similarities_results():
    questions = "wordsim353.tsv"
    # Collect model file names, skipping numpy array files and the Readme.
    models_dir = os.path.dirname(os.path.realpath(__file__)) + "/LeaningAlgoImpl/Models"
    not_wanted = ['npy', 'Readme.md']
    name_array = [f for f in listdir(models_dir)
                  if isfile(join(models_dir, f))
                  and not any(pattern in f for pattern in not_wanted)]

    # print(name_array)
    results = []

    for name in name_array:
        print(name)
        finished_model = FM.Finished_Models()
        finished_model.get_model(models_dir + "/" + name)
        pearson, spearman, oov = finished_model.human_similarity_test(
            questions)
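        # Row layout (inferred from the unpacking above): model name, a constant 10
        # whose meaning is not shown in this snippet, Spearman correlation and
        # p-value, Pearson correlation and p-value.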
        fixed_result = [
            name, 10, spearman[0], spearman[1], pearson[0], pearson[1]
        ]
        print(fixed_result)
        results.append(fixed_result)

    print(results)

    for res in results:
        print(res)
        # Append to the CSV, creating it on first use.
        with open("individual_models_human_similarity_english.csv", "a") as f:
            np.savetxt(f, [res], delimiter=',', newline="\n", fmt="%s")

    # print(results)
    return results
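# human_similarity_test is not defined in this snippet. Below is a minimal sketch of
# what it plausibly wraps, assuming gensim's word-pair evaluation on a KeyedVectors
# instance (wordsim353.tsv ships with gensim's test data); an illustration under that
# assumption, not the project's actual code.
from gensim.test.utils import datapath

def human_similarity_test_sketch(keyed_vectors, questions="wordsim353.tsv"):
    # evaluate_word_pairs returns ((pearson_r, pearson_p), (spearman_r, spearman_p), oov_ratio)
    return keyed_vectors.evaluate_word_pairs(datapath(questions))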
    def get_multiple_certainess_tie_handling_results(self, questions,
                                                     number_of_models, topn):
        # Collect model file names, skipping numpy array files.
        models_dir = os.path.dirname(os.path.realpath(__file__)) + "/LeaningAlgoImpl/Models"
        not_wanted = 'npy'
        name_array = [f for f in listdir(models_dir)
                      if isfile(join(models_dir, f)) and not_wanted not in f]

        # print(name_array)
        # Load at most number_of_models models.
        models = []
        for name in name_array[:number_of_models]:
            # print(name)
            finished_model = FM.Finished_Models()
            finished_model.get_model(models_dir + "/" + name)
            models.append(finished_model)

        # print(models)

        results = []
        for model in models:
            results.append(model.get_acc_results_extra(topn, questions))

        # print(results)
        return results
    def get_models_clustering_results(self, test_set, number_of_models):
        # Collect model file names, skipping numpy array files and the Readme.
        models_dir = os.path.dirname(os.path.realpath(__file__)) + "/LeaningAlgoImpl/Models"
        not_wanted = ['npy', 'Readme.md']
        name_array = [f for f in listdir(models_dir)
                      if isfile(join(models_dir, f))
                      and not any(pattern in f for pattern in not_wanted)]

        # print(name_array)
        random.shuffle(name_array)
        results = []
        # Cluster with at most number_of_models randomly chosen models.
        for name in name_array[:number_of_models]:
            print(name)
            finished_model = FM.Finished_Models()
            finished_model.get_model(models_dir + "/" + name)
            # If the model knows fewer than 3 of the test words, ignore it.
            try:
                results.append(finished_model.get_model_clusters(test_set))
            except IndexError:
                print("model knows too little")

        # print(results)
        return results
import LeaningAlgoImpl.Finished_Models as FM
import logging, os, time

logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
dir_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))

finished_model = FM.Finished_Models()
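# The file name below appears to encode the training configuration (CBOW architecture
# plus numeric hyperparameters) and the corpus size ("Trained_on1000000articles");
# the exact meaning of each number is not documented in this snippet.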
finished_model.get_model(dir_path + '/Code/LeaningAlgoImpl/Models/CBOW,0,5,0,10,100,1,90000_Trained_on1000000articles')

print(finished_model)

finished_model.acc()  # 'danish-topology.txt'

print(finished_model.acc('questions-words.txt'))

print("DANISH")
#print(finished_model.danish_acc('questions-words.txt'))
vocab = finished_model.get_vocabulary()

if 'en' in vocab:
    print('hej')


print(finished_model.danish_acc('danish-topology.txt'))

print(finished_model.special_danish_acc('special-danish-topology.txt'))

#finished_model.get_vocabulary()

#print('similarity')
#print(finished_model.similarity('en', 'er'))
Example 6
def get_model_clustering_results():
    # Collect model file names, skipping numpy array files and the Readme.
    models_dir = os.path.dirname(os.path.realpath(__file__)) + "/LeaningAlgoImpl/Models"
    not_wanted = ['npy', 'Readme.md']
    name_array = [f for f in listdir(models_dir)
                  if isfile(join(models_dir, f))
                  and not any(pattern in f for pattern in not_wanted)]

    # print(name_array)

    for test in [
            "Navneord-udsagnsord-tillægsord.csv", "Frugt-dyr-køretøjer.csv",
            "Hus-værktøj-kropsdele.csv"
    ]:
        dir_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
        test_set = dir_path + '/Code/TestingSet/' + test

        reals = []
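        # Expected layout of the test file (inferred from the parsing below): one
        # entry per row, with blank rows separating the clusters; each non-empty row
        # is joined into a single string.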
        with open(test_set) as csvfile:
            test_reader = csv.reader(csvfile, delimiter=',')
            # initialize first cluster
            cluster = []
            for row in test_reader:
                if not row:
                    # Add new cluster
                    reals.append(cluster)
                    cluster = []
                else:
                    cluster.append(''.join(row))
            # add last cluster
            reals.append(cluster)

        print(reals)
        results = []
        for name in name_array:
            print(name)
            finished_model = FM.Finished_Models()
            finished_model.get_model(
                os.path.dirname(os.path.realpath(__file__)) +
                "/LeaningAlgoImpl/Models/" + name)
            try:
                result = finished_model.clustering(reals)
                print(result)
                fixed_result = [name, result[1], result[3]]
                print(fixed_result)
                results.append(fixed_result)
            except IndexError:
                print("model knows to little")

        print(results)

        for res in results:
            print(res)
            # Append to the CSV for this test set, creating it on first use.
            with open("individual_models_clustering_english" + test + ".csv", "a") as f:
                np.savetxt(f, [res], delimiter=',', newline="\n", fmt="%s")

    # print(results)
    return results
Example 7
def get_multiple_results(topn=1):
    questions = "questions-words.txt"
    # Collect model file names, skipping numpy array files and the Readme.
    models_dir = os.path.dirname(os.path.realpath(__file__)) + "/LeaningAlgoImpl/Models"
    not_wanted = ['npy', 'Readme.md']
    name_array = [f for f in listdir(models_dir)
                  if isfile(join(models_dir, f))
                  and not any(pattern in f for pattern in not_wanted)]

    print(name_array)

    reals = get_expected_acc_results(questions)
    results = []
    for name in name_array:
        print(name)
        finished_model = FM.Finished_Models()
        finished_model.get_model(models_dir + "/" + name)
        #models.append(finished_model)

        res = finished_model.get_acc_results(1, questions)
        correct = []
        wrong = []
        number_of_correct = 0
        number_of_wrong = 0
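        # Compare the model's top-1 prediction for each analogy question with the
        # expected answer; this assumes res and reals are aligned index-for-index.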
        for j in range(0, len(res)):
            if res[j] is not None:
                #print(res[j][0])

                predicted = res[j][0]
            else:
                predicted = res[j]
            expected = reals[j]
            if predicted == expected:
                correct_message = ("predicted: %s correct" % (predicted))
                correct.append(correct_message)
                number_of_correct += 1
            else:
                wrong_message = ("predicted: %s, should have been: %s" %
                                 (predicted, expected))
                wrong.append(wrong_message)
                number_of_wrong += 1
        results.append([name, len(correct), len(wrong)])

    print(reals)

    print(results)
    print(len(name_array))
    print(len(results))

    for res in results:
        print(res)
        # Append to the CSV, creating it on first use.
        with open("individual_models_acc_english.csv", "a") as f:
            np.savetxt(f, [res], delimiter=',', newline="\n", fmt="%s")
    # print(results)
    return results
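# get_expected_acc_results (used in Example 7) is not shown in this snippet. Below is
# a minimal sketch of what it plausibly does, assuming questions-words.txt follows the
# standard word2vec analogy format ("a b c d" lines with ": section" headers); an
# illustration under that assumption only.
def get_expected_acc_results_sketch(questions_file="questions-words.txt"):
    expected = []
    with open(questions_file) as handle:
        for line in handle:
            if line.startswith(':') or not line.strip():
                continue  # skip section headers and blank lines
            # The fourth token is the expected answer to the analogy.
            expected.append(line.split()[3])
    return expected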