Python DataTasks.write1dArray Exemples

Exemple #1

0

Afficher le fichier

Fichier : MovieTasks.py Projet : eygrr/RulesFromAuto-encoders

def getMatchedLines(file_name, lines_to_match):
    matched_lines = []
    failed_lines = []
    match_names = []
    match_years = []
    for line in lines_to_match:
        match_names.append(re.split(r'\t+', line)[0])
        match_years.append(re.split(r'\t+', line)[1])
    file = open(file_name, "r")
    lines = file.readlines()
    for i in range(len(lines_to_match)):
        matched = False
        last_movie = ""
        for l in range(len(lines)):
            if matched is True and re.split(r'\t+', lines[l])[0] != last_movie:
                break
            split_line = re.split(r'\t+', lines[l])
            split_line[0] = re.sub(r'\s+', '', split_line[0].translate(None, string.punctuation).lower())
            match_names[i] = re.sub(r'\s+', '', match_names[i].translate(None, string.punctuation).lower())
            if split_line[0] == match_names[i]:
                matched_lines.append(lines[l])
                matched = True
                last_movie = re.split(r'\t+', lines[l])[0]
                print "Found a line for " + last_movie
                continue
        if matched:
            print "Matched", lines_to_match[i]
        else:
            failed_lines.append(lines_to_match[i])
            print "Failed", lines_to_match[i]
    dt.write1dArray(failed_lines, "filmdata/KeywordData/failed_second_match.txt")
    dt.write1dArray(matched_lines, "filmdata/KeywordData/matched_lines_NEW.txt")

Exemple #2

0

Afficher le fichier

Fichier : MovieTasks.py Projet : eygrr/RulesFromAuto-encoders

def getScoreDifferences(name_word_file1, name_score_file1, name_word_file2, name_score_file2, name):
    """Rank the words of the first run by how much their score changed.

    ``name_score_file1`` and ``name_score_file2`` hold one float per line and
    ``name_word_file1`` one word per line.  The difference ``score1 - score2``
    is computed line by line, and two files are written with
    ``dt.write1dArray`` under ``filmdata/SVM/``:

    * ``most_different_words_<name>.txt`` - the words ordered by ascending
      difference (ties ordered by word),
    * ``most_different_values_<name>.txt`` - the differences in ascending
      order.

    ``name_word_file2`` is accepted for interface compatibility; its contents
    never influenced the result and are no longer read.

    Raises IndexError if the second score file has fewer lines than the first.
    """
    def read_stripped(path):
        # Close each file deterministically instead of leaking the handle.
        with open(path, "r") as handle:
            return [entry.strip() for entry in handle]

    words1 = read_stripped(name_word_file1)
    scores1 = [float(score) for score in read_stripped(name_score_file1)]
    scores2 = [float(score) for score in read_stripped(name_score_file2)]

    # Index explicitly (rather than zip) so a short second file fails loudly.
    differences_list = [scores1[i] - scores2[i] for i in range(len(scores1))]
    most_different_words = [word for (_, word) in sorted(zip(differences_list, words1))]
    differences_list = sorted(differences_list)
    dt.write1dArray(most_different_words, "filmdata/SVM/most_different_words_" + name + ".txt")
    dt.write1dArray(differences_list, "filmdata/SVM/most_different_values_" + name + ".txt")

Exemple #3

0

Afficher le fichier

Fichier : MovieTasks.py Projet : eygrr/RulesFromAuto-encoders

def outputTopByVotes(amount_of_votes):
    file = open("filmdata/ratings.list/ratings.list", "r")
    lines = file.readlines()
    top_movies = []
    top_ratings = []
    for line in lines:
        top_ratings.append(int(line.split()[1]))
    top_ratings = np.asarray(top_ratings)
    indices = np.argpartition(top_ratings, -amount_of_votes)[-amount_of_votes:]
    for i in indices:
        just_movie = lines[i].split()[3:]
        just_movie = " ".join(just_movie)
        just_movie = just_movie.split('{')
        just_movie = just_movie[0]
        just_movie = just_movie.split('(')
        try:
            if not re.findall(r'\d+', just_movie[2])[0]:
                del just_movie[2]
            else:
                just_movie[0] = just_movie[0] + "(" + just_movie[1]
                del just_movie[1]
        except IndexError:
            print
        try:
            year = re.findall(r'\d+', just_movie[1])[0]
        except IndexError:
            print "FALED", just_movie
        if just_movie[0].endswith(' '):
            just_movie[0] = just_movie[0][:-1]
        if just_movie[0].startswith('"') and just_movie[0].endswith('"'):
            just_movie[0] = just_movie[0][1:-1]
        just_movie = just_movie[0] + " " + str(year)
        print just_movie
        top_movies.append(just_movie)
    dt.write1dArray(top_movies, "filmdata/top50000moviesbyvotes.txt")

Exemple #4

0

Afficher le fichier

Fichier : MovieTasks.py Projet : eygrr/RulesFromAuto-encoders

def getVectors(ordered_IDs, unique_phrases):
    vectors = [[0 for x in range(len(ordered_IDs))] for x in range(len(unique_phrases))]
    vectors_maintained = [[0 for x in range(len(ordered_IDs))] for x in range(len(unique_phrases))]
    multi_dictionary = {}
    dict_mapping = {}
    print "Mapping to memory."
    for i in range(len(ordered_IDs)):
        ordered_IDs[i] = str(ordered_IDs[i])
    for i in range(len(ordered_IDs)):
        ordered_IDs[i] = ordered_IDs[i].strip()
        dict_mapping[ordered_IDs[i]] = i
        if ordered_IDs[i] != "-1":
            file = open("filmdata/Tokens/" + ordered_IDs[i] + ".film", "r")
            lines = file.readlines()[1:]
            for line in lines:
                split_line = line.split()
                multi_dictionary[(ordered_IDs[i], split_line[0])] = int(split_line[1])
            file.close()
        else:
            multi_dictionary[(ordered_IDs[i], split_line[0])] = 0
    for up in range(len(unique_phrases)):
        unique_phrases[up] = unique_phrases[up].strip()

    print len("Iterating over memory.")
    for p in range(13177, 25842, 1):
        for key, value in multi_dictionary.iteritems():
            if key[1] == unique_phrases[p]:
                vectors_maintained[p][dict_mapping[key[0].strip()]] = value
                vectors[p][dict_mapping[key[0]]] = 1
        print unique_phrases[p]
        dt.write1dArray(vectors_maintained[p], "filmdata/classesPhrases/nonbinary/class-" + unique_phrases[p])
        dt.write1dArray(vectors[p], "filmdata/classesPhrases/class-" + unique_phrases[p])

    return vectors_maintained, vectors

Exemple #5

0

Afficher le fichier

Fichier : MovieTasks.py Projet : eygrr/RulesFromAuto-encoders

def getVectorsKeywords(movie_strings, keywords):
    multi_dictionary = {}
    dict_mapping = {}
    movie_names = []
    file_names = dt.getAllFileNames("filmdata\KeywordData\Movie_Most_Common_Keyword_Mapping")
    for i in movie_strings:
        movie_names.append(i.strip()[:-5])
        print i
    print "Mapping to memory."
    for i in file_names:
        try:
            file = open("filmdata/KeywordData/Movie_Most_Common_Keyword_Mapping/" +i, "r")
            lines = file.readlines()
            dict_mapping[movie_strings[int(i)]] = i
            for line in lines:
                line = line.strip()
                multi_dictionary[(movie_strings[int(i)], line)] = 1
            file.close()
        except IOError:
            print movie_names[i]

    for up in range(len(keywords)):
        keywords[up] = keywords[up].strip()

    print len("Iterating over memory.")
    for p in range(len(keywords)):
        vector = [0 for x in range(len(movie_strings))]
        print len(vector)
        for key, value in multi_dictionary.iteritems():
            if key[1] == keywords[p]:
                #print int(dict_mapping[key[0]])
                vector[int(dict_mapping[key[0]])] = 1
        print keywords[p]
        dt.write1dArray(vector, "filmdata/classesKeywords/NewData/class-" + keywords[p])

Exemple #6

0

Afficher le fichier

Fichier : MovieTasks.py Projet : eygrr/RulesFromAuto-encoders

def findMissingKeywords(file_name, common_keywords):
    print "?"
    file = open(file_name, "r")
    lines = file.readlines()
    last_film = ""
    movie_strings = dt.importString("filmdata/filmNames.txt")
    standard_strings = []
    indexes = []
    for m in movie_strings:
        m = m[:-5]
        standard_strings.append(m.translate(None, string.punctuation).replace(" ", "").strip().upper())
    for line in lines:
        film_vectors = []
        line = line.strip()
        if len(line) > 2:
            line_split = re.split(r'\t+', line)
            line_split[0] = line_split[0].translate(None, string.punctuation).replace(" ", "").strip().upper()
            file_save = ""
            for m in range(len(standard_strings)):
                if standard_strings[m] == line_split[0]:
                    print "matched", m, standard_strings[m], line_split[0]
                    file_save = str(m)
                    break
            if file_save != "":
                if last_film.strip() != line_split[0].strip() and last_film is not None:
                    print "Succeeded", line_split[0]
                    for m in range(len(standard_strings)):
                        if standard_strings[m] == last_film:
                            indexes.append(m)
                            break
                last_film = line_split[0]
            else:
                print "Failed", line_split[0],
    dt.write1dArray(indexes, "filmdata/MISSING_FROM_MOVIEDATA.txt")

Exemple #7

0

Afficher le fichier

Fichier : MovieTasks.py Projet : eygrr/RulesFromAuto-encoders

def getIDs(movie_strings):
    ordered_IDs = []
    movie_names = []
    for name in movie_strings:
        movie_names.append(name[:-5])
    id_mappings = open("filmdata/KeywordData/Movie_Most_Common_Keyword_Mapping/films-ids.txt", "r")
    id_mappings_lines = id_mappings.readlines()
    found_name = False
    failed_names = []
    x = 0
    for name in movie_names:
        for line in id_mappings_lines:
            mapping_id = line.split()[0]
            mapping_name = re.split(r'\t+', line)[2]
            if similar(name.upper().strip(), mapping_name.upper().strip()):
                ordered_IDs.append(mapping_id)
                found_name = True
                break
        if found_name is True:
            found_name = False
        else:
            failed_names.append(name)
            ordered_IDs.append(-1)
        x += 1
        print x
    dt.write1dArray(failed_names, "filmdata/KeywordData/NAMES_THAT_FAILED_IDS.txt")
    dt.write1dArray(ordered_IDs, "filmdata/KeywordData/IDsByOriginalOrdering.txt")

Exemple #8

0

Afficher le fichier

Fichier : MovieTasks.py Projet : eygrr/RulesFromAuto-encoders

def outputKeywords():
    """Run the keyword pipeline and write its two output files.

    Loads the film names, calls ``getMovieDataFromIMDB`` (its return value is
    not used here), obtains the common keywords and writes them with
    ``dt.write1dArray``, then builds keyword vectors with
    ``getKeywordVectors`` and writes them with ``dt.write2dArray``.  Both
    output names end in the commonality value, which is fixed at 0.
    """
    commonality = 0
    suffix = str(commonality)
    film_names = dt.importString("filmdata/filmNames.txt")
    getMovieDataFromIMDB(film_names)
    frequent_keywords = getMostCommonKeywords(0, "filmdata/IMDB_movie_data.txt")
    dt.write1dArray(frequent_keywords,
                    "filmdata/common_keywords_15k_commanility_" + suffix)
    keyword_vectors = getKeywordVectors(frequent_keywords, film_names, "")
    dt.write2dArray(keyword_vectors,
                    "filmdata/classesKeywords/class-extra-all-commonality-" + suffix)

Exemple #9

0

Afficher le fichier

Fichier : MovieTasks.py Projet : eygrr/RulesFromAuto-encoders

def trimMovieData(file_string):
    new_movie_data = []
    movie_data_file = open(file_string)
    with movie_data_file as myFile:
        for num, line in enumerate(myFile, 1):
            if "{" not in line and "(V)" not in line and "(TV)" not in line and "(VG)" not in line and len(line) > 2 and line.startswith(" ") is False and line.startswith("\t") is False and line.startswith("\n") is False:
                new_movie_data.append(line[:-1])
    print "dun"
    dt.write1dArray(new_movie_data, file_string + "trimmed")

Exemple #10

0

Afficher le fichier

Fichier : MovieTasks.py Projet : eygrr/RulesFromAuto-encoders

def getMissingIndexes(index_list, length):
    """Write the indexes in ``range(length)`` that are absent from ``index_list``.

    Entries of ``index_list`` are converted with ``int``.  Each one blanks the
    corresponding slot of a 0..length-1 list (so negative values count from
    the end and out-of-range values raise IndexError); the surviving values
    are written with ``dt.write1dArray`` to
    ``filmdata/missing_indexes_keywords.txt``.
    """
    remaining = list(range(length))
    for raw_index in index_list:
        remaining[int(raw_index)] = -1
    missing_indexes = [position for position in remaining if position > -1]
    dt.write1dArray(missing_indexes, "filmdata/missing_indexes_keywords.txt")

Exemple #11

0

Afficher le fichier

    def __init__(self, cluster_vectors_fn, cluster_labels_fn, movie_names_fn,
                 label_names_fn, cluster_names_fn, filename, training_data,
                 cluster_to_classify, max_depth):

        vectors = dt.importVectors(cluster_vectors_fn)
        labels = dt.importLabels(cluster_labels_fn)
        cluster_names = dt.importString(cluster_names_fn)
        vector_names = dt.importString(movie_names_fn)
        label_names = dt.importString(label_names_fn)
        scores_array = []
        for l in range(len(labels[0])):
            new_labels = [0] * 15000
            for x in range(len(labels)):
                new_labels[x] = labels[x][l]
            x_train = np.asarray(vectors[:training_data])
            x_test = np.asarray(vectors[training_data:])
            y_train = np.asarray(new_labels[:training_data])
            y_test = np.asarray(new_labels[training_data:])

            self.clf = tree.DecisionTreeClassifier(max_depth=max_depth)
            self.clf = self.clf.fit(x_train, y_train)

            y_pred = self.clf.predict(x_test)
            f1 = f1_score(y_test, y_pred, average='binary')
            accuracy = accuracy_score(y_test, y_pred)
            scores = [[label_names[l], "f1", f1, "accuracy", accuracy]]
            print scores[0]
            scores_array.append(scores)

            class_names = [label_names[l], "NOT " + label_names[l]]
            tree.export_graphviz(self.clf,
                                 feature_names=cluster_names,
                                 class_names=class_names,
                                 out_file='Rules/' + label_names[l] +
                                 filename + '.dot',
                                 max_depth=10)
            """
            rewrite_dot_file = dt.importString('Rules/'+filename+label_names[l]+'.dot')
            new_dot_file = []
            for s in rewrite_dot_file:
                new_string = s
                if "->" not in s and "digraph" not in s and "node" not in s and "(...)" not in s and "}" not in s:
                    index = s.index("value")
                    new_string = s[:index] + '"] ;'
                new_dot_file.append(new_string)
            dt.write1dArray(new_dot_file, 'Rules/Cleaned'+filename+label_names[l]+'.dot')
            """
            graph = pydot.graph_from_dot_file('Rules/' + label_names[l] +
                                              filename + '.dot')
            graph.write_png('Rules/Images/' + label_names[l] + filename +
                            ".png")
            self.get_code(self.clf, cluster_names, class_names,
                          label_names[l] + filename)
        dt.write1dArray(scores_array, 'Rules/Scores/' + filename + '.scores')

Exemple #12

0

Afficher le fichier

Fichier : Cluster.py Projet : eygrr/RulesFromAuto-encoders

    def __init__(self, low_threshold, high_threshold,  filename):
        """Split directions by threshold, cluster them and write the results.

        ``self.splitDirections`` is given the directions, scores and names
        files for ``filename`` plus both thresholds; its four results are
        passed to ``self.createTermClusters``.  The cluster names, the
        clusters and the cluster-name dictionary are written under
        ``Clusters/`` with file names containing ``<high>,<low>``.
        """
        directions_path = "Directions/" + filename + ".directions"
        scores_path = "SVMResults/" + filename + ".scores"
        names_path = "SVMResults/" + filename + ".names"
        hdn, ldn, hd, ld = self.splitDirections(
            directions_path, scores_path, names_path,
            low_threshold, high_threshold)

        names, name_dict, clusters = self.createTermClusters(hd, ld, hdn, ldn)

        thresholds = str(high_threshold) + "," + str(low_threshold)
        least_similar_stem = "Clusters/" + filename + "LeastSimilarHIGH" + thresholds
        dt.write1dArray(names, least_similar_stem + ".names")
        dt.write2dArray(clusters, least_similar_stem + ".clusters")
        dt.writeArrayDict(
            name_dict,
            "Clusters/" + filename + "MostSimilarCLUSTER" + thresholds + ".names")

Exemple #13

0

Afficher le fichier

Fichier : MovieTasks.py Projet : eygrr/RulesFromAuto-encoders

def getIMDBKeywordsForMovieNames(movie_names):
    """Collect the keyword-list lines belonging to the given films.

    ``movie_names`` entries have newlines removed and are sorted; the last
    whitespace-separated token of each is taken as the year and the text
    before it as the name.  The keyword list is then scanned for runs of
    lines whose title is accepted by ``similar()`` and whose year equals the
    wanted year.  Matched lines are written with ``dt.write1dArray`` to
    ``filmdata/imdb_movie_keywords.txt``.

    NOTE(review): ``x`` (the index of the film currently searched for) only
    advances when a matched run ends.  If ``split_names[x]`` never matches,
    the ``while`` loop rescans the list forever; if ``x`` reaches
    ``len(split_names)`` before 50000, ``split_names[x]`` raises IndexError.
    """
    stripped_movie_names = []
    for movie in movie_names:
        stripped_movie_names.append(movie.replace('\n', ''))
    stripped_movie_names = sorted(stripped_movie_names)
    split_names = []
    split_years = []
    for stripped_movie_name in stripped_movie_names:
        split = stripped_movie_name.split()
        # Last token is the year; the name is everything before it and the
        # separating character.
        split_year = split[len(split)-1]
        split_years.append(split_year)
        split_names.append(stripped_movie_name[:-len(split_year)-1])

    # NOTE(review): backslash-separated path and the handle is never closed.
    file = open("filmdata\keywords.list\keywords.list", "r")
    lines = file.readlines()
    # Skips the first 79748 lines - presumably the list's preamble; confirm.
    keywords_list = lines[79748:]
    matched_lines = []
    x = 0
    # Starts as a raw line, but is compared against formatted "<name> <year>"
    # strings below.
    last_line = keywords_list[0]
    matched = False
    while x < 50000:
        for line in keywords_list:
            # Split off the last two "(" groups: title, then year group.
            split_line = line.rsplit('(', 2)
            movie_name = split_line[0].rstrip()

            if movie_name.startswith('"') and movie_name.endswith('"'):
                movie_name = movie_name[1:-1]
            try:
                movie_year = str(re.findall(r'\d+', split_line[1])[0])
            except IndexError:
                movie_year = "NULL"
            if not movie_name:
                movie_name = "'NULL"
            formatted_line = movie_name.rstrip() + " " + str(movie_year).rstrip()

            if matched is True and formatted_line == last_line:
                # Still inside the run of lines for the matched film.
                matched_lines.append(line)
                print split_names[x], line
            elif matched is False and similar(movie_name.strip().upper(), split_names[x].strip().upper()) and movie_year == split_years[x]:
                # First line of a run for the film currently searched for.
                matched = True
                matched_lines.append(line)
                print split_names[x], line
            elif matched is True and formatted_line != last_line:
                # Run ended: move on to the next wanted film.  The current
                # line itself is not tested against that next film.
                matched = False
                x = x + 1
            last_line = formatted_line
        print "cycled through"



    print "Found:", x
    dt.write1dArray(matched_lines, "filmdata/imdb_movie_keywords.txt")

Exemple #14

0

Afficher le fichier

Fichier : MovieTasks.py Projet : eygrr/RulesFromAuto-encoders

def makeConsistent(file_name, new_file_name):
    new_file = []
    with open(file_name) as my_file:
        for num, line in enumerate(my_file, 1):
            line = line.strip()
            name = line[:-4]
            year = line[len(line)-4:]

            name = name.translate(None, string.punctuation)
            year = year.translate(None, string.punctuation)
            new_line = "\t".join([name, year])
            new_file.append(new_line)
            print new_line

    dt.write1dArray(new_file, new_file_name)

Exemple #15

0

Afficher le fichier

Fichier : MovieTasks.py Projet : eygrr/RulesFromAuto-encoders

def reMapPPMI(ordered_IDs, file_names):
    print "Mapping to memory."
    for i in range(len(ordered_IDs)):
        ordered_IDs[i] = str(ordered_IDs[i])
    for i in range(len(ordered_IDs)):
        for f in range(len(file_names)):
            id = file_names[f].split(".")[0]
            if int(ordered_IDs[i]) == int(id) and int(ordered_IDs[i]) != -1:
                print ordered_IDs[i], id
                file = open("filmdata/vectors/Tokens/" + file_names[f])
                lines = file.readlines()
                dt.write1dArray(lines, "filmdata/NewTokens/"+str(i)+".ppmi")
                file.close()
            elif int(ordered_IDs[i]) == -1:
                dt.write1dArray([[""]], "filmdata/NewTokens/"+str(i)+".error")

Exemple #16

0

Afficher le fichier

Fichier : MovieTasks.py Projet : eygrr/RulesFromAuto-encoders

def getUnformattedTopByVotes(amount_of_votes):
    file = open("filmdata/ratings.list/ratings.list", "r")
    lines = file.readlines()
    top_movies = []
    top_ratings = []
    for line in lines:
        top_ratings.append(int(line.split()[1]))
    top_ratings = np.asarray(top_ratings)
    indices = np.argpartition(top_ratings, -amount_of_votes)[-amount_of_votes:]
    for i in indices:
        just_movie = lines[i].split()[3:]
        just_movie = " ".join(just_movie)
        print just_movie
        top_movies.append(just_movie)
    dt.write1dArray(top_movies, "filmdata/imdb_formatted_top50000.txt")

Exemple #17

0

Afficher le fichier

Fichier : KN.py Projet : eygrr/RulesFromAuto-encoders

def getKNeighbors(vector_path="filmdata/films200.mds/films200.mds", class_path="filmdata/classesGenres/class-All",
                  n_neighbors=1, algorithm="kd_tree", leaf_size=30,
                  training_data=10000, name="normal200"):
    movie_vectors = np.asarray(dt.importVectors(vector_path))
    movie_labels = np.asarray(dt.importLabels(class_path))

    x_train, y_train, x_test, y_test = dt.splitData(training_data, movie_vectors, movie_labels)

    classifier = neighbors.KNeighborsClassifier(n_neighbors=n_neighbors, algorithm=algorithm, leaf_size=leaf_size)
    classifier.fit(x_train, y_train.ravel())
    y_pred = classifier.predict(x_test)

    f1 = f1_score(y_test, y_pred, average='macro')
    accuracy = accuracy_score(y_test, y_pred)
    dt.write1dArray([f1, accuracy], "KNNScores/" + name + ".score")
    print "F1 " + str(f1), "Accuracy", accuracy

Exemple #18

0

Afficher le fichier

Fichier : MovieTasks.py Projet : eygrr/RulesFromAuto-encoders

def writeMissing(folder_name):
    print "?"
    file_names = dt.getAllFileNames(folder_name)
    standard = range(15000)
    missing = []
    for i in standard:
        found = False
        for f in file_names:
            if int(f) == int(i):
                found = True
                break
        if found:
            print "found", i
        else:
            missing.append(i)
            print "no found", i
    dt.write1dArray(missing, "filmdata/MISSING_KEYWORD_ITEMS.txt")

Exemple #19

0

Afficher le fichier

Fichier : DecisionTree.py Projet : eygrr/RulesFromAuto-encoders

    def __init__(self, cluster_vectors_fn, cluster_labels_fn, movie_names_fn, label_names_fn, cluster_names_fn, filename, training_data, cluster_to_classify, max_depth):

        vectors = dt.importVectors(cluster_vectors_fn)
        labels = dt.importLabels(cluster_labels_fn)
        cluster_names = dt.importString(cluster_names_fn)
        vector_names = dt.importString(movie_names_fn)
        label_names = dt.importString(label_names_fn)
        scores_array = []
        for l in range(len(labels[0])):
            new_labels = [0] * 15000
            for x in range(len(labels)):
                new_labels[x] = labels[x][l]
            x_train = np.asarray(vectors[:training_data])
            x_test = np.asarray(vectors[training_data:])
            y_train = np.asarray(new_labels[:training_data])
            y_test = np.asarray(new_labels[training_data:])


            self.clf = tree.DecisionTreeClassifier( max_depth=max_depth)
            self.clf = self.clf.fit(x_train, y_train)

            y_pred = self.clf.predict(x_test)
            f1 = f1_score(y_test, y_pred, average='binary')
            accuracy = accuracy_score(y_test, y_pred)
            scores = [[label_names[l], "f1", f1, "accuracy", accuracy]]
            print scores[0]
            scores_array.append(scores)

            class_names = [ label_names[l], "NOT "+label_names[l]]
            tree.export_graphviz(self.clf, feature_names=cluster_names, class_names=class_names, out_file='Rules/'+label_names[l]+filename+'.dot', max_depth=10)
            """
            rewrite_dot_file = dt.importString('Rules/'+filename+label_names[l]+'.dot')
            new_dot_file = []
            for s in rewrite_dot_file:
                new_string = s
                if "->" not in s and "digraph" not in s and "node" not in s and "(...)" not in s and "}" not in s:
                    index = s.index("value")
                    new_string = s[:index] + '"] ;'
                new_dot_file.append(new_string)
            dt.write1dArray(new_dot_file, 'Rules/Cleaned'+filename+label_names[l]+'.dot')
            """
            graph = pydot.graph_from_dot_file('Rules/'+label_names[l]+filename+'.dot')
            graph.write_png('Rules/Images/'+label_names[l]+filename+".png")
            self.get_code(self.clf, cluster_names, class_names, label_names[l]+filename)
        dt.write1dArray(scores_array, 'Rules/Scores/'+filename+'.scores')

Exemple #20

0

Afficher le fichier

    def get_code(self, tree, feature_names, class_names, filename):
        """Write the fitted tree as nested IF/ELSE rule text.

        The tree structure is read from ``tree.tree_``.  A node whose
        threshold equals -2 is emitted as a ``return`` entry naming
        ``class_names[0]`` when its first value count is at least its second,
        otherwise ``class_names[1]``; any other node is emitted as
        ``IF ( <feature> <= <threshold> ) { ... } ELSE { ... }``.

        The entries are written with ``dt.write1dArray`` to
        ``Rules/Statements/<filename>.rules``; that file is then passed
        through ``jsbeautifier.beautify_file`` and overwritten with the
        result.  Note that ``return`` entries are appended as 2-tuples, not
        strings.
        """
        structure = tree.tree_
        children_left = structure.children_left
        children_right = structure.children_right
        threshold = structure.threshold
        value = structure.value

        # One name per node, looked up for every entry (including the -2
        # entries, which index from the end of feature_names) so positions
        # stay aligned with node ids.
        node_features = [feature_names[index] for index in structure.feature]
        rules_array = []

        def emit(node):
            if threshold[node] == -2:
                if value[node][0][0] >= value[node][0][1]:
                    rules_array.append(("return", class_names[0]))
                else:
                    rules_array.append(("return", class_names[1]))
                return
            rules_array.append("IF ( " + node_features[node] + " <= " +
                               str(threshold[node]) + " ) {")
            if children_left[node] != -1:
                emit(children_left[node])
            rules_array.append("} ELSE {")
            if children_right[node] != -1:
                emit(children_right[node])
            rules_array.append("}")

        emit(0)
        rules_path = "Rules/Statements/" + filename + ".rules"
        dt.write1dArray(rules_array, rules_path)
        cleaned = jsbeautifier.beautify_file(rules_path)
        rules_file = open(rules_path, "w")
        rules_file.write(cleaned)
        rules_file.close()

Exemple #21

0

Afficher le fichier

Fichier : MovieTasks.py Projet : eygrr/RulesFromAuto-encoders

def getMostCommonKeywords(top_value, file_name, keyword_file, value_file):
    common_keywords = []
    file = open(file_name, "r")
    lines = file.readlines()
    keywords = defaultdict(int)
    for line in lines:
        if len(line.split()) > 0:
            line_split = line.split()
            keyword = line_split[len(line_split)-1]
            keywords[keyword] += 1
        print line
    sorted_dict = sorted(keywords.iteritems(), key=lambda x:-x[1])[:top_value]
    print sorted_dict
    keys = []
    values = []
    for key, value in sorted_dict:
        keys.append(key)
        values.append(value)
    dt.write1dArray(keys, keyword_file)
    dt.write1dArray(values, value_file)

Exemple #22

0

Afficher le fichier

Fichier : Cluster.py Projet : ThomasAger/RulesFromAuto-encoders

    def __init__(self, low_threshold, high_threshold, filename):
        """Split directions by threshold, cluster them and write the results.

        The directions, scores and names files for ``filename`` and both
        thresholds go to ``self.splitDirections``; its four results go to
        ``self.createTermClusters``.  Three files are then written under
        ``Clusters/``, each name embedding ``<high>,<low>``.
        """
        svm_stem = "SVMResults/" + filename
        hdn, ldn, hd, ld = self.splitDirections(
            "Directions/" + filename + ".directions",
            svm_stem + ".scores",
            svm_stem + ".names",
            low_threshold,
            high_threshold)

        clustered = self.createTermClusters(hd, ld, hdn, ldn)
        least_similar_cluster_names, cluster_name_dict, least_similar_clusters = clustered

        def cluster_path(tag, extension):
            # Clusters/<filename><tag><high>,<low><extension>
            return "".join(["Clusters/", filename, tag, str(high_threshold),
                            ",", str(low_threshold), extension])

        dt.write1dArray(least_similar_cluster_names,
                        cluster_path("LeastSimilarHIGH", ".names"))
        dt.write2dArray(least_similar_clusters,
                        cluster_path("LeastSimilarHIGH", ".clusters"))
        dt.writeArrayDict(cluster_name_dict,
                          cluster_path("MostSimilarCLUSTER", ".names"))

Exemple #23

0

Afficher le fichier

Fichier : SVM.py Projet : ThomasAger/RulesFromAuto-encoders

    def __init__(self, name_distinction="", class_names=None, vector_path=None, class_path=None, class_by_class=True, input_size=200,
                 training_data=10000, amount_of_scores=400,  low_kappa=0.1, high_kappa=0.5, rankSVM=False, amount_to_cut_at=100, largest_cut=21470000):
        print "getting movie data"

        movie_vectors = dt.importVectors(vector_path)
        movie_labels = dt.importLabels(class_path)
        print "getting file names"

        file_names = dt.getFns(class_path[:-10])

        print len(movie_labels), len(movie_labels[0])

        print "getting training and test data"

        x_train = np.asarray(movie_vectors[:training_data])
        x_test = np.asarray(movie_vectors[training_data:])

        movie_labels = zip(*movie_labels)
        file_names, movie_labels = self.getSampledData(file_names, movie_labels, amount_to_cut_at, largest_cut)
        movie_labels = zip(*movie_labels)

        y_train = movie_labels[:training_data]
        y_test = movie_labels[training_data:]
        y_train = np.asarray(zip(*y_train))
        y_test = np.asarray(zip(*y_test))



        print len(y_train), len(y_test), training_data

        print "getting kappa scores"

        kappa_scores, directions =   self.runAllSVMs(y_test, y_train, x_train, x_test, file_names)

        dt.write1dArray(kappa_scores, "SVMResults/"+name_distinction+".scores")
        dt.write1dArray(file_names, "SVMResults/"+name_distinction+".names")

        dt.write2dArray(directions, "directions/"+name_distinction+".directions")

Exemple #24

0

Afficher le fichier

Fichier : MovieTasks.py Projet : eygrr/RulesFromAuto-encoders

def getMovieDataFromIMDB(movie_strings):
    movie_data = []
    write_line_file = open("filmdata/Found_Missing.txt", "w")
    failed_movies = []
    names = []
    years = []
    for movie_string in movie_strings:
        names.append(movie_string[:-6])
        years.append(movie_string[-5:].strip())

    for n in range(len(names)):
        found = False
        last_name = ""
        old_num = 0
        with open("filmdata/IMDB_movie_data.txttrimmed") as myFile:
            for num, line in enumerate(myFile, 1):
                num = old_num
                split_line = re.split(r'\t+', line)
                movie_string = split_line[0]
                movie_name = movie_string.split()
                del movie_name[len(movie_name)-1]
                movie_name = " ".join(movie_name)
                if found is True and last_name != movie_name:
                    break
                movie_year = int(re.findall(r'\d+', movie_string.split()[len(movie_string.split())-1])[0])
                if similar(names[n].upper().strip(), movie_name.upper().strip()) and int(years[n]) == int(movie_year):
                    movie_data.append(line)
                    write_line_file.write(line)
                    found = True
                    old_num = num
                last_name = movie_name
        if found is False:
            failed_movies.append(names[n])
            print "FAILED:", names[n]
    print "Total failed", len(failed_movies)
    write_line_file.close()
    dt.write1dArray(failed_movies, "filmdata/failed_movies_final_push.txt")
    return movie_data

Exemple #25

0

Afficher le fichier

Fichier : DecisionTree.py Projet : eygrr/RulesFromAuto-encoders

    def get_code(self, tree, feature_names, class_names, filename):
        """Dump the fitted tree as IF/ELSE rules and beautify the file.

        Nodes are visited depth-first from node 0 using the arrays of
        ``tree.tree_``.  A node with threshold -2 yields a ``return`` entry
        (a 2-tuple) carrying ``class_names[0]`` if its first value count is
        not smaller than the second, else ``class_names[1]``.  Every other
        node yields ``IF ( <feature> <= <threshold> ) {``, its left subtree,
        ``} ELSE {``, its right subtree and ``}``.

        The entries are written to ``Rules/Statements/<filename>.rules`` with
        ``dt.write1dArray``, and the file is replaced by the output of
        ``jsbeautifier.beautify_file`` on it.
        """
        inner = tree.tree_
        left = inner.children_left
        right = inner.children_right
        threshold = inner.threshold
        value = inner.value

        # The former guard (`i != -2 or i <= 200`) held for every value, so a
        # name is looked up for each entry and the list stays indexed by node.
        features = [feature_names[i] for i in inner.feature]
        rules_array = []

        def walk(node):
            is_leaf = not (threshold[node] != -2)
            if is_leaf:
                chosen = 0 if value[node][0][0] >= value[node][0][1] else 1
                rules_array.append(("return", class_names[chosen]))
                return
            condition = "IF ( " + features[node] + " <= " + str(threshold[node]) + " ) {"
            rules_array.append(condition)
            if left[node] != -1:
                walk(left[node])
            rules_array.append("} ELSE {")
            if right[node] != -1:
                walk(right[node])
            rules_array.append("}")

        walk(0)
        target = "Rules/Statements/"+filename+".rules"
        dt.write1dArray(rules_array, target)
        cleaned = jsbeautifier.beautify_file(target)
        out = open(target, "w")
        out.write(cleaned)
        out.close()

Exemple #26

0

Afficher le fichier

Fichier : MovieTasks.py Projet : eygrr/RulesFromAuto-encoders

def makeConsistentKeywords(file_name, new_file_name):
    """Normalise a tab-separated "title (year)<TAB>keyword" listing.

    Lines containing "{" or "?" are skipped. For every other line the first
    column is split on " (": when the first bracketed part does not start
    with "1" or "2" it is treated as part of the title and the year is taken
    from the second bracketed part. Punctuation is stripped from name, year
    and keyword (whitespace is also removed from the keyword) and each row is
    rebuilt as name<TAB>year<TAB>keyword, printed, and finally handed to
    ``dt.write1dArray`` together with ``new_file_name``.
    """
    rows = []
    with open(file_name) as source:
        for raw_line in source:
            if "{" in raw_line or "?" in raw_line:
                continue
            columns = re.split(r'\t+', raw_line)
            title_parts = columns[0].split(" (")
            if title_parts[1].startswith(("1", "2")):
                # First bracket already holds the year.
                year = title_parts[1][:4]
                name = title_parts[0]
            else:
                # First bracket belongs to the title; the year is in the next one.
                year = title_parts[2][:4]
                name = "".join([title_parts[0], title_parts[1]])
            name = name.translate(None, string.punctuation)
            year = year.translate(None, string.punctuation)
            keyword = re.sub(r'\s+', '', columns[1]).translate(None, string.punctuation)
            row = "\t".join([name, year, keyword])
            rows.append(row)
            print(row)

    dt.write1dArray(rows, new_file_name)

Exemple #27

0

Afficher le fichier

Fichier : MovieTasks.py Projet : eygrr/RulesFromAuto-encoders

def getVectorsIO(ordered_IDs, unique_phrases, phrase_start=11212, phrase_end=25842):
    """Build per-phrase occurrence vectors over the films in ``ordered_IDs``.

    For every phrase index ``p`` in ``range(phrase_start, phrase_end)`` each
    film file ``filmdata/Tokens/<id>.film`` is scanned (its first line is
    skipped). When the first field of a line equals the phrase index, the
    second field is stored in ``vectors_maintained[p][i]`` and
    ``vectors[p][i]`` is set to 1. After each phrase both rows are written
    with ``dt.write1dArray``.

    ordered_IDs    -- list of film id strings; stripped in place. Entries
                      equal to "-1" are skipped.
    unique_phrases -- list of phrase strings; processed entries are stripped
                      in place and used in the output file names.
    phrase_start,
    phrase_end     -- half-open range of phrase indexes to process (defaults
                      reproduce the previously hard-coded range).

    Returns ``(vectors_maintained, vectors)``, both sized
    ``len(unique_phrases)`` x ``len(ordered_IDs)``.
    """
    film_count = len(ordered_IDs)
    vectors = [[0] * film_count for _ in range(len(unique_phrases))]
    vectors_maintained = [[0] * film_count for _ in range(len(unique_phrases))]

    # Strip the ids once instead of on every phrase iteration.
    for i in range(film_count):
        ordered_IDs[i] = ordered_IDs[i].strip()

    for p in range(phrase_start, phrase_end):
        unique_phrases[p] = unique_phrases[p].strip()
        # The original compared the string token with the int ``p``, which is
        # never equal, so no entry was ever set.
        # NOTE(review): assumes the first field of a .film line is the numeric
        # phrase index -- confirm against the token file format.
        phrase_key = str(p)
        for i, film_id in enumerate(ordered_IDs):
            if film_id == "-1":
                continue
            with open("filmdata/Tokens/" + film_id + ".film", "r") as film_file:
                lines = film_file.readlines()[1:]
            for line in lines:
                fields = line.split()
                if len(fields) < 2:
                    # Blank or truncated line: nothing to record.
                    continue
                if fields[0] == phrase_key:
                    vectors_maintained[p][i] = fields[1]
                    vectors[p][i] = 1
        print(unique_phrases[p])
        dt.write1dArray(vectors_maintained[p], "filmdata/classesPhrases/nonbinary/class-" + unique_phrases[p])
        dt.write1dArray(vectors[p], "filmdata/classesPhrases/class-" + unique_phrases[p])

    return vectors_maintained, vectors

Exemple #28

0

Afficher le fichier

def getKNeighbors(vector_path="filmdata/films200.mds/films200.mds",
                  class_path="filmdata/classesGenres/class-All",
                  n_neighbors=1,
                  algorithm="kd_tree",
                  leaf_size=30,
                  training_data=10000,
                  name="normal200"):
    """Fit a k-nearest-neighbours classifier and record its F1 and accuracy.

    Vectors and labels are loaded through ``dt.importVectors`` and
    ``dt.importLabels`` and split with ``dt.splitData(training_data, ...)``.
    The macro F1 and the accuracy on the test part are written to
    ``KNNScores/<name>.score`` and printed. Nothing is returned.
    """
    vectors = np.asarray(dt.importVectors(vector_path))
    labels = np.asarray(dt.importLabels(class_path))
    x_train, y_train, x_test, y_test = dt.splitData(training_data, vectors, labels)

    knn_options = {
        "n_neighbors": n_neighbors,
        "algorithm": algorithm,
        "leaf_size": leaf_size,
    }
    knn = neighbors.KNeighborsClassifier(**knn_options)
    knn.fit(x_train, y_train.ravel())
    predicted = knn.predict(x_test)

    f1 = f1_score(y_test, predicted, average='macro')
    accuracy = accuracy_score(y_test, predicted)
    dt.write1dArray([f1, accuracy], "KNNScores/" + name + ".score")
    print("F1 " + str(f1) + " Accuracy " + str(accuracy))

Exemple #29

0

Afficher le fichier

Fichier : Gini.py Projet : eygrr/RulesFromAuto-encoders

    def __init__(self, direction_fn, ppmi_fn, phrases_fn, phrases_to_check_fn, fn):
        """Correlate each direction line with its PPMI row and save the scores.

        Line ``k`` of ``direction_fn`` (whitespace-separated floats) is paired
        with row ``k`` of the transposed matrix loaded from ``ppmi_fn``.

        direction_fn        -- path of the text file read line by line
        ppmi_fn             -- passed to ``dt.importLabels``
        phrases_fn          -- passed to ``dt.importString``; each entry is
                               compared after dropping its first six characters
        phrases_to_check_fn -- "" to score every line; otherwise a path passed
                               to ``dt.importString`` whose entries select the
                               lines to score
        fn                  -- suffix used in the output names under RuleType/

        With a phrase filter, only entries whose PPMI value is non-zero are
        correlated (Spearman and Kendall) and their mean PPMI is recorded;
        a selected row without any non-zero entry raises ZeroDivisionError.
        Without a filter every line gets a Spearman score over the full row.
        """
        # Local import: Kendall's tau was appended below (rhok / pvaluek)
        # without ever being computed, which raised NameError on first use.
        from scipy.stats import kendalltau

        ppmi = np.asarray(dt.importLabels(ppmi_fn))
        phrases = dt.importString(phrases_fn)

        # The original tested `indexes_to_get is not []`, which is always
        # true, so the unfiltered branch could never run. The filter is now
        # keyed on whether a check file was given.
        use_filter = phrases_to_check_fn != ""
        indexes_to_get = set()
        if use_filter:
            phrases_to_check = set(dt.importString(phrases_to_check_fn))
            indexes_to_get = set(
                p for p, phrase in enumerate(phrases)
                if phrase[6:] in phrases_to_check)

        ppmi = ppmi.transpose()
        print(str(len(ppmi)) + " " + str(len(ppmi[0])))

        scores = []
        pvalues = []
        scores_kendall = []
        pvalues_kendall = []
        averages = []
        # NOTE(review): np.argsort returns the indices that would sort the
        # array, not the rank of each element -- confirm these are meant to
        # be used as ranks.
        with open(direction_fn) as f:
            for counter, line in enumerate(f):
                if use_filter:
                    # Set lookup: a line is scored at most once, even if the
                    # check list repeats a phrase.
                    if counter not in indexes_to_get:
                        continue
                    direction = [float(v) for v in line.split()]
                    direction_rank = np.argsort(direction)
                    row = ppmi[counter]
                    ppmi_rank = np.argsort(row)

                    total = 0
                    new_direction = []
                    new_ppmi = []
                    for d, weight in enumerate(row):
                        if weight != 0:
                            total += weight
                            new_direction.append(direction_rank[d])
                            new_ppmi.append(ppmi_rank[d])
                    average = total / len(new_ppmi)

                    rho, pvalue = spearmanr(new_ppmi, new_direction)
                    rhok, pvaluek = kendalltau(new_ppmi, new_direction)
                    scores.append(rho)
                    pvalues.append(pvalue)
                    scores_kendall.append(rhok)
                    pvalues_kendall.append(pvaluek)
                    averages.append(average)
                    print(phrases[counter] + ": " + str(rho) + " " + str(pvalue)
                          + " " + str(average))
                else:
                    direction = [float(v) for v in line.split()]
                    direction_rank = np.argsort(direction)
                    ppmi_rank = np.argsort(ppmi[counter])
                    rho, pvalue = spearmanr(direction_rank, ppmi_rank)
                    scores.append(rho)
                    pvalues.append(pvalue)
                    print(phrases[counter] + ": " + str(rho) + " " + str(pvalue))

        dt.write1dArray(scores, "RuleType/s" + fn + ".score")
        dt.write1dArray(pvalues, "RuleType/s" + fn + ".pvalue")
        dt.write1dArray(scores_kendall, "RuleType/k" + fn + ".score")
        dt.write1dArray(pvalues_kendall, "RuleType/k" + fn + ".pvalue")
        dt.write1dArray(phrases, "RuleType/" + fn + ".names")
        dt.write1dArray(averages, "RuleType/" + fn + ".averages")

Exemple #30

0

Afficher le fichier

Fichier : MovieTasks.py Projet : eygrr/RulesFromAuto-encoders

#reMapPPMI(dt.importString("filmdata/filmIds.txt"), dt.getFns("filmdata/vectors/tokens/"))

# Base names of the vector-space files referenced by the disabled
# nearest-movies snippet below (it appends ".mds" to each).
filenames = [
    "AUTOENCODER0.5tanhtanhmse15tanh[1000]4SDA1",
    "AUTOENCODER0.5tanhtanhmse60tanh[200]4SDA2",
    "AUTOENCODER0.5tanhtanhmse30tanh[1000]4SDA3",
    "AUTOENCODER0.5tanhtanhmse60tanh[200]4SDA4",
]

# The triple-quoted blocks below are bare string literals: they are never
# executed and only preserve earlier one-off invocations.
"""
path = "newdata/spaces/"
id = 155
for f in filenames:
    movie_vectors = dt.getMovieVectors(input_size=200, vector_path=path+f+".mds")
    nearest_movies, nearest_distances = getKNearestMovies(movie_vectors, movie_vectors[id], 30)
    dt.write1dArray(nearest_movies, "KDNearest/" + f + str(id)+".knmovies")
    dt.write1dArray(nearest_distances, "KDNearest/" + f + str(id)+".kndistances")
"""
"""
#makeConsistent("filmdata/KeywordData/Matched_Films.txt", "filmdata/KeywordData/Matched_Films_Normalised.txt")

#getMatchedLines("filmdata/KeywordData/All_Films_Norm_Spaces.txt", dt.importString("filmdata/KeywordData/Missing_Films_Normalised.txt"))
"""
"""
movie_strings = dt.importString("filmdata/filmNames.txt")
missing_items = getMissing("filmdata/IMDB Keywords Movie Data/Matched_Films.txt", movie_strings)
dt.write1dArray(missing_items, "filmdata/missing_films.txt")

"""




#outputPhrases()
#outputKeywords()

Exemple #31

0

Afficher le fichier

Fichier : Spearmanr.py Projet : eygrr/RulesFromAuto-encoders

    def __init__(self, direction_fn, ppmi_fn, phrases_fn, phrases_to_check_fn, fn):
        """Score how well each direction line agrees with its PPMI row.

        Line ``k`` of ``direction_fn`` (whitespace-separated floats) is paired
        with row ``k`` of the transposed matrix loaded from ``ppmi_fn``. When
        ``phrases_to_check_fn`` is not "", only lines whose phrase (entry of
        ``phrases_fn`` minus its first six characters) appears in that file
        are scored. For each scored line the Spearman and Kendall results over
        the non-zero PPMI entries, the values returned by ``gini`` and
        ``ndcg_at_k``, a constant 0 placeholder for MAP and the mean non-zero
        PPMI are collected and written under ``RuleType/`` using ``fn`` in
        the file names.
        """
        ppmi = np.asarray(dt.importLabels(ppmi_fn))
        phrases = dt.importString(phrases_fn)

        restrict = phrases_to_check_fn != ""
        selected = set()
        if restrict:
            for wanted in dt.importString(phrases_to_check_fn):
                for p, phrase in enumerate(phrases):
                    if wanted == phrase[6:]:
                        selected.add(p)

        ppmi = ppmi.transpose()
        print(str(len(ppmi)) + " " + str(len(ppmi[0])))

        scores, pvalues = [], []
        scores_kendall, pvalues_kendall = [], []
        agini, amap, andcg, averages = [], [], [], []

        with open(direction_fn) as f:
            for counter, line in enumerate(f):
                if restrict and counter not in selected:
                    continue

                direction = [float(v) for v in line.split()]
                direction_rank = np.argsort(direction)
                row = ppmi[counter]
                ppmi_rank = np.argsort(row)

                # Keep only the positions whose PPMI value is non-zero.
                total = 0
                new_direction = []
                new_ppmi = []
                for d, weight in enumerate(row):
                    if weight != 0:
                        total += weight
                        new_direction.append(direction_rank[d])
                        new_ppmi.append(ppmi_rank[d])
                average = total / len(new_ppmi)

                # The same scaler object is refitted for every input.
                scaler = preprocessing.MinMaxScaler()
                normalized_ppmi = scaler.fit_transform(row)
                normalized_dir = scaler.fit_transform(direction)
                ginis = gini(normalized_ppmi, normalized_dir)

                ranked_ppmi = dt.sortByArray(new_ppmi, new_direction)
                nr_ppmi = scaler.fit_transform(ranked_ppmi)
                ndcgs = ndcg_at_k(nr_ppmi, len(nr_ppmi))

                # Average precision is disabled; 0 is stored as a placeholder.
                map_score = 0

                rho, pvalue = spearmanr(new_ppmi, new_direction)
                rhok, pvaluek = kendalltau(new_ppmi, new_direction)

                scores.append(rho)
                pvalues.append(pvalue)
                scores_kendall.append(rhok)
                pvalues_kendall.append(pvaluek)
                andcg.append(ndcgs)
                agini.append(ginis)
                amap.append(map_score)
                averages.append(average)
                print(phrases[counter] + ": " + str(map_score) + " " + str(ginis))

        outputs = [
            (scores, "RuleType/s" + fn + ".score"),
            (pvalues, "RuleType/s" + fn + ".pvalue"),
            (scores_kendall, "RuleType/k" + fn + ".score"),
            (pvalues_kendall, "RuleType/k" + fn + ".pvalue"),
            (phrases, "RuleType/" + fn + ".names"),
            (averages, "RuleType/" + fn + ".averages"),
            (agini, "RuleType/gn" + fn + ".score"),
            (andcg, "RuleType/ndcg" + fn + ".score"),
            (amap, "RuleType/map" + fn + ".score"),
        ]
        for values, path in outputs:
            dt.write1dArray(values, path)

Exemple #32

0

Afficher le fichier

    def __init__(self, direction_fn, ppmi_fn, phrases_fn, phrases_to_check_fn,
                 fn):
        """Correlate each direction line with its PPMI row and save the scores.

        Line ``k`` of ``direction_fn`` (whitespace-separated floats) is paired
        with row ``k`` of the transposed matrix loaded from ``ppmi_fn``.

        direction_fn        -- path of the text file read line by line
        ppmi_fn             -- passed to ``dt.importLabels``
        phrases_fn          -- passed to ``dt.importString``; each entry is
                               compared after dropping its first six characters
        phrases_to_check_fn -- "" to score every line; otherwise a path passed
                               to ``dt.importString`` whose entries select the
                               lines to score
        fn                  -- suffix used in the output names under RuleType/

        With a phrase filter, only entries whose PPMI value is non-zero are
        correlated (Spearman and Kendall) and their mean PPMI is recorded;
        a selected row without any non-zero entry raises ZeroDivisionError.
        Without a filter every line gets a Spearman score over the full row.
        """
        # Local import: rhok / pvaluek were appended below without ever being
        # computed, so the first selected line raised NameError.
        from scipy.stats import kendalltau

        ppmi = dt.importLabels(ppmi_fn)
        ppmi = np.asarray(ppmi)
        phrases = dt.importString(phrases_fn)

        # `indexes_to_get is not []` compared identity against a fresh list
        # and was therefore always true; the decision now depends on whether
        # a check file was supplied.
        filtering = phrases_to_check_fn != ""
        indexes_to_get = set()
        if filtering:
            wanted_phrases = set(dt.importString(phrases_to_check_fn))
            for p, phrase in enumerate(phrases):
                if phrase[6:] in wanted_phrases:
                    indexes_to_get.add(p)

        ppmi = ppmi.transpose()
        print(str(len(ppmi)) + " " + str(len(ppmi[0])))

        scores = []
        pvalues = []
        scores_kendall = []
        pvalues_kendall = []
        averages = []
        # NOTE(review): np.argsort yields sorting indices rather than
        # per-element ranks -- confirm that treating them as ranks is intended.
        with open(direction_fn) as f:
            for counter, line in enumerate(f):
                direction = [float(v) for v in line.split()]

                if not filtering:
                    direction_rank = np.argsort(direction)
                    ppmi_rank = np.argsort(ppmi[counter])
                    rho, pvalue = spearmanr(direction_rank, ppmi_rank)
                    scores.append(rho)
                    pvalues.append(pvalue)
                    print(phrases[counter] + ": " + str(rho) + " " + str(pvalue))
                    continue

                # Set lookup: each selected line is scored exactly once.
                if counter not in indexes_to_get:
                    continue

                direction_rank = np.argsort(direction)
                row = ppmi[counter]
                ppmi_rank = np.argsort(row)

                total = 0
                new_direction = []
                new_ppmi = []
                for d, weight in enumerate(row):
                    if weight != 0:
                        total += weight
                        new_direction.append(direction_rank[d])
                        new_ppmi.append(ppmi_rank[d])
                average = total / len(new_ppmi)

                rho, pvalue = spearmanr(new_ppmi, new_direction)
                rhok, pvaluek = kendalltau(new_ppmi, new_direction)
                scores.append(rho)
                pvalues.append(pvalue)
                scores_kendall.append(rhok)
                pvalues_kendall.append(pvaluek)
                averages.append(average)
                print(phrases[counter] + ": " + str(rho) + " " + str(pvalue)
                      + " " + str(average))

        dt.write1dArray(scores, "RuleType/s" + fn + ".score")
        dt.write1dArray(pvalues, "RuleType/s" + fn + ".pvalue")
        dt.write1dArray(scores_kendall, "RuleType/k" + fn + ".score")
        dt.write1dArray(pvalues_kendall, "RuleType/k" + fn + ".pvalue")
        dt.write1dArray(phrases, "RuleType/" + fn + ".names")
        dt.write1dArray(averages, "RuleType/" + fn + ".averages")

Exemple #33

0

Afficher le fichier

Fichier : MovieTasks.py Projet : eygrr/RulesFromAuto-encoders

def removeGaps(file_name, new_file_name):
    """Strip surrounding whitespace from every line of ``file_name``.

    The stripped lines (blank lines become empty strings) are passed, in
    order, to ``dt.write1dArray`` together with ``new_file_name``.
    """
    with open(file_name) as source:
        stripped_lines = [raw.strip() for raw in source]
    dt.write1dArray(stripped_lines, new_file_name)

Exemple #34

0

Afficher le fichier

Fichier : Spearmanr.py Projet : ThomasAger/RulesFromAuto-encoders

    def __init__(self, direction_fn, ppmi_fn, phrases_fn, phrases_to_check_fn,
                 fn):
        """Compare every direction line with its PPMI row and store the metrics.

        Rows of the transposed ``ppmi_fn`` matrix are matched by position with
        the lines of ``direction_fn`` (whitespace-separated floats). If
        ``phrases_to_check_fn`` is not "", a line is only scored when its
        phrase from ``phrases_fn`` (with the first six characters removed)
        occurs in that file. Per scored line this records Spearman and Kendall
        results over the non-zero PPMI positions, the outputs of ``gini`` and
        ``ndcg_at_k``, a fixed 0 in place of MAP, and the mean non-zero PPMI.
        All lists are written under ``RuleType/`` with ``fn`` in the name.
        """
        ppmi = np.asarray(dt.importLabels(ppmi_fn))
        phrases = dt.importString(phrases_fn)

        only_listed = phrases_to_check_fn != ""
        listed_indexes = set()
        if only_listed:
            for candidate in dt.importString(phrases_to_check_fn):
                for p, phrase in enumerate(phrases):
                    if candidate == phrase[6:]:
                        listed_indexes.add(p)

        ppmi = ppmi.transpose()
        print(str(len(ppmi)) + " " + str(len(ppmi[0])))

        scores = []
        pvalues = []
        scores_kendall = []
        pvalues_kendall = []
        agini = []
        amap = []
        andcg = []
        averages = []

        with open(direction_fn) as f:
            for counter, line in enumerate(f):
                if only_listed and counter not in listed_indexes:
                    continue

                direction = [float(token) for token in line.split()]
                direction_rank = np.argsort(direction)
                ppmi_row = ppmi[counter]
                ppmi_rank = np.argsort(ppmi_row)

                # Restrict both rank lists to positions with a non-zero PPMI.
                total = 0
                new_direction = []
                new_ppmi = []
                for d, entry in enumerate(ppmi_row):
                    if entry != 0:
                        total += entry
                        new_direction.append(direction_rank[d])
                        new_ppmi.append(ppmi_rank[d])
                average = total / len(new_ppmi)

                # One scaler instance, refitted for each array in turn.
                min_max_scaler = preprocessing.MinMaxScaler()
                normalized_ppmi = min_max_scaler.fit_transform(ppmi_row)
                normalized_dir = min_max_scaler.fit_transform(direction)
                ginis = gini(normalized_ppmi, normalized_dir)

                ranked_ppmi = dt.sortByArray(new_ppmi, new_direction)
                nr_ppmi = min_max_scaler.fit_transform(ranked_ppmi)
                ndcgs = ndcg_at_k(nr_ppmi, len(nr_ppmi))

                # Average precision is switched off; a constant 0 is recorded.
                map_value = 0

                rho, pvalue = spearmanr(new_ppmi, new_direction)
                rhok, pvaluek = kendalltau(new_ppmi, new_direction)

                scores.append(rho)
                pvalues.append(pvalue)
                scores_kendall.append(rhok)
                pvalues_kendall.append(pvaluek)
                andcg.append(ndcgs)
                agini.append(ginis)
                amap.append(map_value)
                averages.append(average)
                print(phrases[counter] + ": " + str(map_value) + " " + str(ginis))

        for values, path in (
                (scores, "RuleType/s" + fn + ".score"),
                (pvalues, "RuleType/s" + fn + ".pvalue"),
                (scores_kendall, "RuleType/k" + fn + ".score"),
                (pvalues_kendall, "RuleType/k" + fn + ".pvalue"),
                (phrases, "RuleType/" + fn + ".names"),
                (averages, "RuleType/" + fn + ".averages"),
                (agini, "RuleType/gn" + fn + ".score"),
                (andcg, "RuleType/ndcg" + fn + ".score"),
                (amap, "RuleType/map" + fn + ".score")):
            dt.write1dArray(values, path)