def getVectorsKeywords(movie_strings, keywords):
    """Write one 0/1 movie-membership vector per keyword.

    Every file in the keyword-mapping directory is named with an integer
    index into ``movie_strings`` and lists one keyword per line.  For each
    entry of ``keywords`` a vector of ``len(movie_strings)`` zeros is built,
    position ``i`` is set to 1 when file ``i`` lists that keyword, and the
    vector is handed to ``dt.write1dArray`` with the path
    ``filmdata/classesKeywords/NewData/class-<keyword>``.

    Args:
        movie_strings: sequence of movie strings; position in this sequence
            is the index used by the mapping file names.
        keywords: mutable sequence of keyword strings.  NOTE: entries are
            stripped of surrounding whitespace *in place*, as before.

    Raises:
        ValueError: if a mapping file name is not an integer.
        IndexError: if a mapping file name is an index past the end of
            ``movie_strings``.

    NOTE(review): assumes ``dt.getAllFileNames`` returns bare file names
    (no directory part) -- confirm against the ``dt`` helper module.
    """
    # Forward slashes work on every platform; the previous backslash form of
    # this same directory only resolved on Windows.
    mapping_dir = "filmdata/KeywordData/Movie_Most_Common_Keyword_Mapping"
    movie_count = len(movie_strings)

    file_names = dt.getAllFileNames(mapping_dir)

    # Display names are only used to report unreadable mapping files.
    # The last five characters of each stripped string are dropped
    # (presumably a fixed-width suffix -- TODO confirm).
    movie_names = []
    for movie in movie_strings:
        movie_names.append(movie.strip()[:-5])
        print(movie)

    print("Mapping to memory.")
    # keyword -> set of movie indices whose mapping file lists that keyword.
    # Indexing by keyword avoids rescanning every (movie, keyword) pair for
    # each requested keyword, and using the file's own index keeps movies
    # with identical strings from collapsing onto one vector position.
    keyword_to_indices = {}
    for file_name in file_names:
        index = int(file_name)
        if index >= movie_count:
            raise IndexError(
                "mapping file %r has no matching entry in movie_strings "
                "(length %d)" % (file_name, movie_count))
        try:
            # ``with`` guarantees the handle is closed even if reading fails.
            with open(mapping_dir + "/" + file_name, "r") as handle:
                lines = handle.readlines()
        except IOError:
            # Report which movie's mapping could not be read and move on.
            print(movie_names[index])
            continue
        for line in lines:
            keyword_to_indices.setdefault(line.strip(), set()).add(index)

    # Normalise the caller's keyword list in place (existing behaviour).
    for position, keyword in enumerate(keywords):
        keywords[position] = keyword.strip()

    print("Iterating over memory.")
    for keyword in keywords:
        vector = [0] * movie_count
        print(len(vector))
        for index in keyword_to_indices.get(keyword, ()):
            vector[index] = 1
        print(keyword)
        dt.write1dArray(
            vector, "filmdata/classesKeywords/NewData/class-" + keyword)
def writeMissing(folder_name, expected_count=15000):
    """Record which integer-named files are absent from ``folder_name``.

    Every index in ``range(expected_count)`` is checked against the file
    names returned by ``dt.getAllFileNames(folder_name)``; a "found" or
    "no found" line is printed for each index, and the list of absent
    indices is passed to ``dt.write1dArray`` with the path
    ``filmdata/MISSING_KEYWORD_ITEMS.txt``.

    Args:
        folder_name: directory whose file names are inspected.
        expected_count: number of consecutive indices, starting at 0, that
            are expected to exist.  Defaults to the previously hard-coded
            15000.

    Raises:
        ValueError: if any file name cannot be parsed as an integer.
    """
    print("?")
    file_names = dt.getAllFileNames(folder_name)

    # Parse each name once; set membership replaces a full rescan (and
    # re-parse) of the file list for every candidate index.
    present = set(int(name) for name in file_names)

    missing = []
    for i in range(expected_count):
        if i in present:
            print("found %d" % i)
        else:
            missing.append(i)
            print("no found %d" % i)
    dt.write1dArray(missing, "filmdata/MISSING_KEYWORD_ITEMS.txt")