Example #1
0
    def __init__(self):
        """Merge stored and freshly loaded words, then rewrite the database.

        Reads every entry currently in the database, appends the words
        returned by ``self.load_words_from_json()``, and keeps that combined,
        unmodified list on ``self.list_of_words``.  A sanitized, de-duplicated
        copy of the list then replaces the database contents.

        Side effects:
            * prints the list sizes before and after de-duplication;
            * wipes the database and repopulates it with the pruned list.
        """
        # Function-scope imports, kept local as in the original code.
        from Database_Connection import Database_Connection
        from more_itertools import unique_everseen

        database_connection = Database_Connection()

        # Words already stored, followed by the newly loaded ones.  Each
        # source is read exactly once.
        self.list_of_words = database_connection.get_entries_as_list()
        self.list_of_words.extend(self.load_words_from_json())

        # Normalize surrounding whitespace so that "word" and "word " are
        # treated as the same entry when removing duplicates.
        mega_list = [word.strip() for word in self.list_of_words]

        # Sanitize BEFORE de-duplicating: dropping non-ASCII characters and
        # single quotes can make two distinct inputs identical (e.g. "don't"
        # and "dont"), and those must not both be written back.
        # Single quotes are removed because they break the SQLite insert;
        # presumably the statements are built by string concatenation.
        # TODO: confirm, and switch Database_Connection to parameterized
        # queries so this workaround can go away.
        # The trailing space is part of the stored format used by the
        # original code; its purpose is not visible from here.
        sanitized = [
            word.encode('ascii', 'ignore').replace("'", "") + " "
            for word in mega_list
        ]
        pruned_list = list(unique_everseen(sanitized))

        # Parenthesised single-argument print behaves identically on Python 2.
        print("mega list - {0}".format(len(mega_list)))
        print("pruned list - {0}".format(len(pruned_list)))

        # Replace the stored entries with the cleaned list.
        database_connection.wipe_database()
        database_connection.add_list_to_database(pruned_list)
__author__ = 'ryancraig'

from more_itertools import unique_everseen
from Database_Connection import Database_Connection

# One-off maintenance script: rewrite the database so that every entry is
# stored only once, keeping the first occurrence of each entry in order.
database_connection = Database_Connection()

# Snapshot of everything currently stored; report its size up front.
mega_list = database_connection.get_entries_as_list()
print("mega list - {0}".format(len(mega_list)))

# unique_everseen yields each distinct entry once, in first-seen order.
pruned_list = list(unique_everseen(mega_list))
print("pruned list - {0}".format(len(pruned_list)))

# Replace the stored entries with the de-duplicated list.
database_connection.wipe_database()
database_connection.add_list_to_database(pruned_list)