Ejemplo n.º 1
0
def q(word):
    """Look up the embedding of a word and write it to the embeddings file.

    INPUT: word -- the value handed to ``Conceptnet.embed_word``.

    A single ``word:embedding`` record is passed to ``IO.write_to_file``
    for the file ``word_embeddings.txt``. Nothing is returned.
    """
    # fetch the embedding from conceptnet and build the "<word>:<embedding>" record
    vector = Conceptnet.embed_word(word)
    record = word + ':' + str(vector)

    # text file that collects the word embeddings
    target = 'word_embeddings.txt'
    IO.write_to_file(target, record)
Ejemplo n.º 2
0
 def test_ensdir(self):
     """IO.ensdir must reject an invalid folder name and create a missing folder."""
     import os
     import os.path

     # A name built from special characters is expected to be rejected.
     # NOTE: WindowsError is only defined on Windows builds of Python.
     invalid_name = "!@#$%^&*():;[]{}+-`~\"/|"
     with self.assertRaises(WindowsError):
         IO.ensdir(invalid_name)

     # Start from a clean slate: drop any folder left by an earlier run.
     folder = "/test_ensdir_folder"
     if os.path.isdir(folder):
         os.rmdir(folder)

     self.assertFalse(os.path.isdir(folder))
     IO.ensdir(folder)
     self.assertTrue(os.path.isdir(folder))

     # Remove the folder this test just created.
     os.rmdir(folder)
Ejemplo n.º 3
0
    def test_ensdir(self):
        """IO.ensdir must reject an invalid folder name and create a missing folder."""
        import os
        import os.path

        # A name built from special characters is expected to be rejected.
        # NOTE: WindowsError is only defined on Windows builds of Python.
        invalid_name = "!@#$%^&*():;[]{}+-`~\"/|"
        with self.assertRaises(WindowsError):
            IO.ensdir(invalid_name)

        # Start from a clean slate: drop any folder left by an earlier run.
        folder = "/test_ensdir_folder"
        if os.path.isdir(folder):
            os.rmdir(folder)

        self.assertFalse(os.path.isdir(folder))
        IO.ensdir(folder)
        self.assertTrue(os.path.isdir(folder))

        # Remove the folder this test just created.
        os.rmdir(folder)
Ejemplo n.º 4
0
 def test_ensfile(self):
     """IO.ensfile must reject an invalid filename and create a missing file."""
     import os
     import os.path

     # A filename built from special characters is expected to raise IOError.
     invalid_name = "!@#$%^&*():;[]{}.txt"
     with self.assertRaises(IOError):
         IO.ensfile(invalid_name)

     # Start clean: remove the file and its folder if an earlier run left them.
     folder = "/test_ensdir_folder"
     target = "/test_ensdir_folder/test_ensfile.txt"
     if os.path.isfile(target):
         os.remove(target)
     if os.path.isdir(folder):
         os.rmdir(folder)

     self.assertFalse(os.path.isfile(target))
     IO.ensfile(target)
     self.assertTrue(os.path.isfile(target))

     # Clean up the file first, then the folder that contained it.
     os.remove(target)
     os.rmdir(folder)
Ejemplo n.º 5
0
    def test_ensfile(self):
        """IO.ensfile must reject an invalid filename and create a missing file."""
        import os
        import os.path

        # A filename built from special characters is expected to raise IOError.
        invalid_name = "!@#$%^&*():;[]{}.txt"
        with self.assertRaises(IOError):
            IO.ensfile(invalid_name)

        # Start clean: remove the file and its folder if an earlier run left them.
        folder = "/test_ensdir_folder"
        target = "/test_ensdir_folder/test_ensfile.txt"
        if os.path.isfile(target):
            os.remove(target)
        if os.path.isdir(folder):
            os.rmdir(folder)

        self.assertFalse(os.path.isfile(target))
        IO.ensfile(target)
        self.assertTrue(os.path.isfile(target))

        # Clean up the file first, then the folder that contained it.
        os.remove(target)
        os.rmdir(folder)
Ejemplo n.º 6
0
 def test_bufcount(self):
     """IO.bufcount must report the line count of a file of bare newlines.

     Writes 12011 newline characters to a scratch file in the working
     directory and compares the value returned by IO.bufcount with that
     count. The scratch file is removed whether or not the check passes.
     """
     import os

     nline = 12011
     filename = "test_bufcount.txt"
     with open(filename, 'w') as f:
         f.write("\n" * nline)

     try:
         self.assertEqual(IO.bufcount(filename), nline)
     finally:
         # Clean up even when bufcount raises or the assertion fails, so a
         # failing run does not leave the scratch file behind.
         os.remove(filename)
Ejemplo n.º 7
0
    def test_bufcount(self):
        """IO.bufcount must report the line count of a file of bare newlines.

        Writes 12011 newline characters to a scratch file in the working
        directory and compares the value returned by IO.bufcount with that
        count. The scratch file is removed whether or not the check passes.
        """
        import os

        nline = 12011
        filename = "test_bufcount.txt"
        with open(filename, 'w') as f:
            f.write("\n" * nline)

        try:
            self.assertEqual(IO.bufcount(filename), nline)
        finally:
            # Clean up even when bufcount raises or the assertion fails, so a
            # failing run does not leave the scratch file behind.
            os.remove(filename)
Ejemplo n.º 8
0
 def test_remove_file(self):
     """IO.remove_file must report success only when the file was removed."""
     # A path that does not exist: the call should report failure.
     missing = "sadksal21r45ewq90&&&&%%!@####:::"
     self.assertFalse(IO.remove_file(missing))

     # A freshly created file: removal should succeed.
     plain = "test_remove_file.exe"
     IO.ensfile(plain)
     self.assertTrue(IO.remove_file(plain))

     # A file held open by this test: removal is expected to fail while the
     # handle is open and to succeed once it has been closed.
     held = "test_remove_file.txt"
     IO.ensfile(held)
     with open(held) as handle:
         self.assertFalse(IO.remove_file(held))
     self.assertTrue(IO.remove_file(held))
Ejemplo n.º 9
0
    def test_remove_file(self):
        """IO.remove_file must report success only when the file was removed."""
        # A path that does not exist: the call should report failure.
        missing = "sadksal21r45ewq90&&&&%%!@####:::"
        self.assertFalse(IO.remove_file(missing))

        # A freshly created file: removal should succeed.
        plain = "test_remove_file.exe"
        IO.ensfile(plain)
        self.assertTrue(IO.remove_file(plain))

        # A file held open by this test: removal is expected to fail while the
        # handle is open and to succeed once it has been closed.
        held = "test_remove_file.txt"
        IO.ensfile(held)
        with open(held) as handle:
            self.assertFalse(IO.remove_file(held))
        self.assertTrue(IO.remove_file(held))
Ejemplo n.º 10
0
def main():
    """Embed every unique word found in the posts csv file.

    Reads posts from ``data/posts.csv``, reduces each row to its text,
    removes stop words, collects the unique words and hands each one to
    ``q`` through a ``Parallel`` pool (presumably joblib's) configured with
    one job per cpu core.
    """
    # one job per cpu core for the parallel step
    workers = cpu_count()

    # read posts from the csv file; 501 is presumably the number of posts
    rows = IO.read_csv('data/posts.csv', 501)

    # keep only the text: drop the first and last comma-separated fields,
    # then trim three characters from each end of what remains
    texts = []
    for row in rows:
        middle = ','.join(row.split(',')[1:-1])
        texts.append(middle[3:-3])

    # strip stop words from every post
    cleaned = [NLP.remove_stop_words(text) for text in texts]

    # unique words, wrapped in tqdm
    u_words = tqdm(NLP.get_unique_words(cleaned))

    # number of unique words
    n_u_words = len(u_words)

    # embed each unique word via q, parallelized
    pool = Parallel(n_jobs=workers)
    jobs = (delayed(q)(u) for u in u_words)
    embeddings = pool(jobs)