def basicSetup(self, ngram_order=10):
    """Build the unnaturalCode, corpus and source-model objects for this instance.

    Reads self.logFilePath, self.readCorpus and self.lm, and stores the
    results in self.uc, self.cm and self.sm.

    ngram_order -- forwarded to mitlmCorpus as its ``order`` argument.
    """
    uc = unnaturalCode(logFilePath=self.logFilePath)
    self.uc = uc
    # Dependency injection: the unnaturalCode instance is handed to the
    # corpus through its ``uc`` argument. The same path is used for both
    # reading and writing the corpus.
    corpusPath = self.readCorpus
    corpus = mitlmCorpus(
        readCorpus=corpusPath,
        writeCorpus=corpusPath,
        uc=uc,
        order=ngram_order,
    )
    self.cm = corpus
    self.sm = sourceModel(cm=corpus, language=self.lm)
def testDefaultCorpusEnv(self):
    """Check that the default corpus paths live in usable directories.

    Builds a mitlmCorpus with default arguments and verifies that the parent
    directory of both readCorpus and writeCorpus is an existing directory
    that the current process may read, write and search.
    """
    cm = mitlmCorpus()
    # The access modes are bit flags and must be OR-ed together; AND-ing
    # them yields 0 (os.F_OK), which only tests that the path exists.
    requiredAccess = os.X_OK | os.R_OK | os.W_OK
    for corpusPath in (cm.readCorpus, cm.writeCorpus):
        parentDir = os.path.dirname(corpusPath)
        self.assertTrue(os.access(parentDir, requiredAccess))
        self.assertTrue(os.path.isdir(parentDir))
def setUpClass(self):
    """Create a temporary working directory and the objects the tests share.

    Stores the temporary directory in self.td and the unnaturalCode,
    mitlmCorpus and sourceModel instances in self.uc, self.cm and self.sm.
    The corpus and log file paths are both placed inside self.td.
    """
    self.td = mkdtemp(prefix='ucTest-')
    # The access modes are bit flags and must be OR-ed together; AND-ing
    # them yields 0 (os.F_OK), which only tests that the path exists.
    assert os.access(self.td, os.X_OK | os.R_OK | os.W_OK)
    assert os.path.isdir(self.td)
    readCorpus = os.path.join(self.td, 'ucCorpus')
    logFilePath = os.path.join(self.td, 'ucLogFile')
    self.uc = unnaturalCode(logFilePath=logFilePath)
    # NOTE(review): the corpus is given ucGlobal, not the self.uc created
    # on the previous line -- confirm that this is intended.
    self.cm = mitlmCorpus(readCorpus=readCorpus, writeCorpus=readCorpus, uc=ucGlobal)
    self.sm = sourceModel(cm=self.cm, language=pythonSource)
def __init__(self, cm=None, language=pythonSource, windowSize=20):
    """Set up the model and load previously saved unique tokens, if any.

    cm         -- corpus object exposing ``readCorpus`` and ``writeCorpus``
                  paths; a new mitlmCorpus() is created when omitted.
    language   -- stored as self.lang.
    windowSize -- stored as self.windowSize.

    The unique-token file paths are the corpus paths with ".uniqueTokens"
    appended. If the read-side file exists, its pickled contents replace
    the initially empty self.listOfUniqueTokens.
    """
    # Built here rather than in the signature, where the default would be
    # evaluated once at definition time and shared by every instance.
    if cm is None:
        cm = mitlmCorpus()
    self.cm = cm
    self.lang = language
    self.windowSize = windowSize
    self.listOfUniqueTokens = {}
    self.uTokenFile = self.cm.writeCorpus + ".uniqueTokens"
    readTokenFile = self.cm.readCorpus + ".uniqueTokens"
    if os.path.isfile(readTokenFile):
        # NOTE(security): pickle.load can execute arbitrary code while
        # unpickling; the token file must come from a trusted location.
        with open(readTokenFile, "rb") as f:
            self.listOfUniqueTokens = pickle.load(f)
def __init__(self, ngram_order=10):
    """Prepare the data directory and build the objects stored on the instance.

    The data directory is taken from the UC_DATA environment variable and
    falls back to ".unnaturalCode" inside the user's home directory; it is
    created when it does not exist. The corpus ('pyCorpus') and log file
    ('pyLogFile') paths are placed inside that directory.

    ngram_order -- forwarded to mitlmCorpus as its ``order`` argument.
    """
    self.homeDir = os.path.expanduser("~")
    self.ucDir = os.getenv("UC_DATA", os.path.join(self.homeDir, ".unnaturalCode"))
    if not os.path.exists(self.ucDir):
        os.makedirs(self.ucDir)
    # The access modes are bit flags and must be OR-ed together; AND-ing
    # them yields 0 (os.F_OK), which only tests that the path exists.
    assert os.access(self.ucDir, os.X_OK | os.R_OK | os.W_OK)
    assert os.path.isdir(self.ucDir)
    self.readCorpus = os.path.join(self.ucDir, 'pyCorpus')
    self.logFilePath = os.path.join(self.ucDir, 'pyLogFile')
    self.uc = unnaturalCode(logFilePath=self.logFilePath)
    # Dependency injection: the unnaturalCode instance is handed to the
    # corpus through its ``uc`` argument. The same path is used for both
    # reading and writing the corpus.
    self.cm = mitlmCorpus(readCorpus=self.readCorpus, writeCorpus=self.readCorpus, uc=self.uc, order=ngram_order)
    self.lm = pythonSource
    self.sm = sourceModel(cm=self.cm, language=self.lm)
def __init__(self, cm=None, language=pythonSource, windowSize=20):
    """Store the corpus, language and window size on the instance.

    cm         -- corpus object; a new mitlmCorpus() is created when omitted.
    language   -- stored as self.lang.
    windowSize -- stored as self.windowSize.
    """
    # Built here rather than in the signature, where the default would be
    # evaluated once at definition time and shared by every instance.
    if cm is None:
        cm = mitlmCorpus()
    self.cm = cm
    self.lang = language
    self.windowSize = windowSize
def testCorpify(self):
    """Check that corpify() renders someLexemes as the expected token string."""
    sm = sourceModel(cm=mitlmCorpus())
    # assertEqual replaces the deprecated assertEquals alias, which newer
    # Python versions no longer provide.
    self.assertEqual(
        sm.corpify(pythonSource(someLexemes)),
        'print ( 1 + 2 ** 2 ) <ENDMARKER>',
    )
def testEnvMitlm(self):
    """Check that the default corpus' estimateNgramPath is readable and executable."""
    cm = mitlmCorpus()
    # The access modes are bit flags and must be OR-ed together; AND-ing
    # them yields 0 (os.F_OK), which only tests that the path exists.
    self.assertTrue(os.access(cm.estimateNgramPath, os.X_OK | os.R_OK))