def __init__(self, source=None, language=pythonSource, resultsDir=None, corpus=mitlmCorpus):
    """Prepare a validation run over a list of source files.

    Args:
        source: list of file names to validate. A str is not supported
            yet; any other type is rejected.
        language: language object handed to sourceModel.
        resultsDir: directory holding results.csv and the validation
            corpus. Falls back to the ``ucResultsDir`` environment
            variable, then to a freshly created temporary directory.
        corpus: callable that builds the corpus model; it is called with
            readCorpus, writeCorpus and order keyword arguments.

    Raises:
        NotImplementedError: if source is a str.
        TypeError: if source is neither a str nor a list.
        AssertionError: if the results directory is not readable,
            writable and searchable by this process.
    """
    self.resultsDir = (resultsDir
                       or os.getenv("ucResultsDir", None)
                       or mkdtemp(prefix='ucValidation-'))
    if isinstance(source, str):
        raise NotImplementedError
    elif isinstance(source, list):
        self.validFileNames = source
    else:
        raise TypeError("Constructor arguments!")
    # The access modes are bit flags and have to be OR-ed together; AND-ing
    # them gives 0 (os.F_OK), which only tests that the path exists.
    assert os.access(self.resultsDir, os.R_OK | os.W_OK | os.X_OK)
    self.csvPath = os.path.join(self.resultsDir, 'results.csv')
    # Count how many result rows already exist per key (first column) so an
    # interrupted run can see what has been recorded so far.
    self.progress = dict()
    try:
        with open(self.csvPath, 'r') as previous:
            for row in csv.reader(previous):
                if not row:
                    # Blank line in the CSV: nothing to count.
                    continue
                self.progress[row[0]] = self.progress.get(row[0], 0) + 1
    except IOError:
        # No readable results file: start with empty progress.
        pass
    # Results are appended to whatever is already in the file.
    self.csvFile = open(self.csvPath, 'a')
    self.csv = csv.writer(self.csvFile)
    self.corpusPath = os.path.join(self.resultsDir, 'validationCorpus')
    self.cm = corpus(readCorpus=self.corpusPath, writeCorpus=self.corpusPath, order=10)
    self.lm = language
    self.sm = sourceModel(cm=self.cm, language=self.lm)
    self.validFiles = list()
    self.addValidationFile(self.validFileNames)
    self.genCorpus()
def basicSetup(self, ngram_order=10):
    """Build the uc, cm and sm attributes from this instance's settings.

    Reads self.logFilePath, self.readCorpus and self.lm, which must already
    be set. The corpus model reads from and writes to the same path.

    Args:
        ngram_order: value passed to mitlmCorpus as ``order``.
    """
    uc = unnaturalCode(logFilePath=self.logFilePath)
    self.uc = uc
    # The freshly built uc object is injected into the corpus model.
    corpusPath = self.readCorpus
    self.cm = mitlmCorpus(
        readCorpus=corpusPath,
        writeCorpus=corpusPath,
        uc=uc,
        order=ngram_order,
    )
    self.sm = sourceModel(language=self.lm, cm=self.cm)
def setUpClass(self):
    """Create a temporary working directory and the models used by the tests.

    Sets td (the temporary directory), uc, cm and sm on the receiver. The
    corpus and log file paths both live inside the temporary directory.

    Raises:
        AssertionError: if the temporary directory is not a directory that
            is readable, writable and searchable by this process.
    """
    self.td = mkdtemp(prefix='ucTest-')
    # The access modes are bit flags and have to be OR-ed together; AND-ing
    # them gives 0 (os.F_OK), which only tests that the path exists.
    assert os.access(self.td, os.R_OK | os.W_OK | os.X_OK)
    assert os.path.isdir(self.td)
    readCorpus = os.path.join(self.td, 'ucCorpus')
    logFilePath = os.path.join(self.td, 'ucLogFile')
    self.uc = unnaturalCode(logFilePath=logFilePath)
    # NOTE(review): the corpus receives ucGlobal (defined outside this
    # method) rather than the self.uc created just above -- confirm that
    # this is intended.
    self.cm = mitlmCorpus(readCorpus=readCorpus, writeCorpus=readCorpus, uc=ucGlobal)
    self.sm = sourceModel(cm=self.cm, language=pythonSource)
def __init__(self, ngram_order=10):
    """Set up the data directory and the Python source model.

    The data directory is taken from the ``UC_DATA`` environment variable,
    defaulting to ``~/.unnaturalCode``; it is created if it does not exist.
    The corpus ('pyCorpus') and log file ('pyLogFile') paths live inside it.

    Args:
        ngram_order: value passed to mitlmCorpus as ``order``.

    Raises:
        AssertionError: if the data directory is not a directory that is
            readable, writable and searchable by this process.
    """
    self.homeDir = os.path.expanduser("~")
    self.ucDir = os.getenv("UC_DATA", os.path.join(self.homeDir, ".unnaturalCode"))
    if not os.path.exists(self.ucDir):
        os.makedirs(self.ucDir)
    # The access modes are bit flags and have to be OR-ed together; AND-ing
    # them gives 0 (os.F_OK), which only tests that the path exists.
    assert os.access(self.ucDir, os.R_OK | os.W_OK | os.X_OK)
    assert os.path.isdir(self.ucDir)
    self.readCorpus = os.path.join(self.ucDir, 'pyCorpus')
    self.logFilePath = os.path.join(self.ucDir, 'pyLogFile')
    self.uc = unnaturalCode(logFilePath=self.logFilePath)
    # Oiugh... thank you, dependency injection.
    self.cm = mitlmCorpus(readCorpus=self.readCorpus, writeCorpus=self.readCorpus,
                          uc=self.uc, order=ngram_order)
    self.lm = pythonSource
    self.sm = sourceModel(cm=self.cm, language=self.lm)
def __init__(self, test=None, train=None, language=pythonSource, resultsDir=None, corpus=mitlmCorpus, keep=False, retry_valid=False):
    """Prepare a validation run with separate training and test files.

    Args:
        test: list of file names to test on. A str is not supported yet;
            any other type is rejected.
        train: list of file names to train on. Same type rules as test.
        language: language object handed to sourceModel.
        resultsDir: directory holding results.csv and the validation
            corpus. Falls back to the ``ucResultsDir`` environment
            variable, then to a freshly created temporary directory.
        corpus: callable that builds the corpus model; it is called with
            readCorpus, writeCorpus and order keyword arguments.
        keep: when false, an existing corpus file and its
            ``.uniqueTokens`` companion are removed before the corpus
            model is built; when true they are left in place.
        retry_valid: stored on the instance as ``retry_valid``.

    Raises:
        NotImplementedError: if test or train is a str.
        TypeError: if test or train is neither a str nor a list.
        AssertionError: if the results directory is not readable,
            writable and searchable by this process.
    """
    self.resultsDir = (resultsDir
                       or os.getenv("ucResultsDir", None)
                       or mkdtemp(prefix='ucValidation-'))
    self.retry_valid = retry_valid
    if isinstance(test, str):
        raise NotImplementedError
    elif isinstance(test, list):
        self.testFileNames = test
    else:
        raise TypeError("Constructor arguments!")
    if isinstance(train, str):
        raise NotImplementedError
    elif isinstance(train, list):
        self.trainFileNames = train
    else:
        raise TypeError("Constructor arguments!")
    # The access modes are bit flags and have to be OR-ed together; AND-ing
    # them gives 0 (os.F_OK), which only tests that the path exists.
    assert os.access(self.resultsDir, os.R_OK | os.W_OK | os.X_OK)
    self.csvPath = os.path.join(self.resultsDir, 'results.csv')
    # Count how many result rows already exist per (column 0, column 1)
    # pair so an interrupted run can see what has been recorded so far.
    self.progress = dict()
    try:
        with open(self.csvPath, 'r') as previous:
            for row in csv.reader(previous):
                if not row:
                    # Blank line in the CSV: nothing to count.
                    continue
                key = (row[0], row[1])
                self.progress[key] = self.progress.get(key, 0) + 1
    except IOError:
        # No readable results file: start with empty progress.
        pass
    # Results are appended to whatever is already in the file.
    self.csvFile = open(self.csvPath, 'a')
    self.csv = csv.writer(self.csvFile)
    self.corpusPath = os.path.join(self.resultsDir, 'validationCorpus')
    if not keep:
        # Start from a clean corpus unless the caller asked to keep it.
        for stale in (self.corpusPath, self.corpusPath + ".uniqueTokens"):
            if os.path.exists(stale):
                os.remove(stale)
    self.cm = corpus(readCorpus=self.corpusPath, writeCorpus=self.corpusPath, order=10)
    self.lm = language
    self.sm = sourceModel(cm=self.cm, language=self.lm)
    self.trainFiles = list()
    self.testFiles = list()
    self.addValidationFile(self.trainFileNames, testing=False, training=True)
    self.genCorpus()
    # Drop the training file list before the test files are loaded.
    del self.trainFiles
    self.addValidationFile(self.testFileNames, testing=True, training=False)
def testCorpify(self):
    """corpify() renders the sample lexemes as one space-separated string."""
    sm = sourceModel(cm=mitlmCorpus())
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(sm.corpify(pythonSource(someLexemes)),
                     'print ( 1 + 2 ** 2 ) <ENDMARKER>')