class ReportIterator:
    """Iterate over CSV entries spread across a list of S3 report files.

    Pops one filename at a time from ``s3_files_to_do``, downloads it via
    ``Store``, and yields the parsed CSV rows one by one.  Raises
    ``StopIteration`` once every file is exhausted.
    """

    def __init__(self, s3_files_to_do):
        self.store = Store()
        self.batch_ctr = 0    # index of the next entry within self.batch
        self.batch = []       # rows of the currently loaded file
        # Filenames still to fetch; consumed from the end via pop().
        self.s3_files_to_do = s3_files_to_do

    def __iter__(self):
        return self

    def __next__(self):
        # Bug fix: loop instead of fetching a single batch, so that a file
        # whose CSV parses to zero rows no longer terminates iteration
        # while further files remain in the queue.
        while self.batch_ctr == len(self.batch):
            self.nextBatch()  # raises StopIteration when no files are left
        return self.getNextEntry()

    def nextBatch(self):
        # `and` short-circuits: an empty or None file list yields a falsy
        # filename, which ends the iteration below.
        filename = self.s3_files_to_do and self.s3_files_to_do.pop()
        if filename:
            self.readBatch(filename)
        else:
            raise StopIteration()

    def readBatch(self, filename):
        # Download the S3 object to local disk, then parse it into rows.
        local_filename = self.store.moveFileToLocal(filename)
        self.batch = self.store.read_local_csv(local_filename)
        self.batch_ctr = 0

    def getNextEntry(self):
        result = self.batch[self.batch_ctr]
        self.batch_ctr += 1
        return result
def testOneUsefulFileInDir(self, mockClass):
    """Non-report files are excluded from the report file list."""
    listing = [
        'records-RNTIR-00002-RNTIR-01647.js',
        'useless.js',
    ]
    mockClass.return_value.getFileList.return_value = listing
    store = Store()
    expected = ['records-RNTIR-00002-RNTIR-01647.js']
    self.assertEqual(store.getReportFileList(), expected)
def testgetFilesToProcess(self, mockClass):
    """Only unprocessed report files are returned; processed and junk files are skipped."""
    mockClass.return_value.getFileList.return_value = [
        'records-RNTIR-00002-RNTIR-01647.js',
        'records-RNTIR-00002-RNTIR-01647-processed.js',
        'useless.js',
        'records-RNTIR-10002-RNTIR-21647.js',
    ]
    store = Store()
    result = store.getFilesToProcess()
    self.assertEqual(['records-RNTIR-10002-RNTIR-21647.js'], result)
def testProcessedFile(self, mockClass):
    """Regression test from a live issue: only '-processed' files count as processed."""
    listing = [
        'records-RNTIR-01644-RNTIR-01647-processed.csv',
        'records-RNTIR-01644-RNTIR-01647.csv',
        'records-RNTIR-01644-RNTIR-01648.csv',
    ]
    mockClass.return_value.getFileList.return_value = listing
    store = Store()
    self.assertEqual(
        ['records-RNTIR-01644-RNTIR-01647-processed.csv'],
        store.getProcessedFileList(),
    )
def testOne1FileInDir(self, mockClass):
    """A report and its processed twin each land in their respective lists."""
    raw = 'records-RNTIR-00002-RNTIR-01647.js'
    processed = 'records-RNTIR-00002-RNTIR-01647-processed.js'
    mockClass.return_value.getFileList.return_value = [raw, processed]
    store = Store()
    self.assertEqual(store.getReportFileList(), [raw])
    self.assertEqual(store.getProcessedFileList(), [processed])
class SemanticAnalysis:
    """Run sentiment analysis over report CSV files via TextBlob."""

    def __init__(self):
        self.store = Store()
        # NOTE(review): the Naive Bayes analyser is constructed but never
        # used by processLine — kept for interface compatibility.
        self.naive_Bayes_analyser = Blobber(analyzer=NaiveBayesAnalyzer())
        self.pattern_analyser = Blobber()

    def processFile(self, filename):
        """Read a local CSV, score every row, and write the '-processed' copy.

        Bug fix: the original loop body contained a stray ``pass`` before the
        append; replaced with a comprehension so each row is processed and
        collected exactly once.
        """
        local_input_filename = self.store.filenameAsLocal(filename)
        csv_list = self.store.read_local_csv(local_input_filename)
        local_output_filename = self.store.filenameAsProcessed(
            local_input_filename)
        output_csv = [self.processLine(line) for line in csv_list]
        self.store.write_local_csv(local_output_filename, output_csv)

    def processLine(self, line):
        """Return ``[id, field, '', polarity]`` for a row whose third column holds the text."""
        ptn_assessed = self.pattern_analyser(line[2]).sentiment
        return [line[0], line[1], '', ptn_assessed.polarity]
class Semantic:
    """Top-level driver: find unprocessed report files, analyse and re-upload them."""

    def __init__(self):
        self.store = Store()
        self.semantic_analysis = SemanticAnalysis()

    def execute(self):
        """Run one full pass over every file that still needs processing."""
        files_to_process = self.getFilesToProcess()
        self.analyseReports(files_to_process)

    def getFilesToProcess(self):
        return self.store.getFilesToProcess()

    def analyseReports(self, files_to_process):
        """Download, analyse, and upload the processed copy of each file."""
        for filename in files_to_process:
            # Bug fix: the f-string had no placeholder, so it printed a
            # fixed literal instead of the file actually being processed.
            print(f'Processing "{filename}"')
            self.store.moveFileToLocal(filename)
            self.semantic_analysis.processFile(filename)
            self.store.moveFileToS3(self.store.filenameAsProcessed(filename))

    def processLine(self, line):
        # NOTE(review): looks like a debugging hook; unused by execute().
        print(line)
def __init__(self, s3_files_to_do):
    """Remember the S3 files still to fetch and start with an empty batch."""
    self.store = Store()
    self.s3_files_to_do = s3_files_to_do
    self.batch = []
    self.batch_ctr = 0
def __init__(self):
    """Create the store plus both TextBlob analysers."""
    self.store = Store()
    # Naive Bayes analyser is constructed here but not used by processLine.
    self.naive_Bayes_analyser = Blobber(analyzer=NaiveBayesAnalyzer())
    self.pattern_analyser = Blobber()
def testNoFileInDir(self, mockClass):
    """An empty bucket listing yields an empty report list."""
    mockClass.return_value.getFileList.return_value = []
    self.assertEqual(Store().getReportFileList(), [])
def __init__(self):
    """Wire up the S3-backed store and the sentiment-analysis engine."""
    self.store = Store()
    self.semantic_analysis = SemanticAnalysis()