class ReportIterator:
    """Iterate over the rows of a queue of report files, one file at a time.

    Each file name is taken from ``s3_files_to_do``, handed to
    ``Store.moveFileToLocal`` and the resulting local file is read with
    ``Store.read_local_csv``; the rows of that batch are then yielded one
    by one before the next file is fetched.

    Note: file names are removed from the *end* of ``s3_files_to_do`` with
    ``pop()``, so the caller's list is consumed in reverse order and is
    mutated in place.
    """

    def __init__(self, s3_files_to_do):
        """Create an iterator over the files named in ``s3_files_to_do``."""
        self.store = Store()
        self.batch_ctr = 0  # index of the next unread row in self.batch
        self.batch = []  # rows of the file currently being iterated
        self.s3_files_to_do = s3_files_to_do

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next row, loading further files as needed.

        Raises:
            StopIteration: when the current batch is exhausted and no
                file is left in the queue.
        """
        # Keep loading until a batch actually contains an unread row.  A
        # single load is not enough: an empty file would otherwise end the
        # iteration even though more files are still queued.  nextBatch()
        # raises StopIteration once the queue is empty, ending the loop.
        while self.batch_ctr >= len(self.batch):
            self.nextBatch()
        return self.getNextEntry()

    def nextBatch(self):
        """Load the next queued file as the current batch.

        Raises:
            StopIteration: if the queue is empty (or ``None``), or the
                popped file name is falsy.
        """
        pending = self.s3_files_to_do
        filename = pending.pop() if pending else None
        if not filename:
            raise StopIteration()
        self.readBatch(filename)

    def readBatch(self, filename):
        """Fetch ``filename`` locally, read it and reset the row cursor."""
        local_filename = self.store.moveFileToLocal(filename)
        self.batch = self.store.read_local_csv(local_filename)
        self.batch_ctr = 0

    def getNextEntry(self):
        """Return the row under the cursor and advance the cursor by one."""
        result = self.batch[self.batch_ctr]
        self.batch_ctr += 1
        return result
Exemple #2
0
 def testOneUsefulFileInDir(self, mockClass):
     """Only the report file is listed when the directory also holds an unrelated file."""
     report_name = 'records-RNTIR-00002-RNTIR-01647.js'
     # mockClass is presumably the patched file-listing backend used by Store
     # -- confirm against the decorator on the enclosing test class.
     mockClass.return_value.getFileList.return_value = [
         report_name,
         'useless.js',
     ]
     store = Store()
     self.assertEqual(store.getReportFileList(), [report_name])
Exemple #3
0
 def testgetFilesToProcess(self, mockClass):
     """Only the report without a '-processed' sibling is returned for processing."""
     unprocessed = 'records-RNTIR-10002-RNTIR-21647.js'
     directory_listing = [
         'records-RNTIR-00002-RNTIR-01647.js',
         'records-RNTIR-00002-RNTIR-01647-processed.js',
         'useless.js',
         unprocessed,
     ]
     mockClass.return_value.getFileList.return_value = directory_listing
     store = Store()
     self.assertEqual([unprocessed], store.getFilesToProcess())
Exemple #4
0
 def testProcessedFile(self, mockClass):
     """The '-processed' CSV is the only entry in the processed-file list."""
     # From live issue
     processed_name = 'records-RNTIR-01644-RNTIR-01647-processed.csv'
     directory_listing = [
         processed_name,
         'records-RNTIR-01644-RNTIR-01647.csv',
         'records-RNTIR-01644-RNTIR-01648.csv',
     ]
     mockClass.return_value.getFileList.return_value = directory_listing
     store = Store()
     self.assertEqual([processed_name], store.getProcessedFileList())
Exemple #5
0
 def testOne1FileInDir(self, mockClass):
     """A report and its processed twin land in their respective lists."""
     report_name = 'records-RNTIR-00002-RNTIR-01647.js'
     processed_name = 'records-RNTIR-00002-RNTIR-01647-processed.js'
     mockClass.return_value.getFileList.return_value = [
         report_name,
         processed_name,
     ]
     store = Store()
     self.assertEqual(store.getReportFileList(), [report_name])
     self.assertEqual(store.getProcessedFileList(), [processed_name])
Exemple #6
0
class SemanticAnalysis:
    """Score the rows of a local CSV report and write the scored rows out."""

    def __init__(self):
        self.store = Store()
        # Constructed here but not consulted by processLine, which only
        # uses the default (pattern) analyser below.
        self.naive_Bayes_analyser = Blobber(analyzer=NaiveBayesAnalyzer())
        self.pattern_analyser = Blobber()

    def processFile(self, filename):
        """Read the local copy of ``filename``, score each row, write the result.

        The input path comes from ``Store.filenameAsLocal`` and the output
        path from ``Store.filenameAsProcessed`` applied to that local path.
        """
        local_input_filename = self.store.filenameAsLocal(filename)
        csv_list = self.store.read_local_csv(local_input_filename)
        local_output_filename = self.store.filenameAsProcessed(
            local_input_filename)
        output_csv = [self.processLine(line) for line in csv_list]
        self.store.write_local_csv(local_output_filename, output_csv)

    def processLine(self, line):
        """Return ``[line[0], line[1], '', polarity]`` for one input row.

        ``polarity`` is the pattern analyser's sentiment polarity for the
        text in ``line[2]``; the third output column is always the empty
        string.
        """
        ptn_assessed = self.pattern_analyser(line[2]).sentiment
        return [line[0], line[1], '', ptn_assessed.polarity]
Exemple #7
0
class Semantic:
    """Drive the analysis: pick the pending files and run each one through it."""

    def __init__(self):
        self.store = Store()
        self.semantic_analysis = SemanticAnalysis()

    def execute(self):
        """Analyse every file the store reports as still to be processed."""
        self.analyseReports(self.getFilesToProcess())

    def getFilesToProcess(self):
        """Return the store's list of files to process."""
        pending = self.store.getFilesToProcess()
        return pending

    def analyseReports(self, files_to_process):
        """Fetch, analyse and upload each file in ``files_to_process`` in turn."""
        store = self.store
        analysis = self.semantic_analysis
        for filename in files_to_process:
            print(f'Processing "{filename}"')
            store.moveFileToLocal(filename)
            analysis.processFile(filename)
            # The processed name is derived only after the analysis has run.
            processed_name = store.filenameAsProcessed(filename)
            store.moveFileToS3(processed_name)

    def processLine(self, line):
        """Print ``line`` to standard output."""
        print(line)
 def __init__(self, s3_files_to_do):
     """Set up the store, an empty current batch and the queue of files."""
     self.store = Store()
     self.s3_files_to_do = s3_files_to_do
     # Start with no batch loaded and the cursor at its beginning.
     self.batch = []
     self.batch_ctr = 0
Exemple #9
0
 def __init__(self):
     """Create the store and the two sentiment analysers."""
     self.store = Store()
     bayes_analyzer = NaiveBayesAnalyzer()
     self.naive_Bayes_analyser = Blobber(analyzer=bayes_analyzer)
     # Blobber with no arguments uses its default analyser.
     self.pattern_analyser = Blobber()
Exemple #10
0
 def testNoFileInDir(self, mockClass):
     """An empty directory listing yields an empty report-file list."""
     no_files = []
     mockClass.return_value.getFileList.return_value = no_files
     store = Store()
     report_files = store.getReportFileList()
     self.assertEqual(report_files, [])
Exemple #11
0
 def __init__(self):
     """Create the store and the analysis helper used by this object."""
     store = Store()
     self.store = store
     analysis = SemanticAnalysis()
     self.semantic_analysis = analysis