Example #1
0
 def _write_set(self, sset, apps_sizes):
     """Write one CSV row per (app, size) pair belonging to set *sset*."""
     for application, app_size in apps_sizes:
         stats = StageStats.get_stats(application.stages)
         # Bug in bytes read metrics reported by Spark
         stats[1] = app_size
         row = [len(application.slaves), sset, app_size,
                application.records_read, application.duration,
                Parser.fits_in_memory(application)]
         self._writer.writerow(row + stats)
Example #2
0
def get_row(log):
    """Return a row using only one LogParser instance."""
    app = LogParser().parse_file(log)
    bytes_read = app.bytes_read
    records = app.records_read
    # Classify the run by its record count, then append per-stage stats.
    row = [
        len(app.slaves),
        HBKmeansParser.get_set(records),
        bytes_read,
        records,
        app.duration,
        Parser.fits_in_memory(app),
    ]
    return row + StageStats.get_stats(app.stages)
Example #3
0
    def run(self):
        """Parse logs and extract relevant information."""
        self.start()

        # Derive the CSV header from a sample application's stages.
        csv_gen = CSVGen()
        sample = HBSortParser.get_app()
        titles = StageStats.get_titles(sample.stages)
        columns = ['workers', 'set', 'input_bytes', 'input_records',
                   'duration_ms', 'in_memory'] + titles
        writer = csv_gen.get_writer(columns, self.filename)

        # Emit one row per parsed application.
        for app in HBSortParser.get_apps():
            bytes_read = app.bytes_read
            writer.writerow(
                [len(app.slaves),
                 HBSortParser.get_set(bytes_read),
                 bytes_read,
                 app.records_read,
                 app.duration,
                 Parser.fits_in_memory(app)] + StageStats.get_stats(app.stages))

        csv_gen.close()
        self.finish()