Example #1
0
 def process_report(self, filename, sanitised_streams, raw_streams):
     """Sanitise one report and write its entries to three outputs.

     Every ``(sanitised_entry, raw_entry)`` pair yielded by
     ``Report.entries()`` is written as one JSON line to
     ``sanitised_streams`` and ``raw_streams`` respectively, and the
     sanitised entry is also dumped to a per-report YAML file under
     ``<dst_public>/reports-sanitised/yaml/<YYYY-MM-DD>/``.

     :param filename: location of the report to read; handed to
         ``get_luigi_target``.
     :param sanitised_streams: open handle with a ``write`` method that
         receives the sanitised JSON lines.
     :param raw_streams: open handle with a ``write`` method that
         receives the raw JSON lines.

     A failure while writing a single entry is logged and the loop moves
     on to the next entry. Any other failure (opening the input, building
     the ``Report``) propagates to the caller.
     """
     target = get_luigi_target(filename)
     # The YAML copy always carries a ".gz" suffix, whatever the input name.
     sanitised_yaml_filename = os.path.basename(filename)
     if not sanitised_yaml_filename.endswith(".gz"):
         sanitised_yaml_filename = sanitised_yaml_filename + ".gz"
     sanitised_yaml_target = get_luigi_target(os.path.join(
         self.dst_public,
         "reports-sanitised",
         "yaml",
         self.date.strftime("%Y-%m-%d"),
         sanitised_yaml_filename
     ))
     # Manage the YAML handle with ``with`` so it is released even when
     # opening or parsing the input report raises; previously it was only
     # closed on the success path.
     # NOTE(review): assumes the write handle is a context manager, as the
     # read handle below is -- confirm for the targets get_luigi_target
     # returns.
     with sanitised_yaml_target.open('w') as sanitised_yaml:
         logger.info("Sanitising %s" % filename)
         with target.open('r') as in_file:
             report = Report(in_file, self.bridge_db, target.path)
             for sanitised_entry, raw_entry in report.entries():
                 try:
                     logger.debug("writing sanitised entry to stream")
                     sanitised_streams.write(json_dumps(sanitised_entry))
                     sanitised_streams.write("\n")
                     logger.debug("writing raw entry to stream")
                     raw_streams.write(json_dumps(raw_entry))
                     raw_streams.write("\n")
                     logger.debug("writing sanitised yaml file")
                     yaml_dump(sanitised_entry, sanitised_yaml)
                 except Exception:
                     # Best effort: one bad entry must not abort the
                     # rest of the report.
                     logger.error("error in dumping %s" % filename)
                     logger.error(traceback.format_exc())
 def output(self):
     """Map each source report file to its destination target.

     Returns a dict with one key per entry of ``self.report_files``. Each
     value is the target ``get_luigi_target`` builds for a file of the
     same basename under ``self.dst_private``, using the configured SSH
     private key file and ``no_host_key_check=True``.
     """
     def private_target(source_path):
         destination = os.path.join(self.dst_private,
                                    os.path.basename(source_path))
         return get_luigi_target(
             destination,
             ssh_key_file=config.core.ssh_private_key_file,
             no_host_key_check=True)

     return {
         report_file: private_target(report_file)
         for report_file in self.report_files
     }
Example #3
0
 def output(self):
     """Return the target for this task's "interesting" JSON file.

     The file lives in ``self.dst`` and is named
     ``<software_name>-<test_name>-interesting-<date>.json``.
     """
     name_template = "{software_name}-{test_name}-interesting-{date}.json"
     file_name = name_template.format(
         software_name=self.software_name,
         test_name=self.test_name,
         date=self.date,
     )
     return get_luigi_target(os.path.join(self.dst, file_name))
Example #4
0
 def output(self):
     """Return the sanitised and raw stream targets for ``self.date``.

     Both files are named ``<YYYY-MM-DD>.json``. The sanitised stream is
     placed under ``<dst_public>/reports-sanitised/streams`` and the raw
     stream under ``<dst_private>/reports-raw/streams``.

     :returns: dict with the keys ``"raw_streams"`` and
         ``"sanitised_streams"``.
     """
     stream_name = self.date.strftime("%Y-%m-%d.json")

     sanitised_path = os.path.join(
         self.dst_public, "reports-sanitised", "streams", stream_name)
     sanitised = get_luigi_target(sanitised_path)

     raw_path = os.path.join(
         self.dst_private, "reports-raw", "streams", stream_name)
     raw = get_luigi_target(raw_path)

     return dict(raw_streams=raw, sanitised_streams=sanitised)
 def run(self):
     """Copy every report file to its output target, then remove the source.

     For each entry of ``self.report_files`` the source is opened through
     ``get_luigi_target`` (with the configured SSH key file), copied into
     the matching target from ``self.output()``, and the source is then
     removed.

     An error during the copy propagates to the caller. The source file
     is not removed in that case.
     """
     output = self.output()
     for report_file in self.report_files:
         destination = output[report_file]
         logger.info("Copying %s to %s" % (report_file, destination.path))
         t = get_luigi_target(report_file,
                              ssh_key_file=config.core.ssh_private_key_file,
                              no_host_key_check=True)
         # Both handles are managed by ``with`` so neither leaks when the
         # copy fails; previously the output handle was closed by hand and
         # only on the success path.
         # NOTE(review): assumes the write handle is a context manager,
         # like the read handle -- confirm for these targets.
         with t.open('r') as in_file, destination.open('w') as out_file:
             shutil.copyfileobj(in_file, out_file)
         # Reached only after a complete copy with both handles closed.
         t.remove()
Example #6
0
    def run(self):
        """Sanitise all reports for ``self.date`` into the two stream outputs.

        Loads the bridge database JSON from ``config.ooni.bridge_db_path``
        into ``self.bridge_db``, opens the ``"raw_streams"`` and
        ``"sanitised_streams"`` targets from ``self.output()``, then calls
        ``self.process_report`` for every file that ``list_report_files``
        returns for ``<src>/<YYYY-MM-DD>``.

        A failure in one report is logged and processing continues with the
        next. A failure while listing the reports propagates to the caller.
        """
        with get_luigi_target(config.ooni.bridge_db_path).open('r') as f:
            self.bridge_db = json.load(f)

        output = self.output()
        # Both stream handles are managed by ``with`` so they are released
        # even if the second open or the report listing raises; previously
        # they were closed only after the loop finished normally. They are
        # closed in reverse order of opening.
        # NOTE(review): assumes the write handles are context managers, like
        # the read handle above -- confirm for these targets.
        with output["raw_streams"].open('w') as raw_streams, \
                output["sanitised_streams"].open('w') as sanitised_streams:
            reports_path = os.path.join(self.src,
                                        self.date.strftime("%Y-%m-%d"))
            logger.debug("listing path %s" % reports_path)
            for filename in list_report_files(reports_path,
                                              config.aws.access_key_id,
                                              config.aws.secret_access_key):
                logger.debug("got filename %s" % filename)
                try:
                    self.process_report(filename, sanitised_streams,
                                        raw_streams)
                except Exception:
                    # Best effort: a broken report must not stop the rest
                    # of the day's reports from being processed.
                    logger.error("error in processing %s" % filename)
                    logger.error(traceback.format_exc())
Example #7
0
 def input(self):
     """Return the target for ``<src>/<date>.json``, this task's input."""
     json_name = "%s.json" % self.date
     source_path = os.path.join(self.src, json_name)
     return get_luigi_target(source_path)