def archive_report(report_path):
    """Write a gzip-compressed copy of ``report_path`` into the archive.

    The archive file is ``settings.archive_directory/<basename>.gz``.
    The original file is left untouched; an existing archive of the same
    name is overwritten (a log line records that).
    """
    # Archive name mirrors the report's base name plus a ".gz" suffix.
    gz_name = os.path.split(report_path)[-1]
    gz_file = os.path.join(settings.archive_directory, gz_name) + ".gz"
    if os.path.isfile(gz_file):
        log.info("Archive does already exist, overwriting")
    log.info("Archiving report: " + report_path)
    # Context managers guarantee both handles are closed even if the
    # compression fails midway (the original leaked them on error).
    with open(report_path, "rb") as infile:
        with gzip.open(gz_file, "wb") as outfile:
            outfile.writelines(infile)
def archive_report(report_path):
    """Gzip ``report_path`` into ``settings.archive_directory``.

    Produces ``<archive_directory>/<basename>.gz``, overwriting any
    existing archive with that name. The source file itself is not
    modified or removed here.
    """
    gz_name = os.path.split(report_path)[-1]
    gz_file = os.path.join(settings.archive_directory, gz_name) + '.gz'
    if os.path.isfile(gz_file):
        log.info("Archive does already exist, overwriting")
    log.info("Archiving report: " + report_path)
    # Use "with" so neither handle is leaked when gzip writing raises;
    # the previous version only closed them on the success path.
    with open(report_path, 'rb') as infile:
        with gzip.open(gz_file, 'wb') as outfile:
            outfile.writelines(infile)
def sanitise_report(report_file, semaphore):
    """Sanitise one report file, archive the original, then delete it.

    The sanitised copy (YAML header document followed by one document
    per entry) is written into ``settings.sanitised_directory``; the raw
    original is gzipped into the archive via ``archive_report`` and then
    removed.

    ``semaphore`` is now released in a ``finally`` block: the original
    leaked the permit whenever any step raised, which could starve the
    dispatch loop in ``main`` of worker slots.
    """
    try:
        # Path of the report relative to the reports directory; stored in
        # the sanitised header so consumers can trace the source file.
        match = re.search("^" + re.escape(settings.reports_directory) + "(.*)",
                          report_file)
        report = Report(report_file)
        report_header = report.header
        report_header["report_file"] = match.group(1)

        report_filename = os.path.split(report_file)[-1]
        report_filename_sanitised = os.path.join(settings.sanitised_directory,
                                                 report_filename)
        if os.path.isfile(report_filename_sanitised):
            log.info("Sanitised report name already exists, overwriting: %s"
                     % report_filename_sanitised)
        else:
            log.info("New report file: %s" % report_filename_sanitised)

        # Header first, then every entry, each as its own explicit YAML
        # document ("---" ... "..."). "with" closes the file even if
        # dumping raises (the original leaked the handle on error).
        with open(report_filename_sanitised, "w") as report_file_sanitised:
            safe_dump(report_header, report_file_sanitised,
                      explicit_start=True, explicit_end=True)
            safe_dump_all(report, report_file_sanitised,
                          explicit_start=True, explicit_end=True,
                          default_flow_style=False)

        log.info("Moving original unsanitised file %s to archive" % report_file)
        archive_report(report_file)
        report.close()
        os.remove(report_file)
    finally:
        # Always hand the worker slot back, success or failure.
        semaphore.release()
def main():
    """Import every sanitised report into the database in parallel.

    Fans report files out to a process pool; a managed semaphore bounds
    the number of in-flight tasks to ``cpu_count()`` (each worker
    releases its permit when done).
    """
    logfile = join(settings.sanitised_directory, "publish.log")
    fh = logging.FileHandler(logfile)
    log.addHandler(fh)

    manager = Manager()
    # Throttles submission so queued-but-unstarted work never exceeds
    # the number of pool workers.
    semaphore = manager.Semaphore(cpu_count())
    pool = Pool(processes=cpu_count())

    report_counter = 0
    # iterate over report files
    report_files = list_report_files(settings.sanitised_directory)
    while True:
        # Fetch the next file *before* acquiring the semaphore: the
        # original acquired first, leaking one permit when StopIteration
        # ended the loop.
        try:
            report_file = report_files.next()
        except StopIteration:
            break
        semaphore.acquire()
        log.info("Importing %s" % report_file)
        # NOTE(review): the AsyncResult is discarded, so exceptions
        # raised inside a worker are silently dropped here.
        pool.apply_async(ReportInserter, (report_file, semaphore))
        report_counter += 1

    log.info("Waiting for all the tasks to finish")
    pool.close()
    pool.join()
    log.info("Imported %d reports" % report_counter)
class ReportInserter(object):
    """Insert one report file into the database and publish it as a gzip.

    Designed to run as a pool task (see ``main``): all the work happens in
    ``__init__``, which releases ``semaphore`` when it finishes so the
    dispatcher can submit more work.
    """

    def __init__(self, report_file, semaphore):
        try:
            # Insert the report into the database
            self.fh = open(report_file)
            # Multi-document YAML: first document is the header, the rest
            # are individual measurements.
            self._report = yaml.safe_load_all(self.fh)
            self.header = self._report.next()
            cc = self.header['probe_cc']
            # Sanity-check the country code before using it as a path part.
            # NOTE(review): assert is stripped under -O; a raise would be
            # safer for input validation.
            assert re.match("[a-zA-Z]{2}", cc)
            # Published location: <public_directory>/<cc>/<name>.gz
            public_file = join(settings.public_directory, cc, basename(report_file)+".gz")
            self.header['report_file'] = public_file
            report = self.header
            report['measurements'] = []
            # Create the report document first; measurements are $push-ed
            # into it one at a time below.
            self.rid = settings.db.reports.insert(report)
            test_name = self.header['test_name']
            # Insert each measurement into the database
            # NOTE(review): iterating "self" assumes an __iter__ defined
            # elsewhere (not visible in this view) that yields the
            # remaining YAML documents — confirm.
            for entry in self:
                entry = run_process(test_name, report_file, entry)
                settings.db.reports.update(
                    {'_id': self.rid},
                    {'$push': {'measurements': entry} })
            try:
                makedirs(dirname(public_file))
            except OSError as exc:
                # errno 17 == EEXIST: directory already present is fine.
                if exc.errno != 17:
                    raise exc
            # Publish a gzip-compressed copy, then delete the source file.
            fsrc = open(report_file, 'rb')
            fdst = gzip.open(public_file, 'wb')
            shutil.copyfileobj(fsrc, fdst)
            fsrc.close()
            fdst.close()
            remove(report_file)
        except Exception, e:
            # NOTE(review): broad catch-and-print swallows all failures;
            # the success log line below still runs afterwards.
            print e
        # Released unconditionally so the dispatch loop is never starved.
        semaphore.release()
        log.info("Imported %s" % report_file)
def main():
    """Sanitise every raw report in parallel.

    Validates that all configured directories/files exist (exiting with
    status 1 otherwise), then fans the report files out to a process
    pool, bounded by a managed semaphore of size ``cpu_count()``.
    """
    # Fail fast on missing configuration paths.
    if not os.path.isdir(settings.reports_directory):
        log.error(settings.reports_directory + " does not exist")
        sys.exit(1)

    logfile = os.path.join(settings.reports_directory, "sanitise.log")
    fh = logging.FileHandler(logfile)
    log.addHandler(fh)

    if not os.path.isdir(settings.archive_directory):
        log.error(settings.archive_directory + " does not exist")
        sys.exit(1)
    if not os.path.isfile(settings.bridge_db_mapping_file):
        log.error(settings.bridge_db_mapping_file + " does not exist")
        sys.exit(1)
    if not os.path.isdir(settings.sanitised_directory):
        log.error(settings.sanitised_directory + " does not exist")
        sys.exit(1)

    report_counter = 0
    manager = Manager()
    # Bounds the number of in-flight sanitisation tasks; each worker
    # releases its permit when done.
    semaphore = manager.Semaphore(cpu_count())
    pool = Pool(processes=cpu_count())

    # iterate over report files
    report_files = list_report_files(settings.reports_directory)
    while True:
        # Fetch before acquiring: the original acquired the semaphore
        # first and leaked one permit when StopIteration broke the loop.
        try:
            report_file = report_files.next()
        except StopIteration:
            break
        semaphore.acquire()
        pool.apply_async(sanitise_report, (report_file, semaphore))
        report_counter += 1

    log.info("Waiting for all the tasks to finish")
    pool.close()
    pool.join()
    if report_counter > 0:
        log.info(str(report_counter) + " reports archived")
    else:
        log.info("No reports were found in the reports directory: "
                 + settings.reports_directory)
def sanitise_report(report_file, semaphore):
    """Sanitise a single report, archive the raw file, and remove it.

    Output is a YAML stream in ``settings.sanitised_directory``: one
    header document followed by one document per entry. The raw report
    is gzipped into the archive and then deleted.

    The semaphore permit is released in a ``finally`` block — the
    previous version skipped the release on any exception, which could
    leave the dispatching ``main`` loop without worker slots.
    """
    try:
        # Record the report's path relative to the reports directory in
        # the sanitised header.
        match = re.search("^" + re.escape(settings.reports_directory) + "(.*)",
                          report_file)
        report = Report(report_file)
        report_header = report.header
        report_header['report_file'] = match.group(1)

        report_filename = os.path.split(report_file)[-1]
        report_filename_sanitised = os.path.join(settings.sanitised_directory,
                                                 report_filename)
        if os.path.isfile(report_filename_sanitised):
            log.info("Sanitised report name already exists, overwriting: %s"
                     % report_filename_sanitised)
        else:
            log.info("New report file: %s" % report_filename_sanitised)

        # "with" guarantees the sanitised file is closed even when the
        # YAML dump raises (previously the handle leaked on error).
        with open(report_filename_sanitised, 'w') as report_file_sanitised:
            safe_dump(report_header, report_file_sanitised,
                      explicit_start=True, explicit_end=True)
            safe_dump_all(report, report_file_sanitised,
                          explicit_start=True, explicit_end=True,
                          default_flow_style=False)

        log.info("Moving original unsanitised file %s to archive" % report_file)
        archive_report(report_file)
        report.close()
        os.remove(report_file)
    finally:
        # Release the worker slot unconditionally.
        semaphore.release()