Exemple #1
0
    def __init__(self, file_path, store_title):
        """ Set up a new experiment for the given input file.

        file_path   -- path of the input file; a leading "~" is expanded.
        store_title -- title of the experiment, or None to have a title of
                       the form "Experiment <digits>" generated.

        A missing input file is reported through util.outp.exit_on_error
        (presumably terminating the program - confirm in util.outp).
        """

        object.__init__(self)

        # Title is filled in further down, once the input file was checked
        self.title = None
        # Timing: the experiment starts now and ends when it gets stored
        self.start_time = time.time()
        self.end_time = None
        # Loaded entries
        self.entries = []
        # ClassifierResultGroup objects (name, classifier, result)
        self.classifier_results = []
        # OtherResult objects (file_name lines)
        self.other_result_files = []

        # Printer that also remembers every message it printed
        self.storer_printer = util.prtr.StorerAndPrinter(
            printer=util.prtr.TimePrinter(name="exp"))

        # Input file: expanded path and bare file name
        expanded_path = os.path.expanduser(file_path)
        self.file_path = expanded_path
        self.input_file_name = os.path.basename(expanded_path)

        if not os.path.lexists(expanded_path):
            util.outp.exit_on_error(
                "Input file not found: %s" % expanded_path)

        # Use the given title or make one up from five sampled numbers
        if store_title is None:
            sampled_numbers = random.sample(range(0, 15), 5)
            self.title = "Experiment %s" % "".join(
                map(str, sampled_numbers))
        else:
            self.title = store_title

        # Directory name: sanitised lower-case title plus a timestamp suffix
        dir_name = (Dir.remove_disallowed_characters(self.title.lower())
                    + time.strftime("_%m-%d_%H-%M"))
        self.experiment_dir_path = self.get_experiment_folder(dir_name)

        # Do not reuse a directory that is already present
        if os.path.lexists(self.experiment_dir_path):
            self.experiment_dir_path = Dir.uniquify(self.experiment_dir_path)
Exemple #2
0
def _sample(file_path, number_of_elements, limit_to):
	""" Sample <number_of_elements> lines from <file_path> into a new file.

	The output path is "<file_path>_<number_of_elements>-sample", passed
	through Dir.uniquify before writing. When <limit_to> is None the plain
	reservoir sampler is used, otherwise the limited variant receives
	<limit_to> as an extra argument.

	Raises IOError if <file_path> does not exist.
	"""

	print("Sampling...")

	# Refuse to continue without an input file
	if not os.path.lexists(file_path):
		raise IOError("Input file doesn't exist")

	output_path = Dir.uniquify(
		"%s_%s-sample" % (file_path, number_of_elements))

	source_lines = Dir.yield_lines(file_path)

	# Choose the sampler depending on whether a limit was requested
	if limit_to is not None:
		sampled_lines = ids_tools.reservoir_sample_limit(
			source_lines, number_of_elements, limit_to)
	else:
		sampled_lines = ids_tools.reservoir_sample(
			source_lines, number_of_elements)

	Dir.write_lines(output_path, sampled_lines)

	print("Done. Wrote to file:\n%s" % output_path)
Exemple #3
0
    def store_experiment(self):
        """ Write everything this experiment collected to its directory.

        Creates the files "used_entries", "result", "stdout" and
        "classifiers" plus one file per entry in self.other_result_files.
        The end time of the experiment is recorded first.

        Raises IOError if any of the target files already exists.
        """

        self.end_time = time.time()
        self.storer_printer.prt("Storing experiment results...")

        target_dir = self.experiment_dir_path
        Dir.ensure_folder_exists(target_dir)

        def in_target_dir(leaf_name):
            """ Path of <leaf_name> inside the experiment directory. """
            return os.path.join(target_dir, leaf_name)

        # Fixed output files
        entries_path = in_target_dir("used_entries")
        digest_path = in_target_dir("result")
        stdout_path = in_target_dir("stdout")
        classifiers_path = in_target_dir("classifiers")

        # One uniquified path per additional result file, in list order
        extra_paths = [
            Dir.uniquify(in_target_dir(extra_name))
            for extra_name, _ in self.other_result_files
        ]

        # Nothing may be overwritten: probe every planned path up front
        planned_paths = [
            entries_path, digest_path, stdout_path, classifiers_path
        ] + extra_paths
        already_present = [
            path for path in planned_paths if os.path.lexists(path)
        ]
        if already_present:
            raise IOError("One of the files exists: %s" % planned_paths)

        self.storer_printer.prt("Data verified. Storing utilised entries...")

        # Dump the entries; the analysis below works on the path returned
        stored_entries_path = idse_dao.save_entries(entries_path,
                                                    self.entries)

        self.storer_printer.prt("Done. Analysing file...")

        # Analyse the dumped entries; messages go to a separate Storer
        log_file_analysis.analyse(stored_entries_path,
                                  to_file=True,
                                  output_printer=util.prtr.Storer())

        self.storer_printer.prt("Done. Saving classifiers...")

        # Trained classifiers
        Dir.write_lines(classifiers_path, self.create_classifier_lines())

        self.storer_printer.prt("Done. Saving result digest...")

        # Result digest
        Dir.write_lines(digest_path, self.create_result_lines())

        # Additional result files, matched with the paths prepared above
        for (extra_name, extra_lines), extra_path in zip(
                self.other_result_files, extra_paths):
            self.storer_printer.prt("Saving others: %s..." % extra_name)
            Dir.write_lines(extra_path, extra_lines)

        self.storer_printer.prt("Done!")
        self.storer_printer.prt("Experiment stored in: %s" %
                                self.experiment_dir_path)

        # Written last so that it contains all messages (tee replacement)
        Dir.write_lines(stdout_path, self.storer_printer.get_messages())