Exemple #1
0
            def run(self):
                """Run the prediction analysis and post the outcome to the parent.

                Settings are read from ``self.parameters``.  The keys
                ``filename_training``, ``filename_input`` and
                ``directory_report`` are required (a missing one raises
                ``KeyError``); ``output_field`` (default ``'OUT'``),
                ``id_field`` (``'ID'``), ``num_trees`` (20), ``max_depth`` (4)
                and ``iterations`` (1) are optional.

                Any exception raised by the analysis or by report generation is
                caught and delivered as ``repr(exception)`` in the ``error``
                field of the posted event, with ``result`` set to ``None``.
                On success ``error`` is ``None`` and ``result`` holds whatever
                ``rfreport.generate_report_document`` returned.
                """
                import rfprediction

                fn_t = self.parameters['filename_training']
                fn_i = self.parameters['filename_input']
                dir_out = self.parameters['directory_report']
                output_column = self.parameters.get('output_field', 'OUT')
                id_column = self.parameters.get('id_field', 'ID')
                num_trees = self.parameters.get('num_trees', 20)
                max_depth = self.parameters.get('max_depth', 4)
                iterations = self.parameters.get('iterations', 1)

                error = None
                try:
                    # NOTE(review): this keyword was previously spelled
                    # `mum_trees`, which looks like a typo for `num_trees`;
                    # confirm against execute_analysis's signature.
                    summary = rfprediction.execute_analysis(
                        training_file=fn_t,
                        diagnosis_file=fn_i,
                        num_trees=num_trees,
                        max_depth=max_depth,
                        id_field=id_column,
                        output_field=output_column,
                        output=dir_out,
                        iterations=iterations)
                    # Imported here so that a failing import is reported through
                    # the event like any other error.
                    import rfreport
                    result = rfreport.generate_report_document(summary, dir_out)
                except Exception as e:
                    result = None
                    error = repr(e)

                # Hand the outcome over through the wx event system.
                event = MainPanel._event_class(error=error, result=result)
                wx.PostEvent(self.parent, event)
Exemple #2
0
        try:
            predicted = get_decision_results(best_forest, predictionset, fields)
        except Exception as e:
            # Format the exception instead of concatenating it to a str:
            # `str + Exception` raises TypeError, which would replace the
            # real prediction error with an unrelated one.
            sys.stderr.write('error while prediction : {}\n'.format(e))
            # Bare raise re-raises the active exception with its traceback.
            raise

        # Bundle inputs, predictions and model details into the summary.
        summary = pack_json_results(trainingset, predictionset, fields, predicted, best_forest, best_tree, output_groups, weights, conditions)
    if args.key:  # unique key for interaction with other processes
        summary['key'] = args.key
    if args.without_rawdata:  # remove rawdata for privacy concern
        # Keep only the ID and output columns in both data sets.
        fields = [KEYWORD_ID, KEYWORD_OUTPUT]
        summary['trainingset'] = __trim_data_fields(summary['trainingset'], fields)
        # Trim the analysis set itself; trimming the training set here would
        # overwrite the analysis results with training data.
        summary['analysisset'] = __trim_data_fields(summary['analysisset'], fields)

    if dstdir.lower().endswith('.json'):
        # A *.json destination means "write the JSON file only".
        filename = dstdir
        dstdir = None
    else:
        # Otherwise the destination is a report directory; create it on demand.
        if not os.path.exists(dstdir):
            os.makedirs(dstdir)
        filename = os.path.join(dstdir, 'report_{}.json'.format(timestamp))

    with open(filename, 'w') as fo:
        json.dump(summary, fo, indent=4, separators=(',', ': '))

    if dstdir is not None:
        # Directory destination: also generate the report document
        # (the original comments call these "HTML reports").
        import rfreport
        rfreport.generate_report_document(summary, dstdir, timestamp=timestamp)
Exemple #3
0
    # Run settings under human-readable labels; passed on to the saved summary.
    conditions = {
        'Training data': args.t,
        'Analysis data': args.i,
        'Number of Trees': args.n,
        'Maximum depth': args.d,
        'Iteration': args.iteration,
    }

    if args.verbose:
        # Echo the settings to stderr, one tab-separated pair per line.
        for label in conditions:
            sys.stderr.write('{}\t{}\n'.format(label, conditions[label]))

    # Use the training file when no separate input file is given.
    # NOTE(review): filename_prediction is not read in the visible code.
    if args.i is None:
        filename_prediction = args.t
    else:
        filename_prediction = args.i
    iteration = args.iteration
    dstdir = args.o

    trainingset, predictionset, fields = load_files_and_determine_fields(
        filename_training=args.t,
        filename_diagnosis=args.i,
        field_id=args.I,
        field_output=args.F,
        verbose=args.verbose,
    )
    forest_outcome = _obtain_forest(
        trainingset, predictionset, fields,
        args.n, args.d, iteration, args.verbose,
    )
    best_forest, best_tree, weights, output_groups = forest_outcome

    # Without a separate prediction set, predict on a copy of the training set.
    if predictionset is None:
        predictionset = copy.deepcopy(trainingset)
    predicted = get_decision_results(best_forest, predictionset, fields)

    # Save the results as a timestamped JSON file in the output directory.
    timestamp = __get_timestamp()
    report_name = 'report_{}.json'.format(timestamp)
    filename = os.path.join(dstdir, report_name)
    summary = save_json_results(
        filename, trainingset, predictionset, fields, predicted,
        best_forest, best_tree, output_groups, weights, conditions,
    )

    # Generate the report document from the saved summary.
    import rfreport
    rfreport.generate_report_document(summary, dstdir)

#    execute_analysis(training_file=args.t, diagnosis_file=args.i, num_trees=args.n,
#        max_depth=args.d, output=args.o, output_field=args.F, id_field=args.I, verbose=args.verbose)