def main():
    """Benchmark the genetic-algorithm solver on a random 2-D polynomial and show the GUI."""
    # Problem: a random polynomial with 5 terms over 2 dimensions.
    polynomial = Polynomial(num_terms=5, num_dimensions=2)
    log.info(polynomial)

    # Search range the solvers explore.
    search_range = SearchRange()
    search_range.set_feasible_range(10000, 2)

    # Solvers under test (brute force kept commented out as an easy toggle).
    # brute_force_solver = BruteForceSolver(search_range)
    ga_solver = GeneticAlgorithmSolver(search_range)

    gui = Gui(polynomial, search_range)
    # benchmark = Benchmark([brute_force_solver, ga_solver])
    benchmark = Benchmark([ga_solver])

    # Run the benchmark on the problem.
    benchmark.evaluate(polynomial)

    # Visualize the solver run; the binding is kept — presumably so the
    # animation object stays alive until gui.show() renders it (confirm).
    # gui.create_animation(brute_force_solver)
    animation = gui.create_animation(ga_solver)

    # Default visual
    # gui.plot_problem()
    gui.show()
def run(max_df, min_df, max_features, lowercase, stop_words, analyzer,
        strip_accents, use_idf, sublinear_tf, norm, data_provider):
    """Run one feature-extraction benchmark on the Reuters corpus.

    The arguments mirror the ``CountVectorizer`` / ``TfidfTransformer``
    hyper-parameters they are forwarded to.

    NOTE(review): ``data_provider`` is accepted but never used — the provider
    is hard-coded to ``'reuters'``. Confirm whether callers expect it to be
    honored.

    Returns:
        The result of ``Benchmark.evaluate()`` for this configuration.
    """
    vectorizer = CountVectorizer(
        min_df=min_df,
        max_df=max_df,
        max_features=max_features,
        lowercase=lowercase,
        stop_words=stop_words,
        analyzer=analyzer,
        strip_accents=strip_accents,
    )
    transformer = TfidfTransformer(
        use_idf=use_idf,
        sublinear_tf=sublinear_tf,
        norm=norm,
    )
    extractor = FeatureExtractor(PorterStemmer(), vectorizer, transformer)
    reuters_benchmark = Benchmark(
        DataProvider.get_data_provider('reuters'), extractor, False)
    return reuters_benchmark.evaluate()
def run_benchmark(root_folder, times, feasible_ranges, polynomial=None):
    """Run the solver benchmark ``times`` times for each feasible range.

    A sub-folder is created under ``root_folder`` for every range in
    ``feasible_ranges``. Each individual run saves the solver animations,
    the problem plot, and pickled solver/benchmark/gui objects; the
    aggregated per-range metrics are pickled as ``benchmark.pickle`` at
    the range-folder level.

    Args:
        root_folder: Directory that will hold all benchmark output.
        times: Number of independent runs per feasible range.
        feasible_ranges: Iterable of feasible-range sizes to evaluate.
        polynomial: Optional pre-built problem; a random 5-term, 2-D
            polynomial is generated (and logged) when omitted.
    """
    if not os.path.exists(root_folder):
        log.info('Creating Folder')
        os.makedirs(root_folder)

    # Create Problem only when the caller did not supply one. `is None`
    # (rather than truthiness) avoids discarding a caller-supplied
    # polynomial that happens to evaluate falsy.
    if polynomial is None:
        polynomial = Polynomial(num_terms=5, num_dimensions=2)
        log.info(polynomial)

    for feasible_range in feasible_ranges:
        search_range = SearchRange()
        search_range.set_feasible_range(feasible_range, 2)
        folder = os.path.join(root_folder,
                              'feasible_range_{}'.format(feasible_range))
        solution_payload = {}
        if not os.path.exists(folder):
            os.mkdir(folder)

        for i in range(times):
            # Create a per-run output folder.
            run_folder = os.path.join(folder, 'run_{}'.format(i))
            if not os.path.exists(run_folder):
                os.mkdir(run_folder)

            # Solvers
            brute_force_solver = BruteForceSolver(search_range)
            genetic_algorithm_solver = GeneticAlgorithmSolver(search_range)
            solvers = [brute_force_solver, genetic_algorithm_solver]

            # Benchmark
            benchmark = Benchmark(solvers)
            benchmark.evaluate(polynomial)

            # Accumulate per-solver metrics into the range-level payload.
            for solver in solvers:
                metric = benchmark.get_metrics(solver._id)
                add_to_benchmark_payload(solution_payload, solver, metric)

            # Save Animations
            gui = Gui(polynomial, search_range)
            genetic_animation = gui.create_animation(genetic_algorithm_solver)
            genetic_animation.save(os.path.join(run_folder, 'genetic.gif'),
                                   writer='imagemagick', fps=30)
            brute_animation = gui.create_animation(brute_force_solver)
            brute_animation.save(os.path.join(run_folder, 'brute.gif'),
                                 writer='imagemagick', fps=30)

            # Save Problem
            figure, plot = gui.plot_problem()
            figure.savefig(os.path.join(run_folder, 'problem.png'))

            # Pickle benchmark and solvers and gui.
            pickle_object(
                genetic_algorithm_solver,
                os.path.join(run_folder, 'genetic_solver.pickle'))
            pickle_object(brute_force_solver,
                          os.path.join(run_folder, 'brute_solver.pickle'))
            # BUG FIX: the benchmark was previously written to
            # 'brute_solver.pickle', silently overwriting the brute-force
            # solver dump saved just above.
            pickle_object(benchmark,
                          os.path.join(run_folder, 'benchmark.pickle'))
            pickle_object(gui, os.path.join(run_folder, 'gui.pickle'))

            # Clear Matplotlib figures
            gui.close()

        pickle_object(solution_payload,
                      os.path.join(folder, 'benchmark.pickle'))
from benchmark import Benchmark
from data_provider import NewsgroupsDataProvider, ReutersDataProvider
from nltk import PorterStemmer, WordNetLemmatizer
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer

from feature_extractor import FeatureExtractor

# Document-frequency cut-offs for the vocabulary.
max_df = 0.50  # ignore terms that appear in more than 50% of the documents
min_df = 0.01  # ignore terms that appear in less than 1% of the documents

count_vector = CountVectorizer(min_df=min_df,
                               max_df=max_df,
                               max_features=1000,
                               lowercase=True,
                               stop_words='english',
                               analyzer='word',
                               strip_accents='unicode')
tfidf_transformer = TfidfTransformer(use_idf=True,
                                     sublinear_tf=True,
                                     norm=None)
feature_extractor = FeatureExtractor(PorterStemmer(), count_vector,
                                     tfidf_transformer)

# Swap in NewsgroupsDataProvider() to benchmark the 20 Newsgroups corpus.
# data_provider = NewsgroupsDataProvider()
data_provider = ReutersDataProvider()  # renamed from `dataProvider` (PEP 8)

benchmark = Benchmark(data_provider, feature_extractor)
print('evaluation started')
benchmark.evaluate()