def main():
    """Exercise shijian.List_Consensus and print its state after each step.

    Prints the list, its sys.getsizeof() size and its consensus, calls
    ensure_size(), and then appends the same sentence five times, printing
    the list after every append.
    """
    sentences = shijian.List_Consensus(
        [
            "This is a test.",
            "This test.",
            "This is a test.",
            "This is not a test.",
        ]
        + ["This is a test."] * 6
    )
    list_template = "list:\n{list}"

    print(list_template.format(list=sentences))
    print("list size: {size}".format(size=sys.getsizeof(sentences)))
    consensus = sentences.consensus()
    print("list consensus: {consensus}".format(consensus=consensus))
    print("ensure list size...")
    sentences.ensure_size()
    pyprel.print_line()

    print("append to list multiple times while ensuring list size...")
    print(list_template.format(list=sentences))
    # Five identical append-and-print rounds.
    for _ in range(5):
        print("append to list while ensuring list size...")
        sentences.append("This is another test.")
        print(list_template.format(list=sentences))
    pyprel.print_line()
    print(list_template.format(list=sentences))
def engage(self):
    """Log the start-up banner; every output step is skipped when silent.

    Emits, in order: a separator line, the centred logo (when display_logo
    is set), the program name, the version, and the initiation time read
    from the module-level clock.
    """
    if not self.silent:
        pyprel.print_line()
    # logo
    if self.display_logo and not self.silent:
        log.info(pyprel.center_string(text=self.logo))
        pyprel.print_line()
    # engage alert
    if self.name and not self.silent:
        log.info("initiate {name}".format(name=self.name))
    # version
    if self.version and not self.silent:
        log.info("version: {version}".format(version=self.version))
    if not self.silent:
        start_time = clock.start_time()
        log.info("initiation time: {time}".format(time=start_time))
def main(options):
    """Log which built-in sentence is most similar to --expression.

    Args:
        options: mapping of command-line options; reads "--expression" and
            "--wordvectormodel".

    Sets the module globals `program` and `log`, loads the word vector
    model, calls most_similar_expression() on a fixed list of sentences and
    logs the result before terminating the program.
    """
    global program, log
    program = propyte.Program(
        options=options, name=name, version=version, logo=logo
    )
    # The logger is imported only after the Program has been constructed.
    from propyte import log

    expression = options["--expression"]
    word_vector_model = options["--wordvectormodel"]
    model_word2vec = abstraction.load_word_vector_model(
        filename=word_vector_model
    )

    sentences = [
        "What are you dirty hooers doing on my planet?",
        "What time is it?",
        "What can you do?",
        "Change the color from red to black.",
        "All those moments will be lost in time.",
        "All of those moments will be lost in time.",
        "All of those moments are to be lost in time.",
    ]
    result = most_similar_expression(
        expression=expression,
        expressions=sentences,
        model_word2vec=model_word2vec,
    )

    pyprel.print_line()
    input_message = "input expression: {expression}".format(
        expression=expression
    )
    log.info(input_message)
    result_message = "most similar expression: {expression}".format(
        expression=result
    )
    log.info(result_message)
    pyprel.print_line()
    program.terminate()
def main(options):
    """Log every column of every entry of every table in a database.

    Args:
        options: mapping of command-line options; reads "--database".

    Sets the module globals `program` and `log`, opens the database with
    abstraction.access_database() and logs each table name followed by the
    column/content pairs of its entries, then terminates the program.
    """
    global program, log
    program = propyte.Program(
        options=options, name=name, version=version, logo=logo
    )
    # The logger is imported only after the Program has been constructed.
    from propyte import log

    # access options and arguments
    database_filename = options["--database"]
    log.info("")

    database = abstraction.access_database(filename=database_filename)
    for table in database.tables:
        # Fixed: the heading previously ended in a literal "/n" instead of
        # the newline escape "\n".
        log.info("\ntable: {table}\n".format(table=table))
        for entry in database[table].all():
            pyprel.print_line()
            for column in database[table].columns:
                log.info("\n{column}: {content}".format(
                    column=column,
                    content=str(entry[column]),
                ))
        pyprel.print_line()

    log.info("")
    program.terminate()
def main():
    """Access palette "palette1" and extend it to at least 3, then 20 colors.

    The palette is printed after it is accessed and after each extension.
    """
    pyprel.print_line()
    print("\nexample: accessing a palette and extending it\n")

    palette_name = "palette1"
    print("access palette {name}".format(name=palette_name))
    palette = pyprel.access_palette(name=palette_name)
    print("palette colors default: {colors}".format(colors=palette))

    # The same extend-and-print step is run for each requested minimum.
    for minimum_number_of_colors_needed in (3, 20):
        announcement = (
            "extend palette to ensure that it has at least {number} colors"
        ).format(number=minimum_number_of_colors_needed)
        print(announcement)
        palette.extend_palette(
            minimum_number_of_colors_needed=minimum_number_of_colors_needed
        )
        print("palette colors: {colors}".format(colors=palette))

    pyprel.print_line()
def main():
    """Show how a pyprel palette grows when more colors are requested.

    Accesses "palette1", prints it, then calls extend_palette() with
    minimums of 3 and 20, printing the palette after each call.
    """
    extend_template = (
        "extend palette to ensure that it has at least {number} colors"
    )
    colors_template = "palette colors: {colors}"

    pyprel.print_line()
    print("\nexample: accessing a palette and extending it\n")
    palette_name = "palette1"
    print("access palette {name}".format(name=palette_name))
    palette = pyprel.access_palette(name=palette_name)
    print("palette colors default: {colors}".format(colors=palette))

    for required in [3, 20]:
        print(extend_template.format(number=required))
        palette.extend_palette(minimum_number_of_colors_needed=required)
        print(colors_template.format(colors=palette))

    pyprel.print_line()
def terminate(self):
    """Stop the clock, log the shutdown report unless silent, and exit.

    Always ends by calling sys.exit(), so this method does not return.
    """
    clock.stop()
    if not self.silent:
        stop_time = clock.stop_time()
        log.info("termination time: {time}".format(time=stop_time))
        statistics = shijian.clocks.report()
        log.info(
            "time statistics report:\n{report}".format(report=statistics)
        )
        log.info("terminate {name}".format(name=self.name))
        pyprel.print_line()
    sys.exit()
def terminate(self):
    """Stop the clock and log the termination time and both time reports.

    Logs the stop time, the "full" style clocks report, the default clocks
    report and the program name, then prints a separator line.
    """
    clock.stop()

    stop_time = clock.stop_time()
    log.info("termination time: {time}".format(time=stop_time))

    full_report = shijian.clocks.report(style="full")
    log.info("time full report:\n{report}".format(report=full_report))

    statistics = shijian.clocks.report()
    log.info("time statistics report:\n{report}".format(report=statistics))

    log.info("terminate {name}".format(name=self.name))
    pyprel.print_line()
def engage(self):
    """Log the start-up banner: logo, name, version and initiation time.

    The logo, name and version lines are each emitted only when the
    corresponding attribute is truthy.
    """
    pyprel.print_line()
    # logo
    if self.display_logo:
        centred_logo = pyprel.center_string(text=self.logo)
        log.info(centred_logo)
        pyprel.print_line()
    # engage alert
    if self.name:
        log.info("initiate {name}".format(name=self.name))
    # version
    if self.version:
        log.info("version: {version}".format(version=self.version))
    start_time = clock.start_time()
    log.info("initiation time: {time}".format(time=start_time))
def main():
    """Demonstrate shijian.List_Consensus: consensus, ensure_size, appends.

    Prints the list before and after ensure_size() and after each of five
    identical appends.
    """

    def show_list():
        # Print the current list contents under the "list:" heading.
        print("list:\n{list}".format(list=sentences))

    sentences = shijian.List_Consensus([
        "This is a test.",
        "This test.",
        "This is a test.",
        "This is not a test.",
        "This is a test.",
        "This is a test.",
        "This is a test.",
        "This is a test.",
        "This is a test.",
        "This is a test.",
    ])
    show_list()
    print("list size: {size}".format(size=sys.getsizeof(sentences)))
    print("list consensus: {consensus}".format(
        consensus=sentences.consensus()
    ))
    print("ensure list size...")
    sentences.ensure_size()
    pyprel.print_line()
    print("append to list multiple times while ensuring list size...")
    show_list()
    appends_remaining = 5
    while appends_remaining > 0:
        print("append to list while ensuring list size...")
        sentences.append("This is another test.")
        show_list()
        appends_remaining -= 1
    pyprel.print_line()
    show_list()
def main():
    """Print three shijian text conversions, separated by pyprel lines.

    Shows a number rendered as English text, a sentence with its numbers
    replaced by English text, and a sentence passed through
    replace_contractions_with_full_words_and_replace_numbers_with_digits().
    """
    pyprel.print_line()

    number = 1234567890123
    number_text = shijian.number_to_English_text(number)
    print("number {number} in English text:\n\n{number_text}".format(
        number=number,
        number_text=number_text,
    ))
    pyprel.print_line()

    text = "It is 03:14 and I have 3 apples in 400 wormholes."
    heading = "replace numbers with English text in the following text:\n{text}\n"
    print(heading.format(text=text))
    print(shijian.replace_numbers_in_text_with_English_text(text=text))
    pyprel.print_line()

    text = "I've ten important invasions to consider."
    heading = (
        "change contractions to full words and change numbers to digits in "
        "the following text:\n{text}\n"
    )
    print(heading.format(text=text))
    converted = shijian.replace_contractions_with_full_words_and_replace_numbers_with_digits(
        text=text
    )
    print(converted)
    pyprel.print_line()
def main():
    """Demonstrate shijian's number and contraction text conversions.

    Each of the three examples prints a heading followed by the converted
    text and ends with a separator line.
    """
    replace_numbers = shijian.replace_numbers_in_text_with_English_text
    expand_text = (
        shijian
        .replace_contractions_with_full_words_and_replace_numbers_with_digits
    )

    pyprel.print_line()
    number = 1234567890123
    message = "number {number} in English text:\n\n{number_text}".format(
        number=number,
        number_text=shijian.number_to_English_text(number),
    )
    print(message)
    pyprel.print_line()

    text = "It is 03:14 and I have 3 apples in 400 wormholes."
    message = (
        "replace numbers with English text in the following text:\n{text}\n"
    ).format(text=text)
    print(message)
    print(replace_numbers(text=text))
    pyprel.print_line()

    text = "I've ten important invasions to consider."
    message = (
        "change contractions to full words and change numbers to digits "
        "in the following text:\n{text}\n"
    ).format(text=text)
    print(message)
    print(expand_text(text=text))
    pyprel.print_line()
def restart(self):
    """Stop the clock, log the shutdown report unless silent, then restart.

    The final call is to the module-level restart() function, not to this
    method.
    """
    clock.stop()
    if not self.silent:
        stop_time = clock.stop_time()
        log.info("termination time: {time}".format(time=stop_time))
        statistics = shijian.clocks.report()
        log.info(
            "time statistics report:\n{report}".format(report=statistics)
        )
        log.info("terminate {name}".format(name=self.name))
        pyprel.print_line()
    restart()
def main():
    """Print the current UTC time in each shijian style, plus examples.

    Also prints shijian.time_UTC() in one long style and the
    style_minimal_seconds() rendering of several second counts.
    """
    pyprel.print_line()
    print("time styles:")
    datetime_object_current_time_UTC = datetime.datetime.utcnow()
    styles = (
        "YYYY-MM-DDTHHMMSSZ",
        "YYYY-MM-DDTHHMMZ",
        "YYYY-MM-DDTHHMMSSMMMMMMZ",
        "YYYY-MM-DD HH:MM:SS UTC",
        "UNIX time S.SSSSSS",
        "UNIX time S",
        "day DD month YYYY",
        "HH:MM day DD month YYYY",
        "HH:MM:SS day DD month YYYY",
        "day DD month YYYY HH:MM:SS",
        "HH hours MM minutes SS seconds day DD month YYYY",
        "DD:HH:MM",
        "DD:HH:MM:SS",
        "HH:MM:SS",
        "HH hours MM minutes SS seconds",
    )
    for style in styles:
        print("\nstyle: {style}".format(style=style))
        styled = shijian.style_datetime_object(
            datetime_object=datetime_object_current_time_UTC,
            style=style,
        )
        print(styled)
    pyprel.print_line()

    print("current time UTC:\n")
    # NOTE(review): "SS sounds" looks like a typo for "SS seconds", but the
    # string is a style key passed to shijian at runtime, so it is kept
    # verbatim -- confirm against shijian before changing it.
    time_style = "HH hours MM minutes SS sounds day DD month YYYY"
    print(shijian.time_UTC(style=time_style))
    pyprel.print_line()

    print("minimal time style for seconds:\n")
    for seconds in (10, 100, 1000, 10000, 100000):
        seconds_styled = shijian.style_minimal_seconds(seconds)
        print("{seconds} seconds: {seconds_styled}".format(
            seconds=seconds,
            seconds_styled=seconds_styled,
        ))
    pyprel.print_line()
def main(options):
    """Dump the contents of a database to the log, table by table.

    Args:
        options: mapping of command-line options; reads "--database".

    Sets the module globals `program` and `log`. For every table returned
    by abstraction.access_database(), logs the table name and then each
    column/content pair of each entry, and finally terminates the program.
    """
    global program
    program = propyte.Program(
        options=options,
        name=name,
        version=version,
        logo=logo,
    )
    # The logger is imported only after the Program has been constructed.
    global log
    from propyte import log

    # access options and arguments
    database_filename = options["--database"]

    log.info("")

    database = abstraction.access_database(filename=database_filename)

    for table in database.tables:
        # Fixed: the format string used to end in "/n", which logged a
        # literal slash-n rather than a trailing newline.
        log.info("\ntable: {table}\n".format(table=table))
        columns = database[table].columns
        for entry in database[table].all():
            pyprel.print_line()
            for column in columns:
                log.info("\n{column}: {content}".format(
                    column=column,
                    content=str(entry[column]),
                ))
        pyprel.print_line()

    log.info("")

    program.terminate()
def main(options):
    """Run the propyte example: show options, configuration and logging.

    Args:
        options: mapping of command-line options passed to Program.

    Sets the module global `program`, prints its options and configuration
    dictionaries, reads a few configuration values, logs one message per
    log level, sleeps for two seconds, calls function_1() three times and
    terminates the program.
    """
    global program
    program = Program(options=options)

    # Print the program options dictionary and the program configuration
    # dictionary.
    pyprel.print_line()
    log.info("program options dictionary:")
    pyprel.print_dictionary(dictionary=program.options)
    pyprel.print_line()
    log.info("program configuration dictionary:")
    pyprel.print_dictionary(dictionary=program.configuration)
    pyprel.print_line()

    # Access a value of the program configuration dictionary.
    log.info("accessing a value of the program configuration")
    item1 = program.configuration["settings1"]["item1"]
    if "attribute1" in item1:
        # Fixed: the label used to say "item2" although the value is read
        # from item1.
        log.info("attribute1 of item1 of settings1: {attribute}".format(
            attribute=item1["attribute1"]
        ))

    # Access a value of the program configuration dictionary that does not
    # exist, falling back to a default value.
    log.info("accessing a nonexistent value of the program configuration")
    log.info("attribute3 of item1 of settings1: {attribute}".format(
        attribute=item1.get("attribute3", "nonexistent")
    ))

    # Loop over multiple values of the program configuration dictionary.
    log.info("loading items of settings1")
    # items() replaces the Python-2-only iteritems().
    for item_name, attributes in program.configuration["settings1"].items():
        log.info("loading item {name}".format(name=item_name))
        # Fixed: the label used to say "item 2" for every item in the loop.
        log.info("attributes of {name} of settings1: {attributes}".format(
            name=item_name,
            attributes=attributes,
        ))

    log.debug("message at level DEBUG")
    log.info("message at level INFO")
    log.warning("message at level WARNING")
    log.error("message at level ERROR")
    log.critical("message at level CRITICAL")

    # activity
    time.sleep(2)
    log.info("\nrun function 1 three times...")
    # range() replaces the Python-2-only xrange().
    for run_number in range(1, 4):
        log.info("function 1 run {run_number} result: {result}".format(
            run_number=run_number,
            result=function_1(),
        ))

    log.info("")
    program.terminate()
def main(options):
    """Grid-search classifier architectures and epochs on ttbb/ttH data.

    Args:
        options: mapping of command-line options; reads "--datattH",
            "--datattbb" and "--plot".

    Sets the module globals `program` and `log`. Loads both data files,
    optionally saves per-variable comparison histograms, labels the events
    (0 for ttbb, 1 for ttH), splits them 70/30, trains one classifier per
    (epoch, hidden-layer combination), saves the running results to
    "grid_search_map.pkl", prints a results table, saves
    "hyperparameter_map.eps", prints the three best test scores and
    terminates the program.
    """
    global program, log
    program = propyte.Program(
        options=options, name=name, version=version, logo=logo
    )
    # The logger is imported only after the Program has been constructed.
    from propyte import log

    log.info("")

    # access options and arguments
    ROOT_filename_ttH = options["--datattH"]
    ROOT_filename_ttbb = options["--datattbb"]
    engage_plotting = string_to_bool(options["--plot"])

    log.info("ttH data file: {filename}".format(filename=ROOT_filename_ttH))
    log.info("ttbb data file: {filename}".format(filename=ROOT_filename_ttbb))

    # Access data for event classes ttbb and ttH.
    data_ttbb = abstraction.load_HEP_data(
        ROOT_filename=ROOT_filename_ttbb,
        tree_name="nominal",
        maximum_number_of_events=None,
    )
    data_ttH = abstraction.load_HEP_data(
        ROOT_filename=ROOT_filename_ttH,
        tree_name="nominal",
        maximum_number_of_events=None,
    )

    if engage_plotting is True:
        # Plot the loaded datasets.
        for variable_name in data_ttbb.variables():
            log.info(
                "plot ttbb versus ttH comparison of {variable_name}".format(
                    variable_name=variable_name
                )
            )
            datavision.save_histogram_comparison_matplotlib(
                values_1=data_ttbb.values(name=variable_name),
                values_2=data_ttH.values(name=variable_name),
                label_1=variable_name + "_ttbb",
                label_2=variable_name + "_ttH",
                normalize=True,
                label_ratio_x="frequency",
                label_y="",
                title=variable_name + "_ttbb_ttH",
                filename=variable_name + "_ttbb_ttH.png",
            )

    # upcoming: consider data ordering

    # Preprocess all data (to be updated).
    data_ttbb.preprocess_all()
    data_ttH.preprocess_all()

    # Add class labels to the data sets, 0 for ttbb and 1 for ttH.
    for index in data_ttbb.indices():
        data_ttbb.variable(index=index, name="class", value=0)
    for index in data_ttH.indices():
        data_ttH.variable(index=index, name="class", value=1)

    # Convert the data sets to a simple list: every event contributes one
    # row of feature values followed by a one-element row with its class.
    # "Mbb_MindR" is deliberately not among the features.
    feature_names = [
        "el_1_pt", "el_1_eta", "el_1_phi",
        "jet_1_pt", "jet_1_eta", "jet_1_phi", "jet_1_e",
        "jet_2_pt", "jet_2_eta", "jet_2_phi", "jet_2_e",
        "met", "met_phi", "nJets", "Centrality_all",
    ]
    _data = []
    for events in (data_ttbb, data_ttH):
        for index in events.indices():
            _data.append([
                events.variable(index=index, name=feature_name)
                for feature_name in feature_names
            ])
            # NOTE(review): the class is read without an index; presumably
            # it refers to the event selected by the calls above -- confirm
            # against abstraction.
            _data.append([events.variable(name="class")])
    dataset = abstraction.Dataset(data=_data)

    log.info("")

    # define data
    log.info("split data for cross-validation")
    features_train, features_test, targets_train, targets_test = (
        cross_validation.train_test_split(
            dataset.features(), dataset.targets(), train_size=0.7
        )
    )

    # grid search
    import itertools

    epochs = [100, 100000]
    architecture = [200, 300, 300, 200]
    grid_search_map = {
        "epoch": [],
        "hidden_nodes": [],
        "score_training": [],
        "score_test": [],
    }

    # define progress
    # product(architecture, repeat=k) yields len(architecture) ** k tuples,
    # so the number of training runs can be computed without iterating.
    layer_counts = list(range(1, len(architecture) + 1))
    count_total = len(epochs) * sum(
        len(architecture) ** layer_count for layer_count in layer_counts
    )
    count = 0
    progress = shijian.Progress()
    progress.engage_quick_calculation_mode()

    for epoch in epochs:
        for nodes_count in layer_counts:
            combinations = itertools.product(architecture, repeat=nodes_count)
            for combination in combinations:
                hidden_nodes = list(combination)

                # define model
                log.info("define classification model")
                classifier = abstraction.Classification(
                    number_of_classes=2,
                    hidden_nodes=hidden_nodes,
                    epochs=epoch,
                )

                # train model
                log.info("fit model to dataset features and targets")
                classifier._model.fit(features_train, targets_train)

                # predict and cross-validate training
                log.info("test trained model on training dataset")
                score_training = metrics.accuracy_score(
                    classifier._model.predict(features_train), targets_train
                )
                score_test = metrics.accuracy_score(
                    classifier._model.predict(features_test), targets_test
                )
                log.info("\ntraining-testing instance complete:")
                log.info("epoch: {epoch}".format(epoch=epoch))
                log.info("architecture: {architecture}".format(
                    architecture=hidden_nodes
                ))
                log.info("score training: {score_training}".format(
                    score_training=100 * score_training
                ))
                log.info("score test: {score_test}".format(
                    score_test=100 * score_test
                ))
                pyprel.print_line()

                grid_search_map["epoch"].append(epoch)
                grid_search_map["hidden_nodes"].append(hidden_nodes)
                grid_search_map["score_training"].append(score_training)
                grid_search_map["score_test"].append(score_test)

                # save current grid search map
                shijian.export_object(
                    grid_search_map,
                    filename="grid_search_map.pkl",
                    overwrite=True,
                )

                count += 1
                # Fixed: the fraction used (count + 1) after count had
                # already been incremented, so it ran one step ahead and
                # ended above 1; float() also avoids Python 2 integer
                # division, which truncated the fraction to 0.
                print(progress.add_datum(
                    fraction=float(count) / count_total
                ))

    # table
    table_contents = [
        ["epoch", "architecture", "score training", "score testing"]
    ]
    for row in zip(
        grid_search_map["epoch"],
        grid_search_map["hidden_nodes"],
        grid_search_map["score_training"],
        grid_search_map["score_test"],
    ):
        table_contents.append([str(cell) for cell in row])
    print("\ngrid search map:\n")
    print(pyprel.Table(contents=table_contents))

    # plot
    # Group the (epoch, test score) pairs by architecture, one curve each.
    architectures = shijian.unique_list_elements(
        grid_search_map["hidden_nodes"]
    )
    architecture_epoch_score = {}
    for hidden_layout in architectures:
        architecture_epoch_score[str(hidden_layout)] = [
            [run_epoch, run_score]
            for run_epoch, run_layout, run_score in zip(
                grid_search_map["epoch"],
                grid_search_map["hidden_nodes"],
                grid_search_map["score_test"],
            )
            if run_layout == hidden_layout
        ]

    figure = matplotlib.pyplot.figure()
    figure.set_size_inches(10, 10)
    axes = figure.add_subplot(1, 1, 1)
    axes.set_xscale("log")
    figure.suptitle("hyperparameter map", fontsize=20)
    matplotlib.pyplot.xlabel("epochs")
    matplotlib.pyplot.ylabel("training test score")

    # items() replaces the Python-2-only iteritems().
    for key, value in architecture_epoch_score.items():
        curve_epochs = [element[0] for element in value]
        curve_scores = [element[1] for element in value]
        matplotlib.pyplot.plot(curve_epochs, curve_scores, label=key)
    matplotlib.pyplot.legend(loc="center right")

    matplotlib.pyplot.savefig(
        "hyperparameter_map.eps", bbox_inches="tight", format="eps"
    )

    # find best-scoring models
    # Find the 3 best scores.
    best_models = sorted(
        zip(grid_search_map["score_test"], grid_search_map["hidden_nodes"]),
        reverse=True,
    )[:3]

    # table
    table_contents = [["architecture", "score testing"]]
    for model in best_models:
        table_contents.append([str(model[1]), str(model[0])])
    print("\nbest-scoring models:\n")
    print(pyprel.Table(contents=table_contents))

    log.info("")
    program.terminate()
def main(options):
    """Train and score a grid of classifiers separating ttbb from ttH events.

    Args:
        options: mapping of command-line options; reads "--datattH",
            "--datattbb" and "--plot".

    Sets the module globals `program` and `log`. After loading and
    labelling both data sets (0 for ttbb, 1 for ttH) it performs a 70/30
    train/test split and fits one classifier for every combination of
    epoch count and hidden-layer layout. Results are exported to
    "grid_search_map.pkl" after each fit, tabulated, plotted to
    "hyperparameter_map.eps", and the three best test scores are printed
    before the program terminates.
    """
    global program
    program = propyte.Program(
        options=options,
        name=name,
        version=version,
        logo=logo,
    )
    # The logger is imported only after the Program has been constructed.
    global log
    from propyte import log

    log.info("")

    # access options and arguments
    ROOT_filename_ttH = options["--datattH"]
    ROOT_filename_ttbb = options["--datattbb"]
    engage_plotting = string_to_bool(options["--plot"])

    log.info("ttH data file: {filename}".format(filename=ROOT_filename_ttH))
    log.info("ttbb data file: {filename}".format(filename=ROOT_filename_ttbb))

    # Access data for event classes ttbb and ttH.
    data_ttbb = abstraction.load_HEP_data(
        ROOT_filename=ROOT_filename_ttbb,
        tree_name="nominal",
        maximum_number_of_events=None,
    )
    data_ttH = abstraction.load_HEP_data(
        ROOT_filename=ROOT_filename_ttH,
        tree_name="nominal",
        maximum_number_of_events=None,
    )

    if engage_plotting is True:
        # Plot the loaded datasets.
        for variable_name in data_ttbb.variables():
            log.info(
                "plot ttbb versus ttH comparison of {variable_name}".format(
                    variable_name=variable_name
                )
            )
            datavision.save_histogram_comparison_matplotlib(
                values_1=data_ttbb.values(name=variable_name),
                values_2=data_ttH.values(name=variable_name),
                label_1=variable_name + "_ttbb",
                label_2=variable_name + "_ttH",
                normalize=True,
                label_ratio_x="frequency",
                label_y="",
                title=variable_name + "_ttbb_ttH",
                filename=variable_name + "_ttbb_ttH.png",
            )

    # upcoming: consider data ordering

    # Preprocess all data (to be updated).
    data_ttbb.preprocess_all()
    data_ttH.preprocess_all()

    # Add class labels to the data sets, 0 for ttbb and 1 for ttH.
    for index in data_ttbb.indices():
        data_ttbb.variable(index=index, name="class", value=0)
    for index in data_ttH.indices():
        data_ttH.variable(index=index, name="class", value=1)

    # Flatten both data sets into one list in which each event is stored as
    # a row of feature values followed by a one-element class row.
    # "Mbb_MindR" is deliberately left out of the features.
    feature_names = (
        "el_1_pt",
        "el_1_eta",
        "el_1_phi",
        "jet_1_pt",
        "jet_1_eta",
        "jet_1_phi",
        "jet_1_e",
        "jet_2_pt",
        "jet_2_eta",
        "jet_2_phi",
        "jet_2_e",
        "met",
        "met_phi",
        "nJets",
        "Centrality_all",
    )
    _data = []
    for sample in (data_ttbb, data_ttH):
        for index in sample.indices():
            feature_row = [
                sample.variable(index=index, name=feature_name)
                for feature_name in feature_names
            ]
            _data.append(feature_row)
            # NOTE(review): no index is passed here; presumably the class
            # of the event addressed just above is returned -- confirm
            # against abstraction.
            _data.append([sample.variable(name="class")])
    dataset = abstraction.Dataset(data=_data)

    log.info("")

    # define data
    log.info("split data for cross-validation")
    features_train, features_test, targets_train, targets_test = (
        cross_validation.train_test_split(
            dataset.features(),
            dataset.targets(),
            train_size=0.7,
        )
    )

    # grid search
    import itertools

    epochs = [100, 100000]
    architecture = [200, 300, 300, 200]
    grid_search_map = {}
    for key in ("epoch", "hidden_nodes", "score_training", "score_test"):
        grid_search_map[key] = []

    # define progress
    # Each layer count k contributes len(architecture) ** k combinations
    # per epoch setting, so the total is known before training starts.
    depth_choices = list(range(1, len(architecture) + 1))
    count_total = len(epochs) * sum(
        len(architecture) ** depth for depth in depth_choices
    )
    count = 0
    progress = shijian.Progress()
    progress.engage_quick_calculation_mode()

    for epoch in epochs:
        for nodes_count in depth_choices:
            for combination in itertools.product(
                architecture, repeat=nodes_count
            ):
                hidden_nodes = list(combination)

                # define model
                log.info("define classification model")
                classifier = abstraction.Classification(
                    number_of_classes=2,
                    hidden_nodes=hidden_nodes,
                    epochs=epoch,
                )

                # train model
                log.info("fit model to dataset features and targets")
                classifier._model.fit(features_train, targets_train)

                # predict and cross-validate training
                log.info("test trained model on training dataset")
                score_training = metrics.accuracy_score(
                    classifier._model.predict(features_train),
                    targets_train,
                )
                score_test = metrics.accuracy_score(
                    classifier._model.predict(features_test),
                    targets_test,
                )
                log.info("\ntraining-testing instance complete:")
                log.info("epoch: {epoch}".format(epoch=epoch))
                log.info("architecture: {architecture}".format(
                    architecture=hidden_nodes
                ))
                log.info("score training: {score_training}".format(
                    score_training=100 * score_training
                ))
                log.info("score test: {score_test}".format(
                    score_test=100 * score_test
                ))
                pyprel.print_line()

                grid_search_map["epoch"].append(epoch)
                grid_search_map["hidden_nodes"].append(hidden_nodes)
                grid_search_map["score_training"].append(score_training)
                grid_search_map["score_test"].append(score_test)

                # save current grid search map
                shijian.export_object(
                    grid_search_map,
                    filename="grid_search_map.pkl",
                    overwrite=True,
                )

                count += 1
                # Fixed: (count + 1) / count_total was evaluated after the
                # increment, overshooting by one step (above 1 at the end),
                # and truncated to 0 under Python 2 integer division.
                fraction_complete = float(count) / count_total
                print(progress.add_datum(fraction=fraction_complete))

    number_of_entries = len(grid_search_map["epoch"])

    # table
    table_contents = [
        ["epoch", "architecture", "score training", "score testing"]
    ]
    for index in range(number_of_entries):
        table_contents.append([
            str(grid_search_map[column][index])
            for column in (
                "epoch", "hidden_nodes", "score_training", "score_test"
            )
        ])
    print("\ngrid search map:\n")
    print(pyprel.Table(contents=table_contents))

    # plot
    # Collect, per architecture, its [epoch, test score] pairs.
    architectures = shijian.unique_list_elements(
        grid_search_map["hidden_nodes"]
    )
    architecture_epoch_score = {}
    for layout in architectures:
        points = []
        for index in range(number_of_entries):
            if grid_search_map["hidden_nodes"][index] == layout:
                points.append([
                    grid_search_map["epoch"][index],
                    grid_search_map["score_test"][index],
                ])
        architecture_epoch_score[str(layout)] = points

    figure = matplotlib.pyplot.figure()
    figure.set_size_inches(10, 10)
    axes = figure.add_subplot(1, 1, 1)
    axes.set_xscale("log")
    figure.suptitle("hyperparameter map", fontsize=20)
    matplotlib.pyplot.xlabel("epochs")
    matplotlib.pyplot.ylabel("training test score")

    # items() replaces the Python-2-only iteritems().
    for key, value in architecture_epoch_score.items():
        plot_epochs = [element[0] for element in value]
        plot_scores = [element[1] for element in value]
        matplotlib.pyplot.plot(plot_epochs, plot_scores, label=key)
    matplotlib.pyplot.legend(loc="center right")

    matplotlib.pyplot.savefig(
        "hyperparameter_map.eps",
        bbox_inches="tight",
        format="eps",
    )

    # find best-scoring models
    # Find the 3 best scores.
    best_models = sorted(
        zip(grid_search_map["score_test"], grid_search_map["hidden_nodes"]),
        reverse=True,
    )[:3]

    # table
    table_contents = [["architecture", "score testing"]]
    for model in best_models:
        table_contents.append([str(model[1]), str(model[0])])
    print("\nbest-scoring models:\n")
    print(pyprel.Table(contents=table_contents))

    log.info("")

    program.terminate()
def main(options):
    """Grid-search classifier architectures and epochs on a SUSY-format file.

    Args:
        options: mapping of command-line options; reads "--data".

    Sets the module globals `program` and `log`. Loads the data file,
    splits it 70/30, fits one classifier per (epoch, hidden-layer
    combination), exports the running results to "grid_search_map.pkl",
    prints a results table, saves "hyperparameter_map.eps", prints the
    three best test scores and terminates the program.
    """
    global program, log
    program = propyte.Program(
        options=options, name=name, version=version, logo=logo
    )
    # The logger is imported only after the Program has been constructed.
    from propyte import log

    log.info("")

    # access options and arguments
    input_data_filename = options["--data"]

    # define dataset

    # Load the SUSY dataset (https://archive.ics.uci.edu/ml/datasets/SUSY).
    # The first column is the class label (1 for signal, 0 for background),
    # followed by 18 features (8 low-level features and 10 high-level
    # features):
    #
    # - lepton 1 pT
    # - lepton 1 eta
    # - lepton 1 phi
    # - lepton 2 pT
    # - lepton 2 eta
    # - lepton 2 phi
    # - missing energy magnitude
    # - missing energy phi
    # - MET_rel
    # - axial MET
    # - M_R
    # - M_TR_2
    # - R
    # - MT2
    # - S_R
    # - M_Delta_R
    # - dPhi_r_b
    # - cos(theta_r1)

    data = abstraction.access_SUSY_dataset_format_file(input_data_filename)
    dataset = abstraction.Dataset(data=data)

    # define data
    log.info("split data for cross-validation")
    features_train, features_test, targets_train, targets_test = (
        cross_validation.train_test_split(
            dataset.features(), dataset.targets(), train_size=0.7
        )
    )

    # grid search
    import itertools

    epochs = [10, 100, 500, 1000]
    architecture = [200, 300, 300, 300, 200]
    grid_search_map = {
        "epoch": [],
        "hidden_nodes": [],
        "score_training": [],
        "score_test": [],
    }

    # define progress
    # product(architecture, repeat=k) yields len(architecture) ** k tuples,
    # so the number of training runs can be computed without iterating.
    layer_counts = list(range(1, len(architecture) + 1))
    count_total = len(epochs) * sum(
        len(architecture) ** layer_count for layer_count in layer_counts
    )
    count = 0
    progress = shijian.Progress()
    progress.engage_quick_calculation_mode()

    for epoch in epochs:
        for nodes_count in layer_counts:
            combinations = itertools.product(architecture, repeat=nodes_count)
            for combination in combinations:
                hidden_nodes = list(combination)

                # define model
                log.info("define classification model")
                classifier = abstraction.Classification(
                    number_of_classes=2,
                    hidden_nodes=hidden_nodes,
                    epochs=epoch,
                )

                # train model
                log.info("fit model to dataset features and targets")
                classifier._model.fit(features_train, targets_train)

                # predict and cross-validate training
                log.info("test trained model on training dataset")
                score_training = metrics.accuracy_score(
                    classifier._model.predict(features_train), targets_train
                )
                score_test = metrics.accuracy_score(
                    classifier._model.predict(features_test), targets_test
                )
                log.info("\ntraining-testing instance complete:")
                log.info("epoch: {epoch}".format(epoch=epoch))
                log.info("architecture: {architecture}".format(
                    architecture=hidden_nodes
                ))
                log.info("score training: {score_training}".format(
                    score_training=100 * score_training
                ))
                log.info("score test: {score_test}".format(
                    score_test=100 * score_test
                ))
                pyprel.print_line()

                grid_search_map["epoch"].append(epoch)
                grid_search_map["hidden_nodes"].append(hidden_nodes)
                grid_search_map["score_training"].append(score_training)
                grid_search_map["score_test"].append(score_test)

                # save current grid search map
                shijian.export_object(
                    grid_search_map,
                    filename="grid_search_map.pkl",
                    overwrite=True,
                )

                count += 1
                # Fixed: the fraction used (count + 1) after count had
                # already been incremented, so it ran one step ahead and
                # ended above 1; float() also avoids Python 2 integer
                # division, which truncated the fraction to 0.
                print(progress.add_datum(
                    fraction=float(count) / count_total
                ))

    # table
    table_contents = [
        ["epoch", "architecture", "score training", "score testing"]
    ]
    for row in zip(
        grid_search_map["epoch"],
        grid_search_map["hidden_nodes"],
        grid_search_map["score_training"],
        grid_search_map["score_test"],
    ):
        table_contents.append([str(cell) for cell in row])
    print("\ngrid search map:\n")
    print(pyprel.Table(contents=table_contents))

    # plot
    # Group the (epoch, test score) pairs by architecture, one curve each.
    architectures = shijian.unique_list_elements(
        grid_search_map["hidden_nodes"]
    )
    architecture_epoch_score = {}
    for hidden_layout in architectures:
        architecture_epoch_score[str(hidden_layout)] = [
            [run_epoch, run_score]
            for run_epoch, run_layout, run_score in zip(
                grid_search_map["epoch"],
                grid_search_map["hidden_nodes"],
                grid_search_map["score_test"],
            )
            if run_layout == hidden_layout
        ]

    figure = matplotlib.pyplot.figure()
    figure.set_size_inches(10, 10)
    axes = figure.add_subplot(1, 1, 1)
    axes.set_xscale("log")
    figure.suptitle("hyperparameter map", fontsize=20)
    matplotlib.pyplot.xlabel("epochs")
    matplotlib.pyplot.ylabel("training test score")

    # items() replaces the Python-2-only iteritems().
    for key, value in architecture_epoch_score.items():
        curve_epochs = [element[0] for element in value]
        curve_scores = [element[1] for element in value]
        matplotlib.pyplot.plot(curve_epochs, curve_scores, label=key)
    matplotlib.pyplot.legend(loc="center right")

    matplotlib.pyplot.savefig(
        "hyperparameter_map.eps", bbox_inches="tight", format="eps"
    )

    # find best-scoring models
    # Find the 3 best scores.
    best_models = sorted(
        zip(grid_search_map["score_test"], grid_search_map["hidden_nodes"]),
        reverse=True,
    )[:3]

    # table
    table_contents = [["architecture", "score testing"]]
    for model in best_models:
        table_contents.append([str(model[1]), str(model[0])])
    print("\nbest-scoring models:\n")
    print(pyprel.Table(contents=table_contents))

    log.info("")
    program.terminate()
def main():
    """
    Demonstrate datetime styling with shijian and printouts with pyprel.

    Prints, separated by pyprel lines: the current UTC time in a verbose
    style, the interval from the current UTC time to 2016-08-03 (ICHEP 2016)
    as text, the current UTC time as a segment display, the same interval as
    a segment display, and a fixed UNIX timestamp styled as YYYY-MM-DDTHHMMZ.
    """
    style_interval  = "{DD}:{HH}:{MM}:{SS}"
    legend_segments = " D D H H M M S S"

    # current time, verbose style
    # NOTE(review): "sounds" looks like a typo for "seconds", but the style
    # string is handed to shijian verbatim -- confirm what shijian expects
    # before changing it.
    pyprel.print_line()
    print("current time UTC:\n")
    text_now_verbose = shijian.style_datetime_object(
        datetime_object = datetime.datetime.utcnow(),
        style           = "HH hours MM minutes SS sounds day DD month YYYY"
    )
    print(text_now_verbose)
    pyprel.print_line()

    # interval from the current UTC time to the start of 2016-08-03
    time_ICHEP_2016        = datetime.datetime(2016, 8, 3)
    time_now_UTC           = datetime.datetime.utcnow()
    interval_to_ICHEP_2016 = time_ICHEP_2016 - time_now_UTC
    print("time to ICHEP 2016 (DD:HH:MM:SS):\n")
    text_interval = shijian.style_datetime_object(
        datetime_object = interval_to_ICHEP_2016,
        style           = style_interval
    )
    print(text_interval)
    pyprel.print_line()

    # current time as a segment display
    text_now_segments = shijian.style_datetime_object(
        datetime_object = time_now_UTC,
        style           = "DD:HH:MM:SS"
    )
    print("current time UTC:")
    print(pyprel.render_segment_display(text = text_now_segments))
    print(legend_segments)
    pyprel.print_line()

    # interval as a segment display
    text_interval_segments = shijian.style_datetime_object(
        datetime_object = interval_to_ICHEP_2016,
        style           = style_interval
    )
    print("time to ICHEP 2016:")
    print(pyprel.render_segment_display(text = text_interval_segments))
    print(legend_segments)
    pyprel.print_line()

    # UNIX timestamp styling
    timestamp = 1487600377.0
    message = "convert UNIX timestamp {timestamp} to YYYY-MM-DDTHHMM".format(
        timestamp = timestamp
    )
    print(message)
    text_timestamp = shijian.style_UNIX_timestamp(
        timestamp = timestamp,
        style     = "YYYY-MM-DDTHHMMZ"
    )
    print(text_timestamp)
    pyprel.print_line()
def main():
    """
    Walk through pyprel printout examples, pausing for Enter before each.

    The examples shown, in order, are: a nested dictionary (printed directly
    and via its string form), an existing ASCII-art logo printed centred, a
    banner rendered from the text "ARIA", a segment display of the digits
    0-9, and a series of tables built with different width, wrapping and
    delimiter options.
    """
    print("\nexample: printout of dictionary")
    get_input("Press Enter to continue.")
    information = {
        "sample information": {
            "ID": 169888,
            "name": "ttH",
            "number of events": 124883,
            "cross section": 0.055519,
            "k factor": 1.0201,
            "generator": "pythia8",
            "variables": {
                "trk_n": 147,
                "zappo_n": 9001
            }
        }
    }
    pyprel.print_line()
    pyprel.print_dictionary(dictionary = information)
    pyprel.print_line()
    print(pyprel.dictionary_string(dictionary = information))
    pyprel.print_line()

    print("\nexample: printout of existing logo")
    get_input("Press Enter to continue.")
    # Backslashes in the logo are written as "\\" so that the literal holds
    # the same characters without relying on invalid escape sequences such as
    # "\|" or "\_", which newer Python versions warn about.
    text = (
        " ____ _ _____ _ \n"
        " / ___|___ | | ___ _ __| ___| | _____ __ \n"
        " | | / _ \\| |/ _ \\| '__| |_ | |/ _ \\ \\ /\\ / / \n"
        " | |__| (_) | | (_) | | | _| | | (_) \\ V V / \n"
        " \\____\\___/|_|\\___/|_| |_| |_|\\___/ \\_/\\_/ "
    )
    pyprel.print_center(text = text)

    print("\nexample: rendering and printout of logo")
    get_input("Press Enter to continue.")
    name = "aria"
    logo = pyprel.render_banner(
        text = name.upper()
    )
    pyprel.print_line()
    print(pyprel.center_string(text = logo))
    pyprel.print_line()

    print("\nexample: rendering and printout segment display")
    get_input("Press Enter to continue.")
    print(pyprel.render_segment_display(text = "0123456789"))

    print("\nexample: printout of tables")
    get_input("Press Enter to continue.")

    # two-column table rendered with a range of layout options
    table_contents = [
        ["heading 1", "heading 2"],
        ["some text", "some more text"],
        ["lots and lots and lots and lots and lots of text", "some more text"]
    ]
    print(
        pyprel.Table(
            contents     = table_contents,
            column_width = 25
        )
    )
    print(
        pyprel.Table(
            contents              = table_contents,
            table_width_requested = 30
        )
    )
    print(
        pyprel.Table(
            contents              = table_contents,
            table_width_requested = 30,
            hard_wrapping         = True
        )
    )
    print(
        pyprel.Table(
            contents = table_contents
        )
    )
    # print_center is given the table as text, so convert explicitly
    pyprel.print_center(
        text = str(
            pyprel.Table(
                contents              = table_contents,
                table_width_requested = 30
            )
        )
    )
    print(
        pyprel.Table(
            contents         = table_contents,
            column_width     = 25,
            column_delimiter = "||"
        )
    )
    print(
        pyprel.Table(
            contents      = table_contents,
            column_width  = 25,
            row_delimiter = "~"
        )
    )

    # three-column table with default options
    table_contents = [
        [
            "heading 1",
            "heading 2",
            "heading 3"
        ],
        [
            "some text",
            "some more text",
            "even more text"
        ],
        [
            "lots and lots and lots and lots and lots of text",
            "some more text",
            "some more text"
        ]
    ]
    print(
        pyprel.Table(
            contents = table_contents
        )
    )

    # four-column table with default options
    table_contents = [
        [
            "heading 1",
            "heading 2",
            "heading 3",
            "heading 4"
        ],
        [
            "some text",
            "some more text",
            "even more text",
            "yeah more text"
        ],
        [
            "lots and lots and lots and lots and lots of text",
            "some more text",
            "some more text",
            "some more text"
        ]
    ]
    print(
        pyprel.Table(
            contents = table_contents
        )
    )
def line():
    """Print a pyprel line drawn with the character "─"."""
    rule_character = "─"
    pyprel.print_line(character=rule_character)
def main():
    """
    Show shijian datetime and timestamp styling using pyprel printouts.

    The current UTC time and the interval from the current UTC time to
    2016-08-03 (ICHEP 2016) are each printed as styled text and then as
    segment displays; finally a fixed UNIX timestamp is printed in the style
    YYYY-MM-DDTHHMMZ. Sections are separated by pyprel lines.
    """
    countdown_style = "{DD}:{HH}:{MM}:{SS}"

    pyprel.print_line()
    print("current time UTC:\n")
    # NOTE(review): "sounds" may be a typo for "seconds"; the string is
    # passed to shijian unchanged, so verify against shijian before editing.
    verbose_now = shijian.style_datetime_object(
        datetime_object=datetime.datetime.utcnow(),
        style="HH hours MM minutes SS sounds day DD month YYYY")
    print(verbose_now)
    pyprel.print_line()

    # year, month, day of ICHEP 2016
    date_ICHEP_2016 = (2016, 8, 3)
    moment_ICHEP_2016 = datetime.datetime(*date_ICHEP_2016)
    moment_now_UTC = datetime.datetime.utcnow()
    remaining = moment_ICHEP_2016 - moment_now_UTC

    print("time to ICHEP 2016 (DD:HH:MM:SS):\n")
    print(
        shijian.style_datetime_object(datetime_object=remaining,
                                      style=countdown_style))
    pyprel.print_line()

    # Each segment display follows the same pattern: style the object, print
    # a heading, render the styled text, print the digit legend, print a line.
    segment_displays = (
        ("current time UTC:", moment_now_UTC, "DD:HH:MM:SS"),
        ("time to ICHEP 2016:", remaining, countdown_style),
    )
    for heading, datetime_object, style in segment_displays:
        styled_text = shijian.style_datetime_object(
            datetime_object=datetime_object, style=style)
        print(heading)
        print(pyprel.render_segment_display(text=styled_text))
        print(" D D H H M M S S")
        pyprel.print_line()

    timestamp = 1487600377.0
    print("convert UNIX timestamp {timestamp} to YYYY-MM-DDTHHMM".format(
        timestamp=timestamp))
    styled_timestamp = shijian.style_UNIX_timestamp(timestamp=timestamp,
                                                    style="YYYY-MM-DDTHHMMZ")
    print(styled_timestamp)
    pyprel.print_line()
def main(options):
    """
    Grid-search epochs and hidden-layer architectures on the SUSY dataset.

    The data file named by options["--data"] is loaded and split 70/30 into
    training and test sets. For every epoch count and every hidden-layer
    combination, a classifier is defined, fitted and scored on both sets.
    The results are accumulated in a grid search map that is exported to
    "grid_search_map.pkl" after every model. Afterwards the full map is
    printed as a table, test score versus epochs is plotted per architecture
    to "hyperparameter_map.eps", and the three best test scores are printed.

    Args:
        options: mapping of command-line options; "--data" is read here and
            the whole mapping is passed to propyte.Program.
    """
    global program
    program = propyte.Program(options=options,
                              name=name,
                              version=version,
                              logo=logo)
    global log
    from propyte import log

    log.info("")

    # access options and arguments
    input_data_filename = options["--data"]

    # define dataset
    # Load the SUSY dataset (https://archive.ics.uci.edu/ml/datasets/SUSY).
    # The first column is the class label (1 for signal, 0 for background),
    # followed by 18 features (8 low-level features and 10 high-level features):
    #
    # - lepton 1 pT
    # - lepton 1 eta
    # - lepton 1 phi
    # - lepton 2 pT
    # - lepton 2 eta
    # - lepton 2 phi
    # - missing energy magnitude
    # - missing energy phi
    # - MET_rel
    # - axial MET
    # - M_R
    # - M_TR_2
    # - R
    # - MT2
    # - S_R
    # - M_Delta_R
    # - dPhi_r_b
    # - cos(theta_r1)
    data = abstraction.access_SUSY_dataset_format_file(input_data_filename)
    dataset = abstraction.Dataset(data=data)

    # define data
    log.info("split data for cross-validation")
    features_train, features_test, targets_train, targets_test =\
        cross_validation.train_test_split(
            dataset.features(),
            dataset.targets(),
            train_size=0.7
        )

    # grid search
    import itertools
    epochs = [10, 100, 500, 1000]
    # NOTE(review): this list contains repeated values, so itertools.product
    # yields repeated hidden-layer configurations -- confirm this is intended.
    architecture = [200, 300, 300, 300, 200]
    grid_search_map = {
        "epoch": [],
        "hidden_nodes": [],
        "score_training": [],
        "score_test": []
    }

    # define progress
    # product(architecture, repeat=k) yields len(architecture) ** k
    # combinations, so the total is computed without enumerating them.
    layer_counts = range(1, len(architecture) + 1)
    count_total = len(epochs) * sum(
        len(architecture) ** nodes_count for nodes_count in layer_counts)
    count = 0
    progress = shijian.Progress()
    progress.engage_quick_calculation_mode()

    for epoch in epochs:
        for nodes_count in layer_counts:
            combinations = itertools.product(architecture, repeat=nodes_count)
            for combination in combinations:
                hidden_nodes = list(combination)

                # define model
                log.info("define classification model")
                classifier = abstraction.Classification(
                    number_of_classes=2,
                    hidden_nodes=hidden_nodes,
                    epochs=epoch)

                # train model
                log.info("fit model to dataset features and targets")
                classifier._model.fit(features_train, targets_train)
                #classifier.save()

                # predict and cross-validate training
                log.info("test trained model on training dataset")
                score_training = metrics.accuracy_score(
                    classifier._model.predict(features_train), targets_train)
                score_test = metrics.accuracy_score(
                    classifier._model.predict(features_test), targets_test)
                log.info("\ntraining-testing instance complete:")
                log.info("epoch: {epoch}".format(epoch=epoch))
                log.info("architecture: {architecture}".format(
                    architecture=hidden_nodes))
                log.info("score training: {score_training}".format(
                    score_training=100 * score_training))
                log.info("score test: {score_test}".format(
                    score_test=100 * score_test))
                pyprel.print_line()

                grid_search_map["epoch"].append(epoch)
                grid_search_map["hidden_nodes"].append(hidden_nodes)
                grid_search_map["score_training"].append(score_training)
                grid_search_map["score_test"].append(score_test)

                # save current grid search map (rewritten after every model)
                shijian.export_object(grid_search_map,
                                      filename="grid_search_map.pkl",
                                      overwrite=True)

                # count already includes the model just completed, so the
                # fraction reaches exactly 1 on the last model; float() keeps
                # the division from truncating under Python 2.
                count += 1
                print(progress.add_datum(
                    fraction=float(count) / count_total))

    # table
    table_contents = [[
        "epoch", "architecture", "score training", "score testing"
    ]]
    for entry in zip(grid_search_map["epoch"],
                     grid_search_map["hidden_nodes"],
                     grid_search_map["score_training"],
                     grid_search_map["score_test"]):
        table_contents.append([str(element) for element in entry])
    print("\ngrid search map:\n")
    print(pyprel.Table(contents=table_contents, ))

    # plot
    # Collect [epoch, test score] pairs per unique architecture, keyed by the
    # architecture's string form, which is also used as the legend label.
    architectures = shijian.unique_list_elements(
        grid_search_map["hidden_nodes"])
    architecture_epoch_score = {}
    for unique_architecture in architectures:
        architecture_epoch_score[str(unique_architecture)] = [
            [entry_epoch, entry_score]
            for entry_epoch, entry_nodes, entry_score in zip(
                grid_search_map["epoch"],
                grid_search_map["hidden_nodes"],
                grid_search_map["score_test"])
            if entry_nodes == unique_architecture
        ]

    figure = matplotlib.pyplot.figure()
    figure.set_size_inches(10, 10)
    axes = figure.add_subplot(1, 1, 1)
    axes.set_xscale("log")
    figure.suptitle("hyperparameter map", fontsize=20)
    matplotlib.pyplot.xlabel("epochs")
    matplotlib.pyplot.ylabel("training test score")
    for label, epoch_score_pairs in architecture_epoch_score.items():
        plot_epochs = [pair[0] for pair in epoch_score_pairs]
        plot_scores = [pair[1] for pair in epoch_score_pairs]
        matplotlib.pyplot.plot(plot_epochs, plot_scores, label=label)
    matplotlib.pyplot.legend(loc="center right")
    matplotlib.pyplot.savefig("hyperparameter_map.eps",
                              bbox_inches="tight",
                              format="eps")

    # find best-scoring models
    # Find the 3 best scores.
    best_models = sorted(zip(grid_search_map["score_test"],
                             grid_search_map["hidden_nodes"]),
                         reverse=True)[:3]

    # table
    table_contents = [["architecture", "score testing"]]
    for best_score, best_architecture in best_models:
        table_contents.append([str(best_architecture), str(best_score)])
    print("\nbest-scoring models:\n")
    print(pyprel.Table(contents=table_contents, ))

    log.info("")
    program.terminate()
def main():
    """
    Demonstrate several ways of building and printing pyprel tables.

    In order: a Markdown table converted to pyprel table contents, a small
    hand-written table, a table of x and sin(x) values assembled with zip,
    and two delimiter-free tables of width 40 used to align label/value
    printouts (the second with long values).
    """
    pyprel.print_line()

    print("\nconvert Markdown table to pyprel table\n")
    # NOTE(review): the row breaks of this literal were reconstructed from a
    # whitespace-flattened source -- confirm against the original layout.
    table_Markdown = """
|**variable 1**|**variable 2**|
|--------------|--------------|
|1 |0.23545 |
|2 |0.63523 |
|3 |0.55231 |
|4 |0.89563 |
|5 |0.55345 |
"""
    converted_contents = pyprel.table_Markdown_to_table_pyprel(
        table = table_Markdown
    )
    table_converted = pyprel.Table(
        contents = converted_contents,
    )
    print(table_converted)
    pyprel.print_line()

    print("\ncompose and print table\n")
    simple_contents = [
        ["number", "letter"],
        [1, "a"],
        [2, "b"]
    ]
    table_simple = pyprel.Table(
        contents = simple_contents
    )
    print(table_simple)
    pyprel.print_line()

    print("\ncompose and print a table using list comprehensions and zip\n")
    data_x = numpy.linspace(0, numpy.pi, 10)
    data_y = list(map(numpy.sin, data_x))
    heading_row = [["x", "y"]]
    value_rows = [list(pair) for pair in zip(data_x, data_y)]
    table_sine = pyprel.Table(
        contents = heading_row + value_rows
    )
    print(table_sine)
    pyprel.print_line()

    print("\ncompose aligned printouts of data using tables\n")
    short_contents = [
        ["msg:",       "1536155474294"],
        ["signature:", "0C118313F6D19"],
        ["data:",      "1536155474294"]
    ]
    table_short = pyprel.Table(
        contents              = short_contents,
        column_delimiter      = "",
        row_delimiter         = "",
        table_width_requested = 40
    )
    print(table_short)

    long_contents = [
        ["msg:",       "15361554742941536155474294153615547429415361554742941536155474294"],
        ["signature:", "0C118313F6D190C118313F6D190C118313F6D190C118313F6D190C118313F6D19"],
        ["data:",      "15361554742941536155474294153615547429415361554742941536155474294"]
    ]
    table_long = pyprel.Table(
        contents              = long_contents,
        column_delimiter      = "",
        row_delimiter         = "",
        table_width_requested = 40
    )
    print(table_long)
    pyprel.print_line()