def main(options):
    """
    Print every table of a SQLite database as a pyprel table, optionally
    limiting the number of rows shown per table (option ``--rows``).
    """
    global program
    program = propyte.Program(options=options,
                              name=name,
                              version=version,
                              logo=logo)
    global log
    from propyte import log
    filename_database = options["--database"]
    rows_limit = options["--rows"]
    if rows_limit is not None:
        rows_limit = int(rows_limit)
    # BUG FIX: the message previously contained no {filename} placeholder, so
    # the filename keyword argument was silently ignored by str.format.
    log.info("\naccess database {filename}".format(filename=filename_database))
    database = dataset.connect("sqlite:///{filename_database}".format(
        filename_database=filename_database))
    # Print a row-count summary and a full printout for every table.
    for name_table in database.tables:
        log.info("access table \"{name_table}\"".format(name_table=name_table))
        table = database[name_table]
        log.info("number of rows in table \"{name_table}\": {number_of_rows}".
                 format(name_table=name_table, number_of_rows=str(len(table))))
        log.info(
            "\ntable {name_table} printout:\n".format(name_table=name_table))
        print(
            pyprel.Table(contents=pyprel.table_dataset_database_table(
                table=database[name_table], rows_limit=rows_limit)))
    program.terminate()
def main(options):
    """
    Print a selected database table (restricted to the utterance/response/
    exchangeReference attributes) followed by the database metadata table.
    """
    global program
    program = propyte.Program(options=options,
                              name=name,
                              version=version,
                              logo=logo)
    global log
    from propyte import log
    filename_database = options["--database"]
    name_table = options["--table"]
    name_table_metadata = options["--tablemetadata"]
    rows_limit = options["--rows"]
    if rows_limit is not None:
        rows_limit = int(rows_limit)
    # BUG FIX: the message previously contained no {filename} placeholder, so
    # the filename keyword argument was silently ignored by str.format.
    log.info("\naccess database {filename}".format(filename=filename_database))
    database = dataset.connect("sqlite:///{filename_database}".format(
        filename_database=filename_database))
    log.info("access table \"{name_table}\"".format(name_table=name_table))
    table = database[name_table]
    log.info(
        "number of rows in table \"{name_table}\": {number_of_rows}".format(
            name_table=name_table, number_of_rows=str(len(table))))
    log.info("\ntable {name_table} printout:\n".format(name_table=name_table))
    print(
        pyprel.Table(contents=pyprel.table_dataset_database_table(
            table=database[name_table],
            include_attributes=["utterance", "response", "exchangeReference"],
            rows_limit=rows_limit)))
    # Print the metadata table in full (no row limit, all attributes).
    log.info("database metadata:")
    print(
        pyprel.Table(contents=pyprel.table_dataset_database_table(
            table=database[name_table_metadata], )))
    program.terminate()
def main(options):
    """
    Demonstrate pyprel printouts of a dataset/SQLite database: create the
    database if needed, insert example rows, and print summaries and tables.
    """
    filename_database = options["--database"]
    name_table = options["--table"]
    print("\npyprel database examples\n")
    # BUG FIX: the database should be created when the file does *not* yet
    # exist (the check was inverted), and it must be created at the requested
    # filename rather than the hard-coded "database.db".
    if not os.path.exists(filename_database):
        print("create database {database}".format(database=filename_database))
        create_database(filename=filename_database)
    # BUG FIX: the message previously contained no {filename} placeholder.
    print("access database {filename}".format(filename=filename_database))
    database = dataset.connect("sqlite:///{filename_database}".format(
        filename_database=filename_database))
    table = database[name_table]
    print("add data to database")
    table.insert(
        dict(name="Legolas Greenleaf",
             age=2000,
             country="Mirkwood",
             uuid4=str(uuid.uuid4())))
    table.insert(
        dict(name="Cody Rapol",
             age=30,
             country="USA",
             activity="DDR",
             uuid4=str(uuid.uuid4())))
    # Summarize the database structure and show the first row.
    print("""
database tables:\n{tables}
\ntable {table} columns:\n{columns}
\ntable {table} row one:\n{row}
""".format(tables=database.tables,
           table=name_table,
           columns=database[name_table].columns,
           row=[entry for entry in table.find(id="1")]))
    print("table {table} printout:\n".format(table=name_table))
    print(
        pyprel.Table(contents=pyprel.table_dataset_database_table(
            table=database[name_table])))
def display_refresh():
    # Redraw the terminal: current room members alongside recent room events,
    # rendered as a four-column pyprel table (members, datetimes, senders,
    # messages).
    members = [
        user.get_display_name() for user in scalar.room.get_joined_members()
    ]
    # NOTE(review): [10:] *drops* the first 10 events; if the intent is "the
    # 10 most recent events" this should presumably be [-10:] -- confirm.
    events = scalar.room.get_events()[10:]
    # Convert Matrix origin_server_ts values (milliseconds since the epoch)
    # to datetime objects.
    datetimes = [
        datetime.datetime.fromtimestamp(event["origin_server_ts"] / 1000)
        for event in events
    ]
    senders = [event["sender"] for event in events]
    # Reduce "@user:server" Matrix identifiers to the bare username.
    senders = [sender.split("@")[1].split(":")[0] for sender in senders]
    messages = [event["content"]["body"] for event in events]
    table_contents = [["MEMBERS", "DATETIMES", "SENDERS", "MESSAGES"]]
    # Number of content rows fitting in roughly half the terminal height,
    # less 3 lines reserved for the header/borders.
    height_contents = int(0.5 * pyprel.terminal_height() - 0.5) - 3
    # Pad all four columns to equal length so zip() below does not truncate:
    # members are padded at the bottom; event columns are padded at the top
    # so the most recent events sit at the bottom of the table.
    members = members + [
        "" for blank in list(range(1, height_contents - len(members)))
    ]
    datetimes = [
        "" for blank in list(range(1, height_contents - len(datetimes)))
    ] + datetimes
    senders = ["" for blank in list(range(1, height_contents - len(senders)))
               ] + senders
    messages = [
        "" for blank in list(range(1, height_contents - len(messages)))
    ] + messages
    for member, _datetime, sender, message in zip(members, datetimes, senders,
                                                  messages):
        table_contents.append([member, _datetime, sender, message])
    # Clear the screen and redraw the logo before printing the table.
    terminal_flash_clear()
    logo()
    print(
        pyprel.Table(
            contents=table_contents,
            column_delimiter=" ",
            row_delimiter=" ",
            table_width_requested=None  #50
        ))
def main(options):
    """
    Compare a natural-language expression to a set of stored expressions via
    word vector representations, print a comparison table, and report the
    stored expression that best matches (smallest vector angle), provided the
    magnitude-difference and angle rankings roughly agree.
    """
    global program
    program = propyte.Program(options=options,
                              name=name,
                              version=version,
                              logo=logo)
    global log
    from propyte import log
    # access options and arguments
    expression = options["--expression"]
    word_vector_model = options["--wordvectormodel"]
    # Define a dictionary of natural language expressions and word vectors.
    stored_expressions = {
        "This is a test.":
        numpy.array([
            -0.3828682, -0.36397889, 0.46676171, 0.32530552, 0.20376287,
            -0.41326976, -0.58228827, 0.05073506, -0.29834735, 0.62523258,
            0.48247468, 0.63565594, 0.61466146, -0.05790123, 0.49383548,
            0.17871667, 0.26640224, -0.05172781, -0.43991241, 0.8027305,
            0.13174312, -0.70332521, -0.56575418, -0.21705133, -0.93002945,
            0.04151381, -0.15113404, 0.06264834, 0.03022593, -0.00822711,
            -0.23755306, -0.9215641, 0.21348992, 0.38396335, 0.3020944,
            -0.08034055, -0.36891997, -0.86551458, -1.02402425, 0.03633916,
            0.34436008, 0.43058148, -0.32728755, 0.50974292, -0.31518513,
            -0.63085675, -0.40564051, 0.30009648, -0.06426927, -0.6588546,
            0.06724164, 0.08611558, -0.13476974, 0.43107161, -0.26038069,
            0.03187743, 0.05931987, 0.28155532, 0.3636784, -0.76867509,
            -0.2253349, -0.77433741, 0.01924273, 0.63751495, 0.03874384,
            0.28651205, 0.14867969, -0.2256701, 0.23747981, 0.12383705,
            0.27097231, -0.06902695, 0.06664967, 0.05863822, -0.06882346,
            0.59539717, 0.08472043, -0.13579898, -0.31311297, -0.68136102,
            0.33296993, 0.26578408, -0.55723149, 0.38583612, -0.18033087,
            -0.50730389, 0.39173275, 0.57567608, -0.42063141, 0.22387385,
            0.473548, 0.41959459, 0.34881225, 0.1939103, -0.54997987,
            0.30737191, -0.6659264, 0.0437102, -0.11230323, -0.13493723
        ],
                    dtype=numpy.float32),
        "All those moments will be lost in time.":
        numpy.array([
            -1.19203818e+00, -2.22961619e-01, 6.69643760e-01, 3.70975524e-01,
            -6.15832031e-01, -4.36573088e-01, -6.77924156e-01, 6.26985192e-01,
            1.36510044e-01, 1.09196387e-01, 7.61598766e-01, 7.17226386e-01,
            -1.08178332e-01, -1.00655735e+00, 7.45964348e-01, 1.64966106e-01,
            5.85332870e-01, -3.83911550e-01, -6.85201228e-01, 1.31213856e+00,
            8.04567218e-01, -1.28810382e+00, -2.52677381e-01, -9.27993536e-01,
            -4.17307138e-01, -4.56952095e-01, -7.27599859e-01, 7.54008472e-01,
            6.67124987e-04, 2.75971144e-01, 2.75658131e-01, -6.79417193e-01,
            -1.73686996e-01, 8.78942013e-01, 4.39480424e-01, -6.37802243e-01,
            -6.99860230e-02, -7.99779966e-02, -7.58146644e-02, 8.09784770e-01,
            -3.71645451e-01, 1.04973994e-01, -1.34749603e+00, 2.96185315e-01,
            5.85593104e-01, -1.40544206e-01, -3.77467513e-01, 3.46597135e-01,
            2.56733745e-01, 4.04421866e-01, 1.57907709e-01, 3.00843865e-01,
            -5.41967154e-01, 5.51929235e-01, -1.69145897e-01, 4.42785203e-01,
            -2.69805342e-02, 1.31654418e+00, 3.19460958e-01, 5.08862257e-01,
            3.44371676e-01, -6.95496798e-01, 4.88163918e-01, 2.55316138e-01,
            5.03436685e-01, 9.24195647e-02, -2.38671958e-01, -8.97032142e-01,
            -3.73697281e-03, 2.99875826e-01, 1.65674359e-01, 2.01489821e-01,
            1.58179402e-02, 1.30668238e-01, -1.56954467e-01, -2.88258016e-01,
            6.76668346e-01, -3.77742261e-01, 2.20978767e-01, -6.34561360e-01,
            8.33457410e-01, -2.13193640e-01, -6.35235757e-02, 1.89480215e-01,
            6.02166615e-02, -6.64785147e-01, 1.07347333e+00, 6.22629285e-01,
            -4.63467717e-01, -1.13483839e-01, 3.43968630e-01, 2.75979757e-01,
            -1.28710240e-01, 1.50670230e+00, -3.10248852e-01, 3.29222828e-01,
            1.64443821e-01, -7.78683364e-01, -9.80837345e-02, -1.07415296e-01
        ],
                    dtype=numpy.float32),
        "All those moments were lost in time.":
        numpy.array([
            -0.94025505, -0.45476836, 0.41891485, 1.06683254, -0.49607083,
            -0.60043317, -0.55656326, 0.05368682, 0.20896676, 0.19261286,
            0.51067233, 0.01298623, -0.67276001, -0.51130211, 0.61433661,
            0.03579944, 0.4515644, -0.19222273, -0.3919456, 0.65209424,
            0.98329031, -0.78390068, -0.0611292, -0.88086104, 0.25153416,
            -0.16051427, -0.33223695, 0.86147106, -0.19569418, -0.21456225,
            0.27583197, -0.65764415, -0.76533222, 0.78306556, 0.84534264,
            -0.26408321, 0.04312199, -0.00636051, 0.1322974, 0.72321951,
            -0.01186696, 0.40505514, -0.87730938, 0.58147532, 0.89738142,
            -0.16748536, -0.38406748, -0.12007161, 0.49123141, 0.48998365,
            0.15616624, 0.52637529, -0.66329396, 0.10376941, -0.33025965,
            0.04188792, 0.30536407, 0.38240519, 0.01627355, 1.23012972,
            0.46352714, -0.74617827, 0.43505573, -0.16246299, 0.34668511,
            -0.02247265, -0.34742412, -0.64483654, -0.2243523, 0.04222834,
            0.42057285, 0.22310457, 0.36833102, -0.05716853, -0.44688487,
            -0.51298815, 0.61859602, -0.21154809, -0.08168469, -0.15004104,
            0.21371906, 0.21713886, 0.21935812, 0.04912762, 0.02854752,
            -0.55747426, 0.70036995, 0.20306921, -0.46556181, -0.10637223,
            0.60909081, 0.55366743, -0.22907487, 1.13089538, 0.34430629,
            0.35133895, 0.085365, -0.58662325, -0.13062993, -0.04200239
        ],
                    dtype=numpy.float32),
        "All those moments are lost in time.":
        numpy.array([
            -0.78943789, -0.30322614, 0.3780162, 0.80896467, -0.42042252,
            -0.64176518, -0.51211309, -0.1537444, -0.04233316, 0.07710438,
            0.66949254, 0.37771451, -0.74869132, -0.55132926, 0.53695548,
            -0.11229508, 0.6673997, -0.34724045, -0.42173663, 0.7451877,
            1.01433206, -0.85418928, -0.31583607, -0.6812892, 0.42722669,
            -0.43322188, -0.35293943, 0.7662127, -0.30090365, -0.13694993,
            -0.04172039, -0.65059775, -0.62617165, 0.71341687, 0.82349646,
            -0.31194365, 0.00356466, -0.32218212, 0.15857732, 0.82880032,
            0.0566355, 0.43106011, -1.01921201, 0.51658779, 0.8068108,
            -0.09396499, -0.37920368, -0.08726061, 0.29975161, 0.25999272,
            0.23571083, 0.24800834, -0.73045135, 0.19150458, -0.19696848,
            -0.11186107, 0.1336731, 0.33246318, 0.22474274, 1.15420532,
            0.39482915, -0.70385826, 0.54841375, -0.03638301, 0.54499787,
            0.02484709, -0.2070619, -0.69282937, -0.21465099, 0.11578664,
            0.22713676, 0.21237181, 0.2007356, 0.14489903, -0.37357002,
            -0.50091666, 0.59818357, -0.36113665, 0.06037673, -0.26377741,
            0.31544513, -0.23714744, -0.01429842, 0.17592101, -0.16280818,
            -0.58340323, 0.63590413, 0.31803992, -0.47035503, -0.17544734,
            0.66008455, 0.77849454, -0.04235193, 1.29202402, 0.12573826,
            0.20377615, -0.08164676, -0.41151166, -0.1280518, 0.02905136
        ],
                    dtype=numpy.float32),
    }
    model_word2vec = abstraction.load_word_vector_model(
        filename=word_vector_model)
    working_expression_NL = expression
    # Convert the expression to a word vector.
    working_expression_WV =\
        abstraction.convert_sentence_string_to_word_vector(
            sentence_string = working_expression_NL,
            model_word2vec = model_word2vec
        )
    log.info(
        "word vector representation of expression \"{working_expression_NL}\":"
        "\n{working_expression_WV}".format(
            working_expression_NL=working_expression_NL,
            working_expression_WV=working_expression_WV))
    # Define table headings.
    table_contents = [[
        "working expression natural language",
        "stored expression natural language",
        # BUG FIX: corrected typo "amd" -> "and" in the displayed heading.
        "absolute magnitude difference between working and stored expression "
        "word vectors",
        "angle between working and stored expression word vectors"
    ]]
    # Compare the expression word vector representation to existing word
    # vectors.
    magnitude_differences = []
    angles = []
    stored_expressions_NL_list = []
    magnitude_working_expression_WV = datavision.magnitude(
        working_expression_WV)
    for stored_expression_NL in stored_expressions:
        stored_expression_WV = stored_expressions[stored_expression_NL]
        magnitude_stored_expression_WV = datavision.magnitude(
            stored_expression_WV)
        magnitude_difference_working_expression_WV_stored_expression_WV = abs(
            magnitude_working_expression_WV - magnitude_stored_expression_WV)
        angle_working_expression_WV_stored_expression_WV = datavision.angle(
            working_expression_WV, stored_expression_WV)
        # Store comparison results in lists.
        magnitude_differences.append(
            magnitude_difference_working_expression_WV_stored_expression_WV)
        angles.append(angle_working_expression_WV_stored_expression_WV)
        stored_expressions_NL_list.append(stored_expression_NL)
        # Build table.
        table_contents.append([
            str(working_expression_NL),
            str(stored_expression_NL),
            str(magnitude_difference_working_expression_WV_stored_expression_WV
                ),
            str(angle_working_expression_WV_stored_expression_WV)
        ])
    # Record table.
    print(pyprel.Table(contents=table_contents))
    log.info("")
    # Accept the smallest-angle match only when the magnitude-difference
    # ranking agrees with the angle ranking to within a quarter of the number
    # of candidates; otherwise report failure.
    index_minimum_magnitude_differences =\
        magnitude_differences.index(min(magnitude_differences))
    index_minimum_angles = angles.index(min(angles))
    index_minimum_match_width = len(angles) / 4
    if abs(index_minimum_magnitude_differences -
           index_minimum_angles) < index_minimum_match_width:
        log.info("translation: {translation_expression_NL}".format(
            translation_expression_NL =\
                stored_expressions_NL_list[index_minimum_angles]
        ))
    else:
        log.error("unable to translate")
    log.info("")
    program.terminate()
def most_similar_expression(expression=None,
                            expressions=None,
                            model_word2vec=None,
                            detail=True):
    """
    Return the expression in ``expressions`` whose word vector representation
    subtends the smallest angle with the word vector of ``expression``.

    When ``detail`` is true, a comparison table (magnitude differences and
    angles for every candidate) is printed first.
    """
    working_expression_NL = expression
    # Convert the expression to a word vector.
    working_expression_WV =\
        abstraction.convert_sentence_string_to_word_vector(
            sentence_string = working_expression_NL,
            model_word2vec = model_word2vec
        )
    # Convert every candidate expression to a word vector.
    # (The loop variable is deliberately not called "expression" -- the
    # original shadowed the parameter of the same name.)
    stored_expressions = dict()
    for candidate_expression_NL in expressions:
        stored_expressions[candidate_expression_NL] =\
            abstraction.convert_sentence_string_to_word_vector(
                sentence_string = candidate_expression_NL,
                model_word2vec = model_word2vec
            )
    # Define table headings.
    table_contents = [[
        "working expression natural language",
        "stored expression natural language",
        # BUG FIX: corrected typo "amd" -> "and" in the displayed heading.
        "absolute magnitude difference between working and stored expression "
        "word vectors",
        "angle between working and stored expression word vectors"
    ]]
    # Compare the expression word vector representation to existing word
    # vectors.
    magnitude_differences = []
    angles = []
    stored_expressions_NL_list = []
    magnitude_working_expression_WV = datavision.magnitude(
        working_expression_WV)
    for stored_expression_NL in stored_expressions:
        stored_expression_WV = stored_expressions[stored_expression_NL]
        magnitude_stored_expression_WV = datavision.magnitude(
            stored_expression_WV)
        magnitude_difference_working_expression_WV_stored_expression_WV = abs(
            magnitude_working_expression_WV - magnitude_stored_expression_WV)
        angle_working_expression_WV_stored_expression_WV = datavision.angle(
            working_expression_WV, stored_expression_WV)
        # Store comparison results in lists.
        magnitude_differences.append(
            magnitude_difference_working_expression_WV_stored_expression_WV)
        angles.append(angle_working_expression_WV_stored_expression_WV)
        stored_expressions_NL_list.append(stored_expression_NL)
        # Build table.
        table_contents.append([
            str(working_expression_NL),
            str(stored_expression_NL),
            str(magnitude_difference_working_expression_WV_stored_expression_WV
                ),
            str(angle_working_expression_WV_stored_expression_WV)
        ])
    if detail:
        # Record table.
        print(pyprel.Table(contents=table_contents))
    # The best match is the candidate with the smallest vector angle.
    index_minimum_angles = angles.index(min(angles))
    translation_expression_NL = stored_expressions_NL_list[
        index_minimum_angles]
    return translation_expression_NL
def main(options):
    # Load a grid-search results map and summarize it: a table of all
    # entries, a parallel coordinates plot, a test-score-versus-epoch plot
    # per architecture, and a ranking of the best-scoring models.
    global program
    program = propyte.Program(options=options,
                              name=name,
                              version=version,
                              logo=logo)
    global log
    from propyte import log
    log.info("")
    # access options and arguments
    grid_search_filename = options["--gridsearchfile"]
    # load grid search map
    # NOTE(review): grid_search_map is assumed to be a dict of parallel lists
    # keyed by "epoch", "hidden_nodes", "score_training", "score_test" --
    # confirm against shijian.import_object's output.
    grid_search_map = shijian.import_object(filename=grid_search_filename)
    number_of_entries = len(grid_search_map["epoch"])
    log.info("number of entries: {number_of_entries}".format(
        number_of_entries=number_of_entries))
    # table
    table_contents = [[
        "epoch", "architecture", "score training", "score testing"
    ]]
    for index in range(0, number_of_entries):
        table_contents.append([
            str(grid_search_map["epoch"][index]),
            str(grid_search_map["hidden_nodes"][index]),
            str(grid_search_map["score_training"][index]),
            str(grid_search_map["score_test"][index])
        ])
    log.info("\ngrid search map:\n")
    log.info(pyprel.Table(contents=table_contents, ))
    # parallel coordinates plot
    number_of_entries = len(grid_search_map["epoch"])
    datasets = []
    for index in range(0, number_of_entries):
        row = []
        # Pad every architecture with zeros to a fixed width of 5 layers so
        # each row has the same number of coordinates.
        architecture_padded = grid_search_map["hidden_nodes"][index] + [0] * (
            5 - len(grid_search_map["hidden_nodes"][index]))
        row.append(grid_search_map["epoch"][index])
        row.extend(architecture_padded)
        row.append(grid_search_map["score_training"][index])
        row.append(grid_search_map["score_test"][index])
        datasets.append(row)
    datavision.save_parallel_coordinates_matplotlib(
        datasets[::-1], filename="parallel_coordinates.png")
    # plot
    # Group (epoch, test score) points by distinct architecture.
    architectures = shijian.unique_list_elements(
        grid_search_map["hidden_nodes"])
    architecture_epoch_score = {}
    for architecture in architectures:
        architecture_epoch_score[str(architecture)] = []
        for index in range(0, number_of_entries):
            if grid_search_map["hidden_nodes"][index] == architecture:
                architecture_epoch_score[str(architecture)].append([
                    grid_search_map["epoch"][index],
                    grid_search_map["score_test"][index]
                ])
    figure = matplotlib.pyplot.figure()
    figure.set_size_inches(10, 10)
    axes = figure.add_subplot(1, 1, 1)
    axes.set_xscale("log")
    figure.suptitle("hyperparameter map", fontsize=20)
    matplotlib.pyplot.xlabel("epochs")
    matplotlib.pyplot.ylabel("training test score")
    # One line per architecture.  NOTE(review): dict.iteritems is Python 2
    # only -- this module evidently targets Python 2.
    for key, value in architecture_epoch_score.iteritems():
        epochs = [element[0] for element in value]
        score_test = [element[1] for element in value]
        matplotlib.pyplot.plot(epochs, score_test, label=key)
    matplotlib.pyplot.legend(loc="center left",
                             bbox_to_anchor=(1, 0.5),
                             fontsize=10)
    matplotlib.pyplot.savefig("hyperparameter_map.eps",
                              bbox_inches="tight",
                              format="eps")
    # find best-scoring models
    # Find the 15 best scores and plot them using parallel coordinates.
    best_models = sorted(zip(grid_search_map["score_test"],
                             grid_search_map["score_training"],
                             grid_search_map["hidden_nodes"]),
                         reverse=True)[:15]
    datasets = []
    for model in best_models:
        row = []
        architecture_padded = model[2] + [0] * (5 - len(model[2]))
        row.extend(architecture_padded)
        row.append(model[1])
        row.append(model[0])
        datasets.append(row)
    datavision.save_parallel_coordinates_matplotlib(
        datasets, filename="15_best_models_parallel_coordinates.png")
    # Find the 3 best scores.
    best_models = sorted(zip(grid_search_map["score_test"],
                             grid_search_map["hidden_nodes"]),
                         reverse=True)[:3]
    # table
    table_contents = [["architecture", "score testing"]]
    for model in best_models:
        table_contents.append([str(model[1]), str(model[0])])
    log.info("\nbest-scoring models:\n")
    log.info(pyprel.Table(contents=table_contents, ))
    log.info("")
    program.terminate()
def main(options):
    """
    Select events of a ROOT tree and append their variable values to a CSV
    file, one row per selected event, with an optional headings row and an
    optional cap on the number of selected events.
    """
    global program
    program = propyte.Program(options=options,
                              name=name,
                              version=version,
                              logo=logo)
    global log
    from propyte import log
    print("")
    # access options and arguments
    filename_ROOT = options["--fileroot"]
    filename_CSV = options["--filecsv"]
    selection = options["--selection"]
    class_label = int(options["--classlabel"])
    name_tree = options["--tree"]
    maximum_number_of_events = None if options["--maxevents"].lower() == \
        "none" else int(options["--maxevents"])
    include_headings = options["--headings"].lower() == "true"
    if not os.path.isfile(os.path.expandvars(filename_ROOT)):
        # BUG FIX: the message previously contained no {filename}
        # placeholder, so the filename was dropped from the log output.
        log.error("file {filename} not found".format(filename=filename_ROOT))
        program.terminate()
    if os.path.isfile(os.path.expandvars(filename_CSV)):
        # BUG FIX: {filename} placeholder restored (see above).
        log.warning(
            "CSV file {filename} exists -- *append* data to file".format(
                filename=filename_CSV))
        print("")
        append = True
    else:
        append = False
    file_ROOT = abstraction.open_ROOT_file(filename_ROOT)
    tree = file_ROOT.Get(name_tree)
    number_of_events = tree.GetEntries()
    file_CSV = open(filename_CSV, "a")
    writer = csv.writer(file_CSV, delimiter=",")
    log.info(
        textwrap.dedent("""
        input ROOT file: {filename_ROOT}
        output CSV file: {filename_CSV}
        selection: {selection}
        class label: {class_label}
        """.format(filename_ROOT=filename_ROOT,
                   filename_CSV=filename_CSV,
                   selection=selection,
                   class_label=class_label)))
    print("")
    # BUG FIX: {filename} placeholder restored (see above).
    log.info("save variables of events to CSV {filename}".format(
        filename=filename_CSV))
    print("")
    progress = shijian.Progress()
    progress.engage_quick_calculation_mode()
    index = -1  # robustness: keep `index` defined even if the tree is empty
    index_selected = 0
    detail = True
    for index, event in enumerate(tree):
        if select_event(event=event, selection=selection):
            index_selected = index_selected + 1
            if \
                maximum_number_of_events is not None and \
                index_selected > maximum_number_of_events:
                break
            # Variables written for each selected event.  Many further
            # candidate tree branches were previously listed here commented
            # out (event kinematics such as Centrality_all and dRbb_avg,
            # b-tagging counts nBTags*, reconstruction-BDT outputs
            # ClassifBDTOutput_*/semilepMVAreco_*, per-object variables
            # el_*[i]/mu_*[i]/jet_*[i] and large-R jet variables ljet_*[i]);
            # re-enable any of them by adding a
            # Variable_ttHbb(event=event, name="<branch>") entry below.
            line = [
                Variable_ttHbb(event=event, name="Aplan_jets"),
                Variable_ttHbb(event=event, name="dEtajj_MaxdEta"),
                Variable_ttHbb(event=event, name="dRbb_avg"),
                Variable_ttHbb(event=event, name="dRbb_MaxPt"),
                Variable_ttHbb(event=event, name="H1_all"),
                Variable_ttHbb(event=event, name="Mbb_MindR"),
                Variable_ttHbb(event=event, name="NHiggs_30"),
                Variable_ttHbb(event=event, name="pT_jet5"),
                # The class label (signal/background) is written last.
                Variable_ttHbb(name="class", value=class_label)
            ]
            # Print a one-off table of variable names and value types for the
            # first selected event.
            if detail:
                log.info("event variable details:")
                log.info(
                    "\nnumber of variables: {number}".format(number=len(line)))
                table_contents = [["variable value", "variable type"]]
                for variable in line:
                    table_contents.append(
                        [str(variable.name()),
                         str(type(variable.value()))])
                print(pyprel.Table(contents=table_contents, ))
                detail = False
            # Write the headings row once, and only to a fresh file.
            if include_headings and not append:
                headings = [variable.name() for variable in line]
                writer.writerow(headings)
                include_headings = False
            values = [variable.value() for variable in line]
            writer.writerow(values)
        # BUG FIX: under Python 2, index / number_of_events is integer
        # division and always 0; force float division so the progress
        # fraction is meaningful.
        print(progress.add_datum(fraction=index / float(number_of_events)))
    # BUG FIX: the CSV file handle was never closed.
    file_CSV.close()
    print("")
    # BUG FIX: enumerate is zero-based, so the total processed is index + 1.
    log.info(
        "{number_selected} events of {number_total} passed selection".format(
            number_selected=index_selected, number_total=index + 1))
    print("")
    program.terminate()
def main(options):
    """
    Grid-search neural-network hyperparameters (epochs x hidden-layer
    architectures) on the SUSY dataset, recording train/test accuracy,
    tabulating all grid points and plotting a hyperparameter map.
    """
    global program
    program = propyte.Program(options=options,
                              name=name,
                              version=version,
                              logo=logo)
    global log
    from propyte import log

    log.info("")

    # access options and arguments
    input_data_filename = options["--data"]

    # define dataset
    # Load the SUSY dataset (https://archive.ics.uci.edu/ml/datasets/SUSY).
    # The first column is the class label (1 for signal, 0 for background),
    # followed by 18 features (8 low-level features and 10 high-level
    # features): lepton 1 pT/eta/phi, lepton 2 pT/eta/phi, missing energy
    # magnitude and phi, MET_rel, axial MET, M_R, M_TR_2, R, MT2, S_R,
    # M_Delta_R, dPhi_r_b, cos(theta_r1).
    data = abstraction.access_SUSY_dataset_format_file(input_data_filename)
    dataset = abstraction.Dataset(data=data)

    # define data
    log.info("split data for cross-validation")
    features_train, features_test, targets_train, targets_test = \
        cross_validation.train_test_split(
            dataset.features(),
            dataset.targets(),
            train_size=0.7
        )

    # grid search
    import itertools
    epochs = [10, 100, 500, 1000]
    architecture = [200, 300, 300, 300, 200]
    grid_search_map = {
        "epoch":          [],
        "hidden_nodes":   [],
        "score_training": [],
        "score_test":     []
    }

    # Count the total number of grid points up front for progress reporting.
    # PORTABILITY FIX: range replaces Python-2-only xrange throughout.
    count_total = 0
    for epoch in epochs:
        for nodes_count in range(1, len(architecture) + 1):
            for combination in itertools.product(architecture,
                                                 repeat=nodes_count):
                count_total += 1
    count = 0
    progress = shijian.Progress()
    progress.engage_quick_calculation_mode()

    for epoch in epochs:
        for nodes_count in range(1, len(architecture) + 1):
            combinations = itertools.product(architecture, repeat=nodes_count)
            for combination in combinations:
                hidden_nodes = list(combination)
                # define model
                log.info("define classification model")
                classifier = abstraction.Classification(
                    number_of_classes=2,
                    hidden_nodes=hidden_nodes,
                    epochs=epoch)
                # train model
                log.info("fit model to dataset features and targets")
                classifier._model.fit(features_train, targets_train)
                #classifier.save()
                # predict and cross-validate training
                log.info("test trained model on training dataset")
                score_training = metrics.accuracy_score(
                    classifier._model.predict(features_train), targets_train)
                score_test = metrics.accuracy_score(
                    classifier._model.predict(features_test), targets_test)
                log.info("\ntraining-testing instance complete:")
                log.info("epoch: {epoch}".format(epoch=epoch))
                log.info("architecture: {architecture}".format(
                    architecture=hidden_nodes))
                log.info("score training: {score_training}".format(
                    score_training=100 * score_training))
                log.info("score test: {score_test}".format(
                    score_test=100 * score_test))
                pyprel.print_line()
                grid_search_map["epoch"].append(epoch)
                grid_search_map["hidden_nodes"].append(hidden_nodes)
                grid_search_map["score_training"].append(score_training)
                grid_search_map["score_test"].append(score_test)
                # Save the running grid-search map so partial results survive
                # an interrupted scan.
                shijian.export_object(grid_search_map,
                                      filename="grid_search_map.pkl",
                                      overwrite=True)
                count += 1
                # BUG FIX: the fraction previously used (count + 1) after the
                # increment, overshooting 1.0 on the last datum, and was
                # integer division under Python 2; it is now count/count_total
                # as a float.
                print(progress.add_datum(
                    fraction=count / float(count_total)))

    number_of_entries = len(grid_search_map["epoch"])

    # table of all grid points
    table_contents = [[
        "epoch", "architecture", "score training", "score testing"
    ]]
    for index in range(number_of_entries):
        table_contents.append([
            str(grid_search_map["epoch"][index]),
            str(grid_search_map["hidden_nodes"][index]),
            str(grid_search_map["score_training"][index]),
            str(grid_search_map["score_test"][index])
        ])
    print("\ngrid search map:\n")
    print(pyprel.Table(contents=table_contents))

    # plot: test score versus epochs, one curve per architecture
    architectures = shijian.unique_list_elements(
        grid_search_map["hidden_nodes"])
    architecture_epoch_score = {}
    for architecture in architectures:
        architecture_epoch_score[str(architecture)] = []
        for index in range(number_of_entries):
            if grid_search_map["hidden_nodes"][index] == architecture:
                architecture_epoch_score[str(architecture)].append([
                    grid_search_map["epoch"][index],
                    grid_search_map["score_test"][index]
                ])
    figure = matplotlib.pyplot.figure()
    figure.set_size_inches(10, 10)
    axes = figure.add_subplot(1, 1, 1)
    axes.set_xscale("log")
    figure.suptitle("hyperparameter map", fontsize=20)
    matplotlib.pyplot.xlabel("epochs")
    matplotlib.pyplot.ylabel("training test score")
    # PORTABILITY FIX: dict.iteritems does not exist in Python 3; items()
    # behaves the same for this iteration in both versions. The locals are
    # renamed so the epochs grid list above is no longer shadowed.
    for key, value in architecture_epoch_score.items():
        epoch_values = [element[0] for element in value]
        score_values = [element[1] for element in value]
        matplotlib.pyplot.plot(epoch_values, score_values, label=key)
    matplotlib.pyplot.legend(loc="center right")
    matplotlib.pyplot.savefig("hyperparameter_map.eps",
                              bbox_inches="tight",
                              format="eps")

    # find best-scoring models
    # Find the 3 best scores.
    best_models = sorted(zip(grid_search_map["score_test"],
                             grid_search_map["hidden_nodes"]),
                         reverse=True)[:3]
    # table
    table_contents = [["architecture", "score testing"]]
    for model in best_models:
        table_contents.append([str(model[1]), str(model[0])])
    print("\nbest-scoring models:\n")
    print(pyprel.Table(contents=table_contents))
    log.info("")
    program.terminate()
def main():
    """
    Monitor NVIDIA GPU state via nvidia-smi: print a table or aligned row
    per interval, optionally log readings to CSV, or draw a terminal graph
    of power draw or temperature.
    """
    global options
    options = docopt.docopt(__doc__)
    if options["--version"]:
        print(__version__)
        sys.exit(0)
    graph_power       = options["--graphpower"]
    graph_temperature = options["--graphtemperature"]
    table             = options["--table"]
    CSV_logging       = options["--CSV_logging"].lower() == "true"
    filepath_CSV      = options["--filepath_CSV"]
    interval          = float(options["--interval"])
    if CSV_logging:
        log.info("logging to CSV " + filepath_CSV)
    # Query fields, in order:
    # name, temperature.gpu, power.draw, memory.used, memory.total,
    # utilization.gpu
    command_general = "nvidia-smi "      \
                      "--query-gpu="     \
                      "name,"            \
                      "temperature.gpu," \
                      "power.draw,"      \
                      "memory.used,"     \
                      "memory.total,"    \
                      "utilization.gpu " \
                      "--format="        \
                      "csv,"             \
                      "noheader"
    command_power = "nvidia-smi " \
                    "--query-gpu=power.draw " \
                    "--format=csv,noheader"
    command_temperature = "nvidia-smi " \
                          "--query-gpu=temperature.gpu "\
                          "--format=csv,noheader"
    measurements = []
    try:
        while True:
            if not graph_power and not graph_temperature:
                timestamp = datetime.datetime.utcnow()
                timestamp_string = timestamp.strftime("%Y-%m-%dT%H%M%SZ")
                result = subprocess.check_output(
                    command_general.split(' ')).decode('utf-8')
                data = [datum.strip() for datum in result.split(",")]
                # BUG FIX: the indices now match the --query-gpu field order
                # above; previously utilization/memory.used/memory.total were
                # transposed (utilization took data[3], etc.).
                temperature        = str(data[1])
                temperature_string = temperature + " °C"
                power_draw         = str(data[2])   # e.g. "35.21 W"
                memory_used        = str(data[3])   # e.g. "123 MiB"
                memory_total       = str(data[4])   # e.g. "11178 MiB"
                utilization        = str(data[5])   # e.g. "45 %"
                if table:
                    print(pyprel.Table(
                        contents = [[
                            timestamp_string,
                            temperature_string,
                            utilization,
                            memory_used,
                            memory_total,
                            power_draw
                        ]]
                    ))
                if CSV_logging:
                    # One-row frame appended to the CSV; the header is only
                    # written when the file does not exist yet.
                    # BUG FIX: column labels and unit-stripping now match the
                    # values (previously utilization was labelled MiB and
                    # memory.total a percentage); pd.DataFrame.append
                    # (removed in pandas 2.0) replaced by direct construction.
                    df = pd.DataFrame(
                        [{
                            "datetime":                timestamp,
                            "temperature_C":           temperature,
                            "power_draw_W":            power_draw[:-2],   # strip " W"
                            "utilization_percentage":  utilization[:-2],  # strip " %"
                            "memory_used_MiB":         memory_used[:-4],  # strip " MiB"
                            "memory_total_MiB":        memory_total[:-4]  # strip " MiB"
                        }],
                        columns = [
                            "datetime",
                            "temperature_C",
                            "power_draw_W",
                            "utilization_percentage",
                            "memory_used_MiB",
                            "memory_total_MiB"
                        ]
                    )
                    log.info(timestamp_string + " log to CSV " + filepath_CSV)
                    df.to_csv(filepath_CSV,
                              header=not os.path.isfile(filepath_CSV),
                              index=False,
                              mode="a")
                else:
                    # Plain aligned row printout.
                    temperature_string = temperature_string.rjust(5)
                    power_draw         = power_draw.rjust(8)
                    utilization        = utilization.rjust(8)
                    memory_used        = memory_used.rjust(8)
                    memory_total       = memory_total.rjust(5)
                    print(
                        "|{timestamp_string}|{temperature_string}|{power_draw}"\
                        "|{utilization}|{memory_used}|{memory_total}|".format(
                            timestamp_string   = timestamp_string,
                            temperature_string = temperature_string,
                            power_draw         = power_draw,
                            utilization        = utilization,
                            memory_used        = memory_used,
                            memory_total       = memory_total
                        ))
                time.sleep(interval)
            elif graph_power or graph_temperature:
                if graph_power:
                    result = subprocess.check_output(
                        command_power.split(' ')).decode('utf-8')
                    result = result.strip().strip(" W")
                elif graph_temperature:
                    result = subprocess.check_output(
                        command_temperature.split(' ')).decode('utf-8')
                measurements.append(float(result.strip()))
                # Keep a rolling window of the last 20 measurements.
                measurements = measurements[-20:]
                y = measurements
                x = range(0, len(y))
                plot = datavision.TTYFigure()
                tmp = plot.plot(x, y, marker="_o")
                print(tmp)
                time.sleep(interval)
                # ANSI escape: clear the terminal before the next frame.
                print(chr(27) + "[2J")
    except KeyboardInterrupt:
        print("")
def main(options):
    """
    Analyse a labelled features CSV (binary "class" column, 0 = ttbb,
    1 = ttH): print per-feature min/max/mean by class and optionally save
    histogram comparisons, a scatter matrix and per-event images.
    """
    global program
    program = propyte.Program(
        options = options,
        name    = name,
        version = version,
        logo    = logo
    )
    global log
    from propyte import log

    print("")

    filename_CSV               = options["--infile"]
    make_histogram_comparisons = options["--histogramcomparisons"].lower() == "true"
    make_scatter_matrix        = options["--scattermatrix"].lower() == "true"
    make_event_images          = options["--eventimages"].lower() == "true"
    number_of_event_images     = int(options["--numberofeventimages"])
    directoryname_plots        = options["--directoryplots"]

    if not os.path.isfile(os.path.expandvars(filename_CSV)):
        # BUG FIX: the message previously had no {filename} placeholder, so
        # .format was a no-op and the filename never appeared in the log.
        log.error("file {filename} not found".format(filename = filename_CSV))
        program.terminate()

    log.info("read CSV from {filename}".format(filename = filename_CSV))
    data = pd.read_csv(filename_CSV)

    feature_names = list(data.columns)
    data_class_0  = data.loc[data["class"] == 0]
    data_class_1  = data.loc[data["class"] == 1]

    print("")
    log.info("basic feature characteristics")
    print("")
    table_contents = [[
        "feature",
        "minimum value in class 0",
        "minimum value in class 1",
        "maximum value in class 0",
        "maximum value in class 1",
        "mean value in class 0",
        "mean value in class 1"
    ]]
    for feature_name in feature_names:
        values_class_0 = list(data_class_0[feature_name])
        values_class_1 = list(data_class_1[feature_name])
        table_contents.append([
            feature_name,
            min(values_class_0),
            min(values_class_1),
            max(values_class_0),
            max(values_class_1),
            sum(values_class_0) / len(values_class_0),
            sum(values_class_1) / len(values_class_1)
        ])
    print(
        pyprel.Table(
            contents = table_contents
        )
    )

    if make_histogram_comparisons:
        # One class-0-versus-class-1 histogram per feature.
        for feature_name in feature_names:
            filename = shijian.propose_filename(
                filename = feature_name + "_ttbb_ttH.png"
            )
            log.info("save histogram {filename}".format(filename = filename))
            datavision.save_histogram_comparison_matplotlib(
                values_1      = list(data_class_0[feature_name]),
                values_2      = list(data_class_1[feature_name]),
                label_1       = "ttbb",
                label_2       = "ttH",
                label_ratio_x = "",
                label_y       = "",
                title         = feature_name,
                filename      = filename,
                directory     = directoryname_plots
            )

    if make_scatter_matrix:
        filename = "scatter_matrix_ttbb_ttH.jpg"
        log.info("save scatter matrix {filename}".format(filename = filename))
        # COMPATIBILITY FIX: pd.scatter_matrix was removed in pandas 1.0;
        # pandas.plotting.scatter_matrix is available since pandas 0.20.
        scatter_matrix = pd.plotting.scatter_matrix(
            data,
            figsize  = [15, 15],
            marker   = ".",
            s        = 0.2,
            diagonal = "kde"
        )
        for ax in scatter_matrix.ravel():
            ax.set_xlabel(
                ax.get_xlabel(),
                fontsize = 15,
                rotation = 90
            )
            ax.set_ylabel(
                ax.get_ylabel(),
                fontsize = 15,
                rotation = 0,
                labelpad = 60
            )
            ax.get_xaxis().set_ticks([])
            ax.get_yaxis().set_ticks([])
        if not os.path.exists(directoryname_plots):
            os.makedirs(directoryname_plots)
        plt.savefig(
            directoryname_plots + "/" + filename,
            dpi = 700
        )

    if make_event_images:
        # Render the first number_of_event_images rows of each class as
        # greyscale images.
        directoryname = "event_images"
        if not os.path.exists(directoryname):
            os.makedirs(directoryname)
        for class_label in [0, 1]:
            data_class = data.loc[data["class"] == class_label]
            for index, row in data_class[0:number_of_event_images].iterrows():
                # COMPATIBILITY FIX: Series.as_matrix was removed in
                # pandas 1.0; .values is the long-standing equivalent.
                image = datavision.NumPy_array_pad_square_shape(
                    array     = row.values,
                    pad_value = -4
                )
                plt.imshow(
                    image,
                    cmap          = "Greys",
                    interpolation = "nearest"
                )
                filename = "event_image_class_" + str(class_label) + \
                           "_index_" + str(index) + ".png"
                log.info("save event image {filename}".format(
                    filename = filename))
                plt.savefig(
                    directoryname + "/" + filename,
                    dpi = 200
                )

    print("")
    program.terminate()
def main():
    # Demonstrate pyprel printout helpers: nested-dictionary printouts,
    # existing and rendered logos, segment displays, and tables in a range
    # of width/wrapping/delimiter configurations. Output goes to stdout;
    # get_input pauses between examples.

    print("\nexample: printout of dictionary")
    get_input("Press Enter to continue.")
    # Nested sample metadata used to exercise the dictionary printers.
    information = {
        "sample information": {
            "ID": 169888,
            "name": "ttH",
            "number of events": 124883,
            "cross section": 0.055519,
            "k factor": 1.0201,
            "generator": "pythia8",
            "variables": {
                "trk_n": 147,
                "zappo_n": 9001
            }
        }
    }
    pyprel.print_line()
    # Direct printout versus string-returning variant of the same data.
    pyprel.print_dictionary(dictionary = information)
    pyprel.print_line()
    print(pyprel.dictionary_string(dictionary = information))
    pyprel.print_line()

    print("\nexample: printout of existing logo")
    get_input("Press Enter to continue.")
    # NOTE(review): the whitespace inside this ASCII-art literal appears to
    # have been collapsed at some point; the alignment likely needs
    # restoration against the original artwork.
    text = (
        " ____ _ _____ _ \n"
        " / ___|___ | | ___ _ __| ___| | _____ __ \n"
        " | | / _ \| |/ _ \| '__| |_ | |/ _ \ \ /\ / / \n"
        " | |__| (_) | | (_) | | | _| | | (_) \ V V / \n"
        " \____\___/|_|\___/|_| |_| |_|\___/ \_/\_/ "
    )
    pyprel.print_center(text = text)

    print("\nexample: rendering and printout of logo")
    get_input("Press Enter to continue.")
    # Render a banner logo from text, then print it centered.
    name = "aria"
    logo = pyprel.render_banner(
        text = name.upper()
    )
    pyprel.print_line()
    print(pyprel.center_string(text = logo))
    pyprel.print_line()

    print("\nexample: rendering and printout segment display")
    get_input("Press Enter to continue.")
    print(pyprel.render_segment_display(text = "0123456789"))

    print("\nexample: printout of tables")
    get_input("Press Enter to continue.")
    # Two-column contents with one deliberately long cell to exercise
    # wrapping behaviour.
    table_contents = [
        ["heading 1", "heading 2"],
        ["some text", "some more text"],
        ["lots and lots and lots and lots and lots of text", "some more text"]
    ]
    # Fixed column width.
    print(
        pyprel.Table(
            contents = table_contents,
            column_width = 25
        )
    )
    # Requested overall table width.
    print(
        pyprel.Table(
            contents = table_contents,
            table_width_requested = 30
        )
    )
    # Requested width with hard wrapping of cell contents.
    print(
        pyprel.Table(
            contents = table_contents,
            table_width_requested = 30,
            hard_wrapping = True
        )
    )
    # Default sizing.
    print(
        pyprel.Table(
            contents = table_contents
        )
    )
    # Centered printout of a rendered table string.
    pyprel.print_center(
        text = pyprel.Table(
            contents = table_contents,
            table_width_requested = 30
        ).__str__()
    )
    # Custom column delimiter.
    print(
        pyprel.Table(
            contents = table_contents,
            column_width = 25,
            column_delimiter = "||"
        )
    )
    # Custom row delimiter.
    print(
        pyprel.Table(
            contents = table_contents,
            column_width = 25,
            row_delimiter = "~"
        )
    )
    # Three-column table.
    table_contents = [
        [
            "heading 1",
            "heading 2",
            "heading 3"
        ],
        [
            "some text",
            "some more text",
            "even more text"
        ],
        [
            "lots and lots and lots and lots and lots of text",
            "some more text",
            "some more text"
        ]
    ]
    print(
        pyprel.Table(
            contents = table_contents
        )
    )
    # Four-column table.
    table_contents = [
        [
            "heading 1",
            "heading 2",
            "heading 3",
            "heading 4"
        ],
        [
            "some text",
            "some more text",
            "even more text",
            "yeah more text"
        ],
        [
            "lots and lots and lots and lots and lots of text",
            "some more text",
            "some more text",
            "some more text"
        ]
    ]
    print(
        pyprel.Table(
            contents = table_contents
        )
    )
def main(): pyprel.print_line() print("\nconvert Markdown table to pyprel table\n") table_Markdown = """ |**variable 1**|**variable 2**| |--------------|--------------| |1 |0.23545 | |2 |0.63523 | |3 |0.55231 | |4 |0.89563 | |5 |0.55345 | """ table_contents = pyprel.table_Markdown_to_table_pyprel( table = table_Markdown ) print( pyprel.Table( contents = table_contents, ) ) pyprel.print_line() print("\ncompose and print table\n") table_contents = [ [ "number", "letter" ], [ 1, "a" ], [ 2, "b" ] ] print( pyprel.Table( contents = table_contents ) ) pyprel.print_line() print("\ncompose and print a table using list comprehensions and zip\n") data_x = numpy.linspace(0, numpy.pi, 10) data_y = [numpy.sin(x) for x in data_x] print(pyprel.Table( contents = [["x", "y"]] + [[x, y] for x, y in zip(data_x, data_y)] )) pyprel.print_line() print("\ncompose aligned printouts of data using tables\n") table_contents = [ ["msg:" , "1536155474294"], ["signature:", "0C118313F6D19"], ["data:" , "1536155474294"] ] print(pyprel.Table( contents = table_contents, column_delimiter = "", row_delimiter = "", table_width_requested = 40 )) table_contents = [ ["msg:" , "15361554742941536155474294153615547429415361554742941536155474294"], ["signature:", "0C118313F6D190C118313F6D190C118313F6D190C118313F6D190C118313F6D19"], ["data:" , "15361554742941536155474294153615547429415361554742941536155474294"] ] print(pyprel.Table( contents = table_contents, column_delimiter = "", row_delimiter = "", table_width_requested = 40 )) pyprel.print_line()
def main(options):
    """
    Load ttbb and ttH events from ROOT files, optionally plot per-variable
    class comparisons, assemble a labelled dataset, then grid-search
    neural-network hyperparameters and report/plot the scores.
    """
    global program
    program = propyte.Program(options=options,
                              name=name,
                              version=version,
                              logo=logo)
    global log
    from propyte import log

    log.info("")

    # access options and arguments
    ROOT_filename_ttH  = options["--datattH"]
    ROOT_filename_ttbb = options["--datattbb"]
    engage_plotting    = string_to_bool(options["--plot"])

    # BUG FIX: these messages previously had no {filename} placeholder, so
    # .format was a no-op and the filenames never appeared in the log.
    log.info("ttH data file: {filename}".format(filename=ROOT_filename_ttH))
    log.info("ttbb data file: {filename}".format(filename=ROOT_filename_ttbb))

    # Access data for event classes ttbb and ttH.
    data_ttbb = abstraction.load_HEP_data(ROOT_filename=ROOT_filename_ttbb,
                                          tree_name="nominal",
                                          maximum_number_of_events=None)
    data_ttH = abstraction.load_HEP_data(ROOT_filename=ROOT_filename_ttH,
                                         tree_name="nominal",
                                         maximum_number_of_events=None)

    if engage_plotting is True:
        # Plot a ttbb-versus-ttH comparison histogram for each variable.
        for variable_name in data_ttbb.variables():
            log.info(
                "plot ttbb versus ttH comparison of {variable_name}".format(
                    variable_name=variable_name))
            datavision.save_histogram_comparison_matplotlib(
                values_1=data_ttbb.values(name=variable_name),
                values_2=data_ttH.values(name=variable_name),
                label_1=variable_name + "_ttbb",
                label_2=variable_name + "_ttH",
                normalize=True,
                label_ratio_x="frequency",
                label_y="",
                title=variable_name + "_ttbb_ttH",
                filename=variable_name + "_ttbb_ttH.png")

    # upcoming: consider data ordering
    # Preprocess all data (to be updated).
    data_ttbb.preprocess_all()
    data_ttH.preprocess_all()

    # Add class labels to the data sets, 0 for ttbb and 1 for ttH.
    for index in data_ttbb.indices():
        data_ttbb.variable(index=index, name="class", value=0)
    for index in data_ttH.indices():
        data_ttH.variable(index=index, name="class", value=1)

    # Convert the data sets to a simple list format with the first column
    # containing the class label.
    # NOTE(review): the class label is appended as a separate single-element
    # row after each feature row, and variable(name="class") is called
    # without an index — this looks inconsistent with the comment above;
    # verify against the abstraction.Dataset API before relying on it.
    _data = []
    for index in data_ttbb.indices():
        _data.append([
            data_ttbb.variable(index=index, name="el_1_pt"),
            data_ttbb.variable(index=index, name="el_1_eta"),
            data_ttbb.variable(index=index, name="el_1_phi"),
            data_ttbb.variable(index=index, name="jet_1_pt"),
            data_ttbb.variable(index=index, name="jet_1_eta"),
            data_ttbb.variable(index=index, name="jet_1_phi"),
            data_ttbb.variable(index=index, name="jet_1_e"),
            data_ttbb.variable(index=index, name="jet_2_pt"),
            data_ttbb.variable(index=index, name="jet_2_eta"),
            data_ttbb.variable(index=index, name="jet_2_phi"),
            data_ttbb.variable(index=index, name="jet_2_e"),
            data_ttbb.variable(index=index, name="met"),
            data_ttbb.variable(index=index, name="met_phi"),
            data_ttbb.variable(index=index, name="nJets"),
            data_ttbb.variable(index=index, name="Centrality_all"),
            #data_ttbb.variable(index = index, name = "Mbb_MindR")
        ])
        _data.append([data_ttbb.variable(name="class")])
    for index in data_ttH.indices():
        _data.append([
            data_ttH.variable(index=index, name="el_1_pt"),
            data_ttH.variable(index=index, name="el_1_eta"),
            data_ttH.variable(index=index, name="el_1_phi"),
            data_ttH.variable(index=index, name="jet_1_pt"),
            data_ttH.variable(index=index, name="jet_1_eta"),
            data_ttH.variable(index=index, name="jet_1_phi"),
            data_ttH.variable(index=index, name="jet_1_e"),
            data_ttH.variable(index=index, name="jet_2_pt"),
            data_ttH.variable(index=index, name="jet_2_eta"),
            data_ttH.variable(index=index, name="jet_2_phi"),
            data_ttH.variable(index=index, name="jet_2_e"),
            data_ttH.variable(index=index, name="met"),
            data_ttH.variable(index=index, name="met_phi"),
            data_ttH.variable(index=index, name="nJets"),
            data_ttH.variable(index=index, name="Centrality_all"),
            #data_ttH.variable(index = index, name = "Mbb_MindR")
        ])
        _data.append([data_ttH.variable(name="class")])
    dataset = abstraction.Dataset(data=_data)
    log.info("")

    # define data
    log.info("split data for cross-validation")
    features_train, features_test, targets_train, targets_test = \
        cross_validation.train_test_split(
            dataset.features(),
            dataset.targets(),
            train_size=0.7
        )

    # grid search
    import itertools
    epochs = [100, 100000]
    architecture = [200, 300, 300, 200]
    grid_search_map = {
        "epoch":          [],
        "hidden_nodes":   [],
        "score_training": [],
        "score_test":     []
    }

    # Count the total number of grid points up front for progress reporting.
    # PORTABILITY FIX: range replaces Python-2-only xrange throughout.
    count_total = 0
    for epoch in epochs:
        for nodes_count in range(1, len(architecture) + 1):
            for combination in itertools.product(architecture,
                                                 repeat=nodes_count):
                count_total += 1
    count = 0
    progress = shijian.Progress()
    progress.engage_quick_calculation_mode()

    for epoch in epochs:
        for nodes_count in range(1, len(architecture) + 1):
            combinations = itertools.product(architecture, repeat=nodes_count)
            for combination in combinations:
                hidden_nodes = list(combination)
                # define model
                log.info("define classification model")
                classifier = abstraction.Classification(
                    number_of_classes=2,
                    hidden_nodes=hidden_nodes,
                    epochs=epoch)
                # train model
                log.info("fit model to dataset features and targets")
                classifier._model.fit(features_train, targets_train)
                #classifier.save()
                # predict and cross-validate training
                log.info("test trained model on training dataset")
                score_training = metrics.accuracy_score(
                    classifier._model.predict(features_train), targets_train)
                score_test = metrics.accuracy_score(
                    classifier._model.predict(features_test), targets_test)
                log.info("\ntraining-testing instance complete:")
                log.info("epoch: {epoch}".format(epoch=epoch))
                log.info("architecture: {architecture}".format(
                    architecture=hidden_nodes))
                log.info("score training: {score_training}".format(
                    score_training=100 * score_training))
                log.info("score test: {score_test}".format(
                    score_test=100 * score_test))
                pyprel.print_line()
                grid_search_map["epoch"].append(epoch)
                grid_search_map["hidden_nodes"].append(hidden_nodes)
                grid_search_map["score_training"].append(score_training)
                grid_search_map["score_test"].append(score_test)
                # Save the running grid-search map so partial results survive
                # an interrupted scan.
                shijian.export_object(grid_search_map,
                                      filename="grid_search_map.pkl",
                                      overwrite=True)
                count += 1
                # BUG FIX: the fraction previously used (count + 1) after the
                # increment, overshooting 1.0 on the last datum, and was
                # integer division under Python 2; it is now count/count_total
                # as a float.
                print(progress.add_datum(
                    fraction=count / float(count_total)))

    number_of_entries = len(grid_search_map["epoch"])

    # table of all grid points
    table_contents = [[
        "epoch", "architecture", "score training", "score testing"
    ]]
    for index in range(number_of_entries):
        table_contents.append([
            str(grid_search_map["epoch"][index]),
            str(grid_search_map["hidden_nodes"][index]),
            str(grid_search_map["score_training"][index]),
            str(grid_search_map["score_test"][index])
        ])
    print("\ngrid search map:\n")
    print(pyprel.Table(contents=table_contents))

    # plot: test score versus epochs, one curve per architecture
    architectures = shijian.unique_list_elements(
        grid_search_map["hidden_nodes"])
    architecture_epoch_score = {}
    for architecture in architectures:
        architecture_epoch_score[str(architecture)] = []
        for index in range(number_of_entries):
            if grid_search_map["hidden_nodes"][index] == architecture:
                architecture_epoch_score[str(architecture)].append([
                    grid_search_map["epoch"][index],
                    grid_search_map["score_test"][index]
                ])
    figure = matplotlib.pyplot.figure()
    figure.set_size_inches(10, 10)
    axes = figure.add_subplot(1, 1, 1)
    axes.set_xscale("log")
    figure.suptitle("hyperparameter map", fontsize=20)
    matplotlib.pyplot.xlabel("epochs")
    matplotlib.pyplot.ylabel("training test score")
    # PORTABILITY FIX: dict.iteritems does not exist in Python 3; items()
    # behaves the same for this iteration in both versions. The locals are
    # renamed so the epochs grid list above is no longer shadowed.
    for key, value in architecture_epoch_score.items():
        epoch_values = [element[0] for element in value]
        score_values = [element[1] for element in value]
        matplotlib.pyplot.plot(epoch_values, score_values, label=key)
    matplotlib.pyplot.legend(loc="center right")
    matplotlib.pyplot.savefig("hyperparameter_map.eps",
                              bbox_inches="tight",
                              format="eps")

    # find best-scoring models
    # Find the 3 best scores.
    best_models = sorted(zip(grid_search_map["score_test"],
                             grid_search_map["hidden_nodes"]),
                         reverse=True)[:3]
    # table
    table_contents = [["architecture", "score testing"]]
    for model in best_models:
        table_contents.append([str(model[1]), str(model[0])])
    print("\nbest-scoring models:\n")
    print(pyprel.Table(contents=table_contents))
    log.info("")
    program.terminate()
def main(options):
    """
    Print the "exchanges" table of a conversation database and optionally
    save a simple "utterance => response" training representation to a file.
    """
    global program
    program = propyte.Program(options=options,
                              name=name,
                              version=version,
                              logo=logo)
    global log
    from propyte import log

    # access options and arguments
    database_filename = options["--database"]
    table_limit = options["--tableLimit"]
    if table_limit is not None:
        table_limit = int(table_limit)
    output_filename = options["--outputFile"]
    if output_filename is not None:
        output_filename = str(output_filename)

    # Access database.
    database = abstraction.access_database(filename=database_filename)
    log.info("\ndatabase metadata:")
    abstraction.log_database_metadata(filename=database_filename)
    log.info("")
    # Print the tables in the database.
    log.info("tables in database: {tables}".format(tables=database.tables))
    # Access the exchanges table.
    table_name = "exchanges"
    log.info("access table \"{table_name}\"".format(table_name=table_name))
    # Print the columns of the table.
    log.info("columns in table \"{table_name}\": {columns}".format(
        table_name=table_name, columns=database[table_name].columns))
    # Print the number of rows of the table.
    log.info(
        "number of rows in table \"{table_name}\": {number_of_rows}".format(
            table_name=table_name,
            number_of_rows=str(len(database[table_name]))))
    # Print the table entries:
    log.info("entries of table {table_name}:\n".format(table_name=table_name))
    # Define table headings.
    table_contents = [[
        "id", "utterance", "response", "utteranceTimeUNIX",
        "responseTimeUNIX", "utteranceReference", "responseReference",
        "exchangeReference"
    ]]
    # "utterance => response" lines collected for the optional output file.
    exchange_lines = []
    # Fill table data.
    count_entries = 0
    for entry in database[table_name].all():
        table_contents.append([
            str(entry["id"]),
            str(entry["utterance"]),
            str(entry["response"]),
            str(entry["utteranceTimeUNIX"]),
            str(entry["responseTimeUNIX"]),
            str(entry["utteranceReference"]),
            str(entry["responseReference"]),
            str(entry["exchangeReference"])
        ])
        count_entries += 1
        # simple training representation
        # BUG FIX: the empty-string check previously used "is" (identity
        # comparison with a literal — unreliable and a SyntaxWarning on
        # modern Python) and built the string by repeated concatenation
        # (quadratic); the lines are now collected and joined once.
        if output_filename is not None:
            exchange_lines.append(
                str(entry["utterance"]) + " => " + str(entry["response"]))
        if table_limit is not None:
            if count_entries >= table_limit:
                break
    simple_training_representation = "\n".join(exchange_lines)
    # Record table.
    print(pyprel.Table(contents=table_contents))
    # Record to file, if specified.
    if output_filename is not None:
        # BUG FIX: the message previously had no {filename} placeholder, so
        # .format was a no-op and the filename never appeared in the log.
        log.info(
            "save simple training representation to file {filename}".format(
                filename=output_filename))
        # "with" guarantees the file is closed even if the write raises.
        with open(output_filename, "w") as output_file:
            output_file.write(simple_training_representation)
    program.terminate()