def main():
    """Demonstrate shijian.List_Consensus by printing and appending to it.

    Builds a List_Consensus of ten sentences, prints the list, its
    sys.getsizeof() size and its consensus, calls ensure_size(), and then
    appends the same sentence five times, printing the list after each
    append.
    """
    initial_sentences = [
        "This is a test.",
        "This test.",
        "This is a test.",
        "This is not a test.",
    ] + ["This is a test."] * 6
    sentences = shijian.List_Consensus(initial_sentences)

    list_template = "list:\n{list}"

    print(list_template.format(list=sentences))
    print("list size: {size}".format(size=sys.getsizeof(sentences)))
    consensus = sentences.consensus()
    print("list consensus: {consensus}".format(consensus=consensus))
    print("ensure list size...")
    sentences.ensure_size()
    pyprel.print_line()

    print("append to list multiple times while ensuring list size...")
    print(list_template.format(list=sentences))
    # Five identical append-and-show steps.
    for _ in range(5):
        print("append to list while ensuring list size...")
        sentences.append("This is another test.")
        print(list_template.format(list=sentences))
    pyprel.print_line()

    print(list_template.format(list=sentences))
Beispiel #2
0
 def engage(self):
     """Log the start-up banner; nothing is emitted when self.silent is set.

     In order: a separator line, the centred logo (if self.display_logo),
     an "initiate" message (if self.name), the version (if self.version)
     and the clock start time.
     """
     verbose = not self.silent
     if verbose:
         pyprel.print_line()
     # logo
     if self.display_logo and verbose:
         log.info(pyprel.center_string(text=self.logo))
         pyprel.print_line()
     # engage alert
     if self.name and verbose:
         log.info("initiate {name}".format(name=self.name))
     # version
     if self.version and verbose:
         log.info("version: {version}".format(version=self.version))
     if verbose:
         log.info("initiation time: {time}".format(time=clock.start_time()))
Beispiel #3
0
def main(options):
    """Log which built-in sentence is most similar to the given expression.

    Reads "--expression" and "--wordvectormodel" from options, loads the
    word vector model with abstraction.load_word_vector_model(), passes the
    expression and a fixed list of sentences to most_similar_expression(),
    logs the input and the result, and finally calls program.terminate().

    Rebinds the module-level names `program` and `log`.
    """
    global program
    global log

    program = propyte.Program(
        options=options, name=name, version=version, logo=logo)
    # The logger is imported only after the Program instance exists.
    from propyte import log

    expression = options["--expression"]
    model_filename = options["--wordvectormodel"]
    model_word2vec = abstraction.load_word_vector_model(
        filename=model_filename)

    candidates = [
        "What are you dirty hooers doing on my planet?",
        "What time is it?",
        "What can you do?",
        "Change the color from red to black.",
        "All those moments will be lost in time.",
        "All of those moments will be lost in time.",
        "All of those moments are to be lost in time.",
    ]
    best_match = most_similar_expression(
        expression=expression,
        expressions=candidates,
        model_word2vec=model_word2vec)

    pyprel.print_line()
    input_message = "input expression:        {expression}".format(
        expression=expression)
    log.info(input_message)
    result_message = "most similar expression: {expression}".format(
        expression=best_match)
    log.info(result_message)
    pyprel.print_line()

    program.terminate()
Beispiel #4
0
def main(options):
    """Log every column of every entry of every table in a database.

    Reads the database filename from options["--database"], opens it with
    abstraction.access_database(), and for each table in database.tables
    logs the table name followed by each entry's columns, with separator
    lines between entries. Ends by calling program.terminate().

    Rebinds the module-level names `program` and `log`.
    """
    global program
    program = propyte.Program(options=options,
                              name=name,
                              version=version,
                              logo=logo)
    global log
    # The logger is imported only after the Program instance exists.
    from propyte import log

    # access options and arguments
    database_filename = options["--database"]

    log.info("")

    database = abstraction.access_database(filename=database_filename)

    for table in database.tables:
        # Fixed: the message previously ended in a literal "/n" instead of
        # a newline escape.
        log.info("\ntable: {table}\n".format(table=table))
        for entry in database[table].all():
            pyprel.print_line()
            for column in database[table].columns:
                log.info("\n{column}: {content}".format(
                    column=column, content=str(entry[column])))
        pyprel.print_line()

    log.info("")

    program.terminate()
Beispiel #5
0
def main():
    """Show how a pyprel palette is accessed and then extended.

    Accesses the palette named "palette1", prints it, and then calls
    extend_palette() twice, requesting at least 3 and then at least 20
    colors, printing the palette after each call.
    """
    pyprel.print_line()

    print("\nexample: accessing a palette and extending it\n")
    palette_name = "palette1"
    print("access palette {name}".format(name=palette_name))
    palette = pyprel.access_palette(name=palette_name)
    print("palette colors default: {colors}".format(colors=palette))

    # The same announce / extend / print sequence for each requested size.
    announcement = (
        "extend palette to ensure that it has at least {number} colors")
    for minimum_number_of_colors_needed in (3, 20):
        print(announcement.format(number=minimum_number_of_colors_needed))
        palette.extend_palette(
            minimum_number_of_colors_needed=minimum_number_of_colors_needed)
        print("palette colors: {colors}".format(colors=palette))

    pyprel.print_line()
Beispiel #6
0
def main():
    """Access the pyprel palette "palette1" and extend it twice.

    Prints the palette as accessed, then after extend_palette() has been
    asked for at least 3 colors, and again after it has been asked for at
    least 20 colors.
    """
    pyprel.print_line()

    print("\nexample: accessing a palette and extending it\n")
    palette_name = "palette1"
    print("access palette {name}".format(name=palette_name))
    palette = pyprel.access_palette(name=palette_name)
    print("palette colors default: {colors}".format(colors=palette))

    def extend_and_show(minimum_number_of_colors_needed):
        # Announce the request, extend the palette, then print it.
        print(
            "extend palette to ensure that it has at least {number} colors"
            .format(number=minimum_number_of_colors_needed))
        palette.extend_palette(
            minimum_number_of_colors_needed=minimum_number_of_colors_needed)
        print("palette colors: {colors}".format(colors=palette))

    extend_and_show(3)
    extend_and_show(20)

    pyprel.print_line()
Beispiel #7
0
 def terminate(self):
     """Stop the clock, log the shutdown summary unless silent, then exit.

     The summary consists of the clock stop time, the shijian clocks
     report and a "terminate" message naming the program, followed by a
     separator line. sys.exit() is called in every case.
     """
     clock.stop()
     if not self.silent:
         stop_time = clock.stop_time()
         log.info("termination time: {time}".format(time=stop_time))
         report = shijian.clocks.report()
         log.info("time statistics report:\n{report}".format(report=report))
         log.info("terminate {name}".format(name=self.name))
         pyprel.print_line()
     sys.exit()
Beispiel #8
0
 def terminate(self):
     """Stop the clock and log the shutdown summary.

     Logs the clock stop time, the shijian clocks report in "full" style,
     the default clocks report and a "terminate" message naming the
     program, then prints a separator line.
     """
     clock.stop()
     stop_time = clock.stop_time()
     log.info("termination time: {time}".format(time=stop_time))
     full_report = shijian.clocks.report(style="full")
     log.info("time full report:\n{report}".format(report=full_report))
     statistics_report = shijian.clocks.report()
     log.info("time statistics report:\n{report}".format(
         report=statistics_report))
     log.info("terminate {name}".format(name=self.name))
     pyprel.print_line()
Beispiel #9
0
 def engage(self):
     """Log the start-up banner.

     Prints a separator line, then logs the centred logo (when
     self.display_logo is set), an "initiate" message (when self.name is
     set), the version (when self.version is set) and the clock start
     time.
     """
     pyprel.print_line()
     # logo
     if self.display_logo:
         centred_logo = pyprel.center_string(text=self.logo)
         log.info(centred_logo)
         pyprel.print_line()
     # engage alert
     if self.name:
         initiate_message = "initiate {name}".format(name=self.name)
         log.info(initiate_message)
     # version
     if self.version:
         version_message = "version: {version}".format(version=self.version)
         log.info(version_message)
     start_time = clock.start_time()
     log.info("initiation time: {time}".format(time=start_time))
def main():
    """Walk through a shijian.List_Consensus: inspect it, then append to it.

    Creates a List_Consensus holding ten sentences, prints the list, its
    sys.getsizeof() size and its consensus, calls ensure_size(), and then
    appends one sentence five times over, printing the list after every
    append and once more at the end.
    """
    majority_sentence = "This is a test."
    sentences = shijian.List_Consensus(
        [majority_sentence, "This test.", majority_sentence,
         "This is not a test."]
        + [majority_sentence for _ in range(6)]
    )

    def show_list():
        # Print the list under its "list:" heading.
        print("list:\n{list}".format(list=sentences))

    show_list()
    print("list size: {size}".format(size=sys.getsizeof(sentences)))
    print("list consensus: {consensus}".format(
        consensus=sentences.consensus()))
    print("ensure list size...")
    sentences.ensure_size()

    pyprel.print_line()

    print("append to list multiple times while ensuring list size...")
    show_list()

    number_of_appends = 5
    for _ in range(number_of_appends):
        print("append to list while ensuring list size...")
        sentences.append("This is another test.")
        show_list()

    pyprel.print_line()

    show_list()
Beispiel #11
0
def main():
    """Demonstrate shijian's number/text conversion helpers.

    Prints a large integer together with the result of
    number_to_English_text(), then a sentence passed through
    replace_numbers_in_text_with_English_text(), then a sentence passed
    through
    replace_contractions_with_full_words_and_replace_numbers_with_digits().
    Sections are separated by pyprel.print_line().
    """
    pyprel.print_line()

    number = 1234567890123
    number_text = shijian.number_to_English_text(number)
    print("number {number} in English text:\n\n{number_text}".format(
        number=number, number_text=number_text))

    pyprel.print_line()

    text = "It is 03:14 and I have 3 apples in 400 wormholes."
    heading = (
        "replace numbers with English text in the following text:\n{text}\n")
    print(heading.format(text=text))
    print(shijian.replace_numbers_in_text_with_English_text(text=text))

    pyprel.print_line()

    text = "I've ten important invasions to consider."
    heading = (
        "change contractions to full words and change numbers to digits "
        "in the following text:\n{text}\n")
    print(heading.format(text=text))
    convert = (
        shijian
        .replace_contractions_with_full_words_and_replace_numbers_with_digits)
    print(convert(text=text))

    pyprel.print_line()
def main():
    """Print three examples of shijian's number and text conversions.

    1. An integer and its number_to_English_text() rendering.
    2. A sentence and the output of
       replace_numbers_in_text_with_English_text() for it.
    3. A sentence and the output of
       replace_contractions_with_full_words_and_replace_numbers_with_digits()
       for it.
    A separator line is printed before, between and after the examples.
    """
    pyprel.print_line()

    # Example 1: integer to English text.
    number = 1234567890123
    english = shijian.number_to_English_text(number)
    template = "number {number} in English text:\n\n{number_text}"
    print(template.format(number=number, number_text=english))

    pyprel.print_line()

    # Example 2: numbers inside a sentence to English text.
    text = "It is 03:14 and I have 3 apples in 400 wormholes."
    template = (
        "replace numbers with English text in the following text:\n{text}\n")
    print(template.format(text=text))
    replaced = shijian.replace_numbers_in_text_with_English_text(text=text)
    print(replaced)

    pyprel.print_line()

    # Example 3: contractions to full words and number words to digits.
    text = "I've ten important invasions to consider."
    template = (
        "change contractions to full words and change numbers to digits in "
        "the following text:\n{text}\n")
    print(template.format(text=text))
    replaced = (
        shijian
        .replace_contractions_with_full_words_and_replace_numbers_with_digits(
            text=text))
    print(replaced)

    pyprel.print_line()
Beispiel #13
0
 def restart(self):
     """Stop the clock, log the shutdown summary unless silent, then restart.

     The summary consists of the clock stop time, the shijian clocks
     report and a "terminate" message naming the program, followed by a
     separator line.
     """
     clock.stop()
     if not self.silent:
         stop_time = clock.stop_time()
         log.info("termination time: {time}".format(time=stop_time))
         report = shijian.clocks.report()
         log.info("time statistics report:\n{report}".format(report=report))
         log.info("terminate {name}".format(name=self.name))
         pyprel.print_line()
     # The bare name resolves to a module-level `restart` callable, not to
     # this method (which would have to be reached as self.restart).
     restart()
def main():
    """Print the current UTC time in a range of shijian time styles.

    Takes one datetime.datetime.utcnow() snapshot and prints it through
    shijian.style_datetime_object() for each listed style, then prints
    shijian.time_UTC() for one style, then prints
    shijian.style_minimal_seconds() for several durations in seconds.
    """
    pyprel.print_line()

    print("time styles:")
    now_utc = datetime.datetime.utcnow()
    style_names = (
        "YYYY-MM-DDTHHMMSSZ",
        "YYYY-MM-DDTHHMMZ",
        "YYYY-MM-DDTHHMMSSMMMMMMZ",
        "YYYY-MM-DD HH:MM:SS UTC",
        "UNIX time S.SSSSSS",
        "UNIX time S",
        "day DD month YYYY",
        "HH:MM day DD month YYYY",
        "HH:MM:SS day DD month YYYY",
        "day DD month YYYY HH:MM:SS",
        "HH hours MM minutes SS seconds day DD month YYYY",
        "DD:HH:MM",
        "DD:HH:MM:SS",
        "HH:MM:SS",
        "HH hours MM minutes SS seconds",
    )
    for style_name in style_names:
        print("\nstyle: {style}".format(style=style_name))
        styled_time = shijian.style_datetime_object(
            datetime_object=now_utc, style=style_name)
        print(styled_time)

    pyprel.print_line()

    print("current time UTC:\n")
    # NOTE(review): this style reads "SS sounds" whereas the style list
    # above reads "SS seconds" -- confirm which spelling shijian expects.
    current_time_text = shijian.time_UTC(
        style="HH hours MM minutes SS sounds day DD month YYYY")
    print(current_time_text)

    pyprel.print_line()

    print("minimal time style for seconds:\n")
    # Durations of 10, 100, 1000, 10000 and 100000 seconds.
    for power in range(1, 6):
        seconds = 10 ** power
        print("{seconds} seconds: {seconds_styled}".format(
            seconds=seconds,
            seconds_styled=shijian.style_minimal_seconds(seconds)))

    pyprel.print_line()
Beispiel #15
0
def main(options):
    """Log the full contents of the database named by options["--database"].

    Opens the database with abstraction.access_database() and, for each
    table in database.tables, logs the table name and then every column of
    every entry returned by database[table].all(), printing a separator
    line before each entry and after each table. Ends by calling
    program.terminate().

    Rebinds the module-level names `program` and `log`.
    """
    global program
    program = propyte.Program(
        options = options,
        name    = name,
        version = version,
        logo    = logo
        )
    global log
    # The logger is imported only after the Program instance exists.
    from propyte import log

    # access options and arguments
    database_filename = options["--database"]

    log.info("")

    database = abstraction.access_database(
        filename = database_filename
    )

    for table in database.tables:
        # Fixed: the message previously ended in a literal "/n" instead of
        # a newline escape.
        log.info("\ntable: {table}\n".format(
            table = table
        ))
        for entry in database[table].all():
            pyprel.print_line()
            for column in database[table].columns:
                log.info("\n{column}: {content}".format(
                    column  = column,
                    content = str(entry[column])
                ))
        pyprel.print_line()

    log.info("")

    program.terminate()
def main():
    """Show shijian's time styling on the current UTC time.

    Prints one datetime.datetime.utcnow() snapshot through
    shijian.style_datetime_object() for every listed style, prints
    shijian.time_UTC() for one style, and prints
    shijian.style_minimal_seconds() for a handful of durations.
    """
    def styled(style):
        # Render the shared UTC snapshot in the requested style.
        return shijian.style_datetime_object(
            datetime_object = datetime_object_current_time_UTC,
            style           = style
        )

    pyprel.print_line()
    print("time styles:")
    datetime_object_current_time_UTC = datetime.datetime.utcnow()
    for style in [
        "YYYY-MM-DDTHHMMSSZ",
        "YYYY-MM-DDTHHMMZ",
        "YYYY-MM-DDTHHMMSSMMMMMMZ",
        "YYYY-MM-DD HH:MM:SS UTC",
        "UNIX time S.SSSSSS",
        "UNIX time S",
        "day DD month YYYY",
        "HH:MM day DD month YYYY",
        "HH:MM:SS day DD month YYYY",
        "day DD month YYYY HH:MM:SS",
        "HH hours MM minutes SS seconds day DD month YYYY",
        "DD:HH:MM",
        "DD:HH:MM:SS",
        "HH:MM:SS",
        "HH hours MM minutes SS seconds"
    ]:
        print("\nstyle: {style}".format(style = style))
        print(styled(style))
    pyprel.print_line()
    print("current time UTC:\n")
    # NOTE(review): this style reads "SS sounds" whereas the style list
    # above reads "SS seconds" -- confirm which spelling shijian expects.
    time_UTC_style = "HH hours MM minutes SS sounds day DD month YYYY"
    print(shijian.time_UTC(style = time_UTC_style))
    pyprel.print_line()
    print("minimal time style for seconds:\n")
    durations_in_seconds = (10, 100, 1000, 10000, 100000)
    for seconds in durations_in_seconds:
        seconds_styled = shijian.style_minimal_seconds(seconds)
        print("{seconds} seconds: {seconds_styled}".format(
            seconds        = seconds,
            seconds_styled = seconds_styled
        ))
    pyprel.print_line()
Beispiel #17
0
def main(options):
    """Exercise a Program instance: options, configuration, logging, timing.

    Creates the module-level `program` from options, prints its options
    and configuration dictionaries, reads individual configuration values
    (including one that is absent, via dict.get with a default), iterates
    the items of configuration["settings1"], emits one log message at each
    level, sleeps for two seconds, logs the result of function_1() three
    times, and finally calls program.terminate().
    """
    global program
    program = Program(options=options)

    # Print the program options dictionary and the program configuration
    # dictionary.
    pyprel.print_line()
    log.info("program options dictionary:")
    pyprel.print_dictionary(dictionary=program.options)
    pyprel.print_line()
    log.info("program configuration dictionary:")
    pyprel.print_dictionary(dictionary=program.configuration)
    pyprel.print_line()

    # Access a value of the program configuration dictionary.
    log.info("accessing a value of the program configuration")
    if "attribute1" in program.configuration["settings1"]["item1"]:
        # Fixed: the message previously named item2 although the value is
        # read from item1.
        log.info("attribute1 of item1 of settings1: {attribute}".format(
            attribute=program.configuration["settings1"]["item1"]
            ["attribute1"]))

    # Access a value of the program configuration dictionary that does not exist
    # and then assign to it a default value.
    log.info("accessing a nonexistent value of the program configuration")
    log.info("attribute3 of item1 of settings1: {attribute}".format(
        attribute=program.configuration["settings1"]["item1"].get(
            "attribute3", "nonexistent")))

    # Loop over multiple values of the program configuration dictionary.
    log.info("loading items of settings1")
    for name, attributes in program.configuration["settings1"].iteritems():
        log.info("loading item {name}".format(name=name))
        # Fixed: the message previously hard-coded "item 2" for every item
        # of the loop; it now names the item actually being reported.
        log.info("attributes of item {name} of settings1: {attributes}".format(
            name=name, attributes=attributes))

    # One message per logging level.
    log.debug("message at level DEBUG")
    log.info("message at level INFO")
    log.warning("message at level WARNING")
    log.error("message at level ERROR")
    log.critical("message at level CRITICAL")

    # activity
    time.sleep(2)
    log.info("\nrun function 1 three times...")
    for run_number in xrange(1, 4):
        log.info("function 1 run {run_number} result: {result}".format(
            run_number=run_number, result=function_1()))
    log.info("")
    program.terminate()
def main(options):
    """Grid-search classifier architectures on ttbb versus ttH data.

    Steps, in order:

    1. Read the "--datattH", "--datattbb" and "--plot" options and load
       both ROOT files with abstraction.load_HEP_data().
    2. If plotting is enabled, save a ttbb/ttH histogram comparison for
       every variable of the ttbb data.
    3. Preprocess both data sets and set the "class" variable to 0 for
       ttbb and 1 for ttH.
    4. Flatten both data sets into a list, wrap it in
       abstraction.Dataset and split it 70 % / 30 % into training and
       test parts.
    5. For every epoch count and every ordered combination of 1 to 4
       hidden-layer sizes drawn from `architecture`, fit an
       abstraction.Classification model, score it on the training and
       test parts, record the scores and export the running results to
       "grid_search_map.pkl".
    6. Print a table of all results, save a plot of test score versus
       epochs per architecture to "hyperparameter_map.eps", and print a
       table of the three best test scores.

    Rebinds the module-level names `program` and `log`, and ends by
    calling program.terminate().
    """
    global program
    program = propyte.Program(options=options,
                              name=name,
                              version=version,
                              logo=logo)
    global log
    # The logger is imported only after the Program instance exists.
    from propyte import log

    log.info("")

    # access options and arguments
    ROOT_filename_ttH = options["--datattH"]
    ROOT_filename_ttbb = options["--datattbb"]
    engage_plotting = string_to_bool(options["--plot"])

    log.info("ttH data file: {filename}".format(filename=ROOT_filename_ttH))
    log.info("ttbb data file: {filename}".format(filename=ROOT_filename_ttbb))

    # Access data for event classes ttbb and ttH.

    data_ttbb = abstraction.load_HEP_data(ROOT_filename=ROOT_filename_ttbb,
                                          tree_name="nominal",
                                          maximum_number_of_events=None)

    data_ttH = abstraction.load_HEP_data(ROOT_filename=ROOT_filename_ttH,
                                         tree_name="nominal",
                                         maximum_number_of_events=None)

    if engage_plotting is True:

        # Plot the loaded datasets.

        for variable_name in data_ttbb.variables():
            log.info(
                "plot ttbb versus ttH comparison of {variable_name}".format(
                    variable_name=variable_name))
            datavision.save_histogram_comparison_matplotlib(
                values_1=data_ttbb.values(name=variable_name),
                values_2=data_ttH.values(name=variable_name),
                label_1=variable_name + "_ttbb",
                label_2=variable_name + "_ttH",
                normalize=True,
                label_ratio_x="frequency",
                label_y="",
                title=variable_name + "_ttbb_ttH",
                filename=variable_name + "_ttbb_ttH.png")

    # upcoming: consider data ordering

    # Preprocess all data (to be updated).

    data_ttbb.preprocess_all()
    data_ttH.preprocess_all()

    # Add class labels to the data sets, 0 for ttbb and 1 for ttH.

    for index in data_ttbb.indices():
        data_ttbb.variable(index=index, name="class", value=0)

    for index in data_ttH.indices():
        data_ttH.variable(index=index, name="class", value=1)

    # Convert the data sets to a simple list format.
    # NOTE(review): each event contributes a row of features followed by a
    # separate single-element row holding the class label, and the label
    # is read with variable(name="class") without an index -- confirm this
    # is the layout and call form abstraction.Dataset / variable() expect.
    _data = []
    for index in data_ttbb.indices():
        _data.append([
            data_ttbb.variable(index=index, name="el_1_pt"),
            data_ttbb.variable(index=index, name="el_1_eta"),
            data_ttbb.variable(index=index, name="el_1_phi"),
            data_ttbb.variable(index=index, name="jet_1_pt"),
            data_ttbb.variable(index=index, name="jet_1_eta"),
            data_ttbb.variable(index=index, name="jet_1_phi"),
            data_ttbb.variable(index=index, name="jet_1_e"),
            data_ttbb.variable(index=index, name="jet_2_pt"),
            data_ttbb.variable(index=index, name="jet_2_eta"),
            data_ttbb.variable(index=index, name="jet_2_phi"),
            data_ttbb.variable(index=index, name="jet_2_e"),
            data_ttbb.variable(index=index, name="met"),
            data_ttbb.variable(index=index, name="met_phi"),
            data_ttbb.variable(index=index, name="nJets"),
            data_ttbb.variable(index=index, name="Centrality_all"),
            #data_ttbb.variable(index = index, name = "Mbb_MindR")
        ])
        _data.append([data_ttbb.variable(name="class")])
    for index in data_ttH.indices():
        _data.append([
            data_ttH.variable(index=index, name="el_1_pt"),
            data_ttH.variable(index=index, name="el_1_eta"),
            data_ttH.variable(index=index, name="el_1_phi"),
            data_ttH.variable(index=index, name="jet_1_pt"),
            data_ttH.variable(index=index, name="jet_1_eta"),
            data_ttH.variable(index=index, name="jet_1_phi"),
            data_ttH.variable(index=index, name="jet_1_e"),
            data_ttH.variable(index=index, name="jet_2_pt"),
            data_ttH.variable(index=index, name="jet_2_eta"),
            data_ttH.variable(index=index, name="jet_2_phi"),
            data_ttH.variable(index=index, name="jet_2_e"),
            data_ttH.variable(index=index, name="met"),
            data_ttH.variable(index=index, name="met_phi"),
            data_ttH.variable(index=index, name="nJets"),
            data_ttH.variable(index=index, name="Centrality_all"),
            #data_ttH.variable(index = index, name = "Mbb_MindR")
        ])
        _data.append([data_ttH.variable(name="class")])
    dataset = abstraction.Dataset(data=_data)

    log.info("")

    # define data

    log.info("split data for cross-validation")
    features_train, features_test, targets_train, targets_test =\
        cross_validation.train_test_split(
            dataset.features(),
            dataset.targets(),
            train_size = 0.7
        )
    # grid search

    import itertools

    epochs = [100, 100000]
    architecture = [200, 300, 300, 200]

    # Parallel lists: entry i of each list describes grid-search run i.
    grid_search_map = {}
    grid_search_map["epoch"] = []
    grid_search_map["hidden_nodes"] = []
    grid_search_map["score_training"] = []
    grid_search_map["score_test"] = []

    # define progress
    # Dry run over the same loops as the search below, only to count the
    # total number of training runs.
    count_total = 0
    for epoch in epochs:
        for nodes_count in xrange(1, len(architecture) + 1):
            combinations = itertools.product(architecture, repeat=nodes_count)
            for combination in combinations:
                count_total += 1
    count = 0
    progress = shijian.Progress()
    progress.engage_quick_calculation_mode()

    for epoch in epochs:
        for nodes_count in xrange(1, len(architecture) + 1):
            combinations = itertools.product(architecture, repeat=nodes_count)
            for combination in combinations:
                hidden_nodes = list(combination)

                # define model

                log.info("define classification model")
                classifier = abstraction.Classification(
                    number_of_classes=2,
                    hidden_nodes=hidden_nodes,
                    epochs=epoch)

                # train model

                log.info("fit model to dataset features and targets")
                classifier._model.fit(features_train, targets_train)
                #classifier.save()

                # predict and cross-validate training

                log.info("test trained model on training dataset")
                score_training = metrics.accuracy_score(
                    classifier._model.predict(features_train), targets_train)
                score_test = metrics.accuracy_score(
                    classifier._model.predict(features_test), targets_test)
                log.info("\ntraining-testing instance complete:")
                log.info("epoch:          {epoch}".format(epoch=epoch))
                log.info("architecture:   {architecture}".format(
                    architecture=hidden_nodes))
                log.info("score training: {score_training}".format(
                    score_training=100 * score_training))
                log.info("score test:     {score_test}".format(score_test=100 *
                                                               score_test))
                pyprel.print_line()
                grid_search_map["epoch"].append(epoch)
                grid_search_map["hidden_nodes"].append(hidden_nodes)
                grid_search_map["score_training"].append(score_training)
                grid_search_map["score_test"].append(score_test)

                # save current grid search map
                shijian.export_object(grid_search_map,
                                      filename="grid_search_map.pkl",
                                      overwrite=True)

                count += 1
                # Fixed: count already includes the run just finished, so
                # the completed fraction is count / count_total (adding 1
                # again overshot 1.0 on the last run). float() avoids
                # floor division when both operands are ints.
                print(progress.add_datum(
                    fraction=float(count) / count_total))

    number_of_entries = len(grid_search_map["epoch"])

    # table

    table_contents = [[
        "epoch", "architecture", "score training", "score testing"
    ]]
    for index in range(0, number_of_entries):
        table_contents.append([
            str(grid_search_map["epoch"][index]),
            str(grid_search_map["hidden_nodes"][index]),
            str(grid_search_map["score_training"][index]),
            str(grid_search_map["score_test"][index])
        ])
    print("\ngrid search map:\n")
    print(pyprel.Table(contents=table_contents, ))

    # plot

    architectures = shijian.unique_list_elements(
        grid_search_map["hidden_nodes"])

    # Map str(architecture) -> list of [epoch, test score] pairs.
    # Note: the loop variable rebinds `architecture`, which is no longer
    # needed as the list of layer sizes at this point.
    architecture_epoch_score = {}
    for architecture in architectures:
        architecture_epoch_score[str(architecture)] = []
        for index in range(0, number_of_entries):
            if grid_search_map["hidden_nodes"][index] == architecture:
                architecture_epoch_score[str(architecture)].append([
                    grid_search_map["epoch"][index],
                    grid_search_map["score_test"][index]
                ])

    figure = matplotlib.pyplot.figure()
    figure.set_size_inches(10, 10)
    axes = figure.add_subplot(1, 1, 1)
    axes.set_xscale("log")
    figure.suptitle("hyperparameter map", fontsize=20)
    matplotlib.pyplot.xlabel("epochs")
    matplotlib.pyplot.ylabel("training test score")

    # One curve per architecture: test score against number of epochs.
    for key, value in architecture_epoch_score.iteritems():
        epochs = [element[0] for element in value]
        score_test = [element[1] for element in value]
        matplotlib.pyplot.plot(epochs, score_test, label=key)

    matplotlib.pyplot.legend(loc="center right")

    matplotlib.pyplot.savefig("hyperparameter_map.eps",
                              bbox_inches="tight",
                              format="eps")

    # find best-scoring models

    # Find the 3 best scores.
    best_models = sorted(zip(grid_search_map["score_test"],
                             grid_search_map["hidden_nodes"]),
                         reverse=True)[:3]

    # table
    table_contents = [["architecture", "score testing"]]
    for model in best_models:
        table_contents.append([str(model[1]), str(model[0])])
    print("\nbest-scoring models:\n")
    print(pyprel.Table(contents=table_contents, ))

    log.info("")

    program.terminate()
def main(options):
    """
    Run a grid search over epochs and hidden-layer architectures for a
    classifier separating ttH events from ttbb events.

    The ROOT files named by the options "--datattH" and "--datattbb" are loaded
    from the tree "nominal", optionally plotted against each other (option
    "--plot"), preprocessed and labelled (0 for ttbb, 1 for ttH). The data are
    split 70 % / 30 % into training and test samples and one classifier is
    trained for every combination of epoch count and architecture. The
    resulting scores are printed as a table, plotted to
    "hyperparameter_map.eps" and the three best test scores are printed.

    Arguments:
        options: mapping of program options; the keys "--datattH",
            "--datattbb" and "--plot" are read here and the whole mapping is
            passed to propyte.Program.

    Side effects:
        Sets the module-level names `program` and `log`, writes
        "grid_search_map.pkl" (after every trained model) and
        "hyperparameter_map.eps", and terminates the program object.
    """

    global program
    program = propyte.Program(
        options = options,
        name    = name,
        version = version,
        logo    = logo
    )
    global log
    from propyte import log

    log.info("")

    # access options and arguments
    ROOT_filename_ttH  = options["--datattH"]
    ROOT_filename_ttbb = options["--datattbb"]
    engage_plotting    = string_to_bool(options["--plot"])

    log.info("ttH data file: {filename}".format(
        filename = ROOT_filename_ttH
    ))
    log.info("ttbb data file: {filename}".format(
        filename = ROOT_filename_ttbb
    ))

    # Access data for event classes ttbb and ttH.

    data_ttbb = abstraction.load_HEP_data(
        ROOT_filename            = ROOT_filename_ttbb,
        tree_name                = "nominal",
        maximum_number_of_events = None
    )

    data_ttH = abstraction.load_HEP_data(
        ROOT_filename            = ROOT_filename_ttH,
        tree_name                = "nominal",
        maximum_number_of_events = None
    )

    if engage_plotting is True:

        # Plot the loaded datasets, one comparison histogram per variable.

        for variable_name in data_ttbb.variables():
            log.info("plot ttbb versus ttH comparison of {variable_name}".format(
                variable_name = variable_name
            ))
            datavision.save_histogram_comparison_matplotlib(
                values_1      = data_ttbb.values(name = variable_name),
                values_2      = data_ttH.values(name = variable_name),
                label_1       = variable_name + "_ttbb",
                label_2       = variable_name + "_ttH",
                normalize     = True,
                label_ratio_x = "frequency",
                label_y       = "",
                title         = variable_name + "_ttbb_ttH",
                filename      = variable_name + "_ttbb_ttH.png"
            )

    # upcoming: consider data ordering

    # Preprocess all data (to be updated).

    data_ttbb.preprocess_all()
    data_ttH.preprocess_all()

    # Add class labels to the data sets, 0 for ttbb and 1 for ttH.

    for index in data_ttbb.indices():
        data_ttbb.variable(index = index, name = "class", value = 0)

    for index in data_ttH.indices():
        data_ttH.variable(index = index, name = "class", value = 1)

    # Names of the event variables used as features, in column order.
    feature_names = (
        "el_1_pt",
        "el_1_eta",
        "el_1_phi",
        "jet_1_pt",
        "jet_1_eta",
        "jet_1_phi",
        "jet_1_e",
        "jet_2_pt",
        "jet_2_eta",
        "jet_2_phi",
        "jet_2_e",
        "met",
        "met_phi",
        "nJets",
        "Centrality_all",
        #"Mbb_MindR"
    )

    # Convert the data sets to a simple list format: every event contributes
    # one row of feature values followed by a one-element row holding its
    # class label. All ttbb events come first, then all ttH events.
    _data = []
    for data_class in (data_ttbb, data_ttH):
        for index in data_class.indices():
            _data.append([
                data_class.variable(index = index, name = feature_name)
                for feature_name in feature_names
            ])
            # NOTE(review): the label is read without an index; presumably
            # this refers to the event accessed just above -- confirm against
            # abstraction.
            _data.append([
                data_class.variable(name = "class")
            ])
    dataset = abstraction.Dataset(data = _data)

    log.info("")

    # define data

    log.info("split data for cross-validation")
    features_train, features_test, targets_train, targets_test =\
        cross_validation.train_test_split(
            dataset.features(),
            dataset.targets(),
            train_size = 0.7
        )
    # grid search

    import itertools

    epochs       = [100, 100000]
    architecture = [200, 300, 300, 200]

    grid_search_map = {
        "epoch":          [],
        "hidden_nodes":   [],
        "score_training": [],
        "score_test":     []
    }

    # define progress
    # itertools.product(architecture, repeat = n) yields
    # len(architecture) ** n combinations, so the total number of trainings
    # can be computed without enumerating them.
    count_total = len(epochs) * sum(
        len(architecture) ** nodes_count
        for nodes_count in range(1, len(architecture) + 1)
    )
    count = 0
    progress = shijian.Progress()
    progress.engage_quick_calculation_mode()

    for epoch in epochs:
        for nodes_count in range(1, len(architecture) + 1):
            combinations = itertools.product(architecture, repeat = nodes_count)
            for combination in combinations:
                hidden_nodes = list(combination)

                # define model

                log.info("define classification model")
                classifier = abstraction.Classification(
                    number_of_classes = 2,
                    hidden_nodes      = hidden_nodes,
                    epochs            = epoch
                )

                # train model

                log.info("fit model to dataset features and targets")
                classifier._model.fit(features_train, targets_train)
                #classifier.save()

                # predict and cross-validate training

                log.info("test trained model on training dataset")
                score_training = metrics.accuracy_score(
                    classifier._model.predict(features_train),
                    targets_train
                )
                score_test = metrics.accuracy_score(
                    classifier._model.predict(features_test),
                    targets_test
                )
                log.info("\ntraining-testing instance complete:")
                log.info("epoch:          {epoch}".format(
                    epoch = epoch
                ))
                log.info("architecture:   {architecture}".format(
                    architecture = hidden_nodes
                ))
                log.info("score training: {score_training}".format(
                    score_training = 100 * score_training
                ))
                log.info("score test:     {score_test}".format(
                    score_test = 100 * score_test
                ))
                pyprel.print_line()
                grid_search_map["epoch"].append(epoch)
                grid_search_map["hidden_nodes"].append(hidden_nodes)
                grid_search_map["score_training"].append(score_training)
                grid_search_map["score_test"].append(score_test)

                # save current grid search map so that partial results
                # are available while the search is still running
                shijian.export_object(
                    grid_search_map,
                    filename  = "grid_search_map.pkl",
                    overwrite = True
                )

                # count is the number of completed trainings; float() keeps
                # the division a true division on Python 2 as well.
                count += 1
                print(progress.add_datum(
                    fraction = float(count) / count_total
                ))

    # table

    table_contents = [
        ["epoch", "architecture", "score training", "score testing"]
    ]
    table_contents.extend(
        [str(entry) for entry in row]
        for row in zip(
            grid_search_map["epoch"],
            grid_search_map["hidden_nodes"],
            grid_search_map["score_training"],
            grid_search_map["score_test"]
        )
    )
    print("\ngrid search map:\n")
    print(
        pyprel.Table(
            contents = table_contents,
        )
    )

    # plot

    architectures = shijian.unique_list_elements(grid_search_map["hidden_nodes"])

    # Map the string form of each architecture to its [epoch, test score]
    # pairs, in the order in which they were recorded.
    architecture_epoch_score = {}
    for unique_architecture in architectures:
        architecture_epoch_score[str(unique_architecture)] = [
            [recorded_epoch, recorded_score]
            for recorded_epoch, recorded_nodes, recorded_score in zip(
                grid_search_map["epoch"],
                grid_search_map["hidden_nodes"],
                grid_search_map["score_test"]
            )
            if recorded_nodes == unique_architecture
        ]

    figure = matplotlib.pyplot.figure()
    figure.set_size_inches(10, 10)
    axes = figure.add_subplot(1, 1, 1)
    axes.set_xscale("log")
    figure.suptitle("hyperparameter map", fontsize = 20)
    matplotlib.pyplot.xlabel("epochs")
    matplotlib.pyplot.ylabel("training test score")

    # one curve of test score versus epochs per architecture
    for key, value in architecture_epoch_score.items():
        curve_epochs = [element[0] for element in value]
        curve_scores = [element[1] for element in value]
        matplotlib.pyplot.plot(curve_epochs, curve_scores, label = key)

    matplotlib.pyplot.legend(loc = "center right")

    matplotlib.pyplot.savefig(
        "hyperparameter_map.eps",
        bbox_inches = "tight",
        format      = "eps"
    )

    # find best-scoring models

    # Find the 3 best scores.
    best_models = sorted(zip(
        grid_search_map["score_test"],
        grid_search_map["hidden_nodes"]),
        reverse = True
    )[:3]

    # table
    table_contents = [["architecture", "score testing"]]
    for model in best_models:
        table_contents.append([str(model[1]), str(model[0])])
    print("\nbest-scoring models:\n")
    print(
        pyprel.Table(
            contents = table_contents,
        )
    )

    log.info("")

    program.terminate()
def main(options):
    """
    Run a grid search over epochs and hidden-layer architectures for a
    classifier trained on a SUSY-format dataset file.

    The file named by the option "--data" is loaded, split 70 % / 30 % into
    training and test samples and one classifier is trained for every
    combination of epoch count and architecture. The resulting scores are
    printed as a table, plotted to "hyperparameter_map.eps" and the three best
    test scores are printed.

    Arguments:
        options: mapping of program options; the key "--data" is read here
            and the whole mapping is passed to propyte.Program.

    Side effects:
        Sets the module-level names `program` and `log`, writes
        "grid_search_map.pkl" (after every trained model) and
        "hyperparameter_map.eps", and terminates the program object.
    """

    global program
    program = propyte.Program(
        options = options,
        name    = name,
        version = version,
        logo    = logo
    )
    global log
    from propyte import log

    log.info("")

    # access options and arguments
    input_data_filename = options["--data"]

    # define dataset

    # Load the SUSY dataset (https://archive.ics.uci.edu/ml/datasets/SUSY).
    # The first column is the class label (1 for signal, 0 for background),
    # followed by 18 features (8 low-level features and 10 high-level features):
    #
    # - lepton 1 pT
    # - lepton 1 eta
    # - lepton 1 phi
    # - lepton 2 pT
    # - lepton 2 eta
    # - lepton 2 phi
    # - missing energy magnitude
    # - missing energy phi
    # - MET_rel
    # - axial MET
    # - M_R
    # - M_TR_2
    # - R
    # - MT2
    # - S_R
    # - M_Delta_R
    # - dPhi_r_b
    # - cos(theta_r1)

    data = abstraction.access_SUSY_dataset_format_file(input_data_filename)

    dataset = abstraction.Dataset(
        data = data
    )

    # define data

    log.info("split data for cross-validation")
    features_train, features_test, targets_train, targets_test =\
        cross_validation.train_test_split(
            dataset.features(),
            dataset.targets(),
            train_size = 0.7
        )

    # grid search

    import itertools

    epochs       = [10, 100, 500, 1000]
    architecture = [200, 300, 300, 300, 200]

    grid_search_map = {
        "epoch":          [],
        "hidden_nodes":   [],
        "score_training": [],
        "score_test":     []
    }

    # define progress
    # itertools.product(architecture, repeat = n) yields
    # len(architecture) ** n combinations, so the total number of trainings
    # can be computed without enumerating them.
    count_total = len(epochs) * sum(
        len(architecture) ** nodes_count
        for nodes_count in range(1, len(architecture) + 1)
    )
    count = 0
    progress = shijian.Progress()
    progress.engage_quick_calculation_mode()

    for epoch in epochs:
        for nodes_count in range(1, len(architecture) + 1):
            combinations = itertools.product(architecture, repeat = nodes_count)
            for combination in combinations:
                hidden_nodes = list(combination)

                # define model

                log.info("define classification model")
                classifier = abstraction.Classification(
                    number_of_classes = 2,
                    hidden_nodes      = hidden_nodes,
                    epochs            = epoch
                )

                # train model

                log.info("fit model to dataset features and targets")
                classifier._model.fit(features_train, targets_train)
                #classifier.save()

                # predict and cross-validate training

                log.info("test trained model on training dataset")
                score_training = metrics.accuracy_score(
                    classifier._model.predict(features_train),
                    targets_train
                )
                score_test = metrics.accuracy_score(
                    classifier._model.predict(features_test),
                    targets_test
                )
                log.info("\ntraining-testing instance complete:")
                log.info("epoch:          {epoch}".format(
                    epoch = epoch
                ))
                log.info("architecture:   {architecture}".format(
                    architecture = hidden_nodes
                ))
                log.info("score training: {score_training}".format(
                    score_training = 100 * score_training
                ))
                log.info("score test:     {score_test}".format(
                    score_test = 100 * score_test
                ))
                pyprel.print_line()
                grid_search_map["epoch"].append(epoch)
                grid_search_map["hidden_nodes"].append(hidden_nodes)
                grid_search_map["score_training"].append(score_training)
                grid_search_map["score_test"].append(score_test)

                # save current grid search map so that partial results
                # are available while the search is still running
                shijian.export_object(
                    grid_search_map,
                    filename = "grid_search_map.pkl",
                    overwrite = True
                )

                # count is the number of completed trainings; float() keeps
                # the division a true division on Python 2 as well.
                count += 1
                print(progress.add_datum(
                    fraction = float(count) / count_total
                ))

    # table

    table_contents = [
        ["epoch", "architecture", "score training", "score testing"]
    ]
    table_contents.extend(
        [str(entry) for entry in row]
        for row in zip(
            grid_search_map["epoch"],
            grid_search_map["hidden_nodes"],
            grid_search_map["score_training"],
            grid_search_map["score_test"]
        )
    )
    print("\ngrid search map:\n")
    print(
        pyprel.Table(
            contents = table_contents,
        )
    )

    # plot

    architectures = shijian.unique_list_elements(grid_search_map["hidden_nodes"])

    # Map the string form of each architecture to its [epoch, test score]
    # pairs, in the order in which they were recorded.
    architecture_epoch_score = {}
    for unique_architecture in architectures:
        architecture_epoch_score[str(unique_architecture)] = [
            [recorded_epoch, recorded_score]
            for recorded_epoch, recorded_nodes, recorded_score in zip(
                grid_search_map["epoch"],
                grid_search_map["hidden_nodes"],
                grid_search_map["score_test"]
            )
            if recorded_nodes == unique_architecture
        ]

    figure = matplotlib.pyplot.figure()
    figure.set_size_inches(10, 10)
    axes = figure.add_subplot(1, 1, 1)
    axes.set_xscale("log")
    figure.suptitle("hyperparameter map", fontsize = 20)
    matplotlib.pyplot.xlabel("epochs")
    matplotlib.pyplot.ylabel("training test score")

    # one curve of test score versus epochs per architecture
    for key, value in architecture_epoch_score.items():
        curve_epochs = [element[0] for element in value]
        curve_scores = [element[1] for element in value]
        matplotlib.pyplot.plot(curve_epochs, curve_scores, label = key)

    matplotlib.pyplot.legend(loc = "center right")

    matplotlib.pyplot.savefig(
        "hyperparameter_map.eps",
        bbox_inches = "tight",
        format      = "eps"
    )

    # find best-scoring models

    # Find the 3 best scores.
    best_models = sorted(zip(
        grid_search_map["score_test"],
        grid_search_map["hidden_nodes"]),
        reverse = True
    )[:3]

    # table
    table_contents = [["architecture", "score testing"]]
    for model in best_models:
        table_contents.append([str(model[1]), str(model[0])])
    print("\nbest-scoring models:\n")
    print(
        pyprel.Table(
            contents = table_contents,
        )
    )

    log.info("")

    program.terminate()
Beispiel #21
0
def main():
    """
    Demonstrate shijian datetime styling together with pyprel rendering.

    Prints the current UTC time, the interval from the current UTC time to
    3 August 2016 (labelled "ICHEP 2016") as text, both quantities as segment
    displays, and a fixed UNIX timestamp styled as "YYYY-MM-DDTHHMMZ".
    Sections are separated by pyprel lines.
    """

    # strings shared by several of the printouts below
    style_interval = "{DD}:{HH}:{MM}:{SS}"
    key_segments   = " D  D     H  H     M  M     S  S"

    pyprel.print_line()

    print("current time UTC:\n")
    time_now_styled = shijian.style_datetime_object(
        datetime_object = datetime.datetime.utcnow(),
        style           = "HH hours MM minutes SS sounds day DD month YYYY"
    )
    print(time_now_styled)

    pyprel.print_line()

    # interval between the ICHEP 2016 date and a fresh reading of the clock
    time_ICHEP        = datetime.datetime(2016, 8, 3)
    time_now          = datetime.datetime.utcnow()
    interval_to_ICHEP = time_ICHEP - time_now

    print("time to ICHEP 2016 (DD:HH:MM:SS):\n")
    print(shijian.style_datetime_object(
        datetime_object = interval_to_ICHEP,
        style           = style_interval
    ))

    pyprel.print_line()

    segments_time_now = shijian.style_datetime_object(
        datetime_object = time_now,
        style           = "DD:HH:MM:SS"
    )

    print("current time UTC:")
    print(pyprel.render_segment_display(text = segments_time_now))
    print(key_segments)

    pyprel.print_line()

    segments_interval = shijian.style_datetime_object(
        datetime_object = interval_to_ICHEP,
        style           = style_interval
    )

    print("time to ICHEP 2016:")
    print(pyprel.render_segment_display(text = segments_interval))
    print(key_segments)

    pyprel.print_line()

    timestamp = 1487600377.0
    message = "convert UNIX timestamp {timestamp} to YYYY-MM-DDTHHMM".format(
        timestamp = timestamp
    )
    print(message)
    timestamp_styled = shijian.style_UNIX_timestamp(
        timestamp = timestamp,
        style     = "YYYY-MM-DDTHHMMZ"
    )
    print(timestamp_styled)

    pyprel.print_line()
Beispiel #22
0
def main():
    """
    Walk interactively through examples of pyprel printouts.

    Before each example a heading is printed and get_input is called with the
    prompt "Press Enter to continue.". The examples are, in order: printing a
    nested dictionary, printing an existing logo centred, rendering a banner
    from a name, rendering a segment display, and printing tables with
    various width, wrapping and delimiter settings.
    """

    print("\nexample: printout of dictionary")
    get_input("Press Enter to continue.")

    information = {
        "sample information": {
            "ID": 169888,
            "name": "ttH",
            "number of events": 124883,
            "cross section": 0.055519,
            "k factor": 1.0201,
            "generator": "pythia8",
            "variables": {
                "trk_n": 147,
                "zappo_n": 9001
            }
        }
    }

    pyprel.print_line()
    pyprel.print_dictionary(dictionary = information)
    pyprel.print_line()
    print(pyprel.dictionary_string(dictionary = information))
    pyprel.print_line()

    print("\nexample: printout of existing logo")
    get_input("Press Enter to continue.")

    # The logo is full of backslashes, so its rows are raw strings; written as
    # ordinary literals they would contain invalid escape sequences such as
    # "\_" and "\|". Joining with "\n" yields rows separated by newlines with
    # no trailing newline.
    text = "\n".join((
        r"   ____      _            _____ _                ",
        r"  / ___|___ | | ___  _ __|  ___| | _____      __ ",
        r" | |   / _ \| |/ _ \| '__| |_  | |/ _ \ \ /\ / / ",
        r" | |__| (_) | | (_) | |  |  _| | | (_) \ V  V /  ",
        r"  \____\___/|_|\___/|_|  |_|   |_|\___/ \_/\_/   ",
    ))

    pyprel.print_center(text = text)

    print("\nexample: rendering and printout of logo")
    get_input("Press Enter to continue.")

    name = "aria"
    logo = pyprel.render_banner(
        text = name.upper()
    )
    pyprel.print_line()
    print(pyprel.center_string(text = logo))
    pyprel.print_line()

    print("\nexample: rendering and printout segment display")
    get_input("Press Enter to continue.")

    print(pyprel.render_segment_display(text = "0123456789"))

    print("\nexample: printout of tables")
    get_input("Press Enter to continue.")

    # two-column table printed with several different settings
    table_contents = [
        ["heading 1", "heading 2"],
        ["some text", "some more text"],
        ["lots and lots and lots and lots and lots of text", "some more text"]
    ]
    print(
        pyprel.Table(
            contents = table_contents,
            column_width = 25
        )
    )
    print(
        pyprel.Table(
            contents = table_contents,
            table_width_requested = 30
        )
    )
    print(
        pyprel.Table(
            contents = table_contents,
            table_width_requested = 30,
            hard_wrapping = True
        )
    )
    print(
        pyprel.Table(
            contents = table_contents
        )
    )
    # print_center is given text, so the table is converted to a string first
    pyprel.print_center(
        text = str(
            pyprel.Table(
                contents = table_contents,
                table_width_requested = 30
            )
        )
    )
    print(
        pyprel.Table(
            contents = table_contents,
            column_width = 25,
            column_delimiter = "||"
        )
    )
    print(
        pyprel.Table(
            contents = table_contents,
            column_width = 25,
            row_delimiter = "~"
        )
    )

    # three-column table
    table_contents = [
        [
            "heading 1",
            "heading 2",
            "heading 3"
        ],
        [
            "some text",
            "some more text",
            "even more text"
        ],
        [
            "lots and lots and lots and lots and lots of text",
            "some more text",
            "some more text"
        ]
    ]
    print(
        pyprel.Table(
            contents = table_contents
        )
    )

    # four-column table
    table_contents = [
        [
            "heading 1",
            "heading 2",
            "heading 3",
            "heading 4"
        ],
        [
            "some text",
            "some more text",
            "even more text",
            "yeah more text"
        ],
        [
            "lots and lots and lots and lots and lots of text",
            "some more text",
            "some more text",
            "some more text"
        ]
    ]
    print(
        pyprel.Table(
            contents = table_contents
        )
    )
Beispiel #23
0
def line():
    """Call pyprel.print_line with "─" as the line character."""
    line_character = "─"
    pyprel.print_line(character=line_character)
Beispiel #24
0
def main():
    """
    Demonstrate shijian datetime styling together with pyprel rendering.

    Prints the current UTC time, the interval from the current UTC time to
    3 August 2016 (labelled "ICHEP 2016") as text, both quantities as segment
    displays, and a fixed UNIX timestamp styled as "YYYY-MM-DDTHHMMZ".
    Sections are separated by pyprel lines.
    """
    # strings shared by several of the printouts below
    interval_style = "{DD}:{HH}:{MM}:{SS}"
    segments_key = " D  D     H  H     M  M     S  S"

    pyprel.print_line()
    print("current time UTC:\n")
    styled_now = shijian.style_datetime_object(
        datetime_object=datetime.datetime.utcnow(),
        style="HH hours MM minutes SS sounds day DD month YYYY")
    print(styled_now)
    pyprel.print_line()

    # interval between the ICHEP 2016 date and a fresh reading of the clock
    moment_ICHEP = datetime.datetime(2016, 8, 3)
    moment_now = datetime.datetime.utcnow()
    interval_to_ICHEP = moment_ICHEP - moment_now
    print("time to ICHEP 2016 (DD:HH:MM:SS):\n")
    print(
        shijian.style_datetime_object(datetime_object=interval_to_ICHEP,
                                      style=interval_style))
    pyprel.print_line()

    segments_now = shijian.style_datetime_object(
        datetime_object=moment_now, style="DD:HH:MM:SS")
    print("current time UTC:")
    print(pyprel.render_segment_display(text=segments_now))
    print(segments_key)
    pyprel.print_line()

    segments_interval = shijian.style_datetime_object(
        datetime_object=interval_to_ICHEP, style=interval_style)
    print("time to ICHEP 2016:")
    print(pyprel.render_segment_display(text=segments_interval))
    print(segments_key)
    pyprel.print_line()

    timestamp = 1487600377.0
    message = "convert UNIX timestamp {timestamp} to YYYY-MM-DDTHHMM".format(
        timestamp=timestamp)
    print(message)
    styled_timestamp = shijian.style_UNIX_timestamp(
        timestamp=timestamp, style="YYYY-MM-DDTHHMMZ")
    print(styled_timestamp)
    pyprel.print_line()
def main(options):
    """
    Run a grid search over epochs and hidden-layer architectures for a
    classifier trained on a SUSY-format dataset file.

    The file named by the option "--data" is loaded, split 70 % / 30 % into
    training and test samples and one classifier is trained for every
    combination of epoch count and architecture. The resulting scores are
    printed as a table, plotted to "hyperparameter_map.eps" and the three best
    test scores are printed.

    Arguments:
        options: mapping of program options; the key "--data" is read here
            and the whole mapping is passed to propyte.Program.

    Side effects:
        Sets the module-level names `program` and `log`, writes
        "grid_search_map.pkl" (after every trained model) and
        "hyperparameter_map.eps", and terminates the program object.
    """

    global program
    program = propyte.Program(options=options,
                              name=name,
                              version=version,
                              logo=logo)
    global log
    from propyte import log

    log.info("")

    # access options and arguments
    input_data_filename = options["--data"]

    # define dataset

    # Load the SUSY dataset (https://archive.ics.uci.edu/ml/datasets/SUSY).
    # The first column is the class label (1 for signal, 0 for background),
    # followed by 18 features (8 low-level features and 10 high-level features):
    #
    # - lepton 1 pT
    # - lepton 1 eta
    # - lepton 1 phi
    # - lepton 2 pT
    # - lepton 2 eta
    # - lepton 2 phi
    # - missing energy magnitude
    # - missing energy phi
    # - MET_rel
    # - axial MET
    # - M_R
    # - M_TR_2
    # - R
    # - MT2
    # - S_R
    # - M_Delta_R
    # - dPhi_r_b
    # - cos(theta_r1)

    data = abstraction.access_SUSY_dataset_format_file(input_data_filename)

    dataset = abstraction.Dataset(data=data)

    # define data

    log.info("split data for cross-validation")
    features_train, features_test, targets_train, targets_test =\
        cross_validation.train_test_split(
            dataset.features(),
            dataset.targets(),
            train_size = 0.7
        )

    # grid search

    import itertools

    epochs = [10, 100, 500, 1000]
    architecture = [200, 300, 300, 300, 200]

    grid_search_map = {
        "epoch": [],
        "hidden_nodes": [],
        "score_training": [],
        "score_test": []
    }

    # define progress
    # itertools.product(architecture, repeat=n) yields len(architecture) ** n
    # combinations, so the total number of trainings can be computed without
    # enumerating them.
    count_total = len(epochs) * sum(
        len(architecture) ** nodes_count
        for nodes_count in range(1, len(architecture) + 1))
    count = 0
    progress = shijian.Progress()
    progress.engage_quick_calculation_mode()

    for epoch in epochs:
        for nodes_count in range(1, len(architecture) + 1):
            combinations = itertools.product(architecture, repeat=nodes_count)
            for combination in combinations:
                hidden_nodes = list(combination)

                # define model

                log.info("define classification model")
                classifier = abstraction.Classification(
                    number_of_classes=2,
                    hidden_nodes=hidden_nodes,
                    epochs=epoch)

                # train model

                log.info("fit model to dataset features and targets")
                classifier._model.fit(features_train, targets_train)
                #classifier.save()

                # predict and cross-validate training

                log.info("test trained model on training dataset")
                score_training = metrics.accuracy_score(
                    classifier._model.predict(features_train), targets_train)
                score_test = metrics.accuracy_score(
                    classifier._model.predict(features_test), targets_test)
                log.info("\ntraining-testing instance complete:")
                log.info("epoch:          {epoch}".format(epoch=epoch))
                log.info("architecture:   {architecture}".format(
                    architecture=hidden_nodes))
                log.info("score training: {score_training}".format(
                    score_training=100 * score_training))
                log.info("score test:     {score_test}".format(
                    score_test=100 * score_test))
                pyprel.print_line()
                grid_search_map["epoch"].append(epoch)
                grid_search_map["hidden_nodes"].append(hidden_nodes)
                grid_search_map["score_training"].append(score_training)
                grid_search_map["score_test"].append(score_test)

                # save current grid search map so that partial results
                # are available while the search is still running
                shijian.export_object(grid_search_map,
                                      filename="grid_search_map.pkl",
                                      overwrite=True)

                # count is the number of completed trainings; float() keeps
                # the division a true division on Python 2 as well.
                count += 1
                print(progress.add_datum(fraction=float(count) / count_total))

    # table

    table_contents = [[
        "epoch", "architecture", "score training", "score testing"
    ]]
    table_contents.extend(
        [str(entry) for entry in row]
        for row in zip(grid_search_map["epoch"],
                       grid_search_map["hidden_nodes"],
                       grid_search_map["score_training"],
                       grid_search_map["score_test"]))
    print("\ngrid search map:\n")
    print(pyprel.Table(contents=table_contents, ))

    # plot

    architectures = shijian.unique_list_elements(
        grid_search_map["hidden_nodes"])

    # Map the string form of each architecture to its [epoch, test score]
    # pairs, in the order in which they were recorded.
    architecture_epoch_score = {}
    for unique_architecture in architectures:
        architecture_epoch_score[str(unique_architecture)] = [
            [recorded_epoch, recorded_score]
            for recorded_epoch, recorded_nodes, recorded_score in zip(
                grid_search_map["epoch"],
                grid_search_map["hidden_nodes"],
                grid_search_map["score_test"])
            if recorded_nodes == unique_architecture
        ]

    figure = matplotlib.pyplot.figure()
    figure.set_size_inches(10, 10)
    axes = figure.add_subplot(1, 1, 1)
    axes.set_xscale("log")
    figure.suptitle("hyperparameter map", fontsize=20)
    matplotlib.pyplot.xlabel("epochs")
    matplotlib.pyplot.ylabel("training test score")

    # one curve of test score versus epochs per architecture
    for key, value in architecture_epoch_score.items():
        curve_epochs = [element[0] for element in value]
        curve_scores = [element[1] for element in value]
        matplotlib.pyplot.plot(curve_epochs, curve_scores, label=key)

    matplotlib.pyplot.legend(loc="center right")

    matplotlib.pyplot.savefig("hyperparameter_map.eps",
                              bbox_inches="tight",
                              format="eps")

    # find best-scoring models

    # Find the 3 best scores.
    best_models = sorted(zip(grid_search_map["score_test"],
                             grid_search_map["hidden_nodes"]),
                         reverse=True)[:3]

    # table
    table_contents = [["architecture", "score testing"]]
    for model in best_models:
        table_contents.append([str(model[1]), str(model[0])])
    print("\nbest-scoring models:\n")
    print(pyprel.Table(contents=table_contents, ))

    log.info("")

    program.terminate()
Beispiel #26
0
def main():
    """
    Demonstrate pyprel table printing.

    Prints, separated by pyprel lines: a table converted from a Markdown
    table, a hand-composed table, a table built with a list comprehension and
    zip over numpy data, and two tables with empty delimiters used to align
    labelled values.
    """
    pyprel.print_line()

    print("\nconvert Markdown table to pyprel table\n")

    table_Markdown = """
|**variable 1**|**variable 2**|
|--------------|--------------|
|1             |0.23545       |
|2             |0.63523       |
|3             |0.55231       |
|4             |0.89563       |
|5             |0.55345       |
"""
    table_contents = pyprel.table_Markdown_to_table_pyprel(table=table_Markdown)
    print(pyprel.Table(contents=table_contents))

    pyprel.print_line()

    print("\ncompose and print table\n")

    # First row holds the column headings; each later row is one record.
    table_contents = [
        ["number", "letter"],
        [1, "a"],
        [2, "b"],
    ]
    print(pyprel.Table(contents=table_contents))

    pyprel.print_line()

    print("\ncompose and print a table using list comprehensions and zip\n")

    # Ten sample points on [0, pi] and their sines, paired row by row.
    data_x = numpy.linspace(0, numpy.pi, 10)
    data_y = [numpy.sin(x) for x in data_x]
    rows = [[x, y] for x, y in zip(data_x, data_y)]
    print(pyprel.Table(contents=[["x", "y"]] + rows))

    pyprel.print_line()

    print("\ncompose aligned printouts of data using tables\n")

    # Both aligned printouts share the same options: empty column and row
    # delimiters and a requested table width of 40.
    alignment_options = {
        "column_delimiter": "",
        "row_delimiter": "",
        "table_width_requested": 40,
    }

    table_contents = [
        ["msg:", "1536155474294"],
        ["signature:", "0C118313F6D19"],
        ["data:", "1536155474294"],
    ]
    print(pyprel.Table(contents=table_contents, **alignment_options))

    # Same labels, but with values longer than the requested width.
    table_contents = [
        ["msg:", "15361554742941536155474294153615547429415361554742941536155474294"],
        ["signature:", "0C118313F6D190C118313F6D190C118313F6D190C118313F6D190C118313F6D19"],
        ["data:", "15361554742941536155474294153615547429415361554742941536155474294"],
    ]
    print(pyprel.Table(contents=table_contents, **alignment_options))

    pyprel.print_line()