Example #1
def train():
    print(f"\n***** Initializing *****\n")
    x_train, x_test, y_train, y_test = load()
    y_test_original = y_test.copy()

    print(f"\n***** One-Hot Encoding Dataset Labels *****\n")
    encoder = LabelEncoder()
    encoder.fit(y_train)
    y_train = to_categorical(encoder.transform(y_train))
    y_test = to_categorical(encoder.transform(y_test))

    model = create_model()
    history = model.fit(x_train,
                        y_train,
                        validation_split=0.2,
                        epochs=2,
                        batch_size=5)
    model.save("gas_predictor")
    # model.predict returns softmax probabilities; argmax gives the encoded class index
    y_pred_original = [yp.argmax() for yp in model.predict(x_test)]
    y_pred = to_categorical(y_pred_original, num_classes=y_test.shape[1])

    results = classification_report(y_test,
                                    y_pred,
                                    target_names=list(MOLECULES.keys()))
    print(f"\nClassification Report:\n{results}\n")
    plot_results(history)
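
The snippet above ends by handing the Keras History object to plot_results(history), which this page does not show. Below is a minimal, hypothetical sketch of such a helper, assuming the standard history.history dictionary with 'loss'/'val_loss' and optional 'accuracy'/'val_accuracy' keys; it is an illustration only, not the original project's implementation.

import matplotlib.pyplot as plt

def plot_results(history):
    """Hypothetical helper: plot training/validation loss and accuracy curves."""
    hist = history.history
    fig, (ax_loss, ax_acc) = plt.subplots(1, 2, figsize=(10, 4))
    # loss curves
    ax_loss.plot(hist["loss"], label="train")
    if "val_loss" in hist:
        ax_loss.plot(hist["val_loss"], label="validation")
    ax_loss.set_title("Loss")
    ax_loss.set_xlabel("epoch")
    ax_loss.legend()
    # accuracy curves (the key name depends on the compiled metrics)
    if "accuracy" in hist:
        ax_acc.plot(hist["accuracy"], label="train")
        if "val_accuracy" in hist:
            ax_acc.plot(hist["val_accuracy"], label="validation")
        ax_acc.set_title("Accuracy")
        ax_acc.set_xlabel("epoch")
        ax_acc.legend()
    plt.tight_layout()
    plt.show()
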
 def RunBagFiles(self):
     """Process the bag file via the rosbag API."""
     if self.bag_file_path:  # use rosbag API
         rospy.loginfo(
             "Processing file using rosbag API: {}. Please wait..".format(
                 self.bag_file_path))
         if self.bag_secs_to_skip > 0:
             rospy.loginfo(
                 "Skipping {} seconds from the start of the bag file.".
                 format(self.bag_secs_to_skip))
         bag = rosbag.Bag(self.bag_file_path)
         total_time_secs = int(bag.get_end_time() - bag.get_start_time())
         init_t = None
         last_info_time_secs = int(bag.get_start_time())
         for topic, msg, t in bag.read_messages(topics=[
                 self.gt_topic, self.dr_topic, self.ekf_topic,
                 self.depth_topic, self.imu_topic
         ]):
             if not init_t:
                 init_t = t
                 continue
             if (t - init_t).to_sec() < self.bag_secs_to_skip:
                 continue
             if rospy.is_shutdown():
                 break
             elapsed_time_secs = int(t.to_sec() - bag.get_start_time())
             if elapsed_time_secs % 100 == 0 and elapsed_time_secs != last_info_time_secs:
                 last_info_time_secs = elapsed_time_secs
                 rospy.loginfo("Elapsed time: {}/{} [s]".format(
                     elapsed_time_secs, total_time_secs))
             if topic == self.gt_topic:
                 self.GtCallback(msg)
             elif topic == self.dr_topic:
                 self.DrCallback(msg)
             elif topic == self.ekf_topic:
                 self.EkfCallback(msg)
             elif topic == self.depth_topic:
                 self.DepthCallback(msg)
             elif topic == self.imu_topic:
                 self.ImuCallback(msg)
         bag.close()
         rospy.loginfo("Bag processed.")
         if self.plot_results:
             rospy.loginfo("Preparing plots. Please wait..")
             if not os.path.exists(self.save_dir):
                 os.makedirs(self.save_dir)
             plots.plot_results(self.results, self.save_dir, self.use_gt,
                                self.use_dr, self.use_ekf, self.use_depth,
                                self.use_imu)
         if self.compute_error:
             rospy.loginfo("Computing error. Please wait..")
             errorAnalysis.compute_results_error(self.error_results,
                                                 self.use_gt, self.use_dr,
                                                 self.use_ekf,
                                                 self.use_depth,
                                                 self.use_imu)
         rospy.spin()
 def run(self):
     """Either process bag file via rosbag API or subscribe to topics"""
     if self.bag_file_path:  # use rosbag API
         rospy.loginfo(
             "Processing file using rosbag API: {}. Please wait..".format(
                 self.bag_file_path))
         if self.bag_secs_to_skip > 0:
             rospy.loginfo(
                 "Skipping {} seconds from the start of the bag file.".
                 format(self.bag_secs_to_skip))
         bag = rosbag.Bag(self.bag_file_path)
         total_time_secs = int(bag.get_end_time() - bag.get_start_time())
         init_t = None
         last_info_time_secs = int(bag.get_start_time())
         for topic, msg, t in bag.read_messages(
                 topics=[self.pose_topic, self.gps_topic, self.imu_topic]):
             if not init_t:
                 init_t = t
                 continue
             if (t - init_t).to_sec() < self.bag_secs_to_skip:
                 continue
             if rospy.is_shutdown():
                 break
             elapsed_time_secs = int(t.to_sec() - bag.get_start_time())
             if elapsed_time_secs % 100 == 0 and elapsed_time_secs != last_info_time_secs:
                 last_info_time_secs = elapsed_time_secs
                 rospy.loginfo("Elapsed time: {}/{} [s]".format(
                     elapsed_time_secs, total_time_secs))
             if topic == self.gps_topic:
                 self.__gps_callback(msg)
             elif topic == self.pose_topic:
                 self.__pose_callback(msg)
             elif topic == self.imu_topic:
                 self.__imu_callback(msg)
         bag.close()
         rospy.loginfo("Bag processed.")
         if self.plot_results:
             rospy.loginfo("Preparing plots. Please wait..")
             if not os.path.exists(self.save_dir):
                 os.makedirs(self.save_dir)
             plots.plot_results(self.__results, self.__fusion_name,
                                self.save_dir, self.use_gps, self.use_pose)
     else:  # subscribe to topics
         self.gps_sub = rospy.Subscriber(self.gps_topic,
                                         NavSatFix,
                                         self.__gps_callback,
                                         queue_size=100)
         self.pose_sub = rospy.Subscriber(self.pose_topic,
                                          PoseWithCovarianceStamped,
                                          self.__pose_callback,
                                          queue_size=100)
         self.imu_sub = rospy.Subscriber(self.imu_topic,
                                         Imu,
                                         self.__imu_callback,
                                         queue_size=1000)
         rospy.spin()
def dataflow(X, y=None, cmd_plot=False):
    '''
    Primary function responsible for predictions and GUI output from a pre-processed file.
    Returns the signals used for plotting the features as well as the generated summary statistics.
    '''
    epochs = epochs_from_prep(X.copy(),
                              None,
                              settings.EPOCH_LENGTH,
                              settings.OVERLAP_FACTOR,
                              settings.SAMPLE_RATE,
                              filter=False,
                              removal=True)
    epochs = dataset(epochs, shuffle=False, exclude_ptt=False,
                     only_rwa=True).epochs
    epochs = gru(load_graph=True, path=settings.BEST_MODEL).predict(epochs)
    epochs.sort(key=lambda x: x.index_start, reverse=False)
    yhat, timecol = reconstruct(X, epochs)
    full = epochs_from_prep(X,
                            None,
                            settings.EPOCH_LENGTH,
                            settings.OVERLAP_FACTOR,
                            settings.SAMPLE_RATE,
                            filter=False,
                            removal=False)
    full.sort(key=lambda x: x.index_start, reverse=False)
    wake, nrem, rem, illegal = timeseries(full)
    summary = summary_statistics(timecol, yhat, wake, nrem, rem, illegal)
    X, y, mask = make_features(X, y, settings.SAMPLE_RATE, removal=False)
    X = transpose(X)
    ss = X[6].copy().astype(float)
    for i, _ in enumerate(ss):
        if X[7, i]:
            ss[i] = 2.0
        elif X[5, i]:
            ss[i] = 0.0
    # assemble the tuple that is returned and, when cmd_plot is set, unpacked into plot_results
    data = (X[0] / settings.SAMPLE_RATE,
            [X[1], X[2], X[3], X[4], ss, yhat],
            ['RR', 'RWA', 'PTT', 'PWA', 'Sleep stage', 'Arousals'],
            region(X[5]), region(X[7]), None, None,
            int(X[0, -1] / settings.SAMPLE_RATE))
    if cmd_plot:
        d = list(data)
        if y is not None:
            d[1] += [y]
            d[2] += ['y']
            d[2][5] = 'yhat'
        plot_results(*d)
    return data, summary
def preprocess(subject, arousals=True):
    '''
    Preprocessing of a single subject. Each feature and annotation is
    handled by submodules. This method creates the X matrix and y vector.
    '''
    # Get data signals from container
    sig_ECG = subject.ECG_signal
    sig_PPG = subject.PPG_signal
    anno_SleepStage = subject.SleepStage_anno

    # Gets R-peak indexes and amplitudes
    index_sec, amp = QRS(subject)
    index = (array(index_sec) * settings.SAMPLE_RATE).astype(int)

    # Preprocess Features
    x_RR, x_RWA = RR(index_sec), array(amp).astype(float)
    x_PTT, x_PWA = PPG(sig_PPG, index_sec)
    x_SS = SleepStageBin(anno_SleepStage, subject.frequency, index)

    # Collect Matrix
    features = [index, x_RR, x_RWA, x_PTT, x_PWA, x_SS]
    X = empty((len(features), len(x_RR) - 1))
    for i, feat in enumerate(features):
        X[i] = feat[1:len(feat)]
    X = transpose(X)

    # Include Arousals
    if arousals:
        anno_Arousal = subject.Arousal_anno
        y_AA = ArousalBin(anno_Arousal, subject.frequency, index)
        y = array(y_AA[1:len(y_AA)])
        return X, y

    # plots
    Xt = transpose(X)
    plot_results(Xt[0], [x for x in Xt[1:]], ['rr+', 'rwa', 'ptt', 'pwa'],
                 None, None, None, None, None, len(Xt[0]))

    return X
Example #6
def find_best_settings_direct(G, filter_dim, epsilon, trisection_lim,
                              train_iterations, predictor):
    if filter_dim < 1:
        raise Exception("The filter dimensions cannot be less than 1!")

    starting_filter_parameters = [[0, 1] for x in range(int(filter_dim))]
    rectangles = [
        direct.Rectangle(G, starting_filter_parameters, -1, 0,
                         train_iterations, np.nan, predictor)
    ]
    counter, direct_iteration = 0, 1

    start = time.time()
    while True:
        indexes = direct.find_optimal_rectangles(rectangles, epsilon)
        rectangles, counter, predictor = direct.trisect(
            indexes, rectangles, counter, direct_iteration, trisection_lim,
            train_iterations, predictor)
        direct_iteration += 1
        if counter >= trisection_lim: break
        fig = direct.draw_rectangles(rectangles)
    end = time.time()
    logger.log("The time it took for the DIRECT algorithm in total was",
               end - start, "secs or", (end - start) / 60, "mins.")

    fig = direct.draw_rectangles(rectangles)

    logger.log(counter, "trisections have been performed.")

    #logger.log("The lowest loss value was", loss_min, "and was found at the following filter:", best_rectangle.centre, ". At this filter, the sihlouette value was", best_rectangle.sihl)

    rec_results, rec_configs, train_results, train_configs = list_results(
        rectangles)

    fig2 = plots.plot_results(train_results, train_configs, rectangles,
                              predictor)
    plt.show()

    sorted_results = [
        direct.sort_rectangles(rectangles, "loss"),
        direct.sort_rectangles(rectangles, "sihlouette")
    ]

    if predictor.test_predictor_acc:
        results_to_csv.cvalidation_data_to_csv(predictor.cvalidation_data)
    results_to_csv.export_results_to_csv(rec_results, rec_configs,
                                         train_results, train_configs,
                                         predictor, True, rectangles,
                                         sorted_results)

    return rectangles, sorted_results, counter, predictor
Example #7
def main():
    global encoding

    args = parse_args()

    # determine whether to use the aligned or unaligned data
    assert args.aligned in [0, 1], "Too many instances of --aligned switch, should be 0 or 1"
    aligned = bool(args.aligned)

    # and decide between feature encodings and character embeddings
    assert args.ortho in [0, 1], "Too many instances of --ortho switch, should be 0 or 1"
    ortho = bool(args.ortho)

    # load data
    data_file = Path(args.data)
    assert data_file.exists() and data_file.is_file(), "Data file {} does not exist".format(data_file)
    # determine model
    assert args.model in MODELS, "Model should be one of {}".format(MODELS)
    # determine path to alphabet file & encoding
    alphabet_file = None
    if args.model == "ipa":
        encoding = 'utf-16'
        alphabet_file = Path("../data/alphabets/ipa.csv")
    elif args.model == "asjp":
        encoding = 'ascii'
        alphabet_file = Path("../data/alphabets/asjp.csv")
    elif args.model == 'latin':
        encoding = 'utf-16'
        alphabet_file = Path("../data/alphabets/latin.csv")
    # load data from file
    assert alphabet_file.exists() and alphabet_file.is_file(), "Alphabet file {} does not exist".format(alphabet_file)
    alphabet = Alphabet(alphabet_file, encoding=encoding, ortho=ortho)

    # number of epochs
    assert isinstance(args.epochs, int), "Epochs not int, but {}".format(type(args.epochs))
    assert args.epochs > 0, "Epochs out of range: {}".format(args.epochs)
    epochs = args.epochs

    # number of hidden layers
    # assert args.n_hidden > 0, "Number of hidden layers should be at least 1 ;)"
    # n_hidden = args.n_hidden

    # determine output directories, create them if they do not exist
    out_tag = "_{}".format(args.out_tag)
    # and tag for files with train/test indices
    indices_tag = args.out_tag
    plots_dir = Path("../out/plots{}_many2one".format(out_tag))
    if not plots_dir.exists():
        plots_dir.mkdir(parents=True)
    results_dir = Path("../out/results{}_many2one".format(out_tag))
    if not results_dir.exists():
        results_dir.mkdir(parents=True)
    # create file for results
    result_file_path = results_dir / "m2one_{}{}{}.txt".format(args.model,
                                                               "_aligned" if aligned else "",
                                                               "_ortho" if ortho else "")
    result_file_path.touch()
    result_file = result_file_path.open('w', encoding=encoding)

    # determine ancestor
    ancestor = args.ancestor

    # create cognate sets
    cognate_sets = []
    data = data_file.open(encoding='utf-16').read().split("\n")
    cols = data[HEADER_ROW].split(COLUMN_SEPARATOR)
    langs = cols[2:]

    # import tensorflow here to comply with the wiki entry https://wiki.lsv.uni-saarland.de/doku.php?id=cluster
    import tensorflow as tf
    # set random seed for weights
    tf.random.set_seed(seed=42)

    # start data extraction
    for li, line in enumerate(data[HEADER_ROW:]):
        # have to do that because the file with the latin characters doesn't contain aligned cognate sets
        if args.model == 'latin':
            if line == "":
                continue
        # but the other two do
        elif aligned:
            if line == "" or li % 2 == 0:
                continue
        # the unaligned case
        else:
            if line == "" or li % 2 != 0:
                continue
        row_split = line.split(COLUMN_SEPARATOR)
        id = row_split[ID_COLUMN]
        concept = row_split[CONCEPT_COLUMN]
        words = row_split[CONCEPT_COLUMN + 1:]
        cognate_dict = {}
        assert len(langs) == len(words), "Langs / Words mismatch, expected {}, got {}".format(len(langs), len(words))
        for lang, word in zip(langs, words):
            cognate_dict[lang] = alphabet.translate(word)
        cognate_set = CognateSet(id=id,
                                 concept=concept,
                                 ancestor=ancestor,
                                 cognate_dict=cognate_dict,
                                 alphabet=alphabet)
        cognate_sets.append(cognate_set)


    # prepare train_test_split
    total_data = {str(i + 1): cognate_set for i, cognate_set in enumerate(cognate_sets)}
    train_indices = set(total_data.keys())
    runs = cross_validation_runs(5, train_indices)
    # test_indices = Path("../data/{}_test_indices.txt".format(indices_tag)).open('r').read().split("\n")
    # train_data = {i: cognate_set for i, cognate_set in data.items() if i in train_indices}
    # test_data = {i: cognate_set for i, cognate_set in data.items() if i in test_indices}

    # define model
    model, optimizer, loss_object = create_many_to_one_model(lstm_dim=128,
                                                             timesteps=len(langs) - 1,
                                                             data_dim=alphabet.feature_dim,
                                                             fc_dim=100,
                                                             output_dim=alphabet.feature_dim)
    model.summary()

    # save model weights for reset
    initial_weights = model.get_weights()

    words_true = []
    words_pred = []
    wts = []
    wps = []
    epoch_losses = []
    batch_losses = []

    # Training with cross-validation
    for i, run in enumerate(runs):
        print("***** Cross-validation run [{}/{}] *****".format(i + 1, len(runs)))
        # reload initial model weights
        model.set_weights(initial_weights)
        # get train & test folds
        train_data = {i: cognate_set for i, cognate_set in total_data.items() if i in run['train']}
        test_data = {i: cognate_set for i, cognate_set in total_data.items() if i in run['test']}
        print("***** Start training *****")
        for epoch in range(1, epochs + 1):
            words_true.clear()
            words_pred.clear()
            batch_losses.clear()
            for batch, cognate_set in train_data.items():
                output_characters = []
                for lang_array in cognate_set:
                    target = tf.keras.backend.expand_dims(lang_array.pop(ancestor).to_numpy(), axis=0)
                    target = tf.dtypes.cast(target, tf.float32)
                    data = []
                    for lang, vec in lang_array.items():
                        data.append(list(vec))
                    data = np.array(data)
                    data = tf.keras.backend.expand_dims(data, axis=0)
                    data = tf.dtypes.cast(data, tf.float32)
                    # data = tf.reshape(data, (1, -1))
                    with tf.GradientTape() as tape:
                        output = model(data)
                        loss = loss_object(target, output)
                        batch_losses.append(float(loss))
                        gradients = tape.gradient(loss, model.trainable_weights)
                        optimizer.apply_gradients(zip(gradients, model.trainable_weights))
                        output_characters.append(alphabet.get_char_by_vector(output))
                words_pred.append("".join(output_characters))
                words_true.append(str(cognate_set.ancestor_word))
                # print("".join(output_characters), str(cognate_set.ancestor_word))
                if int(batch) % 100 == 0:
                    print("Epoch [{}/{}], Batch [{}/{}]".format(epoch, epochs, batch, len(cognate_sets)))
            # calculate mean epoch loss
            mean_loss = np.mean(batch_losses)
            epoch_losses.append(mean_loss)
            print("Epoch[{}]/[{}], mean batch loss = {}".format(epoch, epochs, mean_loss))
            # calculate levenshtein distance
            ld = LevenshteinDistance(true=words_true, pred=words_pred)
            ld.print_distances()
            ld.print_percentiles()

        words_pred.clear()
        words_true.clear()
        print("***** Training finished *****")
        print()

        # Testing
        # Do the same thing as above with the test data, but don't collect the gradients
        # and don't backpropagate
        print("***** Start testing *****")
        for i, cognate_set in test_data.items():
            output_characters = []
            for lang_array in cognate_set:
                target = tf.keras.backend.expand_dims(lang_array.pop(ancestor).to_numpy(), axis=0)
                target = tf.dtypes.cast(target, tf.float32)
                data = []
                for lang, vec in lang_array.items():
                    data.append(list(vec))
                data = np.array(data)
                data = tf.keras.backend.expand_dims(data, axis=0)
                data = tf.dtypes.cast(data, tf.float32)
                output = model(data)
                # loss = loss_object(target, output)
                output_characters.append(alphabet.get_char_by_vector(output))
            # compile the reconstructed word
            words_pred.append("".join(output_characters))
            # save the true word for the distance calculation
            words_true.append(str(cognate_set.ancestor_word))
        wts.extend(words_true)
        wps.extend(words_pred)

        # create plots
        ld = LevenshteinDistance(words_true, words_pred)
        ld.print_distances()
        ld.print_percentiles()
        print("***** Testing finished *****")

    # save results after last run
    outfile = plots_dir / "many2one_test_{}{}{}.jpg".format(args.model, "_aligned" if aligned else "",
                                                        "_ortho" if ortho else "")
    title = "Model [Test]: LSTM {}{}{}\n 5 cross-validation folds" \
        .format(", " + args.model, ", aligned" if aligned else "", ", orthographic" if ortho else "")
    ld = LevenshteinDistance(wts, wps)
    plot_results(title=title,
                 distances={"=<" + str(d): count / 5 for d, count in ld.distances.items()},
                 percentiles={"=<" + str(d): perc for d, perc in ld.percentiles.items()},
                 mean_dist=ld.mean_distance,
                 mean_dist_norm=ld.mean_distance_normalized,
                 losses=[],
                 outfile=Path(outfile),
                 testing=True)
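
This example depends on a cross_validation_runs(5, train_indices) helper that is not shown on this page; from the way its output is consumed (each run exposes run['train'] and run['test'] index sets), it appears to split the index set into k folds. A minimal sketch of such a splitter, written here only as an illustration and not the original implementation, could look like this:

import random

def cross_validation_runs(k, indices, seed=42):
    """Hypothetical k-fold splitter returning [{'train': set, 'test': set}, ...]."""
    indices = list(indices)
    random.Random(seed).shuffle(indices)
    folds = [indices[i::k] for i in range(k)]  # round-robin assignment to k folds
    runs = []
    for i in range(k):
        test = set(folds[i])
        train = set(indices) - test
        runs.append({"train": train, "test": test})
    return runs
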
Example #8
def train():

    # Command line call I used:
    # python ciobanu_rnn.py --data=ipa --model=ipa --epochs=10 --out_tag=test --model=ipa --ancestor=ancestor

    global encoding
    args = parser_args()
    # determine whether the model should use feature encodings or character embeddings
    assert args.ortho in [
        0, 1
    ], "Too many instances of --orthographic switch, should be 0 or 1"
    ortho = bool(args.ortho)
    # determine whether to use the aligned or unaligned data
    assert args.aligned in [
        0, 1
    ], "Too many instances of --aligned switch, should be 0 or 1"
    aligned = bool(args.aligned)
    # load data
    data_file = None
    if args.data == "ipa":
        encoding = 'utf-16'
        data_file = Path("../data/romance_ciobanu_ipa.csv")
    elif args.data == "asjp":
        encoding = 'ascii'
        data_file = Path("../data/romance_ciobanu_asjp.csv")
    assert data_file.exists() and data_file.is_file(
    ), "Data file {} does not exist".format(data_file)
    # determine model
    assert args.model in MODELS, "Model should be one of {}".format(MODELS)
    # determine path to alphabet file & encoding
    alphabet_file = None
    if args.model == "ipa":
        encoding = 'utf-16'
        alphabet_file = Path("../data/alphabets/ipa.csv")
    elif args.model == "asjp":
        encoding = 'ascii'
        alphabet_file = Path("../data/alphabets/asjp.csv")
    # load data from file
    assert alphabet_file.exists() and alphabet_file.is_file(
    ), "Alphabet file {} does not exist".format(alphabet_file)
    alphabet = Alphabet(alphabet_file, encoding=encoding, ortho=ortho)
    assert isinstance(args.epochs,
                      int), "Epochs not int, but {}".format(type(args.epochs))
    assert args.epochs > 0, "Epochs out of range: {}".format(args.epochs)
    epochs = args.epochs

    # ancestor
    ancestor = args.ancestor

    # determine output directories, create them if they do not exist
    out_tag = "_{}".format(args.out_tag)
    plots_dir = Path("../out/plots{}_deep".format(out_tag))
    if not plots_dir.exists():
        plots_dir.mkdir(parents=True)
    results_dir = Path("../out/results{}_deep".format(out_tag))
    if not results_dir.exists():
        results_dir.mkdir(parents=True)
    # create file for results
    result_file_path = results_dir / "deep_{}{}{}.txt".format(
        args.model, "_aligned" if aligned else "", "_ortho" if ortho else "")
    result_file_path.touch()
    result_file = result_file_path.open('w', encoding=encoding)

    print("alphabet:")
    print(alphabet)

    # initialize model
    model, optimizer, loss_object = create_model(
        input_dim=alphabet.get_feature_dim(),
        embedding_dim=28,
        context_dim=128,
        output_dim=alphabet.get_feature_dim())

    model.summary()

    print("data_file: {}".format(data_file.absolute()))
    print("model: {}, orthographic={}, aligned={}".format(
        args.model, ortho, aligned))
    print("alphabet: {}, read from {}".format(args.model,
                                              alphabet_file.absolute()))
    print("epochs: {}".format(epochs))

    # create cognate sets

    cognate_sets = []

    data = data_file.open(encoding='utf-16').read().split("\n")
    cols = data[HEADER_ROW].split(COLUMN_SEPARATOR)
    langs = cols[2:]
    print("langs")
    print(langs)

    for li, line in enumerate(data[HEADER_ROW:]):
        if aligned:
            if line == "" or li % 2 != 0:
                continue
        else:
            if line == "" or li % 2 == 0:
                continue
        row_split = line.split(COLUMN_SEPARATOR)
        id = row_split[ID_COLUMN]
        concept = row_split[CONCEPT_COLUMN]
        words = row_split[CONCEPT_COLUMN + 1:]
        # print("words")
        # print(words)
        cognate_dict = {}
        assert len(langs) == len(
            words), "Langs / Words mismatch, expected {}, got {}".format(
                len(langs), len(words))
        for lang, word in zip(langs, words):
            # print("lang, word")
            # print(lang, word)
            cognate_dict[lang] = alphabet.translate(word)
        cs = CognateSet(id=id,
                        concept=concept,
                        ancestor=ancestor,
                        cognate_dict=cognate_dict,
                        alphabet=alphabet)
        cognate_sets.append(cs)

    # maybe we needn't do the evaluation, since we mainly want to know how
    # the model behaves with the different inputs

    split_index = int(valid_size * len(cognate_sets))
    train_data = cognate_sets[:split_index]
    valid_data = cognate_sets[split_index:]
    print("train size: {}".format(len(train_data)))
    print("valid size: {}".format(len(valid_data)))
    # cognate_sets = cognate_sets[10:30]
    # print("cognate_sets in ral")
    # print(cognate_sets)

    words_true = []
    words_pred = []
    output_characters = []  # collects the predicted characters of the current word
    epoch_losses = []
    batch_losses = []

    for epoch in range(epochs):
        # reset lists
        epoch_losses.clear()
        words_true.clear()
        words_pred.clear()
        # iterate over the cognate sets
        for i, cs in enumerate(cognate_sets):
            # reset batch loss
            batch_losses.clear()
            # iterate over the character embeddings
            for j, char_embeddings in enumerate(cs):
                # add a dimension to the latin character embedding (ancestor embedding)
                # we add a dimension because we use a batch size of 1 and TensorFlow does not
                # automatically insert the batch size dimension
                target = tf.keras.backend.expand_dims(
                    char_embeddings.pop(cs.ancestor).to_numpy(), axis=0)
                # convert the latin character embedding to float32 to match the dtype of the output (line 137)
                target = tf.dtypes.cast(target, tf.float32)
                # iterate through the embeddings
                # initialize the GradientTape
                with tf.GradientTape(persistent=True) as tape:
                    for lang, embedding in char_embeddings.items():
                        # add a dimension to the embeddings
                        data = tf.keras.backend.expand_dims(
                            embedding.to_numpy(), axis=0)
                        output = model(data)
                        # calculate the loss
                        loss = loss_object(target, output)
                        epoch_losses.append(float(loss))
                        batch_losses.append(float(loss))
                        # calculate the gradients
                        gradients = tape.gradient(loss,
                                                  model.trainable_weights)
                        # backpropagate
                        optimizer.apply_gradients(
                            zip(gradients, model.trainable_weights))
                        # convert the character vector into a character
                    output_char = alphabet.get_char_by_feature_vector(output)
                    # append the converted vectors to a list so we can see the reconstructed word
                    output_characters.append(output_char)
            # append the reconstructed word and the ancestor to the true/pred lists
            words_pred.append("".join(output_characters))
            words_true.append(str(cs.ancestor))
            # clear the list of output characters so we can create another word
            output_characters.clear()
            print("Batch {}, mean loss={}".format(i, np.mean(batch_losses)))
        # calculate distances
        ld = LevenshteinDistance(true=words_true, pred=words_pred)
        print("Epoch {} finished".format(epoch + 1))
        print("Mean loss={}".format(np.mean(epoch_losses)))
        ld.print_distances()
        ld.print_percentiles()
        if epoch == epochs - 1:  # last epoch: save the plot and the reconstructed words
            outfile = "../out/plots_swadesh_deep/deep_{}{}{}.jpg".format(
                args.model, "_aligned" if aligned else "",
                "_ortho" if ortho else "")
            title = "Model: deep net{}{}{}".format(
                ", " + args.model, ", aligned" if aligned else "",
                ", orthographic" if ortho else "")
            plot_results(title=title,
                         distances={
                             "=<" + str(d): count
                             for d, count in ld.distances.items()
                         },
                         percentiles={
                             "=<" + str(d): perc
                             for d, perc in ld.percentiles.items()
                         },
                         mean_dist=ld.mean_distance,
                         mean_dist_norm=ld.mean_distance_normalized,
                         losses=epoch_losses,
                         outfile=Path(outfile))
            # save reconstructed words (but only if the edit distance is at least one)
            import nltk
            for t, p in zip(words_true, words_pred):
                distance = nltk.edit_distance(t, p)
                if distance > 0:
                    line = "{},{},distance={}\n".format(t, p, distance)
                    result_file.write(line)
            result_file.close()
        H1 = []
        H2 = []
        for i in range(len(H)):
            H1.append(np.mean(H[i]))
        for i in range(len(H1) - 1000):
            H2.append(np.mean(H1[i:i + 1000]))
        loss_mean.append(np.asarray(H2))

    np.save('mean_vector', vector_mean)
    np.save('vector_mean1', vector_mean1)
    np.save('negative_r1', negative_r1)
    np.save('negative_r2', negative_r2)
    np.save('pos_r', pos_r)
    np.save('pos_r2', pos_r2)
    np.save('null_r', null_r)
    np.save('loss_mean', loss_mean)
    np.save('LOSSES', LOSSES)
    np.save('success_episodes', success_episodes)
    plot_results(vector_mean, vector_mean1, train_episode, negative_r1,
                 negative_r2, pos_r, pos_r2, null_r, loss_mean, labels[0],
                 LOSSES, success_episodes)
    #plot_training(VARIATIONS)
    #plot_memory_replay(SMR,GSMR,TMR,GTMR,Mappa,labels[v])

    #plot_qvalues(VAR,MIN,MAX,MEAN,labels[v])

    #np.save('example',example1)
    #np.save('test_nn',example)
    #np.save('training',example1)
Example #10
        "service_rate": 2,
        "queue_length": 50,
        "num_delays_required": n_delays,
    },  # Arrival Rate = 0.5 * Service Rate && Limited Queue maxsize = 50
]
current_config = configs[selected_config]

# Main
if __name__ == "__main__":
    print("\nQUEUING SYSTEM\n")
    print("Model (" + str(n_runs) + " runs):")

    # First Run: Also Prints Model Info
    result_time = run_queue_simulation(current_config, first=True)
    results.append(result_time)
    for i in range(n_runs):
        # Result of every run
        result_time = run_queue_simulation(current_config, first=False)
        results.append(result_time)
        print("Run " + str(i + 1) + " of " + str(n_runs) + " finished")
    print()

    # Expected analytic values
    expected = get_expected_values(current_config)

    # Show Analytic vs Simulated Results in Console
    values_comparison(results, expected)

    # Plot all the runs' results (comparison with analytic values)
    plot_results(results, expected, save)
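
Example #10 compares the simulated runs against analytic results from get_expected_values, which is not reproduced here. The configuration comment above states that the arrival rate is half the service rate; for a plain M/M/1 queue the textbook steady-state formulas are easy to state, so a rough, hypothetical stand-in for get_expected_values (ignoring the finite queue_length = 50 cap, which would shift the numbers slightly) might be:

def mm1_expected_values(arrival_rate, service_rate):
    """Hypothetical stand-in: standard M/M/1 steady-state metrics."""
    rho = arrival_rate / service_rate                # server utilisation
    L = rho / (1 - rho)                              # mean number of customers in the system
    Lq = rho ** 2 / (1 - rho)                        # mean number waiting in the queue
    W = 1 / (service_rate - arrival_rate)            # mean time spent in the system
    Wq = arrival_rate / (service_rate * (service_rate - arrival_rate))  # mean waiting time
    return {"utilisation": rho, "L": L, "Lq": Lq, "W": W, "Wq": Wq}

# With service_rate = 2 (from the configuration shown) and an arrival rate of 1
# (half the service rate, per the comment above):
# utilisation = 0.5, L = 1.0, Lq = 0.5, W = 1.0, Wq = 0.5
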
Example #11
for n in np.arange(args.n_iter):
    X_pred[..., n] = model.predict(X_true, batch_size=int(1e4), verbose=1)
X_pred = np.mean(X_pred, axis=2)
print()

# BKG SUPPRESSION AND MASS SCULPTING METRIC
wp_metric = 'Latent' if (args.OE_type == 'KLD'
                         and 'Latent' in metrics) else 'MSE'

# CUT ON RECONSTRUCTION LOSS
if args.apply_cut == 'ON' or args.bump_hunter == 'ON':
    for cut_type in ['gain', 'sigma']:
        cut_sample = apply_best_cut(y_true, X_true, X_pred, sample,
                                    args.n_dims, model, wp_metric, cut_type)
        if args.bump_hunter == 'ON':
            bump_hunter(cut_sample, args.output_dir, cut_type)
        plot_distributions([sample, cut_sample],
                           args.output_dir,
                           bin_sizes={
                               'm': 2.5,
                               'pt': 10
                           },
                           plot_var='m',
                           sig_tag=sig_data,
                           file_name='bkg_supp-' + cut_type + '.png')

# PLOTTING RESULTS
if args.plotting == 'ON':
    plot_results(y_true, X_true, X_pred, sample, args.n_dims, model, metrics,
                 wp_metric, sig_data, args.output_dir)
Example #12
                                args.n_dims,
                                metric='X-S',
                                cut_type='gain')
    samples = [sample, cut_sample]
    #bump_hunter(np.where(cut_sample['JZW']==-1,0,1), cut_sample, args.output_dir); sys.exit()
    var_distributions(samples,
                      args.output_dir,
                      sig_bins=200,
                      bkg_bins=200,
                      var='M',
                      normalize=False)

# PLOTTING RESULTS
if args.plotting == 'ON':
    if not os.path.isdir(args.output_dir): os.mkdir(args.output_dir)
    plot_results(y_true, X_true, X_pred, sample, train_var, args.n_dims,
                 metrics, model, args.encoder, args.output_dir)
'''
from sklearn.manifold import TSNE
from tensorflow.keras import models
import matplotlib.pyplot as plt
import time
#codings_layer = models.Model(inputs=model.inputs, outputs=model.get_layer('model').get_layer('codings').output)
codings_layer = models.Model(inputs=model.inputs, outputs=model.get_layer('model').get_layer('mean').output)
codings       = codings_layer(np.float32(X_true))
print(codings.shape)
start_time = time.time()
tsne = TSNE(n_jobs=-1, random_state=0)
codings_2D = tsne.fit_transform(codings)
print(codings_2D.shape)
#plt.scatter(codings_2D[:,0], codings_2D[:,1], c=['tab:orange', 'tab:blue'], s=10, label=['Tops', 'QCD'], alpha=0.5)