def __init__(self, args):
        """Open the log file, echo the command-line args and build PARAMS.

        Args:
            args: Parsed command-line namespace. This method reads its
                ``logfileaddress``, ``data_folder``, ``ann_set`` and
                ``ann_type`` attributes; the whole object is kept on
                ``self.args`` for later use.
        """
        self.GLOBAL_BEST_DEVEL_PRED_RESULTS = []

        self.args = args
        # The handle is stored on the instance and stays open; nothing in this
        # method closes it. Presumably self.lp() writes to it -- TODO confirm.
        self._LogFileHandler = open(args.logfileaddress, "wt")
        self.lp("Program started ...")
        self.__validate_args__()

        self.PARAMS = collections.OrderedDict()
        # The three annotation files share one directory,
        # <data_folder>/<ann_set>/<ann_type>/, and differ only in the split
        # name embedded in the file name.
        annotation_dir = '/'.join(
            [args.data_folder, args.ann_set, args.ann_type])
        for split in ("train", "devel", "test"):
            self.PARAMS[split + "_filename"] = (
                annotation_dir + '/' + args.ann_type + '-' + split +
                '-annotations.txt')

        self.PARAMS["X_lower_row_len"] = 1  # Lower text length threshold
        self.PARAMS["X_upper_row_len"] = 400  # Upper text length threshold
        # Placeholder; __LoadData__ overwrites it with the computed length.
        self.PARAMS["X_used_row_len"] = -1
        # Default size of the used word embeddings when no pre-created
        # embeddings model is given.
        self.PARAMS["default_embeddings_dim"] = 300

        # Opening banner: the original multiplied an empty string ("" * 80),
        # which yields "" instead of a row of asterisks.
        MSG = ["*" * 80, "PARAMETERS:", "-" * 20]
        for key, value in self.PARAMS.items():
            MSG.append(GF.NVLR(key, 20) + " : " + str(value))
        MSG.append("*" * 80)
        self.lp(MSG)
 def __validate_args__(self):
     """Log every attribute of ``self.args`` as ``name : value``, sorted by name.

     Despite its name, this method performs no checks on the values; it
     only writes them to the log through ``self.lp``.
     """
     self.lp("Validating args ...")
     cli_args = vars(self.args)
     # Opening banner: the original multiplied an empty string ("" * 80),
     # which yields "" instead of a row of asterisks.
     MSG = ["*" * 80, "Command-Line args:", "-" * 20]
     for key in sorted(cli_args):
         MSG.append(GF.NVLR(key, 20) + " : " + str(cli_args[key]))
     MSG.append("*" * 80)
     self.lp(MSG)
    def __LoadData__(self):
        """Load the train/devel/test splits and record data-set statistics.

        One ``X_y_dataHandler`` is created and loaded per split. The maxima
        reported by the handlers are combined across the three splits, a
        common row length is chosen within the configured lower/upper
        thresholds, and ``make_numpy_arrays`` is called on every handler.
        A summary is logged, the handlers are stored on ``self`` and the
        statistics are written into ``self.PARAMS``.
        """
        self.lp("Fetching information about the data set ...")

        # Create and load the handlers in train, devel, test order.
        handlers = []
        for filename_key in ("train_filename", "devel_filename",
                             "test_filename"):
            handler = X_y_dataHandler(ann_set=self.args.ann_set,
                                      include_o_labels=0)
            handler.load_data_set(self.PARAMS[filename_key])
            handlers.append(handler)
        train_set, devel_set, test_set = handlers

        # Each statistic is the largest value reported by any split.
        word_max = max(h.get_X_max_word_value() for h in handlers)
        lemma_max = max(h.get_X_max_lemma_value() for h in handlers)
        pos_max = max(h.get_X_max_pos_value() for h in handlers)
        label_max = max(h.get_y_max_value() for h in handlers)

        # Use the longest observed row unless it falls on or outside one of
        # the configured thresholds, in which case that threshold wins.
        longest_row = max(h.get_X_max_len() for h in handlers)
        row_len = longest_row
        if longest_row <= self.PARAMS["X_lower_row_len"]:
            row_len = self.PARAMS["X_lower_row_len"]
        elif longest_row >= self.PARAMS["X_upper_row_len"]:
            row_len = self.PARAMS["X_upper_row_len"]

        for handler in handlers:
            handler.make_numpy_arrays(row_len,
                                      label_max,
                                      padding_side=self.args.padding_side)

        # Need to check again due to potential removal of the O label column
        label_max = max(h.get_y_max_value() for h in handlers)

        train_size, devel_size, test_size = [h.get_size() for h in handlers]

        summary = ["*" * 80, "Information about data:", "-" * 30]
        statistics = (
            ('word max value', word_max),
            ('lemma max value', lemma_max),
            ('pos max value', pos_max),
            ('used row length', row_len),
            ('max row length', longest_row),
            ('max value', label_max),
        )
        for caption, value in statistics:
            summary.append(GF.NVLR(caption, 40) + ": " + str(value))

        # Per split: source file, number of examples, then a blank separator.
        splits = (
            ('Train data', "train_filename", 'Train size', train_size),
            ('Devel data', "devel_filename", 'Devel size', devel_size),
            ('Test data', "test_filename", 'Test size', test_size),
        )
        for data_caption, filename_key, size_caption, size in splits:
            summary.append(
                GF.NVLR(data_caption, 40) + ": " + self.PARAMS[filename_key])
            summary.append(GF.NVLR(size_caption, 40) + ": " + str(size))
            summary.append("")
        self.lp(summary)

        self.train_data_obj = train_set
        self.devel_data_obj = devel_set
        self.test_data_obj = test_set

        self.PARAMS["X_word_max_value"] = word_max
        self.PARAMS["X_lemma_max_value"] = lemma_max
        self.PARAMS["X_pos_max_value"] = pos_max
        self.PARAMS["X_used_row_len"] = row_len
        self.PARAMS["y_max_value"] = label_max