Example #1
def train():
    train_dataset = YoutubeTrendingDataset("./data/train.csv")
    train_dataloader = DataLoader(train_dataset, batch_size=1)
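    # Map each raw column to the list of preprocessing steps applied to it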
    preprocess_dict = {
        "publish_time": [TimeStampPreprocessing],
        "likes": [FrequencyPreprocessing],
        "category_id": [CategoricalPrerprocessing]
    }
    preprocessor = Preprocessor(preprocess_dict)

    for batch_ix, data in enumerate(train_dataloader):
        train_x, train_y = preprocessor.run(data)
Example #2
 def test_smoke(self):
     """
     Smoke test - check that the preprocessor runs without exploding
     """
     pp = Preprocessor(file_name=self.file_name,
                       input_dir=self.input_dir,
                       output_dir=self.output_dir)
     pp.process()
     out_dir_files = os.listdir(self.output_dir)
     for file_name in out_dir_files:
         name = utilities.path.get_name(self.file_name,
                                        extension=False)
         if name in file_name:
             file_path = os.path.join(self.output_dir, file_name)
             doc = Document.from_json(file_path)
             self.assertNotEqual(doc.pre_file_name, self.file_name)
             self.assertEqual(doc.file_name, 'test_preprocessed/lorem.json')
Example #3
 def by_infile(self, infile):
     # Remove any previous output; ignore the error if the directory does not exist yet
     shutil.rmtree(self.OUTPUT_DIR, ignore_errors=True)
     self.db_open()
     json_data = self.get_events_from_infile(infile)
     # build preprocessor
     ppr = Preprocessor()
     # Process raw data
     #X, Y, events_found = ppr.get_raw_data(DIMENSION, [RAW_FILE], bad)
     X, Y, events_found = ppr.get_from_json(self.DIMENSION, json_data)
     X, Y = ppr.remove_outliers(X, Y)
     X, Y = ppr.normalize(X, Y)
     trX, trY, teX, teY, vaX, vaY = ppr.partition_for_training(
         X, Y, 0.0, 1.0)
     ppr.store_training_partitions(trX, trY, teX, teY, vaX, vaY,
                                   self.INPUT_DIR)
     # build adapter
     adapter = MACAdapter(self.INPUT_DIR, self.DIMENSION, self.FOLDS)
     # build model
     convnet = ConvNet(self.DIMENSION)
     # build server
     server = ConvNetServer(adapter,
                            self.OUTPUT_DIR,
                            batch_size=self.BATCH_SIZE,
                            verbose=True,
                            use=True)
     x, durs, _ = server.get_testing_batch()
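     # Restore the trained weights and run the predictor over the whole testing batch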
     with tf.Session() as sess:
         init = tf.global_variables_initializer()
         sess.run(init)
         convnet.restore(sess, self.INITIAL_WEIGHTS)
         predictions = sess.run((convnet.predictor),
                                feed_dict={
                                    convnet.x: x,
                                    convnet.durs: durs
                                })
     # Get event ids
     _, _, ids = adapter.get_ids()
     results = [{
         "eventID": int(ids[i]),
         "ml": {
             "aircraftProbability":
             round(np.around(predictions[i][0], decimals=4), 4),
             "model":
             self.MODEL
         }
     } for i in range(0, len(ids))]
     for result in results:
         self.insert_result_for_event(result)
     self.db_close()
Example #4
    # Set output path
    if args.out_path:
        out_path = Path(args.out_path)
    else:
        out_path = Path('data_' + str(patch_size) + '/test/masks_predicted_' +
                        time.strftime("%y%m%d-%H%M%S"))

    if not out_path.exists():
        out_path.mkdir(parents=True)

    # log all arguments including default ones
    with open(Path(out_path, 'options.json'), 'w') as f:
        f.write(json.dumps(vars(args)))

    # Preprocessing
    preprocessor = Preprocessor()
    if args.denoise:
        preprocessor.add_filter(
            filter.get_denoise_filter(args.denoise, args.denoise_parms))

    # get loss function from function name
    loss_function = get_loss_function(args.loss, args.loss_parms)

    if 'bayes' in args.model or 'uncert' in args.model:
        mc_iterations = args.mc_iterations
    else:  # set Nr iterations to 1 for regular u-net
        mc_iterations = 1

    # 2-Stage Optimization Process
    if args.model == 'two_stage':
        # 1st Stage
Example #5
def main():

    input_dir = ""
    output_dir = ""
    max_length_sentence = 100
    lc = False
    verbose = False

    try:
        opts, args = getopt.getopt(sys.argv[1:], "hvls:i:o:", [
            "help", "verbose", "lowercase", "max_sen_length=", "input_dir=",
            "output_dir="
        ])
    except getopt.GetoptError as err:
        print(str(err))
        usage()
        sys.exit(2)

    for o, a in opts:
        if o in ("-h", "--help"):
            usage()
            sys.exit()
        elif o in ("-v", "--verbose"):
            verbose = True
        elif o in ("-s", "--max_sen_length"):
            max_length_sentence = int(a)
        elif o in ("-l", "--lowercase"):
            lc = True
        elif o in ("-i", "--input_dir"):
            input_dir = os.path.expanduser(a)
        elif o in ("-o", "--output_dir"):
            output_dir = os.path.expanduser(a)
        else:
            assert False, "unhandled option"

    if not os.path.exists(input_dir):
        print("input directory does not exists... exiting")
        sys.exit()

    if output_dir == "":
        output_dir = input_dir
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
        if verbose:
            print("directory {} created".format(output_dir))

    path_trainset = os.path.join(input_dir, TRAIN_NAME)
    path_testset = os.path.join(input_dir, TEST_NAME)

    if not os.path.exists(path_trainset):
        print("training set file is absent ({})".format(path_trainset))
        sys.exit()
    if not os.path.exists(path_testset):
        print("test set file is absent ({})".format(path_testset))
        sys.exit()

    path_save_voc_w = os.path.join(output_dir, VOCABULARY_NAME)
    path_save_voc_c = os.path.join(output_dir, CLASSES_NAME)

    prep_train = Preprocessor(path_trainset)
    prep_test = Preprocessor(path_testset, train=False)
    size_train = len(prep_train)
    size_test = len(prep_test)
    if verbose:
        print("Starting pre-processing on files of {} sentences".format(
            size_train + size_test))
    voc_w, voc_c, max_length = get_vocabulary([prep_train, prep_test], verbose)
    voc_w.add(PAD)
    write_vocabulary(voc_w, path_save_voc_w)
    write_vocabulary(voc_c, path_save_voc_c)
    embeddings_size = len(voc_w)
    del voc_w, voc_c, prep_train

    # Load the vocabularies back as dictionaries
    vocab_words = load_vocabulary(path_save_voc_w)
    if verbose:
        print(
            "\nvocabulary loaded back ... {} words "
            "(might be different from before due to utf-8 encoding issues...)".
            format(len(vocab_words)))
    vocab_classes = load_vocabulary(path_save_voc_c)
    max_length = min(max_length, max_length_sentence)
    processing_words = get_word_preprocessing(vocab_words,
                                              max_length=max_length)
    processing_class = get_classes_preprocessing(vocab_classes)
    prep_to_int_train = Preprocessor(path_trainset,
                                     processing_words=processing_words,
                                     processing_class=processing_class)
    prep_to_int_test = Preprocessor(path_testset,
                                    train=False,
                                    processing_words=processing_words,
                                    processing_class=processing_class)
    train, _ = fill_matrix(size_train,
                           max_length,
                           prep_to_int_train,
                           train=True)
    test, ids_test = fill_matrix(size_test,
                                 max_length,
                                 prep_to_int_test,
                                 train=False)
    np.save(os.path.join(output_dir, TRAIN_OUTPUT_NAME), train)
    np.save(os.path.join(output_dir, TEST_OUTPUT_NAME), test)
    with codecs.open(os.path.join(output_dir, IDS_OUTPUT_NAME),
                     "w",
                     encoding='utf-8') as f:
        f.write("\n".join(ids_test))

    path_w2v = os.path.join(input_dir, EMBEDDINGS_INPUT_NAME)
    path_ngrams_w2v = os.path.join(input_dir, EMBEDDINGS_INPUT_NAME_NG)
    path_save_embeddings = os.path.join(output_dir, EMBEDDINGS_OUTPUT_NAME)
    min_n = 3
    max_n = 6
    create_embeddings(vocab_words, embeddings_size, path_w2v, path_ngrams_w2v,
                      path_save_embeddings, min_n, max_n)
Example #6
from utilities import file_io
from sklearn.feature_extraction import FeatureHasher
from sklearn.neighbors import KNeighborsClassifier
import time
from preprocessing.preprocessor import Preprocessor
from LyricsProcessor import LyricsProcessor

if __name__ == "__main__":
    #chunks = file_io.read_lastfm_user_art_file("data/userid-timestamp-artid-artname-traid-traname.tsv")
    chunks = file_io.read_lastfm_user_art_file("data/test_shorter.tsv")

    # read songs
    vectorizer = FeatureHasher()
    pre = Preprocessor(chunks, vectorizer)
    songs = pre.read_songs(20)
    print(songs)

    # reset file reader
    #chunks = file_io.read_lastfm_user_art_file("data/tmp.tsv")
    #pre.reset_file_reader(chunks)

    # read user song mapping
    pre.read_user_songs(1000)
    # convert to user-song matrix
    X = pre.get_user_song_matrix()

    start_time = time.time()
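    # Each user row is its own class label, so a 1-NN lookup returns the most similar user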
    clf = KNeighborsClassifier(n_neighbors=1)
    clf.fit(X, list(range(X.shape[0])))
    print(clf.predict(pre.user_song_dict["user_000001"]))
Example #7

ap = argparse.ArgumentParser()
ap.add_argument("-d",
                "--dataset",
                required=True,
                help="path to input dataset")
ap.add_argument("-k",
                "--neighbors",
                type=int,
                default=1,
                help="# of nearest neighbors for classification")
ap.add_argument(
    "-j",
    "--jobs",
    type=int,
    default=-1,
    help="# of jobs for k-NN distance (-1 uses all available cores)")

args = vars(ap.parse_args())

print("[INFO] loading images...")
imagePaths = list(paths.list_images(args["dataset"]))

sp = Preprocessor(32, 32)
sdl = DatasetLoader(preprocessors=[sp])
(data, labels) = sdl.load(imagePaths, verbose=1000)
data = data.reshape((data.shape[0], 3072))

print("[INFO] features matrix: {:.1f}MB".format(data.nbytes / (1024 * 1000.0)))

le = LabelEncoder()
labels = le.fit_transform(labels)
(trainX, testX, trainY, testY) = train_test_split(data,
                                                  labels,
                                                  test_size=0.25,
                                                  random_state=42)

#k-NN
print("[INFO] evaluating k-NN classifier...")
Example #8
def main(argv):
    """Main entrypoint for the anonymization tool"""

    # Default parameters
    configuration_file = ''
    input_file = ''
    use_cache = True
    weight = 0.5
    strategy = "gdf"
    result_dir = None

    # Read and set tool parameters
    try:
        opts, _ = getopt.getopt(argv, "c:i:r:w:v", ["config=", "input=", "weight=", "result_dir=", "verbose"])
    except getopt.GetoptError:
        logger.error('experiment_runner.py -c <config_file> -i <input_file> -w <relational_weight>')
        sys.exit(2)
    for opt, arg in opts:
        if opt in ("-c", "--config"):
            configuration_file = arg
        if opt in ("-i", "--input"):
            input_file = arg
            base = os.path.basename(input_file)
            if not result_dir:
                result_dir = os.path.splitext(base)[0]
        if opt in ("-w", "--weight"):
            weight = float(arg)
            strategy = "mondrian"
        if opt in ("-r", "--result_dir"):
            result_dir = arg
        if opt in ("-v", "--verbose"):
            logging.getLogger().setLevel(logging.DEBUG)

    result_path = Path("experiment_results") / result_dir
    result_path.mkdir(parents=True, exist_ok=True)

    # Let's get started
    logger.info("Anonymizing input file %s", input_file)

    # Initialize and read configuration
    configuration_reader = ConfigurationReader()
    config = configuration_reader.read(configuration_file)

    # Read data using data types defined in the configuration
    data_reader = DataReader(config)
    df = data_reader.read(input_file)

    # Initialize the sensitive terms recognizer
    sensitive_terms_recognizer = SensitiveTermsRecognizer(config, use_cache)

    # Initialize the preprocessor (preprocessor is stateful, so pass df at the beginning)
    pp = Preprocessor(sensitive_terms_recognizer, config, df)

    # Run through preprocessing of dataframe: Data cleansing, analysis of textual attributes, resolving of redundant information, and compression
    pp.clean_textual_attributes()
    pp.analyze_textual_attributes()
    pp.find_redundant_information()
    pp.compress()

    # Get sensitive terms dictionary and preprocessed dataframe
    terms = pp.get_sensitive_terms()
    df = pp.get_df()

    # Initialize the anonymization kernel by providing the sensitive terms dictionary, the configuration, the sensitive terms recognizer, and the preprocessor
    kernel = AnonymizationKernel(terms, config, sensitive_terms_recognizer, pp)
    unanonymized = df

    # Determine k values for experiment
    k_values = [2, 3, 4, 5, 10, 20, 50]
    biases = config.get_biases()

    # Set strategy names
    if strategy == "mondrian":
        strategy_name = "mondrian-{}".format(weight)
    elif strategy == "gdf":
        strategy_name = strategy

    # Parameters for calculating metrics
    quasi_identifiers = config.get_quasi_identifiers()
    textual_attribute_mapping = pp.get_textual_attribute_mapping()

    # Prepare dataframes and json to store experiment results
    total_information_loss = pd.DataFrame(index=k_values, columns=[strategy_name])
    total_information_loss.index.name = 'k'

    relational_information_loss = pd.DataFrame(index=k_values, columns=[strategy_name])
    relational_information_loss.index.name = 'k'

    textual_information_loss = pd.DataFrame(index=k_values, columns=[strategy_name])
    textual_information_loss.index.name = 'k'

    detailed_loss_level_0 = [k for k in textual_attribute_mapping]
    detailed_loss_level_1 = set()
    for k in textual_attribute_mapping:
        for e in textual_attribute_mapping[k]:
            detailed_loss_level_1.add(e.replace("{}_".format(k), ''))
    detailed_loss_level_1 = ["total"] + list(detailed_loss_level_1)
    detailed_textual_information_loss = pd.DataFrame(index=k_values, columns=pd.MultiIndex.from_product([detailed_loss_level_0, detailed_loss_level_1]))
    detailed_textual_information_loss.index.name = 'k'

    partition_sizes = {}
    partition_sizes[strategy_name] = {}

    partition_splits = {}
    partition_splits[strategy_name] = {}

    # Let's start the experiments
    for k in k_values:
        logger.info("-------------------------------------------------------------------------------")
        logger.info("Anonymizing dataset with k=%d and strategy %s", k, strategy_name)

        # Anonymize dataset for a specific k
        anonymized_df, partitions, partition_split_statistics = kernel.anonymize_quasi_identifiers(df, k, strategy, biases, weight)

        # Calculating the total, relational, and textual information loss based on the original and anonymized data frame
        total_il, relational_il, textual_il = calculate_normalized_certainty_penalty(unanonymized, anonymized_df, quasi_identifiers, textual_attribute_mapping)

        # Calculating the mean and std for partition size as well as split statistics
        mean_partition_size = calculate_mean_partition_size(partitions)
        std_partition_size = calculate_std_partition_size(partitions)
        if partition_split_statistics:
            number_of_relational_splits, number_of_textual_splits = get_partition_split_share(partition_split_statistics, textual_attribute_mapping)

        # Notify about the results
        logger.info("Information loss for relational attributes is %4.4f", relational_il)
        if textual_il:
            logger.info("Information loss for textual attribute is %4.4f", textual_il["total"])
        logger.info("Total information loss is %4.4f", total_il)
        logger.info("Ended up with %d partitions with a mean size of %.2f and a std of %.2f", len(partitions), mean_partition_size, std_partition_size)
        if partition_split_statistics:
            logger.info("Split %d times on a relational attribute", number_of_relational_splits)
            logger.info("Split %d times on a textual attribute", number_of_textual_splits)

        # Store experiment results
        total_information_loss.at[k, strategy_name] = total_il
        relational_information_loss.at[k, strategy_name] = relational_il
        if textual_il:
            textual_information_loss.at[k, strategy_name] = textual_il["total"]
            for key in textual_il:
                if isinstance(textual_il[key], dict):
                    for subkey in textual_il[key]:
                        if subkey == "total":
                            detailed_textual_information_loss.at[k, (key, "total")] = textual_il[key]["total"]
                        else:
                            entity_type = subkey.replace("{}_".format(key), '')
                            detailed_textual_information_loss.at[k, (key, entity_type)] = textual_il[key][subkey]

        partition_sizes[strategy_name][k] = get_partition_lengths(partitions)
        if partition_split_statistics:
            partition_splits[strategy_name][k] = {
                "relational": number_of_relational_splits,
                "textual": number_of_textual_splits
            }

    # Define file info
    if strategy == "mondrian":
        file_info = str(weight).replace(".", "_")
    elif strategy == "gdf":
        file_info = strategy

    # Save the experiment results
    with open(result_path / 'partition_distribution_{}.json'.format(file_info), 'w') as f:
        json.dump(partition_sizes, f, ensure_ascii=False)

    if partition_split_statistics:
        with open(result_path / 'partition_splits_{}.json'.format(file_info), 'w') as f:
            json.dump(partition_splits, f, ensure_ascii=False)

    total_information_loss.to_csv(result_path / "total_information_loss_{}.csv".format(file_info))
    relational_information_loss.to_csv(result_path / "relational_information_loss_{}.csv".format(file_info))
    if textual_il:
        textual_information_loss.to_csv(result_path / "textual_information_loss_{}.csv".format(file_info))
        detailed_textual_information_loss.to_csv(result_path / "detailed_textual_information_loss_{}.csv".format(file_info))
Example #9

    def __predict_output__():

        plt.interactive(False)
        cfg = Configuration()
        GPU = True

        if not GPU:
            os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
            os.environ["CUDA_VISIBLE_DEVICES"] = ""

        # Input Path

        root_dir = os.path.dirname(os.path.abspath(__file__))

        image_path = cfg.image_path

        json_path = os.path.join(root_dir, cfg.input_filename)

        testingset = os.path.join(root_dir, 'testingset')

        Preprocessor.__generate_kijiji_set__(root_dir, image_path, json_path,
                                             testingset, 'model')

        # ------------------generator to compile training data of kijiji dataset----------------------------------------

        image_path = os.path.join(root_dir, 'testingset')

        data_path = glob(image_path + "/*")

        # Image Segmentation Parameters

        model_path = os.path.expanduser(cfg.model_path)
        assert model_path.endswith('.h5'), 'Keras model must be a .h5 file.'
        anchors_path = os.path.expanduser(cfg.anchors_path)
        classes_path = os.path.expanduser(cfg.classes_path)
        test_path = os.path.expanduser(cfg.test_path)
        output_path = os.path.expanduser(cfg.segmented_output_path)
        json_path = os.path.expanduser(cfg.json_output)

        if not os.path.exists(output_path):
            print('Creating output path {}'.format(output_path))
            os.mkdir(output_path)

        sess = K.get_session()

        class_names = Preprocessor.__return_class_names__(classes_path)

        anchors = Preprocessor.__return_anchors__(anchors_path)

        yolo_model = load_model(model_path)

        # Verify model, anchors, and classes are compatible

        num_classes = len(class_names)

        num_anchors = len(anchors)

        info = 'Mismatch between model and given anchor and class sizes. ' \
               'Specify matching anchors and classes with --anchors_path and --classes_path flags.'
        model_output_channels = yolo_model.layers[-1].output_shape[-1]
        assert model_output_channels == num_anchors * (num_classes + 5), info
        print('{} model, anchors, and classes loaded.'.format(model_path))

        # Check if model is fully convolutional, assuming channel last order.

        model_image_size = yolo_model.layers[0].input_shape[1:3]

        is_fixed_size = model_image_size != (None, None)

        # Generate Colors for drawing bounding boxes

        hsv_tuples, colors = Preprocessor.__generate_colors_for_bounding_boxes__(
            class_names)

        yolo_outputs = yolo_head(yolo_model.output, anchors, len(class_names))

        input_image_shape = K.placeholder(shape=(2, ))
        boxes, scores, classes = yolo_eval(yolo_outputs,
                                           input_image_shape,
                                           score_threshold=cfg.score_threshold,
                                           iou_threshold=cfg.iou_threshold)

        # Load Images from the root folder

        input_images_model_1, all_images, data_path, data_path_with_image_name = Preprocessor.__load_image_data_thumbnails__(
            data_path,
            cfg.compressed_image_height,
            cfg.compressed_image_width,
            cfg.compressed_channel,
            cfg.number_of_categories,
            cfg.number_of_images_per_category,
            root_dir,
            is_fixed_size,
            model_image_size,
            sess,
            yolo_model,
            input_image_shape,
            boxes,
            scores,
            classes,
            cfg.font_path,
            class_names,
            colors,
            output_path,
            json_path,
            test_path,
            True,  # Segmentation Flag
            False,  # Edge-detection Flag
            True,  # Extract object Flag
            False)  # Gray Scale Flag

        input_images_model_2, all_images, data_path, data_path_with_image_name = Preprocessor.__load_image_data_thumbnails__(
            data_path, cfg.compressed_image_height, cfg.compressed_image_width,
            cfg.compressed_channel, cfg.number_of_categories,
            cfg.number_of_images_per_category, root_dir, is_fixed_size,
            model_image_size, sess, yolo_model, input_image_shape, boxes,
            scores, classes, cfg.font_path, class_names, colors, output_path,
            json_path, test_path, False, True, False, False)

        input_images_model_3, all_images, data_path, data_path_with_image_name = Preprocessor.__load_image_data_thumbnails__(
            data_path, cfg.image_height, cfg.image_width, cfg.channel,
            cfg.number_of_categories, cfg.number_of_images_per_category,
            root_dir, is_fixed_size, model_image_size, sess, yolo_model,
            input_image_shape, boxes, scores, classes, cfg.font_path,
            class_names, colors, output_path, json_path, test_path, False,
            False, False, False)

        input_shape = [
            cfg.compressed_image_height, cfg.compressed_image_width,
            cfg.compressed_channel
        ]

        input_shape_3 = [cfg.image_height, cfg.image_width, cfg.channel]

        # load (pre-trained) weights for model_1

        print('-' * 30)
        print('Loading model weights...\n')
        weight_folder = cfg.model_1_save  # the path where the model weights are stored
        weight_file = 'model_1.h5'
        model_1 = Preprocessor.__load_model_weights__(weight_folder,
                                                      weight_file, input_shape,
                                                      input_shape_3, "Model_1")

        # load (pre-trained) weights for model_2

        print('-' * 30)
        print('Loading model weights...\n')
        weight_folder = cfg.model_2_save  # the path where the model weights are stored
        weight_file = 'model_2.h5'
        model_2 = Preprocessor.__load_model_weights__(weight_folder,
                                                      weight_file, input_shape,
                                                      input_shape_3, "Model_2")

        # load (pre-trained) weights for model_2

        print('-' * 30)
        print('Loading model weights...\n')
        weight_folder = cfg.model_3_save  # the path where the model weights are stored
        weight_file = 'model_3.h5'
        model_3 = Preprocessor.__load_model_weights__(weight_folder,
                                                      weight_file, input_shape,
                                                      input_shape_3, "Model_3")
        print(root_dir)
        print(os.path.join(root_dir, cfg.output_model_1))

        output_path_model_1 = os.path.join(root_dir + cfg.output_model_1)
        output_path_model_2 = os.path.join(root_dir + cfg.output_model_2)
        output_path_model_3 = os.path.join(root_dir + cfg.output_model_3)

        Preprocessor.__create_output_directories__(output_path_model_1)
        Preprocessor.__create_output_directories__(output_path_model_2)
        Preprocessor.__create_output_directories__(output_path_model_3)

        features_from_model_1 = Preprocessor.__get_score_model__(
            model_1, input_images_model_1, output_path_model_1)
        features_from_model_2 = Preprocessor.__get_score_model__(
            model_2, input_images_model_2, output_path_model_2)
        features_from_model_3 = Preprocessor.__get_score_model__(
            model_3, input_images_model_3, output_path_model_3)

        features_from_model_1 = Preprocessor.__flatten_img_data__(
            features_from_model_1)
        features_from_model_2 = Preprocessor.__flatten_img_data__(
            features_from_model_2)
        features_from_model_3 = Preprocessor.__flatten_img_data__(
            features_from_model_3)

        fused_features = np.concatenate([
            features_from_model_1, features_from_model_2, features_from_model_3
        ],
                                        axis=1)

        fused_features = [
            Preprocessor.__binarize__(features) for features in fused_features
        ]
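        # The binarized fused features are what __get_closest_images__ compares for retrieval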

        counter_for_predictions = 0

        sub_average_precision_make, sub_average_precision_color = [], []
        sub_average_precision_body, sub_average_precision_model = [], []

        cum_average_precision_make, cum_average_precision_color = [], []
        cum_average_precision_body, cum_average_precision_model = [], []

        precision_at_3_5_10_all = ''.join(cfg.precision_counter).split(',')

        while counter_for_predictions <= 2:

            test_image_idx = int(len(input_images_model_1) * random())

            if test_image_idx < len(data_path_with_image_name):

                idx_closest = Preprocessor.__get_closest_images__(
                    test_image_idx, fused_features, cfg.number_of_predictions)
                test_image = Preprocessor.__get_concatenated_images__(
                    data_path_with_image_name, [test_image_idx],
                    cfg.compressed_image_width)
                results_image = Preprocessor.__get_concatenated_images__(
                    data_path_with_image_name, idx_closest,
                    cfg.compressed_image_width)

                source_category = str(
                    data_path_with_image_name[test_image_idx]).split('/')
                similar_image = []
                similar_idx_closest = []

                for counter_for_recommendations in range(0, len(idx_closest)):

                    category = str(data_path_with_image_name[
                        idx_closest[counter_for_recommendations]]).split('/')

                    if str(source_category[-2]).strip() == str(
                            category[-2].strip()):
                        similar_image.append(data_path_with_image_name[
                            idx_closest[counter_for_recommendations]])
                        similar_idx_closest.append(
                            idx_closest[counter_for_recommendations])

                print("Test Image ID:", test_image_idx)
                print("\n")
                print("Closest Images ID:", idx_closest)
                print("\n")
                print("Similar Images ID", similar_idx_closest)
                print("\n")

                precision_per_make, precision_per_color = [], []
                precision_per_body_wise, precision_per_model_wise = [], []
                results_image_recommendations = []

                for i in range(0, len(precision_at_3_5_10_all)):

                    results_image_recommendations = Preprocessor.__get_concatenated_images__(
                        data_path_with_image_name, similar_idx_closest,
                        cfg.compressed_image_width)

                    list_of_similar_image_names = Preprocessor.__return_image_names__(
                        data_path_with_image_name, similar_idx_closest)

                    name_of_test_image = Preprocessor.__return_image_names__(
                        data_path_with_image_name, [test_image_idx])

                    dict_of_attributes_of_similar_images = Preprocessor.__get_attributes_list__(
                        list_of_similar_image_names,
                        os.path.join(root_dir, cfg.input_filename))

                    dict_of_attributes_of_test_image = Preprocessor.__get_attributes_list__(
                        name_of_test_image,
                        os.path.join(root_dir, cfg.input_filename))

                    similar_make_wise = Preprocessor.__get_similar__(
                        dict_of_attributes_of_test_image,
                        dict_of_attributes_of_similar_images[:int(
                            precision_at_3_5_10_all[i])], 'make')

                    similar_color_wise = Preprocessor.__get_similar__(
                        dict_of_attributes_of_test_image,
                        dict_of_attributes_of_similar_images[:int(
                            precision_at_3_5_10_all[i])], 'color')

                    similar_body_wise = Preprocessor.__get_similar__(
                        dict_of_attributes_of_test_image,
                        dict_of_attributes_of_similar_images[:int(
                            precision_at_3_5_10_all[i])], 'body')

                    similar_model_wise = Preprocessor.__get_similar__(
                        dict_of_attributes_of_test_image,
                        dict_of_attributes_of_similar_images[:int(
                            precision_at_3_5_10_all[i])], 'model')

                    precision_per_make.append(
                        float(
                            float(len(similar_make_wise)) /
                            int(precision_at_3_5_10_all[i])))
                    precision_per_color.append(
                        float(
                            float(len(similar_color_wise)) /
                            int(precision_at_3_5_10_all[i])))
                    precision_per_body_wise.append(
                        float(
                            float(len(similar_body_wise)) /
                            int(precision_at_3_5_10_all[i])))
                    precision_per_model_wise.append(
                        float(
                            float(len(similar_model_wise)) /
                            int(precision_at_3_5_10_all[i])))

                sub_average_precision_make.append(precision_per_make)
                sub_average_precision_color.append(precision_per_color)
                sub_average_precision_body.append(precision_per_body_wise)
                sub_average_precision_model.append(precision_per_model_wise)

                imsave('test.png', test_image)
                imsave('recommendations.png', results_image_recommendations)
                imsave('total_results.png', results_image)
                counter_for_predictions += 1
                time.sleep(1)

            else:

                print("Index is out of bound")

            # Materialize the map results so they can be reused and printed under Python 3
            cum_average_precision_make.append(
                list(map(Preprocessor.__mean__, zip(*sub_average_precision_make))))
            cum_average_precision_color.append(
                list(map(Preprocessor.__mean__, zip(*sub_average_precision_color))))
            cum_average_precision_body.append(
                list(map(Preprocessor.__mean__, zip(*sub_average_precision_body))))
            cum_average_precision_model.append(
                list(map(Preprocessor.__mean__, zip(*sub_average_precision_model))))

        print("\n \n \n")
        print(
            "-----------------------------------------------------------------------------------"
        )
        print("Average Precision Make-Wise", precision_at_3_5_10_all,
              map(Preprocessor.__mean__, zip(*cum_average_precision_make)))
        print("Average Precision Color-Wise", precision_at_3_5_10_all,
              map(Preprocessor.__mean__, zip(*cum_average_precision_color)))
        print("Average Precision Body-Wise", precision_at_3_5_10_all,
              map(Preprocessor.__mean__, zip(*cum_average_precision_body)))
        print("Average Precision Model-Wise", precision_at_3_5_10_all,
              map(Preprocessor.__mean__, zip(*cum_average_precision_model)))

        writer = csv.writer(open(os.path.join(root_dir, 'results.csv'), 'w'))

        writer.writerow([
            "Make-Wise: Precision at 3", "Make-Wise: Precision at 5",
            "Make-Wise: Precision at 10"
        ])
        for row in zip(*cum_average_precision_make):
            writer.writerow(row)

        writer.writerow('\n')

        writer.writerow([
            "Color-Wise: Precision at 3", "Color-Wise: Precision at 5",
            "Color-Wise: Precision at 10"
        ])

        for row in zip(*cum_average_precision_color):
            writer.writerow(row)

        writer.writerow('\n')

        writer.writerow([
            "Body-Wise: Precision at 3", "Body-Wise: Precision at 5",
            "Body-Wise: Precision at 10"
        ])

        for row in zip(*cum_average_precision_body):
            writer.writerow(row)

        writer.writerow('\n')

        writer.writerow([
            "Model-Wise: Precision at 3", "Model-Wise: Precision at 5",
            "Model-Wise: Precision at 10"
        ])

        for row in zip(*cum_average_precision_model):
            writer.writerow(row)

        writer.writerow('\n')
Example #10
import pandas as pd
from model.nn import BasicNeuralNetwork
from preprocessing.preprocessor import Preprocessor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.linear_model import Lasso
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.decomposition import PCA, FastICA

BASE_DIR = 'data/'

#########################
# Preprocess data
#########################

train = pd.read_csv(BASE_DIR + 'train.csv')
test = pd.read_csv(BASE_DIR + 'test.csv')

preprocessor = Preprocessor(magicFeature=True)
train_p, test_p = preprocessor.transform(train, test)
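# transform() presumably fits on train and applies the same mapping to test (assumed Preprocessor behavior)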

#########################
# Create models
#########################

gb = GradientBoostingRegressor(n_estimators=1000,
                               max_features=0.95,
                               learning_rate=0.005,
                               max_depth=4)
las = Lasso(alpha=5)
lgb = {
    'objective': 'regression',
    'metric': 'rmse',
    'boosting': 'gbdt',
Example #11
def main(argv):
    """Main entrypoint for the anonymization tool"""

    # Default parameters
    configuration_file = ''
    input_file = ''
    output_file = ''
    use_cache = False

    # Read and set tool parameters
    try:
        opts, _ = getopt.getopt(
            argv, "c:i:o:vs",
            ["config=", "input=", "output=", "verbose", "use_chached_docs"])
    except getopt.GetoptError:
        logger.error(
            'main.py -c <config_file> -i <input_file> -o <output_file>')
        sys.exit(2)
    for opt, arg in opts:
        if opt in ("-c", "--config"):
            configuration_file = arg
        if opt in ("-i", "--input"):
            input_file = arg
        if opt in ("-o", "--output"):
            output_file = arg
        if opt in ("-s", "--use_chached_docs"):
            use_cache = True
        if opt in ("-v", "--verbose"):
            logging.getLogger().setLevel(logging.DEBUG)

    # Let's get started
    logger.info("Anonymizing input file %s", input_file)

    # Initialize and read configuration
    configuration_reader = ConfigurationReader()
    config = configuration_reader.read(configuration_file)

    # Read data using data types defined in the configuration
    data_reader = DataReader(config)
    df = data_reader.read(input_file)

    # Initialize the sensitive terms recognizer
    sensitive_terms_recognizer = SensitiveTermsRecognizer(config, use_cache)

    # Initialize the preprocessor (preprocessor is stateful, so pass df at the beginning)
    pp = Preprocessor(sensitive_terms_recognizer, config, df)

    # Run through preprocessing of dataframe: Data cleansing, analysis of textual attributes, resolving of redundant information, and compression
    pp.clean_textual_attributes()
    pp.analyze_textual_attributes()
    pp.find_redundant_information()
    pp.compress()

    # Get sensitive terms dictionary and preprocessed dataframe
    terms = pp.get_sensitive_terms()
    df = pp.get_df()

    # Initialize the anonymization kernel by providing the sensitive terms dictionary, the configuration, the sensitive terms recognizer, and the preprocessor
    kernel = AnonymizationKernel(terms, config, sensitive_terms_recognizer, pp)

    # Save the unanonymized dataframe for later
    unanonymized_df = df.copy()

    # Parameters for anonymization
    k = config.parameters["k"]
    strategy = config.parameters["strategy"]
    biases = config.get_biases()
    relational_weight = config.get_relational_weight()

    # Anonymize quasi identifier (applying k-anonymity) and recode textual attributes
    anonymized_df, partitions, partition_split_statistics = kernel.anonymize_quasi_identifiers(
        df, k, strategy, biases, relational_weight)
    anonymized_df = kernel.recode_textual_attributes(anonymized_df)

    # Parameters for calculating metrics
    quasi_identifiers = config.get_quasi_identifiers()
    textual_attribute_mapping = pp.get_textual_attribute_mapping()

    # Calculating the total, relational, and textual information loss based on the original and anonymized data frame
    total_information_loss, relational_information_loss, textual_information_loss = calculate_normalized_certainty_penalty(
        unanonymized_df, anonymized_df, quasi_identifiers,
        textual_attribute_mapping)

    # Calculating the mean and std for partition size as well as split statistics
    mean_partition_size = calculate_mean_partition_size(partitions)
    std_partition_size = calculate_std_partition_size(partitions)
    if partition_split_statistics:
        number_of_relational_splits, number_of_textual_splits = get_partition_split_share(
            partition_split_statistics, textual_attribute_mapping)

    # Notify about the results
    logger.info("Information loss for relational attributes is %4.4f",
                relational_information_loss)
    if textual_information_loss:
        logger.info("Information loss for textual attribute is %4.4f",
                    textual_information_loss["total"])
    logger.info("Total information loss is %4.4f", total_information_loss)
    logger.info(
        "Ended up with %d partitions with a mean size of %.2f and a std of %.2f",
        len(partitions), mean_partition_size, std_partition_size)
    if partition_split_statistics:
        logger.info("Split %d times on a relational attribute",
                    number_of_relational_splits)
        logger.info("Split %d times on a textual attribute",
                    number_of_textual_splits)

    # Initialize the postprocessor with the config and the preprocessor
    post_processor = PostProcessor(config, pp)

    # Perform post processing actions on the anonymized data frame
    anonymized_df = post_processor.clean(anonymized_df)
    anonymized_df = post_processor.uncompress(anonymized_df)
    anonymized_df = post_processor.pretty(anonymized_df)

    # Don't forget to drop the direct identifiers since they are now not needed anymore
    anonymized_df = kernel.remove_direct_identifier(anonymized_df)

    # Notify and save
    logger.info("Saving anonymized file to %s", output_file)
    anonymized_df.to_csv(output_file, index=False)
Example #12
import sys
import time

import numpy as np
from sklearn.feature_extraction import DictVectorizer

from metrics import evaluation
from utilities import cluster_cf
from utilities import file_io
from preprocessing.preprocessor import Preprocessor  # assumed project-local path (matches Example #6)
import recommendation  # assumed project-local module providing predict_by_user / recommend_all


if __name__ == "__main__":
    start_time = time.time()
    np.set_printoptions(threshold=sys.maxsize)  # np.nan is rejected as a threshold by modern NumPy

    vectorizer = DictVectorizer()
    # reset file reader
    chunks = file_io.read_lastfm_user_art_file("data/halfid_20%_train.tsv")

    valid_songs = []    # don't filter with valid songs
    valid_songs = file_io.get_all_valid_songs('data/song_word2vec_whole_truncate_60000_new.csv')

    pre = Preprocessor(chunks, vectorizer, valid_songs)
    pre.reset_file_reader(chunks)

    # read user song mapping
    pre.read_user_songs(3000000)
    # convert to user-song matrix
    X = pre.get_user_song_matrix()
    print("non zeros: {0}".format(X.count_nonzero()))
    print("pre-processed in {0:.2f} sec".format(time.time() - start_time))

    #cluster_cf.cluster_usr(X, k=5)
    print("non zeros: {0}".format(X.count_nonzero()))
    pred = recommendation.predict_by_user(X)

    #pred = recommendation.predict_by_factorize(X)
    recommended = recommendation.recommend_all(X, pred, masked=False)
Example #13
    print('export_path = {}\n'.format(export_path))
    if os.path.isdir(export_path):
        print('\nAlready saved a model, cleaning up\n')

    from keras import backend
    sess = backend.get_session()

    tf.compat.v1.saved_model.simple_save(
        sess,
        export_path,
        inputs={'input_image': model.input},
        outputs={t.name: t for t in model.outputs})


# working with dataload_ecg
df_train, df_test = Preprocessor.load_mitbih("")
df_train_ecg, df_test_ecg = Preprocessor.load_ecg("")

# DataFrame.append returns a new frame, so keep the result
df_train = df_train.append(df_train_ecg)
df_test = df_test.append(df_test_ecg)

X_train, y_train, X_val, y_val, X_test, y_test = Preprocessor.CNN_preprocessor(df_train, df_test, 800, 2000)
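# 800 and 2000 are presumably dataset-specific split/sample counts for CNN_preprocessor (assumption)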

# define model
n_obs, feature, depth = X_train.shape
model = CNNModel.get_model(n_obs, feature, depth)
h_params = CNNModel.CNN_hyperparameters(n_obs)

model.compile(loss=h_params['loss'], optimizer=h_params['optimizer'], metrics=h_params['metrics'])

history = model.fit(X_train, y_train,
Example #14
OUTPUT_DIR = "use_out"
# dummy to make the network happy
BATCH_SIZE = None
'''
Network
'''
# file location of weights to restore from (i.e. weights/model1.ckpt)
INITIAL_WEIGHTS = 'checkpoints/cvd_model.ckpt'
'''
SCRIPT
'''
# Only run if this is the main module to be run
if __name__ == '__main__':

    # build preprocessor
    ppr = Preprocessor()

    # Process raw data
    X, Y, events_found = ppr.get_raw_data(DIMENSION, [RAW_FILE], bad)
    X, Y = ppr.remove_outliers(X, Y)
    X, Y = ppr.normalize(X, Y)
    trX, trY, teX, teY, vaX, vaY = ppr.partition_for_training(X, Y, 0.0, 1.0)
    ppr.store_training_partitions(trX, trY, teX, teY, vaX, vaY, INPUT_DIR)

    # build adapter
    adapter = MACAdapter(INPUT_DIR, DIMENSION, FOLDS)

    # build model
    convnet = ConvNet(DIMENSION)

    # build server
Example #15
        if not Path(args.gt_path).exists():
            print(args.gt_path + " does not exist")
            exit(-1)
    if args.uncert_path:
        if not Path(args.uncert_path).exists():
            print(args.uncert_path + " does not exist")
            exit(-1)
        uncert_path = Path(args.uncert_path)
    else:
        uncert_path = None

    model_path = Path(args.model_path)
    options = json.load(open(Path(model_path, 'options.json'), 'r'))

    # Setup Preprocessing filters
    preprocessor = Preprocessor()
    if 'denoise' in options:
        if 'denoise_parms' in options:
            preprocessor.add_filter(
                filter.get_denoise_filter(options['denoise'],
                                          options['denoise_parms']))
        else:
            preprocessor.add_filter(
                filter.get_denoise_filter(options['denoise']))

    if 'contrast' in options and options['contrast']:
        clahe = cv2.createCLAHE(
            clipLimit=2.0,
            tileGridSize=(25, 25))  # CLAHE adaptive contrast enhancement
        preprocessor.add_filter(clahe.apply)
Example #16
def compare_preprocessing():
    # Loading train and test data:

    all_categories = [
        'alt.atheism', 'comp.graphics', 'comp.os.ms-windows.misc',
        'comp.sys.ibm.pc.hardware', 'comp.sys.mac.hardware', 'comp.windows.x',
        'misc.forsale', 'rec.autos', 'rec.motorcycles', 'rec.sport.baseball',
        'rec.sport.hockey', 'sci.crypt', 'sci.electronics', 'sci.med',
        'sci.space', 'soc.religion.christian', 'talk.politics.guns',
        'talk.politics.mideast', 'talk.politics.misc', 'talk.religion.misc'
    ]

    print("Loading 20 newsgroups...")

    newsgroups_train = fetch_20newsgroups(subset='train',
                                          remove=('headers', 'footers',
                                                  'quotes'),
                                          categories=all_categories)

    newsgroups_test = fetch_20newsgroups(subset='test',
                                         remove=('headers', 'footers',
                                                 'quotes'),
                                         categories=all_categories)

    print("{} training documents loaded.".format(
        newsgroups_train.filenames.shape[0]))

    print("Buidling Preprocessor combinations...")
    # flags: special_character_removal, number_removal, url_email_removal, stopword_removal, lower, stemming, lemmatize
    num_of_preprocessor_flags = 7
    # Create every possible True/False combination for the seven flags
    booleans = [False, True]
    flags_list = [
        list(b)
        for b in itertools.product(booleans, repeat=num_of_preprocessor_flags)
    ]

    invalid_flags = []
    for flags in flags_list:
        if flags[5] and flags[6]:
            # Drop simultaneous stemming and lemmatization
            invalid_flags.append(flags)
        elif flags[5] and not flags[4]:
            # Drop stemming without lowercasing (the stemmer lowercases anyway)
            invalid_flags.append(flags)

    flags_list = [x for x in flags_list if x not in invalid_flags]
    print("{} Combinations built.".format(len(flags_list)))

    # Initialize vectorizer, machine learning algorithm and data frame to store the results
    vectorizer = TfidfVectorizer(analyzer="word",
                                 tokenizer=dummy,
                                 lowercase=False,
                                 preprocessor=dummy,
                                 stop_words=None)
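    # "dummy" is presumably an identity passthrough so TF-IDF keeps Preprocessor's tokens as-is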
    clf = MultinomialNB(alpha=.01)
    columns = [
        'Special Character Removal', 'Number Removal',
        'URL and E-Mail Removal', 'Stopword Removal', 'Lowercase', 'Stemming',
        'Lemmatization', 'Unique Words', 'Accuracy'
    ]
    rows = []

    for flags in flags_list:  # loops through all combinations
        prep = Preprocessor(special_character_removal=flags[0],
                            number_removal=flags[1],
                            url_email_removal=flags[2],
                            stopword_removal=flags[3],
                            lower=flags[4],
                            stemming=flags[5],
                            lemmatize=flags[6])

        preprocessed_train_data = [
            prep.preprocess(d) for d in newsgroups_train.data
        ]

        preprocessed_test_data = [
            prep.preprocess(d) for d in newsgroups_test.data
        ]

        vectors = vectorizer.fit_transform(preprocessed_train_data)

        # Train machine learning model
        clf.fit(vectors, newsgroups_train.target)

        # Transform test data to the model fitted to the training data
        vectors_test = vectorizer.transform(preprocessed_test_data)

        # Evaluate
        pred = clf.predict(vectors_test)
        vocab = vectors.shape[1]
        accuracy = metrics.accuracy_score(newsgroups_test.target, pred)
        rows.append([
            flags[0], flags[1], flags[2], flags[3], flags[4], flags[5],
            flags[6], vocab, accuracy
        ])

        print(
            "Spec: {} , Numbers: {} , EmailUrl: {} , SWR: {}, low: {}, Stem: {} , Lem: {} -> Vocab: {}, Acc: {}"
            .format(flags[0], flags[1], flags[2], flags[3], flags[4], flags[5],
                    flags[6], vocab, accuracy))

    # Organize data frame and save the results
    df = pd.DataFrame(np.array(rows), columns=columns)
    df = df.sort_values(by=['Accuracy'], ascending=False)
    pprint(df)
    df.to_csv('results.csv', sep=';')
Example #17
File: csap.py Project: iahuang/csap
if __name__ == "__main__":
    csrc_path = argv[0]

    with open(csrc_path) as fl:
        avr_code = gcc.compile(fl.read())

    with open("lib/avrheader.sap") as fl:
        avrheader = fl.read()

    translator = Translator(avrheader)
    sap = translator.to_sap(avr_code)

    with open("build/build.sap.superset", "w") as fl:
        fl.write(sap)

    proc = Preprocessor()
    proc.load_extension("ext/sapplus.json")
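    # The extension file presumably defines extra directives that preprocess() expands before assembly (assumption)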

    sap = proc.preprocess(sap)

    with open("build/build.sap", "w") as fl:
        fl.write(sap)

    out = assemble(sap)

    if not out.success:
        print("SAP output did not compile successfully")
Example #18

# The location in which to save the model
SAVE_NAME = "example_training.ckpt"
'''
SCRIPT
'''
# Only run if this is the main module to be run
if __name__ == '__main__':

    # JSON object returned from api_call
    # replace this with however you would like it to work in production
    json_data = json.load(open(EXAMPLE_FILE))
    # NOTE if events in the json object have neither "aircraft" nor "community"
    # fields, they will be labeled as community for training - probably try to avoid this

    # build preprocessor
    ppr = Preprocessor()

    # Process raw data
    #X, Y, events_found = ppr.get_raw_data(DIMENSION, [RAW_FILE], bad)
    X, Y, events_found = ppr.get_from_json(DIMENSION, json_data)
    X, Y = ppr.remove_outliers(X, Y)
    X, Y = ppr.normalize(X, Y)
    # Shove all events into the "training" subdirectory
    trX, trY, teX, teY, vaX, vaY = ppr.partition_for_training(X, Y, 1.0, 0.0)
    # Store events in intermediate directory (will be deleted on subsequent trainings)
    ppr.store_training_partitions(trX, trY, teX, teY, vaX, vaY, INPUT_DIR)

    # build adapter
    adapter = MACAdapter(INPUT_DIR, DIMENSION, FOLDS)

    # build model
Example #19
"""
Demo file for usage of the Preprocessor class
Initialize the object with the required parameters, then
call the process_images() method
"""

import sys, os
lib_path = os.path.abspath(os.path.join(__file__, '../..'))
sys.path.append(lib_path)

from preprocessing.preprocessor import Preprocessor

# inputs
size = 299
data_dir = './data/'
annotations_dir = './annotations/'
dest_dir = './data_preprocessed_{}/'.format(size)

train_annotations = '{}train2017.json'.format(annotations_dir)
val_annotations = '{}val2017.json'.format(annotations_dir)

preprocessor_train = Preprocessor(data_dir, train_annotations, (size, size))
preprocessor_train.process_images(dest_dir)
preprocessor_val = Preprocessor(data_dir, val_annotations, (size, size))
preprocessor_val.process_images(dest_dir)

print("Preprocessing in", dest_dir, "completed.")
Example #20
    def __run_training__():

        cfg = Configuration()

        # These variables should be parameterized

        GPU = True

        if not GPU:
            os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
            os.environ["CUDA_VISIBLE_DEVICES"] = ""

        # Input Path

        root_dir = os.path.dirname(os.path.abspath(__file__))

        image_path = cfg.image_path

        json_path = os.path.join(root_dir, cfg.input_filename)

        trainingset = os.path.join(root_dir, 'trainingset')

        Preprocessor.__generate_kijiji_set__(root_dir, image_path, json_path,
                                             trainingset, 'make')

        # --------------------------------------------------------------------------------------------------------------

        image_path = os.path.join(root_dir, 'trainingset')

        data_path = glob(image_path + "/*")

        # Image Segmentation Parameters

        model_path = os.path.expanduser(cfg.model_path)
        assert model_path.endswith('.h5'), 'Keras model must be a .h5 file.'
        anchors_path = os.path.expanduser(cfg.anchors_path)
        classes_path = os.path.expanduser(cfg.classes_path)
        test_path = os.path.expanduser(cfg.test_path)
        output_path = os.path.expanduser(cfg.segmented_output_path)
        json_path = os.path.expanduser(cfg.json_output)

        if not os.path.exists(output_path):
            print('Creating output path {}'.format(output_path))
            os.mkdir(output_path)

        sess = K.get_session()

        class_names = Preprocessor.__return_class_names__(classes_path)

        anchors = Preprocessor.__return_anchors__(anchors_path)

        yolo_model = load_model(model_path)

        # Verify model, anchors, and classes are compatible

        num_classes = len(class_names)

        num_anchors = len(anchors)

        info = 'Mismatch between model and given anchor and class sizes. ' \
               'Specify matching anchors and classes with --anchors_path and --classes_path flags.'

        model_output_channels = yolo_model.layers[-1].output_shape[-1]
        assert model_output_channels == num_anchors * (num_classes + 5), info
        print('{} model, anchors, and classes loaded.'.format(model_path))

        # Check if model is fully convolutional, assuming channel last order.

        model_image_size = yolo_model.layers[0].input_shape[1:3]

        is_fixed_size = model_image_size != (None, None)

        # Generate Colors for drawing bounding boxes

        hsv_tuples, colors = Preprocessor.__generate_colors_for_bounding_boxes__(
            class_names)

        yolo_outputs = yolo_head(yolo_model.output, anchors, len(class_names))

        input_image_shape = K.placeholder(shape=(2, ))
        boxes, scores, classes = yolo_eval(yolo_outputs,
                                           input_image_shape,
                                           score_threshold=cfg.score_threshold,
                                           iou_threshold=cfg.iou_threshold)

        # Load Images from the root folder

        input_images_model_1, all_images, data_path, data_path_with_image_name = Preprocessor.__load_image_data_thumbnails__(
            data_path,
            cfg.compressed_image_height,
            cfg.compressed_image_width,
            cfg.compressed_channel,
            cfg.number_of_categories,
            cfg.number_of_images_per_category,
            root_dir,
            is_fixed_size,
            model_image_size,
            sess,
            yolo_model,
            input_image_shape,
            boxes,
            scores,
            classes,
            cfg.font_path,
            class_names,
            colors,
            output_path,
            json_path,
            test_path,
            True,  # Segmentation Flag
            False,  # Edge-detection Flag
            True,  # Extract object Flag
            False)  # Gray Scale Flag

        input_images_model_2, all_images, data_path, data_path_with_image_name = Preprocessor.__load_image_data_thumbnails__(
            data_path,
            cfg.compressed_image_height,
            cfg.compressed_image_width,
            cfg.compressed_channel,
            cfg.number_of_categories,
            cfg.number_of_images_per_category,
            root_dir,
            is_fixed_size,
            model_image_size,
            sess,
            yolo_model,
            input_image_shape,
            boxes,
            scores,
            classes,
            cfg.font_path,
            class_names,
            colors,
            output_path,
            json_path,
            test_path,
            False,  # Segmentation Flag
            True,  # Edge-detection Flag
            False,  # Extract object Flag
            False)  # Gray Scale Flag

        input_images_model_3, all_images, data_path, data_path_with_image_name = Preprocessor.__load_image_data_thumbnails__(
            data_path,
            cfg.image_height,
            cfg.image_width,
            cfg.channel,
            cfg.number_of_categories,
            cfg.number_of_images_per_category,
            root_dir,
            is_fixed_size,
            model_image_size,
            sess,
            yolo_model,
            input_image_shape,
            boxes,
            scores,
            classes,
            cfg.font_path,
            class_names,
            colors,
            output_path,
            json_path,
            test_path,
            False,  # Segmentation Flag
            False,  # Edge-detection Flag
            False,  # Extract object Flag
            False)  # Gray Scale Flag

        input_shape = [
            cfg.compressed_image_height, cfg.compressed_image_width,
            cfg.compressed_channel
        ]

        input_shape_3 = [cfg.image_height, cfg.image_width, cfg.channel]

        # Model Save Paths

        model_1_save_path = os.path.join(root_dir + cfg.model_1_save)
        model_2_save_path = os.path.join(root_dir + cfg.model_2_save)
        model_3_save_path = os.path.join(root_dir + cfg.model_3_save)

        Preprocessor.__create_output_directories__(model_1_save_path)
        Preprocessor.__create_output_directories__(model_2_save_path)
        Preprocessor.__create_output_directories__(model_3_save_path)

        # Instantiating the training class

        train = Train(input_images_model_1, input_images_model_2,
                      input_images_model_3, input_shape, input_shape_3,
                      cfg.batch_size, cfg.epochs, model_1_save_path,
                      model_2_save_path, model_3_save_path)

        # Output Path

        output_path_model_1 = os.path.join(root_dir + cfg.output_model_1)
        output_path_model_2 = os.path.join(root_dir + cfg.output_model_2)
        output_path_model_3 = os.path.join(root_dir + cfg.output_model_3)

        Preprocessor.__create_output_directories__(output_path_model_1)
        Preprocessor.__create_output_directories__(output_path_model_2)
        Preprocessor.__create_output_directories__(output_path_model_3)

        # FCN Model

        model_1 = train.__train_model_1__()

        # VGG Model

        model_2 = train.__train_model_2__()

        # Inception-v3

        model_3 = train.__train_model_3__()
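
        # Score each trained backbone on its own inputs to obtain per-model feature maps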

        features_from_model_1 = Preprocessor.__get_score_model__(
            model_1, input_images_model_1, output_path_model_1)
        features_from_model_2 = Preprocessor.__get_score_model__(
            model_2, input_images_model_2, output_path_model_2)
        features_from_model_3 = Preprocessor.__get_score_model__(
            model_3, input_images_model_3, output_path_model_3)

        print("Output FeatureMap For Model 1 \n")
        print(features_from_model_1.shape)
        print("\n")

        print("Output FeatureMap For Model 2 \n")
        print(features_from_model_2.shape)
        print("\n")

        print("Output FeatureMap For Model 3 \n")
        print(features_from_model_3.shape)
        print("\n")