Example #1
def generate_vec():

    # Get feature vectors for the dorsal-labelled images
    dorsal_paths = filter_images('dorsal')
    _, dorsal_vectors = get_all_vectors(model, f={'path': {'$in': dorsal_paths}})

    # Get feature vectors for the palmar-labelled images
    palmar_paths = filter_images('palmar')
    _, palmar_vectors = get_all_vectors(model, f={'path': {'$in': palmar_paths}})

    # Get test vectors and their paths from the unlabelled DB
    test_data_paths, test_data = get_all_vectors(model, f={}, unlabelled_db=True)

    return dorsal_vectors, palmar_vectors, test_data, test_data_paths
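
A minimal usage sketch for these return values, assuming the labelling convention the later examples use (-1 for dorsal, 1 for palmar); the driver code below is illustrative, not from the original project:

import numpy as np

# Hypothetical driver: stack the labelled vectors and build matching labels
dorsal_vectors, palmar_vectors, test_data, test_data_paths = generate_vec()
train_data = np.vstack((dorsal_vectors, palmar_vectors))
train_labels = np.concatenate((np.full(len(dorsal_vectors), -1),
                               np.full(len(palmar_vectors), 1)))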
Example #2
    def get_data_matrix(cls,
                        feature,
                        label=None,
                        unlabelled=False,
                        ignore_metadata=False):
        min_max_scaler = MinMaxScaler()

        f = {}
        if label:
            label_images = utils.filter_images(label)
            f = {'path': {'$in': label_images}}

        # Build and scale feature matrix
        images, feature_space = utils.get_all_vectors(feature,
                                                      f=f,
                                                      unlabelled_db=unlabelled)
        feature_space = min_max_scaler.fit_transform(feature_space)
        # Not including metadata boosts accuracy of Set 2
        # Including metadata boosts accuracy of Set 1
        if ignore_metadata:
            meta = utils.get_metadata(unlabelled_db=unlabelled)
            # Mapping between image file path name and the metadata
            meta = {m['path']: m for m in meta}
            return images, meta, feature_space

        # Build and scale metadata matrix
        meta, metadata_space = cls.get_metadata_space(images,
                                                      unlabelled_db=unlabelled)
        metadata_space = min_max_scaler.fit_transform(metadata_space)

        # Column stack them
        data_matrix = np.c_[feature_space, metadata_space]

        return images, meta, data_matrix
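
A hedged call sketch: the class name DataProvider and the feature key 'hog' are placeholders, and only the keyword arguments come from the snippet above:

# Hypothetical invocation of the classmethod
images, meta, data_matrix = DataProvider.get_data_matrix(
    feature='hog',          # placeholder feature name
    label='dorsal',         # restrict to dorsal-labelled images
    unlabelled=False,
    ignore_metadata=False,  # False: column-stack scaled metadata onto the features
)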
Example #3
def build_unlabelled(model):
    # DORSAL TEST DATA
    dorsal_paths = filter_images('dorsal', unlabelled_db=True)
    _, u_dorsal_vectors = get_all_vectors(model, f={'path': {'$in': dorsal_paths}}, unlabelled_db=True)
    dorsal_class = np.array([-1] * len(u_dorsal_vectors))

    # PALMAR TEST DATA
    palmar_paths = filter_images('palmar', unlabelled_db=True)
    _, u_palmar_vectors = get_all_vectors(model, f={'path': {'$in': palmar_paths}}, unlabelled_db=True)
    palmar_class = np.array([1] * len(u_palmar_vectors))

    # STACK ALL TEST DATA AND LABELS
    test_data = np.vstack((u_dorsal_vectors, u_palmar_vectors))
    test_labels = np.concatenate((dorsal_class, palmar_class))

    return test_data, test_labels, np.concatenate((dorsal_paths, palmar_paths))
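
A quick, illustrative sanity check on the stacked output (assumes NumPy and a model value from the project's settings, as in Example #5):

import numpy as np

test_data, test_labels, test_paths = build_unlabelled(model)
assert test_data.shape[0] == len(test_labels) == len(test_paths)
print((test_labels == -1).sum(), 'dorsal and', (test_labels == 1).sum(), 'palmar test images')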
Example #4
def build_labelled(model):
    # DORSAL TRAIN DATA
    dorsal_paths = filter_images('dorsal')
    _, dorsal_vectors = get_all_vectors(model, f={'path': {'$in': dorsal_paths}})
    dorsal_class = np.array([-1] * len(dorsal_vectors))

    # PALMAR TRAIN DATA: every labelled image that is not dorsal is treated as palmar
    _, palmar_vectors = get_all_vectors(model, f={'path': {'$nin': dorsal_paths}})
    palmar_class = np.array([1] * len(palmar_vectors))

    # TRAIN DATA STACKED AND CLASSES
    train_data = np.vstack((dorsal_vectors, palmar_vectors))
    train_class = np.concatenate((dorsal_class, palmar_class))

    return train_data, train_class
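
To show how the two builders pair up, an end-to-end sketch with scikit-learn's SVC standing in for a classifier (scikit-learn is an assumption here; the original project may use its own model):

import numpy as np
from sklearn.svm import SVC  # stand-in classifier, not necessarily the project's choice

train_data, train_class = build_labelled(model)
test_data, test_labels, test_paths = build_unlabelled(model)

clf = SVC(kernel='rbf').fit(train_data, train_class)
predictions = clf.predict(test_data)
print('accuracy:', np.mean(predictions == test_labels))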
Example #5
                                            palmar_kmeans.centroids,
                                            return_min=True)
    return 'dorsal' if dorsal_dist <= palmar_dist else 'palmar'


if __name__ == "__main__":
    parser = prepare_parser()
    args = parser.parse_args()
    n_clusters = args.n_clusters

    # Get the absolute data path and the model whose features to use
    data_path = Path(settings.path_for(settings.DATA_PATH))
    model = settings.TASK2_CONFIG.MODEL

    # Fetch training data for dorsal and palmar images from the LABELLED DB
    dorsal_paths = filter_images('dorsal')
    dorsal_paths, dorsal_vectors = get_all_vectors(model, f={'path': {'$in': dorsal_paths}})
    # Every labelled image that is not dorsal is treated as palmar
    palmar_paths, palmar_vectors = get_all_vectors(model, f={'path': {'$nin': dorsal_paths}})

    # Fetch test data from the UNLABELLED DB
    test_data_paths, test_data = get_all_vectors(model, unlabelled_db=True)

    # Get centroids and centroid labels for the dorsal and palmar vectors
    print("Clustering dorsal vectors")
    dorsal_kmeans = Kmeans(dorsal_vectors, n_clusters)
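
The snippet breaks off here; a hedged continuation mirroring the truncated decision function at the top of this example. The Kmeans attribute names follow the fragment, while min_centroid_distance is a hypothetical stand-in for whatever produced dorsal_dist and palmar_dist:

import numpy as np

print("Clustering palmar vectors")
palmar_kmeans = Kmeans(palmar_vectors, n_clusters)  # assumed symmetric to the dorsal call

def min_centroid_distance(vector, centroids):
    # Hypothetical helper: Euclidean distance to the nearest centroid
    return np.linalg.norm(np.asarray(centroids) - vector, axis=1).min()

for path, vector in zip(test_data_paths, test_data):
    dorsal_dist = min_centroid_distance(vector, dorsal_kmeans.centroids)
    palmar_dist = min_centroid_distance(vector, palmar_kmeans.centroids)
    print(path, 'dorsal' if dorsal_dist <= palmar_dist else 'palmar')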
Example #6
    print("converting annotations ...")
    # Convert annotations
    if args.task == "bbox":
        oi["annotations"] = utils.convert_instance_annotations(
            original_annotations,
            oi["images"],
            oi["categories"],
            start_index=0,
            classes=args.classes,
        )
    elif args.task == "panoptic":
        oi["annotations"] = utils.convert_segmentation_annotations(
            original_segmentations,
            oi["images"],
            oi["categories"],
            original_mask_dir,
            segmentation_out_dir,
            start_index=0,
        )
        oi["images"] = utils.filter_images(oi["images"], oi["annotations"])

    # Write annotations into a .json file
    filename = os.path.join(
        base_dir,
        "annotations/",
        "openimages_{}_{}_{}.json".format(args.version, subset, args.task),
    )
    print("writing output to {}".format(filename))
    with open(filename, "w") as fp:  # context manager ensures the file is flushed and closed
        json.dump(oi, fp)
    print("Done")
Example #7
                      {'id': 2, 'name': 'Attribution-NonCommercial License', 'url': 'http://creativecommons.org/licenses/by-nc/2.0/'},
                      {'id': 3, 'name': 'Attribution-NonCommercial-NoDerivs License', 'url': 'http://creativecommons.org/licenses/by-nc-nd/2.0/'},
                      {'id': 4, 'name': 'Attribution License', 'url': 'http://creativecommons.org/licenses/by/2.0/'},
                      {'id': 5, 'name': 'Attribution-ShareAlike License', 'url': 'http://creativecommons.org/licenses/by-sa/2.0/'},
                      {'id': 6, 'name': 'Attribution-NoDerivs License', 'url': 'http://creativecommons.org/licenses/by-nd/2.0/'},
                      {'id': 7, 'name': 'No known copyright restrictions', 'url': 'http://flickr.com/commons/usage/'},
                      {'id': 8, 'name': 'United States Government Work', 'url': 'http://www.usa.gov/copyright.shtml'}]

    # Convert category information
    print('converting category info')
    oi['categories'] = utils.convert_category_annotations(original_category_info)

    # Convert image metadata
    print('converting image info ...')
    image_dir = os.path.join(base_dir, subset)
    oi['images'] = utils.convert_image_annotations(original_image_metadata, original_image_annotations, original_image_sizes, image_dir, oi['categories'], oi['licenses'])

    # Convert instance annotations
    print('converting annotations ...')
    if args.task == 'bbox':
        oi['annotations'] = utils.convert_instance_annotations(original_annotations, oi['images'], oi['categories'], start_index=0)
    elif args.task == 'panoptic':
        oi['annotations'] = utils.convert_segmentation_annotations(original_segmentations, oi['images'], oi['categories'], original_mask_dir, segmentation_out_dir, start_index=0)
        oi['images'] = utils.filter_images(oi['images'], oi['annotations'])

    # Write annotations into a .json file
    filename = os.path.join(base_dir, 'annotations/', 'openimages_{}_{}_{}.json'.format(args.version, subset, args.task))
    print('writing output to {}'.format(filename))
    with open(filename, 'w') as fp:  # context manager ensures the file is flushed and closed
        json.dump(oi, fp)
    print('Done')
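
A quick sanity check on the written file, using only the standard library (the key names mirror the oi dict built above):

import json

with open(filename) as fp:
    coco = json.load(fp)
print(len(coco['images']), 'images,',
      len(coco['annotations']), 'annotations,',
      len(coco['categories']), 'categories')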