Example #1
0
    def test_insert_before_extension(self, mock_dt):
        """Exercise insert_before_extension on Unix/Windows paths, with and
        without an extension, and with the default timestamp string."""

        # (input path, expected result) pairs covering both path styles,
        # each with and without a file extension
        cases = [
            ('/dir/subdir/file.jpg', '/dir/subdir/file.newstring.jpg'),
            (r'c:\dir\file.jpg', r'c:\dir\file.newstring.jpg'),
            ('/dir/subdir/file', '/dir/subdir/file.newstring'),
            (r'c:\dir\file', r'c:\dir\file.newstring'),
        ]
        for input_path, expected in cases:
            result = insert_before_extension(
                filename=input_path,
                s='newstring')
            self.assertEqual(result, expected)

        # When *s* is omitted, a timestamp from datetime.now() is inserted;
        # pin "now" via the mocked datetime module.
        mock_dt.now = mock.Mock(return_value=datetime(2020, 7, 15, 10, 13, 46))
        timestamp = '2020.07.15.10.13.46'
        result = insert_before_extension(filename='/dir/subdir/file.jpg')
        self.assertEqual(result, f'/dir/subdir/file.{timestamp}.jpg')
Example #2
0
def divide_files_into_tasks(file_list_json,
                            n_files_per_task=default_n_files_per_api_task):
    """
    Divides the file *file_list_json*, which should contain a single
    json-encoded list of strings, into a set of json files, each containing
    *n_files_per_task* files (the last file will contain <=
    *n_files_per_task* files).

    If the input .json is blah.json, output files will be blah.chunk000.json,
    blah.chunk001.json, etc.

    Returns:
        output_files: list of str, the chunk .json filenames written
        chunks: list of list of str, chunks[i] is the content of
            output_files[i]
    """

    with open(file_list_json) as f:
        file_list = json.load(f)

    chunks = divide_chunks(file_list, n_files_per_task)

    output_files = []

    for i_chunk, chunk in enumerate(chunks):
        # Zero-padded chunk id, e.g. 'chunk007', inserted before the extension
        chunk_id = 'chunk{0:0>3d}'.format(i_chunk)
        output_file = path_utils.insert_before_extension(
            file_list_json, chunk_id)
        output_files.append(output_file)
        with open(output_file, 'w') as f:
            json.dump(chunk, f, indent=1)

    return output_files, chunks
def divide_list_into_tasks(
    file_list: Sequence[str],
    save_path: str,
    n_files_per_task: int = MAX_FILES_PER_API_TASK
) -> Tuple[List[str], List[Sequence[Any]]]:
    """
    Splits *file_list* into chunks of at most *n_files_per_task* entries and
    writes each chunk to its own JSON file (the final chunk may be shorter).

    Each output filename is derived from *save_path* by inserting a chunk id
    before the extension: `blah.json` yields `blah.chunk000.json`,
    `blah.chunk001.json`, and so on.

    Args:
        file_list: list of str, filenames to split across multiple JSON files
        save_path: str, base path to save the chunked lists
        n_files_per_task: int, max number of files to include in each API task

    Returns:
        output_files: list of str, output JSON file names
        chunks: list of list of str, chunks[i] is the content of output_files[i]
    """

    chunks = divide_chunks(file_list, n_files_per_task)

    output_files = []
    for chunk_index, chunk in enumerate(chunks):
        out_path = path_utils.insert_before_extension(
            save_path, f'chunk{chunk_index:0>3d}')
        with open(out_path, 'w') as f:
            json.dump(chunk, f, indent=1)
        output_files.append(out_path)

    return output_files, chunks
Example #4
0
# Configure repeat-detection-elimination (RDE) options; HTML rendering off,
# only the filtered output is produced.
options.bRenderHtml = False
options.imageBase = image_base
# Encode the RDE thresholds in the output folder/file names so runs with
# different parameters don't collide.
rde_string = 'rde_{:.2f}_{:.2f}_{}_{:.1f}'.format(
    options.confidenceMin, options.iouThreshold, options.occurrenceThreshold,
    options.maxSuspiciousDetectionSize)
options.outputBase = os.path.join(filename_base, rde_string)
options.filenameReplacements = {'': ''}

# -1 presumably disables each debug cap (process everything) — TODO confirm
# against repeat_detections_core
options.debugMaxDir = -1
options.debugMaxRenderDir = -1
options.debugMaxRenderDetection = -1
options.debugMaxRenderInstance = -1

# NOTE(review): only the first combined output file is used; assumes the
# mapping has exactly one relevant entry — verify against the caller
api_output_filename = list(folder_name_to_combined_output_file.values())[0]
filtered_output_filename = path_utils.insert_before_extension(
    api_output_filename, 'filtered_{}'.format(rde_string))

suspiciousDetectionResults = repeat_detections_core.find_repeat_detections(
    api_output_filename, None, options)

#%% Manual RDE step

## DELETE THE ANIMALS ##

#%% Re-filtering

from api.batch_processing.postprocessing import remove_repeat_detections

remove_repeat_detections.remove_repeat_detections(
    inputFile=api_output_filename,
    outputFile=filtered_output_filename,
# Split annotations (and their corresponding images) into train/test sets
# based on whether the annotation's category is named 'test'.
train_annotations = []
test_annotations = []

for ann in tqdm(annotations):
    category_id = ann['category_id']
    image_id = ann['image_id']
    category_name = category_id_to_category[category_id]['name']
    im = image_id_to_image[image_id]
    if category_name == 'test':
        test_images.append(im)
        test_annotations.append(ann)
    else:
        train_images.append(im)
        train_annotations.append(ann)

train_fn = insert_before_extension(output_json_filename, 'train')
test_fn = insert_before_extension(output_json_filename, 'test')

# Write the train split, re-using the shared 'data' dict for the other
# top-level fields.  Use context managers so the file handles are closed
# (the previous open() calls inside json.dump leaked them).
data['images'] = train_images
data['annotations'] = train_annotations
with open(train_fn, 'w') as f:
    json.dump(data, f, indent=2)

# Write the test split
data['images'] = test_images
data['annotations'] = test_annotations
with open(test_fn, 'w') as f:
    json.dump(data, f, indent=2)

#%% Validate .json files

options = sanity_check_json_db.SanityCheckOptions()
options.baseDir = input_base_dir
options.bCheckImageSizes = False
Example #6
0
# Configure repeat-detection-elimination (RDE) options.  The folder name
# hard-codes the four threshold values set below (confidenceMin,
# iouThreshold, occurrenceThreshold, maxSuspiciousDetectionSize).
options.outputBase = os.path.join(filename_base,'rde_0.6_0.85_10_0.2')
options.filenameReplacements = {'':''}

# Detection-filtering thresholds; confidenceMax of 1.01 presumably keeps
# detections with confidence exactly 1.0 — TODO confirm comparison semantics
options.confidenceMin = 0.6
options.confidenceMax = 1.01 
options.iouThreshold = 0.85
options.occurrenceThreshold = 10
options.maxSuspiciousDetectionSize = 0.2

# -1 presumably disables each debug cap (process everything) — TODO confirm
# against repeat_detections_core
options.debugMaxDir = -1
options.debugMaxRenderDir = -1
options.debugMaxRenderDetection = -1
options.debugMaxRenderInstance = -1

# NOTE(review): only the first combined output file is used; assumes the
# mapping has exactly one relevant entry — verify against the caller
api_output_filename = list(folder_name_to_combined_output_file.values())[0]
filtered_output_filename = path_utils.insert_before_extension(api_output_filename,'filtered')

suspiciousDetectionResults = repeat_detections_core.find_repeat_detections(api_output_filename,
                                                                           None,
                                                                           options)


#%% Manual RDE step

## DELETE THE ANIMALS ##


#%% Re-filtering

from api.batch_processing.postprocessing import remove_repeat_detections