def main(secrets, directory):
    """Download all tagged images of a Custom Vision project into per-tag folders.

    :param str secrets: Path to a JSON file holding the ``projectid``,
        ``endpoint`` and ``key`` entries.
    :param str directory: Output folder. It is deleted first if it already
        exists, then recreated with one sub-folder per project tag.
    :returns: None
    """
    with open(secrets) as handle:
        settings = json.load(handle)
    print(settings.keys())

    project = settings['projectid']
    service_url = settings['endpoint']
    training_key = settings['key']

    auth = ApiKeyCredentials(in_headers={"Training-key": training_key})
    trainer = CustomVisionTrainingClient(service_url, auth)

    # Start from a clean output tree.
    if os.path.exists(directory):
        shutil.rmtree(directory)
    os.makedirs(directory)

    for project_tag in trainer.get_tags(project_id=project):
        os.makedirs(os.path.join(directory, project_tag.name))

    # Page through the tagged images until the service returns an empty page.
    page_size = 50
    offset = 0
    while True:
        batch = trainer.get_tagged_images(project_id=project,
                                          take=page_size,
                                          skip=offset)
        if len(batch) == 0:
            break
        for picture in batch:
            # Each image is filed under its first tag with a random file name.
            target_name = str(uuid.uuid4()).lower() + '.png'
            new_image = os.path.join(directory,
                                     picture.tags[0].tag_name,
                                     target_name)
            print(f'\rdownloading {picture.original_image_uri} to {new_image}',
                  end="")
            request.urlretrieve(url=picture.original_image_uri,
                                filename=new_image)
        offset += page_size

    print('\nDone!')
class LabelUtility:
    """ Utility for interacting with the Custom Vision label tool. """

    def __init__(self, ws_name, project_id, project_key):
        """
        Create the training client and cache the project and its tags.

        :param str ws_name: Cognitive Services resource name, used to build
            the endpoint URL.
        :param str project_id: Custom Vision project id.
        :param str project_key: Training key passed to the client.
        """
        endpoint_url = "https://{}.cognitiveservices.azure.com/".format(
            ws_name)
        self.project_id = project_id
        self.client = CustomVisionTrainingClient(project_key,
                                                 endpoint=endpoint_url)
        self.project = self.client.get_project(project_id=project_id)
        self.tags = self.client.get_tags(project_id=project_id)

    def _get_or_create_tag(self, tag_name):
        """Return the cached tag called *tag_name*, creating it if missing."""
        tag = None
        for a_tag in self.tags:
            if a_tag.name == tag_name:
                tag = a_tag
        if not tag:
            tag = self.client.create_tag(self.project_id, tag_name)
            # Refresh the cache so later rows find the new tag.
            self.tags = self.client.get_tags(self.project_id)
        return tag

    def upload_directory(self,
                         data_dir,
                         img_ext="*.jpg",
                         img_dir="images",
                         lbl_file="labels.csv",
                         default_tag_name="important"):
        """
        upload_directory - Upload images from a given directory into the CV
        workspace. Each image is uploaded in its own request together with
        the regions listed for it in the labels file (columns read:
        FileName, XMin, YMin, XMax, YMax and, if present, DefectType).

        :param str data_dir: Source folder of the files.
        :param str img_ext: image extension (glob pattern).
        :param str img_dir: image folder.
        :param str lbl_file: labels file.
        :param str default_tag_name: tag used when the labels file has no
            DefectType column.
        :returns: None
        """
        label_fn = os.path.join(data_dir, lbl_file)
        img_folder = os.path.join(data_dir, img_dir)

        # Check if required folders exist.
        if not (os.path.isdir(img_folder) and os.path.exists(label_fn)):
            print("Input data not found")
            return

        # Read labels and image list.
        labels_df = pd.read_csv(label_fn)
        image_list = glob.glob(os.path.join(img_folder, img_ext))

        # Upload each image with regions
        for path in image_list:
            # basename works with the separator of the running platform,
            # unlike splitting on a literal backslash.
            file_name = os.path.basename(path)

            # Only the size is needed; close the file handle right away.
            with Image.open(path) as img:
                img_width, img_height = img.size

            regions = []
            image_rows = labels_df[labels_df.FileName == file_name]
            for _, row in image_rows.iterrows():
                # Convert absolute pixel corners to normalized left/top/
                # width/height.
                x = row.XMin / img_width
                w = (row.XMax - row.XMin) / img_width
                y = row.YMin / img_height
                h = (row.YMax - row.YMin) / img_height

                # Keep the caller's default untouched; pick per row.
                if "DefectType" in row:
                    tag_name = row.DefectType
                else:
                    tag_name = default_tag_name
                tag = self._get_or_create_tag(tag_name)

                regions.append(
                    Region(tag_id=tag.id, left=x, top=y, width=w, height=h))

            with open(path, mode="rb") as image_contents:
                tagged_images_with_regions = [
                    ImageFileCreateEntry(name=file_name,
                                         contents=image_contents.read(),
                                         regions=regions)
                ]

            upload_result = self.client.create_images_from_files(
                self.project.id, images=tagged_images_with_regions)
            if not upload_result.is_batch_successful:
                print("Image batch upload failed.")
                for image in upload_result.images:
                    print("Image status: ", image.status)

    def export_images(self, data_dir, img_dir="images", lbl_file="labels.csv"):
        """
        export_images - Export any tagged images that may exist
        and preserve their tags and regions. Images are saved as
        ``file_<n>.jpg``; a ``label_map.pbtxt`` and the labels CSV (columns
        image_name, DefectName, xmin, xmax, ymin, ymax, classid) are written
        to ``data_dir``. Note these column names differ from the ones
        upload_directory reads.

        :param str data_dir: Output folder.
        :param str img_dir: image folder (must already exist).
        :param str lbl_file: labels file.
        :returns: None
        """
        img_folder = os.path.join(data_dir, img_dir)

        # Check if required folders exist.
        if not os.path.isdir(img_folder):
            print("Output folder not found")
            return

        count = self.client.get_tagged_image_count(self.project_id)
        print("Found: ", count, " tagged images.")

        exported, idx = 0, 0
        data = []
        while count > 0:
            # Fetch in pages of at most 256 images.
            count_to_export = min(count, 256)
            print("Getting", count_to_export, "images")
            images = self.client.get_tagged_images(self.project_id,
                                                   take=count_to_export,
                                                   skip=exported)
            for image in images:
                file_name = f'file_{idx}.jpg'
                img_fname = os.path.join(img_folder, file_name)
                data += self.download_image(image, img_fname)
                idx += 1

            exported += count_to_export
            count -= count_to_export

        df = pd.DataFrame(data,
                          columns=[
                              "image_name", "DefectName", "xmin", "xmax",
                              "ymin", "ymax"
                          ])

        # Class ids are 1-based, assigned in sorted tag-name order.
        classes = sorted(set(df['DefectName']))
        class_ids = {}
        with open(os.path.join(data_dir, 'label_map.pbtxt'), "w+") as f:
            for i, clas in enumerate(classes):
                class_ids[clas] = i + 1
                f.write('item {\n')
                f.write('\tid: ' + str(i + 1) + '\n')
                f.write('\tname: \'' + clas + '\'\n')
                f.write('}\n')
                f.write('\n')

        df['classid'] = [
            class_ids[the_defect] for the_defect in df['DefectName']
        ]
        df.to_csv(os.path.join(data_dir, lbl_file), index=False)

    @staticmethod
    def download_image(image, img_fname):
        """
        download_image - Export an image.

        Downloads ``image.original_image_uri`` to ``img_fname`` and converts
        the image's normalized regions to integer pixel coordinates.

        :param pyImg3 image: Image object.
        :param str img_fname: Filename of the image.
        :returns: list of ``[file name, tag name, xmin, xmax, ymin, ymax]``
            rows, one per region (empty when the image has no regions).
        """
        # Tolerate both a missing attribute and an explicit None.
        regions = getattr(image, "regions", None) or []

        url = image.original_image_uri
        width = image.width
        height = image.height

        # Download the image
        responseFromURL = req.get(url).content
        with open(img_fname, 'wb') as f:
            f.write(responseFromURL)

        # Format the regions
        base_name = os.path.basename(img_fname)
        data = []
        for r in regions:
            left = r.left * width
            top = r.top * height
            wide = r.width * width
            high = r.height * height
            data.append([
                base_name, r.tag_name,
                int(left),
                int(left + wide),
                int(top),
                int(top + high)
            ])

        return data
# Download every tagged image of the project and collect its region
# annotations, keyed by the zero-padded running image number.
tagged_image_count = trainer.get_tagged_image_count(project_id)
print("Number of tagged images to download: %d" % tagged_image_count)

NUM_PER_REQUEST = 100
REQUEST_NUM = math.ceil(tagged_image_count / NUM_PER_REQUEST)

# id.json maps id -> class name; invert it to look ids up by class name.
with open('../id.json', 'r') as f:
    id_to_class = json.load(f)
class_to_id = {v: k for k, v in id_to_class.items()}

image_count = 0
anno = {}
for i in range(REQUEST_NUM):
    image_list = trainer.get_tagged_images(project_id,
                                           take=NUM_PER_REQUEST,
                                           skip=i * NUM_PER_REQUEST)
    for image in image_list:
        image_url = image.original_image_uri
        region_list = image.regions

        img_data = requests.get(image_url).content
        with open(IMAGES_PATH + '/%05d.jpg' % image_count, 'wb') as handler:
            handler.write(img_data)

        # Tag names are lower-cased before the id lookup.
        json_region_list = [{
            "id": class_to_id[r.tag_name.lower()],
            "bbox": [r.left, r.top, r.width, r.height]
        } for r in region_list]
        anno["%05d" % image_count] = json_region_list

        # Advance the counter so each image gets its own file name and
        # annotation key instead of overwriting entry 00000.
        image_count += 1
# Fetch all tagged images of the source project in pages of 256 and
# accumulate them in `tagged_images`.
tagged_image_count=0
tagged_image_count=trainer.get_tagged_image_count(Project.id)
batchStartIndex=0
batchMaxIndex=256
num_batches=math.ceil(tagged_image_count/256)
print(tagged_image_count)
print(batchStartIndex)
print(batchMaxIndex)
print(num_batches)
tagged_images = []
# range() is evaluated once while batchStartIndex is still 0, so this runs
# num_batches times; `index` itself is unused.
for index in range(batchStartIndex, num_batches):
    print(batchStartIndex, batchMaxIndex)
    # batchStartIndex doubles as the `skip` offset of the current page.
    tagged_images.extend( trainer.get_tagged_images(Project.id, skip=batchStartIndex, take=256) )
    batchStartIndex+=256
    # batchMaxIndex is only printed in the visible code; it is clamped to
    # the total image count.
    batchMaxIndex+=256
    if(batchMaxIndex > tagged_image_count):
        batchMaxIndex = tagged_image_count

# In[50]:
tagged_images_with_tags = []
for image in tagged_images: #for each tagged image on origin
    dest_tags_ids = []
    for tag in image.tags: #for each tag on the origin image
        # NOTE(review): the body of this loop is not visible in this excerpt.
def main():
    """Download a Custom Vision project's tagged images in YOLO layout.

    Reads the endpoint, training key, project id and output directory from
    the command line. For every tagged image it writes ``<n>.txt`` with one
    ``<object-class> <x-center> <y-center> <width> <height>`` line per
    region and downloads the image to ``<n>.jpg``. The tag names are then
    written to ``class.names``, in the order their class indices were
    assigned.

    Relies on the module-level ``download_batch_size`` for both the page
    size and the number of download threads.

    :returns: None
    """
    # Parse arguments
    parser = argparse.ArgumentParser(description='Fetch data from CustomVision. Grab endpoint, training_key and project_id from https://www.customvision.ai/projects/<project_id>#/settings')
    parser.add_argument('--endpoint', type=str, required=True, help='"Endpoint" from Custom Vision project settings, e.g. https://westus2.api.cognitive.microsoft.com/')
    parser.add_argument('--training_key', type=str, required=True, help='"Key" from Custom Vision project settings, e.g. e46b53**************************')
    parser.add_argument('--project_id', type=str, required=True, help='"Project Id" from Custom Vision project settings, e.g. 5539cc35-****-****-****-************')
    parser.add_argument('--output_directory', '-o', type=str, required=True, help='e.g. "./downloads/"')
    args = parser.parse_args()

    endpoint = args.endpoint
    training_key = args.training_key
    project_id = args.project_id
    download_directory = args.output_directory

    # Make sure the target folder exists before any file is written.
    os.makedirs(download_directory, exist_ok=True)

    # Prepare for Custom Vision download
    credentials = ApiKeyCredentials(in_headers={"Training-key": training_key})
    trainer = CustomVisionTrainingClient(endpoint, credentials)

    # Maps tag name -> class index; insertion order is the index order.
    tag_indices = {}
    image_count = trainer.get_tagged_image_count(project_id=project_id)
    progress_bar = tqdm(total=image_count)
    num_batches = math.ceil(image_count / download_batch_size)
    print(f"There will be {num_batches} batches downloaded ({image_count} images in total).")

    def download_url(data):
        """Download one (url, destination path) pair and tick the bar."""
        request.urlretrieve(url=data[0], filename=data[1])
        progress_bar.update(1)

    try:
        # One pool for all batches, released on exit, instead of a new
        # never-closed pool per batch.
        with ThreadPool(download_batch_size) as pool:
            for batch_index in range(num_batches):
                image_batch = trainer.get_tagged_images(
                    project_id=project_id,
                    take=download_batch_size,
                    skip=batch_index * download_batch_size)
                image_urls = []

                for index, image in enumerate(image_batch):
                    base_filename = str((batch_index * download_batch_size) + index)
                    lines = []

                    # Images without regions yield an empty data file.
                    for region in image.regions or []:
                        # Register the tag on first sight and get its index
                        tag_index = tag_indices.setdefault(region.tag_name, len(tag_indices))

                        # Construct YOLO line (format is "<object-class> <x-center> <y-center> <width> <height>", all numbers normalized between 0 and 1)
                        line = f"{tag_index} {region.left + (region.width / 2)} {region.top + (region.height / 2)} {region.width} {region.height}"
                        lines.append(line)

                    # Create data file
                    data_filename = base_filename + ".txt"
                    with open(os.path.join(download_directory, data_filename), "w+") as data_file:
                        data_file.write("\n".join(lines))

                    # Queue image URL for download
                    image_download_url = image.original_image_uri
                    image_destination_path = os.path.join(download_directory, base_filename + ".jpg")
                    image_urls.append((image_download_url, image_destination_path))

                # Download all image files in the batch simultaneously
                pool.map(download_url, image_urls)
    finally:
        progress_bar.close()

    # Save unique tags into class.names file
    with open(os.path.join(download_directory, "class.names"), "w+") as tags_file:
        tags_file.write("\n".join(tag_indices))