def main(dataset_path, percent_eval):
    """Build train/eval TFRecords and a label map from a dataset tar archive.

    The archive is copied to ``/home/data.tar``, unpacked into ``/home/out``,
    handed to ``json_to_csv`` and then serialised into ``train.record``,
    ``eval.record`` and ``map.pbtxt`` under ``/opt/ml/model``.

    Args:
        dataset_path: Path of the dataset tar archive to ingest.
        percent_eval: Value forwarded to ``json_to_csv.main`` (presumably the
            percentage of samples reserved for evaluation -- confirm against
            that module). Values outside the 0-100 range are replaced by 30.

    The process exits with status 1 if the archive cannot be copied.
    """
    OUTPUT_PATH = "/opt/ml/model"
    EXTRACT_PATH = "/home"
    TMP_PATH = "/home/tmp"

    # exist_ok avoids the check-then-create race of os.path.exists().
    os.makedirs(TMP_PATH, exist_ok=True)
    os.makedirs(EXTRACT_PATH, exist_ok=True)

    archive_copy = join(EXTRACT_PATH, 'data.tar')
    try:
        shutil.copy(dataset_path, archive_copy)
    except Exception:
        # Not a bare ``except`` so Ctrl-C / SystemExit are not swallowed.
        print('unable to retrieve the dataset tar file.')
        sys.exit(1)

    # NOTE(review): extractall() trusts the member paths stored in the
    # archive; if datasets can come from untrusted sources, consider tarfile
    # extraction filters.
    with tarfile.open(archive_copy) as tar_file:
        tar_file.extractall(join(EXTRACT_PATH, 'out'))

    # The lower bound used to be compared against 100, which reset every
    # value except exactly 100 to the default.
    if percent_eval < 0 or percent_eval > 100:
        percent_eval = 30

    # json_to_csv is expected to leave train.csv / eval.csv in TMP_PATH;
    # that is where the record generation below reads them from.
    json_to_csv.main(percent_eval)

    generate_tfrecord.main(TMP_PATH + "/train.csv",
                           join(OUTPUT_PATH, 'train.record'))
    generate_tfrecord.main(TMP_PATH + "/eval.csv",
                           join(OUTPUT_PATH, 'eval.record'))
    parse_meta.main(join(OUTPUT_PATH, 'map.pbtxt'))

    print(".\nRecords generated")
def main(dataset_paths, percent_eval, directory):
    """Build train/eval TFRecords and a label map from several tar archives.

    Every archive in ``dataset_paths`` is unpacked into the same
    ``/home/out`` directory, handed to ``json_to_csv`` and then serialised
    into ``train.record``, ``eval.record`` and ``map.pbtxt`` under
    ``directory``.

    Args:
        dataset_paths: Iterable of paths to dataset tar archives.
        percent_eval: Value forwarded to ``json_to_csv.main`` (presumably the
            percentage of samples reserved for evaluation -- confirm against
            that module). Values outside the 0-100 range are replaced by 30.
        directory: Output directory for the generated records and label map.

    The process exits with status 1 if any archive cannot be copied. A
    ``ValueError`` raised while generating the records is reported as a class
    label mismatch between the datasets and is not re-raised.
    """
    OUTPUT_PATH = directory
    EXTRACT_PATH = "/home"
    TMP_PATH = "/home/tmp"

    # exist_ok avoids the check-then-create race of os.path.exists().
    os.makedirs(TMP_PATH, exist_ok=True)
    os.makedirs(EXTRACT_PATH, exist_ok=True)

    # Each archive is copied onto the same data.tar, so only the last copy
    # survives; extraction below reads the original paths. The copy is kept
    # for its side effect and as an early "can we read this file" check.
    current = None
    try:
        for current in dataset_paths:
            shutil.copy(current, join(EXTRACT_PATH, 'data.tar'))
    except Exception:
        # Not a bare ``except`` so Ctrl-C / SystemExit are not swallowed.
        # Name the offending archive: the message ends in a colon but used
        # to print nothing after it.
        print('unable to retrieve a dataset tar file:', current)
        sys.exit(1)

    # NOTE(review): extractall() trusts the member paths stored in the
    # archive; if datasets can come from untrusted sources, consider tarfile
    # extraction filters.
    for dataset in dataset_paths:
        with tarfile.open(dataset) as tar_file:
            tar_file.extractall(join(EXTRACT_PATH, 'out'))

    if percent_eval > 100 or percent_eval < 0:
        percent_eval = 30

    # json_to_csv is expected to leave train.csv / eval.csv in TMP_PATH;
    # that is where the record generation below reads them from.
    json_to_csv.main(percent_eval)

    try:
        generate_tfrecord.main(TMP_PATH + "/train.csv",
                               join(OUTPUT_PATH, 'train.record'))
        generate_tfrecord.main(TMP_PATH + "/eval.csv",
                               join(OUTPUT_PATH, 'eval.record'))
        parse_meta.main(join(OUTPUT_PATH, 'map.pbtxt'))
        print(".\nRecords generated")
    except ValueError:
        print(
            "The datasets provided do not have the same class labels. "
            "Please make sure that labels are spelt the same in both datasets, "
            "or label the same objects for both datasets."
        )
def main(dataset_paths, percent_eval, directory):
    """Build TFRecords and a label map from a single dataset archive.

    The archive flavour is chosen from the file extension of
    ``dataset_paths``:

    * ``.zip`` -- treated as a TensorFlow object-detection CSV export. The
      records are built from ``train/_annotations.csv`` and
      ``test/_annotations.csv`` inside ``/home/<dataset name>``, where the
      dataset name is the archive file name without its extension.
    * anything else -- treated as a tar archive: unpacked into ``/home/out``,
      handed to ``json_to_csv`` and built from the CSV files in
      ``/home/tmp``.

    Args:
        dataset_paths: Path of one dataset archive (despite the plural name).
        percent_eval: Value forwarded to ``json_to_csv.main`` in tar mode
            (presumably the percentage of samples reserved for evaluation --
            confirm against that module). Values outside the 0-100 range are
            replaced by 30. Unused in zip mode.
        directory: Output directory for ``train.record``, ``eval.record`` and
            ``map.pbtxt``.

    In tar mode the process exits with status 1 if the archive cannot be
    copied, and a ``ValueError`` raised while generating the records is
    reported as a class label mismatch and not re-raised.
    """
    ROOT_PATH, PATH_EXT = os.path.splitext(dataset_paths)
    DATASET_NAME = os.path.basename(ROOT_PATH)
    OUTPUT_PATH = directory
    EXTRACT_PATH = "/home"
    TMP_PATH = "/home/tmp"

    # Placeholder for an enum: 1 is tar, 0 is zip. Kept as plain ints because
    # the value is forwarded to generate_tfrecord and parse_meta.
    NORMAL_MODE = 1  # Assume this is a tar

    # exist_ok avoids the check-then-create race of os.path.exists().
    os.makedirs(TMP_PATH, exist_ok=True)
    os.makedirs(EXTRACT_PATH, exist_ok=True)

    if PATH_EXT == '.zip':
        print('.zip file extension found, interpreting as '
              'tensorflow object detection csv zip')
        NORMAL_MODE = 0  # Not a tar file

    if NORMAL_MODE:
        print("normal mode")
        try:
            shutil.copy(dataset_paths, join(EXTRACT_PATH, 'data.tar'))
        except Exception:
            # Not a bare ``except`` so Ctrl-C / SystemExit are not swallowed.
            print('unable to retrieve a dataset tar file:')
            sys.exit(1)

        # NOTE(review): extractall() trusts the member paths stored in the
        # archive; if datasets can come from untrusted sources, consider
        # tarfile extraction filters.
        with tarfile.open(dataset_paths) as tar_file:
            tar_file.extractall(join(EXTRACT_PATH, 'out'))

        if percent_eval > 100 or percent_eval < 0:
            percent_eval = 30

        # json_to_csv is expected to leave train.csv / eval.csv in TMP_PATH;
        # that is where the record generation below reads them from.
        json_to_csv.main(percent_eval)

        try:
            generate_tfrecord.main(TMP_PATH + "/train.csv",
                                   join(OUTPUT_PATH, 'train.record'),
                                   NORMAL_MODE, "/home/")
            generate_tfrecord.main(TMP_PATH + "/eval.csv",
                                   join(OUTPUT_PATH, 'eval.record'),
                                   NORMAL_MODE, "/home/")
            parse_meta.main(join(OUTPUT_PATH, 'map.pbtxt'), NORMAL_MODE,
                            TMP_PATH + "/eval.csv")
            print(".\nRecords generated")
        except ValueError:
            print(
                "The datasets provided do not have the same class labels. "
                "Please make sure that labels are spelt the same in both "
                "datasets, or label the same objects for both datasets."
            )
    else:
        from fnmatch import fnmatch

        print('treating as zip of tf obj detect')
        dataset_dir = EXTRACT_PATH + "/" + DATASET_NAME
        split_prefixes = ("valid", "train", "test")

        # Assumes a single zip at this time.
        with zipfile.ZipFile(dataset_paths, 'r') as zip_file:
            members = zip_file.namelist()
            # If the split folders sit at the archive root, give them a
            # wrapping <dataset name> directory; otherwise the archive is
            # assumed to already carry that directory itself. All entries
            # are inspected (not just the last one) so an empty archive no
            # longer raises IndexError and a trailing root-level file does
            # not hide the split folders.
            if any(name.startswith(split_prefixes) for name in members):
                zip_file.extractall(dataset_dir)
            else:
                zip_file.extractall(EXTRACT_PATH)

        # Diagnostic listing of every CSV found below the extraction root.
        for path, _subdirs, files in os.walk(EXTRACT_PATH):
            for name in files:
                if fnmatch(name, "*.csv"):
                    print("CSV:", os.path.join(path, name))

        # Generate the records: the "test" split feeds eval.record.
        print(dataset_dir + "/test/_annotations.csv")
        generate_tfrecord.main(dataset_dir + "/test/_annotations.csv",
                               join(OUTPUT_PATH, 'eval.record'),
                               NORMAL_MODE, dataset_dir + "/test/")
        generate_tfrecord.main(dataset_dir + "/train/_annotations.csv",
                               join(OUTPUT_PATH, 'train.record'),
                               NORMAL_MODE, dataset_dir + "/train/")
        print('main records generated')

        # NOTE(review): the label map is derived from the train CSV only;
        # a label missing from one of the CSVs is a known unhandled edge
        # case.
        parse_meta.main(join(OUTPUT_PATH, 'map.pbtxt'), NORMAL_MODE,
                        dataset_dir + "/train/_annotations.csv")
        print(".\nRecords generated")