def rollup(ds, units, parallel=1):
    # not used. came from old code just in case we want it.
    # this need a local timing also.
    #
    # Builds a map/shuffle/repeat/batch/prefetch pipeline over *ds* and then
    # times one full enumeration of it, reporting progress every 5000 batches.
    # NOTE(review): reconstructed from a collapsed one-line source; the nesting
    # of the two timing blocks is assumed — confirm against the original file.
    with f.timing("make pipline from dataset " + str(units) + " batches with parallel=" + str(parallel), units=units):
        ds2 = ds.map(lambda x: x + 1, num_parallel_calls=parallel)
        ds3 = ds2.shuffle(min(units, 10000))
        repeat = 10
        ds4 = ds3.repeat(repeat)
        batch = 100
        ds5 = ds4.batch(batch)
        # Expected number of batches (integer division drops any partial batch).
        batches = units * repeat // batch
        # Use prefetch() to overlap the producer and consumer.
        ds6 = ds5.prefetch(1)  #batches
    #print(batches,"batches")
    with f.timing("iterate" + str(batches), units=batches):
        print("iterate", flush=True)
        i = 0
        n = 0
        for i, x in enumerate(ds6):
            # NOTE(review): `i < 0` can never be true under enumerate; the
            # effective condition is "every 5000th batch".
            if i < 0 or i % 5000 == 0:
                print(i, type(x), len(x), str(x)[:20])
            n += 1
        print("after enumerationm, n: ", n, flush=True)
        # Warn when the observed batch count differs from the prediction.
        if n != batches:
            print(n, "!=", batches)
def run():
    """Benchmark process_folder() on a local SSD and on NAS camera folders.

    Runs four sections separated by rulers: local SSD, first NAS folder,
    local SSD again, first NAS folder again.  Relies on module-level
    ``process_folder`` and the ``f`` helpers; prints results, returns None.
    """
    print("-----------------------------------------")
    print("local ssd")
    ssd = "X:/cam_front_center"
    process_folder(ssd)  # on a local ssd.
    print("-----------------------------------------")
    print("nas")
    with f.timing("get camera folders", units=1):
        folders, jsons, labels = f.get_camera_folders(f.get_path(), root=f.get_root())
    print(len(folders), " camera folders")
    for folder in folders:
        process_folder(folder)
        if True:  # deliberately process only the first NAS folder
            break
    print("-----------------------------------------")
    # BUG FIX: this print was wrapped in a stray list literal, [print("local ssd")],
    # which built and discarded a one-element list for no reason.
    print("local ssd")
    process_folder(ssd)  # on a local ssd.ls ..
    print("-----------------------------------------")
    print("just read some files in a camera folder.")
    print("nas")
    with f.timing("get camera folders", units=1):
        folders, jsons, labels = f.get_camera_folders(f.get_path(), root=f.get_root())
    print(len(folders), " camera folders")
    for folder in folders:
        process_folder(folder)
        if True:  # same first-folder-only shortcut as above
            break
def time_one_pass(list_of_filenames, title=""):
    """Time one full pass of do_one_pass() over the given filename list."""
    count = len(list_of_filenames)
    label = "one full pass of enumerating mapped " + str(count) + " units outside of dataset."
    with f.timing(label, units=count, title=title):
        do_one_pass(list_of_filenames)
def time_dataset_map(ds, parse, units, parallel, title=""):
    """Time map_dataset() over *ds* and return the mapped dataset."""
    label = " map " + str(units) + " units with parallel=" + str(parallel)
    # why does this require the f.?
    with f.timing(label, units=units, title=title):
        return map_dataset(ds, parse, num_parallel_calls=parallel)
def time_make_tensor_slices_dataset_glob(path, pattern, title=""):
    """Glob *pattern* under *path* and time building a from_tensor_slices dataset."""
    matched = f.get_files(pathlib.Path(path), pattern)
    count = len(matched)
    with f.timing("dataset from tensor slices glob with: " + str(count), units=count, title=title):
        return tf.data.Dataset.from_tensor_slices(matched)
def time_make_list_files_dataset_str(path, pattern):
    """Time building a tf.data list_files dataset from the string path/pattern.

    The glob is performed only to count files for the timing report; the
    dataset itself is built from the string pattern, unshuffled.
    """
    path = pathlib.Path(path)
    count = len(f.get_flowers_files(path, pattern))
    with f.timing("dataset list files str with: " + str(count), count):
        # was str(path/'*.jpg')
        return tf.data.Dataset.list_files(str(path / pattern), shuffle=False)
def time_enumerations(ds, units=1, title=""):
    """Time three stages: enumerate *ds*, map it, enumerate the mapped dataset.

    Prints "1"/"2" progress markers between stages; returns None.
    """
    # BUG FIX: f.timing was called with five positional arguments — several
    # message fragments plus type(ds) — so type(ds) landed in the `units`
    # parameter slot (every other call site uses timing(message, units, title)).
    # Build a single message string and pass units/title where they belong.
    msg = ("enumerate over: " + str(type(ds)) + " dataset of " + str(units)
           + " units and map outside of dataset.")
    # why does this require the f.?
    with f.timing(msg, units, title):
        do_enumeration(ds, parse=parse1)
    print("1", flush=True)
    mapped = time_dataset_map(
        ds, parse1and, units, autotune,
        title=title + "time dataset.map inside dataset (no enumeration).")
    print("2", flush=True)
    with f.timing("enumerate over mapped dataset" + str(units) + " units outside of dataset.",
                  units, title):
        do_enumeration(mapped, parse2=write_file)
def time_make_list_files_dataset_glob(path, pattern, limit=None):
    # this is a list files dataset.
    """Glob files and time building a tf.data list_files dataset from the list.

    NOTE(review): `files` is counted BEFORE the `limit` slice is applied, so
    when limit is set the timing label/units report the unlimited glob size,
    not the size of the dataset actually built — preserved as-is here.
    """
    from_glob = f.get_flowers_files(path, pattern)
    files = len(from_glob)
    if limit is not None:
        from_glob = from_glob[:limit]
    with f.timing("dataset list files glob with: " + str(files), units=files):
        return tf.data.Dataset.list_files(from_glob, shuffle=False)
def run():
    # Per-camera-folder tf.data benchmark.  Pass 1 times only dataset
    # construction; pass 2 times construction + map(parse) + full
    # enumeration/write.  NOTE(review): reconstructed from a collapsed
    # one-line source; the `with` block extents are assumed from context.
    print("--------------------------------------------")
    x, y, z = f.get_camera_folders(f.get_path(), root=f.get_root())
    title = "cars by folder:"
    # Pass 1: dataset construction only.
    for i, folder in enumerate(x):
        files = f.get_files(folder, "*.png")
        units = len(files)
        print(i, "folder: " + folder, "has:", len(files), "files.")
        with f.timing("folder: " + folder + " [" + str(i) + "] has: " + str(len(files)) + " files.", units, title):
            ds = tff.make_tensor_slices_dataset_list(files)
    print("----------------------------------------------")
    # Pass 2: construction + map + enumeration (writes via tff.write_file).
    for i, folder in enumerate(x):
        files = f.get_files(folder, "*.png")
        units = len(files)
        print(i, "folder: " + folder, "has:", len(files), "files.")
        with f.timing("folder: " + folder + " [" + str(i) + "] has: " + str(len(files)) + " files.", units, title):
            ds = tff.make_tensor_slices_dataset_list(files)
            mapped = ds.map(tff.parse1and, tff.autotune)
            tff.do_enumeration(mapped, parse2=tff.write_file)
    print("--------------------------------------------")
def run(arguments, units=None):
    """Fan work out to worker threads via a queue and wait for it to drain.

    Builds the work list, starts one myThread per name in `threadNames`,
    enqueues everything under the shared lock, waits (timed) for the queue
    to empty, then signals `gl['done']` and joins the workers.
    """
    import time  # local import: only needed for the polling sleep below

    dataList = getList(arguments.old, units=units)
    print("process", len(dataList), "units using", len(threadNames), "threads.")
    print("first element is:", dataList[0])
    gl['lock'] = threading.Lock()
    gl['queue'] = mp.Queue()
    f1 = g = None
    if not arguments.old:
        f1 = readFile
        g = slow4040
    print("f:", f1, ", g:", g)
    threads = [
        myThread(i + 1, name, gl['queue'], f=f1, g=g)
        for i, name in enumerate(threadNames)
    ]
    for thread in threads:
        thread.start()
    if verbose: print("run() enqueue.")
    gl['lock'].acquire()
    enqueue(dataList)
    gl['lock'].release()
    if verbose: print("run() enqueued.")
    if verbose: print("run() before timing.")
    with bf.timing("run() says queue is empty", units=units, title="",
                   before="wait for queue to empty"):
        if verbose: print("run() time wait.")
        # BUG FIX: the original spun in `while not empty(): pass`, pegging a
        # CPU core for the whole drain.  Sleep briefly between polls instead.
        # NOTE(review): mp.Queue.empty() is only advisory — an empty queue does
        # not mean the workers have finished their last items; verify callers
        # rely on join() below for completion.
        while not gl['queue'].empty():
            time.sleep(0.001)
    if verbose: print("run() says queue is empty.")
    if verbose: print("set done true.")
    gl['done'] = True  # workers poll this flag to exit their loops (presumably)
    if verbose: print("wait for join.")
    for t in threads:
        t.join()
    if verbose: print("exit run()")
def time_make_tensor_slices_dataset_list(files, title=""):
    """Time building a from_tensor_slices dataset from an explicit file list."""
    count = len(files)
    label = "dataset from tensor slices file list with: " + str(count)
    with f.timing(label, units=count, title=title):
        return make_tensor_slices_dataset_list(files)
def time_load_and_save_images(folder, limit=None):
    """Time loading (up to *limit*) images from *folder* and saving them."""
    with f.timing("load and save images: " + folder):
        read_count, loaded = load_and_save_some(folder, limit=limit)
        print('read:', read_count, ', loaded:', len(loaded), 'images.')
def time_load_images(folder, limit=None):
    """Time loading (up to *limit*) images from one camera folder."""
    with f.timing("load from camera folder: " + folder):
        read_count, loaded = load_some_from_one_camera_folder(folder, limit=limit)
        print('read:', read_count, ', loaded:', len(loaded), 'images.')
# Benchmark script: build a small tf.data pipeline over the first `units`
# filenames and time dataset creation, enumeration, and filename->image map.
# NOTE(review): reconstructed from a collapsed one-line source.
from os.path import join
import pathlib
import matplotlib.pyplot as plt
import pandas as pd
import math
import numpy as np
import glob
import ntpath
import json
import PIL

print('Pillow Version:', PIL.__version__)

from contextlib import contextmanager
from timeit import default_timer as timer

import functions as f

print("importing tensorflow", flush=True)
# TensorFlow import is slow enough to be worth timing on its own.
with f.timing("import tensorflow", 1):
    import tensorflow as tf
print(tf.__version__, flush=True)

import tffunctions as tff

x, y, z = f.get_lists_of_filenames()
print("got (", len(x), len(y), len(z), ") files.", flush=True)
units = len(x)
# Immediately overridden — presumably a deliberate small-run cap for quick
# benchmarks; TODO confirm before removing either line.
units = 100
print("---------------------------------")
with f.timing("make dataset with " + str(units) + " units.", units):
    ds = tff.make_tensor_slices_dataset_list(x[:units])
print("enumerate original.", flush=True)
tff.time_enumeration(ds, units)
print("---------------------------------")
with f.timing("map filename to image: " + str(units) + " units.", units):
    mapped = ds.map(tff.parse1and, tff.autotune)  # was parse1and
# Sanity-check script: verify the filename lists resolve to files on disk and
# compare the first entry against a known hard-coded sample path.
# NOTE(review): reconstructed from a collapsed one-line source; whether the
# "maybe file:" print belongs inside the `if maybe == first:` body is
# ambiguous in the original — placed outside here, confirm against intent.
from __future__ import absolute_import, division, print_function, unicode_literals
import os
from os.path import join
import pathlib
import math
import numpy as np
import glob
import ntpath
from os import listdir
import os.path
from contextlib import contextmanager
from timeit import default_timer as timer
import functions as f
import json

print("importing tensorflow")
with f.timing("import tensorflow", 1):
    import tensorflow as tf
print(tf.__version__)
import tffunctions as tff

x, y, z = f.get_lists_of_filenames()
print("got (", len(x), len(y), len(z), ") files.", flush=True)
first = x[0]
print("first file:", first, os.path.exists(first))
path = f.path_head(first)
filename = f.path_leaf(first)
print(path, filename)
maybe = 'L:/ss/sem/20180807_145028/camera/cam_front_center/20180807145028_camera_frontcenter_000000091.png'
if maybe == first:
    print("maybe =")
print("maybe file:", maybe, os.path.exists(maybe))
x, y, z = f.get_camera_folders(f.get_path(), root=f.get_root())
# Script setup: derive a fractional-seconds run key, import the stack, and
# define makeDatasets().
import datetime
import math  # BUG FIX: math.floor is used below but math was never imported here

t = datetime.datetime.utcnow().timestamp()
# Fractional part of the current timestamp, used as a (weakly) unique run key.
key = str(t - math.floor(t))
print("key:", key)

from os.path import join
import pathlib
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import glob
import ntpath
import json
from contextlib import contextmanager
from timeit import default_timer as timer

import functions as f

with f.timing("import tensorflow", 1):
    import tensorflow as tf

# datetime object containing current date and time
one = False
#one=True
autotune = tf.data.experimental.AUTOTUNE


def makeDatasets(filesets):
    """Return one from_tensor_slices dataset per file list in *filesets*."""
    # each dataset is one list
    return [tf.data.Dataset.from_tensor_slices(fileset) for fileset in filesets]
def process_folder(folder):
    """List everything in *folder* and time process_files() over it."""
    entries = listdir(folder)
    print(folder, 'folder has:', len(entries), 'files.')
    label = "process:" + str(len(entries)) + " files from: " + str(folder)
    with f.timing(label, units=len(entries)):
        process_files(folder, entries)
def time_enumeration(ds, units=1):
    """Time one full enumeration of dataset *ds* (units for the rate report)."""
    print("start enumeration.", flush=True)
    label = "enumerate over dataset " + str(units) + " units."
    with f.timing(label, units):
        do_enumeration(ds)