def preprocess(self, im):
    log('starting preprocess')
    file = None
    if is_file(im):
        file = im
        im = im.load()
    if len(im.shape) == 2:
        im = np.reshape(im, tuple(list(im.shape) + [1]))
    if im.shape[2] == 1:
        im = np.repeat(im, 3, axis=2)
    assert self.data_format == 'channels_last'
    assert self.channel_axis == 3
    assert self.nchan == 3
    log('starting preprocess ops')
    if len(im.shape) == 2:
        im = np.stack((im, im, im), axis=2)
        return self._preprocess_im(im, file)
    elif len(im.shape) == 3:
        return self._preprocess_im(im, file)
    elif len(im.shape) == 4:
        err('maybe this is the problem?')
        return arr([self._preprocess_im(i, file) for i in im])
    else:
        err('or this?')

def makefigs(root, fig_backend: str, overwrite=False, force=False, silent=True, filter=''):
    def inTags(fd):
        tagFile = File(fd['dataFile']).resrepext(FigData.TAG_EXT)
        if not tagFile:
            return False
        return filter in [s.lower() for s in json.loads(tagFile.read())]
    figDats = li([
        {
            'dataFile': mfig,
            'imgFile' : mfig.resrepext('png') if mfig.resrepext('png').exists else mfig.resrepext('svg')
        } for mfig in root.rglob('*.mfig')
    ]).filter(lambda fd: filter in fd['dataFile'].name.lower() or inTags(fd))
    log(f'{len(figDats)=}')
    if fig_backend == 'wolfram':
        from mlib.wolf.wolf_figs import WolfMakeFigsBackend
        backend = WolfMakeFigsBackend
    else:
        backend = MPLFigsBackend
    figDats = [obj(fd) for fd in figDats if overwrite or not fd['imgFile'].exists]
    backend.makeAllPlots(figDats, overwrite, force, silent=silent)

def todict(obj, classkey=None):
    global debug_i
    debug_i = debug_i + 1
    if debug_i == 100:
        raise Exception
    log('todict(' + classname(obj) + '): ' + str(obj))
    if isinstance(obj, dict):
        data = {}
        for (k, v) in obj.items():
            data[k] = todict(v, classkey)
        return data
    elif hasattr(obj, "_ast"):
        # noinspection PyCallingNonCallable,PyProtectedMember
        return todict(obj._ast())
    elif hasattr(obj, "__iter__") and not isinstance(obj, str):
        return [todict(v, classkey) for v in obj]
    elif hasattr(obj, "__dict__"):
        # noinspection PyUnresolvedReferences
        data = dict([(key, todict(value, classkey))
                     for key, value in obj.__dict__.items()
                     if not callable(value) and not key.startswith('_')])
        if classkey is not None and hasattr(obj, "__class__"):
            data[classkey] = obj.__class__.__name__
        return data
    else:
        return obj

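# Hypothetical usage sketch for todict (not part of the original source; Point and Line
# are made-up classes): todict walks __dict__ recursively, skipping callables and
# underscore-prefixed keys, and records each object's class name under `classkey`.
def _todict_usage_sketch():
    class Point:
        def __init__(self, x, y):
            self.x = x
            self.y = y
    class Line:
        def __init__(self, a, b):
            self.a = a
            self.b = b
    d = todict(Line(Point(0, 0), Point(1, 2)), classkey='__class__')
    # d == {'a': {'x': 0, 'y': 0, '__class__': 'Point'},
    #       'b': {'x': 1, 'y': 2, '__class__': 'Point'},
    #       '__class__': 'Line'}
    return d
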
def count_split(spl):
    data = {}
    root = DATA_FOLDER.resolve('ImageNet/output_tf')
    filenames = root.glob(f'{spl}*').map(lambda x: x.abspath).tolist()
    ds = tf.data.TFRecordDataset(filenames)
    image_feature_description = {
        'image/height'      : tf.io.FixedLenFeature([], tf.int64),
        'image/width'       : tf.io.FixedLenFeature([], tf.int64),
        'image/colorspace'  : tf.io.FixedLenFeature([], tf.string),
        'image/channels'    : tf.io.FixedLenFeature([], tf.int64),
        'image/class/label' : tf.io.FixedLenFeature([], tf.int64),
        'image/class/synset': tf.io.FixedLenFeature([], tf.string),
        'image/class/text'  : tf.io.FixedLenFeature([], tf.string),
        'image/format'      : tf.io.FixedLenFeature([], tf.string),
        'image/filename'    : tf.io.FixedLenFeature([], tf.string),
        'image/encoded'     : tf.io.FixedLenFeature([], tf.string),
    }
    log('looping imagenet')
    for i, raw_record in enum(ds):
        example = tf.io.parse_single_example(raw_record, image_feature_description)
        if i % 100 == 0:
            log(f'on image {i}')
        classname = utf_decode(example['image/class/text'].numpy())
        if classname not in data:
            data[classname] = 1
        else:
            data[classname] += 1
    return data

def __call__(self, *args, **kwargs):
    from mlib.boot import log
    from mlib.boot.mlog import LOG_LEVEL
    if level is not None and LOG_LEVEL.value < level.value:
        return ff(*args, **kwargs)
    ags = '' if not with_args else f'{args=}{kwargs=}'
    inst = '' if not with_instance else f' of {args[0]}'
    cls = '' if not with_class else f'{cn(args[0])}.'
    first_str = '' if not first_only else 'first '
    s = f'{first_str}{cls}{ff.__name__}({ags}){inst}'
    if single_stack:
        self.my_stacker = MagicTermLineLogger(ff)
    if not first_only or self.first_call:
        log(f'Invoking {s}...', ref=1, stacker=self.my_stacker)
    from time import time
    start_time = time()
    result = ff(*args, **kwargs)
    duration = time() - start_time
    r_str = '' if not with_result else f' (result={result})'
    t = '' if not timer else f' ({duration=:.2f}s)'
    if not invoke_only and (not first_only or self.first_call):
        log(f'Finished {s}!{r_str}{t}', ref=1, stacker=self.my_stacker)
    if single_stack:
        self.my_stacker.done = True
    self.first_call = False
    return result

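# Hedged sketch (an assumption, not the original API): the __call__ above reads like the
# wrapper half of a logging-decorator factory, with ff, level, with_args, with_result,
# timer, first_only, single_stack, invoke_only, with_instance and with_class bound as
# closure/instance state by an enclosing decorator roughly shaped like this:
def _log_invokation_sketch(*, level=None, with_args=False, with_instance=False,
                           with_class=False, with_result=False, timer=False,
                           first_only=False, single_stack=False, invoke_only=False):
    def deco(ff):
        class _Wrapper:
            def __init__(self):
                self.first_call = True
                self.my_stacker = None
            def __call__(self, *args, **kwargs):
                # ... body as in __call__ above ...
                return ff(*args, **kwargs)
        return _Wrapper()
    return deco
# usage (hypothetical): @_log_invokation_sketch(timer=True, with_args=True)
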
def image(cls, fd):
    log('making image')
    im = wl.Image(str(fd.x).replace('[', '{').replace(']', '}'))  # "Byte"
    im = str(im).replace("'", "")
    from mlib.wolf.wolfpy import weval
    im = weval(im)
    log('made image')
    return im

def deleteIfExists(self, silent=False):
    if self.exists:
        self.delete(silent=silent)
    else:
        if not silent:
            log(f'not deleting {self} because it does not exist')
    return self

def run(self, cfg):
    self.daily_reminder('remember to update algorithm versions')
    from HEP_lib import compare_IBI, HEP_Subject
    from mlib.boot import log
    from mlib.web.shadow import Shadow
    from qrsalg import ECGLAB_Original
    from mlib import term
    term.Progress.PROGRESS_DISABLED = False
    shadow = Shadow(show=False)
    SUBJECTS = [
        HEP_Subject(
            f'EP1163_{x}',
            [
                'TEN_SECOND_PILOT',
                # 'TEN_MINUTE_TEST',
                # 'FULL',
            ][0],
            [
                # (ManualPeakDetection, inf, 'CALC'),
                None,
                (ECGLAB_Original, inf, 'CALC'),
                (ECGLAB_Original, inf, 'CALC'),
                # (ecglab_fast, inf, 'CALC'),
                # (ecglab_slow, inf, 'CALC'),
                # (pan_tompkins, inf, 'CALC'),
            ][x],
            (2, 5)  # (555, 562)
        ) for x in range(1, 2)
    ]
    for sub in SUBJECTS:
        sub.rPeaks  # build shadow docs
    if SUBJECTS[0].times()[-1] > 602:
        HR_ASSERTION_THRESH = 1.33  # Hz, Park et al.
        SEC_WIN = 600  # seconds (10 min window), Park et al.
        for sub in SUBJECTS:
            t = sub.times()
            random.seed(sub.mindex)
            start = random.randint(0, int(t[-1] - (SEC_WIN + 2)))
            end = start + SEC_WIN
            beats = sub.rPeaks / sub.Fs
            beats = beats[np.bitwise_and(beats >= start, beats < end)]
            beats_per_sec = len(beats) / SEC_WIN
            log(f'heartrate(10min sample) of subject {sub.mindex} = {beats_per_sec}Hz')
            assert beats_per_sec < HR_ASSERTION_THRESH
    if len(SUBJECTS) > 1 and COMPARE_IBI:
        shadow.fig(
            [[plot] for s in SUBJECTS for plot in s.plots()] +
            [[compare_IBI(SUBJECTS[0], SUBJECTS[1])]]
        )
    if _SAVE_DATA:
        [s.savepeaks() for s in SUBJECTS]

def FigureTable(*figs_captions, resources_root=None, exp_id=None, editable=False):
    children = [Script(js='''hotElements=[]''')]
    my_stacker = MagicTermLineLogger(FigureTable)
    for maybe_pair in [f for f in figs_captions if f]:
        was_tuple = istuple(maybe_pair)
        if was_tuple:
            fig, caption = maybe_pair
        else:
            fig = maybe_pair
            caption = None
        if is_file(fig):
            if not fig:
                continue
            fig = File(fig).copy_into(resources_root, overwrite=True)
            fig = HTMLImage(fig.abspath, fix_abs_path=True)
        if not caption:
            children.append(fig)
        else:
            the_id = f'{exp_id}.{".".join(File(fig).names(keepExtension=False)[-1:])}'
            log(f'creating figure: {the_id}', stacker=my_stacker)
            children.append(
                TableRow(
                    DataCell(fig),
                    DataCell(
                        HTML_P(
                            caption,
                            id=the_id,
                        ) if not editable else TextArea(
                            caption,
                            id=the_id,
                            **{'class': 'textcell'}
                        ),
                        Script(
                            js='''(() => {hotElements.push(document.currentScript.parentNode.childNodes[0])})()'''
                        ),
                        **{'class': 'parentcell'},
                    )
                )
            )
    my_stacker.done = True
    return Table(
        *children,
        Script(js='''
            onload_funs.push(() => {
                hotElements.forEach((e) => {
                    original_value = apiGET(e.id).de_quote()
                    e.setText(original_value)
                    if (e.tagName === 'TEXTAREA') {
                        $(e).on('input', _ => {
                            apiFun(e.id, e.value)
                        })
                    }
                })
            })
        ''')
    )

def pingChecker():
    f = File('_logs/local/pingchecker.log', w='')
    p = shell('ping www.google.com')
    while True:
        line = p.readline()
        if len(line) == 0:
            log('pingchecker got EOF')
            f.append(f'({toc_str()})got EOF')
            break
        else:
            f.append(f'({toc_str()}){utf_decode(line)}')

def paths(self):
    if not self.isdir:
        log(f'{self}:{self.isdir=}')
        err('not dir!')
    a = li([self.join(name) for name in sort(os.listdir(self.abspath))])
    if self.DELETE_DS_STORE:
        Folder(self)['.DS_Store'].deleteIfExists(silent=True)
    if self.IGNORE_DS_STORE:
        a = a.filtered(lambda n: File(n).name != '.DS_Store')
    return a

def __init__(self, goal, verb='doing', pnoun='things'):
    from mlib.boot import log
    self.last = 0
    self.goal = goal
    self._internal_n = 1
    log(f'{verb} $ {pnoun}', f'{goal:,}')
    self._instances += [self]
    self.entered = False
    super().__init__()
    self.DISABLED = self.PROGRESS_DISABLED

def _after_thing(datagen, nam):
    log('saving examples')
    exs = datagen.examples()
    for idx, ex in enum(exs):
        save_dnn_data(
            resampleim(ex[1], 100, 100, 3),  # was taking up to 3 seconds with large images
            nam,
            ex[0],
            'png'
        )
    log('finished saving examples')

def print(self, normally=False):
    from mlib.boot import log
    s = self.current_line()
    if len(s) < self.last_len:
        for _ in range(self.last_len - len(s)):
            s += ' '
    if self.ended() or normally:
        print(s)
    else:
        print(f'{s}\r', end="", flush=True)
    log(s, silent=True)
    self.last_len = len(s)

def __delitem__(self, key):
    if self.isdirsafe:
        del Folder(self)[key]
    else:
        assert self.exists
        data = self.load()
        if not self.default_quiet:
            log(f'deleting {key}')
        del data[key]
        self.save(data)
        if not self.default_quiet:
            log(f'deleted {key}')

def class_model_report(root_class):
    # because I can't find a good python plantUML library
    report = StringExtension('~~MY MODEL~~\n')
    report.append_by_lines()
    superclasses = all_superclasses(root_class)
    subclasses = all_subclasses(root_class)
    report += f'\troot:{root_class.__name__}'
    for s in superclasses:
        report += f'\t\tsuper :{s.__name__}'
    for s in subclasses:
        report += f'\t\tsub :{s.__name__}'
    log(report)
    return report

def fill_cmat(y_true, y_pred):
    [inc(cmat, (pred, tru)) for tru, pred in zip(*prep_ys(y_true, y_pred))]
    global batch_count, total_steps, batch_sub_count
    if batch_sub_count is not None:
        batch_sub_count += 1
    if batch_sub_count is None or batch_sub_count == 3:
        log(f'Finished {batch_count}/{total_steps} steps')
        batch_count += 1
    if batch_sub_count == 3:
        batch_sub_count = 1
    return 0

def smallify():
    err('dev')
    files = glob.glob(sys.argv[1] + "/**/*.png", recursive=True)
    i = 0
    log('found ' + str(len(files)) + ' images')
    with Progress(len(files)) as prog:
        for f in files:
            p = shell(['convert', f, '-resize', '20x20', f], silent=True)
            p.interact()
            i = i + 1
            prog.tick()
    log('resized ' + str(i) + ' images')
    sys.exit()

def __setitem__(self, key, value):
    if self.isdirsafe:
        Folder(self)[key] = value
    else:
        if self.exists:
            data = self.load()
        else:
            data = {}
        if not self.default_quiet:
            log('saving ' + str(key))
        data[key] = value
        self.save(data)
        if not self.default_quiet:
            log('saved ' + str(key))

async def runasync():
    log('running runasync')
    async with WolframEvaluatorPool() as pool:
        countr = Counter(a=1)

        async def logAfter(wlexp, c, total):
            await pool.evaluate(wlexp)
            log(f'Finished making {c["a"]}/{total} figures')
            c['a'] += 1

        tasks = []
        # for exp in wolfram_expressions:
        for exp in wolfram_expressions2:
            tasks += [logAfter(exp, countr, len(figDats))]
        await asyncio.wait(tasks)

def xcorr(x, y, Fs, lagSecs=30):
    log('in xcorr')
    maxlags = np.floor(Fs * lagSecs)
    x = x - np.mean(x)
    y = y - np.mean(y)
    x = x.flatten()
    y = y.flatten()
    Nx = len(x)
    if Nx != len(y):
        raise ValueError('x and y must be equal length')
    ccc = np.correlate(x, y, mode='full')
    if maxlags is None:
        maxlags = Nx - 1
    if maxlags >= Nx or maxlags < 1:
        raise ValueError('maxlags must be None or strictly positive < %d' % Nx)
    ccc = ccc[int(Nx - 1 - maxlags):int(Nx + maxlags)]
    denom = sqrt(
        np.correlate(x, x, mode='full')[len(x) - 1] *
        np.correlate(y, y, mode='full')[len(y) - 1]
    )
    if denom == 0:
        return None, None, None, None, None
    for idx, cc in enumerate(ccc):
        ccc[idx] = cc / denom
    mx = max(ccc)
    mn = min(ccc)
    mx_latency = ccc.tolist().index(mx)
    mn_latency = ccc.tolist().index(mn)
    # index `maxlags` of the sliced array is zero lag, so latency in samples is index - maxlags
    mx_latency_secs = (mx_latency - maxlags) / Fs
    mn_latency_secs = (mn_latency - maxlags) / Fs
    return ccc, mx, mn, mx_latency_secs, mn_latency_secs

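# Hypothetical usage sketch for xcorr (not in the original source): cross-correlating a
# 1 Hz sine with a copy of itself delayed by 0.25 s at Fs = 100 Hz should put the
# maximum-correlation latency near -0.25 s (y lags x), well inside the default 30 s lag window.
def _xcorr_usage_sketch():
    Fs = 100
    t = np.arange(0, 60, 1 / Fs)
    x = np.sin(2 * np.pi * 1.0 * t)
    y = np.sin(2 * np.pi * 1.0 * (t - 0.25))
    ccc, mx, mn, mx_lat, mn_lat = xcorr(x, y, Fs)
    log(f'{mx_lat=}')  # expected to be approximately -0.25
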
def build_net(self, FLAGS):
    dims = [self.HEIGHT_WIDTH, self.HEIGHT_WIDTH, self.HEIGHT_WIDTH]
    dims[self.CI] = 3
    from tensorflow.python.keras import Input
    self.inputs = Input(tuple(dims))
    self.net = self.tf.python.keras.models.Model(
        inputs=self.inputs,
        outputs=self.assemble_layers(),
        name=self.FULL_NAME.replace(' ', '_')
    )
    if self.WEIGHTS is not None and FLAGS.TRANSFER_LEARNING:
        # transfer learning
        self._load_weights()
        self.write_weight_reports()
        if self.FLIPPED_CONV_WEIGHTS:
            self._flip_conv_weights()
    elif (self.WEIGHTS is not None) and (not FLAGS.TRANSFER_LEARNING):
        log('not loading weights because TRANSFER_LEARNING is disabled')
    self._compile(net_mets.METS_TO_USE())

def _take_into_vagrant(p):
    log('upping')
    p.sendatprompt('vagrant up')
    log('sshing')
    p.sendatprompt('vagrant ssh')
    log('setting prompt')
    p.setprompt()
    log('cding')
    # p.prompt()  # an extra prompt expect like in the build process, I think
    p.sendatprompt('cd ../dnn')

def count():
    log('count here 1')
    data = {
        'train'     : count_split("train"),
        'validation': count_split("validation"),
    }
    real_data = {}
    for k, v in listitems(data['train']):
        real_data[k] = {'train': v}
    for k, v in listitems(data['validation']):
        real_data[k]['validation'] = v
    real_data = json.dumps(real_data, indent=2)
    log(f'data sample: {real_data[:20]}')
    Folder('_data').mkdir()
    File('_data/imagenet_count.json').write(real_data)

def gc(*args, AUTO_LOGIN=False, RECURSE=False):
    SSH = len(args) <= 1
    arg = 'ssh' if SSH else args[1]
    STOPIN = arg == 'stopin'
    if STOPIN:
        SSH = True
        arg = 'ssh'
    STOP = arg == 'stop'
    START = arg == 'start'
    LIST = arg == 'list'
    PUT = arg == 'put'
    GET = arg == 'get'
    if PUT or GET:
        arg = 'scp'
    COMPUTE = ['/Users/matt/google-cloud-sdk/bin/gcloud', 'compute']
    if STOP or START or LIST:
        COMPUTE += ['instances']
    COMMAND = COMPUTE + [arg]
    if STOP or START or SSH:
        COMMAND += PROJECT
    if PUT or GET:
        FROM = ((PROJECT_NAME + ':') if GET else '') + abspath(args[2], remote=GET)
        TO = ((PROJECT_NAME + ':') if PUT else '') + abspath(args[3], remote=PUT)
        if File(FROM).isdir() or RECURSE:
            COMMAND.append('--recurse')
        COMMAND.extend([FROM, TO])
    if SSH:
        COMMAND.append('--ssh-flag="-vvv"')
    p = GCShell(COMMAND)
    if STOPIN:
        p.login()
        if args[2] == 'w':
            p.sendline('./stopinw')
            p.readline()  # consume the echoed input line
            w = utf_decode(p.readline())
            if '1969' in w:
                log('no shutdown is scheduled')
            else:
                log(f'shutdown is scheduled for {w}')
        elif args[2] == 'c':
            p.sudo(['shutdown', '-c'])
            log('cancelled shutdown')
        else:
            p.sudo(['shutdown', '-h', args[2]])
            log(f'scheduled shutdown for {args[2]} mins')
        p.close()
        return None
    else:
        p = GCProcess(COMMAND)
        if AUTO_LOGIN:
            p.login()
        return p

def prep_log_file(filename, new=False):
    if filename is None:
        filename = os.path.basename(sys.argv[0]).replace('.py', '')
    if ismac():
        filename = f'_logs/local/{filename}.log'
    else:
        filename = f'_logs/remote/{filename}.log'
    from mlib.file import Folder
    filename = Folder(pwd())[filename]
    if new:
        filename = getNextIncrementalFile(filename)
    if Project.LOG_FILE is None:
        Project.LOG_FILE = File(filename)
    Project.LOG_FILE.deleteIfExists()
    Project.LOG_FILE.write('')
    mlog.LOG_FILE = Project.LOG_FILE
    if not mlog.QUIET:
        log(f'Initialized log file: {File(Project.LOG_FILE).relpath}')

def save(self, data, silent=None):
    import mlib.JsonSerializable as JsonSerializable
    for ext in save_extensions:
        new_data, was_converted = ext(data)
        if was_converted:
            data = new_data
    if isinstsafe(data, JsonSerializable.JsonSerializable):
        import json
        data = json.loads(data.to_json())
    elif isinstance(data, JsonSerializable.obj):
        data = data.toDict()
    if not silent and not self.default_quiet or (silent is False):
        log('saving ' + self.abspath)
    if self.ext in ['yml', 'yaml']:
        import yaml
        self.mkparents()
        self.write(yaml.dump(data, sort_keys=False))
    elif self.ext in JSON_EXTS:
        self.mkparents()
        import json
        self.write(json.dumps(data, indent=4))
    elif self.ext == 'mat':
        self.mkparents()
        from scipy.io import savemat
        savemat(self.abspath, data)
    elif self.ext in PICKLE_EXTS:
        self.mkparents()
        with open(self.abspath, 'wb') as f:
            import pickle
            pickle.dump(data, f, protocol=pickle.HIGHEST_PROTOCOL)
    elif self.ext == 'png':
        self.mkparents()
        im_data = np.vectorize(np.uint8)(data)
        import imageio
        imageio.imwrite(self.abspath, im_data)
    else:
        err(f'saving does not yet support .{self.ext} files')

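# Hedged usage sketch (assumption: save() lives on the File class used throughout this
# codebase and dispatches on the file extension; the demo paths and the membership of
# 'p' in PICKLE_EXTS are made up): saving a small dict in a few formats.
def _save_usage_sketch():
    File('demo.json').save({'a': 1, 'b': [1, 2, 3]})  # written via json.dumps(indent=4)
    File('demo.yml').save({'a': 1, 'b': [1, 2, 3]})   # written via yaml.dump
    File('demo.p').save({'a': 1})                     # pickled, assuming 'p' is in PICKLE_EXTS
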
def __init__(self, file, just_sync_at_end=True, offline=False):
    if offline:
        Database.offline_mode = True
    self.__dict__['just_sync_at_end'] = just_sync_at_end
    super().__init__(file)
    if not self.offline_mode:
        if file.wc.exists:
            self.pull()
        else:
            if not file.exists:
                self._hard_reset()
            self.push()
    else:
        warn(f'{self} is not performing initial sync since {self.offline_mode=}')
    write_webloc(file.abspath.replace('.json', '.webloc'), file.wcurl)
    log(f'database url: {file.wcurl=}')
    if just_sync_at_end:
        atexit.register(self.push)

def push(self):
    if self.GIT.is_dirty():
        log(f'A diff between the index and the commit’s tree your HEAD points to: {self.GIT.index.diff(self.GIT.head.commit)}')
        log(f'A list of untracked files: {self.GIT.untracked_files}')
        inp = input('Ok to add, commit and push? [y/n] >')
        inp = inp in ['y', 'Y']
        if inp:
            self.GIT.index.add('--all')
            inp = input("Commit Message: ")
            self.GIT.index.commit(inp.strip())
            self.GIT.remotes[0].push()
    else:
        log('repo is not dirty')

def write_weight_reports(self):
    import h5py
    weights_file = h5py.File(self.weightsf(), "r")
    weights_report_file = self.arch_summary_folder[
        f'{self.ARCH_LABEL}_weights.txt'
    ]
    o_weights_report_file = self.arch_summary_folder[
        f'{self.ARCH_LABEL}_weights_matlab.txt'
    ]
    weights_report_file.write('')

    def processGroup(group, rep, indent=0):
        for ke in listkeys(group):
            rep += '\t' * indent
            rep += ke
            item = group[ke]
            if 'Dataset' in cn(item):
                rep += f'\t\t{item.shape} {item.dtype}\n'
            elif 'Group' in cn(item):
                rep += '\n'
                rep = processGroup(item, rep, indent + 1)
            else:
                err(f'what is this: {cn(item)}')
        return rep

    report = ''
    report = processGroup(weights_file, report)
    log('writing weights report...')
    weights_report_file.write(report)
    log('finished writing weights report')
    log('writing matlab weight report...')
    warn('THERE ARE 2 VERSIONS OF THE ONNX FILES IN _weights/matlab AND I DONT KNOW THE DIFFERENCE')
    import onnx
    o_model = onnx.load(self.oweightsf())
    o_weights_report_file.write(repr(o_model.graph.node))
    log('finished writing matlab weight report...')